56 lines
1.9 KiB
JavaScript
56 lines
1.9 KiB
JavaScript
const fs = require("fs");
|
|
const path = require("path");
|
|
const { resolveData } = require("./paths");
|
|
|
|
/**
 * Writes approved training examples to a timestamped JSONL file.
 *
 * Examples are produced by `approvedExamples` from the rows returned by the
 * `feedback` and `corrections` stores' `all()` methods.
 */
class TrainingExporter {
  /**
   * @param {object} options
   * @param {{ all: Function }} options.feedback - Store whose `all()` result is used as the feedback rows.
   * @param {{ all: Function }} options.corrections - Store whose `all()` result is used as the correction rows.
   * @param {string} [options.outputDir] - Directory for export files; when falsy,
   *   `resolveData("exports")` is used instead.
   */
  constructor({ feedback, corrections, outputDir }) {
    this.feedback = feedback;
    this.corrections = corrections;
    this.outputDir = outputDir || resolveData("exports");
  }

  /**
   * Writes every approved example to `lumi-ai-<format>-<timestamp>.jsonl`
   * inside `outputDir`, one JSON object per line.
   *
   * - `"dpo"` rows carry `prompt`, `preferred_answer` and `rejected_answer`.
   * - `"instruction"` rows carry `instruction`, an empty `input`, and `output`.
   *
   * The file ends with a newline when at least one row is written and is
   * empty otherwise.
   *
   * @param {"instruction"|"dpo"} format - Layout of each exported row.
   * @returns {{ file: string, filename: string, count: number, format: string }}
   *   Full path, base name, number of rows written, and the requested format.
   * @throws {Error} When `format` is neither `"instruction"` nor `"dpo"`.
   */
  export(format) {
    if (!["instruction", "dpo"].includes(format)) throw new Error("Unknown training export format.");

    const examples = approvedExamples(this.feedback.all(), this.corrections.all());

    // ":" and "." are replaced so the ISO timestamp is usable in a file name.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const filename = `lumi-ai-${format}-${timestamp}.jsonl`;
    const file = path.join(this.outputDir, filename);

    const rows = examples.map((entry) => format === "dpo"
      ? {
        prompt: entry.prompt,
        preferred_answer: entry.preferred_answer,
        rejected_answer: entry.rejected_answer
      }
      : {
        instruction: entry.prompt,
        input: "",
        output: entry.preferred_answer
      });

    // writeFileSync does not create missing parent directories, so make sure
    // the target directory exists (a no-op when it is already there).
    fs.mkdirSync(this.outputDir, { recursive: true });
    fs.writeFileSync(file, rows.map((row) => JSON.stringify(row)).join("\n") + (rows.length ? "\n" : ""));

    return { file, filename: path.basename(file), count: rows.length, format };
  }
}
|
|
|
|
/**
 * Pairs approved feedback rows with their preferred answers.
 *
 * A feedback row is kept only when `export_approved` is truthy and `status`
 * is `"approved"`. Its preferred answer is the `corrected_answer` of the
 * approved correction whose `source_feedback_id` equals the row's `id`
 * (the last such correction wins), falling back to the row's own
 * `optional_correction`. Rows with no preferred answer are dropped.
 *
 * @param {Array<object>} feedbackRows - Feedback records.
 * @param {Array<object>} correctionRows - Correction records.
 * @returns {Array<{ prompt: *, preferred_answer: *, rejected_answer: * }>}
 *   One example per usable feedback row, in feedback order.
 */
function approvedExamples(feedbackRows, correctionRows) {
  // Index usable corrections by the feedback id they refer to.
  const correctionLookup = new Map();
  correctionRows.forEach((correction) => {
    const isUsable = correction.approved && correction.corrected_answer;
    if (isUsable) {
      correctionLookup.set(correction.source_feedback_id, correction);
    }
  });

  return feedbackRows.reduce((examples, row) => {
    const isExportable = row.export_approved && row.status === "approved";
    if (!isExportable) {
      return examples;
    }

    let preferredAnswer = correctionLookup.get(row.id)?.corrected_answer;
    if (!preferredAnswer) {
      preferredAnswer = row.optional_correction;
    }

    if (preferredAnswer) {
      examples.push({
        prompt: row.user_message,
        preferred_answer: preferredAnswer,
        rejected_answer: row.assistant_answer
      });
    }
    return examples;
  }, []);
}
|
|
|
|
// Public API: the exporter class and the example-pairing helper it calls.
module.exports = { TrainingExporter, approvedExamples };
|