const fs = require("fs");
const crypto = require("crypto");
const { resolveData } = require("./paths");
const { atomicJson, paginate } = require("./feedback");

/**
 * File-backed store for evaluation cases and for the results of running them.
 *
 * Both backing files are read as JSON objects of the shape `{ entries: [...] }`.
 * Writes go through the `atomicJson` helper imported from "./feedback".
 */
class EvalStore {
  /**
   * @param {object} [options]
   * @param {string} [options.casesFile] Path of the cases file. Falls back to
   *   `resolveData("evals", "cases.json")`.
   * @param {string} [options.resultsFile] Path of the results file. Falls back to
   *   `resolveData("evals", "results.json")`.
   */
  constructor(options = {}) {
    this.casesFile = options.casesFile || resolveData("evals", "cases.json");
    this.resultsFile = options.resultsFile || resolveData("evals", "results.json");
  }

  /**
   * Create a new eval case and put it at the front of the cases list.
   *
   * Every text field is stringified, trimmed, and truncated to a per-field
   * maximum length before being stored.
   *
   * @param {object} values Raw field values (prompt, role, origin,
   *   expected_behavior, forbidden_behavior, expected_link, notes).
   * @param {object} actor The creator; `actor.id` is stored as `created_by`.
   * @returns {object} The stored entry.
   * @throws {Error} When the cleaned prompt is empty.
   */
  add(values, actor) {
    const prompt = clean(values.prompt, 6000);
    if (!prompt) throw new Error("Eval prompt is required.");

    const entry = {
      id: crypto.randomUUID(),
      prompt,
      role: normalizeRole(values.role),
      origin: clean(values.origin, 80) || "webui",
      expected_behavior: clean(values.expected_behavior, 8000),
      forbidden_behavior: clean(values.forbidden_behavior, 8000),
      expected_link: clean(values.expected_link, 2000),
      notes: clean(values.notes, 4000),
      created_by: String(actor.id),
      created_at: new Date().toISOString()
    };

    const store = this.readCases();
    store.entries.unshift(entry);
    atomicJson(this.casesFile, store);
    return entry;
  }

  /**
   * Return one page of stored cases.
   *
   * @param {object} [params]
   * @param {number} [params.page=1]
   * @param {number} [params.pageSize=20]
   * @returns {*} Whatever `paginate` returns for the stored entries.
   */
  list({ page = 1, pageSize = 20 } = {}) {
    return paginate(this.readCases().entries, page, pageSize);
  }

  /**
   * Return the first `limit` stored results. `runAll` prepends new results,
   * so the most recent run comes first.
   *
   * @param {number} [limit=100]
   * @returns {object[]}
   */
  results(limit = 100) {
    return this.readResults().entries.slice(0, limit);
  }

  /**
   * Remove the case with the given id.
   *
   * @param {string} id
   * @returns {boolean} `true` when a case was removed, `false` when no case
   *   matched (the file is not rewritten in that situation).
   */
  delete(id) {
    const store = this.readCases();
    const before = store.entries.length;
    store.entries = store.entries.filter((entry) => entry.id !== id);
    if (store.entries.length === before) return false;
    atomicJson(this.casesFile, store);
    return true;
  }

  /**
   * Run every stored case through `provider.generate`, one at a time, grade
   * each answer with `evaluateCase`, and persist the outcome.
   *
   * A case whose generation or grading throws is recorded with status
   * "manual_review" and the error text; the remaining cases still run.
   * New results are prepended to the results file, which is capped at 1000
   * entries.
   *
   * @param {object} params
   * @param {object} params.provider Object exposing an async `generate(request)`.
   * @param {object} params.actor The user who triggered the run; `actor.id`
   *   is embedded in the simulated user id.
   * @returns {Promise<object[]>} The results of this run, in case order.
   */
  async runAll({ provider, actor }) {
    const results = [];

    for (const testCase of this.readCases().entries) {
      const simulatedUser = {
        id: `eval:${actor.id}`,
        username: "lumi-eval",
        isAdmin: testCase.role === "admin",
        isMod: testCase.role === "mod"
      };

      try {
        const response = await provider.generate({
          message: testCase.prompt,
          user: simulatedUser,
          sessionId: `eval:${testCase.id}:${Date.now()}`,
          scope: "eval",
          originContext: {
            origin: testCase.origin,
            platform: testCase.origin,
            role: testCase.role,
            permission_context: { webui_actions_allowed: false }
          }
        });
        results.push(evaluateCase(testCase, response.text, response.links));
      } catch (error) {
        results.push({
          case_id: testCase.id,
          prompt: testCase.prompt,
          status: "manual_review",
          // A provider may reject with a non-Error value (a string, null, ...).
          // Reading `.message` unguarded would either lose the text or throw
          // here and abort the whole run.
          error: error?.message ?? String(error),
          run_at: new Date().toISOString()
        });
      }
    }

    // Read the results file only after all cases have finished so that the
    // merge is based on the state at write time.
    const store = this.readResults();
    store.entries = [...results, ...store.entries].slice(0, 1000);
    atomicJson(this.resultsFile, store);
    return results;
  }

  /** @returns {{entries: object[]}} The parsed cases store. */
  readCases() {
    return readStore(this.casesFile);
  }

  /** @returns {{entries: object[]}} The parsed results store. */
  readResults() {
    return readStore(this.resultsFile);
  }
}

/**
 * Grade one answer against the automated checks of a case.
 *
 * - Every entry of `expected_behavior` must occur in the answer
 *   (case-insensitive substring match).
 * - No entry of `forbidden_behavior` may occur in the answer
 *   (case-insensitive substring match).
 * - `expected_link`, when set, must occur verbatim in the answer or equal the
 *   `href` of one of `links`.
 *
 * A case with no automated checks at all gets status "manual_review".
 *
 * @param {object} testCase The stored case.
 * @param {*} answer The generated answer; falsy values are treated as "".
 * @param {Array<{href: string}>} [links=[]] Links returned with the answer.
 *   Anything that is not an array is treated as an empty list.
 * @returns {object} Result record with `status`, the three per-check flags,
 *   the answer truncated to 16000 characters, and a `run_at` timestamp.
 */
function evaluateCase(testCase, answer, links = []) {
  const text = String(answer || "");
  // Lower-case the answer once instead of once per check.
  const loweredText = text.toLowerCase();
  // The default parameter only covers `undefined`; guard against `null` and
  // other non-array values so a gradable answer is not lost to a TypeError.
  const linkList = Array.isArray(links) ? links : [];

  const expected = splitChecks(testCase.expected_behavior);
  const forbidden = splitChecks(testCase.forbidden_behavior);

  const expectedPass = expected.every((check) => loweredText.includes(check.toLowerCase()));
  const forbiddenPass = forbidden.every((check) => !loweredText.includes(check.toLowerCase()));
  const linkPass =
    !testCase.expected_link ||
    text.includes(testCase.expected_link) ||
    linkList.some((link) => link?.href === testCase.expected_link);

  const hasAutomatedChecks = expected.length || forbidden.length || testCase.expected_link;

  let status;
  if (!hasAutomatedChecks) {
    status = "manual_review";
  } else if (expectedPass && forbiddenPass && linkPass) {
    status = "pass";
  } else {
    status = "fail";
  }

  return {
    case_id: testCase.id,
    prompt: testCase.prompt,
    status,
    expected_pass: expectedPass,
    forbidden_pass: forbiddenPass,
    link_pass: linkPass,
    answer: text.slice(0, 16000),
    run_at: new Date().toISOString()
  };
}

/**
 * Split a free-text field into individual checks. Checks are separated by
 * newlines or semicolons; surrounding whitespace is trimmed and empty pieces
 * are dropped.
 *
 * @param {*} value
 * @returns {string[]}
 */
function splitChecks(value) {
  return String(value || "")
    .split(/\r?\n|;/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/**
 * Read a `{ entries: [...] }` store from disk.
 *
 * Any failure (missing file, unreadable file, invalid JSON, unexpected shape)
 * yields an empty store rather than an error.
 *
 * NOTE(review): a corrupt file is also treated as empty, so the next write
 * through `add`/`runAll` will replace it. Confirm this best-effort behavior
 * is intended.
 *
 * @param {string} file
 * @returns {{entries: object[]}}
 */
function readStore(file) {
  try {
    const parsed = JSON.parse(fs.readFileSync(file, "utf8"));
    return { entries: Array.isArray(parsed.entries) ? parsed.entries : [] };
  } catch {
    return { entries: [] };
  }
}

/**
 * Stringify a value, trim it, and cut it to at most `max` characters.
 * Falsy values become the empty string.
 *
 * @param {*} value
 * @param {number} max
 * @returns {string}
 */
function clean(value, max) {
  return String(value || "").trim().slice(0, max);
}

/**
 * Restrict a role to the known set; anything else becomes "user".
 *
 * @param {*} value
 * @returns {"admin"|"mod"|"user"}
 */
function normalizeRole(value) {
  return ["admin", "mod", "user"].includes(value) ? value : "user";
}

module.exports = { EvalStore, evaluateCase };