const fs = require("fs"); const path = require("path"); const KNOWLEDGE_SCOPES = Object.freeze(["corrections", "community", "plugins", "core"]); const SCOPE_PRIORITY = Object.freeze({ corrections: 400, community: 300, plugins: 200, core: 100 }); const ACTIVE_STATUSES = new Set(["active", "published", "approved"]); const HIDDEN_STATUSES = new Set(["archived", "deleted", "disabled", "draft"]); const VISIBILITY_VALUES = new Set(["user", "mod", "admin"]); const PLACEHOLDER_SUGGEST_RESERVED_KEYS = new Set([ "id", "title", "scope", "status", "priority", "visibility", "category", "tags", "generated", "editable", "created_at", "updated_at" ]); const SEARCH_STOPWORDS = new Set([ "a", "an", "and", "are", "about", "describe", "for", "identify", "is", "me", "of", "please", "tell", "the", "to", "what", "who", "was", "were" ]); const knowledgeIndexCache = new Map(); function knowledgeRoot(rootDir = process.cwd()) { return path.join(rootDir, "knowledge"); } function ensureKnowledgeDirs(rootDir = process.cwd()) { const root = knowledgeRoot(rootDir); for (const scope of KNOWLEDGE_SCOPES) { fs.mkdirSync(path.join(root, scope), { recursive: true }); } return root; } function migrateSingleBracePlaceholders(rootDir = process.cwd()) { const root = ensureKnowledgeDirs(rootDir); let changed = 0; for (const filePath of listKnowledgeFiles(rootDir)) { const content = fs.readFileSync(filePath, "utf8"); const next = content.replace(/(^|[^\{])\{([A-Za-z0-9_.-]+\.[A-Za-z0-9_.-]+)\}(?!\})/g, (match, prefix, key) => { return `${prefix}{{${key}}}`; }); if (next !== content) { fs.writeFileSync(filePath, next); changed += 1; } } if (changed) knowledgeIndexCache.clear(); return { root, changed }; } function listKnowledgeFiles(rootDir = process.cwd()) { const root = knowledgeRoot(rootDir); const files = []; for (const scope of KNOWLEDGE_SCOPES) { const scopeRoot = path.join(root, scope); if (!fs.existsSync(scopeRoot)) continue; scanMarkdownFiles(scopeRoot, files); } return files.sort(); } function loadKnowledgeEntries(rootDir = process.cwd(), options = {}) { const root = knowledgeRoot(rootDir); const files = listKnowledgeFiles(rootDir); const cacheKey = `${path.resolve(root)}:${options.includeHidden ? "all" : "active"}`; const previous = knowledgeIndexCache.get(cacheKey) || { files: new Map(), entries: [] }; const nextFiles = new Map(); const entries = []; for (const filePath of files) { const stat = fs.statSync(filePath); const previousFile = previous.files.get(filePath); const cacheMeta = { mtimeMs: stat.mtimeMs, size: stat.size }; const parsed = previousFile && previousFile.mtimeMs === cacheMeta.mtimeMs && previousFile.size === cacheMeta.size ? previousFile.entry : parseKnowledgeFile(filePath, root, options); nextFiles.set(filePath, { ...cacheMeta, entry: parsed }); if (parsed) entries.push(parsed); } knowledgeIndexCache.set(cacheKey, { files: nextFiles, entries }); return entries.map(cloneKnowledgeEntry); } function searchFileKnowledge({ query = "", user, limit = 5, rootDir = process.cwd() } = {}) { const access = accessForUser(user); if (!access.authenticated) return []; const tokens = tokenSet(query); const entries = resolveVisibleKnowledgePlaceholders(loadKnowledgeEntries(rootDir) .filter((entry) => canSeeKnowledgeEntry(entry, access)) ); return entries .flatMap((entry) => entry.chunks.map((chunk) => scoreKnowledgeChunk(entry, chunk, tokens))) .filter((result) => result.matched || !tokens.size) .sort((a, b) => b.score - a.score || b.priority - a.priority || a.source_metadata.path.localeCompare(b.source_metadata.path)) .slice(0, Math.max(1, Math.min(Number(limit) || 5, 25))) .map((result) => ({ id: result.id, slug: result.slug, title: result.title, category: result.category, visibility: result.visibility, summary: result.summary, facts: result.facts, source: result.source, source_metadata: result.source_metadata })); } function listKnowledgePlaceholders({ user, rootDir = process.cwd(), includeHidden = false } = {}) { const access = accessForUser(user); if (!access.authenticated) return []; const entries = loadKnowledgeEntries(rootDir, { includeHidden }) .filter((entry) => canSeeKnowledgeEntry(entry, access)); const placeholders = new Set(); for (const entry of entries) { for (const key of Object.keys(entry.frontmatter || {})) { if (PLACEHOLDER_SUGGEST_RESERVED_KEYS.has(key)) continue; const value = placeholderValue(entry.frontmatter[key]); if (!value) continue; placeholders.add(`${entry.id}.${key}`); placeholders.add(`${entry.scope}.${entry.slug}.${key}`); } } return Array.from(placeholders) .sort((a, b) => a.localeCompare(b)) .map((key) => `{{${key}}}`); } function registerKnowledgePlaceholderDefinitions(placeholders, { rootDir = process.cwd() } = {}) { if (!placeholders?.registerPlaceholders) return []; placeholders.unregisterNamespace?.("okf.file"); const definitions = []; const entries = loadKnowledgeEntries(rootDir) .filter((entry) => !HIDDEN_STATUSES.has(entry.status)); for (const entry of entries) { for (const key of Object.keys(entry.frontmatter || {})) { if (PLACEHOLDER_SUGGEST_RESERVED_KEYS.has(key)) continue; const value = placeholderValue(entry.frontmatter[key]); if (!value) continue; definitions.push({ id: `okf.file.${entry.scope}.${entry.slug}.${key}`, namespace: `okf.file.${entry.scope}`, aliases: [ `${entry.id}.${key}`, `${entry.scope}.${entry.slug}.${key}` ], label: `${entry.title}: ${key.replace(/[_-]+/g, " ")}`, description: `Frontmatter value from ${entry.title}.`, value_type: "string", sensitivity: visibilitySensitivity(entry.visibility), min_editor_role: "user", min_viewer_role: visibilityRole(entry.visibility), allowed_field_types: ["okf_markdown"], group: `OKF ${entry.scope}`, resolver: () => value }); } } return placeholders.registerPlaceholders(definitions); } function parseKnowledgeFile(filePath, root, options = {}) { const content = fs.readFileSync(filePath, "utf8"); const relativePath = normalizePath(path.relative(root, filePath)); const scope = relativePath.split("/")[0] || "core"; const parsed = splitFrontmatter(content); const metadata = normalizeMetadata(parsed.frontmatter, filePath, relativePath, scope); if (!options.includeHidden && HIDDEN_STATUSES.has(metadata.status)) return null; const body = cleanText(parsed.body, 180000); return { ...metadata, path: relativePath, file_slug: slugify(path.basename(relativePath, path.extname(relativePath))), body, chunks: chunkMarkdown(body, metadata).map((chunk) => ({ ...chunk, path: relativePath })) }; } function listCommunityKnowledgeFiles(rootDir = process.cwd()) { return loadKnowledgeEntries(rootDir, { includeHidden: true }) .filter((entry) => entry.scope === "community") .map((entry) => ({ id: entry.id, slug: entry.file_slug, entry_slug: entry.slug, title: entry.title, status: entry.status, visibility: entry.visibility, priority: entry.priority, tags: entry.tags, editable: entry.editable, generated: entry.generated, path: `knowledge/${entry.path}`, updated_at: entry.updated_at })) .sort((a, b) => a.title.localeCompare(b.title)); } function getCommunityKnowledgeFile(rootDir = process.cwd(), slug) { const entry = loadKnowledgeEntries(rootDir, { includeHidden: true }) .find((item) => item.scope === "community" && (item.file_slug === slug || item.slug === slug || item.id === slug)); return entry ? { ...entry, slug: entry.file_slug, entry_slug: entry.slug, path: `knowledge/${entry.path}` } : null; } function saveCommunityKnowledgeFile(rootDir = process.cwd(), values = {}) { const root = ensureKnowledgeDirs(rootDir); const slug = slugify(values.slug || values.id || values.title); const existing = values.existing_slug ? getCommunityKnowledgeFile(rootDir, values.existing_slug) : null; if (existing && (existing.generated || !existing.editable)) { throw new Error("This community OKF file is not editable."); } const filePath = path.join(root, "community", `${slug}.md`); if (existing && path.resolve(filePath) !== path.resolve(rootDir, existing.path)) { const existingPath = path.resolve(rootDir, existing.path); if (fs.existsSync(existingPath)) fs.rmSync(existingPath, { force: true }); } const metadata = normalizeCommunityFileValues(values, slug); const markdown = serializeKnowledgeFile(metadata, values.body || ""); fs.writeFileSync(filePath, markdown); knowledgeIndexCache.clear(); parseKnowledgeFile(filePath, root, { includeHidden: true }); return getCommunityKnowledgeFile(rootDir, slug); } function saveCorrectionKnowledgeFile(rootDir = process.cwd(), values = {}) { const root = ensureKnowledgeDirs(rootDir); const slug = slugify(values.slug || values.id || values.title); const filePath = path.join(root, "corrections", `${slug}.md`); const metadata = normalizeCorrectionFileValues(values, slug); const markdown = serializeKnowledgeFile(metadata, values.body || ""); fs.writeFileSync(filePath, markdown); knowledgeIndexCache.clear(); const entry = parseKnowledgeFile(filePath, root, { includeHidden: true }); return entry ? { ...entry, slug: entry.file_slug, entry_slug: entry.slug, path: `knowledge/${entry.path}` } : null; } function cloneKnowledgeEntry(entry) { return { ...entry, tags: [...entry.tags], frontmatter: { ...entry.frontmatter }, chunks: entry.chunks.map((chunk) => ({ ...chunk })) }; } function resolveVisibleKnowledgePlaceholders(entries) { const placeholders = buildPlaceholderMap(entries); return entries.map((entry) => { const body = resolvePlaceholders(entry.body, placeholders); return { ...entry, body, chunks: chunkMarkdown(body, entry).map((chunk) => ({ ...chunk, path: entry.path })) }; }); } function buildPlaceholderMap(entries) { const map = new Map(); for (const entry of entries) { const values = { ...entry.frontmatter, id: entry.id, title: entry.title, scope: entry.scope, status: entry.status, priority: entry.priority, visibility: entry.visibility, category: entry.category, tags: entry.tags, generated: entry.generated, editable: entry.editable, created_at: entry.created_at, updated_at: entry.updated_at }; for (const [key, value] of Object.entries(values)) { const normalizedValue = placeholderValue(value); if (!normalizedValue) continue; map.set(`${entry.id}.${key}`, normalizedValue); map.set(`${entry.scope}.${entry.slug}.${key}`, normalizedValue); } } return map; } function resolvePlaceholders(value, placeholders) { let output = String(value || ""); for (let pass = 0; pass < 3; pass += 1) { const next = output.replace(/\{\{\s*([A-Za-z0-9_.-]+)\s*\}\}/g, (match, key) => { const replacement = placeholders.get(key); return replacement === undefined ? "[missing OKF reference]" : replacement; }); if (next === output) break; output = next; } return output; } function splitFrontmatter(content) { const normalized = String(content || "").replace(/\r\n?/g, "\n"); if (!normalized.startsWith("---\n")) { return { frontmatter: {}, body: normalized }; } const end = normalized.indexOf("\n---", 4); if (end === -1) { return { frontmatter: {}, body: normalized }; } const rawFrontmatter = normalized.slice(4, end); const body = normalized.slice(end + 4).replace(/^\n/, ""); return { frontmatter: parseFrontmatter(rawFrontmatter), body }; } function parseFrontmatter(raw) { const out = {}; for (const line of String(raw || "").split("\n")) { if (!line.trim() || line.trim().startsWith("#")) continue; const match = line.match(/^([A-Za-z0-9_-]+)\s*:\s*(.*)$/); if (!match) continue; out[match[1].trim()] = parseFrontmatterValue(match[2].trim()); } return out; } function parseFrontmatterValue(value) { const unquoted = value.replace(/^["']|["']$/g, ""); if (/^(true|false)$/i.test(unquoted)) return unquoted.toLowerCase() === "true"; if (/^-?\d+(?:\.\d+)?$/.test(unquoted)) return Number(unquoted); if (unquoted.startsWith("[") && unquoted.endsWith("]")) { return unquoted .slice(1, -1) .split(",") .map((item) => cleanText(item.replace(/^["']|["']$/g, ""), 120)) .filter(Boolean); } return unquoted; } function normalizeCommunityFileValues(values, slug) { const id = cleanText(values.id, 180) || `community.${slug}`; const title = cleanText(values.title, 180); if (!title) throw new Error("Community OKF title is required."); const status = cleanText(values.status, 40).toLowerCase() || "active"; const visibility = cleanText(values.visibility, 20).toLowerCase() || "user"; return { id, title, scope: "community", status, priority: Number.isFinite(Number(values.priority)) ? Number(values.priority) : 0, visibility: VISIBILITY_VALUES.has(visibility) ? visibility : "user", category: cleanText(values.category || "Community", 120), tags: splitList(values.tags), generated: false, editable: true, created_at: cleanText(values.created_at, 80) || new Date().toISOString(), updated_at: new Date().toISOString() }; } function normalizeCorrectionFileValues(values, slug) { const id = cleanText(values.id, 180) || `correction.${slug}`; const title = cleanText(values.title, 180); if (!title) throw new Error("Correction OKF title is required."); const status = cleanText(values.status, 40).toLowerCase() || "active"; const visibility = cleanText(values.visibility, 20).toLowerCase() || "user"; return { id, title, scope: "corrections", status, priority: Number.isFinite(Number(values.priority)) ? Number(values.priority) : 100, visibility: VISIBILITY_VALUES.has(visibility) ? visibility : "user", category: cleanText(values.category || "Correction", 120), tags: splitList(values.tags || "feedback, correction"), generated: false, editable: true, created_at: cleanText(values.created_at, 80) || new Date().toISOString(), updated_at: new Date().toISOString(), extra_frontmatter: { source_feedback_id: cleanText(values.source_feedback_id, 180), source_feedback_url: cleanText(values.source_feedback_url, 1000) } }; } function serializeKnowledgeFile(metadata, body) { const frontmatter = { id: metadata.id, title: metadata.title, scope: metadata.scope, status: metadata.status, priority: metadata.priority, visibility: metadata.visibility, category: metadata.category, tags: metadata.tags.join(", "), generated: metadata.generated, editable: metadata.editable, created_at: metadata.created_at, updated_at: metadata.updated_at, ...(metadata.extra_frontmatter || {}) }; const lines = ["---"]; for (const [key, value] of Object.entries(frontmatter)) { if (value === "" || value === null || value === undefined) continue; lines.push(`${key}: ${frontmatterValue(value)}`); } lines.push("---", "", cleanText(body, 64000) || `# ${metadata.title}`, ""); return lines.join("\n"); } function frontmatterValue(value) { if (typeof value === "boolean" || typeof value === "number") return String(value); const text = Array.isArray(value) ? value.join(", ") : String(value); return /[:#[\]{}"'\\]|^\s|\s$/.test(text) ? JSON.stringify(text) : text; } function normalizeMetadata(frontmatter, filePath, relativePath, scope) { const id = cleanText(frontmatter.id, 180) || normalizePath(relativePath).replace(/\.md$/i, "").replace(/\//g, "."); const title = cleanText(frontmatter.title, 180) || titleFromPath(filePath); const status = cleanText(frontmatter.status, 40).toLowerCase() || "active"; const visibility = VISIBILITY_VALUES.has(cleanText(frontmatter.visibility, 20).toLowerCase()) ? cleanText(frontmatter.visibility, 20).toLowerCase() : "user"; return { id, slug: slugify(id), title, scope: KNOWLEDGE_SCOPES.includes(scope) ? scope : "core", status, priority: Number.isFinite(Number(frontmatter.priority)) ? Number(frontmatter.priority) : 0, visibility, category: cleanText(frontmatter.category || frontmatter.scope || scope, 120), tags: splitList(frontmatter.tags), generated: Boolean(frontmatter.generated), editable: frontmatter.editable === undefined ? scope === "community" || scope === "corrections" : Boolean(frontmatter.editable), created_at: cleanText(frontmatter.created_at, 80), updated_at: cleanText(frontmatter.updated_at, 80), frontmatter: normalizeFrontmatterValues(frontmatter) }; } function chunkMarkdown(body, metadata) { const lines = String(body || "").split("\n"); const chunks = []; let current = { heading: metadata.title, level: 1, lines: [] }; const flush = () => { const text = cleanText(current.lines.join("\n"), 8000); if (!text) return; chunks.push({ id: `${metadata.id}#${slugify(current.heading || "section")}`, heading: cleanText(current.heading || metadata.title, 180), level: current.level, text }); }; for (const line of lines) { const heading = line.match(/^(#{1,6})\s+(.+)$/); if (heading) { flush(); current = { heading: cleanText(heading[2], 180), level: heading[1].length, lines: [] }; continue; } current.lines.push(line); } flush(); if (!chunks.length && body.trim()) { chunks.push({ id: `${metadata.id}#body`, heading: metadata.title, level: 1, text: cleanText(body, 8000) }); } return chunks; } function scoreKnowledgeChunk(entry, chunk, queryTokens) { const text = [entry.title, entry.category, entry.tags.join(" "), chunk.heading, chunk.text].join(" "); const textTokens = tokenSet(text); const overlap = queryTokens.size ? intersectionSize(queryTokens, textTokens) : 1; const score = (overlap * 100) + SCOPE_PRIORITY[entry.scope] + Number(entry.priority || 0); const excerpt = excerptForChunk(chunk.text, queryTokens); return { id: entry.id, slug: entry.slug, title: entry.title, category: entry.category, visibility: entry.visibility, summary: excerpt, facts: chunk.text.slice(0, 4000), priority: SCOPE_PRIORITY[entry.scope] + Number(entry.priority || 0), score, matched: overlap > 0, source: `knowledge/${chunk.path}${chunk.heading ? `#${slugify(chunk.heading)}` : ""}`, source_metadata: { path: `knowledge/${chunk.path}`, id: entry.id, heading: chunk.heading, score, excerpt } }; } function canSeeKnowledgeEntry(entry, access) { if (!entry) return false; if (!ACTIVE_STATUSES.has(entry.status) && !access.isAdmin) return false; if (entry.visibility === "admin") return access.isAdmin; if (entry.visibility === "mod") return access.isMod || access.isAdmin; return true; } function accessForUser(user) { return { authenticated: Boolean(user), isAdmin: Boolean(user?.isAdmin), isMod: Boolean(user?.isAdmin || user?.isMod) }; } function scanMarkdownFiles(dir, output) { for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { if (entry.name.startsWith(".")) continue; const fullPath = path.join(dir, entry.name); if (entry.isDirectory()) { scanMarkdownFiles(fullPath, output); } else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) { output.push(fullPath); } } } function excerptForChunk(text, queryTokens) { const cleaned = cleanText(text, 1200); if (!queryTokens.size) return cleaned.slice(0, 360); const lower = cleaned.toLowerCase(); const token = Array.from(queryTokens).find((item) => lower.includes(item)); if (!token) return cleaned.slice(0, 360); const index = Math.max(0, lower.indexOf(token) - 120); return cleaned.slice(index, index + 420).trim(); } function tokenSet(value) { const cleaned = cleanText(value, 4000); const expanded = cleaned.replace(/([a-z0-9])([A-Z])/g, "$1 $2"); const tokens = new Set(); for (const token of `${cleaned} ${expanded}`.toLowerCase().split(/[^a-z0-9_]+/)) { if (token.length < 2 || SEARCH_STOPWORDS.has(token)) continue; tokens.add(token); } return tokens; } function intersectionSize(a, b) { let count = 0; for (const item of a) { if (b.has(item)) count += 1; } return count; } function normalizeFrontmatterValues(frontmatter = {}) { const out = {}; for (const [key, value] of Object.entries(frontmatter)) { const normalizedKey = cleanText(key, 80).replace(/[^A-Za-z0-9_-]/g, "_"); const normalizedValue = placeholderValue(value); if (normalizedKey && normalizedValue) out[normalizedKey] = normalizedValue; } return out; } function placeholderValue(value) { if (Array.isArray(value)) { return value.map((item) => cleanText(item, 240)).filter(Boolean).join(", "); } if (typeof value === "boolean") return value ? "true" : "false"; if (typeof value === "number" && Number.isFinite(value)) return String(value); return cleanText(value, 1000); } function visibilityRole(value) { const visibility = VISIBILITY_VALUES.has(value) ? value : "user"; return visibility === "admin" ? "admin" : visibility === "mod" ? "mod" : "user"; } function visibilitySensitivity(value) { const visibility = VISIBILITY_VALUES.has(value) ? value : "user"; return visibility === "admin" ? "admin" : visibility === "mod" ? "moderator" : "public_safe"; } function splitList(value) { if (Array.isArray(value)) return value.map((item) => cleanText(item, 120)).filter(Boolean).slice(0, 50); return String(value || "") .split(",") .map((item) => cleanText(item, 120)) .filter(Boolean) .slice(0, 50); } function titleFromPath(filePath) { return path.basename(filePath, path.extname(filePath)).replace(/[-_]+/g, " ").replace(/\b\w/g, (letter) => letter.toUpperCase()); } function slugify(value) { return cleanText(value, 180) .toLowerCase() .replace(/[^a-z0-9]+/g, "-") .replace(/^-+|-+$/g, "") || "entry"; } function normalizePath(value) { return String(value || "").replace(/\\/g, "/"); } function cleanText(value, maximum = 4000) { return String(value || "").replace(/\r\n?/g, "\n").trim().slice(0, maximum); } module.exports = { KNOWLEDGE_SCOPES, ensureKnowledgeDirs, getCommunityKnowledgeFile, knowledgeRoot, listKnowledgeFiles, listCommunityKnowledgeFiles, listKnowledgePlaceholders, loadKnowledgeEntries, migrateSingleBracePlaceholders, parseKnowledgeFile, registerKnowledgePlaceholderDefinitions, saveCorrectionKnowledgeFile, saveCommunityKnowledgeFile, searchFileKnowledge };