Lumi/plugins/lumi_ai_web_search/backend/url_policy.js
2026-06-13 21:32:36 +02:00

133 lines
4.8 KiB
JavaScript

const dns = require("dns");
const net = require("net");
// Exact hostnames that isLocalHostname() rejects in addition to the
// localhost/.local patterns. Entries are lowercase; callers are expected to
// lowercase the hostname before the lookup.
const METADATA_HOSTS = new Set([
"metadata.google.internal",
"metadata.aws.internal",
"instance-data.ec2.internal"
]);
/**
 * Decides whether a URL may be fetched.
 *
 * Checks run in this order, and the first failure determines `reason`:
 * parseability, http/https scheme, absence of embedded credentials,
 * local/metadata hostname, private IP literal, DNS resolution to public
 * addresses only, and finally the whitelist/blacklist rules.
 *
 * @param {*} value - Candidate URL; coerced with String() before parsing.
 * @param {object} [options]
 * @param {(hostname: string) => Promise<string[]>} [options.resolveHost]
 *   Resolver override; falls back to defaultResolveHost.
 * @param {string} [options.mode] - "blacklist" selects blacklist mode; any
 *   other value means whitelist mode.
 * @param {Array} [options.rules] - Rules passed to matchesRule; a non-array
 *   is treated as an empty list.
 * @returns {Promise<{allowed: boolean, url: (string|null), reason: (string|null)}>}
 */
async function evaluateUrl(value, options = {}) {
  let url;
  try {
    url = new URL(String(value));
  } catch {
    return denied("invalid_url");
  }

  const isWebScheme = url.protocol === "http:" || url.protocol === "https:";
  if (!isWebScheme) {
    return denied("unsafe_protocol");
  }
  if (url.username || url.password) {
    return denied("credentials_in_url");
  }

  // Strip IPv6 brackets and one trailing root dot before the host checks.
  const hostname = url.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "");
  if (isLocalHostname(hostname)) {
    return denied("local_or_metadata_host");
  }
  if (net.isIP(hostname) && isPrivateAddress(hostname)) {
    return denied("private_network");
  }

  const resolveHost = options.resolveHost || defaultResolveHost;
  let reachesPrivateNetwork;
  try {
    const addresses = await resolveHost(hostname);
    // An empty answer is treated the same as a private one: deny.
    reachesPrivateNetwork =
      !addresses.length || addresses.some((address) => isPrivateAddress(address));
  } catch {
    return denied("dns_resolution_failed");
  }
  if (reachesPrivateNetwork) {
    return denied("private_network");
  }

  const useBlacklist = options.mode === "blacklist";
  const rules = Array.isArray(options.rules) ? options.rules : [];
  const matched = rules.some((rule) => matchesRule(url, rule));
  if (useBlacklist) {
    if (matched) {
      return denied("blacklisted");
    }
  } else if (!matched) {
    return denied("not_whitelisted");
  }

  return { allowed: true, url: canonicalUrl(url), reason: null };
}
/**
 * Runs only the network-level checks of evaluateUrl by forcing blacklist
 * mode with an empty rule list, so no rule can reject the URL.
 *
 * @param {*} value - Candidate URL.
 * @param {object} [options] - Forwarded to evaluateUrl; `mode` and `rules`
 *   are overridden.
 * @returns {Promise<{allowed: boolean, url: (string|null), reason: (string|null)}>}
 */
async function evaluateNetworkTarget(value, options = {}) {
  const networkOnlyOptions = {
    ...options,
    mode: "blacklist",
    rules: [],
  };
  return evaluateUrl(value, networkOnlyOptions);
}
/**
 * Tests a parsed URL against one allow/deny rule.
 *
 * A leading "GET " and anything from "#" onward are stripped from the rule.
 * Rules without "*" are parsed as URLs (https:// is assumed when the rule
 * has no scheme) and match when:
 *   - the URL host equals the rule host or is a subdomain of it,
 *   - scheme and port agree, if the rule spelled out a scheme,
 *   - the URL path starts with the rule path (plain string prefix), unless
 *     the rule path is "/",
 *   - the query string is identical, if the rule has one.
 * Rules containing "*" are compiled with wildcardExpression and tested
 * against the canonical URL, host+path+query, and the bare host.
 *
 * A single trailing root dot on either host is ignored, so "example.com."
 * and "example.com" are the same host for matching purposes.
 *
 * @param {URL} url - Parsed URL to test.
 * @param {*} ruleValue - Rule text; falsy or blank values never match.
 * @returns {boolean} True when the rule matches the URL.
 */
function matchesRule(url, ruleValue) {
  const raw = String(ruleValue || "").trim();
  if (!raw) return false;

  const rule = raw.replace(/^GET\s+/i, "").replace(/#.*$/, "");
  const rawHost = url.hostname.toLowerCase();
  // "evil.com." resolves to the same host as "evil.com"; without this a
  // trailing dot would slip past a blacklist entry.
  const hostname = rawHost.replace(/\.$/, "");

  if (!rule.includes("*")) {
    try {
      // Only a scheme at the very start counts; "://" inside a query string
      // must not turn a schemeless rule into an unparsable one.
      const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(rule);
      const parsed = new URL(hasScheme ? rule : `https://${rule}`);
      const ruleHost = parsed.hostname.toLowerCase().replace(/\.$/, "");
      const hostMatches = hostname === ruleHost || hostname.endsWith(`.${ruleHost}`);
      if (!hostMatches) return false;
      if (hasScheme && (url.protocol !== parsed.protocol || url.port !== parsed.port)) {
        return false;
      }
      const rulePath = parsed.pathname;
      if (rulePath !== "/" && !url.pathname.startsWith(rulePath)) return false;
      return !parsed.search || url.search === parsed.search;
    } catch {
      // An unparsable rule matches nothing.
      return false;
    }
  }

  const expression = wildcardExpression(rule.toLowerCase());
  const candidates = [
    canonicalUrl(url),
    `${rawHost}${url.pathname}${url.search}`,
    rawHost,
  ];
  if (hostname !== rawHost) {
    // Also offer the dot-less spellings so wildcard rules written for the
    // ordinary host still apply.
    const dotless = new URL(url.href);
    dotless.hostname = hostname;
    candidates.push(
      canonicalUrl(dotless),
      `${hostname}${url.pathname}${url.search}`,
      hostname,
    );
  }
  return candidates.some((candidate) => expression.test(candidate.toLowerCase()));
}
/**
 * Compiles a glob-style pattern into an anchored, case-insensitive RegExp.
 *
 * Every regex metacharacter except "*" is escaped. Each run of one or more
 * "*" becomes a single ".*"; collapsing runs keeps the matched set the same
 * while avoiding stacked ".*.*" groups that backtrack polynomially on long
 * inputs.
 *
 * @param {string} value - Pattern text where "*" matches any run of characters.
 * @returns {RegExp} Expression that must match the whole input.
 */
function wildcardExpression(value) {
  const escaped = value
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*+/g, ".*");
  return new RegExp(`^${escaped}$`, "i");
}
/**
 * Returns the serialized form of a URL with its fragment removed.
 * Works on a copy, so the URL passed in is not modified.
 *
 * @param {URL} url - URL object (only `href` is read).
 * @returns {string} The href without the "#..." part.
 */
function canonicalUrl(url) {
  const withoutFragment = new URL(url.href);
  withoutFragment.hash = "";
  const { href } = withoutFragment;
  return href;
}
/**
 * Reports whether a hostname is "localhost", sits under the .localhost or
 * .local suffixes, or is listed in METADATA_HOSTS.
 *
 * @param {string} hostname - Hostname to test; comparisons are exact, so the
 *   caller should lowercase it first.
 * @returns {boolean} True when the hostname must be treated as local.
 */
function isLocalHostname(hostname) {
  if (hostname === "localhost") {
    return true;
  }
  const localSuffixes = [".localhost", ".local"];
  if (localSuffixes.some((suffix) => hostname.endsWith(suffix))) {
    return true;
  }
  return METADATA_HOSTS.has(hostname);
}
/**
 * Reports whether an IP literal must be treated as non-public.
 *
 * Fails closed: anything net.isIP() does not recognise as IPv4 or IPv6
 * returns true.
 *
 * IPv4: "this network", private, loopback, CGNAT, link-local, the IETF
 * protocol-assignment and documentation /24s, 6to4 relay anycast,
 * benchmarking, and everything from 224.0.0.0 up are rejected.
 *
 * IPv6: only addresses whose first group lies in 2000::/3's lower half
 * (0x2000-0x3fff) are accepted, minus the 2001:db8::/32 documentation
 * range; unspecified, loopback, unique-local, link/site-local, multicast
 * and all IPv4-mapped addresses are rejected.
 *
 * @param {string} address - IP address literal.
 * @returns {boolean} True when the address should not be contacted.
 */
function isPrivateAddress(address) {
  const version = net.isIP(address);

  if (version === 4) {
    const [a, b, c] = address.split(".").map(Number);
    return a === 0 || a === 10 || a === 127 ||
      (a === 100 && b >= 64 && b <= 127) ||
      (a === 169 && b === 254) ||
      (a === 172 && b >= 16 && b <= 31) ||
      // Only 192.0.0.0/24 and 192.0.2.0/24 are special-purpose; the rest of
      // 192.0.0.0/16 is ordinary public address space.
      (a === 192 && b === 0 && (c === 0 || c === 2)) ||
      (a === 192 && b === 168) ||
      (a === 192 && b === 88 && c === 99) ||
      (a === 198 && (b === 18 || b === 19)) ||
      (a === 198 && b === 51 && c === 100) ||
      (a === 203 && b === 0 && c === 113) ||
      a >= 224;
  }

  if (version === 6) {
    const normalized = address.toLowerCase();
    if (normalized === "::" || normalized === "::1") return true;
    if (normalized.startsWith("fc") || normalized.startsWith("fd") ||
      /^fe[89ab]/.test(normalized) || normalized.startsWith("ff")) return true;
    if (normalized.startsWith("::ffff:")) return true;

    const groups = normalized.split(":");
    const first = Number.parseInt(groups[0], 16);
    const second = Number.parseInt(groups[1], 16);
    // Compare numerically so the zero-padded spelling "2001:0db8:" is
    // caught as well as "2001:db8:".
    if (first === 0x2001 && second === 0xdb8) return true;
    return !Number.isFinite(first) || first < 0x2000 || first > 0x3fff;
  }

  return true;
}
/**
 * Default resolver used by evaluateUrl: returns every address a hostname
 * maps to. IP literals are returned as-is without a lookup.
 *
 * @param {string} hostname - Hostname or IP literal.
 * @returns {Promise<string[]>} Addresses in the order dns.promises.lookup
 *   returned them (`verbatim: true`).
 */
async function defaultResolveHost(hostname) {
  const isLiteral = net.isIP(hostname) !== 0;
  if (isLiteral) {
    return [hostname];
  }
  const records = await dns.promises.lookup(hostname, { all: true, verbatim: true });
  return records.map(({ address }) => address);
}
/**
 * Builds the result object for a rejected URL.
 *
 * @param {string} reason - Machine-readable rejection code.
 * @returns {{allowed: boolean, url: null, reason: string}}
 */
function denied(reason) {
  const verdict = {
    allowed: false,
    url: null,
    reason,
  };
  return verdict;
}
// Public API of this module. The helpers are exported alongside the two
// evaluate* entry points.
module.exports = {
canonicalUrl,
defaultResolveHost,
evaluateNetworkTarget,
evaluateUrl,
isLocalHostname,
isPrivateAddress,
matchesRule,
wildcardExpression
};