import { createDefu } from "defu";
import { withoutLeadingSlash } from "ufo";

/**
 * Builds a fresh, empty rule group for the parser.
 *
 * @returns {{comment: string[], disallow: string[], allow: string[], userAgent: string[]}}
 */
function createEmptyGroup() {
  return {
    // comments are too hard to parse in a logical order, we'll just omit them
    comment: [],
    disallow: [],
    allow: [],
    userAgent: []
  };
}

/**
 * Parses the text of a robots.txt file into rule groups, sitemap entries and
 * a list of human-readable parse errors.
 *
 * Everything after a `#` on a line is discarded, and lines without a `:`
 * separator are skipped. A new group is started when a user-agent directive
 * follows at least one allow/disallow directive. The group being built when
 * the input ends is always appended, even if it is empty.
 *
 * @param {string} s - Raw robots.txt content.
 * @returns {{groups: object[], sitemaps: string[], errors: string[]}}
 */
export function parseRobotsTxt(s) {
  const groups = [];
  const sitemaps = [];
  const errors = [];
  let createNewGroup = false;
  let currentGroup = createEmptyGroup();
  // `ln` is the zero-based index of the line; it is what the `L<n>` error prefix reports.
  for (const [ln, rawLine] of s.split("\n").entries()) {
    // Keep only the part before the first `#`.
    const line = rawLine.split("#")[0].trim();
    const sepIndex = line.indexOf(":");
    if (sepIndex === -1)
      continue;
    const rule = line.substring(0, sepIndex).trim().toLowerCase();
    const val = line.substring(sepIndex + 1).trim();
    switch (rule) {
      case "user-agent":
      case "useragent":
      case "user agent":
        // Consecutive user-agent lines share one group; a user-agent line that
        // comes after allow/disallow rules closes the previous group.
        if (createNewGroup) {
          groups.push({ ...currentGroup });
          currentGroup = createEmptyGroup();
          createNewGroup = false;
        }
        currentGroup.userAgent.push(val);
        break;
      case "allow":
        currentGroup.allow.push(val);
        createNewGroup = true;
        break;
      // Misspellings of "disallow" are accepted as the directive itself.
      case "disallow":
      case "dissallow":
      case "dissalow":
      case "disalow":
      case "diasllow":
      case "disallaw":
        currentGroup.disallow.push(val);
        createNewGroup = true;
        break;
      case "sitemap":
      case "site-map":
        sitemaps.push(val);
        break;
      case "host":
        currentGroup.host = val;
        break;
      case "clean-param":
        if (currentGroup.userAgent.some((u) => u.toLowerCase().includes("yandex"))) {
          currentGroup.cleanParam = currentGroup.cleanParam || [];
          currentGroup.cleanParam.push(val);
        } else {
          errors.push(`L${ln}: Clean-param directive is only when targeting Yandex user agent.`);
        }
        break;
      default:
        errors.push(`L${ln}: Unknown directive ${rule} `);
        break;
    }
  }
  groups.push({ ...currentGroup });
  return { groups, sitemaps, errors };
}

/**
 * Appends an error for every non-empty allow/disallow rule of `group` that
 * starts with neither `/` nor `*`. The group itself is not modified.
 *
 * @param {object} group - Group whose `allow` / `disallow` arrays are checked.
 * @param {string[]} errors - Array that receives the error messages.
 */
function validateGroupRules(group, errors) {
  for (const key of ["allow", "disallow"]) {
    for (const rule of group[key] || []) {
      // An empty rule is accepted as-is.
      if (rule === "")
        continue;
      if (!rule.startsWith("/") && !rule.startsWith("*"))
        errors.push(`Disallow rule "${rule}" must start with a \`/\` or be a \`*\`.`);
    }
  }
}

/**
 * Tests whether `pattern` matches `path` starting at the beginning of `path`.
 *
 * `*` matches any run of characters (including none), a `$` in the final
 * position of the pattern requires the match to end exactly at the end of
 * `path`, and every other character must match literally. Without a trailing
 * `$` the pattern only has to match a prefix of `path`.
 *
 * @param {string} pattern - Rule pattern.
 * @param {string} path - Path to test.
 * @returns {boolean} Whether the pattern matches.
 */
function matches(pattern, path) {
  const pathLength = path.length;
  const patternLength = pattern.length;
  // The first `numMatchingLengths` entries are the path prefix lengths that the
  // pattern consumed so far can match, in ascending order. Initially only the
  // empty prefix (length 0) matches.
  const matchingLengths = Array.from({ length: pathLength + 1 }).fill(0);
  let numMatchingLengths = 1;
  for (let p = 0; p < patternLength; p++) {
    const token = pattern[p];
    if (token === "$" && p + 1 === patternLength) {
      // End anchor: the longest candidate must cover the whole path.
      return matchingLengths[numMatchingLengths - 1] === pathLength;
    }
    if (token === "*") {
      // Wildcard: every length from the shortest candidate up to the full
      // path length becomes a candidate.
      numMatchingLengths = pathLength - matchingLengths[0] + 1;
      for (let i = 1; i < numMatchingLengths; i++) {
        matchingLengths[i] = matchingLengths[i - 1] + 1;
      }
      continue;
    }
    // Literal: keep (and advance) only the candidates whose next path
    // character equals the pattern character.
    let numMatches = 0;
    for (let i = 0; i < numMatchingLengths; i++) {
      const matchLength = matchingLengths[i];
      if (matchLength < pathLength && path[matchLength] === token) {
        matchingLengths[numMatches++] = matchLength + 1;
      }
    }
    if (numMatches === 0) {
      return false;
    }
    numMatchingLengths = numMatches;
  }
  return true;
}

/**
 * Picks the rule that applies to `path`.
 *
 * Among the rules whose pattern matches, the one with the longest pattern
 * wins; when two matching patterns have the same length an `allow` rule
 * replaces a non-allow one. Falsy entries in `_rules` are ignored.
 *
 * @param {string} path - Path to look up.
 * @param {Array<{pattern: string, allow: boolean}>} _rules - Candidate rules.
 * @returns {{pattern: string, allow: boolean} | null} The winning rule, or
 *   `null` when no rule matches.
 */
export function matchPathToRule(path, _rules) {
  let matchedRule = null;
  for (const rule of _rules.filter(Boolean)) {
    if (!matches(rule.pattern, path))
      continue;
    if (!matchedRule || rule.pattern.length > matchedRule.pattern.length) {
      matchedRule = rule;
      continue;
    }
    const sameLength = rule.pattern.length === matchedRule.pattern.length;
    if (sameLength && rule.allow && !matchedRule.allow)
      matchedRule = rule;
  }
  return matchedRule;
}

/**
 * Validates a parsed robots.txt object in place.
 *
 * Groups without any allow or disallow rule are removed and reported; the
 * rules of the remaining groups are checked and problems are appended to
 * `robotsTxt.errors`.
 *
 * @param {{groups: object[], errors: string[]}} robotsTxt - Parsed robots.txt.
 * @returns {object} The same `robotsTxt` object, mutated.
 */
export function validateRobots(robotsTxt) {
  robotsTxt.groups = robotsTxt.groups.filter((group) => {
    if (!group.allow.length && !group.disallow.length) {
      robotsTxt.errors.push(`Group "${group.userAgent.join(", ")}" has no allow or disallow rules. You must provide one of either.`);
      return false;
    }
    validateGroupRules(group, robotsTxt.errors);
    return true;
  });
  return robotsTxt;
}

/**
 * Wraps a value in an array.
 *
 * @param {*} v - Any value.
 * @returns {Array} `[]` for `undefined`, `v` itself when it already is an
 *   array, otherwise `[v]`.
 */
export function asArray(v) {
  if (typeof v === "undefined")
    return [];
  return Array.isArray(v) ? v : [v];
}

/**
 * Normalises a group so that `userAgent`, `allow` and `disallow` are arrays
 * and adds the derived `_indexable` and `_rules` fields.
 *
 * @param {object} group - Group with optional `userAgent`, `allow` and
 *   `disallow` given as a single value or an array.
 * @returns {object} A copy of `group` with the normalised and derived fields.
 */
export function normalizeGroup(group) {
  const disallow = asArray(group.disallow);
  // Empty allow rules are dropped; empty disallow rules are kept in `disallow`
  // but excluded from `_rules` below.
  const allow = asArray(group.allow).filter((rule) => Boolean(rule));
  return {
    ...group,
    userAgent: group.userAgent ? asArray(group.userAgent) : ["*"],
    disallow,
    allow,
    // False when the group disallows the root path `/`. `includes` compares
    // by value, so it must be given the string rather than a predicate.
    _indexable: !disallow.includes("/"),
    _rules: [
      ...disallow.filter(Boolean).map((r) => ({ pattern: r, allow: false })),
      ...allow.map((r) => ({ pattern: r, allow: true }))
    ]
  };
}

/**
 * Serialises groups and sitemaps to robots.txt text.
 *
 * For each group the comments, user agents (defaulting to `*` when the field
 * is missing), allow, disallow and clean-param lines are written in that
 * order, followed by a blank line. Sitemap lines come last.
 *
 * @param {{groups: object[], sitemaps: string[]}} robots - Data to serialise.
 * @returns {string} The robots.txt content, lines joined with `\n`.
 */
export function generateRobotsTxt({ groups, sitemaps }) {
  const lines = [];
  for (const group of groups) {
    for (const comment of group.comment || [])
      lines.push(`# ${comment}`);
    for (const userAgent of group.userAgent || ["*"])
      lines.push(`User-agent: ${userAgent}`);
    for (const allow of group.allow || [])
      lines.push(`Allow: ${allow}`);
    for (const disallow of group.disallow || [])
      lines.push(`Disallow: ${disallow}`);
    for (const cleanParam of group.cleanParam || [])
      lines.push(`Clean-param: ${cleanParam}`);
    lines.push("");
  }
  for (const sitemap of sitemaps)
    lines.push(`Sitemap: ${sitemap}`);
  return lines.join("\n");
}

// Merge function built with a custom callback: when both sides hold an array
// for a key, the array is replaced by the de-duplicated union of both.
// NOTE(review): the callback returns `obj[key]`; presumably defu treats a
// truthy return as "key already handled" — confirm against the defu docs.
const merger = createDefu((obj, key, value) => {
  if (Array.isArray(obj[key]) && Array.isArray(value))
    obj[key] = Array.from(/* @__PURE__ */ new Set([...obj[key], ...value]));
  return obj[key];
});

/**
 * Merges the items of `arr` that share the same value for `key`.
 *
 * @param {object[]} arr - Items to merge.
 * @param {string} key - Property whose value identifies items to combine.
 * @returns {object[]} One merged item per distinct key value.
 */
export function mergeOnKey(arr, key) {
  const res = {};
  arr.forEach((item) => {
    const k = item[key];
    res[k] = merger(item, res[k] || {});
  });
  return Object.values(res);
}

/**
 * Heuristically decides whether a route path is an internal one.
 *
 * A path is internal when (after `withoutLeadingSlash`) it starts with `.`,
 * `_`, `cgi-bin`, `cdn-cgi`, `api` or `@`, or when its last `/`-separated
 * segment contains a `.`.
 *
 * @param {string} _path - Route path.
 * @returns {boolean} Whether the path is considered internal.
 */
export function isInternalRoute(_path) {
  const path = withoutLeadingSlash(_path);
  if (path.startsWith(".") || path.startsWith("_"))
    return true;
  if (path.startsWith("cgi-bin") || path.startsWith("cdn-cgi") || path.startsWith("api"))
    return true;
  const lastSegment = path.split("/").pop() || path;
  return lastSegment.includes(".") || path.startsWith("@");
}

/**
 * Reduces the robots-related options of a route config to `{ allow, rule }`.
 *
 * `allow` is taken from, in order: a boolean `config.robots`,
 * `config.robots.indexable`, then `config.index`. `rule` is taken from
 * `config.robots.rule` or a string `config.robots`. When a rule exists and
 * `allow` is not truthy (unset or `false`), `allow` is derived from the rule:
 * it is `false` for `"none"` or any rule containing `"noindex"`.
 *
 * @param {object} config - Route config with optional `robots` and `index`.
 * @returns {{allow: (boolean|undefined), rule: (string|undefined)} | undefined}
 *   `undefined` when neither an allow value nor a rule could be resolved.
 */
export function normaliseRobotsRouteRule(config) {
  let allow;
  if (typeof config.robots === "boolean")
    allow = config.robots;
  else if (typeof config.robots === "object" && typeof config.robots.indexable !== "undefined")
    allow = config.robots.indexable;
  else if (typeof config.index !== "undefined")
    allow = config.index;
  let rule;
  if (typeof config.robots === "object" && typeof config.robots.rule !== "undefined")
    rule = config.robots.rule;
  else if (typeof config.robots === "string")
    rule = config.robots;
  if (rule && !allow)
    allow = rule !== "none" && !rule.includes("noindex");
  if (typeof allow === "undefined" && typeof rule === "undefined")
    return;
  return { allow, rule };
}