242 lines
7.2 KiB
JavaScript
242 lines
7.2 KiB
JavaScript
import { createDefu } from "defu";
|
|
import { withoutLeadingSlash } from "ufo";
|
|
/**
 * Parses the text of a robots.txt file.
 *
 * Each line is cut at the first `#`, trimmed, and split at its first `:` into
 * a lower-cased directive name and a trimmed value. Lines without a `:` are
 * skipped. Several misspellings / alternative spellings of the directive
 * names are accepted.
 *
 * @param {string} s - Raw robots.txt content.
 * @returns {{ groups: object[], sitemaps: string[], errors: string[] }}
 *   The parsed groups (the group being built is always appended at the end,
 *   even when it is empty), the sitemap values, and the error messages.
 *   Error messages are prefixed with `L<index>` where the index is the
 *   zero-based position of the line in the input.
 */
export function parseRobotsTxt(s) {
  // Accepted spellings mapped to the directive they stand for.
  const directiveKinds = new Map([
    ["user-agent", "userAgent"],
    ["useragent", "userAgent"],
    ["user agent", "userAgent"],
    ["allow", "allow"],
    ["disallow", "disallow"],
    ["dissallow", "disallow"],
    ["dissalow", "disallow"],
    ["disalow", "disallow"],
    ["diasllow", "disallow"],
    ["disallaw", "disallow"],
    ["sitemap", "sitemap"],
    ["site-map", "sitemap"],
    ["host", "host"],
    ["clean-param", "cleanParam"],
  ]);

  const makeGroup = () => ({
    // comments are too hard to parse in a logical order, we'll just omit them
    comment: [],
    disallow: [],
    allow: [],
    userAgent: [],
  });

  const groups = [];
  const sitemaps = [];
  const errors = [];

  let activeGroup = makeGroup();
  // Becomes true once the active group has received an allow/disallow rule;
  // the next user-agent line then opens a fresh group.
  let groupHasRules = false;

  s.split("\n").forEach((rawLine, lineIndex) => {
    const content = rawLine.split("#")[0].trim();
    const colonAt = content.indexOf(":");
    if (colonAt === -1) {
      return;
    }

    const directive = content.slice(0, colonAt).trim().toLowerCase();
    const value = content.slice(colonAt + 1).trim();
    const kind = directiveKinds.get(directive);

    if (kind === "userAgent") {
      if (groupHasRules) {
        groups.push(activeGroup);
        activeGroup = makeGroup();
        groupHasRules = false;
      }
      activeGroup.userAgent.push(value);
    } else if (kind === "allow") {
      activeGroup.allow.push(value);
      groupHasRules = true;
    } else if (kind === "disallow") {
      activeGroup.disallow.push(value);
      groupHasRules = true;
    } else if (kind === "sitemap") {
      sitemaps.push(value);
    } else if (kind === "host") {
      activeGroup.host = value;
    } else if (kind === "cleanParam") {
      const targetsYandex = activeGroup.userAgent.some((agent) => agent.toLowerCase().includes("yandex"));
      if (targetsYandex) {
        activeGroup.cleanParam = activeGroup.cleanParam || [];
        activeGroup.cleanParam.push(value);
      } else {
        errors.push(`L${lineIndex}: Clean-param directive is only when targeting Yandex user agent.`);
      }
    } else {
      errors.push(`L${lineIndex}: Unknown directive ${directive} `);
    }
  });

  // The group still being built is always emitted.
  groups.push(activeGroup);

  return { groups, sitemaps, errors };
}
|
|
/**
 * Checks the `allow` and `disallow` patterns of a group and appends a message
 * to `errors` for every pattern that does not start with `/` or `*`.
 *
 * Empty patterns are accepted without an error. A missing `allow` or
 * `disallow` property is treated as an empty list. The group itself is not
 * modified.
 *
 * @param {{ allow?: string[], disallow?: string[] }} group - Group to check.
 * @param {string[]} errors - Array that receives the error messages.
 * @returns {void}
 */
function validateGroupRules(group, errors) {
  // Property to inspect, paired with the directive name used in the message,
  // so that a bad `allow` pattern is not reported as a "Disallow rule".
  const directives = [
    ["allow", "Allow"],
    ["disallow", "Disallow"],
  ];
  for (const [key, label] of directives) {
    for (const rule of group[key] || []) {
      if (rule === "") {
        continue;
      }
      if (!rule.startsWith("/") && !rule.startsWith("*")) {
        errors.push(`${label} rule "${rule}" must start with a \`/\` or be a \`*\`.`);
      }
    }
  }
}
|
|
/**
 * Tests whether `path` matches a robots.txt style `pattern`.
 *
 * `*` in the pattern matches any run of characters (including none) and a
 * `$` as the very last pattern character requires the match to end exactly
 * at the end of the path. Every other character, including a `$` that is
 * not last, must match literally. Without a trailing `$` the pattern only
 * has to match a prefix of the path.
 *
 * @param {string} pattern - Pattern to test.
 * @param {string} path - Path to test against.
 * @returns {boolean} Whether the path matches.
 */
function matches(pattern, path) {
  const pathLen = path.length;
  const lastPatternIndex = pattern.length - 1;

  // positions[0 .. live) are the path prefix lengths that the pattern
  // consumed so far can match, in ascending order.
  const positions = new Array(pathLen + 1).fill(0);
  let live = 1;

  for (let idx = 0; idx <= lastPatternIndex; idx++) {
    const ch = pattern[idx];

    if (ch === "$" && idx === lastPatternIndex) {
      // The largest candidate must have consumed the whole path.
      return positions[live - 1] === pathLen;
    }

    if (ch === "*") {
      // From the smallest candidate on, every longer prefix becomes possible.
      live = pathLen - positions[0] + 1;
      for (let k = 1; k < live; k++) {
        positions[k] = positions[k - 1] + 1;
      }
      continue;
    }

    // Literal character: keep only the candidates that can advance by one.
    let kept = 0;
    for (let k = 0; k < live; k++) {
      const at = positions[k];
      if (at < pathLen && path[at] === ch) {
        positions[kept++] = at + 1;
      }
    }
    if (kept === 0) {
      return false;
    }
    live = kept;
  }

  return true;
}
|
|
/**
 * Picks the rule that applies to `path`.
 *
 * Falsy entries in `_rules` are ignored. Among the rules whose pattern
 * matches the path, the one with the longest pattern wins; when two matching
 * patterns have the same length, an allow rule replaces a non-allow rule.
 *
 * @param {string} path - Path to look up.
 * @param {Array<{ pattern: string, allow: boolean } | null | undefined>} _rules
 *   Candidate rules.
 * @returns {{ pattern: string, allow: boolean } | null} The winning rule, or
 *   `null` when no rule matches.
 */
export function matchPathToRule(path, _rules) {
  let best = null;

  for (const candidate of _rules.filter(Boolean)) {
    if (!matches(candidate.pattern, path)) {
      continue;
    }

    const isMoreSpecific = !best || candidate.pattern.length > best.pattern.length;
    const winsTie =
      !isMoreSpecific &&
      candidate.pattern.length === best.pattern.length &&
      candidate.allow &&
      !best.allow;

    if (isMoreSpecific || winsTie) {
      best = candidate;
    }
  }

  return best;
}
|
|
/**
 * Validates a parsed robots.txt structure in place.
 *
 * Groups that have neither allow nor disallow rules are removed and an error
 * is recorded for each of them. The remaining groups are passed to
 * `validateGroupRules`, which may record further errors.
 *
 * @param {{ groups: object[], errors: string[] }} robotsTxt - Parsed
 *   structure; its `groups` property is replaced and its `errors` array is
 *   appended to.
 * @returns {object} The same `robotsTxt` object.
 */
export function validateRobots(robotsTxt) {
  const keptGroups = [];

  for (const group of robotsTxt.groups) {
    const hasRules = group.allow.length || group.disallow.length;
    if (!hasRules) {
      robotsTxt.errors.push(`Group "${group.userAgent.join(", ")}" has no allow or disallow rules. You must provide one of either.`);
      continue;
    }
    validateGroupRules(group, robotsTxt.errors);
    keptGroups.push(group);
  }

  robotsTxt.groups = keptGroups;
  return robotsTxt;
}
|
|
/**
 * Wraps a value in an array unless it already is one.
 *
 * @param {*} v - Any value.
 * @returns {Array} An empty array for `undefined`, `v` itself when it is an
 *   array, otherwise a one-element array containing `v`.
 */
export function asArray(v) {
  if (typeof v === "undefined") {
    return [];
  }
  if (Array.isArray(v)) {
    return v;
  }
  return [v];
}
|
|
/**
 * Returns a copy of a group with its list properties normalised and two
 * derived properties added.
 *
 * - `userAgent`, `disallow` and `allow` are turned into arrays via `asArray`;
 *   a missing/falsy `userAgent` becomes `["*"]` and falsy `allow` entries are
 *   dropped.
 * - `_indexable` is `false` when the disallow list contains the exact
 *   pattern `/`, otherwise `true`.
 * - `_rules` lists the non-empty disallow patterns followed by the allow
 *   patterns as `{ pattern, allow }` records.
 *
 * @param {object} group - Group with optional `userAgent`, `allow` and
 *   `disallow` properties (single values or arrays).
 * @returns {object} The normalised group; other properties of `group` are
 *   copied over unchanged.
 */
export function normalizeGroup(group) {
  const disallow = asArray(group.disallow);
  const allow = asArray(group.allow).filter((rule) => Boolean(rule));
  return {
    ...group,
    userAgent: group.userAgent ? asArray(group.userAgent) : ["*"],
    disallow,
    allow,
    // `includes` compares elements by value, so it has to receive the
    // pattern itself; handing it a predicate function never matches.
    _indexable: !disallow.includes("/"),
    _rules: [
      ...disallow.filter(Boolean).map((r) => ({ pattern: r, allow: false })),
      ...allow.map((r) => ({ pattern: r, allow: true })),
    ],
  };
}
|
|
/**
 * Renders groups and sitemaps as robots.txt text.
 *
 * For every group the lines are written in this order: comments (`# ...`),
 * `User-agent`, `Allow`, `Disallow`, `Clean-param`, followed by an empty
 * line. A group without a `userAgent` property gets `User-agent: *`. The
 * `Sitemap` lines come after all groups. Lines are joined with `\n`.
 *
 * @param {object} input
 * @param {object[]} [input.groups=[]] - Groups to render.
 * @param {string[]} [input.sitemaps=[]] - Sitemap values to render.
 * @returns {string} The robots.txt content.
 */
export function generateRobotsTxt({ groups = [], sitemaps = [] }) {
  const lines = [];
  for (const group of groups) {
    for (const comment of group.comment || [])
      lines.push(`# ${comment}`);
    for (const userAgent of group.userAgent || ["*"])
      lines.push(`User-agent: ${userAgent}`);
    for (const allow of group.allow || [])
      lines.push(`Allow: ${allow}`);
    for (const disallow of group.disallow || [])
      lines.push(`Disallow: ${disallow}`);
    for (const cleanParam of group.cleanParam || [])
      lines.push(`Clean-param: ${cleanParam}`);
    // Empty line that separates this group from whatever follows.
    lines.push("");
  }
  for (const sitemap of sitemaps)
    lines.push(`Sitemap: ${sitemap}`);
  return lines.join("\n");
}
|
|
// Merge function built with `createDefu`. When the target already holds an
// array under `key` and the incoming value is an array too, the target array
// is replaced by the de-duplicated concatenation of both (target entries
// first). The callback always returns the target's current value for `key`.
const merger = createDefu((obj, key, value) => {
  const existing = obj[key];
  if (Array.isArray(existing) && Array.isArray(value)) {
    obj[key] = [.../* @__PURE__ */ new Set([...existing, ...value])];
  }
  return obj[key];
});
|
|
/**
 * Merges the items of `arr` that share the same value under `key`.
 *
 * Items are processed in order; each one is combined through `merger` with
 * whatever has been accumulated so far for its key value (an empty object
 * for the first occurrence).
 *
 * @param {object[]} arr - Items to merge.
 * @param {string} key - Name of the property whose value identifies items
 *   that belong together. The value is used as an object property name.
 * @returns {object[]} One merged item per distinct key value.
 */
export function mergeOnKey(arr, key) {
  // Null-prototype dictionary: key values such as "__proto__" or
  // "constructor" must not collide with members inherited from
  // Object.prototype.
  const res = Object.create(null);
  arr.forEach((item) => {
    const k = item[key];
    res[k] = merger(item, res[k] || {});
  });
  return Object.values(res);
}
|
|
/**
 * Decides whether a route path is treated as internal.
 *
 * After the leading slash is removed with `withoutLeadingSlash`, a path is
 * internal when it starts with `.`, `_`, `cgi-bin`, `cdn-cgi`, `api` or `@`,
 * or when its last `/`-separated segment contains a `.`.
 *
 * @param {string} _path - Route path.
 * @returns {boolean} `true` when the path is considered internal.
 */
export function isInternalRoute(_path) {
  const path = withoutLeadingSlash(_path);

  const internalPrefixes = [".", "_", "cgi-bin", "cdn-cgi", "api", "@"];
  if (internalPrefixes.some((prefix) => path.startsWith(prefix))) {
    return true;
  }

  // Fall back to the whole path when the last segment is empty.
  const segments = path.split("/");
  const lastSegment = segments[segments.length - 1] || path;
  return lastSegment.includes(".");
}
|
|
/**
 * Reduces the robots-related settings of a route config to an
 * `{ allow, rule }` pair.
 *
 * `allow` is taken from, in order: a boolean `config.robots`,
 * `config.robots.indexable`, `config.index`. `rule` is taken from
 * `config.robots.rule` or from a string `config.robots`. When a rule is
 * present and `allow` is falsy, `allow` is derived from the rule: `false`
 * for the rule `none` or any rule containing `noindex`, otherwise `true`.
 *
 * @param {object} config - Route config; reads `robots` (boolean, string,
 *   object or null) and `index`.
 * @returns {{ allow: (boolean|undefined), rule: (string|undefined) } | undefined}
 *   The normalised pair, or `undefined` when neither value could be
 *   determined.
 */
export function normaliseRobotsRouteRule(config) {
  const { robots } = config;
  // `typeof null` is "object" as well, so null has to be excluded explicitly
  // before its properties are read.
  const robotsObject = typeof robots === "object" && robots !== null ? robots : undefined;

  let allow;
  if (typeof robots === "boolean")
    allow = robots;
  else if (robotsObject && typeof robotsObject.indexable !== "undefined")
    allow = robotsObject.indexable;
  else if (typeof config.index !== "undefined")
    allow = config.index;

  let rule;
  if (robotsObject && typeof robotsObject.rule !== "undefined")
    rule = robotsObject.rule;
  else if (typeof robots === "string")
    rule = robots;

  // NOTE(review): the check is `!allow`, so an explicit `false` is also
  // recomputed from the rule, not only a missing value — confirm intended.
  if (rule && !allow)
    allow = rule !== "none" && !rule.includes("noindex");

  if (typeof allow === "undefined" && typeof rule === "undefined")
    return;

  return {
    allow,
    rule,
  };
}
|