import type { NitroRouteConfig } from 'nitropack';
import type { ParsedRobotsTxt, RobotsGroupInput, RobotsGroupResolved } from './types.js';
/**
* We read the robots.txt and extract any disallow or sitemap rules from it.
*
* We follow the Google specification; the following keys are checked:
*
* - user-agent: identifies which crawler the rules apply to.
* - allow: a URL path that may be crawled.
* - disallow: a URL path that may not be crawled.
* - sitemap: the complete URL of a sitemap.
* - host: the host name of the site; this is an optional, non-standard directive.
*
* @see https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
* @see https://github.com/google/robotstxt/blob/86d5836ba2d5a0b6b938ab49501be0e09d9c276c/robots.cc#L714C1-L720C2
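*
* @example
* // A minimal usage sketch; the exact output shape of ParsedRobotsTxt is assumed from ./types.js.
* const parsed = parseRobotsTxt('User-agent: *\nDisallow: /admin\nSitemap: https://example.com/sitemap.xml')
* // parsed should contain a group for "*" that disallows /admin, plus the sitemap URL.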
*/
export declare function parseRobotsTxt(s: string): ParsedRobotsTxt;
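/** Match a path against a group's resolved rules, returning the matching rule or null when none applies. */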
export declare function matchPathToRule(path: string, _rules: RobotsGroupResolved['_rules']): RobotsGroupResolved['_rules'][number] | null;
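/** Validate a parsed robots.txt structure before it is used further. */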
export declare function validateRobots(robotsTxt: ParsedRobotsTxt): ParsedRobotsTxt;
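/** Coerce a value to an array. */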
export declare function asArray(v: any): any[];
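/** Normalise a user-provided robots group into its fully resolved form. */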
export declare function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved;
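/**
* Serialise the resolved groups and sitemap URLs into robots.txt text.
*
* @example
* // A minimal sketch; the field names on RobotsGroupResolved (userAgent, allow, disallow) are assumed.
* generateRobotsTxt({
*   groups: [{ userAgent: ['*'], allow: [], disallow: ['/admin'] }],
*   sitemaps: ['https://example.com/sitemap.xml'],
* })
*/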
export declare function generateRobotsTxt({ groups, sitemaps }: {
groups: RobotsGroupResolved[];
sitemaps: string[];
}): string;
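/** Merge array entries that share the same value for the given key. */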
export declare function mergeOnKey<T, K extends keyof T>(arr: T[], key: K): T[];
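/** Whether the path is an internal route (for example a framework or asset path). */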
export declare function isInternalRoute(_path: string): boolean;
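/** Extract a normalised robots rule (allow flag and rule string) from a Nitro route config, or undefined when none is set. */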
export declare function normaliseRobotsRouteRule(config: NitroRouteConfig): {
allow: boolean | undefined;
rule: string | undefined;
} | undefined;