import type { NitroRouteConfig } from 'nitropack';
/**
 * Reads a robots.txt file and extracts any disallow and sitemap rules from it.
 *
 * Following the Google specification, these keys are checked:
 *
 * - user-agent: identifies which crawler the rules apply to.
 * - allow: a URL path that may be crawled.
 * - disallow: a URL path that may not be crawled.
 * - sitemap: the complete URL of a sitemap.
 * - host: the host name of the site; an optional, non-standard directive.
 *
 * @see https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt
 * @see https://github.com/google/robotstxt/blob/86d5836ba2d5a0b6b938ab49501be0e09d9c276c/robots.cc#L714C1-L720C2
 */
export declare function parseRobotsTxt(s: string): ParsedRobotsTxt;
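/*
 * Usage sketch (illustrative only, not part of the module's API docs). It assumes the
 * ParsedRobotsTxt result exposes `groups` and `sitemaps` fields and that parsed groups
 * are accepted by `normalizeGroup`; neither is guaranteed by this declaration file alone.
 *
 *   const parsed = parseRobotsTxt([
 *     'User-agent: *',
 *     'Disallow: /admin',
 *     'Sitemap: https://example.com/sitemap.xml',
 *   ].join('\n'));                                        // parse raw robots.txt text
 *   const validated = validateRobots(parsed);             // returns ParsedRobotsTxt, per the signature below
 *   const groups = validated.groups.map(normalizeGroup);  // assumes groups are valid RobotsGroupInput
 *   const robotsTxt = generateRobotsTxt({ groups, sitemaps: validated.sitemaps });
 */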
export declare function matchPathToRule(path: string, _rules: RobotsGroupResolved['_rules']): RobotsGroupResolved['_rules'][number] | null;
export declare function validateRobots(robotsTxt: ParsedRobotsTxt): ParsedRobotsTxt;
export declare function asArray(v: any): any[];
export declare function normalizeGroup(group: RobotsGroupInput): RobotsGroupResolved;
export declare function generateRobotsTxt({ groups, sitemaps }: {
    groups: RobotsGroupResolved[];
    sitemaps: string[];
}): string;
export declare function mergeOnKey<T, K extends keyof T>(arr: T[], key: K): T[];
export declare function isInternalRoute(_path: string): boolean;
export declare function normaliseRobotsRouteRule(config: NitroRouteConfig): {
    allow: boolean | undefined;
    rule: string | undefined;
} | undefined;