NebulaCloud/lib/sitemap-utils.ts
2025-09-15 17:28:58 +08:00

290 lines
9.1 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { locales, defaultLocale } from './i18n';
import { getSitemapStats, generateSitemap } from './sitemap-generator';
/**
 * Sitemap validation helper.
 *
 * Each method regenerates the sitemap for the configured base URL through
 * `generateSitemap`, so results reflect whatever the generator returns at
 * call time.
 */
export class SitemapValidator {
  private baseUrl: string;

  /**
   * @param baseUrl Base URL handed to the sitemap generator and stats helper.
   */
  constructor(baseUrl: string) {
    this.baseUrl = baseUrl;
  }

  /**
   * Probes every sitemap URL with a `HEAD` request, one after another.
   *
   * @returns `valid` holds URLs whose response was `ok`; `invalid` holds the
   *   rest; `errors` pairs each invalid URL with either the HTTP status line
   *   or the message of the error thrown by `fetch`.
   */
  async validateUrls(): Promise<{
    valid: string[];
    invalid: string[];
    errors: Array<{ url: string; error: string }>;
  }> {
    const reachable: string[] = [];
    const unreachable: string[] = [];
    const failures: Array<{ url: string; error: string }> = [];

    for (const { url } of generateSitemap(this.baseUrl)) {
      // Stays null only when the request completed with an ok status.
      let failure: { url: string; error: string } | null = null;

      try {
        const res = await fetch(url, { method: 'HEAD' });
        if (!res.ok) {
          failure = { url, error: `HTTP ${res.status}: ${res.statusText}` };
        }
      } catch (err) {
        failure = {
          url,
          error: err instanceof Error ? err.message : 'Unknown error',
        };
      }

      if (failure === null) {
        reachable.push(url);
        continue;
      }
      unreachable.push(url);
      failures.push(failure);
    }

    return { valid: reachable, invalid: unreachable, errors: failures };
  }

  /**
   * Finds URLs that occur more than once in the generated sitemap.
   *
   * @returns Every repeated occurrence in sitemap order; a URL that appears
   *   k times is therefore listed k - 1 times.
   */
  checkDuplicates(): string[] {
    const alreadySeen = new Set<string>();

    return generateSitemap(this.baseUrl)
      .map((entry) => entry.url)
      .filter((url) => {
        if (alreadySeen.has(url)) {
          return true;
        }
        alreadySeen.add(url);
        return false;
      });
  }

  /**
   * Builds a sitemap report: generator statistics, duplicate URLs, and a list
   * of human-readable recommendations derived from both.
   */
  generateReport(): {
    stats: ReturnType<typeof getSitemapStats>;
    duplicates: string[];
    recommendations: string[];
  } {
    const stats = getSitemapStats(this.baseUrl);
    const duplicates = this.checkDuplicates();

    // Each rule pairs a trigger condition with the advice emitted when it holds.
    const rules: Array<[boolean, string]> = [
      [duplicates.length > 0, `发现 ${duplicates.length} 个重复的 URL请检查站点地图生成逻辑`],
      [stats.urlCount > 50000, '站点地图包含超过 50,000 个 URL建议分割为多个站点地图文件'],
      [stats.blogPages === 0, '未发现博客页面,请确认博客内容是否正确配置'],
    ];
    const recommendations = rules
      .filter(([applies]) => applies)
      .map(([, message]) => message);

    return { stats, duplicates, recommendations };
  }
}
/**
 * Sitemap submission helper: builds search-engine "ping" URLs for this site's
 * `sitemap.xml` and requests them.
 *
 * NOTE(review): several search engines have announced retirement of their
 * sitemap ping endpoints — confirm each endpoint below is still accepted
 * before relying on the results.
 */
export class SitemapSubmitter {
  private baseUrl: string;

  /**
   * @param baseUrl Site base URL; one or more trailing slashes are tolerated.
   */
  constructor(baseUrl: string) {
    this.baseUrl = baseUrl;
  }

  /**
   * Builds the submission URL for each supported search engine.
   *
   * @returns A map from engine name (`google`, `bing`, `yandex`, `baidu`) to
   *   the ping URL carrying the URI-encoded sitemap address.
   */
  getSubmissionUrls(): Record<string, string> {
    // Drop trailing slashes so "https://site/" does not become "https://site//sitemap.xml".
    const sitemapUrl = `${this.baseUrl.replace(/\/+$/, '')}/sitemap.xml`;
    const encodedSitemapUrl = encodeURIComponent(sitemapUrl);

    return {
      google: `https://www.google.com/ping?sitemap=${encodedSitemapUrl}`,
      bing: `https://www.bing.com/ping?sitemap=${encodedSitemapUrl}`,
      yandex: `https://webmaster.yandex.com/ping?sitemap=${encodedSitemapUrl}`,
      baidu: `https://ping.baidu.com/ping/RPC2?sitemap=${encodedSitemapUrl}`,
    };
  }

  /**
   * Requests every submission URL with `GET` and reports the outcome per engine.
   *
   * The requests are independent, so they are issued concurrently; results
   * are still reported in the order of `getSubmissionUrls()`.
   *
   * @returns `success` lists engines whose response was `ok`; `failed` pairs
   *   each remaining engine with the HTTP status line or the thrown error's
   *   message. This method itself never rejects because of a failed request.
   */
  async submitToSearchEngines(): Promise<{
    success: string[];
    failed: Array<{ engine: string; error: string }>;
  }> {
    const outcomes = await Promise.all(
      Object.entries(this.getSubmissionUrls()).map(
        async ([engine, url]): Promise<{ engine: string; error: string | null }> => {
          try {
            const response = await fetch(url, { method: 'GET' });
            return response.ok
              ? { engine, error: null }
              : { engine, error: `HTTP ${response.status}: ${response.statusText}` };
          } catch (error: unknown) {
            // Network-level failures are recorded, not propagated.
            return {
              engine,
              error: error instanceof Error ? error.message : 'Unknown error',
            };
          }
        },
      ),
    );

    const success: string[] = [];
    const failed: Array<{ engine: string; error: string }> = [];
    for (const { engine, error } of outcomes) {
      if (error === null) {
        success.push(engine);
      } else {
        failed.push({ engine, error });
      }
    }
    return { success, failed };
  }
}
/**
 * Generates a sitemap index document that points at `<baseUrl>/sitemap.xml`.
 *
 * @param baseUrl Site base URL in raw (not entity-escaped) form; trailing
 *   slashes are tolerated.
 * @returns The sitemap index XML, with `lastmod` set to the current time in
 *   ISO 8601 format.
 */
export function generateSitemapIndex(baseUrl: string): string {
  // Sitemap files are XML, so reserved characters in URLs must be entity-escaped.
  const escapeXml = (text: string): string =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');

  // Drop trailing slashes so "https://site/" does not become "https://site//sitemap.xml".
  const sitemapUrl = `${baseUrl.replace(/\/+$/, '')}/sitemap.xml`;
  const lastmod = new Date().toISOString();

  return `<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>${escapeXml(sitemapUrl)}</loc>
<lastmod>${lastmod}</lastmod>
</sitemap>
</sitemapindex>`;
}
/**
 * Generates a human-readable HTML sitemap page.
 *
 * The page shows the generator's statistics followed by the sitemap entries
 * grouped by page category; categories appear in the order in which they are
 * first encountered in the sitemap.
 *
 * @param baseUrl Base URL passed to `generateSitemap` and `getSitemapStats`,
 *   and compared against entry URLs to recognise the home page.
 * @returns A complete HTML document as a string.
 */
export function generateHtmlSitemap(baseUrl: string): string {
  // URLs end up inside an attribute and element text, so they must be HTML-escaped.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Maps an entry URL to its category key; the order of checks matters
  // because a blog post URL would also satisfy the generic "main" pattern.
  const categorize = (url: string): string => {
    if (url.includes('/blog/') && !url.endsWith('/blog')) return 'blog-posts';
    if (url.endsWith('/blog')) return 'blog';
    if (url.endsWith('/contact')) return 'contact';
    if (url.endsWith('/products')) return 'products';
    if (url === baseUrl || /\/[a-z-]+$/.test(url)) return 'main';
    return 'other';
  };

  const sitemap = generateSitemap(baseUrl);
  const stats = getSitemapStats(baseUrl);

  // Group entries by category, preserving first-seen category order.
  const groupedPages = new Map<string, typeof sitemap>();
  for (const entry of sitemap) {
    const category = categorize(entry.url);
    const bucket = groupedPages.get(category);
    if (bucket) {
      bucket.push(entry);
    } else {
      groupedPages.set(category, [entry]);
    }
  }

  // Display headings (user-facing text) for each category key.
  const categoryNames: Record<string, string> = {
    main: '主要页面',
    blog: '博客',
    'blog-posts': '博客文章',
    contact: '联系我们',
    products: '产品',
    other: '其他页面',
  };

  let html = `<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>站点地图 - Eco Life</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; margin: 40px; line-height: 1.6; }
.header { border-bottom: 2px solid #10b981; padding-bottom: 20px; margin-bottom: 30px; }
.stats { background: #f0fdf4; padding: 20px; border-radius: 8px; margin-bottom: 30px; }
.category { margin-bottom: 30px; }
.category h2 { color: #10b981; border-bottom: 1px solid #d1d5db; padding-bottom: 10px; }
.url-list { list-style: none; padding: 0; }
.url-item { margin: 8px 0; padding: 12px; background: #f9fafb; border-radius: 6px; }
.url-item a { text-decoration: none; color: #1f2937; font-weight: 500; }
.url-item a:hover { color: #10b981; }
.url-meta { font-size: 0.875rem; color: #6b7280; margin-top: 4px; }
.priority { display: inline-block; padding: 2px 8px; border-radius: 12px; font-size: 0.75rem; }
.priority-high { background: #fef3c7; color: #92400e; }
.priority-medium { background: #dbeafe; color: #1e40af; }
.priority-low { background: #f3f4f6; color: #374151; }
</style>
</head>
<body>
<div class="header">
<h1>站点地图</h1>
<p>生成时间: ${new Date().toLocaleString('zh-CN')}</p>
</div>
<div class="stats">
<h3>统计信息</h3>
<p><strong>总页面数:</strong> ${stats.urlCount}</p>
<p><strong>静态页面:</strong> ${stats.staticPages}</p>
<p><strong>博客文章:</strong> ${stats.blogPages}</p>
<p><strong>支持语言:</strong> ${stats.languages} (${locales.join(', ')})</p>
</div>`;

  for (const [category, pages] of groupedPages) {
    html += `
<div class="category">
<h2>${categoryNames[category] ?? category}</h2>
<ul class="url-list">`;

    for (const page of pages) {
      // Priority badge thresholds: >= 0.8 high, >= 0.6 medium, otherwise low.
      let priorityClass = 'priority-low';
      if (page.priority >= 0.8) {
        priorityClass = 'priority-high';
      } else if (page.priority >= 0.6) {
        priorityClass = 'priority-medium';
      }

      const safeUrl = escapeHtml(page.url);
      html += `
<li class="url-item">
<a href="${safeUrl}" target="_blank" rel="noopener">${safeUrl}</a>
<div class="url-meta">
<span class="priority ${priorityClass}">优先级: ${page.priority}</span>
<span>更新频率: ${page.changeFrequency}</span>
<span>最后修改: ${page.lastModified.toLocaleDateString('zh-CN')}</span>
</div>
</li>`;
    }

    html += `
</ul>
</div>`;
  }

  html += `
</body>
</html>`;
  return html;
}