Skip to content

Commit 9b91e68

Browse files
committed
add logic
1 parent e4a39d2 commit 9b91e68

File tree

3 files changed

+150
-9
lines changed

3 files changed

+150
-9
lines changed

app/api/source-map/route.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import {NextResponse} from 'next/server';
2+
3+
import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';
4+
import {getDevDocsFrontMatter, getDocsFrontMatter} from 'sentry-docs/mdx';
5+
6+
/**
 * GET handler that responds with a JSON object mapping every page slug to the
 * path of the source file it is rendered from.
 *
 * The 404 link checker consumes this map to recognise pages that share one
 * source file so it only needs to check one of them.
 *
 * @returns A JSON response whose keys are slugs (without a trailing slash) and
 *   whose values are source paths, or `null` when a page has no source path.
 */
export async function GET() {
  // Pick the front matter loader that matches the docs flavour being served.
  const pendingDocs = isDeveloperDocs ? getDevDocsFrontMatter() : getDocsFrontMatter();
  const docs = await pendingDocs;

  const sourceMap: Record<string, string | null> = {};

  for (const {slug, sourcePath} of docs) {
    // Keys never carry a trailing slash so lookups can use one canonical form.
    const key = slug.replace(/\/$/, '');
    // Pages without a source path (API-generated pages) are kept with an
    // explicit null rather than being dropped from the map.
    sourceMap[key] = sourcePath ?? null;
  }

  return NextResponse.json(sourceMap);
}
24+

scripts/lint-404s/README.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# 404 Link Checker
2+
3+
This script checks all documentation pages for broken internal links (404s).
4+
5+
## Usage
6+
7+
```bash
8+
# Basic usage (with deduplication - recommended)
9+
bun ./scripts/lint-404s/main.ts
10+
11+
# Show progress for each page
12+
bun ./scripts/lint-404s/main.ts --progress
13+
14+
# Skip deduplication and check all pages (for debugging)
15+
bun ./scripts/lint-404s/main.ts --skip-deduplication
16+
17+
# Filter to a specific path
18+
bun ./scripts/lint-404s/main.ts --path platforms/javascript
19+
```
20+
21+
## Deduplication
22+
23+
By default, the checker **deduplicates common files** to improve performance.
24+
25+
### Why?
26+
27+
The Sentry docs use a "common" file system where documentation is shared across multiple platforms. For example:
28+
29+
- `/platforms/apple/common/configuration/index.mdx` is rendered as:
30+
- `/platforms/apple/guides/ios/configuration/`
31+
- `/platforms/apple/guides/macos/configuration/`
32+
- `/platforms/apple/guides/watchos/configuration/`
33+
- ... and many more
34+
35+
Without deduplication, the checker would fetch and test the same content dozens of times, which:
36+
37+
- Takes much longer to run
38+
- Wastes CI resources
39+
- Provides no additional value (the content is identical)
40+
41+
### How it works
42+
43+
1. The checker fetches a source map from `/api/source-map` that maps each slug to its source file
44+
2. It tracks which source files have been checked
45+
3. When several pages share the same source file, only the first one encountered is checked; the rest are skipped
46+
4. **API-generated pages** are always checked (they have no source file)
47+
48+
This typically reduces the number of pages checked from **~9,000 to ~2,500**, a **72% reduction**.
49+
50+
### When to use `--skip-deduplication`
51+
52+
Use this flag to skip deduplication and verify that all rendered pages work correctly, even if they share the same source. This is rarely necessary but can help debug issues with:
53+
54+
- Path routing
55+
- Platform-specific rendering bugs
56+
- Edge cases in the build system
57+
58+
## Ignore List
59+
60+
The `ignore-list.txt` file contains paths that should be skipped during checking. Add paths here (one per line) if they're known to be inaccessible or are special cases.
61+
62+
## Exit Codes
63+
64+
- `0` - No 404s found
65+
- `1` - 404s were detected

scripts/lint-404s/main.ts

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ const trimSlashes = (s: string) => s.replace(/(^\/|\/$)/g, '');
1313
const ignoreListFile = path.join(dirname(import.meta.url), './ignore-list.txt');
1414

1515
const showProgress = process.argv.includes('--progress');
16+
const deduplicatePages = !process.argv.includes('--skip-deduplication');
1617

1718
// Get the path filter if specified
1819
const pathFilterIndex = process.argv.indexOf('--path');
@@ -35,22 +36,73 @@ async function fetchWithFollow(url: URL | string): Promise<Response> {
3536
return r;
3637
}
3738

39+
/**
 * Reduces the list of slugs so that each source file is only checked once.
 *
 * The slug → source path map is downloaded from `${baseURL}api/source-map`.
 * The first slug seen for a given source path is kept; later slugs that map
 * to the same source path are skipped and counted. Slugs with no usable source
 * path (missing from the map, `null`, empty, or not a string) are always kept.
 *
 * This is best-effort: if the map cannot be downloaded, the server answers
 * with a non-2xx status, or the payload is not a JSON object, a warning is
 * printed and every slug is returned unchanged.
 *
 * @param allSlugs Slugs to consider, with or without a trailing slash.
 * @returns The slugs that should be checked (in their original order and
 *   spelling) together with the number of slugs that were skipped.
 */
async function deduplicateSlugs(
  allSlugs: string[]
): Promise<{skippedCount: number; slugsToCheck: string[]}> {
  try {
    const response = await fetch(`${baseURL}api/source-map`);
    // An error response may still carry a JSON body; never treat it as a map.
    if (!response.ok) {
      throw new Error(`source map request failed with HTTP ${response.status}`);
    }

    const payload: unknown = await response.json();
    if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
      throw new Error('source map response is not a JSON object');
    }
    // Shape was verified above; individual values are still validated per slug.
    const sourceMap = payload as Record<string, unknown>;

    const checkedSources = new Set<string>();
    const slugsToCheck: string[] = [];
    let skippedCount = 0;

    for (const slug of allSlugs) {
      const normalizedSlug = slug.replace(/\/$/, '');
      // Own-property lookup only: a slug such as "constructor" must not pick
      // up members inherited from Object.prototype.
      const sourcePath = Object.prototype.hasOwnProperty.call(sourceMap, normalizedSlug)
        ? sourceMap[normalizedSlug]
        : null;

      // Always check API-generated pages (no source file)
      if (typeof sourcePath !== 'string' || sourcePath === '') {
        slugsToCheck.push(slug);
        continue;
      }

      // Skip if we've already checked this source file
      if (checkedSources.has(sourcePath)) {
        skippedCount++;
        continue;
      }

      // First time seeing this source file
      checkedSources.add(sourcePath);
      slugsToCheck.push(slug);
    }

    return {skippedCount, slugsToCheck};
  } catch (error: unknown) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('⚠️ Failed to fetch source map:', reason);
    console.warn('Falling back to checking all pages...\n');
    return {skippedCount: 0, slugsToCheck: allSlugs};
  }
}
79+
3880
async function main() {
3981
const sitemap = await fetch(`${baseURL}sitemap.xml`).then(r => r.text());
4082

41-
const slugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
83+
const allSlugs = [...sitemap.matchAll(/<loc>([^<]*)<\/loc>/g)]
4284
.map(l => l[1])
4385
.map(url => trimSlashes(new URL(url).pathname))
4486
.filter(Boolean)
4587
.filter(slug => (pathFilter ? slug.startsWith(pathFilter) : true));
46-
const allSlugsSet = new Set(slugs);
47-
48-
if (pathFilter) {
49-
console.log('Checking 404s on %d pages in /%s', slugs.length, pathFilter);
50-
} else {
51-
console.log('Checking 404s on %d pages', slugs.length);
88+
const allSlugsSet = new Set(allSlugs);
89+
90+
// Deduplicate pages with same source file (default behavior)
91+
const {skippedCount, slugsToCheck} = deduplicatePages
92+
? await deduplicateSlugs(allSlugs)
93+
: {skippedCount: 0, slugsToCheck: allSlugs};
94+
95+
if (skippedCount > 0) {
96+
console.log(
97+
'Deduplication: checking %d unique pages (skipped %d duplicates)\n',
98+
slugsToCheck.length,
99+
skippedCount
100+
);
52101
}
53102

103+
const pathInfo = pathFilter ? ` in /${pathFilter}` : '';
104+
console.log('Checking 404s on %d pages%s', slugsToCheck.length, pathInfo);
105+
54106
const all404s: {page404s: Link[]; slug: string}[] = [];
55107

56108
// check if the slug equivalent of the href is in the sitemap
@@ -100,7 +152,7 @@ async function main() {
100152
return false;
101153
}
102154

103-
for (const slug of slugs) {
155+
for (const slug of slugsToCheck) {
104156
const pageUrl = new URL(slug, baseURL);
105157
const now = performance.now();
106158
const html = await fetchWithFollow(pageUrl.href).then(r => r.text());
@@ -134,7 +186,7 @@ async function main() {
134186
}
135187

136188
if (all404s.length === 0) {
137-
console.log('\n\n🎉 No 404s found');
189+
console.log('\n🎉 No 404s found');
138190
return false;
139191
}
140192
const numberOf404s = all404s.map(x => x.page404s.length).reduce((a, b) => a + b, 0);

0 commit comments

Comments
 (0)