fix(docs): handle anchors in docs link audit

This commit is contained in:
Peter Steinberger
2026-03-21 19:58:05 +00:00
parent 8cac327c19
commit c29ba9d21a
3 changed files with 170 additions and 106 deletions

View File

@@ -0,0 +1,22 @@
/** One problem found by the docs link audit. */
export type BrokenDocLink = {
/** Docs-relative path of the file containing the link, or "docs.json" for navigation entries. */
file: string;
/** 1-based line number of the link; 0 for navigation entries, which have no source line. */
line: number;
/** The link target as written in the source, or the navigation page entry. */
link: string;
/** Human-readable explanation of why the target is considered broken. */
reason: string;
};
/** Outcome of following a route through the redirect table. */
export type ResolveRouteResult = {
/** True when the terminal route is "/" or is present in the set of published routes. */
ok: boolean;
/** Normalized route reached after following redirects. */
terminal: string;
/** Present (true) only when the redirect chain revisited a route; `ok` is false in that case. */
loop?: boolean;
};
/**
 * Reduces a route to its canonical form: drops any `#fragment` and `?query`,
 * trims leading/trailing slashes, and returns the result with a single leading
 * slash ("/" when nothing remains).
 */
export function normalizeRoute(route: string): string;
/**
 * Normalizes `route`, follows redirects until none applies, and reports whether
 * the terminal route is published.
 *
 * @param route - Route to resolve; fragments and query strings are ignored.
 * @param options - Optional overrides for the redirect map and the published
 * route set; when omitted the module's own tables are used.
 */
export function resolveRoute(
route: string,
options?: { redirects?: Map<string, string>; routes?: Set<string> },
): ResolveRouteResult;
/**
 * Scans the docs markdown files and the docs.json navigation entries.
 *
 * @returns `checked` — number of internal links and navigation entries examined;
 * `broken` — the entries whose target could not be resolved.
 */
export function auditDocsLinks(): {
checked: number;
broken: BrokenDocLink[];
};

View File

@@ -2,6 +2,7 @@
import fs from "node:fs";
import path from "node:path";
import { pathToFileURL } from "node:url";
const ROOT = process.cwd();
const DOCS_DIR = path.join(ROOT, "docs");
@@ -42,8 +43,10 @@ function normalizeSlashes(p) {
}
/** @param {string} p */
function normalizeRoute(p) {
const stripped = p.replace(/^\/+|\/+$/g, "");
export function normalizeRoute(p) {
const [withoutFragment] = p.split("#");
const [withoutQuery] = withoutFragment.split("?");
const stripped = withoutQuery.replace(/^\/+|\/+$/g, "");
return stripped ? `/${stripped}` : "/";
}
@@ -105,22 +108,27 @@ for (const abs of markdownFiles) {
routes.add(normalizeRoute(permalink));
}
/** @param {string} route */
function resolveRoute(route) {
/**
* @param {string} route
* @param {{redirects?: Map<string, string>, routes?: Set<string>}} [options]
*/
export function resolveRoute(route, options = {}) {
const redirectMap = options.redirects ?? redirects;
const publishedRoutes = options.routes ?? routes;
let current = normalizeRoute(route);
if (current === "/") {
return { ok: true, terminal: "/" };
}
const seen = new Set([current]);
while (redirects.has(current)) {
current = redirects.get(current);
while (redirectMap.has(current)) {
current = normalizeRoute(redirectMap.get(current));
if (seen.has(current)) {
return { ok: false, terminal: current, loop: true };
}
seen.add(current);
}
return { ok: routes.has(current), terminal: current };
return { ok: publishedRoutes.has(current), terminal: current };
}
/** @param {unknown} node */
@@ -160,138 +168,141 @@ function collectNavPageEntries(node) {
const markdownLinkRegex = /!?\[[^\]]*\]\(([^)]+)\)/g;
/** @type {{file: string; line: number; link: string; reason: string}[]} */
const broken = [];
let checked = 0;
export function auditDocsLinks() {
/** @type {{file: string; line: number; link: string; reason: string}[]} */
const broken = [];
let checked = 0;
for (const abs of markdownFiles) {
const rel = normalizeSlashes(path.relative(DOCS_DIR, abs));
const baseDir = normalizeSlashes(path.dirname(rel));
const rawText = fs.readFileSync(abs, "utf8");
const lines = rawText.split("\n");
for (const abs of markdownFiles) {
const rel = normalizeSlashes(path.relative(DOCS_DIR, abs));
const baseDir = normalizeSlashes(path.dirname(rel));
const rawText = fs.readFileSync(abs, "utf8");
const lines = rawText.split("\n");
// Track if we're inside a code fence
let inCodeFence = false;
let inCodeFence = false;
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
let line = lines[lineNum];
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
let line = lines[lineNum];
// Toggle code fence state
if (line.trim().startsWith("```")) {
inCodeFence = !inCodeFence;
continue;
}
if (inCodeFence) {
continue;
}
// Strip inline code to avoid false positives
line = stripInlineCode(line);
for (const match of line.matchAll(markdownLinkRegex)) {
const raw = match[1]?.trim();
if (!raw) {
if (line.trim().startsWith("```")) {
inCodeFence = !inCodeFence;
continue;
}
// Skip external links, mailto, tel, data, and same-page anchors
if (/^(https?:|mailto:|tel:|data:|#)/i.test(raw)) {
if (inCodeFence) {
continue;
}
const [pathPart] = raw.split("#");
const clean = pathPart.split("?")[0];
if (!clean) {
// Same-page anchor only (already skipped above)
continue;
}
checked++;
line = stripInlineCode(line);
if (clean.startsWith("/")) {
const route = normalizeRoute(clean);
const resolvedRoute = resolveRoute(route);
if (!resolvedRoute.ok) {
const staticRel = route.replace(/^\//, "");
if (!relAllFiles.has(staticRel)) {
for (const match of line.matchAll(markdownLinkRegex)) {
const raw = match[1]?.trim();
if (!raw) {
continue;
}
if (/^(https?:|mailto:|tel:|data:|#)/i.test(raw)) {
continue;
}
const [pathPart] = raw.split("#");
const clean = pathPart.split("?")[0];
if (!clean) {
continue;
}
checked++;
if (clean.startsWith("/")) {
const route = normalizeRoute(clean);
const resolvedRoute = resolveRoute(route);
if (!resolvedRoute.ok) {
const staticRel = route.replace(/^\//, "");
if (!relAllFiles.has(staticRel)) {
broken.push({
file: rel,
line: lineNum + 1,
link: raw,
reason: `route/file not found (terminal: ${resolvedRoute.terminal})`,
});
continue;
}
}
continue;
}
if (!clean.startsWith(".") && !clean.includes("/")) {
continue;
}
const normalizedRel = normalizeSlashes(path.normalize(path.join(baseDir, clean)));
if (/\.[a-zA-Z0-9]+$/.test(normalizedRel)) {
if (!relAllFiles.has(normalizedRel)) {
broken.push({
file: rel,
line: lineNum + 1,
link: raw,
reason: `route/file not found (terminal: ${resolvedRoute.terminal})`,
reason: "relative file not found",
});
continue;
}
continue;
}
// Skip anchor validation - Mintlify generates anchors from MDX components,
// accordions, and config schemas that we can't reliably extract from markdown.
continue;
}
// Relative placeholder strings used in code examples (for example "url")
// are intentionally skipped.
if (!clean.startsWith(".") && !clean.includes("/")) {
continue;
}
const candidates = [
normalizedRel,
`${normalizedRel}.md`,
`${normalizedRel}.mdx`,
`${normalizedRel}/index.md`,
`${normalizedRel}/index.mdx`,
];
const normalizedRel = normalizeSlashes(path.normalize(path.join(baseDir, clean)));
if (/\.[a-zA-Z0-9]+$/.test(normalizedRel)) {
if (!relAllFiles.has(normalizedRel)) {
if (!candidates.some((candidate) => relAllFiles.has(candidate))) {
broken.push({
file: rel,
line: lineNum + 1,
link: raw,
reason: "relative file not found",
reason: "relative doc target not found",
});
}
continue;
}
const candidates = [
normalizedRel,
`${normalizedRel}.md`,
`${normalizedRel}.mdx`,
`${normalizedRel}/index.md`,
`${normalizedRel}/index.mdx`,
];
if (!candidates.some((candidate) => relAllFiles.has(candidate))) {
broken.push({
file: rel,
line: lineNum + 1,
link: raw,
reason: "relative doc target not found",
});
}
}
}
}
for (const page of collectNavPageEntries(docsConfig.navigation || [])) {
if (isGeneratedTranslatedDoc(String(page))) {
continue;
}
checked++;
const route = normalizeRoute(page);
const resolvedRoute = resolveRoute(route);
if (resolvedRoute.ok) {
continue;
for (const page of collectNavPageEntries(docsConfig.navigation || [])) {
if (isGeneratedTranslatedDoc(String(page))) {
continue;
}
checked++;
const route = normalizeRoute(page);
const resolvedRoute = resolveRoute(route);
if (resolvedRoute.ok) {
continue;
}
broken.push({
file: "docs.json",
line: 0,
link: page,
reason: `navigation page not published (terminal: ${resolvedRoute.terminal})`,
});
}
broken.push({
file: "docs.json",
line: 0,
link: page,
reason: `navigation page not published (terminal: ${resolvedRoute.terminal})`,
});
return { checked, broken };
}
console.log(`checked_internal_links=${checked}`);
console.log(`broken_links=${broken.length}`);
for (const item of broken) {
console.log(`${item.file}:${item.line} :: ${item.link} :: ${item.reason}`);
function isCliEntry() {
const cliArg = process.argv[1];
return cliArg ? import.meta.url === pathToFileURL(cliArg).href : false;
}
if (broken.length > 0) {
process.exit(1);
if (isCliEntry()) {
const { checked, broken } = auditDocsLinks();
console.log(`checked_internal_links=${checked}`);
console.log(`broken_links=${broken.length}`);
for (const item of broken) {
console.log(`${item.file}:${item.line} :: ${item.link} :: ${item.reason}`);
}
if (broken.length > 0) {
process.exit(1);
}
}

View File

@@ -0,0 +1,31 @@
import { describe, expect, it } from "vitest";
// The audit script is a plain .mjs module, so it is loaded with a dynamic import
// and cast to the shapes of the two helpers exercised below.
// NOTE(review): this cast repeats signatures that are also declared for the
// script elsewhere in this change; presumably it could be dropped if those
// declarations are picked up for this import path — confirm.
const { normalizeRoute, resolveRoute } =
(await import("../../scripts/docs-link-audit.mjs")) as unknown as {
normalizeRoute: (route: string) => string;
resolveRoute: (
route: string,
options?: { redirects?: Map<string, string>; routes?: Set<string> },
) => { ok: boolean; terminal: string; loop?: boolean };
};
describe("docs-link-audit", () => {
// Both the "#fragment" and the "?query" suffix must be dropped from a route.
it("normalizes route fragments away", () => {
expect(normalizeRoute("/plugins/building-plugins#registering-agent-tools")).toBe(
"/plugins/building-plugins",
);
expect(normalizeRoute("/plugins/building-plugins?tab=all")).toBe("/plugins/building-plugins");
});
// A redirect whose destination carries an anchor must resolve to the
// anchor-free published route. Explicit redirects/routes are passed in so the
// assertion does not depend on the tables the script builds for itself.
it("resolves redirects that land on anchored sections", () => {
const redirects = new Map([
["/plugins/agent-tools", "/plugins/building-plugins#registering-agent-tools"],
]);
const routes = new Set(["/plugins/building-plugins"]);
// toEqual ignores undefined properties, so no `loop` key is expected here.
expect(resolveRoute("/plugins/agent-tools", { redirects, routes })).toEqual({
ok: true,
terminal: "/plugins/building-plugins",
});
});
});