-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
feat: AI client user-agent detection for automatic markdown serving #15059
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
583f637
c0acd2b
e46a232
a378e1b
9072234
86d7ead
6af0688
a725164
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import {createMcpHandler} from 'mcp-handler'; | ||
import {z} from 'zod'; | ||
|
||
import {readDocContent} from '../../shared/docs-utils'; | ||
import {formatMatchAsBlock, searchIndex} from '../search/searchIndex'; | ||
|
||
/**
 * MCP handler that registers two documentation tools:
 *  - `search_docs`: runs `searchIndex` and renders each match with `formatMatchAsBlock`.
 *  - `get_doc`: returns whatever `readDocContent` yields for the requested path.
 */
const handler = createMcpHandler(
  server => {
    // Both tools reply with a single text content item.
    const asTextResult = (text: string) => ({
      content: [{type: 'text' as const, text}],
    });

    server.tool(
      'search_docs',
      'Search the precomputed markdown index and return matching documentation entry points.',
      {
        query: z.string().min(1),
        limit: z.number().int().min(1).max(25).default(5),
      },
      async ({query, limit}) => {
        const hits = await searchIndex(query, limit);
        if (hits.length === 0) {
          return asTextResult('No matches found.');
        }

        // One block per match, separated by a blank line.
        return asTextResult(hits.map(formatMatchAsBlock).join('\n\n'));
      }
    );

    server.tool(
      'get_doc',
      'Fetch raw markdown from the documentation exports. Reads local files when available, otherwise fetches from DOCS_PUBLIC_BASE.',
      {
        path: z.string().min(1),
      },
      async ({path}) => asTextResult(await readDocContent(path))
    );
  },
  {
    // Optional server options
  },
  {
    basePath: '/api',
    maxDuration: 60,
    verboseLogs: false,
  }
);
|
||
/**
 * Builds a copy of `request` whose URL path has one trailing slash removed.
 *
 * The root path ("/") is left untouched. Method, headers, body and the abort
 * signal are carried over to the new request; the query string is preserved.
 *
 * @param request Incoming request.
 * @returns A new `Request` pointing at the normalized URL.
 */
function normalizeRequest(request: Request): Request {
  const url = new URL(request.url);
  if (url.pathname.length > 1 && url.pathname.endsWith('/')) {
    url.pathname = url.pathname.slice(0, -1);
  }

  // `duplex` is needed when the body is a stream, but it is missing from some
  // RequestInit typings. Widening the type keeps the rest of the object
  // type-checked, which a blanket `@ts-ignore` would not.
  const init: RequestInit & {duplex: 'half'} = {
    method: request.method,
    headers: request.headers,
    body: request.body,
    // Forward cancellation so aborting the incoming request also aborts the copy.
    signal: request.signal,
    duplex: 'half',
  };

  return new Request(url.toString(), init);
}
|
||
/**
 * Route entry point: normalizes the incoming request URL, then delegates to
 * the MCP handler.
 */
function wrappedHandler(request: Request) {
  return handler(normalizeRequest(request));
}

// The same wrapped handler is exported for each supported HTTP method.
export {wrappedHandler as GET, wrappedHandler as POST, wrappedHandler as DELETE};
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import {NextRequest, NextResponse} from 'next/server'; | ||
|
||
import {mapMatchToResponse, searchIndex} from './searchIndex'; | ||
|
||
export const runtime = 'nodejs';

/** Result count used when `limit` is absent, blank, or not a number. */
const DEFAULT_SEARCH_LIMIT = 10;
/** Upper bound applied to any requested `limit`. */
const MAX_SEARCH_LIMIT = 25;

/**
 * Converts the raw `limit` query parameter into an integer in
 * [1, MAX_SEARCH_LIMIT].
 *
 * Missing, blank, or non-numeric values fall back to DEFAULT_SEARCH_LIMIT
 * instead of turning into `NaN`, which would make the search return nothing.
 */
function parseLimit(raw: string | null): number {
  if (raw === null || raw.trim() === '') {
    return DEFAULT_SEARCH_LIMIT;
  }

  const parsed = Number(raw);
  if (Number.isNaN(parsed)) {
    return DEFAULT_SEARCH_LIMIT;
  }

  // Truncate fractional values so the echoed limit matches what is applied.
  return Math.min(MAX_SEARCH_LIMIT, Math.max(1, Math.trunc(parsed)));
}

/**
 * Search endpoint.
 *
 * Query parameters:
 *  - `q`: search text (defaults to an empty string).
 *  - `limit`: maximum number of results; see `parseLimit` for validation.
 *
 * @returns JSON with `query`, `limit`, `count` and `results` on success, or
 *   `query`, `limit` and `error` with status 500 when the search throws.
 */
export async function GET(request: NextRequest) {
  const {searchParams} = new URL(request.url);
  const query = searchParams.get('q') ?? '';
  const limit = parseLimit(searchParams.get('limit'));

  try {
    const matches = await searchIndex(query, limit);
    const results = matches.map(mapMatchToResponse);

    return NextResponse.json({
      query,
      limit,
      count: results.length,
      results,
    });
  } catch (error) {
    return NextResponse.json(
      {
        query,
        limit,
        error: error instanceof Error ? error.message : 'Unknown error',
      },
      {status: 500}
    );
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
import {promises as fs} from 'node:fs'; | ||
import path from 'node:path'; | ||
|
||
import {buildDocUrl} from '../../shared/docs-utils'; | ||
|
||
/** Location of the search index JSON under `public/`, relative to the process working directory. */
const SEARCH_INDEX_PATH = path.join(process.cwd(), 'public', 'search-index.json');

/** One entry as stored in the index file. */
interface RawSearchIndexEntry {
  content: string;
  hierarchy: string[];
  path: string;
  summary: string;
  title: string;
}

/**
 * Top-level shape the index file is parsed as.
 * NOTE(review): only `entries` is read in this module; `generatedAt` and
 * `total` are presumably written by the index generator — confirm there.
 */
interface SearchIndexFile {
  entries: RawSearchIndexEntry[];
  generatedAt: string;
  total: number;
}

/** A scored search hit returned by `searchIndex`. */
export type SearchMatch = {
  hierarchy: string[];
  /** Number of query tokens found in at least one field of the entry. */
  matchedTokens: number;
  path: string;
  /** Sum of per-token field weights plus the install bias. */
  score: number;
  /** First content line containing a query token, or `null` if none was found. */
  snippet: string | null;
  summary: string;
  title: string;
};

/** Index entry plus lowercased copies of its searchable fields, computed once at load time. */
interface CachedEntry extends RawSearchIndexEntry {
  contentLower: string;
  hierarchyLower: string[];
  pathLower: string;
  titleLower: string;
}
|
||
/** Memoized index load; `null` until a load has been started. */
let searchIndexPromise: Promise<CachedEntry[]> | null = null;

/**
 * Reads and parses the index file, then attaches lowercased copies of each
 * entry's searchable fields so queries can match case-insensitively without
 * re-lowercasing on every search.
 *
 * Rejects if the file cannot be read or is not valid JSON.
 */
async function loadSearchIndexInternal(): Promise<CachedEntry[]> {
  const fileText = await fs.readFile(SEARCH_INDEX_PATH, 'utf8');
  const indexFile = JSON.parse(fileText) as SearchIndexFile;

  const cached: CachedEntry[] = [];
  for (const entry of indexFile.entries) {
    cached.push({
      ...entry,
      pathLower: entry.path.toLowerCase(),
      titleLower: entry.title.toLowerCase(),
      hierarchyLower: entry.hierarchy.map(part => part.toLowerCase()),
      contentLower: entry.content.toLowerCase(),
    });
  }
  return cached;
}
|
||
/**
 * Returns the shared index-load promise, starting the load on first use.
 *
 * If the load fails, the cached promise is cleared so a later call retries,
 * and the failure is still propagated to the current caller.
 */
export function ensureSearchIndex(): Promise<CachedEntry[]> {
  if (searchIndexPromise) {
    return searchIndexPromise;
  }

  const pending = loadSearchIndexInternal().catch(error => {
    searchIndexPromise = null;
    throw error;
  });
  searchIndexPromise = pending;

  return pending;
}
|
||
/**
 * Scores an entry against lowercased query tokens.
 *
 * Each token adds 6 for a title hit, 4 for a path hit, 3 for a hit in any
 * hierarchy segment and 1 for a content hit. A token counts as matched when
 * it hits at least one field. The install bias is added once when anything
 * matched.
 *
 * @returns `{score, matchedTokens}`, or `null` when no token matched.
 */
function scoreEntry(entry: CachedEntry, tokens: string[]) {
  let total = 0;
  let hits = 0;

  for (const token of tokens) {
    const fieldWeights = [
      entry.titleLower.includes(token) ? 6 : 0,
      entry.pathLower.includes(token) ? 4 : 0,
      entry.hierarchyLower.some(part => part.includes(token)) ? 3 : 0,
      entry.contentLower.includes(token) ? 1 : 0,
    ];
    const gained = fieldWeights.reduce((sum, weight) => sum + weight, 0);

    if (gained > 0) {
      total += gained;
      hits += 1;
    }
  }

  if (hits === 0) {
    return null;
  }

  return {score: total + getInstallBias(entry), matchedTokens: hits};
}
|
||
/**
 * Finds the first non-blank content line containing any of the (lowercased)
 * tokens and returns it trimmed. Lines longer than 200 characters are cut to
 * 199 characters followed by an ellipsis.
 *
 * @returns The snippet text, or `null` when no line matches.
 */
function buildSnippet(entry: CachedEntry, tokens: string[]): string | null {
  for (const rawLine of entry.content.split(/\r?\n/)) {
    const lowered = rawLine.toLowerCase();
    const hasToken = tokens.some(token => lowered.includes(token));
    if (!hasToken) {
      continue;
    }

    const text = rawLine.trim();
    if (text.length === 0) {
      continue;
    }

    if (text.length <= 200) {
      return text;
    }
    return `${text.slice(0, 199)}…`;
  }

  return null;
}
|
||
/**
 * Searches the cached index for entries matching a whitespace-separated query.
 *
 * Results are ordered by score (descending), then by number of matched tokens
 * (descending), then by path (ascending), and truncated to `limit` items.
 *
 * @param query Free-text query; matching is case-insensitive.
 * @param limit Maximum number of results. Fractions are rounded down; `NaN`,
 *   zero and negative values yield an empty result.
 * @returns The top matches, each with a snippet when a matching line exists.
 * @throws Propagates any error from loading the search index.
 */
export async function searchIndex(query: string, limit: number): Promise<SearchMatch[]> {
  const tokens = query.toLowerCase().split(/\s+/).filter(Boolean);

  // Normalize up front: passing NaN or a negative number straight to `slice`
  // would drop everything or cut results from the wrong end.
  const maxResults = limit > 0 ? Math.floor(limit) : 0;

  if (tokens.length === 0 || maxResults === 0) {
    return [];
  }

  const entries = await ensureSearchIndex();
  const scored: Array<{entry: CachedEntry; matchedTokens: number; score: number}> = [];

  for (const entry of entries) {
    const scoreResult = scoreEntry(entry, tokens);
    if (scoreResult) {
      scored.push({
        entry,
        score: scoreResult.score,
        matchedTokens: scoreResult.matchedTokens,
      });
    }
  }

  scored.sort((a, b) => {
    if (b.score !== a.score) {
      return b.score - a.score;
    }
    if (b.matchedTokens !== a.matchedTokens) {
      return b.matchedTokens - a.matchedTokens;
    }
    return a.entry.path.localeCompare(b.entry.path);
  });

  // Snippets require scanning the entry content, so build them only for the
  // results that are actually returned.
  return scored.slice(0, maxResults).map(({entry, score, matchedTokens}) => ({
    path: entry.path,
    title: entry.title,
    hierarchy: entry.hierarchy,
    summary: entry.summary,
    snippet: buildSnippet(entry, tokens),
    score,
    matchedTokens,
  }));
}
|
||
/**
 * Computes a ranking bonus from the entry's lowercased path:
 *  - +40 for a two-segment path under `platforms/` (e.g. "platforms/react.md")
 *  - +50 for a four-segment path under `platforms/javascript/guides/`
 *  - +20 when any path segment is exactly "install"
 *  - +25 when the file name without `.md` is "install", "installation",
 *    "setup" or "getting-started"
 *
 * Bonuses are cumulative.
 */
function getInstallBias(entry: CachedEntry): number {
  const parts = entry.pathLower.split('/');
  const lastPart = parts[parts.length - 1] ?? '';
  const stem = lastPart.replace(/\.md$/, '');

  const underPlatforms = parts[0] === 'platforms';
  const isTopLevelPlatformDoc = underPlatforms && parts.length === 2;
  const isJavaScriptGuideRoot =
    underPlatforms &&
    parts[1] === 'javascript' &&
    parts[2] === 'guides' &&
    parts.length === 4;
  const hasInstallSegment = parts.includes('install');
  const hasInstallLikeName = ['install', 'installation', 'setup', 'getting-started'].includes(
    stem
  );

  return (
    (isTopLevelPlatformDoc ? 40 : 0) +
    (isJavaScriptGuideRoot ? 50 : 0) +
    (hasInstallSegment ? 20 : 0) +
    (hasInstallLikeName ? 25 : 0)
  );
}
|
||
/**
 * Renders a match as a small markdown block: a heading built from the
 * hierarchy joined with " > ", a link line `[title](path)`, and the snippet
 * on a third line when one is present.
 */
export function formatMatchAsBlock(match: SearchMatch): string {
  const {hierarchy, title, path: docPath, snippet} = match;
  const block = `# ${hierarchy.join(' > ')}\n[${title}](${docPath})`;

  return snippet ? `${block}\n${snippet}` : block;
}
|
||
/**
 * Converts a search match into the JSON shape returned by the search API:
 * the match's own fields plus a `url` produced by `buildDocUrl` from its path.
 */
export function mapMatchToResponse(match: SearchMatch) {
  const {path: docPath, title, hierarchy, summary, snippet, score, matchedTokens} = match;

  return {
    path: docPath,
    title,
    hierarchy,
    summary,
    snippet,
    url: buildDocUrl(docPath),
    score,
    matchedTokens,
  };
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: Invalid Query Parameter Causes Empty Search Results
When the `limit` query parameter is a non-numeric value, it is parsed as `NaN`. This `NaN` is then passed to `searchIndex`, causing `matches.slice(0, NaN)` to return an empty array, effectively yielding no search results.