1
+ const fs = require ( 'fs' ) ;
2
+ const path = require ( 'path' ) ;
3
+
4
/**
 * Walks `dir` recursively and gathers the path of every file whose name is
 * exactly `index.html`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [fileList=[]] - Accumulator the matches are appended to.
 * @returns {string[]} The same `fileList` array, containing each match as
 *   `dir` joined with the path below it.
 */
function getAllHtmlFiles(dir, fileList = []) {
  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);

    // Directories are descended into immediately, so matches keep the
    // depth-first order in which readdirSync lists the entries.
    if (fs.statSync(entryPath).isDirectory()) {
      getAllHtmlFiles(entryPath, fileList);
      continue;
    }

    if (entryName === 'index.html') {
      fileList.push(entryPath);
    }
  }

  return fileList;
}
20
+
21
+ module . exports = function customLLMsTxtPlugin ( context , options ) {
22
+ return {
23
+ name : 'custom-llms-txt' ,
24
+ async postBuild ( { siteConfig, outDir } ) {
25
+ // Group docs by their sidebar category
26
+ const sections = {
27
+ 'Getting Started' : [ ] ,
28
+ 'Features' : [ ] ,
29
+ 'Guides' : [ ] ,
30
+ 'Customization' : [ ] ,
31
+ 'Advanced' : [ ] ,
32
+ 'Hub' : [ ]
33
+ } ;
34
+
35
+ // Get all HTML files
36
+ const htmlFiles = getAllHtmlFiles ( outDir ) ;
37
+
38
+ for ( const htmlPath of htmlFiles ) {
39
+ try {
40
+ const htmlContent = fs . readFileSync ( htmlPath , 'utf8' ) ;
41
+
42
+ // Extract title from HTML (handle both formats)
43
+ const titleMatch = htmlContent . match ( / < t i t l e [ ^ > ] * > ( [ ^ < ] + ) < \/ t i t l e > / ) ;
44
+ const title = titleMatch ? titleMatch [ 1 ] . replace ( ' | Continue' , '' ) . trim ( ) : 'Untitled' ;
45
+
46
+ // Skip if title is just "Continue" (likely homepage)
47
+ if ( title === 'Continue' ) continue ;
48
+
49
+ // Extract description from meta tag
50
+ const descMatch = htmlContent . match ( / < m e t a \s + n a m e = " d e s c r i p t i o n " \s + c o n t e n t = " ( [ ^ " ] + ) " / ) ;
51
+ const description = descMatch ? descMatch [ 1 ] : '' ;
52
+
53
+ // Get relative path from build directory
54
+ const relativePath = path . relative ( outDir , htmlPath ) ;
55
+ // Convert to URL path
56
+ const cleanPath = relativePath . replace ( / i n d e x \. h t m l $ / , '' ) . replace ( / \/ $ / , '' ) ;
57
+ const url = `https://docs.continue.dev/${ cleanPath } ` ;
58
+
59
+ const docInfo = { title, url, description, path : cleanPath } ;
60
+
61
+ // Determine section based on path
62
+ if ( cleanPath . includes ( 'getting-started' ) ) {
63
+ sections [ 'Getting Started' ] . push ( docInfo ) ;
64
+ } else if ( cleanPath . includes ( 'features' ) ) {
65
+ sections [ 'Features' ] . push ( docInfo ) ;
66
+ } else if ( cleanPath . includes ( 'guides' ) ) {
67
+ sections [ 'Guides' ] . push ( docInfo ) ;
68
+ } else if ( cleanPath . includes ( 'customization' ) ) {
69
+ sections [ 'Customization' ] . push ( docInfo ) ;
70
+ } else if ( cleanPath . includes ( 'advanced' ) ) {
71
+ sections [ 'Advanced' ] . push ( docInfo ) ;
72
+ } else if ( cleanPath . includes ( 'hub' ) ) {
73
+ sections [ 'Hub' ] . push ( docInfo ) ;
74
+ } else if ( cleanPath === '' || cleanPath === '/' ) {
75
+ // Root page
76
+ sections [ 'Getting Started' ] . unshift ( docInfo ) ;
77
+ }
78
+ } catch ( error ) {
79
+ console . warn ( `Failed to process ${ htmlPath } :` , error . message ) ;
80
+ }
81
+ }
82
+
83
+ // Generate the structured llms.txt content
84
+ let content = '# Continue Documentation\n\n' ;
85
+ content += 'Documentation for Continue - the open-source AI code assistant for developers\n\n' ;
86
+
87
+ // Add each section
88
+ Object . entries ( sections ) . forEach ( ( [ sectionName , docs ] ) => {
89
+ if ( docs . length > 0 ) {
90
+ content += `## ${ sectionName } \n\n` ;
91
+
92
+ // Sort docs within each section
93
+ docs . sort ( ( a , b ) => {
94
+ // Sort by path depth first (shorter paths first)
95
+ const depthA = ( a . path . match ( / \/ / g) || [ ] ) . length ;
96
+ const depthB = ( b . path . match ( / \/ / g) || [ ] ) . length ;
97
+ if ( depthA !== depthB ) return depthA - depthB ;
98
+
99
+ // Then alphabetically
100
+ return a . path . localeCompare ( b . path ) ;
101
+ } ) ;
102
+
103
+ docs . forEach ( doc => {
104
+ content += `- [${ doc . title } ](${ doc . url } )` ;
105
+ if ( doc . description ) {
106
+ content += `: ${ doc . description } ` ;
107
+ }
108
+ content += '\n' ;
109
+ } ) ;
110
+ content += '\n' ;
111
+ }
112
+ } ) ;
113
+
114
+ // Write to file
115
+ const outputPath = path . join ( outDir , 'llms.txt' ) ;
116
+ fs . writeFileSync ( outputPath , content , 'utf8' ) ;
117
+
118
+ console . log ( `Generated structured llms.txt with ${ Object . values ( sections ) . flat ( ) . length } pages` ) ;
119
+ }
120
+ } ;
121
+ } ;
0 commit comments