@@ -6,16 +6,14 @@ import fs from 'fs';
6
6
import { IDisposable } from 'monaco-editor' ;
7
7
import sql from 'node:sqlite' ;
8
8
import path from 'path' ;
9
- import { CancelablePromise , ThrottledDelayer , createCancelablePromise , raceTimeout } from '../../../util/vs/base/common/async' ;
9
+ import { CancelablePromise , createCancelablePromise } from '../../../util/vs/base/common/async' ;
10
10
import { CancellationToken } from '../../../util/vs/base/common/cancellation' ;
11
- import { Disposable } from '../../../util/vs/base/common/lifecycle' ;
12
11
import { ResourceMap } from '../../../util/vs/base/common/map' ;
13
12
import { Schemas } from '../../../util/vs/base/common/network' ;
14
13
import { URI } from '../../../util/vs/base/common/uri' ;
15
14
import { IRange , Range } from '../../../util/vs/editor/common/core/range' ;
16
15
import { IInstantiationService , ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation' ;
17
- import { FileChunk , FileChunkWithEmbedding } from '../../chunking/common/chunk' ;
18
- import { stripChunkTextMetadata } from '../../chunking/common/chunkingStringUtils' ;
16
+ import { FileChunkWithEmbedding } from '../../chunking/common/chunk' ;
19
17
import { Embedding , EmbeddingType , EmbeddingVector } from '../../embeddings/common/embeddingsComputer' ;
20
18
import { IFileSystemService } from '../../filesystem/common/fileSystemService' ;
21
19
import { ILogService } from '../../log/common/logService' ;
@@ -78,24 +76,13 @@ export async function createWorkspaceChunkAndEmbeddingCache(
78
76
workspaceIndex : IWorkspaceFileIndex
79
77
) : Promise < IWorkspaceChunkAndEmbeddingCache > {
80
78
const instantiationService = accessor . get ( IInstantiationService ) ;
81
- if ( cacheRoot ) {
82
- const db = await instantiationService . invokeFunction ( accessor => DbCache . create ( accessor , embeddingType , cacheRoot , workspaceIndex ) ) ;
83
- if ( db ) {
84
- return db ;
85
- }
86
- }
87
- return instantiationService . invokeFunction ( accessor => DiskCache . load ( accessor , embeddingType , cacheRoot , workspaceIndex ) ) ;
79
+ return instantiationService . invokeFunction ( accessor => DbCache . create ( accessor , embeddingType , cacheRoot ?? ':memory:' , workspaceIndex ) ) ;
88
80
}
89
81
90
- class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
82
+ class OldDiskCache {
91
83
private static readonly version = '1.0.0' ;
92
84
private static cacheFileName = 'workspace-chunks.json' ;
93
85
94
- private static encodeEmbedding ( embedding : EmbeddingVector ) : string {
95
- const floatArray = Float32Array . from ( embedding ) ;
96
- return Buffer . from ( floatArray . buffer ) . toString ( 'base64' ) ;
97
- }
98
-
99
86
public static decodeEmbedding ( base64Str : string ) : EmbeddingVector {
100
87
const decoded = Buffer . from ( base64Str , 'base64' ) ;
101
88
const float32Array = new Float32Array ( decoded . buffer , decoded . byteOffset , decoded . byteLength / Float32Array . BYTES_PER_ELEMENT ) ;
@@ -105,7 +92,7 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
105
92
public static async readDiskCache ( accessor : ServicesAccessor , embeddingType : EmbeddingType , cacheRoot : URI , logService : ILogService ) : Promise < Iterable < [ string , PersistedCacheEntry ] > | undefined > {
106
93
const fileSystem = accessor . get ( IFileSystemService ) ;
107
94
108
- const cachePath = URI . joinPath ( cacheRoot , DiskCache . cacheFileName ) ;
95
+ const cachePath = URI . joinPath ( cacheRoot , OldDiskCache . cacheFileName ) ;
109
96
try {
110
97
let file : Uint8Array | undefined ;
111
98
try {
@@ -116,8 +103,8 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
116
103
}
117
104
118
105
const data : PersistedCache = JSON . parse ( new TextDecoder ( ) . decode ( file ) ) ;
119
- if ( data . version !== DiskCache . version ) {
120
- logService . debug ( `WorkspaceChunkAndEmbeddingCache: invalidating cache due to version mismatch. Expected ${ DiskCache . version } but found ${ data . version } ` ) ;
106
+ if ( data . version !== OldDiskCache . version ) {
107
+ logService . debug ( `WorkspaceChunkAndEmbeddingCache: invalidating cache due to version mismatch. Expected ${ OldDiskCache . version } but found ${ data . version } ` ) ;
121
108
return undefined ;
122
109
}
123
110
@@ -139,215 +126,15 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
139
126
140
127
static async deleteDiskCache ( accessor : ServicesAccessor , cacheRoot : URI ) {
141
128
const fileSystem = accessor . get ( IFileSystemService ) ;
142
- const cachePath = URI . joinPath ( cacheRoot , DiskCache . cacheFileName ) ;
129
+ const cachePath = URI . joinPath ( cacheRoot , OldDiskCache . cacheFileName ) ;
143
130
try {
144
131
await fileSystem . delete ( cachePath ) ;
145
132
} catch {
146
133
// noop
147
134
}
148
135
}
149
136
150
- static async load (
151
- accessor : ServicesAccessor ,
152
- embeddingType : EmbeddingType ,
153
- cacheRoot : URI | undefined ,
154
- workspaceIndex : IWorkspaceFileIndex
155
- ) : Promise < DiskCache > {
156
- const fileSystem = accessor . get ( IFileSystemService ) ;
157
- const instantiationService = accessor . get ( IInstantiationService ) ;
158
- const logService = accessor . get ( ILogService ) ;
159
-
160
- const cachePath = cacheRoot ? URI . joinPath ( cacheRoot , DiskCache . cacheFileName ) : undefined ;
161
- const cache = new DiskCache ( embeddingType , cachePath , workspaceIndex , fileSystem , logService ) ;
162
-
163
- if ( cacheRoot && cachePath ) {
164
- await workspaceIndex . initialize ( ) ;
165
-
166
- const cacheValues = await instantiationService . invokeFunction ( accessor => DiskCache . readDiskCache ( accessor , embeddingType , cacheRoot , logService ) ) ;
167
- if ( cacheValues ) {
168
- logService . debug ( `Restoring workspace chunk + embeddings cache from ${ cachePath . fsPath } ` ) ;
169
-
170
- for ( const [ uriStr , entry ] of cacheValues ) {
171
- const docUri = URI . parse ( uriStr ) ;
172
- if ( ! workspaceIndex . get ( docUri ) ) {
173
- continue ;
174
- }
175
-
176
- cache . _cache . set ( docUri , {
177
- contentVersionId : entry . contentVersionId ,
178
- fileHash : entry . hash ,
179
- state : 'resolved' ,
180
- value : entry . entries . map ( ( x ) : FileChunkWithEmbedding => ( {
181
- embedding : {
182
- value : typeof x . embedding === 'string' ? DiskCache . decodeEmbedding ( x . embedding ) : x . embedding ,
183
- type : embeddingType ,
184
- } ,
185
- chunkHash : x . chunkHash ,
186
- chunk : {
187
- file : docUri ,
188
- text : stripChunkTextMetadata ( x . text ) ,
189
- rawText : undefined ,
190
- range : Range . lift ( x . range ) ,
191
- } satisfies FileChunk
192
- } ) )
193
- } ) ;
194
- }
195
- }
196
- }
197
-
198
- return cache ;
199
- }
200
-
201
- private readonly _cache = new ResourceMap < CacheEntry > ( ) ;
202
-
203
- private _isDisposed = false ;
204
-
205
- private readonly _writeDelayer = this . _register ( new ThrottledDelayer < void > ( 5000 ) ) ;
206
-
207
- private constructor (
208
- private readonly embeddingType : EmbeddingType ,
209
- private readonly cachePath : URI | undefined ,
210
- @IWorkspaceFileIndex private readonly _workspaceIndex : IWorkspaceFileIndex ,
211
- @IFileSystemService private readonly fileSystem : IFileSystemService ,
212
- @ILogService private readonly logService : ILogService
213
- ) {
214
- super ( ) ;
215
-
216
- this . _register ( this . _workspaceIndex . onDidDeleteFiles ( uris => {
217
- for ( const uri of uris ) {
218
- this . _cache . delete ( uri ) ;
219
- }
220
- } ) ) ;
221
- }
222
-
223
- public override dispose ( ) : void {
224
- this . _isDisposed = true ;
225
- super . dispose ( ) ;
226
- }
227
-
228
- /**
229
- * Checks if {@linkcode file} is currently indexed. Does not wait for any current indexing operation to complete.
230
- */
231
- async isIndexed ( file : FileRepresentation ) : Promise < boolean > {
232
- const entry = await this . getEntry ( file ) ;
233
- return entry ?. state === 'resolved' ;
234
- }
235
-
236
- async get ( file : FileRepresentation ) : Promise < readonly FileChunkWithEmbedding [ ] | undefined > {
237
- return ( await this . getEntry ( file ) ) ?. value ;
238
- }
239
-
240
- getCurrentChunksForUri ( uri : URI ) : ReadonlyMap < string , FileChunkWithEmbedding > | undefined {
241
- const entry = this . _cache . get ( uri ) ;
242
- if ( entry ?. state === 'resolved' || entry ?. state === 'rejected' ) {
243
- if ( entry . value ) {
244
- const out = new Map < string , FileChunkWithEmbedding > ( ) ;
245
- for ( const x of entry . value ) {
246
- if ( x . chunkHash ) {
247
- out . set ( x . chunkHash , x ) ;
248
- }
249
- }
250
- return out ;
251
- }
252
- }
253
- return undefined ;
254
- }
255
-
256
- private async getEntry ( file : FileRepresentation ) : Promise < CacheEntry | undefined > {
257
- const entry = this . _cache . get ( file . uri ) ;
258
- if ( ! entry ) {
259
- return undefined ;
260
- }
261
-
262
- if ( entry . contentVersionId === await file . getFastContentVersionId ( ) ) {
263
- return entry ;
264
- }
265
-
266
- return undefined ;
267
- }
268
-
269
- async update ( file : FileRepresentation , compute : ( token : CancellationToken ) => Promise < readonly FileChunkWithEmbedding [ ] | undefined > ) : Promise < readonly FileChunkWithEmbedding [ ] | undefined > {
270
- const existing = this . _cache . get ( file . uri ) ;
271
- const inContentVersionId = await file . getFastContentVersionId ( ) ;
272
- if ( existing ?. contentVersionId === inContentVersionId ) {
273
- // Already up to date
274
- return existing . value ;
275
- }
276
-
277
- // Overwrite
278
- if ( existing ?. state === 'pending' ) {
279
- existing . value . cancel ( ) ;
280
- }
281
- const chunks = createCancelablePromise ( compute ) ;
282
- const entry : CacheEntry = {
283
- contentVersionId : inContentVersionId ,
284
- fileHash : undefined ,
285
- state : 'pending' ,
286
- value : chunks
287
- } ;
288
- this . _cache . set ( file . uri , entry ) ;
289
-
290
- chunks
291
- . then ( ( result ) : CacheEntry => {
292
- return { contentVersionId : inContentVersionId , fileHash : undefined , state : Array . isArray ( result ) ? 'resolved' : 'rejected' , value : result } ;
293
- } , ( ) : CacheEntry => {
294
- return { contentVersionId : inContentVersionId , fileHash : undefined , state : 'rejected' , value : undefined } ;
295
- } )
296
- . then ( newEntry => {
297
- const current = this . _cache . get ( file . uri ) ;
298
- if ( entry === current ) {
299
- this . _cache . set ( file . uri , newEntry ) ;
300
- return this . _writeDelayer . trigger ( ( ) => this . save ( ) ) ;
301
- }
302
- } ) ;
303
-
304
- return chunks ;
305
- }
306
-
307
- private async save ( ) {
308
- if ( ! this . cachePath || this . _isDisposed ) {
309
- return ;
310
- }
311
-
312
- const entries : Record < string , PersistedCacheEntry > = { } ;
313
- await Promise . all ( Array . from ( this . _cache . entries ( ) , async ( [ uri , entry ] ) => {
314
- let chunkAndEmbeddings : readonly FileChunkWithEmbedding [ ] | undefined ;
315
- try {
316
- // Don't block saving on entries that are still resolving
317
- chunkAndEmbeddings = entry . state === 'pending' ? await raceTimeout ( entry . value , 1000 ) : entry . value ;
318
- } catch {
319
- // noop
320
- }
321
-
322
- if ( ! chunkAndEmbeddings ) {
323
- return ;
324
- }
325
-
326
- entries [ uri . toString ( ) ] = {
327
- contentVersionId : entry . contentVersionId ,
328
- hash : undefined ,
329
- entries : chunkAndEmbeddings . map ( x => ( {
330
- text : x . chunk . text ,
331
- range : x . chunk . range . toJSON ( ) ,
332
- embedding : DiskCache . encodeEmbedding ( x . embedding . value ) ,
333
- chunkHash : x . chunkHash ,
334
- } ) ) ,
335
- } ;
336
- } ) ) ;
337
-
338
- if ( this . _isDisposed ) {
339
- return ;
340
- }
341
-
342
- const data : PersistedCache = {
343
- version : DiskCache . version ,
344
- embeddingModel : this . embeddingType . id ,
345
- entries : entries ,
346
- } ;
347
- await this . fileSystem . writeFile ( this . cachePath , new TextEncoder ( ) . encode ( JSON . stringify ( data ) ) ) ;
348
-
349
- this . logService . debug ( `Wrote workspace chunk + embeddings cache to ${ this . cachePath . fsPath } ` ) ;
350
- }
137
+ private constructor ( ) { }
351
138
}
352
139
353
140
@@ -358,29 +145,30 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
358
145
public static async create (
359
146
accessor : ServicesAccessor ,
360
147
embeddingType : EmbeddingType ,
361
- cacheRoot : URI ,
148
+ cacheRoot : URI | ':memory:' ,
362
149
workspaceIndex : IWorkspaceFileIndex ,
363
- ) : Promise < DbCache | undefined > {
150
+ ) : Promise < DbCache > {
364
151
const instantiationService = accessor . get ( IInstantiationService ) ;
365
152
366
153
const syncOptions : sql . DatabaseSyncOptions = {
367
154
open : true ,
368
155
enableForeignKeyConstraints : true
369
156
} ;
370
157
371
- const dbPath = URI . joinPath ( cacheRoot , `workspace-chunks.db` ) ;
372
158
373
159
let db : sql . DatabaseSync | undefined ;
374
- if ( dbPath . scheme === Schemas . file ) {
160
+ if ( cacheRoot !== ':memory:' && cacheRoot . scheme === Schemas . file ) {
161
+ const dbPath = URI . joinPath ( cacheRoot , `workspace-chunks.db` ) ;
375
162
try {
376
163
await fs . promises . mkdir ( path . dirname ( dbPath . fsPath ) , { recursive : true } ) ;
377
164
db = new sql . DatabaseSync ( dbPath . fsPath , syncOptions ) ;
378
165
} catch ( e ) {
379
166
console . error ( 'Failed to open SQLite database on disk' , e ) ;
380
167
}
381
168
}
169
+
382
170
if ( ! db ) {
383
- return ;
171
+ db = new sql . DatabaseSync ( ':memory:' , syncOptions ) ;
384
172
}
385
173
386
174
db . exec ( `
@@ -431,12 +219,14 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
431
219
db . prepare ( 'INSERT INTO CacheMeta (version, embeddingModel) VALUES (?, ?)' ) . run ( this . version , embeddingType . id ) ;
432
220
433
221
// Load existing disk db if it exists
434
- const diskCache = await instantiationService . invokeFunction ( accessor => DiskCache . readDiskCache (
435
- accessor ,
436
- embeddingType ,
437
- cacheRoot ,
438
- accessor . get ( ILogService )
439
- ) ) ;
222
+ const diskCache = cacheRoot !== ':memory:' ?
223
+ await instantiationService . invokeFunction ( accessor => OldDiskCache . readDiskCache (
224
+ accessor ,
225
+ embeddingType ,
226
+ cacheRoot ,
227
+ accessor . get ( ILogService )
228
+ ) )
229
+ : undefined ;
440
230
if ( diskCache ) {
441
231
try {
442
232
const insertFileStatement = db . prepare ( 'INSERT OR REPLACE INTO Files (uri, contentVersionId) VALUES (?, ?)' ) ;
@@ -457,7 +247,7 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
457
247
chunk . range . endColumn ,
458
248
packEmbedding ( {
459
249
type : embeddingType ,
460
- value : typeof chunk . embedding === 'string' ? DiskCache . decodeEmbedding ( chunk . embedding ) : chunk . embedding ,
250
+ value : typeof chunk . embedding === 'string' ? OldDiskCache . decodeEmbedding ( chunk . embedding ) : chunk . embedding ,
461
251
} ) ,
462
252
chunk . chunkHash ?? ''
463
253
) ;
@@ -467,7 +257,9 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
467
257
db . exec ( 'COMMIT' ) ;
468
258
}
469
259
470
- void instantiationService . invokeFunction ( accessor => DiskCache . deleteDiskCache ( accessor , cacheRoot ) ) ;
260
+ if ( cacheRoot !== ':memory:' ) {
261
+ void instantiationService . invokeFunction ( accessor => OldDiskCache . deleteDiskCache ( accessor , cacheRoot ) ) ;
262
+ }
471
263
}
472
264
473
265
// Validate all files in the database against the workspace index and remove any that are no longer present
0 commit comments