Skip to content

Commit b46c958

Browse files
authored
Fall back to in-memory SQLite db in all cases (#591)
This lets us remove a bunch of logic for the old disk cache, because it now only needs to be read and then eventually deleted.
1 parent ccafd86 commit b46c958

File tree

1 file changed

+27
-235
lines changed

1 file changed

+27
-235
lines changed

src/platform/workspaceChunkSearch/node/workspaceChunkAndEmbeddingCache.ts

Lines changed: 27 additions & 235 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@ import fs from 'fs';
66
import { IDisposable } from 'monaco-editor';
77
import sql from 'node:sqlite';
88
import path from 'path';
9-
import { CancelablePromise, ThrottledDelayer, createCancelablePromise, raceTimeout } from '../../../util/vs/base/common/async';
9+
import { CancelablePromise, createCancelablePromise } from '../../../util/vs/base/common/async';
1010
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
11-
import { Disposable } from '../../../util/vs/base/common/lifecycle';
1211
import { ResourceMap } from '../../../util/vs/base/common/map';
1312
import { Schemas } from '../../../util/vs/base/common/network';
1413
import { URI } from '../../../util/vs/base/common/uri';
1514
import { IRange, Range } from '../../../util/vs/editor/common/core/range';
1615
import { IInstantiationService, ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
17-
import { FileChunk, FileChunkWithEmbedding } from '../../chunking/common/chunk';
18-
import { stripChunkTextMetadata } from '../../chunking/common/chunkingStringUtils';
16+
import { FileChunkWithEmbedding } from '../../chunking/common/chunk';
1917
import { Embedding, EmbeddingType, EmbeddingVector } from '../../embeddings/common/embeddingsComputer';
2018
import { IFileSystemService } from '../../filesystem/common/fileSystemService';
2119
import { ILogService } from '../../log/common/logService';
@@ -78,24 +76,13 @@ export async function createWorkspaceChunkAndEmbeddingCache(
7876
workspaceIndex: IWorkspaceFileIndex
7977
): Promise<IWorkspaceChunkAndEmbeddingCache> {
8078
const instantiationService = accessor.get(IInstantiationService);
81-
if (cacheRoot) {
82-
const db = await instantiationService.invokeFunction(accessor => DbCache.create(accessor, embeddingType, cacheRoot, workspaceIndex));
83-
if (db) {
84-
return db;
85-
}
86-
}
87-
return instantiationService.invokeFunction(accessor => DiskCache.load(accessor, embeddingType, cacheRoot, workspaceIndex));
79+
return instantiationService.invokeFunction(accessor => DbCache.create(accessor, embeddingType, cacheRoot ?? ':memory:', workspaceIndex));
8880
}
8981

90-
class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
82+
class OldDiskCache {
9183
private static readonly version = '1.0.0';
9284
private static cacheFileName = 'workspace-chunks.json';
9385

94-
private static encodeEmbedding(embedding: EmbeddingVector): string {
95-
const floatArray = Float32Array.from(embedding);
96-
return Buffer.from(floatArray.buffer).toString('base64');
97-
}
98-
9986
public static decodeEmbedding(base64Str: string): EmbeddingVector {
10087
const decoded = Buffer.from(base64Str, 'base64');
10188
const float32Array = new Float32Array(decoded.buffer, decoded.byteOffset, decoded.byteLength / Float32Array.BYTES_PER_ELEMENT);
@@ -105,7 +92,7 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
10592
public static async readDiskCache(accessor: ServicesAccessor, embeddingType: EmbeddingType, cacheRoot: URI, logService: ILogService): Promise<Iterable<[string, PersistedCacheEntry]> | undefined> {
10693
const fileSystem = accessor.get(IFileSystemService);
10794

108-
const cachePath = URI.joinPath(cacheRoot, DiskCache.cacheFileName);
95+
const cachePath = URI.joinPath(cacheRoot, OldDiskCache.cacheFileName);
10996
try {
11097
let file: Uint8Array | undefined;
11198
try {
@@ -116,8 +103,8 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
116103
}
117104

118105
const data: PersistedCache = JSON.parse(new TextDecoder().decode(file));
119-
if (data.version !== DiskCache.version) {
120-
logService.debug(`WorkspaceChunkAndEmbeddingCache: invalidating cache due to version mismatch. Expected ${DiskCache.version} but found ${data.version}`);
106+
if (data.version !== OldDiskCache.version) {
107+
logService.debug(`WorkspaceChunkAndEmbeddingCache: invalidating cache due to version mismatch. Expected ${OldDiskCache.version} but found ${data.version}`);
121108
return undefined;
122109
}
123110

@@ -139,215 +126,15 @@ class DiskCache extends Disposable implements IWorkspaceChunkAndEmbeddingCache {
139126

140127
static async deleteDiskCache(accessor: ServicesAccessor, cacheRoot: URI) {
141128
const fileSystem = accessor.get(IFileSystemService);
142-
const cachePath = URI.joinPath(cacheRoot, DiskCache.cacheFileName);
129+
const cachePath = URI.joinPath(cacheRoot, OldDiskCache.cacheFileName);
143130
try {
144131
await fileSystem.delete(cachePath);
145132
} catch {
146133
// noop
147134
}
148135
}
149136

150-
static async load(
151-
accessor: ServicesAccessor,
152-
embeddingType: EmbeddingType,
153-
cacheRoot: URI | undefined,
154-
workspaceIndex: IWorkspaceFileIndex
155-
): Promise<DiskCache> {
156-
const fileSystem = accessor.get(IFileSystemService);
157-
const instantiationService = accessor.get(IInstantiationService);
158-
const logService = accessor.get(ILogService);
159-
160-
const cachePath = cacheRoot ? URI.joinPath(cacheRoot, DiskCache.cacheFileName) : undefined;
161-
const cache = new DiskCache(embeddingType, cachePath, workspaceIndex, fileSystem, logService);
162-
163-
if (cacheRoot && cachePath) {
164-
await workspaceIndex.initialize();
165-
166-
const cacheValues = await instantiationService.invokeFunction(accessor => DiskCache.readDiskCache(accessor, embeddingType, cacheRoot, logService));
167-
if (cacheValues) {
168-
logService.debug(`Restoring workspace chunk + embeddings cache from ${cachePath.fsPath}`);
169-
170-
for (const [uriStr, entry] of cacheValues) {
171-
const docUri = URI.parse(uriStr);
172-
if (!workspaceIndex.get(docUri)) {
173-
continue;
174-
}
175-
176-
cache._cache.set(docUri, {
177-
contentVersionId: entry.contentVersionId,
178-
fileHash: entry.hash,
179-
state: 'resolved',
180-
value: entry.entries.map((x): FileChunkWithEmbedding => ({
181-
embedding: {
182-
value: typeof x.embedding === 'string' ? DiskCache.decodeEmbedding(x.embedding) : x.embedding,
183-
type: embeddingType,
184-
},
185-
chunkHash: x.chunkHash,
186-
chunk: {
187-
file: docUri,
188-
text: stripChunkTextMetadata(x.text),
189-
rawText: undefined,
190-
range: Range.lift(x.range),
191-
} satisfies FileChunk
192-
}))
193-
});
194-
}
195-
}
196-
}
197-
198-
return cache;
199-
}
200-
201-
private readonly _cache = new ResourceMap<CacheEntry>();
202-
203-
private _isDisposed = false;
204-
205-
private readonly _writeDelayer = this._register(new ThrottledDelayer<void>(5000));
206-
207-
private constructor(
208-
private readonly embeddingType: EmbeddingType,
209-
private readonly cachePath: URI | undefined,
210-
@IWorkspaceFileIndex private readonly _workspaceIndex: IWorkspaceFileIndex,
211-
@IFileSystemService private readonly fileSystem: IFileSystemService,
212-
@ILogService private readonly logService: ILogService
213-
) {
214-
super();
215-
216-
this._register(this._workspaceIndex.onDidDeleteFiles(uris => {
217-
for (const uri of uris) {
218-
this._cache.delete(uri);
219-
}
220-
}));
221-
}
222-
223-
public override dispose(): void {
224-
this._isDisposed = true;
225-
super.dispose();
226-
}
227-
228-
/**
229-
* Checks if {@linkcode file} is currently indexed. Does not wait for any current indexing operation to complete.
230-
*/
231-
async isIndexed(file: FileRepresentation): Promise<boolean> {
232-
const entry = await this.getEntry(file);
233-
return entry?.state === 'resolved';
234-
}
235-
236-
async get(file: FileRepresentation): Promise<readonly FileChunkWithEmbedding[] | undefined> {
237-
return (await this.getEntry(file))?.value;
238-
}
239-
240-
getCurrentChunksForUri(uri: URI): ReadonlyMap<string, FileChunkWithEmbedding> | undefined {
241-
const entry = this._cache.get(uri);
242-
if (entry?.state === 'resolved' || entry?.state === 'rejected') {
243-
if (entry.value) {
244-
const out = new Map<string, FileChunkWithEmbedding>();
245-
for (const x of entry.value) {
246-
if (x.chunkHash) {
247-
out.set(x.chunkHash, x);
248-
}
249-
}
250-
return out;
251-
}
252-
}
253-
return undefined;
254-
}
255-
256-
private async getEntry(file: FileRepresentation): Promise<CacheEntry | undefined> {
257-
const entry = this._cache.get(file.uri);
258-
if (!entry) {
259-
return undefined;
260-
}
261-
262-
if (entry.contentVersionId === await file.getFastContentVersionId()) {
263-
return entry;
264-
}
265-
266-
return undefined;
267-
}
268-
269-
async update(file: FileRepresentation, compute: (token: CancellationToken) => Promise<readonly FileChunkWithEmbedding[] | undefined>): Promise<readonly FileChunkWithEmbedding[] | undefined> {
270-
const existing = this._cache.get(file.uri);
271-
const inContentVersionId = await file.getFastContentVersionId();
272-
if (existing?.contentVersionId === inContentVersionId) {
273-
// Already up to date
274-
return existing.value;
275-
}
276-
277-
// Overwrite
278-
if (existing?.state === 'pending') {
279-
existing.value.cancel();
280-
}
281-
const chunks = createCancelablePromise(compute);
282-
const entry: CacheEntry = {
283-
contentVersionId: inContentVersionId,
284-
fileHash: undefined,
285-
state: 'pending',
286-
value: chunks
287-
};
288-
this._cache.set(file.uri, entry);
289-
290-
chunks
291-
.then((result): CacheEntry => {
292-
return { contentVersionId: inContentVersionId, fileHash: undefined, state: Array.isArray(result) ? 'resolved' : 'rejected', value: result };
293-
}, (): CacheEntry => {
294-
return { contentVersionId: inContentVersionId, fileHash: undefined, state: 'rejected', value: undefined };
295-
})
296-
.then(newEntry => {
297-
const current = this._cache.get(file.uri);
298-
if (entry === current) {
299-
this._cache.set(file.uri, newEntry);
300-
return this._writeDelayer.trigger(() => this.save());
301-
}
302-
});
303-
304-
return chunks;
305-
}
306-
307-
private async save() {
308-
if (!this.cachePath || this._isDisposed) {
309-
return;
310-
}
311-
312-
const entries: Record<string, PersistedCacheEntry> = {};
313-
await Promise.all(Array.from(this._cache.entries(), async ([uri, entry]) => {
314-
let chunkAndEmbeddings: readonly FileChunkWithEmbedding[] | undefined;
315-
try {
316-
// Don't block saving on entries that are still resolving
317-
chunkAndEmbeddings = entry.state === 'pending' ? await raceTimeout(entry.value, 1000) : entry.value;
318-
} catch {
319-
// noop
320-
}
321-
322-
if (!chunkAndEmbeddings) {
323-
return;
324-
}
325-
326-
entries[uri.toString()] = {
327-
contentVersionId: entry.contentVersionId,
328-
hash: undefined,
329-
entries: chunkAndEmbeddings.map(x => ({
330-
text: x.chunk.text,
331-
range: x.chunk.range.toJSON(),
332-
embedding: DiskCache.encodeEmbedding(x.embedding.value),
333-
chunkHash: x.chunkHash,
334-
})),
335-
};
336-
}));
337-
338-
if (this._isDisposed) {
339-
return;
340-
}
341-
342-
const data: PersistedCache = {
343-
version: DiskCache.version,
344-
embeddingModel: this.embeddingType.id,
345-
entries: entries,
346-
};
347-
await this.fileSystem.writeFile(this.cachePath, new TextEncoder().encode(JSON.stringify(data)));
348-
349-
this.logService.debug(`Wrote workspace chunk + embeddings cache to ${this.cachePath.fsPath}`);
350-
}
137+
private constructor() { }
351138
}
352139

353140

@@ -358,29 +145,30 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
358145
public static async create(
359146
accessor: ServicesAccessor,
360147
embeddingType: EmbeddingType,
361-
cacheRoot: URI,
148+
cacheRoot: URI | ':memory:',
362149
workspaceIndex: IWorkspaceFileIndex,
363-
): Promise<DbCache | undefined> {
150+
): Promise<DbCache> {
364151
const instantiationService = accessor.get(IInstantiationService);
365152

366153
const syncOptions: sql.DatabaseSyncOptions = {
367154
open: true,
368155
enableForeignKeyConstraints: true
369156
};
370157

371-
const dbPath = URI.joinPath(cacheRoot, `workspace-chunks.db`);
372158

373159
let db: sql.DatabaseSync | undefined;
374-
if (dbPath.scheme === Schemas.file) {
160+
if (cacheRoot !== ':memory:' && cacheRoot.scheme === Schemas.file) {
161+
const dbPath = URI.joinPath(cacheRoot, `workspace-chunks.db`);
375162
try {
376163
await fs.promises.mkdir(path.dirname(dbPath.fsPath), { recursive: true });
377164
db = new sql.DatabaseSync(dbPath.fsPath, syncOptions);
378165
} catch (e) {
379166
console.error('Failed to open SQLite database on disk', e);
380167
}
381168
}
169+
382170
if (!db) {
383-
return;
171+
db = new sql.DatabaseSync(':memory:', syncOptions);
384172
}
385173

386174
db.exec(`
@@ -431,12 +219,14 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
431219
db.prepare('INSERT INTO CacheMeta (version, embeddingModel) VALUES (?, ?)').run(this.version, embeddingType.id);
432220

433221
// Load existing disk db if it exists
434-
const diskCache = await instantiationService.invokeFunction(accessor => DiskCache.readDiskCache(
435-
accessor,
436-
embeddingType,
437-
cacheRoot,
438-
accessor.get(ILogService)
439-
));
222+
const diskCache = cacheRoot !== ':memory:' ?
223+
await instantiationService.invokeFunction(accessor => OldDiskCache.readDiskCache(
224+
accessor,
225+
embeddingType,
226+
cacheRoot,
227+
accessor.get(ILogService)
228+
))
229+
: undefined;
440230
if (diskCache) {
441231
try {
442232
const insertFileStatement = db.prepare('INSERT OR REPLACE INTO Files (uri, contentVersionId) VALUES (?, ?)');
@@ -457,7 +247,7 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
457247
chunk.range.endColumn,
458248
packEmbedding({
459249
type: embeddingType,
460-
value: typeof chunk.embedding === 'string' ? DiskCache.decodeEmbedding(chunk.embedding) : chunk.embedding,
250+
value: typeof chunk.embedding === 'string' ? OldDiskCache.decodeEmbedding(chunk.embedding) : chunk.embedding,
461251
}),
462252
chunk.chunkHash ?? ''
463253
);
@@ -467,7 +257,9 @@ class DbCache implements IWorkspaceChunkAndEmbeddingCache {
467257
db.exec('COMMIT');
468258
}
469259

470-
void instantiationService.invokeFunction(accessor => DiskCache.deleteDiskCache(accessor, cacheRoot));
260+
if (cacheRoot !== ':memory:') {
261+
void instantiationService.invokeFunction(accessor => OldDiskCache.deleteDiskCache(accessor, cacheRoot));
262+
}
471263
}
472264

473265
// Validate all files in the database against the workspace index and remove any that are no longer present

0 commit comments

Comments
 (0)