From deb9faacee53e9a4dde07e25a635f6397401ac56 Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Sun, 15 Jun 2025 14:22:59 -0400 Subject: [PATCH 1/3] fix: refresh codebase index on config change --- core/indexing/CodebaseIndexer.test.ts | 305 +++++++++++++++++++++++++- core/indexing/CodebaseIndexer.ts | 72 +++++- core/indexing/docs/DocsService.ts | 107 +++++++-- 3 files changed, 449 insertions(+), 35 deletions(-) diff --git a/core/indexing/CodebaseIndexer.test.ts b/core/indexing/CodebaseIndexer.test.ts index f2a31209c80..63822bea9d9 100644 --- a/core/indexing/CodebaseIndexer.test.ts +++ b/core/indexing/CodebaseIndexer.test.ts @@ -1,3 +1,5 @@ +/* eslint-disable max-lines-per-function */ +/* lint is not useful for test classes */ import { jest } from "@jest/globals"; import { execSync } from "node:child_process"; import fs from "node:fs"; @@ -14,8 +16,11 @@ import { } from "../test/testDir.js"; import { getIndexSqlitePath } from "../util/paths.js"; +import { ConfigResult } from "@continuedev/config-yaml"; +import CodebaseContextProvider from "../context/providers/CodebaseContextProvider.js"; +import { ContinueConfig } from "../index.js"; import { localPathToUri } from "../util/pathToUri.js"; -import { CodebaseIndexer, PauseToken } from "./CodebaseIndexer.js"; +import { CodebaseIndexer } from "./CodebaseIndexer.js"; import { getComputeDeleteAddRemove } from "./refreshIndex.js"; import { TestCodebaseIndex } from "./TestCodebaseIndex.js"; import { CodebaseIndex } from "./types.js"; @@ -57,6 +62,17 @@ class TestCodebaseIndexer extends CodebaseIndexer { protected async getIndexesToBuild(): Promise { return [new TestCodebaseIndex()]; } + + // Add public methods to test private methods + public testHasCodebaseContextProvider() { + return (this as any).hasCodebaseContextProvider(); + } + + public async testHandleConfigUpdate( + configResult: ConfigResult, + ) { + return (this as any).handleConfigUpdate({ config: configResult.config }); + } } // Create a mock messenger type that doesn't require actual protocol imports @@ -71,8 +87,6 @@ type MockMessengerType = { // These are more like integration tests, whereas we should separately test // the individual CodebaseIndex classes describe("CodebaseIndexer", () => { - const pauseToken = new PauseToken(false); - // Replace mockProgressReporter with mockMessenger const mockMessenger: MockMessengerType = { send: jest.fn(), @@ -82,13 +96,8 @@ describe("CodebaseIndexer", () => { onError: jest.fn(), }; - const codebaseIndexer = new TestCodebaseIndexer( - testConfigHandler, - testIde, - mockMessenger as any, - false, - ); - const testIndex = new TestCodebaseIndex(); + let codebaseIndexer: TestCodebaseIndexer; + let testIndex: TestCodebaseIndex; beforeAll(async () => { tearDownTestDir(); @@ -99,6 +108,15 @@ describe("CodebaseIndexer", () => { cwd: TEST_DIR_PATH, }); execSync('git config user.name "Test"', { cwd: TEST_DIR_PATH }); + + codebaseIndexer = new TestCodebaseIndexer( + testConfigHandler, + testIde, + mockMessenger as any, + false, + ); + await codebaseIndexer.initPromise; + testIndex = new TestCodebaseIndex(); }); afterAll(async () => { @@ -161,6 +179,7 @@ describe("CodebaseIndexer", () => { } test("should index test folder without problem", async () => { + walkDirCache.invalidate(); addToTestDir([ ["test.ts", TEST_TS], ["py/main.py", TEST_PY], @@ -403,4 +422,270 @@ describe("CodebaseIndexer", () => { expect(codebaseIndexer.currentIndexingState).toEqual(testState); }); }); + + // New describe block for testing handleConfigUpdate functionality + describe("handleConfigUpdate functionality", () => { + let testIndexer: TestCodebaseIndexer; + let mockRefreshCodebaseIndex: jest.MockedFunction; + let mockGetWorkspaceDirs: jest.MockedFunction; + + beforeEach(() => { + testIndexer = new TestCodebaseIndexer( + testConfigHandler, + testIde, + mockMessenger as any, + false, + ); + + // Mock the refreshCodebaseIndex method to avoid actual indexing + mockRefreshCodebaseIndex = jest + .spyOn(testIndexer, "refreshCodebaseIndex") + .mockImplementation(async () => {}); + + // Mock getWorkspaceDirs to return test directories + mockGetWorkspaceDirs = jest + .spyOn(testIde, "getWorkspaceDirs") + .mockResolvedValue(["/test/workspace"]); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + describe("hasCodebaseContextProvider", () => { + test("should return true when codebase context provider is present", () => { + // Set up config with codebase context provider + (testIndexer as any).config = { + contextProviders: [ + { + description: { + title: CodebaseContextProvider.description.title, + }, + }, + ], + }; + + const result = testIndexer.testHasCodebaseContextProvider(); + expect(result).toBe(true); + }); + + test("should return false when no context providers are configured", () => { + (testIndexer as any).config = { + contextProviders: undefined, + }; + + const result = testIndexer.testHasCodebaseContextProvider(); + expect(result).toBe(false); + }); + + test("should return false when context providers exist but no codebase provider", () => { + (testIndexer as any).config = { + contextProviders: [ + { + description: { + title: "SomeOtherProvider", + }, + }, + ], + }; + + const result = testIndexer.testHasCodebaseContextProvider(); + expect(result).toBe(false); + }); + + test("should return false when context providers is empty array", () => { + (testIndexer as any).config = { + contextProviders: [], + }; + + const result = testIndexer.testHasCodebaseContextProvider(); + expect(result).toBe(false); + }); + }); + + describe("handleConfigUpdate", () => { + test("should return early when newConfig is null", async () => { + const configResult: ConfigResult = { + config: null as any, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, so we want them to not get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(1); + expect(mockGetWorkspaceDirs).toHaveBeenCalledTimes(1); + }); + + test("should return early when newConfig is undefined", async () => { + const configResult: ConfigResult = { + config: undefined as any, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, so we want them to not get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(1); + expect(mockGetWorkspaceDirs).toHaveBeenCalledTimes(1); + }); + + test("should return early when no codebase context provider is present", async () => { + const configResult: ConfigResult = { + config: { + contextProviders: [ + { + description: { + title: "SomeOtherProvider", + }, + }, + ], + selectedModelByRole: { + embed: { + model: "test-model", + provider: "test-provider", + }, + }, + } as unknown as ContinueConfig, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, so we want them to not get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(1); + expect(mockGetWorkspaceDirs).toHaveBeenCalledTimes(1); + }); + + test("should return early when no embed model is configured", async () => { + const configResult: ConfigResult = { + config: { + contextProviders: [ + { + description: { + title: CodebaseContextProvider.description.title, + }, + }, + ], + selectedModelByRole: { + embed: undefined, + }, + } as unknown as ContinueConfig, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, so we want them to not get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(1); + expect(mockGetWorkspaceDirs).toHaveBeenCalledTimes(1); + }); + + test("should call refreshCodebaseIndex when all conditions are met", async () => { + const configResult: ConfigResult = { + config: { + contextProviders: [ + { + description: { + title: CodebaseContextProvider.description.title, + }, + }, + ], + selectedModelByRole: { + embed: { + model: "test-model", + provider: "test-provider", + }, + }, + } as unknown as ContinueConfig, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, and we want them to get called again + expect(mockGetWorkspaceDirs).toHaveBeenCalledTimes(2); + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(2); + expect(mockRefreshCodebaseIndex).toHaveBeenCalledWith([ + "/test/workspace", + ]); + }); + + test("should set config property before checking conditions", async () => { + const testConfig = { + contextProviders: [ + { + description: { + title: CodebaseContextProvider.description.title, + }, + }, + ], + selectedModelByRole: { + embed: { + model: "test-model", + provider: "test-provider", + }, + }, + } as unknown as ContinueConfig; + + const configResult: ConfigResult = { + config: testConfig, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // Verify that the config was set + expect((testIndexer as any).config).toBe(testConfig); + // These get called once on init, and we want them to get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(2); + }); + + test("should handle multiple context providers correctly", async () => { + const configResult: ConfigResult = { + config: { + contextProviders: [ + { + description: { + title: "SomeOtherProvider", + }, + }, + { + description: { + title: CodebaseContextProvider.description.title, + }, + }, + { + description: { + title: "AnotherProvider", + }, + }, + ], + selectedModelByRole: { + embed: { + model: "test-model", + provider: "test-provider", + }, + }, + } as unknown as ContinueConfig, + errors: [], + configLoadInterrupted: false, + }; + + await testIndexer.testHandleConfigUpdate(configResult); + + // These get called once on init, and we want them to get called again + expect(mockRefreshCodebaseIndex).toHaveBeenCalledTimes(2); + expect(mockRefreshCodebaseIndex).toHaveBeenCalledWith([ + "/test/workspace", + ]); + }); + }); + }); }); diff --git a/core/indexing/CodebaseIndexer.ts b/core/indexing/CodebaseIndexer.ts index 0bd9b1328a5..862f1bb24a5 100644 --- a/core/indexing/CodebaseIndexer.ts +++ b/core/indexing/CodebaseIndexer.ts @@ -1,7 +1,12 @@ import * as fs from "fs/promises"; import { ConfigHandler } from "../config/ConfigHandler.js"; -import { IDE, IndexingProgressUpdate, IndexTag } from "../index.js"; +import { + ContinueConfig, + IDE, + IndexingProgressUpdate, + IndexTag, +} from "../index.js"; import type { FromCoreProtocol, ToCoreProtocol } from "../protocol"; import type { IMessenger } from "../protocol/messenger"; import { extractMinimalStackTraceInfo } from "../util/extractMinimalStackTraceInfo.js"; @@ -9,11 +14,14 @@ import { getIndexSqlitePath, getLanceDbPath } from "../util/paths.js"; import { Telemetry } from "../util/posthog.js"; import { findUriInDirs, getUriPathBasename } from "../util/uri.js"; +import { ConfigResult } from "@continuedev/config-yaml"; +import CodebaseContextProvider from "../context/providers/CodebaseContextProvider.js"; import { ContinueServerClient } from "../continueServer/stubs/client"; import { LLMError } from "../llm/index.js"; import { getRootCause } from "../util/errors.js"; import { ChunkCodebaseIndex } from "./chunk/ChunkCodebaseIndex.js"; import { CodeSnippetsCodebaseIndex } from "./CodeSnippetsIndex.js"; +import { embedModelsAreEqual } from "./docs/DocsService.js"; import { FullTextSearchCodebaseIndex } from "./FullTextSearchCodebaseIndex.js"; import { LanceDbIndex } from "./LanceDbIndex.js"; import { getComputeDeleteAddRemove, IndexLock } from "./refreshIndex.js"; @@ -44,6 +52,10 @@ export class CodebaseIndexer { * - To make as few requests as possible to the embeddings providers */ filesPerBatch = 200; + // We normally allow this to run in the background, + // and only need to `await` it for tests. + public initPromise: Promise; + private config!: ContinueConfig; private indexingCancellationController: AbortController | undefined; private codebaseIndexingState: IndexingProgressUpdate; private readonly pauseToken: PauseToken; @@ -83,6 +95,17 @@ export class CodebaseIndexer { // Initialize pause token this.pauseToken = new PauseToken(initialPaused); + + this.initPromise = this.init(configHandler); + } + + // Initialization - load config and attach config listener + private async init(configHandler: ConfigHandler) { + const result = await configHandler.loadConfig(); + await this.handleConfigUpdate(result); + configHandler.onConfigUpdate( + this.handleConfigUpdate.bind(this) as (arg: any) => void, + ); } set paused(value: boolean) { @@ -192,7 +215,7 @@ export class CodebaseIndexer { workspaceDirs: string[], ): Promise { if (this.pauseToken.paused) { - // NOTE: by returning here, there is a chance that while paused a file is modified and + // FIXME: by returning here, there is a chance that while paused a file is modified and // then after unpausing the file is not reindexed return; } @@ -763,4 +786,49 @@ export class CodebaseIndexer { public get currentIndexingState(): IndexingProgressUpdate { return this.codebaseIndexingState; } + + private hasCodebaseContextProvider() { + return !!this.config.contextProviders?.some( + (provider) => + provider.description.title === + CodebaseContextProvider.description.title, + ); + } + + private isIndexingConfigSame( + config1: ContinueConfig | undefined, + config2: ContinueConfig, + ) { + return embedModelsAreEqual( + config1?.selectedModelByRole.embed, + config2.selectedModelByRole.embed, + ); + } + + private async handleConfigUpdate({ + config: newConfig, + }: ConfigResult) { + if (newConfig) { + const needsReindex = !this.isIndexingConfigSame(this.config, newConfig); + + this.config = newConfig; // IMPORTANT - need to set up top, other methods below use this without passing it in + + // No point in indexing if no codebase context provider + const hasCodebaseContextProvider = this.hasCodebaseContextProvider(); + if (!hasCodebaseContextProvider) { + return; + } + + // Skip codebase indexing if not supported + // No warning message here because would show on ANY config update + if (!this.config.selectedModelByRole.embed) { + return; + } + + if (needsReindex) { + const dirs = await this.ide.getWorkspaceDirs(); + await this.refreshCodebaseIndex(dirs); + } + } + } } diff --git a/core/indexing/docs/DocsService.ts b/core/indexing/docs/DocsService.ts index 0ccb4804e01..babcacba75f 100644 --- a/core/indexing/docs/DocsService.ts +++ b/core/indexing/docs/DocsService.ts @@ -65,11 +65,20 @@ export type AddParams = { favicon?: string; }; -const markFailedInGlobalContext = (siteIndexingConfig: SiteIndexingConfig) => { +const markFailedInGlobalContext = ( + siteIndexingConfig: SiteIndexingConfig, + continueConfig: ContinueConfig, +) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => !siteIndexingConfigsAreEqual(siteIndexingConfig, d), + (d) => + !siteIndexingConfigsAreEqual( + siteIndexingConfig, + d, + continueConfig, + continueConfig, + ), ); newFailedDocs.push(siteIndexingConfig); globalContext.update("failedDocs", newFailedDocs); @@ -77,44 +86,70 @@ const markFailedInGlobalContext = (siteIndexingConfig: SiteIndexingConfig) => { const removeFromFailedGlobalContext = ( siteIndexingConfig: SiteIndexingConfig, + continueConfig: ContinueConfig, ) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => !siteIndexingConfigsAreEqual(siteIndexingConfig, d), + (d) => + !siteIndexingConfigsAreEqual( + siteIndexingConfig, + d, + continueConfig, + continueConfig, + ), ); globalContext.update("failedDocs", newFailedDocs); }; -const hasIndexingFailed = (siteIndexingConfig: SiteIndexingConfig) => { +const hasIndexingFailed = ( + siteIndexingConfig: SiteIndexingConfig, + continueConfig: ContinueConfig, +) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; return failedDocs.find((d) => - siteIndexingConfigsAreEqual(siteIndexingConfig, d), + siteIndexingConfigsAreEqual( + siteIndexingConfig, + d, + continueConfig, + continueConfig, + ), ); }; const siteIndexingConfigsAreEqual = ( - config1: SiteIndexingConfig, - config2: SiteIndexingConfig, + siteConfig1: SiteIndexingConfig, + siteConfig2: SiteIndexingConfig, + contConfig1: ContinueConfig | undefined, + contConfig2: ContinueConfig, ) => { return ( - config1.startUrl === config2.startUrl && - config1.faviconUrl === config2.faviconUrl && - config1.title === config2.title && - config1.maxDepth === config2.maxDepth && - config1.useLocalCrawling === config2.useLocalCrawling + siteConfig1.faviconUrl === siteConfig2.faviconUrl && + siteConfig1.title === siteConfig2.title && + siteIndexingConfigsAreEqualExceptTitleAndFavicon( + siteConfig1, + siteConfig2, + contConfig1, + contConfig2, + ) ); }; const siteIndexingConfigsAreEqualExceptTitleAndFavicon = ( - config1: SiteIndexingConfig, - config2: SiteIndexingConfig, + siteConfig1: SiteIndexingConfig, + siteConfig2: SiteIndexingConfig, + contConfig1: ContinueConfig | undefined, + contConfig2: ContinueConfig, ) => { return ( - config1.startUrl === config2.startUrl && - config1.maxDepth === config2.maxDepth && - config1.useLocalCrawling === config2.useLocalCrawling + siteConfig1.startUrl === siteConfig2.startUrl && + siteConfig1.maxDepth === siteConfig2.maxDepth && + siteConfig1.useLocalCrawling === siteConfig2.useLocalCrawling && + embedModelsAreEqual( + contConfig1?.selectedModelByRole.embed, + contConfig2.selectedModelByRole.embed, + ) ); }; @@ -506,7 +541,7 @@ export default class DocsService { // If not force-reindexing and has failed with same config, don't reattempt if (!forceReindex) { - if (hasIndexingFailed(siteIndexingConfig)) { + if (hasIndexingFailed(siteIndexingConfig, this.config)) { console.log( `Not reattempting to index ${siteIndexingConfig.startUrl}, has already failed with same config`, ); @@ -689,7 +724,7 @@ export default class DocsService { }); // void this.ide.showToast("info", `Failed to index ${startUrl}`); - markFailedInGlobalContext(siteIndexingConfig); + markFailedInGlobalContext(siteIndexingConfig, this.config); return; } @@ -747,7 +782,7 @@ export default class DocsService { void this.ide.showToast("info", `Successfully indexed ${startUrl}`); } - removeFromFailedGlobalContext(siteIndexingConfig); + removeFromFailedGlobalContext(siteIndexingConfig, this.config); } catch (e) { console.error( `Error indexing docs at: ${siteIndexingConfig.startUrl}`, @@ -768,7 +803,7 @@ export default class DocsService { status: "failed", progress: 1, }); - markFailedInGlobalContext(siteIndexingConfig); + markFailedInGlobalContext(siteIndexingConfig, this.config); } finally { this.docsIndexingQueue.delete(startUrl); } @@ -1036,12 +1071,22 @@ export default class DocsService { ); // TODO: Changes to the docs config made while Continue isn't running won't be caught - if (oldConfigDoc && !siteIndexingConfigsAreEqual(oldConfigDoc, doc)) { + if ( + oldConfigDoc && + !siteIndexingConfigsAreEqual( + oldConfigDoc, + doc, + oldConfig, + newConfig, + ) + ) { // When only the title or faviconUrl changed, Update the sqlite metadate instead of reindexing if ( siteIndexingConfigsAreEqualExceptTitleAndFavicon( oldConfigDoc, doc, + oldConfig, + newConfig, ) ) { await this.updateMetadataInSqlite(doc); @@ -1261,7 +1306,12 @@ export default class DocsService { // Handles the case where a user has manually added the doc to config.json // so it already exists in the file const doesEquivalentDocExist = this.config.docs?.some((doc) => - siteIndexingConfigsAreEqual(doc, siteIndexingConfig), + siteIndexingConfigsAreEqual( + doc, + siteIndexingConfig, + this.config, + this.config, + ), ); if (!doesEquivalentDocExist) { @@ -1364,3 +1414,14 @@ export default class DocsService { this.statuses.delete(startUrl); } } + +export function embedModelsAreEqual( + llm1: ILLM | null | undefined, + llm2: ILLM | null | undefined, +): boolean { + return ( + llm1?.underlyingProviderName === llm2?.underlyingProviderName && + llm1?.title === llm2?.title && + llm1?.maxEmbeddingChunkSize === llm2?.maxEmbeddingChunkSize + ); +} From ce294714589da135b0f581416b2b8bcb31524ad6 Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Fri, 11 Jul 2025 09:24:42 -0400 Subject: [PATCH 2/3] refactor: add helper function to clarify intention --- core/indexing/docs/DocsService.ts | 44 +++++++++++++------------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/core/indexing/docs/DocsService.ts b/core/indexing/docs/DocsService.ts index babcacba75f..5216dc5cbab 100644 --- a/core/indexing/docs/DocsService.ts +++ b/core/indexing/docs/DocsService.ts @@ -72,13 +72,7 @@ const markFailedInGlobalContext = ( const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => - !siteIndexingConfigsAreEqual( - siteIndexingConfig, - d, - continueConfig, - continueConfig, - ), + (d) => !docConfigsAreEqual(siteIndexingConfig, d, continueConfig), ); newFailedDocs.push(siteIndexingConfig); globalContext.update("failedDocs", newFailedDocs); @@ -91,13 +85,7 @@ const removeFromFailedGlobalContext = ( const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => - !siteIndexingConfigsAreEqual( - siteIndexingConfig, - d, - continueConfig, - continueConfig, - ), + (d) => !docConfigsAreEqual(siteIndexingConfig, d, continueConfig), ); globalContext.update("failedDocs", newFailedDocs); }; @@ -109,12 +97,21 @@ const hasIndexingFailed = ( const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; return failedDocs.find((d) => - siteIndexingConfigsAreEqual( - siteIndexingConfig, - d, - continueConfig, - continueConfig, - ), + docConfigsAreEqual(siteIndexingConfig, d, continueConfig), + ); +}; + +const docConfigsAreEqual = ( + siteConfig1: SiteIndexingConfig, + siteConfig2: SiteIndexingConfig, + contConfig: ContinueConfig, +) => { + return siteIndexingConfigsAreEqual( + siteConfig1, + siteConfig2, + // These are equal because this function only checks the doc configs for changes + contConfig, + contConfig, ); }; @@ -1306,12 +1303,7 @@ export default class DocsService { // Handles the case where a user has manually added the doc to config.json // so it already exists in the file const doesEquivalentDocExist = this.config.docs?.some((doc) => - siteIndexingConfigsAreEqual( - doc, - siteIndexingConfig, - this.config, - this.config, - ), + docConfigsAreEqual(doc, siteIndexingConfig, this.config), ); if (!doesEquivalentDocExist) { From 634aafb08fc51a52cb7d8410eec89e9925e73e8d Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Mon, 14 Jul 2025 10:26:31 -0400 Subject: [PATCH 3/3] refactor: reorganize equality checks to avoid unnecessary params --- core/indexing/docs/DocsService.ts | 88 ++++++++++++++----------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/core/indexing/docs/DocsService.ts b/core/indexing/docs/DocsService.ts index 5216dc5cbab..6fed928428e 100644 --- a/core/indexing/docs/DocsService.ts +++ b/core/indexing/docs/DocsService.ts @@ -65,14 +65,11 @@ export type AddParams = { favicon?: string; }; -const markFailedInGlobalContext = ( - siteIndexingConfig: SiteIndexingConfig, - continueConfig: ContinueConfig, -) => { +const markFailedInGlobalContext = (siteIndexingConfig: SiteIndexingConfig) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => !docConfigsAreEqual(siteIndexingConfig, d, continueConfig), + (d) => !docConfigsAreEqual(siteIndexingConfig, d), ); newFailedDocs.push(siteIndexingConfig); globalContext.update("failedDocs", newFailedDocs); @@ -80,38 +77,51 @@ const markFailedInGlobalContext = ( const removeFromFailedGlobalContext = ( siteIndexingConfig: SiteIndexingConfig, - continueConfig: ContinueConfig, ) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; const newFailedDocs = failedDocs.filter( - (d) => !docConfigsAreEqual(siteIndexingConfig, d, continueConfig), + (d) => !docConfigsAreEqual(siteIndexingConfig, d), ); globalContext.update("failedDocs", newFailedDocs); }; -const hasIndexingFailed = ( - siteIndexingConfig: SiteIndexingConfig, - continueConfig: ContinueConfig, -) => { +const hasIndexingFailed = (siteIndexingConfig: SiteIndexingConfig) => { const globalContext = new GlobalContext(); const failedDocs = globalContext.get("failedDocs") ?? []; - return failedDocs.find((d) => - docConfigsAreEqual(siteIndexingConfig, d, continueConfig), - ); + return failedDocs.find((d) => docConfigsAreEqual(siteIndexingConfig, d)); }; +export function embedModelsAreEqual( + llm1: ILLM | null | undefined, + llm2: ILLM | null | undefined, +): boolean { + return ( + llm1?.underlyingProviderName === llm2?.underlyingProviderName && + llm1?.title === llm2?.title && + llm1?.maxEmbeddingChunkSize === llm2?.maxEmbeddingChunkSize + ); +} + const docConfigsAreEqual = ( siteConfig1: SiteIndexingConfig, siteConfig2: SiteIndexingConfig, - contConfig: ContinueConfig, ) => { - return siteIndexingConfigsAreEqual( - siteConfig1, - siteConfig2, - // These are equal because this function only checks the doc configs for changes - contConfig, - contConfig, + return ( + siteConfig1.faviconUrl === siteConfig2.faviconUrl && + siteConfig1.title === siteConfig2.title && + docConfigsAreEqualExceptTitleAndFavicon(siteConfig1, siteConfig2) + ); +}; + +const docConfigsAreEqualExceptTitleAndFavicon = ( + siteConfig1: SiteIndexingConfig, + siteConfig2: SiteIndexingConfig, +) => { + return ( + siteConfig1.startUrl === siteConfig2.startUrl && + siteConfig1.maxDepth === siteConfig2.maxDepth && + siteConfig1.useLocalCrawling === siteConfig2.useLocalCrawling ); }; @@ -122,13 +132,10 @@ const siteIndexingConfigsAreEqual = ( contConfig2: ContinueConfig, ) => { return ( - siteConfig1.faviconUrl === siteConfig2.faviconUrl && - siteConfig1.title === siteConfig2.title && - siteIndexingConfigsAreEqualExceptTitleAndFavicon( - siteConfig1, - siteConfig2, - contConfig1, - contConfig2, + docConfigsAreEqual(siteConfig1, siteConfig2) && + embedModelsAreEqual( + contConfig1?.selectedModelByRole.embed, + contConfig2.selectedModelByRole.embed, ) ); }; @@ -140,9 +147,7 @@ const siteIndexingConfigsAreEqualExceptTitleAndFavicon = ( contConfig2: ContinueConfig, ) => { return ( - siteConfig1.startUrl === siteConfig2.startUrl && - siteConfig1.maxDepth === siteConfig2.maxDepth && - siteConfig1.useLocalCrawling === siteConfig2.useLocalCrawling && + docConfigsAreEqualExceptTitleAndFavicon(siteConfig1, siteConfig2) && embedModelsAreEqual( contConfig1?.selectedModelByRole.embed, contConfig2.selectedModelByRole.embed, @@ -538,7 +543,7 @@ export default class DocsService { // If not force-reindexing and has failed with same config, don't reattempt if (!forceReindex) { - if (hasIndexingFailed(siteIndexingConfig, this.config)) { + if (hasIndexingFailed(siteIndexingConfig)) { console.log( `Not reattempting to index ${siteIndexingConfig.startUrl}, has already failed with same config`, ); @@ -721,7 +726,7 @@ export default class DocsService { }); // void this.ide.showToast("info", `Failed to index ${startUrl}`); - markFailedInGlobalContext(siteIndexingConfig, this.config); + markFailedInGlobalContext(siteIndexingConfig); return; } @@ -779,7 +784,7 @@ export default class DocsService { void this.ide.showToast("info", `Successfully indexed ${startUrl}`); } - removeFromFailedGlobalContext(siteIndexingConfig, this.config); + removeFromFailedGlobalContext(siteIndexingConfig); } catch (e) { console.error( `Error indexing docs at: ${siteIndexingConfig.startUrl}`, @@ -800,7 +805,7 @@ export default class DocsService { status: "failed", progress: 1, }); - markFailedInGlobalContext(siteIndexingConfig, this.config); + markFailedInGlobalContext(siteIndexingConfig); } finally { this.docsIndexingQueue.delete(startUrl); } @@ -1303,7 +1308,7 @@ export default class DocsService { // Handles the case where a user has manually added the doc to config.json // so it already exists in the file const doesEquivalentDocExist = this.config.docs?.some((doc) => - docConfigsAreEqual(doc, siteIndexingConfig, this.config), + docConfigsAreEqual(doc, siteIndexingConfig), ); if (!doesEquivalentDocExist) { @@ -1406,14 +1411,3 @@ export default class DocsService { this.statuses.delete(startUrl); } } - -export function embedModelsAreEqual( - llm1: ILLM | null | undefined, - llm2: ILLM | null | undefined, -): boolean { - return ( - llm1?.underlyingProviderName === llm2?.underlyingProviderName && - llm1?.title === llm2?.title && - llm1?.maxEmbeddingChunkSize === llm2?.maxEmbeddingChunkSize - ); -}