Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .changeset/enable-nested-auto-index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
"@tanstack/db": patch
---

Enable auto-indexing for nested field paths

Previously, auto-indexes were only created for top-level fields. Queries filtering on nested fields like `vehicleDispatch.date` or `profile.score` were forced to perform full table scans, causing significant performance issues.

Auto-indexes are now created for nested field paths of any depth when a query filters with `eq()`, `gt()`, `gte()`, `lt()`, `lte()`, or `in()`.

**Performance Impact:**

Before this fix, filtering on nested fields resulted in expensive full scans:

- Query time: ~353ms for 39 executions (from issue #727)
- "graph run" and "d2ts join" operations dominated execution time

After this fix, nested field queries use indexes:

- Query time: Sub-millisecond (typical indexed lookup)
- Proper index utilization verified through query optimizer

**Example:**

```typescript
const collection = createCollection({
getKey: (item) => item.id,
autoIndex: "eager", // default
// ... sync config
})

// These now automatically create and use indexes:
collection.subscribeChanges((items) => console.log(items), {
whereExpression: eq(row.vehicleDispatch?.date, "2024-01-01"),
})

collection.subscribeChanges((items) => console.log(items), {
whereExpression: gt(row.profile?.stats.rating, 4.5),
})
```

**Index Naming:**

Auto-indexes are named `auto:` followed by the field path, with path segments joined by dots, to avoid naming conflicts:

- `auto:status` for top-level field `status`
- `auto:profile.score` for nested field `profile.score`
- `auto:metadata.stats.views` for deeply nested field `metadata.stats.views`

Fixes #727
33 changes: 23 additions & 10 deletions packages/db/src/indexes/auto-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,25 @@ export function ensureIndexForField<

// Create a new index for this field using the collection's createIndex method
try {
collection.createIndex((row) => (row as any)[fieldName], {
name: `auto_${fieldName}`,
indexType: BTreeIndex,
options: compareFn ? { compareFn, compareOptions } : {},
})
// Use the proxy-based approach to create the proper accessor for nested paths
collection.createIndex(
(row) => {
// Navigate through the field path
let current: any = row
for (const part of fieldPath) {
current = current[part]
}
return current
},
{
name: `auto:${fieldPath.join(`.`)}`,
indexType: BTreeIndex,
options: compareFn ? { compareFn, compareOptions } : {},
}
)
} catch (error) {
console.warn(
`${collection.id ? `[${collection.id}] ` : ``}Failed to create auto-index for field "${fieldName}":`,
`${collection.id ? `[${collection.id}] ` : ``}Failed to create auto-index for field path "${fieldPath.join(`.`)}":`,
error
)
}
Expand Down Expand Up @@ -108,20 +119,22 @@ function extractIndexableExpressions(
return
}

// Check if the first argument is a property reference (single field)
// Check if the first argument is a property reference
if (func.args.length < 1 || func.args[0].type !== `ref`) {
return
}

const fieldRef = func.args[0]
const fieldPath = fieldRef.path

// Skip if it's not a simple field (e.g., nested properties or array access)
if (fieldPath.length !== 1) {
// Skip if the path is empty
if (fieldPath.length === 0) {
return
}

const fieldName = fieldPath[0]
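// Use the full path joined with underscores as the field name
// (for a single-segment path this is just that segment).
// NOTE(review): a top-level field `a_b` and the nested path `a.b` yield the same
// fieldName — confirm consumers distinguish entries by fieldPath, not fieldName.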
const fieldName = fieldPath.join(`_`)
results.push({ fieldName, fieldPath })
}

Expand Down
144 changes: 144 additions & 0 deletions packages/db/tests/collection-auto-index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -750,4 +750,148 @@ describe(`Collection Auto-Indexing`, () => {

subscription.unsubscribe()
})

it(`should create auto-indexes for nested field paths`, async () => {
  // Row shape with optional nested objects one and two levels deep.
  interface NestedTestItem {
    id: string
    name: string
    profile?: {
      score: number
      bio: string
    }
    metadata?: {
      tags: Array<string>
      stats: {
        views: number
        likes: number
      }
    }
  }

  // Builds one fully-populated row so the fixture below reads like a table.
  const makeItem = (
    id: string,
    name: string,
    score: number,
    bio: string,
    tags: Array<string>,
    views: number,
    likes: number
  ): NestedTestItem => ({
    id,
    name,
    profile: { score, bio },
    metadata: { tags, stats: { views, likes } },
  })

  const seedItems: Array<NestedTestItem> = [
    makeItem(`1`, `Alice`, 85, `Developer`, [`tech`, `coding`], 100, 50),
    makeItem(`2`, `Bob`, 92, `Designer`, [`design`, `ui`], 200, 75),
    makeItem(`3`, `Charlie`, 78, `Manager`, [`management`, `leadership`], 150, 60),
  ]

  const collection = createCollection<NestedTestItem, string>({
    getKey: (item) => item.id,
    autoIndex: `eager`,
    startSync: true,
    sync: {
      // Insert every seed row in a single transaction, then mark the collection ready.
      sync: ({ begin, write, commit, markReady }) => {
        begin()
        seedItems.forEach((value) => {
          write({ type: `insert`, value })
        })
        commit()
        markReady()
      },
    },
  })

  await collection.stateWhenReady()

  // Looks up an index whose expression is a `ref` with exactly the given path.
  const findIndexForPath = (expectedPath: Array<string>) =>
    Array.from(collection.indexes.values()).find((index) => {
      if (index.expression.type !== `ref`) {
        return false
      }
      const actualPath = (index.expression as any).path
      return (
        actualPath.length === expectedPath.length &&
        expectedPath.every((segment, position) => actualPath[position] === segment)
      )
    })

  // Counts the `insert` entries among the collected change events.
  const countInserts = (changes: Array<any>) =>
    changes.filter((change) => change.type === `insert`).length

  // Fresh `profile.score > 80` expression for each use.
  const scoreAbove80 = () => gt(new PropRef([`profile`, `score`]), 80)

  // No subscription has been made yet, so the index map must be empty.
  expect(collection.indexes.size).toBe(0)

  // Case 1: path one level deep (profile.score)
  const scoreChanges: Array<any> = []
  const scoreSubscription = collection.subscribeChanges(
    (items) => {
      scoreChanges.push(...items)
    },
    {
      includeInitialState: true,
      whereExpression: scoreAbove80(),
    }
  )

  // Subscribing should have produced an index on profile.score
  expect(findIndexForPath([`profile`, `score`])).toBeDefined()

  // Alice (85) and Bob (92) pass the filter
  expect(countInserts(scoreChanges)).toBe(2)

  scoreSubscription.unsubscribe()

  // Case 2: path two levels deep (metadata.stats.views)
  const viewsChanges: Array<any> = []
  const viewsSubscription = collection.subscribeChanges(
    (items) => {
      viewsChanges.push(...items)
    },
    {
      includeInitialState: true,
      whereExpression: eq(new PropRef([`metadata`, `stats`, `views`]), 200),
    }
  )

  // Subscribing should have produced an index on metadata.stats.views
  expect(findIndexForPath([`metadata`, `stats`, `views`])).toBeDefined()

  // Bob is the only row with 200 views
  expect(countInserts(viewsChanges)).toBe(1)

  viewsSubscription.unsubscribe()

  // Case 3: confirm through the tracker that the query is served by an index
  withIndexTracking(collection, (tracker) => {
    const matches = collection.currentStateAsChanges({
      where: scoreAbove80(),
    })!

    // Alice and Bob
    expect(matches.length).toBe(2)

    // One index call, no full scan
    expectIndexUsage(tracker.stats, {
      shouldUseIndex: true,
      shouldUseFullScan: false,
      indexCallCount: 1,
      fullScanCallCount: 0,
    })
  })
})
})
Loading