@@ -17,7 +17,29 @@ use chroma_sqlite::db::SqliteDb;
17
17
use chroma_sysdb:: { GetCollectionsOptions , SysDb } ;
18
18
use chroma_system:: System ;
19
19
use chroma_types:: {
20
- operator:: { Filter , KnnBatch , KnnProjection , Limit , Projection , Scan } , plan:: { Count , Get , Knn , Search } , AddCollectionRecordsError , AddCollectionRecordsRequest , AddCollectionRecordsResponse , Collection , CollectionAndSegments , CollectionUuid , CountCollectionsError , CountCollectionsRequest , CountCollectionsResponse , CountRequest , CountResponse , CreateCollectionError , CreateCollectionRequest , CreateCollectionResponse , CreateDatabaseError , CreateDatabaseRequest , CreateDatabaseResponse , CreateTenantError , CreateTenantRequest , CreateTenantResponse , DeleteCollectionError , DeleteCollectionRecordsError , DeleteCollectionRecordsRequest , DeleteCollectionRecordsResponse , DeleteCollectionRequest , DeleteDatabaseError , DeleteDatabaseRequest , DeleteDatabaseResponse , ForkCollectionError , ForkCollectionRequest , ForkCollectionResponse , GetCollectionByCrnError , GetCollectionByCrnRequest , GetCollectionByCrnResponse , GetCollectionError , GetCollectionRequest , GetCollectionResponse , GetCollectionsError , GetDatabaseError , GetDatabaseRequest , GetDatabaseResponse , GetRequest , GetResponse , GetTenantError , GetTenantRequest , GetTenantResponse , HealthCheckResponse , HeartbeatError , HeartbeatResponse , Include , KnnIndex , ListCollectionsRequest , ListCollectionsResponse , ListDatabasesError , ListDatabasesRequest , ListDatabasesResponse , Operation , OperationRecord , QueryError , QueryRequest , QueryResponse , ResetError , ResetResponse , SearchRequest , SearchResponse , Segment , SegmentScope , SegmentType , SegmentUuid , UpdateCollectionError , UpdateCollectionRecordsError , UpdateCollectionRecordsRequest , UpdateCollectionRecordsResponse , UpdateCollectionRequest , UpdateCollectionResponse , UpdateTenantError , UpdateTenantRequest , UpdateTenantResponse , UpsertCollectionRecordsError , UpsertCollectionRecordsRequest , UpsertCollectionRecordsResponse , VectorIndexConfiguration , Where
20
+ operator:: { Filter , KnnBatch , KnnProjection , Limit , Projection , Scan } ,
21
+ plan:: { Count , Get , Knn , Search } ,
22
+ AddCollectionRecordsError , AddCollectionRecordsRequest , AddCollectionRecordsResponse ,
23
+ Collection , CollectionAndSegments , CollectionUuid , CountCollectionsError ,
24
+ CountCollectionsRequest , CountCollectionsResponse , CountRequest , CountResponse ,
25
+ CreateCollectionError , CreateCollectionRequest , CreateCollectionResponse , CreateDatabaseError ,
26
+ CreateDatabaseRequest , CreateDatabaseResponse , CreateTenantError , CreateTenantRequest ,
27
+ CreateTenantResponse , DeleteCollectionError , DeleteCollectionRecordsError ,
28
+ DeleteCollectionRecordsRequest , DeleteCollectionRecordsResponse , DeleteCollectionRequest ,
29
+ DeleteDatabaseError , DeleteDatabaseRequest , DeleteDatabaseResponse , ForkCollectionError ,
30
+ ForkCollectionRequest , ForkCollectionResponse , GetCollectionByCrnError ,
31
+ GetCollectionByCrnRequest , GetCollectionByCrnResponse , GetCollectionError ,
32
+ GetCollectionRequest , GetCollectionResponse , GetCollectionsError , GetDatabaseError ,
33
+ GetDatabaseRequest , GetDatabaseResponse , GetRequest , GetResponse , GetTenantError ,
34
+ GetTenantRequest , GetTenantResponse , HealthCheckResponse , HeartbeatError , HeartbeatResponse ,
35
+ Include , KnnIndex , ListCollectionsRequest , ListCollectionsResponse , ListDatabasesError ,
36
+ ListDatabasesRequest , ListDatabasesResponse , Operation , OperationRecord , QueryError ,
37
+ QueryRequest , QueryResponse , ResetError , ResetResponse , SearchRequest , SearchResponse , Segment ,
38
+ SegmentScope , SegmentType , SegmentUuid , UpdateCollectionError , UpdateCollectionRecordsError ,
39
+ UpdateCollectionRecordsRequest , UpdateCollectionRecordsResponse , UpdateCollectionRequest ,
40
+ UpdateCollectionResponse , UpdateTenantError , UpdateTenantRequest , UpdateTenantResponse ,
41
+ UpsertCollectionRecordsError , UpsertCollectionRecordsRequest , UpsertCollectionRecordsResponse ,
42
+ VectorIndexConfiguration , Where ,
21
43
} ;
22
44
use opentelemetry:: global;
23
45
use opentelemetry:: metrics:: Counter ;
@@ -53,7 +75,6 @@ struct Metrics {
53
75
create_tenant_retries_counter : Counter < u64 > ,
54
76
update_tenant_retries_counter : Counter < u64 > ,
55
77
get_collection_with_segments_counter : Counter < u64 > ,
56
- search_retries_counter : Counter < u64 > ,
57
78
metering_fork_counter : Counter < u64 > ,
58
79
metering_read_counter : Counter < u64 > ,
59
80
metering_write_counter : Counter < u64 > ,
@@ -95,7 +116,6 @@ impl ServiceBasedFrontend {
95
116
let add_retries_counter = meter. u64_counter ( "add_retries" ) . build ( ) ;
96
117
let update_retries_counter = meter. u64_counter ( "update_retries" ) . build ( ) ;
97
118
let upsert_retries_counter = meter. u64_counter ( "upsert_retries" ) . build ( ) ;
98
- let search_retries_counter = meter. u64_counter ( "search_retries" ) . build ( ) ;
99
119
let metering_fork_counter = meter. u64_counter ( "metering_events_sent.fork" ) . with_description ( "The number of fork metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
100
120
let metering_read_counter = meter. u64_counter ( "metering_events_sent.read" ) . with_description ( "The number of read metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
101
121
let metering_write_counter = meter. u64_counter ( "metering_events_sent.write" ) . with_description ( "The number of write metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
@@ -144,7 +164,6 @@ impl ServiceBasedFrontend {
144
164
create_db_retries_counter,
145
165
delete_db_retries_counter,
146
166
delete_collection_retries_counter,
147
- search_retries_counter,
148
167
metering_fork_counter,
149
168
metering_read_counter,
150
169
metering_write_counter,
@@ -1578,7 +1597,9 @@ impl ServiceBasedFrontend {
1578
1597
} ;
1579
1598
1580
1599
if let Some ( event) = read_event {
1581
- event. submit ( ) . await ;
1600
+ if let Ok ( ( ) ) = event. submit ( ) . await {
1601
+ self . metrics . metering_read_counter . add ( 1 , & [ ] ) ;
1602
+ }
1582
1603
}
1583
1604
1584
1605
Ok ( records)
@@ -1997,10 +2018,8 @@ impl ServiceBasedFrontend {
1997
2018
// TODO: The dispatch logic is mostly the same for count/get/query/search, we should consider unifying them
1998
2019
// Get collection and segments once for all queries
1999
2020
let collection_and_segments = self
2000
- . collections_with_segments_provider
2001
- . get_collection_with_segments ( request. collection_id )
2002
- . await
2003
- . map_err ( |err| QueryError :: Other ( Box :: new ( err) as Box < dyn ChromaError > ) ) ?;
2021
+ . retryable_get_collection_with_segments ( request. collection_id )
2022
+ . await ?;
2004
2023
2005
2024
let latest_collection_logical_size_bytes = collection_and_segments
2006
2025
. collection
@@ -2032,8 +2051,36 @@ impl ServiceBasedFrontend {
2032
2051
payloads : request. searches ,
2033
2052
} ;
2034
2053
2054
+ let collection_id = search_plan
2055
+ . scan
2056
+ . collection_and_segments
2057
+ . collection
2058
+ . collection_id ;
2059
+
2035
2060
// Execute the single search plan using the executor
2036
- let result = self . executor . search ( search_plan) . await ?;
2061
+ let result = self
2062
+ . executor
2063
+ . search ( search_plan. clone ( ) , |code : tonic:: Code | {
2064
+ let mut provider = self . collections_with_segments_provider . clone ( ) ;
2065
+ let mut search_replanned = search_plan. clone ( ) ;
2066
+ async move {
2067
+ if code == tonic:: Code :: NotFound {
2068
+ provider
2069
+ . collections_with_segments_cache
2070
+ . remove ( & collection_id)
2071
+ . await ;
2072
+ let collection_and_segments = provider
2073
+ . get_collection_with_segments ( collection_id)
2074
+ . await
2075
+ . map_err ( |err| Box :: new ( err) as Box < dyn ChromaError > ) ?;
2076
+ search_replanned. scan = Scan {
2077
+ collection_and_segments,
2078
+ } ;
2079
+ }
2080
+ Ok ( search_replanned)
2081
+ }
2082
+ } )
2083
+ . await ?;
2037
2084
2038
2085
// Calculate return bytes (approximate size of the response)
2039
2086
let return_bytes = result. size_bytes ( ) ;
@@ -2085,50 +2132,7 @@ impl ServiceBasedFrontend {
2085
2132
}
2086
2133
2087
2134
/// Public search entry point: takes a `SearchRequest` and returns a `SearchResponse` or a `QueryError`.
///
/// This block is shown as a diff hunk. The `-` lines are the removed wrapper, which retried
/// `retryable_search` when the error code was `NotFound` or `Unknown`, removed the collection's
/// entry from `collections_with_segments_cache` on `NotFound`, and added the number of retries
/// to `search_retries_counter`. The single `+` line replaces all of that with a direct call to
/// `retryable_search`.
/// NOTE(review): retrying and cache invalidation presumably now happen further down the call
/// chain (inside `retryable_search` / the executor) — confirm against the rest of the change.
pub async fn search ( & mut self , request : SearchRequest ) -> Result < SearchResponse , QueryError > {
2088
- // TODO: The retry logic is mostly the same for count/get/query/search, we should consider unifying them
2089
- let retries = Arc :: new ( AtomicUsize :: new ( 0 ) ) ;
2090
- let search_to_retry = || {
2091
- let mut self_clone = self . clone ( ) ;
2092
- let request_clone = request. clone ( ) ;
2093
- let cache_clone = self
2094
- . collections_with_segments_provider
2095
- . collections_with_segments_cache
2096
- . clone ( ) ;
2097
- async move {
2098
- let res = self_clone. retryable_search ( request_clone) . await ;
2099
- match res {
2100
- Ok ( res) => Ok ( res) ,
2101
- Err ( e) => {
2102
- if e. code ( ) == ErrorCodes :: NotFound {
2103
- tracing:: info!(
2104
- "Invalidating cache for collection {}" ,
2105
- request. collection_id
2106
- ) ;
2107
- cache_clone. remove ( & request. collection_id ) . await ;
2108
- }
2109
- Err ( e)
2110
- }
2111
- }
2112
- }
2113
- } ;
2114
- let res = search_to_retry
2115
- . retry ( self . collections_with_segments_provider . get_retry_backoff ( ) )
2116
- // NOTE: Transport level errors will manifest as unknown errors, and they should also be retried
2117
- . when ( |e| matches ! ( e. code( ) , ErrorCodes :: NotFound | ErrorCodes :: Unknown ) )
2118
- . notify ( |_, _| {
2119
- let retried = retries. fetch_add ( 1 , Ordering :: Relaxed ) ;
2120
- if retried > 0 {
2121
- tracing:: info!(
2122
- "Retrying search() request for collection {}" ,
2123
- request. collection_id
2124
- ) ;
2125
- }
2126
- } )
2127
- . await ;
2128
- self . metrics
2129
- . search_retries_counter
2130
- . add ( retries. load ( Ordering :: Relaxed ) as u64 , & [ ] ) ;
2131
- res
2135
+ self . retryable_search ( request) . await
2132
2136
}
2133
2137
2134
2138
pub async fn healthcheck ( & self ) -> HealthCheckResponse {
0 commit comments