@@ -75,7 +75,6 @@ struct Metrics {
75
75
create_tenant_retries_counter : Counter < u64 > ,
76
76
update_tenant_retries_counter : Counter < u64 > ,
77
77
get_collection_with_segments_counter : Counter < u64 > ,
78
- search_retries_counter : Counter < u64 > ,
79
78
metering_fork_counter : Counter < u64 > ,
80
79
metering_read_counter : Counter < u64 > ,
81
80
metering_write_counter : Counter < u64 > ,
@@ -113,7 +112,6 @@ impl ServiceBasedFrontend {
113
112
let add_retries_counter = meter. u64_counter ( "add_retries" ) . build ( ) ;
114
113
let update_retries_counter = meter. u64_counter ( "update_retries" ) . build ( ) ;
115
114
let upsert_retries_counter = meter. u64_counter ( "upsert_retries" ) . build ( ) ;
116
- let search_retries_counter = meter. u64_counter ( "search_retries" ) . build ( ) ;
117
115
let metering_fork_counter = meter. u64_counter ( "metering_events_sent.fork" ) . with_description ( "The number of fork metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
118
116
let metering_read_counter = meter. u64_counter ( "metering_events_sent.read" ) . with_description ( "The number of read metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
119
117
let metering_write_counter = meter. u64_counter ( "metering_events_sent.write" ) . with_description ( "The number of write metering events sent by the frontend to the metering event receiver." ) . build ( ) ;
@@ -162,7 +160,6 @@ impl ServiceBasedFrontend {
162
160
create_db_retries_counter,
163
161
delete_db_retries_counter,
164
162
delete_collection_retries_counter,
165
- search_retries_counter,
166
163
metering_fork_counter,
167
164
metering_read_counter,
168
165
metering_write_counter,
@@ -1577,7 +1574,9 @@ impl ServiceBasedFrontend {
1577
1574
} ;
1578
1575
1579
1576
if let Some ( event) = read_event {
1580
- event. submit ( ) . await ;
1577
+ if let Ok ( ( ) ) = event. submit ( ) . await {
1578
+ self . metrics . metering_read_counter . add ( 1 , & [ ] ) ;
1579
+ }
1581
1580
}
1582
1581
1583
1582
Ok ( records)
@@ -1996,10 +1995,8 @@ impl ServiceBasedFrontend {
1996
1995
// TODO: The dispatch logic is mostly the same for count/get/query/search, we should consider unifying them
1997
1996
// Get collection and segments once for all queries
1998
1997
let collection_and_segments = self
1999
- . collections_with_segments_provider
2000
- . get_collection_with_segments ( request. collection_id )
2001
- . await
2002
- . map_err ( |err| QueryError :: Other ( Box :: new ( err) as Box < dyn ChromaError > ) ) ?;
1998
+ . retryable_get_collection_with_segments ( request. collection_id )
1999
+ . await ?;
2003
2000
2004
2001
let latest_collection_logical_size_bytes = collection_and_segments
2005
2002
. collection
@@ -2032,8 +2029,36 @@ impl ServiceBasedFrontend {
2032
2029
payloads : request. searches ,
2033
2030
} ;
2034
2031
2032
+ let collection_id = search_plan
2033
+ . scan
2034
+ . collection_and_segments
2035
+ . collection
2036
+ . collection_id ;
2037
+
2035
2038
// Execute the single search plan using the executor
2036
- let result = self . executor . search ( search_plan) . await ?;
2039
+ let result = self
2040
+ . executor
2041
+ . search ( search_plan. clone ( ) , |code : tonic:: Code | {
2042
+ let mut provider = self . collections_with_segments_provider . clone ( ) ;
2043
+ let mut search_replanned = search_plan. clone ( ) ;
2044
+ async move {
2045
+ if code == tonic:: Code :: NotFound {
2046
+ provider
2047
+ . collections_with_segments_cache
2048
+ . remove ( & collection_id)
2049
+ . await ;
2050
+ let collection_and_segments = provider
2051
+ . get_collection_with_segments ( collection_id)
2052
+ . await
2053
+ . map_err ( |err| Box :: new ( err) as Box < dyn ChromaError > ) ?;
2054
+ search_replanned. scan = Scan {
2055
+ collection_and_segments,
2056
+ } ;
2057
+ }
2058
+ Ok ( search_replanned)
2059
+ }
2060
+ } )
2061
+ . await ?;
2037
2062
2038
2063
// Calculate return bytes (approximate size of the response)
2039
2064
let return_bytes = result. size_bytes ( ) ;
@@ -2079,50 +2104,7 @@ impl ServiceBasedFrontend {
2079
2104
}
2080
2105
2081
2106
pub async fn search ( & mut self , request : SearchRequest ) -> Result < SearchResponse , QueryError > {
2082
- // TODO: The retry logic is mostly the same for count/get/query/search, we should consider unifying them
2083
- let retries = Arc :: new ( AtomicUsize :: new ( 0 ) ) ;
2084
- let search_to_retry = || {
2085
- let mut self_clone = self . clone ( ) ;
2086
- let request_clone = request. clone ( ) ;
2087
- let cache_clone = self
2088
- . collections_with_segments_provider
2089
- . collections_with_segments_cache
2090
- . clone ( ) ;
2091
- async move {
2092
- let res = self_clone. retryable_search ( request_clone) . await ;
2093
- match res {
2094
- Ok ( res) => Ok ( res) ,
2095
- Err ( e) => {
2096
- if e. code ( ) == ErrorCodes :: NotFound {
2097
- tracing:: info!(
2098
- "Invalidating cache for collection {}" ,
2099
- request. collection_id
2100
- ) ;
2101
- cache_clone. remove ( & request. collection_id ) . await ;
2102
- }
2103
- Err ( e)
2104
- }
2105
- }
2106
- }
2107
- } ;
2108
- let res = search_to_retry
2109
- . retry ( self . collections_with_segments_provider . get_retry_backoff ( ) )
2110
- // NOTE: Transport level errors will manifest as unknown errors, and they should also be retried
2111
- . when ( |e| matches ! ( e. code( ) , ErrorCodes :: NotFound | ErrorCodes :: Unknown ) )
2112
- . notify ( |_, _| {
2113
- let retried = retries. fetch_add ( 1 , Ordering :: Relaxed ) ;
2114
- if retried > 0 {
2115
- tracing:: info!(
2116
- "Retrying search() request for collection {}" ,
2117
- request. collection_id
2118
- ) ;
2119
- }
2120
- } )
2121
- . await ;
2122
- self . metrics
2123
- . search_retries_counter
2124
- . add ( retries. load ( Ordering :: Relaxed ) as u64 , & [ ] ) ;
2125
- res
2107
+ self . retryable_search ( request) . await
2126
2108
}
2127
2109
2128
2110
pub async fn healthcheck ( & self ) -> HealthCheckResponse {
0 commit comments