Skip to content

Commit d682af8

Browse files
committed
Auto merge of #147918 - yotamofek:pr/stringdex-fork, r=GuillaumeGomez
Upgrade `stringdex` to 0.0.3. Includes a bunch of optimizations for a nice perf win.
2 parents 5dbf406 + e921e28 commit d682af8

File tree

3 files changed

+53
-44
lines changed

3 files changed

+53
-44
lines changed

Cargo.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
12891289
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
12901290
dependencies = [
12911291
"libc",
1292-
"windows-sys 0.52.0",
1292+
"windows-sys 0.60.2",
12931293
]
12941294

12951295
[[package]]
@@ -2155,7 +2155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
21552155
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
21562156
dependencies = [
21572157
"cfg-if",
2158-
"windows-targets 0.52.6",
2158+
"windows-targets 0.53.3",
21592159
]
21602160

21612161
[[package]]
@@ -4907,7 +4907,7 @@ dependencies = [
49074907
"errno",
49084908
"libc",
49094909
"linux-raw-sys",
4910-
"windows-sys 0.52.0",
4910+
"windows-sys 0.61.2",
49114911
]
49124912

49134913
[[package]]
@@ -5275,9 +5275,9 @@ dependencies = [
52755275

52765276
[[package]]
52775277
name = "stringdex"
5278-
version = "0.0.2"
5278+
version = "0.0.3"
52795279
source = "registry+https://github.com/rust-lang/crates.io-index"
5280-
checksum = "18b3bd4f10d15ef859c40291769f0d85209de6b0f1c30713ff9cdf45ac43ea36"
5280+
checksum = "556a6126952cb2f5150057c98a77cc6c771027dea2825bf7fa03d3d638b0a4f8"
52815281
dependencies = [
52825282
"stacker",
52835283
]

src/librustdoc/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
2121
serde = { version = "1.0", features = ["derive"] }
2222
serde_json = "1.0"
2323
smallvec = "1.8.1"
24-
stringdex = "=0.0.2"
24+
stringdex = "=0.0.3"
2525
tempfile = "3"
2626
threadpool = "1.8.1"
2727
tikv-jemalloc-sys = { version = "0.6.1", optional = true, features = ['override_allocator_on_supported_platforms'] }

src/librustdoc/html/render/search_index.rs

Lines changed: 47 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ mod serde;
33

44
use std::collections::BTreeSet;
55
use std::collections::hash_map::Entry;
6+
use std::io;
67
use std::path::Path;
8+
use std::string::FromUtf8Error;
79

810
use ::serde::de::{self, Deserializer, Error as _};
911
use ::serde::ser::{SerializeSeq, Serializer};
@@ -95,21 +97,22 @@ impl SerializedSearchIndex {
9597
) -> Result<(), Error> {
9698
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
9799
let column_path = doc_root.join(format!("search.index/{column_name}/"));
100+
101+
let mut consume = |_, cell: &[u8]| {
102+
column.push(String::from_utf8(cell.to_vec())?);
103+
Ok::<_, FromUtf8Error>(())
104+
};
105+
98106
stringdex_internals::read_data_from_disk_column(
99107
root_path,
100108
column_name.as_bytes(),
101109
column_path.clone(),
102-
&mut |_id, item| {
103-
column.push(String::from_utf8(item.to_vec())?);
104-
Ok(())
105-
},
106-
)
107-
.map_err(
108-
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
109-
file: column_path,
110-
error: format!("failed to read column from disk: {error}"),
111-
},
110+
&mut consume,
112111
)
112+
.map_err(|error| Error {
113+
file: column_path,
114+
error: format!("failed to read column from disk: {error}"),
115+
})
113116
}
114117
fn perform_read_serde(
115118
resource_suffix: &str,
@@ -119,25 +122,26 @@ impl SerializedSearchIndex {
119122
) -> Result<(), Error> {
120123
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
121124
let column_path = doc_root.join(format!("search.index/{column_name}/"));
125+
126+
let mut consume = |_, cell: &[u8]| {
127+
if cell.is_empty() {
128+
column.push(None);
129+
} else {
130+
column.push(Some(serde_json::from_slice(cell)?));
131+
}
132+
Ok::<_, serde_json::Error>(())
133+
};
134+
122135
stringdex_internals::read_data_from_disk_column(
123136
root_path,
124137
column_name.as_bytes(),
125138
column_path.clone(),
126-
&mut |_id, item| {
127-
if item.is_empty() {
128-
column.push(None);
129-
} else {
130-
column.push(Some(serde_json::from_slice(item)?));
131-
}
132-
Ok(())
133-
},
134-
)
135-
.map_err(
136-
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
137-
file: column_path,
138-
error: format!("failed to read column from disk: {error}"),
139-
},
139+
&mut consume,
140140
)
141+
.map_err(|error| Error {
142+
file: column_path,
143+
error: format!("failed to read column from disk: {error}"),
144+
})
141145
}
142146
fn perform_read_postings(
143147
resource_suffix: &str,
@@ -147,23 +151,28 @@ impl SerializedSearchIndex {
147151
) -> Result<(), Error> {
148152
let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js"));
149153
let column_path = doc_root.join(format!("search.index/{column_name}/"));
154+
155+
fn consumer(
156+
column: &mut Vec<Vec<Vec<u32>>>,
157+
) -> impl FnMut(u32, &[u8]) -> io::Result<()> {
158+
|_, cell| {
159+
let mut postings = Vec::new();
160+
encode::read_postings_from_string(&mut postings, cell);
161+
column.push(postings);
162+
Ok(())
163+
}
164+
}
165+
150166
stringdex_internals::read_data_from_disk_column(
151167
root_path,
152168
column_name.as_bytes(),
153169
column_path.clone(),
154-
&mut |_id, buf| {
155-
let mut postings = Vec::new();
156-
encode::read_postings_from_string(&mut postings, buf);
157-
column.push(postings);
158-
Ok(())
159-
},
160-
)
161-
.map_err(
162-
|error: stringdex_internals::ReadDataError<Box<dyn std::error::Error>>| Error {
163-
file: column_path,
164-
error: format!("failed to read column from disk: {error}"),
165-
},
170+
&mut consumer(column),
166171
)
172+
.map_err(|error| Error {
173+
file: column_path,
174+
error: format!("failed to read column from disk: {error}"),
175+
})
167176
}
168177

169178
assert_eq!(names.len(), path_data.len());
@@ -1055,12 +1064,12 @@ impl Serialize for TypeData {
10551064
let mut buf = Vec::new();
10561065
encode::write_postings_to_string(&self.inverted_function_inputs_index, &mut buf);
10571066
let mut serialized_result = Vec::new();
1058-
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result);
1067+
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap();
10591068
seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?;
10601069
buf.clear();
10611070
serialized_result.clear();
10621071
encode::write_postings_to_string(&self.inverted_function_output_index, &mut buf);
1063-
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result);
1072+
stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap();
10641073
seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?;
10651074
if self.search_unbox {
10661075
seq.serialize_element(&1)?;

0 commit comments

Comments
 (0)