Skip to content

Commit b13e32d

Browse files
committed
Optimize proc maps parsing code size
The current code size is really wastefully large. Originally, it was 1500 lines of assembly in Godbolt, now I reduced it to just under 800. The effect of `.text` size in hello world is from 297028 to 295453 which is small but not completely irrelevant. It's just a small fish in the bigger pond of DWARF parsing, but it's better than nothing. I extracted the parsing of each component into a separate function to allow for better sharing. I replaced the string methods with manual iteration since that results in simpler code because it has to handle fewer cases. I also had to use unsafe because the bounds checks were sadly not optimized out and were really large. I also made the parser less resilient against whitespace, now it no longer handles Unicode whitespace (an obvious simplification) but also no longer handles any whitespace except the normal SP. I think this is fine, it seems highly unlikely that a system would suddenly use another type of whitespace (but I guess not impossible?). Another simplification was simply removing the parsing of unused fields that were not needed.
1 parent b65ab93 commit b13e32d

File tree

1 file changed

+58
-75
lines changed

1 file changed

+58
-75
lines changed

src/symbolize/gimli/parse_running_mmaps_unix.rs

Lines changed: 58 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,8 @@ use core::str::FromStr;
1313
pub(super) struct MapsEntry {
1414
/// start (inclusive) and limit (exclusive) of address range.
1515
address: (usize, usize),
16-
/// The perms field are the permissions for the entry
17-
///
18-
/// r = read
19-
/// w = write
20-
/// x = execute
21-
/// s = shared
22-
/// p = private (copy on write)
23-
perms: [char; 4],
2416
/// Offset into the file (or "whatever").
2517
offset: u64,
26-
/// device (major, minor)
27-
dev: (usize, usize),
28-
/// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitalized data aka BSS).
29-
inode: usize,
3018
/// Usually the file backing the mapping.
3119
///
3220
/// Note: The man page for proc includes a note about "coordination" by
@@ -56,15 +44,21 @@ pub(super) struct MapsEntry {
5644
}
5745

5846
pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> {
59-
let mut v = Vec::new();
6047
let mut proc_self_maps =
6148
File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?;
6249
let mut buf = String::new();
6350
let _bytes_read = proc_self_maps
6451
.read_to_string(&mut buf)
6552
.map_err(|_| "Couldn't read /proc/self/maps")?;
66-
for line in buf.lines() {
53+
54+
let mut v = Vec::new();
55+
let mut buf = buf.as_str();
56+
while let Some(match_idx) = buf.bytes().position(|b| b == b'\n') {
57+
let line = unsafe { buf.get_unchecked(..match_idx) };
58+
6759
v.push(line.parse()?);
60+
61+
buf = unsafe { buf.get_unchecked((match_idx + 1)..) };
6862
}
6963

7064
Ok(v)
@@ -92,70 +86,86 @@ impl FromStr for MapsEntry {
9286
// e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]"
9387
// e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
9488
// e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0"
95-
//
96-
// Note that paths may contain spaces, so we can't use `str::split` for parsing (until
97-
// Split::remainder is stabilized #77998).
9889
fn from_str(s: &str) -> Result<Self, Self::Err> {
99-
let (range_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
90+
// While there are nicer standard library APIs available for this, we aim for minimal code size.
91+
92+
let mut state = s;
93+
94+
fn parse_start<'a>(state: &mut &'a str) -> &'a str {
95+
let start_idx = state.bytes().position(|b| b != b' ');
96+
if let Some(start_idx) = start_idx {
97+
// SAFETY: It comes from position, so it's in bounds.
98+
// It must be on a UTF-8 boundary as it's the first byte that isn't ' '.
99+
*state = unsafe { state.get_unchecked(start_idx..) };
100+
}
101+
let match_idx = state.bytes().position(|b| b == b' ');
102+
match match_idx {
103+
None => {
104+
let result = *state;
105+
*state = "";
106+
result
107+
}
108+
Some(match_idx) => {
109+
// SAFETY: match_index comes from .bytes().position() of an ASCII character,
110+
// so it's both in bounds and a UTF-8 boundary
111+
let result = unsafe { state.get_unchecked(..match_idx) };
112+
// SAFETY: Since match_idx is the ' ', there must be at least the end after it.
113+
*state = unsafe { state.get_unchecked((match_idx + 1)..) };
114+
result
115+
}
116+
}
117+
}
118+
119+
fn error(msg: &str) -> &str {
120+
if cfg!(debug_assertions) {
121+
msg
122+
} else {
123+
"invalid map entry"
124+
}
125+
}
126+
127+
let range_str = parse_start(&mut state);
100128
if range_str.is_empty() {
101-
return Err("Couldn't find address");
129+
return Err(error("Couldn't find address"));
102130
}
103131

104-
let (perms_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
132+
let perms_str = parse_start(&mut state);
105133
if perms_str.is_empty() {
106-
return Err("Couldn't find permissions");
134+
return Err(error("Couldn't find permissions"));
107135
}
108136

109-
let (offset_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
137+
let offset_str = parse_start(&mut state);
110138
if offset_str.is_empty() {
111-
return Err("Couldn't find offset");
139+
return Err(error("Couldn't find offset"));
112140
}
113141

114-
let (dev_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
142+
let dev_str = parse_start(&mut state);
115143
if dev_str.is_empty() {
116-
return Err("Couldn't find dev");
144+
return Err(error("Couldn't find dev"));
117145
}
118146

119-
let (inode_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
147+
let inode_str = parse_start(&mut state);
120148
if inode_str.is_empty() {
121149
return Err("Couldn't find inode");
122150
}
123151

124152
// Pathname may be omitted in which case it will be empty
125-
let pathname_str = s.trim_start();
153+
let pathname_str = state.trim_ascii_start();
126154

127-
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
128-
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
155+
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number"));
156+
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number"));
129157

130158
let address = if let Some((start, limit)) = range_str.split_once('-') {
131159
(hex(start)?, hex(limit)?)
132160
} else {
133-
return Err("Couldn't parse address range");
134-
};
135-
let perms: [char; 4] = {
136-
let mut chars = perms_str.chars();
137-
let mut c = || chars.next().ok_or("insufficient perms");
138-
let perms = [c()?, c()?, c()?, c()?];
139-
if chars.next().is_some() {
140-
return Err("too many perms");
141-
}
142-
perms
161+
return Err(error("Couldn't parse address range"));
143162
};
144163
let offset = hex64(offset_str)?;
145-
let dev = if let Some((major, minor)) = dev_str.split_once(':') {
146-
(hex(major)?, hex(minor)?)
147-
} else {
148-
return Err("Couldn't parse dev");
149-
};
150-
let inode = hex(inode_str)?;
151164
let pathname = pathname_str.into();
152165

153166
Ok(MapsEntry {
154167
address,
155-
perms,
156168
offset,
157-
dev,
158-
inode,
159169
pathname,
160170
})
161171
}
@@ -172,10 +182,7 @@ fn check_maps_entry_parsing_64bit() {
172182
.unwrap(),
173183
MapsEntry {
174184
address: (0xffffffffff600000, 0xffffffffff601000),
175-
perms: ['-', '-', 'x', 'p'],
176185
offset: 0x00000000,
177-
dev: (0x00, 0x00),
178-
inode: 0x0,
179186
pathname: "[vsyscall]".into(),
180187
}
181188
);
@@ -187,10 +194,7 @@ fn check_maps_entry_parsing_64bit() {
187194
.unwrap(),
188195
MapsEntry {
189196
address: (0x7f5985f46000, 0x7f5985f48000),
190-
perms: ['r', 'w', '-', 'p'],
191197
offset: 0x00039000,
192-
dev: (0x103, 0x06),
193-
inode: 0x76021795,
194198
pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(),
195199
}
196200
);
@@ -200,10 +204,7 @@ fn check_maps_entry_parsing_64bit() {
200204
.unwrap(),
201205
MapsEntry {
202206
address: (0x35b1a21000, 0x35b1a22000),
203-
perms: ['r', 'w', '-', 'p'],
204207
offset: 0x00000000,
205-
dev: (0x00, 0x00),
206-
inode: 0x0,
207208
pathname: Default::default(),
208209
}
209210
);
@@ -224,10 +225,7 @@ fn check_maps_entry_parsing_32bit() {
224225
.unwrap(),
225226
MapsEntry {
226227
address: (0x08056000, 0x08077000),
227-
perms: ['r', 'w', '-', 'p'],
228228
offset: 0x00000000,
229-
dev: (0x00, 0x00),
230-
inode: 0x0,
231229
pathname: "[heap]".into(),
232230
}
233231
);
@@ -239,10 +237,7 @@ fn check_maps_entry_parsing_32bit() {
239237
.unwrap(),
240238
MapsEntry {
241239
address: (0xb7c79000, 0xb7e02000),
242-
perms: ['r', '-', '-', 'p'],
243240
offset: 0x00000000,
244-
dev: (0x08, 0x01),
245-
inode: 0x60662705,
246241
pathname: "/usr/lib/locale/locale-archive".into(),
247242
}
248243
);
@@ -252,10 +247,7 @@ fn check_maps_entry_parsing_32bit() {
252247
.unwrap(),
253248
MapsEntry {
254249
address: (0xb7e02000, 0xb7e03000),
255-
perms: ['r', 'w', '-', 'p'],
256250
offset: 0x00000000,
257-
dev: (0x00, 0x00),
258-
inode: 0x0,
259251
pathname: Default::default(),
260252
}
261253
);
@@ -266,10 +258,7 @@ fn check_maps_entry_parsing_32bit() {
266258
.unwrap(),
267259
MapsEntry {
268260
address: (0xb7c79000, 0xb7e02000),
269-
perms: ['r', '-', '-', 'p'],
270261
offset: 0x00000000,
271-
dev: (0x08, 0x01),
272-
inode: 0x60662705,
273262
pathname: "/executable/path/with some spaces".into(),
274263
}
275264
);
@@ -280,10 +269,7 @@ fn check_maps_entry_parsing_32bit() {
280269
.unwrap(),
281270
MapsEntry {
282271
address: (0xb7c79000, 0xb7e02000),
283-
perms: ['r', '-', '-', 'p'],
284272
offset: 0x00000000,
285-
dev: (0x08, 0x01),
286-
inode: 0x60662705,
287273
pathname: "/executable/path/with multiple-continuous spaces ".into(),
288274
}
289275
);
@@ -294,10 +280,7 @@ fn check_maps_entry_parsing_32bit() {
294280
.unwrap(),
295281
MapsEntry {
296282
address: (0xb7c79000, 0xb7e02000),
297-
perms: ['r', '-', '-', 'p'],
298283
offset: 0x00000000,
299-
dev: (0x08, 0x01),
300-
inode: 0x60662705,
301284
pathname: "/executable/path/starts-with-spaces".into(),
302285
}
303286
);

0 commit comments

Comments
 (0)