|
2 | 2 |
|
3 | 3 | use memchr::{memchr, memchr2_iter, memchr3}; |
4 | 4 | use std::borrow::Cow; |
| 5 | +use std::fmt::{self, Write}; |
5 | 6 | use std::num::ParseIntError; |
6 | 7 | use std::ops::Range; |
7 | 8 |
|
@@ -147,54 +148,55 @@ pub fn minimal_escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> { |
147 | 148 | _escape(raw, |ch| matches!(ch, b'<' | b'&')) |
148 | 149 | } |
149 | 150 |
|
/// Writes `value[from..to]` to `writer` unchanged, then writes the XML escape
/// sequence for the single byte located at index `to` of `value`.
///
/// The byte at `to` must be one of `<`, `>`, `'`, `&`, `"`, tab, line feed,
/// carriage return or space; the caller is responsible for only passing
/// positions of such bytes.
///
/// # Errors
///
/// Returns whatever error `writer` reports from `write_str`.
///
/// # Panics
///
/// Panics if `from..to` is not a valid range on char boundaries of `value`,
/// if `to` is out of bounds, or if the byte at `to` is not one of the
/// characters listed above.
pub(crate) fn escape_char<W>(writer: &mut W, value: &str, from: usize, to: usize) -> fmt::Result
where
    W: fmt::Write,
{
    // Everything before the special byte is copied verbatim.
    writer.write_str(&value[from..to])?;

    let replacement = match value.as_bytes()[to] {
        b'<' => "&lt;",
        b'>' => "&gt;",
        b'\'' => "&apos;",
        b'&' => "&amp;",
        b'"' => "&quot;",

        // This set of escapes handles characters that should be escaped
        // in elements of xs:lists, because those characters work as
        // delimiters of list elements
        b'\t' => "&#9;",
        b'\n' => "&#10;",
        b'\r' => "&#13;",
        b' ' => "&#32;",
        _ => unreachable!(
            "Only '<', '>', '\\'', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped"
        ),
    };
    writer.write_str(replacement)
}
| 174 | + |
150 | 175 | /// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`, |
151 | 176 | /// `&`, `'`, `"`) with their corresponding xml escaped value. |
152 | | -pub(crate) fn _escape<'a, F: Fn(u8) -> bool>( |
153 | | - raw: impl Into<Cow<'a, str>>, |
154 | | - escape_chars: F, |
155 | | -) -> Cow<'a, str> { |
| 177 | +fn _escape<'a, F: Fn(u8) -> bool>(raw: impl Into<Cow<'a, str>>, escape_chars: F) -> Cow<'a, str> { |
156 | 178 | let raw = raw.into(); |
157 | 179 | let bytes = raw.as_bytes(); |
158 | 180 | let mut escaped = None; |
159 | 181 | let mut iter = bytes.iter(); |
160 | 182 | let mut pos = 0; |
161 | 183 | while let Some(i) = iter.position(|&b| escape_chars(b)) { |
162 | 184 | if escaped.is_none() { |
163 | | - escaped = Some(Vec::with_capacity(raw.len())); |
| 185 | + escaped = Some(String::with_capacity(raw.len())); |
164 | 186 | } |
165 | 187 | let escaped = escaped.as_mut().expect("initialized"); |
166 | 188 | let new_pos = pos + i; |
167 | | - escaped.extend_from_slice(&bytes[pos..new_pos]); |
168 | | - match bytes[new_pos] { |
169 | | - b'<' => escaped.extend_from_slice(b"<"), |
170 | | - b'>' => escaped.extend_from_slice(b">"), |
171 | | - b'\'' => escaped.extend_from_slice(b"'"), |
172 | | - b'&' => escaped.extend_from_slice(b"&"), |
173 | | - b'"' => escaped.extend_from_slice(b"""), |
174 | | - |
175 | | - // This set of escapes handles characters that should be escaped |
176 | | - // in elements of xs:lists, because those characters works as |
177 | | - // delimiters of list elements |
178 | | - b'\t' => escaped.extend_from_slice(b"	"), |
179 | | - b'\n' => escaped.extend_from_slice(b" "), |
180 | | - b'\r' => escaped.extend_from_slice(b" "), |
181 | | - b' ' => escaped.extend_from_slice(b" "), |
182 | | - _ => unreachable!( |
183 | | - "Only '<', '>','\', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped" |
184 | | - ), |
185 | | - } |
| 189 | + // SAFETY: It should fail only on OOM |
| 190 | + escape_char(escaped, &raw, pos, new_pos).unwrap(); |
186 | 191 | pos = new_pos + 1; |
187 | 192 | } |
188 | 193 |
|
189 | 194 | if let Some(mut escaped) = escaped { |
190 | | - if let Some(raw) = bytes.get(pos..) { |
191 | | - escaped.extend_from_slice(raw); |
| 195 | + if let Some(raw) = raw.get(pos..) { |
| 196 | + // SAFETY: It should fail only on OOM |
| 197 | + escaped.write_str(raw).unwrap(); |
192 | 198 | } |
193 | | - // SAFETY: we operate on UTF-8 input and search for an one byte chars only, |
194 | | - // so all slices that was put to the `escaped` is a valid UTF-8 encoded strings |
195 | | - // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }` |
196 | | - // if unsafe code will be allowed |
197 | | - Cow::Owned(String::from_utf8(escaped).unwrap()) |
| 199 | + Cow::Owned(escaped) |
198 | 200 | } else { |
199 | 201 | raw |
200 | 202 | } |
|
0 commit comments