Skip to content

Commit f206a71

Browse files
committed
temp
1 parent 21687c7 commit f206a71

File tree

1 file changed

+80
-85
lines changed

1 file changed

+80
-85
lines changed

src/events/attributes.rs

Lines changed: 80 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,88 @@ pub struct Attribute<'a> {
3333
}
3434

3535
impl<'a> Attribute<'a> {
36+
/// Normalize the attribute value according to xml specification section 3.3.3
3637
///
38+
/// https://www.w3.org/TR/xml/#AVNormalize
39+
///
40+
/// * Whitespace-like characters (\r, \n, \t, ' ') are trimmed from the ends of the value
41+
/// * Sequences of whitespace-like characters are replaced with a single whitespace character
42+
/// * Character and entity references are substituted as defined by the spec
3743
pub fn normalized_value(&'a self) -> Result<Cow<'a, [u8]>, EscapeError> {
38-
let normalized = normalize_attribute_value(self.value.as_ref());
44+
// TODO: character references, entity references, error handling associated with those
45+
46+
#[derive(PartialEq)]
47+
enum ParseState {
48+
Space,
49+
CDATA,
50+
}
51+
52+
// Trim characters from the beginning and end of the attribute value - this can't fail.
53+
fn trim_value(attr: &[u8]) -> &[u8] {
54+
let first_non_space_char = attr.iter().position(|c| !is_whitespace(*c));
55+
56+
if first_non_space_char.is_none() {
57+
// The entire value was whitespace-like characters
58+
return b"";
59+
}
60+
61+
let last_non_space_char = attr.iter().rposition(|c| !is_whitespace(*c));
62+
63+
// Trim all whitespace-like characters away from the beginning and end of the attribute value.
64+
let begin = first_non_space_char.unwrap();
65+
let end = last_non_space_char.unwrap_or(attr.len());
66+
&attr[begin..=end]
67+
}
68+
69+
let trimmed_attr = trim_value(self.value.as_ref());
70+
71+
// A new buffer is only created when we encounter a situation that requires it.
72+
let mut normalized: Option<Vec<u8>> = None;
73+
// We start on character data because all whitespace-like characters are already trimmed away.
74+
let mut current_state = ParseState::CDATA;
75+
76+
// Perform a single pass over the trimmed attribute value. If we encounter a character / entity reference
77+
// or whitespace-like characters that need to be substituted, copy everything processed thus far to a new
78+
// buffer and continue using this buffer.
79+
for (idx, ch) in trimmed_attr.iter().enumerate() {
80+
match ch {
81+
b'\n' | b'\r' | b'\t' | b' ' => match current_state {
82+
ParseState::Space => match normalized {
83+
Some(_) => continue,
84+
None => normalized = Some(Vec::from(&trimmed_attr[..idx])),
85+
},
86+
ParseState::CDATA => {
87+
current_state = ParseState::Space;
88+
match normalized.as_mut() {
89+
Some(buf) => buf.push(b' '),
90+
None => {
91+
let mut buf = Vec::from(&trimmed_attr[..idx]);
92+
buf.push(b' ');
93+
normalized = Some(buf);
94+
}
95+
}
96+
}
97+
},
98+
c @ _ => match current_state {
99+
ParseState::Space => {
100+
current_state = ParseState::CDATA;
101+
if let Some(normalized) = normalized.as_mut() {
102+
normalized.push(*c);
103+
}
104+
}
105+
ParseState::CDATA => {
106+
if let Some(normalized) = normalized.as_mut() {
107+
normalized.push(*c);
108+
}
109+
}
110+
},
111+
}
112+
}
113+
114+
let normalized = match normalized {
115+
Some(normalized) => Cow::Owned(normalized),
116+
None => Cow::Borrowed(trimmed_attr),
117+
};
39118
let escaped = do_unescape(&*normalized, None)?;
40119
Ok(Cow::Owned(escaped.into_owned()))
41120
}
@@ -190,90 +269,6 @@ impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
190269
}
191270
}
192271

193-
/// Normalize the attribute value according to xml specification section 3.3.3
///
/// https://www.w3.org/TR/xml/#AVNormalize
///
/// * Whitespace-like characters (\r, \n, \t, ' ') are trimmed from the ends of the value
/// * Sequences of whitespace-like characters are replaced with a single `b' '`
///
/// Returns `Cow::Borrowed` (a sub-slice of `attr`, or an empty slice) when trimming alone
/// yields the normalized value, and `Cow::Owned` only when a byte had to be rewritten or
/// dropped.
fn normalize_attribute_value(attr: &[u8]) -> Cow<[u8]> {
    // TODO: character references, entity references, error handling associated with those

    // Single definition of "whitespace-like" shared by the trimming and collapsing steps.
    fn is_space_like(byte: u8) -> bool {
        match byte {
            b' ' | b'\t' | b'\r' | b'\n' => true,
            _ => false,
        }
    }

    // Trim whitespace-like bytes from both ends.
    let first = match attr.iter().position(|&b| !is_space_like(b)) {
        Some(first) => first,
        // The entire value was whitespace-like characters (or empty).
        None => return Cow::Borrowed(&b""[..]),
    };
    // A non-whitespace byte exists at `first`, so the reverse search always succeeds;
    // `first` is an in-bounds fallback that keeps this path free of panics.
    let last = attr
        .iter()
        .rposition(|&b| !is_space_like(b))
        .unwrap_or(first);
    let trimmed = &attr[first..=last];

    // The buffer is created lazily: as long as the output would be byte-identical to the
    // input (no collapsed run, no `\t`/`\r`/`\n` rewritten to a space) nothing is copied.
    let mut normalized: Option<Vec<u8>> = None;
    // The trimmed value never starts with whitespace, so we begin "outside" a run.
    let mut prev_was_space = false;

    for (idx, &byte) in trimmed.iter().enumerate() {
        let is_space = is_space_like(byte);

        if is_space && prev_was_space {
            // Second or later byte of a whitespace run: it is dropped. If we were still
            // borrowing, everything before `idx` is already in normalized form (the run's
            // first byte must have been a literal space), so copy it verbatim.
            if normalized.is_none() {
                normalized = Some(trimmed[..idx].to_vec());
            }
        } else {
            // First byte of a whitespace run becomes a single space; other bytes are kept.
            let out = if is_space { b' ' } else { byte };
            match normalized.as_mut() {
                Some(buf) => buf.push(out),
                None if out != byte => {
                    // First byte that differs from the input: switch to an owned buffer.
                    // The output can never be longer than the trimmed input.
                    let mut buf = Vec::with_capacity(trimmed.len());
                    buf.extend_from_slice(&trimmed[..idx]);
                    buf.push(out);
                    normalized = Some(buf);
                }
                None => {}
            }
        }

        prev_was_space = is_space;
    }

    match normalized {
        Some(buf) => Cow::Owned(buf),
        None => Cow::Borrowed(trimmed),
    }
}
276-
277272
////////////////////////////////////////////////////////////////////////////////////////////////////
278273

279274
/// Iterator over XML attributes.

0 commit comments

Comments
 (0)