Skip to content

Commit 66d8fa2

Browse files
committed
Fix incorrect normalization of \rX EOL sequences where X is a char which is UTF-8 encoded as [c2 xx], except [c2 85]
1 parent f8a8364 commit 66d8fa2

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

Changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@
1818

1919
### Bug Fixes
2020

21+
- [#895]: Fix incorrect normalization of `\rX` EOL sequences where `X` is a char which is
22+
UTF-8 encoded as [c2 xx], except [c2 85].
23+
2124
### Misc Changes
2225

26+
[#895]: https://github.com/tafia/quick-xml/pull/895
27+
2328

2429
## 0.38.2 -- 2025-08-19
2530

src/escape.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -388,15 +388,15 @@ fn normalize_xml_eol_step(normalized: &mut String, text: &str, index: usize, ch:
388388
normalized.push(ch);
389389
return index + 2; // skip \r\n
390390
}
391-
// Because input is correct UTF-8 and in UTF-8 every character has
392-
// an unique prefix, byte C2 means only start of #x85 character
393391
if next == 0xC2 {
392+
// UTF-8 encoding of #x85 character is [c2 85]
394393
if index + 2 < input.len() && input[index + 2] == 0x85 {
395394
normalized.push(ch);
396395
} else {
396+
normalized.push(ch);
397397
// NOTE: unsafe { text.get_unchecked(index + 1..index + 3) } could be used because
398398
// we are sure that the index is within the string
399-
normalized.push_str(&text[index..index + 3]);
399+
normalized.push_str(&text[index + 1..index + 3]);
400400
}
401401
return index + 3; // skip \r + UTF-8 encoding of character (c2 xx)
402402
}
@@ -2173,7 +2173,10 @@ mod normalization {
21732173
if ch == '\u{0085}' {
21742174
assert_eq!(normalize_xml_eols(input), "\n", "{}", description);
21752175
} else {
2176-
assert_eq!(normalize_xml_eols(input), input, "{}", description);
2176+
let mut expected = utf8.clone();
2177+
expected[0] = b'\n';
2178+
let expected = std::str::from_utf8(&expected).expect(&description);
2179+
assert_eq!(normalize_xml_eols(input), expected, "{}", description);
21772180
}
21782181
}
21792182
assert_eq!((first..=last).count(), 64);

0 commit comments

Comments
 (0)