Skip to content

Commit 4e9e8a6

Browse files
Ninja3047 authored and Mingun committed
Implement serializing CDATA in SimpleTypeSerializer
1 parent 7ab6add commit 4e9e8a6

File tree

1 file changed

+132
-4
lines changed

1 file changed

+132
-4
lines changed

src/se/simple_type.rs

Lines changed: 132 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
66
use crate::escape::escape_char;
77
use crate::se::{QuoteLevel, SeError};
8+
use crate::utils::CDataIterator;
89
use serde::ser::{
910
Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct,
1011
SerializeTupleVariant, Serializer,
@@ -20,6 +21,9 @@ pub enum QuoteTarget {
2021
DoubleQAttr,
2122
/// Escape data for a single-quoted attribute. `'` always escaped
2223
SingleQAttr,
24+
/// Escape data for CDATA content. No escaping for `&` and `>`, but split
25+
/// content on `]]>` and make several CDATA sections
26+
CData,
2327
}
2428

2529
fn escape_into<W, F>(mut writer: W, value: &str, escape_chars: F) -> fmt::Result
@@ -44,14 +48,25 @@ where
4448

4549
/// Escapes an atomic value that could be part of an `xs:list`. All whitespace characters
4650
/// are additionally escaped
47-
fn escape_item<W>(writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result
51+
fn escape_item<W>(mut writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result
4852
where
4953
W: Write,
5054
{
5155
use QuoteLevel::*;
5256
use QuoteTarget::*;
5357

5458
match (target, level) {
59+
(CData, _) => {
60+
let mut it = CDataIterator::new(value);
61+
if let Some(part) = it.next() {
62+
writer.write_str(part)?;
63+
}
64+
while let Some(part) = it.next() {
65+
writer.write_str("]]><![CDATA[")?;
66+
writer.write_str(part)?;
67+
}
68+
Ok(())
69+
}
5570
(_, Full) => escape_into(writer, value, |ch| match ch {
5671
// Spaces used as delimiters of list items, cannot be used in the item
5772
b' ' | b'\r' | b'\n' | b'\t' => true,
@@ -116,14 +131,22 @@ where
116131
}
117132

118133
/// Escapes XSD simple type value
119-
fn escape_list<W>(writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result
134+
fn escape_list<W>(mut writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result
120135
where
121136
W: Write,
122137
{
123138
use QuoteLevel::*;
124139
use QuoteTarget::*;
125140

126141
match (target, level) {
142+
(CData, _) => {
143+
for part in CDataIterator::new(value) {
144+
writer.write_str("<![CDATA[")?;
145+
writer.write_str(part)?;
146+
writer.write_str("]]>")?;
147+
}
148+
Ok(())
149+
}
127150
(_, Full) => escape_into(writer, value, |ch| match ch {
128151
// Required characters to escape
129152
b'&' | b'<' | b'>' | b'\'' | b'\"' => true,
@@ -488,7 +511,10 @@ impl<W: Write> Serializer for SimpleTypeSerializer<W> {
488511
}
489512

490513
#[inline]
491-
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
514+
fn serialize_seq(mut self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
515+
if let QuoteTarget::CData = self.target {
516+
self.writer.write_str("<![CDATA[")?;
517+
}
492518
Ok(SimpleSeq {
493519
writer: self.writer,
494520
target: self.target,
@@ -585,7 +611,10 @@ impl<W: Write> SerializeSeq for SimpleSeq<W> {
585611
}
586612

587613
#[inline]
588-
fn end(self) -> Result<Self::Ok, Self::Error> {
614+
fn end(mut self) -> Result<Self::Ok, Self::Error> {
615+
if let QuoteTarget::CData = self.target {
616+
self.writer.write_str("]]>")?;
617+
}
589618
Ok(self.writer)
590619
}
591620
}
@@ -683,6 +712,7 @@ mod tests {
683712

684713
mod escape_item {
685714
use super::*;
715+
use pretty_assertions::assert_eq;
686716

687717
fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> String {
688718
let mut result = String::new();
@@ -808,10 +838,40 @@ mod tests {
808838
);
809839
}
810840
}
841+
842+
/// The escape function does not surround the text with `<![CDATA[` and `]]>`; that should be done outside
843+
#[test]
844+
fn cdata() {
845+
assert_eq!(
846+
escape_item(
847+
"text<\"'&>]]> \t\n\rtext",
848+
QuoteTarget::CData,
849+
QuoteLevel::Full
850+
),
851+
"text<\"'&>]]]]><![CDATA[> \t\n\rtext"
852+
);
853+
assert_eq!(
854+
escape_item(
855+
"text<\"'&>]]> \t\n\rtext",
856+
QuoteTarget::CData,
857+
QuoteLevel::Partial
858+
),
859+
"text<\"'&>]]]]><![CDATA[> \t\n\rtext"
860+
);
861+
assert_eq!(
862+
escape_item(
863+
"text<\"'&>]]> \t\n\rtext",
864+
QuoteTarget::CData,
865+
QuoteLevel::Minimal
866+
),
867+
"text<\"'&>]]]]><![CDATA[> \t\n\rtext"
868+
);
869+
}
811870
}
812871

813872
mod escape_list {
814873
use super::*;
874+
use pretty_assertions::assert_eq;
815875

816876
fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> String {
817877
let mut result = String::new();
@@ -937,6 +997,34 @@ mod tests {
937997
);
938998
}
939999
}
1000+
1001+
#[test]
1002+
fn cdata() {
1003+
assert_eq!(
1004+
escape_list(
1005+
"text<\"'&>]]> \t\n\rtext",
1006+
QuoteTarget::CData,
1007+
QuoteLevel::Full
1008+
),
1009+
"<![CDATA[text<\"'&>]]]]><![CDATA[> \t\n\rtext]]>"
1010+
);
1011+
assert_eq!(
1012+
escape_list(
1013+
"text<\"'&>]]> \t\n\rtext",
1014+
QuoteTarget::CData,
1015+
QuoteLevel::Partial
1016+
),
1017+
"<![CDATA[text<\"'&>]]]]><![CDATA[> \t\n\rtext]]>"
1018+
);
1019+
assert_eq!(
1020+
escape_list(
1021+
"text<\"'&>]]> \t\n\rtext",
1022+
QuoteTarget::CData,
1023+
QuoteLevel::Minimal
1024+
),
1025+
"<![CDATA[text<\"'&>]]]]><![CDATA[> \t\n\rtext]]>"
1026+
);
1027+
}
9401028
}
9411029

9421030
/// Tests for serialize atomic and union values, as defined in XSD specification
@@ -1261,4 +1349,44 @@ mod tests {
12611349
assert_eq!(buffer, "1 2 3");
12621350
}
12631351
}
1352+
1353+
mod cdata {
1354+
use super::*;
1355+
use pretty_assertions::assert_eq;
1356+
1357+
macro_rules! serialize_as_cdata {
1358+
($name:ident: $data:expr => $expected:literal) => {
1359+
#[test]
1360+
fn $name() {
1361+
let ser = SimpleTypeSerializer {
1362+
writer: String::new(),
1363+
target: QuoteTarget::CData,
1364+
level: QuoteLevel::Full,
1365+
};
1366+
1367+
let buffer = $data.serialize(ser).unwrap();
1368+
assert_eq!(buffer, $expected);
1369+
}
1370+
};
1371+
}
1372+
1373+
serialize_as_cdata!(empty_string: "" => "");
1374+
serialize_as_cdata!(simple_text: "Hello World" => "<![CDATA[Hello World]]>");
1375+
serialize_as_cdata!(with_markup: "<tag>content</tag>" => "<![CDATA[<tag>content</tag>]]>");
1376+
serialize_as_cdata!(with_ampersand: "Tom & Jerry" => "<![CDATA[Tom & Jerry]]>");
1377+
serialize_as_cdata!(with_quotes: r#"He said "Hello""# => r#"<![CDATA[He said "Hello"]]>"#);
1378+
serialize_as_cdata!(all_xml_chars: "<>&\"'" => "<![CDATA[<>&\"']]>");
1379+
1380+
serialize_as_cdata!(with_cdata_end: "foo]]>bar" => "<![CDATA[foo]]]]><![CDATA[>bar]]>");
1381+
serialize_as_cdata!(multiple_cdata_ends: "a]]>b]]>c" => "<![CDATA[a]]]]><![CDATA[>b]]]]><![CDATA[>c]]>");
1382+
serialize_as_cdata!(starts_with_cdata_end: "]]>hello" => "<![CDATA[]]]]><![CDATA[>hello]]>");
1383+
serialize_as_cdata!(ends_with_cdata_end: "hello]]>" => "<![CDATA[hello]]]]><![CDATA[>]]>");
1384+
serialize_as_cdata!(only_cdata_end: "]]>" => "<![CDATA[]]]]><![CDATA[>]]>");
1385+
1386+
serialize_as_cdata!(seq_basic: vec!["foo", "bar", "baz"] => "<![CDATA[foo bar baz]]>");
1387+
serialize_as_cdata!(seq_with_space: vec!["hello world", "hello\tworld", "world"] => "<![CDATA[hello world hello\tworld world]]>");
1388+
serialize_as_cdata!(seq_with_markup_chars: vec!["<tag>", "&entity", "\"quoted\""] => "<![CDATA[<tag> &entity \"quoted\"]]>");
1389+
serialize_as_cdata!(seq_with_cdata_end_split: vec!["foo]]>bar", "test"] => "<![CDATA[foo]]]]><![CDATA[>bar test]]>");
1390+
serialize_as_cdata!(tuple_cdata: ("first", 42, "third") => "<![CDATA[first 42 third]]>");
1391+
}
12641392
}

0 commit comments

Comments
 (0)