From a95c8943a288810a4847780828597aee078114c7 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Tue, 12 Dec 2023 15:41:28 +0100 Subject: [PATCH 01/14] basis from old PR --- src/de.rs | 108 +++++++++++++++++++++++++++++++++++++++++++ tests/array.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 tests/array.rs diff --git a/src/de.rs b/src/de.rs index 9975b40a5..2999a3ddc 100644 --- a/src/de.rs +++ b/src/de.rs @@ -160,6 +160,15 @@ impl<'de, R: Read<'de>> Deserializer { } } + /// Parse the JSON array as a stream of values. + pub fn into_array(self) -> ArrayDeserializer<'de, R> { + ArrayDeserializer { + de: self, + started: false, + lifetime: PhantomData, + } + } + /// Parse arbitrarily deep JSON structures without any consideration for /// overflowing the stack. /// @@ -2467,6 +2476,105 @@ where ////////////////////////////////////////////////////////////////////////////// +/// A streaming JSON array deserializer. +/// +/// An array deserializer can be created from any JSON deserializer using the +/// `Deserializer::into_array` method. +/// +/// The top-level data should be a JSON array, but each array element can consist of any JSON +/// value. An array deserializer only needs to keep a single array element in memory, and is +/// therefore preferable over deserializing into a container type such as `Vec` when the complete +/// array is too large to fit in memory. +/// +/// ```edition2018 +/// use serde_json::{Deserializer, Value}; +/// +/// fn main() { +/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]"; +/// +/// let mut iter = Deserializer::from_str(data).into_array(); +/// +/// while let Some(value) = iter.next::() { +/// println!("{}", value.unwrap()); +/// } +/// } +/// ``` +pub struct ArrayDeserializer<'de, R> { + de: Deserializer, + started: bool, // True if we have consumed the first '[' + lifetime: PhantomData<&'de ()>, +} + +impl<'de, R> ArrayDeserializer<'de, R> +where + R: read::Read<'de>, +{ + /// Create a JSON array deserializer from one of the possible serde_json + /// input sources. + /// + /// Typically it is more convenient to use one of these methods instead: + /// + /// - Deserializer::from_str(...).into_array() + /// - Deserializer::from_bytes(...).into_array() + /// - Deserializer::from_reader(...).into_array() + pub fn new(read: R) -> Self { + ArrayDeserializer { + de: Deserializer::new(read), + started: false, + lifetime: PhantomData, + } + } + + fn end>(&mut self) -> Option> { + self.de.eat_char(); + match self.de.end() { + Ok(_) => None, + Err(e) => Some(Err(e)), + } + } + + fn next_value>(&mut self) -> Option> { + match de::Deserialize::deserialize(&mut self.de) { + Ok(v) => Some(Ok(v)), + Err(e) => Some(Err(e)), + } + } + + /// Return the next element from the array. Returns None if there are no more elements. + pub fn next>(&mut self) -> Option> { + match self.de.parse_whitespace() { + Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), + Ok(Some(b'[')) if !self.started => { + self.started = true; + self.de.eat_char(); + + // We have to peek at the next character here to handle an empty array. + match self.de.parse_whitespace() { + Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), + Ok(Some(b']')) => self.end(), + Ok(Some(_)) => self.next_value(), + Err(e) => Some(Err(e)), + } + } + Ok(Some(b']')) if self.started => self.end(), + Ok(Some(b',')) if self.started => { + self.de.eat_char(); + + match self.de.parse_whitespace() { + Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), + Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))), + Ok(Some(_)) => self.next_value(), + Err(e) => Some(Err(e)), + } + } + Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))), + Err(e) => Some(Err(e)), + } + } +} + +////////////////////////////////////////////////////////////////////////////// + fn from_trait<'de, R, T>(read: R) -> Result where R: Read<'de>, diff --git a/tests/array.rs b/tests/array.rs new file mode 100644 index 000000000..bea58ad57 --- /dev/null +++ b/tests/array.rs @@ -0,0 +1,122 @@ +#![cfg(not(feature = "preserve_order"))] + +extern crate serde; + +#[macro_use] +extern crate serde_json; + +use serde_json::{Deserializer, Value}; + +// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740 +#[cfg_attr(rustfmt, rustfmt_skip)] +macro_rules! test_stream { + ($data:expr, |$stream:ident| $test:block) => { + { + let de = Deserializer::from_str($data); + let mut $stream = de.into_array(); + $test + } + { + let de = Deserializer::from_slice($data.as_bytes()); + let mut $stream = de.into_array(); + $test + } + { + let mut bytes = $data.as_bytes(); + let de = Deserializer::from_reader(&mut bytes); + let mut $stream = de.into_array(); + $test + } + }; +} + +#[test] +fn test_json_array_empty() { + let data = "[]"; + + test_stream!(data, |stream| { + assert!(stream.next::().is_none()); + }); +} + +#[test] +fn test_json_array_whitespace() { + let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n"; + + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap()["x"], 42); + + assert_eq!(stream.next::().unwrap().unwrap()["y"], 43); + + assert!(stream.next::().is_none()); + }); +} + +#[test] +fn test_json_array_truncated() { + let data = "[{\"x\":40},{\"x\":"; + + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap()["x"], 40); + + assert!(stream.next::().unwrap().unwrap_err().is_eof()); + }); +} + +#[test] +fn test_json_array_primitive() { + let data = "[{}, true, 1, [], 1.0, \"hey\", null]"; + + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap(), json!({})); + + assert_eq!(stream.next::().unwrap().unwrap(), true); + + assert_eq!(stream.next::().unwrap().unwrap(), 1); + + assert_eq!(stream.next::().unwrap().unwrap(), json!([])); + + assert_eq!(stream.next::().unwrap().unwrap(), 1.0); + + assert_eq!(stream.next::().unwrap().unwrap(), "hey"); + + assert_eq!(stream.next::().unwrap().unwrap(), Value::Null); + + assert!(stream.next::().is_none()); + }); +} + +#[test] +fn test_json_array_tailing_data() { + let data = "[]e"; + + test_stream!(data, |stream| { + let second = stream.next::().unwrap().unwrap_err(); + assert_eq!(second.to_string(), "trailing characters at line 1 column 3"); + }); +} + +#[test] +fn test_json_array_tailing_comma() { + let data = "[true,]"; + + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap(), true); + + let second = stream.next::().unwrap().unwrap_err(); + assert_eq!(second.to_string(), "trailing comma at line 1 column 7"); + }); +} + +#[test] +fn test_json_array_eof() { + let data = ""; + + test_stream!(data, |stream| { + let second = stream.next::().unwrap().unwrap_err(); + assert_eq!( + second.to_string(), + "EOF while parsing a value at line 1 column 0" + ); + }); +} From f820b05fb7fcff02d67fcbe16679fd347fa81c51 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 13:59:41 +0100 Subject: [PATCH 02/14] array nesting --- src/de.rs | 324 ++++++++++++++++++++++++++++++++++++++++++------- tests/array.rs | 17 ++- 2 files changed, 293 insertions(+), 48 deletions(-) diff --git a/src/de.rs b/src/de.rs index 2999a3ddc..af7ccab79 100644 --- a/src/de.rs +++ b/src/de.rs @@ -162,11 +162,7 @@ impl<'de, R: Read<'de>> Deserializer { /// Parse the JSON array as a stream of values. pub fn into_array(self) -> ArrayDeserializer<'de, R> { - ArrayDeserializer { - de: self, - started: false, - lifetime: PhantomData, - } + self.into() } /// Parse arbitrarily deep JSON structures without any consideration for @@ -2476,6 +2472,230 @@ where ////////////////////////////////////////////////////////////////////////////// +#[derive(Eq, PartialEq)] +enum ContainerKind { + Array, + Object, +} + +enum NestingDirection { + Enter, + Leave, +} + +struct NestingCommand { + kind: ContainerKind, + direction: NestingDirection, +} + +struct IterativeBaseDeserializer<'de, R> { + de: Deserializer, + lifetime: PhantomData<&'de ()>, + nesting_change: Vec, + cur_resolved_level: usize, + cur_expected_level: usize, +} + +impl<'de, R> IterativeBaseDeserializer<'de, R> +where + R: read::Read<'de>, +{ + pub fn new(de: Deserializer, initial_kind: ContainerKind) -> Self { + IterativeBaseDeserializer { + de, + lifetime: PhantomData, + nesting_change: vec![NestingCommand { + kind: initial_kind, + direction: NestingDirection::Enter, + }], + cur_resolved_level: 0, + cur_expected_level: 1, + } + } + + fn next_value>(&mut self) -> Option> { + Some(de::Deserialize::deserialize(&mut self.de)) + } + + fn nest_enter( + de: &mut Deserializer, + cur_nesting: &mut usize, + kind: ContainerKind, + last_entered: bool, + ) -> Result<()> { + let needs_comma = *cur_nesting != 0 && !last_entered; + if needs_comma { + match de.parse_whitespace() { + Ok(Some(b',')) => { + de.eat_char(); + } + Ok(Some(_)) => { + return Err(de.peek_error(ErrorCode::Message("Expected Comma".into()))) + } + Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Err(e) => return Err(e), + } + } + + match de.parse_whitespace() { + Ok(Some(b'[')) => { + if !matches!(kind, ContainerKind::Array) { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while nesting 1".into(), + ))); + } + } + Ok(Some(b'{')) => { + if !matches!(kind, ContainerKind::Object) { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while nesting 2".into(), + ))); + } + } + Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Err(e) => return Err(e), + Ok(Some(_)) => { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while nesting 3".into(), + ))); + } + } + de.eat_char(); + *cur_nesting += 1; + println!("nest_enter: {}", *cur_nesting); + + Ok(()) + } + + fn nest_leave( + de: &mut Deserializer, + cur_nesting: &mut usize, + kind: ContainerKind, + ) -> Result<()> { + match de.parse_whitespace() { + Ok(Some(b']')) => { + if !matches!(kind, ContainerKind::Array) { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while unnesting".into(), + ))); + } + } + Ok(Some(b'}')) => { + if !matches!(kind, ContainerKind::Object) { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while unnesting".into(), + ))); + } + } + Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Err(e) => return Err(e), + Ok(Some(_)) => { + // TODO: ErrorCode + return Err(de.peek_error(ErrorCode::Message( + "Unexpected value while unnesting".into(), + ))); + } + } + de.eat_char(); + *cur_nesting -= 1; + if *cur_nesting == 0 { + return de.end(); + } + + Ok(()) + } + + fn adjust_nesting(&mut self) -> Result { + let mut last_entered = false; + for cmd in self.nesting_change.drain(..) { + last_entered = match cmd.direction { + NestingDirection::Enter => { + Self::nest_enter( + &mut self.de, + &mut self.cur_resolved_level, + cmd.kind, + last_entered, + )?; + true + } + NestingDirection::Leave => { + Self::nest_leave(&mut self.de, &mut self.cur_resolved_level, cmd.kind)?; + false + } + }; + } + + Ok(last_entered) + } + + fn next_arr_val>( + &mut self, + expected_level: usize, + ) -> Option> { + let entered = match self.adjust_nesting() { + Ok(entered) => entered, + Err(e) => return Some(Err(e)), + }; + + if self.cur_resolved_level == 0 || self.cur_resolved_level != expected_level { + // We already ran `end()` at this point during `nest_leave` + dbg!(self.cur_resolved_level, expected_level); + return None; + } + + match self.de.parse_whitespace() { + Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), + Ok(Some(b']')) => { + self.de.eat_char(); + self.cur_resolved_level -= 1; + self.cur_expected_level -= 1; + if self.cur_resolved_level == 0 { + return match self.de.end() { + Ok(()) => None, + Err(e) => Some(Err(e)), + }; + } + None + } + Ok(Some(b',')) if !entered => { + self.de.eat_char(); + + match self.de.parse_whitespace() { + Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), + Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))), + Ok(Some(_)) => self.next_value(), + Err(e) => Some(Err(e)), + } + } + Ok(Some(_)) if entered => self.next_value(), + Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))), + Err(e) => Some(Err(e)), + } + } + + fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de, 'a, R> + where + 'de: 'a, + { + self.nesting_change.push(NestingCommand { + kind: ContainerKind::Array, + direction: NestingDirection::Enter, + }); + + self.cur_expected_level += 1; + + SubArrayDeserializer { + expected_level: self.cur_expected_level, + base: self, + } + } +} + /// A streaming JSON array deserializer. /// /// An array deserializer can be created from any JSON deserializer using the @@ -2500,9 +2720,7 @@ where /// } /// ``` pub struct ArrayDeserializer<'de, R> { - de: Deserializer, - started: bool, // True if we have consumed the first '[' - lifetime: PhantomData<&'de ()>, + base: IterativeBaseDeserializer<'de, R>, } impl<'de, R> ArrayDeserializer<'de, R> @@ -2519,56 +2737,68 @@ where /// - Deserializer::from_reader(...).into_array() pub fn new(read: R) -> Self { ArrayDeserializer { - de: Deserializer::new(read), - started: false, - lifetime: PhantomData, + base: IterativeBaseDeserializer::new(Deserializer::new(read), ContainerKind::Array), } } - fn end>(&mut self) -> Option> { - self.de.eat_char(); - match self.de.end() { - Ok(_) => None, - Err(e) => Some(Err(e)), - } + /// Return the next element from the array. Returns None if there are no more elements. + pub fn next>(&mut self) -> Option> { + self.base.next_arr_val(1) } - fn next_value>(&mut self) -> Option> { - match de::Deserialize::deserialize(&mut self.de) { - Ok(v) => Some(Ok(v)), - Err(e) => Some(Err(e)), + /// Some docs TODO + pub fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de, 'a, R> + where + 'de: 'a, + { + self.base.next_array() + } +} + +impl<'de, R: read::Read<'de>> From> for ArrayDeserializer<'de, R> { + fn from(de: Deserializer) -> Self { + ArrayDeserializer { + base: IterativeBaseDeserializer::new(de, ContainerKind::Array), } } +} +/// docs TODO +pub struct SubArrayDeserializer<'de1, 'de2, R> { + base: &'de2 mut IterativeBaseDeserializer<'de1, R>, + expected_level: usize, +} + +impl<'de1, 'de2, R> SubArrayDeserializer<'de1, 'de2, R> +where + R: read::Read<'de1>, +{ /// Return the next element from the array. Returns None if there are no more elements. - pub fn next>(&mut self) -> Option> { - match self.de.parse_whitespace() { - Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), - Ok(Some(b'[')) if !self.started => { - self.started = true; - self.de.eat_char(); + pub fn next>(&mut self) -> Option> { + self.base.next_arr_val(self.expected_level) + } - // We have to peek at the next character here to handle an empty array. - match self.de.parse_whitespace() { - Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), - Ok(Some(b']')) => self.end(), - Ok(Some(_)) => self.next_value(), - Err(e) => Some(Err(e)), - } - } - Ok(Some(b']')) if self.started => self.end(), - Ok(Some(b',')) if self.started => { - self.de.eat_char(); + /// Some docs TODO + pub fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de1, 'a, R> + where + 'de1: 'a, + { + self.base.next_array() + } +} - match self.de.parse_whitespace() { - Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), - Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))), - Ok(Some(_)) => self.next_value(), - Err(e) => Some(Err(e)), - } - } - Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))), - Err(e) => Some(Err(e)), +impl<'de1, 'de2, R> Drop for SubArrayDeserializer<'de1, 'de2, R> { + fn drop(&mut self) { + println!( + "dropping: C={}, E={}", + self.base.cur_expected_level, self.expected_level + ); + if self.base.cur_expected_level == self.expected_level { + self.base.cur_expected_level -= 1; + self.base.nesting_change.push(NestingCommand { + kind: ContainerKind::Array, + direction: NestingDirection::Leave, + }); } } } diff --git a/tests/array.rs b/tests/array.rs index bea58ad57..ae3016b42 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -65,7 +65,7 @@ fn test_json_array_truncated() { #[test] fn test_json_array_primitive() { - let data = "[{}, true, 1, [], 1.0, \"hey\", null]"; + let data = r#"[{}, true, 1, [], 1.0, "hey", [1.0, []], 2.0, null]"#; test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap(), json!({})); @@ -80,6 +80,21 @@ fn test_json_array_primitive() { assert_eq!(stream.next::().unwrap().unwrap(), "hey"); + { + let mut sub = stream.next_array(); + + assert_eq!(sub.next::().unwrap().unwrap(), 1.0); + + { + let mut sub2 = sub.next_array(); + assert!(sub2.next::().is_none()); + } + println!("after sub2"); + assert!(sub.next::().is_none()); + println!("is_none"); + } + assert_eq!(stream.next::().unwrap().unwrap(), 2.0); + assert_eq!(stream.next::().unwrap().unwrap(), Value::Null); assert!(stream.next::().is_none()); From 60d0aaa16029ae5b8e0147870486afffe42609f5 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 14:46:25 +0100 Subject: [PATCH 03/14] adjust the test_stream macro to be more rust and less macro --- tests/array.rs | 56 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/tests/array.rs b/tests/array.rs index ae3016b42..bb638f923 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -1,31 +1,32 @@ -#![cfg(not(feature = "preserve_order"))] +// #![cfg(not(feature = "preserve_order"))] TODO extern crate serde; #[macro_use] extern crate serde_json; -use serde_json::{Deserializer, Value}; +use serde_json::de::ArrayDeserializer; +use serde_json::{de::Read, Deserializer, Value}; + +fn test_stream(data: &str) { + T::test(Deserializer::from_str(data).into_array()); + T::test(Deserializer::from_slice(data.as_bytes()).into_array()); + T::test(Deserializer::from_reader(data.as_bytes()).into_array()); +} + +trait Tester { + fn test<'reader, R: Read<'reader>>(stream: ArrayDeserializer<'reader, R>); +} -// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740 -#[cfg_attr(rustfmt, rustfmt_skip)] macro_rules! test_stream { ($data:expr, |$stream:ident| $test:block) => { { - let de = Deserializer::from_str($data); - let mut $stream = de.into_array(); - $test - } - { - let de = Deserializer::from_slice($data.as_bytes()); - let mut $stream = de.into_array(); - $test - } - { - let mut bytes = $data.as_bytes(); - let de = Deserializer::from_reader(&mut bytes); - let mut $stream = de.into_array(); - $test + struct Test; + impl Tester for Test { + fn test<'r, R: Read<'r>>(mut $stream: ArrayDeserializer<'r, R>) + $test + } + test_stream::($data); } }; } @@ -39,6 +40,25 @@ fn test_json_array_empty() { }); } +#[test] +fn test_json_array_empty2() { + let data = "[]"; + + { + struct Test; + impl Tester for Test { + fn test<'a, R: Read<'a>>(mut stream: ArrayDeserializer<'a, R>) { + assert!(stream.next::().is_none()); + } + } + test_stream::(data); + } + + // test_stream!(data, |stream| { + // assert!(stream.next::().is_none()); + // }); +} + #[test] fn test_json_array_whitespace() { let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n"; From ba90d865772f8bd44a83536aaa17af597bbc1912 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 15:23:30 +0100 Subject: [PATCH 04/14] add test for nesting --- src/de.rs | 5 --- tests/array.rs | 107 ++++++++++++++++++++++++++++--------------------- 2 files changed, 62 insertions(+), 50 deletions(-) diff --git a/src/de.rs b/src/de.rs index af7ccab79..c2d6dc11f 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2565,7 +2565,6 @@ where } de.eat_char(); *cur_nesting += 1; - println!("nest_enter: {}", *cur_nesting); Ok(()) } @@ -2789,10 +2788,6 @@ where impl<'de1, 'de2, R> Drop for SubArrayDeserializer<'de1, 'de2, R> { fn drop(&mut self) { - println!( - "dropping: C={}, E={}", - self.base.cur_expected_level, self.expected_level - ); if self.base.cur_expected_level == self.expected_level { self.base.cur_expected_level -= 1; self.base.nesting_change.push(NestingCommand { diff --git a/tests/array.rs b/tests/array.rs index bb638f923..4e887cc1a 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -5,6 +5,7 @@ extern crate serde; #[macro_use] extern crate serde_json; +use serde::de::IgnoredAny; use serde_json::de::ArrayDeserializer; use serde_json::{de::Read, Deserializer, Value}; @@ -40,34 +41,13 @@ fn test_json_array_empty() { }); } -#[test] -fn test_json_array_empty2() { - let data = "[]"; - - { - struct Test; - impl Tester for Test { - fn test<'a, R: Read<'a>>(mut stream: ArrayDeserializer<'a, R>) { - assert!(stream.next::().is_none()); - } - } - test_stream::(data); - } - - // test_stream!(data, |stream| { - // assert!(stream.next::().is_none()); - // }); -} - #[test] fn test_json_array_whitespace() { let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n"; test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap()["x"], 42); - assert_eq!(stream.next::().unwrap().unwrap()["y"], 43); - assert!(stream.next::().is_none()); }); } @@ -78,45 +58,22 @@ fn test_json_array_truncated() { test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap()["x"], 40); - assert!(stream.next::().unwrap().unwrap_err().is_eof()); }); } #[test] fn test_json_array_primitive() { - let data = r#"[{}, true, 1, [], 1.0, "hey", [1.0, []], 2.0, null]"#; + let data = r#"[{}, true, 1, [], 1.0, "hey", null]"#; test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap(), json!({})); - assert_eq!(stream.next::().unwrap().unwrap(), true); - assert_eq!(stream.next::().unwrap().unwrap(), 1); - assert_eq!(stream.next::().unwrap().unwrap(), json!([])); - assert_eq!(stream.next::().unwrap().unwrap(), 1.0); - assert_eq!(stream.next::().unwrap().unwrap(), "hey"); - - { - let mut sub = stream.next_array(); - - assert_eq!(sub.next::().unwrap().unwrap(), 1.0); - - { - let mut sub2 = sub.next_array(); - assert!(sub2.next::().is_none()); - } - println!("after sub2"); - assert!(sub.next::().is_none()); - println!("is_none"); - } - assert_eq!(stream.next::().unwrap().unwrap(), 2.0); - assert_eq!(stream.next::().unwrap().unwrap(), Value::Null); - assert!(stream.next::().is_none()); }); } @@ -155,3 +112,63 @@ fn test_json_array_eof() { ); }); } + +#[test] +fn test_nesting() { + let data = r#"[1, [[3, []]], 4]"#; + + // With explicit is_none checks + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap(), 1); + { + let mut sub = stream.next_array(); + { + let mut sub2 = sub.next_array(); + assert_eq!(sub2.next::().unwrap().unwrap(), 3); + { + let mut sub3 = sub2.next_array(); + assert!(sub3.next::().is_none()); + } + assert!(sub2.next::().is_none()); + } + assert!(sub.next::().is_none()); + } + assert_eq!(stream.next::().unwrap().unwrap(), 4); + assert!(stream.next::().is_none()); + }); + + // Without inner is_none checks + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap(), 1); + { + let mut sub = stream.next_array(); + { + let mut sub2 = sub.next_array(); + assert_eq!(sub2.next::().unwrap().unwrap(), 3); + { + let mut sub3 = sub2.next_array(); + } + } + } + assert_eq!(stream.next::().unwrap().unwrap(), 4); + assert!(stream.next::().is_none()); + }); + + // Mixed is_none checks + test_stream!(data, |stream| { + assert_eq!(stream.next::().unwrap().unwrap(), 1); + { + let mut sub = stream.next_array(); + { + let mut sub2 = sub.next_array(); + assert_eq!(sub2.next::().unwrap().unwrap(), 3); + { + let mut sub3 = sub2.next_array(); + } + assert!(sub2.next::().is_none()); + } + } + assert_eq!(stream.next::().unwrap().unwrap(), 4); + assert!(stream.next::().is_none()); + }); +} From 2cf1c7d8ca8fa498c4973285f2a5e2dc96b2b8f2 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 16:47:05 +0100 Subject: [PATCH 05/14] some code cleanup --- src/de.rs | 229 ++++++++++++++++++++++++++----------------------- src/error.rs | 25 ++++++ tests/array.rs | 4 +- 3 files changed, 149 insertions(+), 109 deletions(-) diff --git a/src/de.rs b/src/de.rs index c2d6dc11f..177c225ad 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2472,7 +2472,7 @@ where ////////////////////////////////////////////////////////////////////////////// -#[derive(Eq, PartialEq)] +#[derive(Eq, PartialEq, Clone, Copy)] enum ContainerKind { Array, Object, @@ -2492,8 +2492,8 @@ struct IterativeBaseDeserializer<'de, R> { de: Deserializer, lifetime: PhantomData<&'de ()>, nesting_change: Vec, - cur_resolved_level: usize, - cur_expected_level: usize, + cur_level: usize, + future_level: usize, } impl<'de, R> IterativeBaseDeserializer<'de, R> @@ -2508,122 +2508,109 @@ where kind: initial_kind, direction: NestingDirection::Enter, }], - cur_resolved_level: 0, - cur_expected_level: 1, + cur_level: 0, + future_level: 1, } } - fn next_value>(&mut self) -> Option> { - Some(de::Deserialize::deserialize(&mut self.de)) - } - + // This is an associated function instead of using `self` due to the mutable borrow on the caller. fn nest_enter( de: &mut Deserializer, cur_nesting: &mut usize, kind: ContainerKind, last_entered: bool, ) -> Result<()> { + // We don't want a preceding comma if we just entered a container. + // In the case of cur_nesting == 0, we are before the top-level container, and we also don't want commas there! let needs_comma = *cur_nesting != 0 && !last_entered; if needs_comma { - match de.parse_whitespace() { + tri!(match de.parse_whitespace() { Ok(Some(b',')) => { de.eat_char(); + Ok(()) } - Ok(Some(_)) => { - return Err(de.peek_error(ErrorCode::Message("Expected Comma".into()))) - } - Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), - Err(e) => return Err(e), - } + Ok(Some(_)) => Err(de.peek_error(ErrorCode::ExpectedComma)), + Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Err(e) => Err(e), + }); } - match de.parse_whitespace() { - Ok(Some(b'[')) => { - if !matches!(kind, ContainerKind::Array) { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while nesting 1".into(), - ))); - } - } - Ok(Some(b'{')) => { - if !matches!(kind, ContainerKind::Object) { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while nesting 2".into(), - ))); + // ensure we find the correct opening token ('[' or '{') + tri!(match de.parse_whitespace() { + Err(e) => Err(e), + Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Ok(Some(token)) => match (token, kind) { + (b'[', ContainerKind::Array) | (b'{', ContainerKind::Object) => { + de.eat_char(); + Ok(()) } - } - Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), - Err(e) => return Err(e), - Ok(Some(_)) => { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while nesting 3".into(), - ))); - } - } - de.eat_char(); + _ => Err(de.peek_error(match kind { + ContainerKind::Array => ErrorCode::ExpectedListStart, + ContainerKind::Object => ErrorCode::ExpectedObjectStart, + })), + }, + }); + *cur_nesting += 1; Ok(()) } + // This is an associated function instead of using `self` due to the mutable borrow on the caller. fn nest_leave( de: &mut Deserializer, cur_nesting: &mut usize, kind: ContainerKind, ) -> Result<()> { - match de.parse_whitespace() { - Ok(Some(b']')) => { - if !matches!(kind, ContainerKind::Array) { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while unnesting".into(), - ))); - } - } - Ok(Some(b'}')) => { - if !matches!(kind, ContainerKind::Object) { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while unnesting".into(), - ))); + // ensure we find the correct closing token (']' or '}') + tri!(match de.parse_whitespace() { + Err(e) => Err(e), + Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)), + Ok(Some(token)) => match (token, kind) { + (b']', ContainerKind::Array) | (b'}', ContainerKind::Object) => { + de.eat_char(); + Ok(()) } - } - Ok(None) => return Err(de.peek_error(ErrorCode::EofWhileParsingValue)), - Err(e) => return Err(e), - Ok(Some(_)) => { - // TODO: ErrorCode - return Err(de.peek_error(ErrorCode::Message( - "Unexpected value while unnesting".into(), - ))); - } - } - de.eat_char(); + _ => Err(de.peek_error(match kind { + ContainerKind::Array => ErrorCode::ExpectedListEnd, + ContainerKind::Object => ErrorCode::ExpectedObjectEnd, + })), + }, + }); + *cur_nesting -= 1; + if *cur_nesting == 0 { + // Anything that comes after we left the top-level container is an error. return de.end(); } Ok(()) } - fn adjust_nesting(&mut self) -> Result { + // Applies all the queued up nesting commands. Afterwards, current_level and future_level should + // be the same. + fn resolve_nesting(&mut self) -> Result { + // When we just entered a container, the next character must not be a comma. + // Otherwise, it must be a comma. let mut last_entered = false; for cmd in self.nesting_change.drain(..) { last_entered = match cmd.direction { NestingDirection::Enter => { - Self::nest_enter( + tri!(Self::nest_enter( &mut self.de, - &mut self.cur_resolved_level, + &mut self.cur_level, cmd.kind, - last_entered, - )?; + last_entered + )); true } NestingDirection::Leave => { - Self::nest_leave(&mut self.de, &mut self.cur_resolved_level, cmd.kind)?; + tri!(Self::nest_leave( + &mut self.de, + &mut self.cur_level, + cmd.kind + )); false } }; @@ -2632,18 +2619,24 @@ where Ok(last_entered) } + // Gets the actual JSON value that was requested + fn next_value>(&mut self) -> Option> { + Some(de::Deserialize::deserialize(&mut self.de)) + } + + // Due to the way the lifetimes work, we can be sure that at this point only an array is expected fn next_arr_val>( &mut self, expected_level: usize, ) -> Option> { - let entered = match self.adjust_nesting() { - Ok(entered) => entered, + // First, we need to resolve all the nesting that was queued up before. + let needs_comma = match self.resolve_nesting() { + Ok(entered) => !entered, Err(e) => return Some(Err(e)), }; - if self.cur_resolved_level == 0 || self.cur_resolved_level != expected_level { + if self.cur_level == 0 || self.cur_level != expected_level { // We already ran `end()` at this point during `nest_leave` - dbg!(self.cur_resolved_level, expected_level); return None; } @@ -2651,9 +2644,9 @@ where Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), Ok(Some(b']')) => { self.de.eat_char(); - self.cur_resolved_level -= 1; - self.cur_expected_level -= 1; - if self.cur_resolved_level == 0 { + self.cur_level -= 1; + self.future_level -= 1; + if self.cur_level == 0 { return match self.de.end() { Ok(()) => None, Err(e) => Some(Err(e)), @@ -2661,7 +2654,7 @@ where } None } - Ok(Some(b',')) if !entered => { + Ok(Some(b',')) if needs_comma => { self.de.eat_char(); match self.de.parse_whitespace() { @@ -2671,28 +2664,45 @@ where Err(e) => Some(Err(e)), } } - Ok(Some(_)) if entered => self.next_value(), - Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))), + Ok(Some(_)) => { + if !needs_comma { + self.next_value() + } else { + Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))) + } + } Err(e) => Some(Err(e)), } } - fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de, 'a, R> + fn queue_enter_array<'new_parent>( + &'new_parent mut self, + ) -> SubArrayDeserializer<'de, 'new_parent, R> where - 'de: 'a, + 'de: 'new_parent, { self.nesting_change.push(NestingCommand { kind: ContainerKind::Array, direction: NestingDirection::Enter, }); - self.cur_expected_level += 1; + self.future_level += 1; SubArrayDeserializer { - expected_level: self.cur_expected_level, + expected_level: self.future_level, base: self, } } + + fn queue_leave(&mut self, expected_level: usize, kind: ContainerKind) { + if self.future_level == expected_level { + self.future_level -= 1; + self.nesting_change.push(NestingCommand { + kind, + direction: NestingDirection::Leave, + }); + } + } } /// A streaming JSON array deserializer. @@ -2746,11 +2756,13 @@ where } /// Some docs TODO - pub fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de, 'a, R> + pub fn next_array<'new_parent>( + &'new_parent mut self, + ) -> SubArrayDeserializer<'de, 'new_parent, R> where - 'de: 'a, + 'de: 'new_parent, { - self.base.next_array() + self.base.queue_enter_array() } } @@ -2763,38 +2775,41 @@ impl<'de, R: read::Read<'de>> From> for ArrayDeserializer<'de, R } /// docs TODO -pub struct SubArrayDeserializer<'de1, 'de2, R> { - base: &'de2 mut IterativeBaseDeserializer<'de1, R>, +pub struct SubArrayDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + base: &'parent mut IterativeBaseDeserializer<'de, R>, expected_level: usize, } -impl<'de1, 'de2, R> SubArrayDeserializer<'de1, 'de2, R> +impl<'de, 'parent, R> SubArrayDeserializer<'de, 'parent, R> where - R: read::Read<'de1>, + R: read::Read<'de>, { /// Return the next element from the array. Returns None if there are no more elements. - pub fn next>(&mut self) -> Option> { + pub fn next>(&mut self) -> Option> { self.base.next_arr_val(self.expected_level) } /// Some docs TODO - pub fn next_array<'a>(&'a mut self) -> SubArrayDeserializer<'de1, 'a, R> + pub fn next_array<'new_parent>( + &'new_parent mut self, + ) -> SubArrayDeserializer<'de, 'new_parent, R> where - 'de1: 'a, + 'de: 'new_parent, { - self.base.next_array() + self.base.queue_enter_array() } } -impl<'de1, 'de2, R> Drop for SubArrayDeserializer<'de1, 'de2, R> { +impl<'de, 'parent, R> Drop for SubArrayDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ fn drop(&mut self) { - if self.base.cur_expected_level == self.expected_level { - self.base.cur_expected_level -= 1; - self.base.nesting_change.push(NestingCommand { - kind: ContainerKind::Array, - direction: NestingDirection::Leave, - }); - } + self.base + .queue_leave(self.expected_level, ContainerKind::Array); } } diff --git a/src/error.rs b/src/error.rs index 03555eb4c..625625705 100644 --- a/src/error.rs +++ b/src/error.rs @@ -60,8 +60,13 @@ impl Error { | ErrorCode::EofWhileParsingString | ErrorCode::EofWhileParsingValue => Category::Eof, ErrorCode::ExpectedColon + | ErrorCode::ExpectedComma | ErrorCode::ExpectedListCommaOrEnd | ErrorCode::ExpectedObjectCommaOrEnd + | ErrorCode::ExpectedListEnd + | ErrorCode::ExpectedObjectEnd + | ErrorCode::ExpectedListStart + | ErrorCode::ExpectedObjectStart | ErrorCode::ExpectedSomeIdent | ErrorCode::ExpectedSomeValue | ErrorCode::ExpectedDoubleQuote @@ -255,12 +260,27 @@ pub(crate) enum ErrorCode { /// Expected this character to be a `':'`. ExpectedColon, + /// Expected this character to be a `','`. + ExpectedComma, + /// Expected this character to be either a `','` or a `']'`. ExpectedListCommaOrEnd, /// Expected this character to be either a `','` or a `'}'`. ExpectedObjectCommaOrEnd, + /// Expected this character to be a `']'`. + ExpectedListEnd, + + /// Expected this character to be a `'}'`. + ExpectedObjectEnd, + + /// Expected this character to be a `'['`. + ExpectedListStart, + + /// Expected this character to be a `'{'`. + ExpectedObjectStart, + /// Expected to parse either a `true`, `false`, or a `null`. ExpectedSomeIdent, @@ -356,8 +376,13 @@ impl Display for ErrorCode { ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"), ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"), ErrorCode::ExpectedColon => f.write_str("expected `:`"), + ErrorCode::ExpectedComma => f.write_str("expected `,`"), ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"), ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"), + ErrorCode::ExpectedListEnd => f.write_str("expected `]`"), + ErrorCode::ExpectedObjectEnd => f.write_str("expected `}`"), + ErrorCode::ExpectedListStart => f.write_str("expected `[`"), + ErrorCode::ExpectedObjectStart => f.write_str("expected `{`"), ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"), ErrorCode::ExpectedSomeValue => f.write_str("expected value"), ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"), diff --git a/tests/array.rs b/tests/array.rs index 4e887cc1a..9ee38650f 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -146,7 +146,7 @@ fn test_nesting() { let mut sub2 = sub.next_array(); assert_eq!(sub2.next::().unwrap().unwrap(), 3); { - let mut sub3 = sub2.next_array(); + sub2.next_array(); } } } @@ -163,7 +163,7 @@ fn test_nesting() { let mut sub2 = sub.next_array(); assert_eq!(sub2.next::().unwrap().unwrap(), 3); { - let mut sub3 = sub2.next_array(); + sub2.next_array(); } assert!(sub2.next::().is_none()); } From c6e3aa5a0742fa167807d044fd2e3566cadfd9ca Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 17:34:30 +0100 Subject: [PATCH 06/14] more documentation --- src/de.rs | 84 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/src/de.rs b/src/de.rs index 177c225ad..50bcadf42 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2478,22 +2478,30 @@ enum ContainerKind { Object, } -enum NestingDirection { +enum NestingAction { Enter, Leave, } struct NestingCommand { kind: ContainerKind, - direction: NestingDirection, + action: NestingAction, } +// The underlying deserializer for arrays and objects. The other deserializers are just handles to control this one. struct IterativeBaseDeserializer<'de, R> { de: Deserializer, lifetime: PhantomData<&'de ()>, + + // Queue of all nesting-adjustment-commands that will be applied on the call to `next` before reading + // the actual value. nesting_change: Vec, - cur_level: usize, - future_level: usize, + + // The nesting depth we are currently at, before applying the queue of commands. + cur_depth: usize, + + // The nesting depth we will be at after applying the queue of commands. + future_depth: usize, } impl<'de, R> IterativeBaseDeserializer<'de, R> @@ -2504,12 +2512,13 @@ where IterativeBaseDeserializer { de, lifetime: PhantomData, + // First action will be to enter the top-level container nesting_change: vec![NestingCommand { kind: initial_kind, - direction: NestingDirection::Enter, + action: NestingAction::Enter, }], - cur_level: 0, - future_level: 1, + cur_depth: 0, + future_depth: 1, } } @@ -2588,27 +2597,27 @@ where Ok(()) } - // Applies all the queued up nesting commands. Afterwards, current_level and future_level should + // Applies all the queued up nesting commands. Afterwards, current_depth and future_depth should // be the same. fn resolve_nesting(&mut self) -> Result { // When we just entered a container, the next character must not be a comma. - // Otherwise, it must be a comma. + // Otherwise (on leave but also just on any actual value), it must be a comma. let mut last_entered = false; for cmd in self.nesting_change.drain(..) { - last_entered = match cmd.direction { - NestingDirection::Enter => { + last_entered = match cmd.action { + NestingAction::Enter => { tri!(Self::nest_enter( &mut self.de, - &mut self.cur_level, + &mut self.cur_depth, cmd.kind, last_entered )); true } - NestingDirection::Leave => { + NestingAction::Leave => { tri!(Self::nest_leave( &mut self.de, - &mut self.cur_level, + &mut self.cur_depth, cmd.kind )); false @@ -2627,7 +2636,7 @@ where // Due to the way the lifetimes work, we can be sure that at this point only an array is expected fn next_arr_val>( &mut self, - expected_level: usize, + expected_depth: usize, ) -> Option> { // First, we need to resolve all the nesting that was queued up before. let needs_comma = match self.resolve_nesting() { @@ -2635,7 +2644,9 @@ where Err(e) => return Some(Err(e)), }; - if self.cur_level == 0 || self.cur_level != expected_level { + // if expected_depth differs from our current one at this point, it means we have already nested out of the + // container, so there's no more values to get. + if self.cur_depth == 0 || self.cur_depth != expected_depth { // We already ran `end()` at this point during `nest_leave` return None; } @@ -2644,9 +2655,9 @@ where Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), Ok(Some(b']')) => { self.de.eat_char(); - self.cur_level -= 1; - self.future_level -= 1; - if self.cur_level == 0 { + self.cur_depth -= 1; + self.future_depth -= 1; + if self.cur_depth == 0 { return match self.de.end() { Ok(()) => None, Err(e) => Some(Err(e)), @@ -2675,31 +2686,30 @@ where } } - fn queue_enter_array<'new_parent>( - &'new_parent mut self, - ) -> SubArrayDeserializer<'de, 'new_parent, R> + // Nest one level deeper + fn sub_array<'new_parent>(&'new_parent mut self) -> SubArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { self.nesting_change.push(NestingCommand { kind: ContainerKind::Array, - direction: NestingDirection::Enter, + action: NestingAction::Enter, }); - self.future_level += 1; + self.future_depth += 1; SubArrayDeserializer { - expected_level: self.future_level, + expected_depth: self.future_depth, base: self, } } - fn queue_leave(&mut self, expected_level: usize, kind: ContainerKind) { - if self.future_level == expected_level { - self.future_level -= 1; + fn queue_leave(&mut self, expected_depth: usize, kind: ContainerKind) { + if self.future_depth == expected_depth { + self.future_depth -= 1; self.nesting_change.push(NestingCommand { kind, - direction: NestingDirection::Leave, + action: NestingAction::Leave, }); } } @@ -2715,7 +2725,7 @@ where /// therefore preferable over deserializing into a container type such as `Vec` when the complete /// array is too large to fit in memory. /// -/// ```edition2018 +/// ``` /// use serde_json::{Deserializer, Value}; /// /// fn main() { @@ -2762,7 +2772,7 @@ where where 'de: 'new_parent, { - self.base.queue_enter_array() + self.base.sub_array() } } @@ -2780,7 +2790,11 @@ where R: read::Read<'de>, { base: &'parent mut IterativeBaseDeserializer<'de, R>, - expected_level: usize, + // The nesting depth of this container. It functions like an ID in order to check whether we are still in the + // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past + // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make + // sure we don't queue up more than one leave command. + expected_depth: usize, } impl<'de, 'parent, R> SubArrayDeserializer<'de, 'parent, R> @@ -2789,7 +2803,7 @@ where { /// Return the next element from the array. Returns None if there are no more elements. pub fn next>(&mut self) -> Option> { - self.base.next_arr_val(self.expected_level) + self.base.next_arr_val(self.expected_depth) } /// Some docs TODO @@ -2799,7 +2813,7 @@ where where 'de: 'new_parent, { - self.base.queue_enter_array() + self.base.sub_array() } } @@ -2809,7 +2823,7 @@ where { fn drop(&mut self) { self.base - .queue_leave(self.expected_level, ContainerKind::Array); + .queue_leave(self.expected_depth, ContainerKind::Array); } } From 6e8b7f427509be3fc4ca0e67def32669fb798b50 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 19:11:42 +0100 Subject: [PATCH 07/14] refactor ArrayDeserializer to be just a single struct --- src/de.rs | 98 +++++++++++++++++++++++++++----------------------- tests/array.rs | 4 +-- 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/src/de.rs b/src/de.rs index 50bcadf42..0902f11a5 100644 --- a/src/de.rs +++ b/src/de.rs @@ -15,6 +15,7 @@ use core::result; use core::str::FromStr; use serde::de::{self, Expected, Unexpected}; use serde::forward_to_deserialize_any; +use std::ops::{Deref, DerefMut}; #[cfg(feature = "arbitrary_precision")] use crate::number::NumberDeserializer; @@ -161,7 +162,7 @@ impl<'de, R: Read<'de>> Deserializer { } /// Parse the JSON array as a stream of values. - pub fn into_array(self) -> ArrayDeserializer<'de, R> { + pub fn into_array(self) -> ArrayDeserializer<'de, 'de, R> { self.into() } @@ -2687,7 +2688,7 @@ where } // Nest one level deeper - fn sub_array<'new_parent>(&'new_parent mut self) -> SubArrayDeserializer<'de, 'new_parent, R> + fn sub_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { @@ -2698,9 +2699,9 @@ where self.future_depth += 1; - SubArrayDeserializer { + ArrayDeserializer { expected_depth: self.future_depth, - base: self, + base: OwnedOrBorrowed::Borrowed(self), } } @@ -2738,11 +2739,19 @@ where /// } /// } /// ``` -pub struct ArrayDeserializer<'de, R> { - base: IterativeBaseDeserializer<'de, R>, +pub struct ArrayDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + base: OwnedOrBorrowed<'parent, IterativeBaseDeserializer<'de, R>>, + // The nesting depth of this container. It functions like an ID in order to check whether we are still in the + // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past + // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make + // sure we don't queue up more than one leave command. + expected_depth: usize, } -impl<'de, R> ArrayDeserializer<'de, R> +impl<'de, 'parent, R> ArrayDeserializer<'de, 'parent, R> where R: read::Read<'de>, { @@ -2756,19 +2765,21 @@ where /// - Deserializer::from_reader(...).into_array() pub fn new(read: R) -> Self { ArrayDeserializer { - base: IterativeBaseDeserializer::new(Deserializer::new(read), ContainerKind::Array), + base: OwnedOrBorrowed::Owned(IterativeBaseDeserializer::new( + Deserializer::new(read), + ContainerKind::Array, + )), + expected_depth: 1, } } /// Return the next element from the array. Returns None if there are no more elements. pub fn next>(&mut self) -> Option> { - self.base.next_arr_val(1) + self.base.next_arr_val(self.expected_depth) } /// Some docs TODO - pub fn next_array<'new_parent>( - &'new_parent mut self, - ) -> SubArrayDeserializer<'de, 'new_parent, R> + pub fn next_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { @@ -2776,54 +2787,51 @@ where } } -impl<'de, R: read::Read<'de>> From> for ArrayDeserializer<'de, R> { +impl<'de, 'parent, R> From> for ArrayDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ fn from(de: Deserializer) -> Self { ArrayDeserializer { - base: IterativeBaseDeserializer::new(de, ContainerKind::Array), + base: OwnedOrBorrowed::Owned(IterativeBaseDeserializer::new(de, ContainerKind::Array)), + expected_depth: 1, } } } -/// docs TODO -pub struct SubArrayDeserializer<'de, 'parent, R> +impl<'de, 'parent, R> Drop for ArrayDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - base: &'parent mut IterativeBaseDeserializer<'de, R>, - // The nesting depth of this container. It functions like an ID in order to check whether we are still in the - // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past - // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make - // sure we don't queue up more than one leave command. - expected_depth: usize, + fn drop(&mut self) { + if let OwnedOrBorrowed::Borrowed(base) = &mut self.base { + base.queue_leave(self.expected_depth, ContainerKind::Array); + } + } } -impl<'de, 'parent, R> SubArrayDeserializer<'de, 'parent, R> -where - R: read::Read<'de>, -{ - /// Return the next element from the array. Returns None if there are no more elements. - pub fn next>(&mut self) -> Option> { - self.base.next_arr_val(self.expected_depth) - } +enum OwnedOrBorrowed<'a, T> { + Owned(T), + Borrowed(&'a mut T), +} - /// Some docs TODO - pub fn next_array<'new_parent>( - &'new_parent mut self, - ) -> SubArrayDeserializer<'de, 'new_parent, R> - where - 'de: 'new_parent, - { - self.base.sub_array() +impl<'a, T> Deref for OwnedOrBorrowed<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + match self { + OwnedOrBorrowed::Owned(f) => f, + OwnedOrBorrowed::Borrowed(f) => *f, + } } } -impl<'de, 'parent, R> Drop for SubArrayDeserializer<'de, 'parent, R> -where - R: read::Read<'de>, -{ - fn drop(&mut self) { - self.base - .queue_leave(self.expected_depth, ContainerKind::Array); +impl<'a, T> DerefMut for OwnedOrBorrowed<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + match self { + OwnedOrBorrowed::Owned(f) => f, + OwnedOrBorrowed::Borrowed(f) => *f, + } } } diff --git a/tests/array.rs b/tests/array.rs index 9ee38650f..b7ceaf98a 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -16,7 +16,7 @@ fn test_stream(data: &str) { } trait Tester { - fn test<'reader, R: Read<'reader>>(stream: ArrayDeserializer<'reader, R>); + fn test<'reader, R: Read<'reader>>(stream: ArrayDeserializer<'reader, 'reader, R>); } macro_rules! test_stream { @@ -24,7 +24,7 @@ macro_rules! test_stream { { struct Test; impl Tester for Test { - fn test<'r, R: Read<'r>>(mut $stream: ArrayDeserializer<'r, R>) + fn test<'r, R: Read<'r>>(mut $stream: ArrayDeserializer<'r, 'r, R>) $test } test_stream::($data); From 50014c3261356ef7a64f5b701992bfac62192407 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 19:25:26 +0100 Subject: [PATCH 08/14] remove OwnedOrBorrowed --- src/de.rs | 87 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/src/de.rs b/src/de.rs index 0902f11a5..c0f95f05e 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2700,8 +2700,10 @@ where self.future_depth += 1; ArrayDeserializer { - expected_depth: self.future_depth, - base: OwnedOrBorrowed::Borrowed(self), + inner: ArrayDeserializerInner::Borrowed { + expected_depth: self.future_depth, + base: self, + }, } } @@ -2743,12 +2745,22 @@ pub struct ArrayDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - base: OwnedOrBorrowed<'parent, IterativeBaseDeserializer<'de, R>>, - // The nesting depth of this container. It functions like an ID in order to check whether we are still in the - // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past - // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make - // sure we don't queue up more than one leave command. - expected_depth: usize, + inner: ArrayDeserializerInner<'de, 'parent, R>, +} + +enum ArrayDeserializerInner<'de, 'parent, R> +where + R: read::Read<'de>, +{ + Owned(IterativeBaseDeserializer<'de, R>), + Borrowed { + base: &'parent mut IterativeBaseDeserializer<'de, R>, + // The nesting depth of this container. It functions like an ID in order to check whether we are still in the + // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past + // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make + // sure we don't queue up more than one leave command. + expected_depth: usize, + }, } impl<'de, 'parent, R> ArrayDeserializer<'de, 'parent, R> @@ -2764,18 +2776,23 @@ where /// - Deserializer::from_bytes(...).into_array() /// - Deserializer::from_reader(...).into_array() pub fn new(read: R) -> Self { - ArrayDeserializer { - base: OwnedOrBorrowed::Owned(IterativeBaseDeserializer::new( + Self { + inner: ArrayDeserializerInner::Owned(IterativeBaseDeserializer::new( Deserializer::new(read), ContainerKind::Array, )), - expected_depth: 1, } } /// Return the next element from the array. Returns None if there are no more elements. pub fn next>(&mut self) -> Option> { - self.base.next_arr_val(self.expected_depth) + match &mut self.inner { + ArrayDeserializerInner::Owned(base) => base.next_arr_val(1), + ArrayDeserializerInner::Borrowed { + base, + expected_depth, + } => base.next_arr_val(*expected_depth), + } } /// Some docs TODO @@ -2783,7 +2800,10 @@ where where 'de: 'new_parent, { - self.base.sub_array() + match &mut self.inner { + ArrayDeserializerInner::Owned(base) => base.sub_array(), + ArrayDeserializerInner::Borrowed { base, .. } => base.sub_array(), + } } } @@ -2792,9 +2812,11 @@ where R: read::Read<'de>, { fn from(de: Deserializer) -> Self { - ArrayDeserializer { - base: OwnedOrBorrowed::Owned(IterativeBaseDeserializer::new(de, ContainerKind::Array)), - expected_depth: 1, + Self { + inner: ArrayDeserializerInner::Owned(IterativeBaseDeserializer::new( + de, + ContainerKind::Array, + )), } } } @@ -2804,33 +2826,12 @@ where R: read::Read<'de>, { fn drop(&mut self) { - if let OwnedOrBorrowed::Borrowed(base) = &mut self.base { - base.queue_leave(self.expected_depth, ContainerKind::Array); - } - } -} - -enum OwnedOrBorrowed<'a, T> { - Owned(T), - Borrowed(&'a mut T), -} - -impl<'a, T> Deref for OwnedOrBorrowed<'a, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - match self { - OwnedOrBorrowed::Owned(f) => f, - OwnedOrBorrowed::Borrowed(f) => *f, - } - } -} - -impl<'a, T> DerefMut for OwnedOrBorrowed<'a, T> { - fn deref_mut(&mut self) -> &mut Self::Target { - match self { - OwnedOrBorrowed::Owned(f) => f, - OwnedOrBorrowed::Borrowed(f) => *f, + if let ArrayDeserializerInner::Borrowed { + base, + expected_depth, + } = &mut self.inner + { + base.queue_leave(*expected_depth, ContainerKind::Array); } } } From 47c9e70c1b6c146f0274ddacccb7b52b509dd3b9 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Wed, 13 Dec 2023 22:32:11 +0100 Subject: [PATCH 09/14] handle Object nesting --- src/de.rs | 311 ++++++++++++++++++++++++++++++++++++++++++------- tests/array.rs | 75 ++++++++++-- 2 files changed, 336 insertions(+), 50 deletions(-) diff --git a/src/de.rs b/src/de.rs index c0f95f05e..6b7fb6a6b 100644 --- a/src/de.rs +++ b/src/de.rs @@ -15,7 +15,6 @@ use core::result; use core::str::FromStr; use serde::de::{self, Expected, Unexpected}; use serde::forward_to_deserialize_any; -use std::ops::{Deref, DerefMut}; #[cfg(feature = "arbitrary_precision")] use crate::number::NumberDeserializer; @@ -2473,14 +2472,25 @@ where ////////////////////////////////////////////////////////////////////////////// -#[derive(Eq, PartialEq, Clone, Copy)] +#[derive(Clone, Copy)] enum ContainerKind { Array, Object, } +impl ContainerKind { + #[inline] + const fn end_char(self) -> u8 { + match self { + ContainerKind::Array => b']', + ContainerKind::Object => b'}', + } + } +} + enum NestingAction { - Enter, + // The nesting happens after a comma if we're inside an array, or after a colon if we're inside an object. + Enter(bool), Leave, } @@ -2516,7 +2526,7 @@ where // First action will be to enter the top-level container nesting_change: vec![NestingCommand { kind: initial_kind, - action: NestingAction::Enter, + action: NestingAction::Enter(false), }], cur_depth: 0, future_depth: 1, @@ -2528,18 +2538,21 @@ where de: &mut Deserializer, cur_nesting: &mut usize, kind: ContainerKind, + needs_comma: bool, last_entered: bool, ) -> Result<()> { // We don't want a preceding comma if we just entered a container. - // In the case of cur_nesting == 0, we are before the top-level container, and we also don't want commas there! - let needs_comma = *cur_nesting != 0 && !last_entered; + let needs_comma = needs_comma && !last_entered; if needs_comma { tri!(match de.parse_whitespace() { Ok(Some(b',')) => { de.eat_char(); Ok(()) } - Ok(Some(_)) => Err(de.peek_error(ErrorCode::ExpectedComma)), + Ok(Some(token)) => { + println!("expected comma: {}", token as char); + Err(de.peek_error(ErrorCode::ExpectedComma)) + } Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)), Err(e) => Err(e), }); @@ -2606,11 +2619,12 @@ where let mut last_entered = false; for cmd in self.nesting_change.drain(..) { last_entered = match cmd.action { - NestingAction::Enter => { + NestingAction::Enter(needs_comma) => { tri!(Self::nest_enter( &mut self.de, &mut self.cur_depth, cmd.kind, + needs_comma, last_entered )); true @@ -2629,16 +2643,8 @@ where Ok(last_entered) } - // Gets the actual JSON value that was requested - fn next_value>(&mut self) -> Option> { - Some(de::Deserialize::deserialize(&mut self.de)) - } - - // Due to the way the lifetimes work, we can be sure that at this point only an array is expected - fn next_arr_val>( - &mut self, - expected_depth: usize, - ) -> Option> { + // The lifetimes prevent the container types from accidentally getting mixed up. + fn advance(&mut self, expected_depth: usize, kind: ContainerKind) -> Option> { // First, we need to resolve all the nesting that was queued up before. let needs_comma = match self.resolve_nesting() { Ok(entered) => !entered, @@ -2654,33 +2660,32 @@ where match self.de.parse_whitespace() { Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), - Ok(Some(b']')) => { - self.de.eat_char(); - self.cur_depth -= 1; - self.future_depth -= 1; - if self.cur_depth == 0 { - return match self.de.end() { - Ok(()) => None, - Err(e) => Some(Err(e)), - }; - } - None - } Ok(Some(b',')) if needs_comma => { self.de.eat_char(); match self.de.parse_whitespace() { Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))), Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))), - Ok(Some(_)) => self.next_value(), + Ok(Some(_)) => Some(Ok(())), Err(e) => Some(Err(e)), } } - Ok(Some(_)) => { - if !needs_comma { - self.next_value() + Ok(Some(token)) => { + if token == kind.end_char() { + self.de.eat_char(); + self.cur_depth -= 1; + self.future_depth -= 1; + if self.cur_depth == 0 { + return match self.de.end() { + Ok(()) => None, + Err(e) => Some(Err(e)), + }; + } + None + } else if needs_comma { + Some(Err(self.de.peek_error(ErrorCode::ExpectedComma))) } else { - Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))) + Some(Ok(())) } } Err(e) => Some(Err(e)), @@ -2688,13 +2693,16 @@ where } // Nest one level deeper - fn sub_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> + fn sub_array<'new_parent>( + &'new_parent mut self, + needs_comma: bool, + ) -> ArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { self.nesting_change.push(NestingCommand { kind: ContainerKind::Array, - action: NestingAction::Enter, + action: NestingAction::Enter(needs_comma), }); self.future_depth += 1; @@ -2707,6 +2715,101 @@ where } } + fn sub_object<'new_parent>( + &'new_parent mut self, + needs_comma: bool, + ) -> ObjectDeserializer<'de, 'new_parent, R> + where + 'de: 'new_parent, + { + self.nesting_change.push(NestingCommand { + kind: ContainerKind::Object, + action: NestingAction::Enter(needs_comma), + }); + + self.future_depth += 1; + + ObjectDeserializer { + inner: ObjectDeserializerInner::Borrowed { + expected_depth: self.future_depth, + base: self, + }, + } + } + + fn object_key(&mut self, expected_depth: usize) -> Option> { + match self.advance(expected_depth, ContainerKind::Object) { + Some(Ok(())) => {} + Some(Err(e)) => return Some(Err(e)), + None => return None, + }; + + let key: String = match de::Deserialize::deserialize(&mut self.de) { + Ok(key) => key, + Err(e) => return Some(Err(e)), + }; + + if let Err(e) = self.de.parse_object_colon() { + return Some(Err(e)); + }; + + Some(Ok(key)) + } + + fn next_arr_val>( + &mut self, + expected_depth: usize, + ) -> Option> { + match self.advance(expected_depth, ContainerKind::Array) { + Some(Ok(())) => {} + Some(Err(e)) => return Some(Err(e)), + None => return None, + }; + + // Gets the actual JSON value that was requested + Some(de::Deserialize::deserialize(&mut self.de)) + } + + fn next_obj_val>( + &mut self, + expected_depth: usize, + ) -> Option> { + self.object_key(expected_depth).map(|r| { + r.and_then(|k| { + Ok(( + k, + match de::Deserialize::deserialize(&mut self.de) { + Ok(value) => value, + Err(e) => return Err(e), + }, + )) + }) + }) + } + + // Nest one level deeper from within an object to a new array. + fn object_sub_array<'new_parent>( + &'new_parent mut self, + expected_depth: usize, + ) -> Option)>> + where + 'de: 'new_parent, + { + self.object_key(expected_depth) + .map(|r| r.map(|k| (k, self.sub_array(false)))) + } + + fn object_sub_object<'new_parent>( + &'new_parent mut self, + expected_depth: usize, + ) -> Option)>> + where + 'de: 'new_parent, + { + self.object_key(expected_depth) + .map(|r| r.map(|k| (k, self.sub_object(false)))) + } + fn queue_leave(&mut self, expected_depth: usize, kind: ContainerKind) { if self.future_depth == expected_depth { self.future_depth -= 1; @@ -2796,13 +2899,24 @@ where } /// Some docs TODO - pub fn next_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> + pub fn sub_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { match &mut self.inner { - ArrayDeserializerInner::Owned(base) => base.sub_array(), - ArrayDeserializerInner::Borrowed { base, .. } => base.sub_array(), + ArrayDeserializerInner::Owned(base) => base.sub_array(true), + ArrayDeserializerInner::Borrowed { base, .. } => base.sub_array(true), + } + } + + /// Some docs TODO + pub fn sub_object<'new_parent>(&'new_parent mut self) -> ObjectDeserializer<'de, 'new_parent, R> + where + 'de: 'new_parent, + { + match &mut self.inner { + ArrayDeserializerInner::Owned(base) => base.sub_object(true), + ArrayDeserializerInner::Borrowed { base, .. } => base.sub_object(true), } } } @@ -2836,6 +2950,123 @@ where } } +/// TODO +pub struct ObjectDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + inner: ObjectDeserializerInner<'de, 'parent, R>, +} + +enum ObjectDeserializerInner<'de, 'parent, R> +where + R: read::Read<'de>, +{ + Owned(IterativeBaseDeserializer<'de, R>), + Borrowed { + base: &'parent mut IterativeBaseDeserializer<'de, R>, + // The nesting depth of this container. It functions like an ID in order to check whether we are still in the + // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past + // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make + // sure we don't queue up more than one leave command. + expected_depth: usize, + }, +} + +impl<'de, 'parent, R> ObjectDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + /// Create a JSON array deserializer from one of the possible serde_json + /// input sources. + /// + /// Typically it is more convenient to use one of these methods instead: + /// + /// - Deserializer::from_str(...).into_array() + /// - Deserializer::from_bytes(...).into_array() + /// - Deserializer::from_reader(...).into_array() + pub fn new(read: R) -> Self { + Self { + inner: ObjectDeserializerInner::Owned(IterativeBaseDeserializer::new( + Deserializer::new(read), + ContainerKind::Object, + )), + } + } + + /// Return the next element from the array. Returns None if there are no more elements. + pub fn next>(&mut self) -> Option> { + match &mut self.inner { + ObjectDeserializerInner::Owned(base) => base.next_obj_val(1), + ObjectDeserializerInner::Borrowed { + base, + expected_depth, + } => base.next_obj_val(*expected_depth), + } + } + + /// Some docs TODO + pub fn sub_array<'new_parent>( + &'new_parent mut self, + ) -> Option)>> + where + 'de: 'new_parent, + { + match &mut self.inner { + ObjectDeserializerInner::Owned(base) => base.object_sub_array(1), + ObjectDeserializerInner::Borrowed { + base, + expected_depth, + } => base.object_sub_array(*expected_depth), + } + } + + /// Some docs TODO + pub fn sub_object<'new_parent>( + &'new_parent mut self, + ) -> Option)>> + where + 'de: 'new_parent, + { + match &mut self.inner { + ObjectDeserializerInner::Owned(base) => base.object_sub_object(1), + ObjectDeserializerInner::Borrowed { + base, + expected_depth, + } => base.object_sub_object(*expected_depth), + } + } +} + +impl<'de, 'parent, R> From> for ObjectDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + fn from(de: Deserializer) -> Self { + Self { + inner: ObjectDeserializerInner::Owned(IterativeBaseDeserializer::new( + de, + ContainerKind::Object, + )), + } + } +} + +impl<'de, 'parent, R> Drop for ObjectDeserializer<'de, 'parent, R> +where + R: read::Read<'de>, +{ + fn drop(&mut self) { + if let ObjectDeserializerInner::Borrowed { + base, + expected_depth, + } = &mut self.inner + { + base.queue_leave(*expected_depth, ContainerKind::Object); + } + } +} + ////////////////////////////////////////////////////////////////////////////// fn from_trait<'de, R, T>(read: R) -> Result diff --git a/tests/array.rs b/tests/array.rs index b7ceaf98a..76e275fa1 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -115,18 +115,24 @@ fn test_json_array_eof() { #[test] fn test_nesting() { - let data = r#"[1, [[3, []]], 4]"#; + let data = r#"[1, [[3, []]], 4, { + "x": 5, + "y": { + "z": 6 + }, + "w": [7, 8] + }]"#; // With explicit is_none checks test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap(), 1); { - let mut sub = stream.next_array(); + let mut sub = stream.sub_array(); { - let mut sub2 = sub.next_array(); + let mut sub2 = sub.sub_array(); assert_eq!(sub2.next::().unwrap().unwrap(), 3); { - let mut sub3 = sub2.next_array(); + let mut sub3 = sub2.sub_array(); assert!(sub3.next::().is_none()); } assert!(sub2.next::().is_none()); @@ -134,6 +140,24 @@ fn test_nesting() { assert!(sub.next::().is_none()); } assert_eq!(stream.next::().unwrap().unwrap(), 4); + { + let mut sub = stream.sub_object(); + assert_eq!(sub.next::().unwrap().unwrap(), ("x".to_string(), 5)); + { + let (k, mut sub2) = sub.sub_object().unwrap().unwrap(); + assert_eq!(k, "y"); + assert_eq!(sub2.next::().unwrap().unwrap(), ("z".to_string(), 6)); + assert!(sub2.next::().is_none()); + } + { + let (k, mut sub2) = sub.sub_array().unwrap().unwrap(); + assert_eq!(k, "w"); + assert_eq!(sub2.next::().unwrap().unwrap(), 7); + assert_eq!(sub2.next::().unwrap().unwrap(), 8); + assert!(sub2.next::().is_none()); + } + assert!(sub.next::().is_none()); + } assert!(stream.next::().is_none()); }); @@ -141,16 +165,31 @@ fn test_nesting() { test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap(), 1); { - let mut sub = stream.next_array(); + let mut sub = stream.sub_array(); { - let mut sub2 = sub.next_array(); + let mut sub2 = sub.sub_array(); assert_eq!(sub2.next::().unwrap().unwrap(), 3); { - sub2.next_array(); + sub2.sub_array(); } } } assert_eq!(stream.next::().unwrap().unwrap(), 4); + { + let mut sub = stream.sub_object(); + assert_eq!(sub.next::().unwrap().unwrap(), ("x".to_string(), 5)); + { + let (k, mut sub2) = sub.sub_object().unwrap().unwrap(); + assert_eq!(k, "y"); + assert_eq!(sub2.next::().unwrap().unwrap(), ("z".to_string(), 6)); + } + { + let (k, mut sub2) = sub.sub_array().unwrap().unwrap(); + assert_eq!(k, "w"); + assert_eq!(sub2.next::().unwrap().unwrap(), 7); + assert_eq!(sub2.next::().unwrap().unwrap(), 8); + } + } assert!(stream.next::().is_none()); }); @@ -158,17 +197,33 @@ fn test_nesting() { test_stream!(data, |stream| { assert_eq!(stream.next::().unwrap().unwrap(), 1); { - let mut sub = stream.next_array(); + let mut sub = stream.sub_array(); { - let mut sub2 = sub.next_array(); + let mut sub2 = sub.sub_array(); assert_eq!(sub2.next::().unwrap().unwrap(), 3); { - sub2.next_array(); + sub2.sub_array(); } assert!(sub2.next::().is_none()); } } assert_eq!(stream.next::().unwrap().unwrap(), 4); + { + let mut sub = stream.sub_object(); + assert_eq!(sub.next::().unwrap().unwrap(), ("x".to_string(), 5)); + { + let (k, mut sub2) = sub.sub_object().unwrap().unwrap(); + assert_eq!(k, "y"); + assert_eq!(sub2.next::().unwrap().unwrap(), ("z".to_string(), 6)); + } + { + let (k, mut sub2) = sub.sub_array().unwrap().unwrap(); + assert_eq!(k, "w"); + assert_eq!(sub2.next::().unwrap().unwrap(), 7); + assert_eq!(sub2.next::().unwrap().unwrap(), 8); + assert!(sub2.next::().is_none()); + } + } assert!(stream.next::().is_none()); }); } From 792a287d04d534f672ba94b837b737b4d1393057 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Mon, 18 Dec 2023 15:11:41 +0100 Subject: [PATCH 10/14] Documentation and some small improvements --- src/de.rs | 342 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 249 insertions(+), 93 deletions(-) diff --git a/src/de.rs b/src/de.rs index 6b7fb6a6b..697f279b8 100644 --- a/src/de.rs +++ b/src/de.rs @@ -160,11 +160,16 @@ impl<'de, R: Read<'de>> Deserializer { } } - /// Parse the JSON array as a stream of values. + /// Parse the JSON array as a stream of values. Expects the top-level element to be an array. pub fn into_array(self) -> ArrayDeserializer<'de, 'de, R> { self.into() } + /// Parse the JSON object as a stream of values. Expects the top-level element to be an object. + pub fn into_object(self) -> ObjectDeserializer<'de, 'de, R> { + self.into() + } + /// Parse arbitrarily deep JSON structures without any consideration for /// overflowing the stack. /// @@ -2490,7 +2495,7 @@ impl ContainerKind { enum NestingAction { // The nesting happens after a comma if we're inside an array, or after a colon if we're inside an object. - Enter(bool), + Enter { needs_comma: bool }, Leave, } @@ -2499,7 +2504,8 @@ struct NestingCommand { action: NestingAction, } -// The underlying deserializer for arrays and objects. The other deserializers are just handles to control this one. +/// The underlying deserializer for arrays and objects. [`ObjectDeserializer`] and [`ArrayDeserializer`] act as +/// handles to control this one. struct IterativeBaseDeserializer<'de, R> { de: Deserializer, lifetime: PhantomData<&'de ()>, @@ -2526,14 +2532,14 @@ where // First action will be to enter the top-level container nesting_change: vec![NestingCommand { kind: initial_kind, - action: NestingAction::Enter(false), + action: NestingAction::Enter { needs_comma: false }, }], cur_depth: 0, future_depth: 1, } } - // This is an associated function instead of using `self` due to the mutable borrow on the caller. + // This is an associated function instead of a method due to the mutable borrow on the caller. fn nest_enter( de: &mut Deserializer, cur_nesting: &mut usize, @@ -2579,13 +2585,13 @@ where Ok(()) } - // This is an associated function instead of using `self` due to the mutable borrow on the caller. + // This is an associated function instead of a method due to the mutable borrow on the caller. fn nest_leave( de: &mut Deserializer, cur_nesting: &mut usize, kind: ContainerKind, ) -> Result<()> { - // ensure we find the correct closing token (']' or '}') + // ensure we find the appropriate closing token (']' or '}') tri!(match de.parse_whitespace() { Err(e) => Err(e), Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)), @@ -2611,15 +2617,14 @@ where Ok(()) } - // Applies all the queued up nesting commands. Afterwards, current_depth and future_depth should - // be the same. + // Applies all the queued up nesting commands. Afterwards, current_depth should equal future_depth. fn resolve_nesting(&mut self) -> Result { - // When we just entered a container, the next character must not be a comma. + // When we just entered a container, the next character must not be a comma. (i.e. we don't allow `[, 1]`) // Otherwise (on leave but also just on any actual value), it must be a comma. let mut last_entered = false; for cmd in self.nesting_change.drain(..) { last_entered = match cmd.action { - NestingAction::Enter(needs_comma) => { + NestingAction::Enter { needs_comma } => { tri!(Self::nest_enter( &mut self.de, &mut self.cur_depth, @@ -2643,7 +2648,7 @@ where Ok(last_entered) } - // The lifetimes prevent the container types from accidentally getting mixed up. + // Lifetimes ensure that we are always passing the right ContainerKind. fn advance(&mut self, expected_depth: usize, kind: ContainerKind) -> Option> { // First, we need to resolve all the nesting that was queued up before. let needs_comma = match self.resolve_nesting() { @@ -2671,6 +2676,8 @@ where } } Ok(Some(token)) => { + // If we're past the end of the container, we need to leave it here immediately so that we can report + // an error in case the container doesn't close properly (or has trailing characters). if token == kind.end_char() { self.de.eat_char(); self.cur_depth -= 1; @@ -2692,7 +2699,7 @@ where } } - // Nest one level deeper + /// Nest one level into an array fn sub_array<'new_parent>( &'new_parent mut self, needs_comma: bool, @@ -2702,19 +2709,20 @@ where { self.nesting_change.push(NestingCommand { kind: ContainerKind::Array, - action: NestingAction::Enter(needs_comma), + action: NestingAction::Enter { needs_comma }, }); self.future_depth += 1; ArrayDeserializer { - inner: ArrayDeserializerInner::Borrowed { + inner: IterativeDeserializerInner::Borrowed { expected_depth: self.future_depth, base: self, }, } } + /// Nest one level into an object fn sub_object<'new_parent>( &'new_parent mut self, needs_comma: bool, @@ -2724,19 +2732,20 @@ where { self.nesting_change.push(NestingCommand { kind: ContainerKind::Object, - action: NestingAction::Enter(needs_comma), + action: NestingAction::Enter { needs_comma }, }); self.future_depth += 1; ObjectDeserializer { - inner: ObjectDeserializerInner::Borrowed { + inner: IterativeDeserializerInner::Borrowed { expected_depth: self.future_depth, base: self, }, } } + /// Helper function for getting the next key when we're inside an object fn object_key(&mut self, expected_depth: usize) -> Option> { match self.advance(expected_depth, ContainerKind::Object) { Some(Ok(())) => {} @@ -2756,6 +2765,7 @@ where Some(Ok(key)) } + /// Get the next value inside an array. fn next_arr_val>( &mut self, expected_depth: usize, @@ -2770,6 +2780,7 @@ where Some(de::Deserialize::deserialize(&mut self.de)) } + /// Get the next value inside an object. fn next_obj_val>( &mut self, expected_depth: usize, @@ -2787,7 +2798,7 @@ where }) } - // Nest one level deeper from within an object to a new array. + /// Nest one level deeper from within an object to a new array. fn object_sub_array<'new_parent>( &'new_parent mut self, expected_depth: usize, @@ -2799,6 +2810,7 @@ where .map(|r| r.map(|k| (k, self.sub_array(false)))) } + /// Nest one level deeper from within an object to a new object. fn object_sub_object<'new_parent>( &'new_parent mut self, expected_depth: usize, @@ -2821,21 +2833,42 @@ where } } +/// [`ArrayDeserializer`] and [`ObjectDeserializer`] can operate on sub-containers of the JSON data in which case they +/// borrow the underlying [`IterativeBaseDeserializer`], but for convenience, the root-level deserializers own their +/// [`IterativeBaseDeserializer`]. This enables `Deserializer::from_str(...).into_array();` without needing a +/// separate variable. +enum IterativeDeserializerInner<'de, 'parent, R> +where + R: read::Read<'de>, +{ + Owned(IterativeBaseDeserializer<'de, R>), + Borrowed { + base: &'parent mut IterativeBaseDeserializer<'de, R>, + // The nesting depth of this container. It functions like an ID in order to check whether we are still inside the + // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past + // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make + // sure we don't queue up a leave command after we already left due to iterating past the end of the container. + expected_depth: usize, + }, +} + /// A streaming JSON array deserializer. /// /// An array deserializer can be created from any JSON deserializer using the -/// `Deserializer::into_array` method. +/// [`Deserializer::into_array`] method. +/// +/// This can be used to deserialize a JSON array one element at a time without needing to +/// keep the entire array in memory. See also [`ObjectDeserializer`] for the same functionality +/// on JSON objects. /// -/// The top-level data should be a JSON array, but each array element can consist of any JSON -/// value. An array deserializer only needs to keep a single array element in memory, and is -/// therefore preferable over deserializing into a container type such as `Vec` when the complete -/// array is too large to fit in memory. +/// Use [`next`](ArrayDeserializer::next) to get the next element from the array. Nested sub-arrays and sub-objects +/// can be iterated using [`sub_array`](ArrayDeserializer::sub_array) and [`sub_object`](ArrayDeserializer::sub_object). /// /// ``` /// use serde_json::{Deserializer, Value}; /// /// fn main() { -/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]"; +/// let data = r#"[{"k": 3}, 1, "cool", "stuff", [0, 1, 2]]"#; /// /// let mut iter = Deserializer::from_str(data).into_array(); /// @@ -2848,75 +2881,125 @@ pub struct ArrayDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - inner: ArrayDeserializerInner<'de, 'parent, R>, -} - -enum ArrayDeserializerInner<'de, 'parent, R> -where - R: read::Read<'de>, -{ - Owned(IterativeBaseDeserializer<'de, R>), - Borrowed { - base: &'parent mut IterativeBaseDeserializer<'de, R>, - // The nesting depth of this container. It functions like an ID in order to check whether we are still in the - // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past - // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make - // sure we don't queue up more than one leave command. - expected_depth: usize, - }, + inner: IterativeDeserializerInner<'de, 'parent, R>, } impl<'de, 'parent, R> ArrayDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - /// Create a JSON array deserializer from one of the possible serde_json - /// input sources. + /// Create a JSON array deserializer from a serde_json input source. /// - /// Typically it is more convenient to use one of these methods instead: + /// Typically, it is more convenient to use one of these methods instead: /// - /// - Deserializer::from_str(...).into_array() - /// - Deserializer::from_bytes(...).into_array() - /// - Deserializer::from_reader(...).into_array() + /// - `Deserializer::from_str(...).`[`into_array()`](Deserializer::into_array) + /// - `Deserializer::from_bytes(...).`[`into_array()`](Deserializer::into_array) + /// - `Deserializer::from_reader(...).`[`into_array()`](Deserializer::into_array) pub fn new(read: R) -> Self { Self { - inner: ArrayDeserializerInner::Owned(IterativeBaseDeserializer::new( + inner: IterativeDeserializerInner::Owned(IterativeBaseDeserializer::new( Deserializer::new(read), ContainerKind::Array, )), } } - /// Return the next element from the array. Returns None if there are no more elements. + /// Return the next element from the array. Returns `None` if there are no more elements. pub fn next>(&mut self) -> Option> { match &mut self.inner { - ArrayDeserializerInner::Owned(base) => base.next_arr_val(1), - ArrayDeserializerInner::Borrowed { + IterativeDeserializerInner::Owned(base) => base.next_arr_val(1), + IterativeDeserializerInner::Borrowed { base, expected_depth, } => base.next_arr_val(*expected_depth), } } - /// Some docs TODO + /// Enter a nested array. + /// + /// Returns the an [`ArrayDeserializer`] for the nested array. + /// + /// Note: This does not verify whether the value is actually an array until you call + /// [`next`](ArrayDeserializer::next) on the returned [`ArrayDeserializer`]. + /// + /// The [`ArrayDeserializer`] is mutably borrowed and thus cannot be used until the returned + /// [`ArrayDeserializer`] is dropped. + /// + /// See also [`ArrayDeserializer::sub_object`]. + /// ``` + /// use serde_json::Deserializer; + /// + /// fn main() { + /// let data = r#"[[1, 2, 3], 4]"#; + /// + /// let mut iter = Deserializer::from_str(data).into_array(); + /// + /// { + /// let mut sub = iter.sub_array(); + /// + /// while let Some(v) = sub.next::() { + /// let value = v.unwrap(); + /// println!("{}", value); + /// } + /// // `sub` is dropped here + /// } + /// + /// // can use iter again + /// let value = iter.next::().unwrap().unwrap(); + /// // ... + /// } + /// ``` pub fn sub_array<'new_parent>(&'new_parent mut self) -> ArrayDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { match &mut self.inner { - ArrayDeserializerInner::Owned(base) => base.sub_array(true), - ArrayDeserializerInner::Borrowed { base, .. } => base.sub_array(true), + IterativeDeserializerInner::Owned(base) => base.sub_array(true), + IterativeDeserializerInner::Borrowed { base, .. } => base.sub_array(true), } } - /// Some docs TODO + /// Enter a nested object. + /// + /// Returns the key and an [`ObjectDeserializer`] that must be used to iterate over the nested object. + /// + /// Note: This does not verify whether the value is actually an object until you call + /// [`next`](ObjectDeserializer::next) on the returned [`ObjectDeserializer`]. + /// + /// The [`ArrayDeserializer`] is mutably borrowed and thus cannot be used until the returned + /// [`ObjectDeserializer`] is dropped. + /// + /// See also [`ArrayDeserializer::sub_array`]. + /// ``` + /// use serde_json::Deserializer; + /// + /// fn main() { + /// let data = r#"[{"k1": 1, "k2": 2}, 3]"#; + /// + /// let mut iter = Deserializer::from_str(data).into_array(); + /// + /// { + /// let mut sub = iter.sub_object(); + /// + /// while let Some(v) = sub.next::() { + /// let (key, value) = v.unwrap(); + /// println!("{}: {}", key, value); + /// } + /// // `sub` is dropped here + /// } + /// + /// // can use iter again + /// let value = iter.next::().unwrap().unwrap(); + /// // ... + /// } + /// ``` pub fn sub_object<'new_parent>(&'new_parent mut self) -> ObjectDeserializer<'de, 'new_parent, R> where 'de: 'new_parent, { match &mut self.inner { - ArrayDeserializerInner::Owned(base) => base.sub_object(true), - ArrayDeserializerInner::Borrowed { base, .. } => base.sub_object(true), + IterativeDeserializerInner::Owned(base) => base.sub_object(true), + IterativeDeserializerInner::Borrowed { base, .. } => base.sub_object(true), } } } @@ -2927,7 +3010,7 @@ where { fn from(de: Deserializer) -> Self { Self { - inner: ArrayDeserializerInner::Owned(IterativeBaseDeserializer::new( + inner: IterativeDeserializerInner::Owned(IterativeBaseDeserializer::new( de, ContainerKind::Array, )), @@ -2940,7 +3023,7 @@ where R: read::Read<'de>, { fn drop(&mut self) { - if let ArrayDeserializerInner::Borrowed { + if let IterativeDeserializerInner::Borrowed { base, expected_depth, } = &mut self.inner @@ -2950,62 +3033,101 @@ where } } -/// TODO +/// A streaming JSON object deserializer. +/// +/// An object deserializer can be created from any JSON deserializer using the +/// [`Deserializer::into_object`] method. +/// +/// This deserializer is similar to [`ArrayDeserializer`], but for JSON objects. See its +/// documentation for more details. +/// +/// ``` +/// use serde_json::{Deserializer, Value}; +/// +/// fn main() { +/// let data = r#"{"first-key":"some-string", "second-key": [1, 2, 3]}"#; +/// +/// let mut iter = Deserializer::from_str(data).into_object(); +/// +/// while let Some(v) = iter.next::() { +/// let (key, value) = v.unwrap(); +/// println!("{}: {}", key, value); +/// } +/// } +/// ``` pub struct ObjectDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - inner: ObjectDeserializerInner<'de, 'parent, R>, -} - -enum ObjectDeserializerInner<'de, 'parent, R> -where - R: read::Read<'de>, -{ - Owned(IterativeBaseDeserializer<'de, R>), - Borrowed { - base: &'parent mut IterativeBaseDeserializer<'de, R>, - // The nesting depth of this container. It functions like an ID in order to check whether we are still in the - // container. This is necessary because there are two ways of leaving a container: Either by calling `next` past - // the end (and getting None), or by dropping this struct (for example at the end of the scope). We must make - // sure we don't queue up more than one leave command. - expected_depth: usize, - }, + inner: IterativeDeserializerInner<'de, 'parent, R>, } impl<'de, 'parent, R> ObjectDeserializer<'de, 'parent, R> where R: read::Read<'de>, { - /// Create a JSON array deserializer from one of the possible serde_json - /// input sources. + /// Create a JSON object deserializer from a `serde_json` input source. /// - /// Typically it is more convenient to use one of these methods instead: + /// Typically, it is more convenient to use one of these methods instead: /// - /// - Deserializer::from_str(...).into_array() - /// - Deserializer::from_bytes(...).into_array() - /// - Deserializer::from_reader(...).into_array() + /// - `Deserializer::from_str(...).`[`into_object()`](Deserializer::into_object) + /// - `Deserializer::from_bytes(...).`[`into_object()`](Deserializer::into_object) + /// - `Deserializer::from_reader(...).`[`into_object()`](Deserializer::into_object) pub fn new(read: R) -> Self { Self { - inner: ObjectDeserializerInner::Owned(IterativeBaseDeserializer::new( + inner: IterativeDeserializerInner::Owned(IterativeBaseDeserializer::new( Deserializer::new(read), ContainerKind::Object, )), } } - /// Return the next element from the array. Returns None if there are no more elements. + /// Return the next record (key-value pair) from the object. Returns `None` if there are no more records. pub fn next>(&mut self) -> Option> { match &mut self.inner { - ObjectDeserializerInner::Owned(base) => base.next_obj_val(1), - ObjectDeserializerInner::Borrowed { + IterativeDeserializerInner::Owned(base) => base.next_obj_val(1), + IterativeDeserializerInner::Borrowed { base, expected_depth, } => base.next_obj_val(*expected_depth), } } - /// Some docs TODO + /// Enter a nested array. + /// + /// Returns the key and an [`ArrayDeserializer`] for the nested array. + /// + /// Note: This does not verify whether the value is actually an array until you call + /// [`next`](ArrayDeserializer::next) on the returned [`ArrayDeserializer`]. + /// + /// The [`ObjectDeserializer`] is mutably borrowed and thus cannot be used until the returned + /// [`ArrayDeserializer`] is dropped. + /// + /// See also [`ObjectDeserializer::sub_object`]. + /// ``` + /// use serde_json::Deserializer; + /// + /// fn main() { + /// let data = r#"{"some-array": [1, 2, 3], "more": 1}"#; + /// + /// let mut iter = Deserializer::from_str(data).into_object(); + /// + /// { + /// let (key, mut sub) = iter.sub_array().unwrap().unwrap(); + /// println!("Entering {}", key); + /// + /// while let Some(v) = sub.next::() { + /// let value = v.unwrap(); + /// println!("{}", value); + /// } + /// // `sub` is dropped here + /// } + /// + /// // can use iter again + /// let (key, value) = iter.next::().unwrap().unwrap(); + /// // ... + /// } + /// ``` pub fn sub_array<'new_parent>( &'new_parent mut self, ) -> Option)>> @@ -3013,15 +3135,49 @@ where 'de: 'new_parent, { match &mut self.inner { - ObjectDeserializerInner::Owned(base) => base.object_sub_array(1), - ObjectDeserializerInner::Borrowed { + IterativeDeserializerInner::Owned(base) => base.object_sub_array(1), + IterativeDeserializerInner::Borrowed { base, expected_depth, } => base.object_sub_array(*expected_depth), } } - /// Some docs TODO + /// Enter a nested object. + /// + /// Returns the key and an [`ObjectDeserializer`] that must be used to iterate over the nested object. + /// + /// Note: This does not verify whether the value is actually an object until you call + /// [`next`](ObjectDeserializer::next) on the returned [`ObjectDeserializer`]. + /// + /// The [`ObjectDeserializer`] is mutably borrowed and thus cannot be used until the returned + /// [`ObjectDeserializer`] is dropped. + /// + /// See also [`ObjectDeserializer::sub_array`]. + /// ``` + /// use serde_json::Deserializer; + /// + /// fn main() { + /// let data = r#"{"some-obj": {"k1": 1, "k2": 2}}"#; + /// + /// let mut iter = Deserializer::from_str(data).into_object(); + /// + /// { + /// let (key, mut sub) = iter.sub_object().unwrap().unwrap(); + /// println!("Entering {}", key); + /// + /// while let Some(v) = sub.next::() { + /// let (key, value) = v.unwrap(); + /// println!("{}: {}", key, value); + /// } + /// // `sub` is dropped here + /// } + /// + /// // can use iter again + /// let (key, value) = iter.next::().unwrap().unwrap(); + /// // ... + /// } + /// ``` pub fn sub_object<'new_parent>( &'new_parent mut self, ) -> Option)>> @@ -3029,8 +3185,8 @@ where 'de: 'new_parent, { match &mut self.inner { - ObjectDeserializerInner::Owned(base) => base.object_sub_object(1), - ObjectDeserializerInner::Borrowed { + IterativeDeserializerInner::Owned(base) => base.object_sub_object(1), + IterativeDeserializerInner::Borrowed { base, expected_depth, } => base.object_sub_object(*expected_depth), @@ -3044,7 +3200,7 @@ where { fn from(de: Deserializer) -> Self { Self { - inner: ObjectDeserializerInner::Owned(IterativeBaseDeserializer::new( + inner: IterativeDeserializerInner::Owned(IterativeBaseDeserializer::new( de, ContainerKind::Object, )), @@ -3057,7 +3213,7 @@ where R: read::Read<'de>, { fn drop(&mut self) { - if let ObjectDeserializerInner::Borrowed { + if let IterativeDeserializerInner::Borrowed { base, expected_depth, } = &mut self.inner From dcb4b238c9ff7e319a42db626450594acf0871ee Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Mon, 18 Dec 2023 15:18:09 +0100 Subject: [PATCH 11/14] Fix test --- src/de.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/de.rs b/src/de.rs index 697f279b8..0e94c89e0 100644 --- a/src/de.rs +++ b/src/de.rs @@ -3158,7 +3158,7 @@ where /// use serde_json::Deserializer; /// /// fn main() { - /// let data = r#"{"some-obj": {"k1": 1, "k2": 2}}"#; + /// let data = r#"{"some-obj": {"k1": 1, "k2": 2}, "some-num": 3}"#; /// /// let mut iter = Deserializer::from_str(data).into_object(); /// From 9fdd78afc33397ab9ca8725d4b631501c15ec851 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Mon, 18 Dec 2023 15:22:41 +0100 Subject: [PATCH 12/14] remove todo --- tests/array.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/array.rs b/tests/array.rs index 76e275fa1..af43c062d 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -1,5 +1,3 @@ -// #![cfg(not(feature = "preserve_order"))] TODO - extern crate serde; #[macro_use] From a1c78966bb41f47d7cb993170b8066413a780715 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Mon, 18 Dec 2023 15:42:34 +0100 Subject: [PATCH 13/14] rename test file --- tests/{array.rs => array_stream.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{array.rs => array_stream.rs} (100%) diff --git a/tests/array.rs b/tests/array_stream.rs similarity index 100% rename from tests/array.rs rename to tests/array_stream.rs From 693c80c9b3fd320d929f224447c6f2867f405d35 Mon Sep 17 00:00:00 2001 From: Luxalpa Date: Mon, 18 Dec 2023 16:04:07 +0100 Subject: [PATCH 14/14] Fixes for alloc feature flag --- src/de.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/de.rs b/src/de.rs index 0e94c89e0..7df56f073 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2526,14 +2526,17 @@ where R: read::Read<'de>, { pub fn new(de: Deserializer, initial_kind: ContainerKind) -> Self { + let mut nesting_change = Vec::new(); + nesting_change.push(NestingCommand { + kind: initial_kind, + action: NestingAction::Enter { needs_comma: false }, + }); + IterativeBaseDeserializer { de, lifetime: PhantomData, // First action will be to enter the top-level container - nesting_change: vec![NestingCommand { - kind: initial_kind, - action: NestingAction::Enter { needs_comma: false }, - }], + nesting_change, cur_depth: 0, future_depth: 1, } @@ -2555,8 +2558,7 @@ where de.eat_char(); Ok(()) } - Ok(Some(token)) => { - println!("expected comma: {}", token as char); + Ok(Some(_)) => { Err(de.peek_error(ErrorCode::ExpectedComma)) } Ok(None) => Err(de.peek_error(ErrorCode::EofWhileParsingValue)),