From 6f4d67a3cc8c928c484731b73e24925b90b89968 Mon Sep 17 00:00:00 2001 From: rnantes Date: Wed, 15 Nov 2023 12:05:38 -0500 Subject: [PATCH] add support for automatically parsing headers from the value, add keyEncodingStrategy and keyDecodingStrategy, add timeZone and encoding decoding and Strategy --- Package.swift | 8 +- sources/Strategy.swift | 199 ++++++++++++++++++ sources/declarative/decodable/Decoder.swift | 14 +- .../decodable/DecoderConfiguration.swift | 3 + .../containers/KeyedDecodingContainer.swift | 2 + .../SingleValueDecodingContainer.swift | 73 ++++++- sources/declarative/encodable/Encoder.swift | 20 +- .../encodable/EncoderConfiguration.swift | 6 + .../containers/KeyedEncodingContainer.swift | 12 +- .../SingleValueEncodingContainer.swift | 24 +++ .../containers/UnkeyedEncodingContainer.swift | 13 ++ .../declarative/encodable/internal/Sink.swift | 63 +++++- sources/imperative/reader/Reader.swift | 10 +- .../reader/ReaderConfiguration.swift | 5 +- sources/imperative/writer/Writer.swift | 4 + .../writer/WriterConfiguration.swift | 7 +- .../DecodingRegularUsageTests.swift | 81 +++++++ .../EncodingRegularUsageTests.swift | 47 +++++ 18 files changed, 562 insertions(+), 29 deletions(-) diff --git a/Package.swift b/Package.swift index a5aa072..876d429 100644 --- a/Package.swift +++ b/Package.swift @@ -9,9 +9,13 @@ let package = Package( products: [ .library(name: "CodableCSV", targets: ["CodableCSV"]), ], - dependencies: [], + dependencies: [ + .package(url: "https://github.com/apple/swift-collections.git", from: "1.0.5") + ], targets: [ - .target(name: "CodableCSV", dependencies: [], path: "sources"), + .target(name: "CodableCSV", dependencies: [ + .product(name: "Collections", package: "swift-collections") + ], path: "sources"), .testTarget(name: "CodableCSVTests", dependencies: ["CodableCSV"], path: "tests"), .testTarget(name: "CodableCSVBenchmarks", dependencies: ["CodableCSV"], path: "benchmarks") ] diff --git a/sources/Strategy.swift 
b/sources/Strategy.swift index fa5e70a..3e08e81 100644 --- a/sources/Strategy.swift +++ b/sources/Strategy.swift @@ -1,3 +1,5 @@ +import Foundation + /// The strategies to use when encoding/decoding. public enum Strategy { /// The strategy to allow/disable escaped fields and how. @@ -35,3 +37,200 @@ public enum Strategy { case convert(positiveInfinity: String, negativeInfinity: String, nan: String) } } + + +/// The strategy to use for automatically changing the value of keys before decoding. +/// - NOTE: sourced from: https://github.com/apple/swift-foundation/blob/9a9e3c15bb14020b69cf5b2f95694a257f329c41/Sources/FoundationEssentials/JSON/JSONDecoder.swift#L103 +public enum KeyDecodingStrategy : Sendable { + /// Use the keys specified by each type. This is the default strategy. + case useDefaultKeys + + /// Convert from "snake_case_keys" to "camelCaseKeys" before attempting to match a key with the one specified by each type. + /// + /// The conversion to upper case uses `Locale.system`, also known as the ICU "root" locale. This means the result is consistent regardless of the current user's locale and language preferences. + /// + /// Converting from snake case to camel case: + /// 1. Capitalizes the word starting after each `_` + /// 2. Removes all `_` + /// 3. Preserves starting and ending `_` (as these are often used to indicate private variables or other metadata). + /// For example, `one_two_three` becomes `oneTwoThree`. `_one_two_three_` becomes `_oneTwoThree_`. + /// + /// - Note: Using a key decoding strategy has a nominal performance cost, as each string key has to be inspected for the `_` character. + case convertFromSnakeCase + + /// Provide a custom conversion from the key in the encoded JSON to the keys specified by the decoded types. + /// The full path to the current decoding position is provided for context (in case you need to locate this key within the payload). 
The returned key is used in place of the last component in the coding path before decoding. + /// If the result of the conversion is a duplicate key, then only one value will be present in the container for the type to decode from. + @preconcurrency + case custom(@Sendable (_ key: String) -> String) + + static func _convertFromSnakeCase(_ stringKey: String) -> String { + guard !stringKey.isEmpty else { return stringKey } + + // Find the first non-underscore character + guard let firstNonUnderscore = stringKey.firstIndex(where: { $0 != "_" }) else { + // Reached the end without finding an _ + return stringKey + } + + // Find the last non-underscore character + var lastNonUnderscore = stringKey.index(before: stringKey.endIndex) + while lastNonUnderscore > firstNonUnderscore && stringKey[lastNonUnderscore] == "_" { + stringKey.formIndex(before: &lastNonUnderscore) + } + + let keyRange = firstNonUnderscore...lastNonUnderscore + let leadingUnderscoreRange = stringKey.startIndex.. String) + + static func convertToSnakeCase(_ stringKey: String) -> String { + guard !stringKey.isEmpty else { return stringKey } + + var words : [Range] = [] + // The general idea of this algorithm is to split words on transition from lower to upper case, then on transition of >1 upper case characters to lowercase + // + // myProperty -> my_property + // myURLProperty -> my_url_property + // + // We assume, per Swift naming conventions, that the first character of the key is lowercase. + var wordStart = stringKey.startIndex + var searchRange = stringKey.index(after: wordStart)..1 capital letters. Turn those into a word, stopping at the capital before the lower case character. + let beforeLowerIndex = stringKey.index(before: lowerCaseRange.lowerBound) + words.append(upperCaseRange.lowerBound.. 
Void) +} + +/// The strategy to use for encoding the header +public enum HeaderEncodingStrategy : Sendable { + /// will encode headers if they are provided, otherwise will not include a header row + case automatic + /// will try to parse the headers from the properties being encoded + case parseFromValue +} + + +public enum TimeZoneDecodingStrategy : Sendable { + case identifier + case abbreviation + case secondsFromGMT + + case json + + /// Decode the `TimeZone` as a custom value decoded by the given closure. If the closure fails to decode a value from the given decoder, the error will be bubbled up. + /// + /// Custom `TimeZone` decoding adheres to the same behavior as a custom `Decodable` type. For example: + /// + /// let decoder = CSVDecoder() + /// decoder.timeZoneStrategy = .custom({ + /// let container = try $0.singleValueContainer() + /// let string = try container.decode(String.self) + /// // Now return the time zone represented by the custom string or throw an error if the string cannot be converted to a time zone. + /// }) + /// + /// - parameter decoding: Function receiving the CSV decoder used to parse a custom `TimeZone` value. + /// - parameter decoder: The decoder on which to fetch a single value container to obtain the underlying `String` value. + /// - returns: `TimeZone` value decoded from the underlying storage. + case custom(_ decoding: (_ decoder: Decoder) throws -> TimeZone) +} + diff --git a/sources/declarative/decodable/Decoder.swift b/sources/declarative/decodable/Decoder.swift index 881dbf4..ec22e5a 100644 --- a/sources/declarative/decodable/Decoder.swift +++ b/sources/declarative/decodable/Decoder.swift @@ -34,7 +34,7 @@ extension CSVDecoder { /// Returns a value of the type you specify, decoded from a CSV file (given as a `Data` blob). /// - parameter type: The type of the value to decode from the supplied file. /// - parameter data: The data blob representing a CSV file.
- open func decode(_ type: T.Type, from data: Data) throws -> T { + public func decode(_ type: T.Type, from data: Data) throws -> T { let reader = try CSVReader(input: data, configuration: self._configuration.readerConfiguration) return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) { try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: [])) @@ -44,7 +44,7 @@ extension CSVDecoder { /// Returns a value of the type you specify, decoded from a CSV file (given as a `String`). /// - parameter type: The type of the value to decode from the supplied file. /// - parameter string: A Swift string representing a CSV file. - open func decode(_ type: T.Type, from string: String) throws -> T { + public func decode(_ type: T.Type, from string: String) throws -> T { let reader = try CSVReader(input: string, configuration: self._configuration.readerConfiguration) return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) { try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: [])) @@ -54,7 +54,7 @@ extension CSVDecoder { /// Returns a value of the type you specify, decoded from a CSV file (being pointed by the url). /// - parameter type: The type of the value to decode from the supplied file. /// - parameter url: The URL pointing to the file to decode. 
- open func decode(_ type: T.Type, from url: URL) throws -> T { + public func decode(_ type: T.Type, from url: URL) throws -> T { let reader = try CSVReader(input: url, configuration: self._configuration.readerConfiguration) return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) { try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: [])) @@ -64,7 +64,7 @@ extension CSVDecoder { /// Returns a value of the type you specify, decoded from a CSV file (provided by the input stream). /// - parameter type: The type of the value to decode from the supplied file. /// - parameter stream: The input stream providing the raw bytes. - open func decode(_ type: T.Type, from stream: InputStream) throws -> T { + public func decode(_ type: T.Type, from stream: InputStream) throws -> T { let reader = try CSVReader(input: stream, configuration: self._configuration.readerConfiguration) return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) { try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: [])) @@ -76,7 +76,7 @@ extension CSVDecoder { /// Returns a sequence for decoding row-by-row from a CSV file (given as a `Data` blob). /// - parameter data: The data blob representing a CSV file. /// - throws: `CSVError` exclusively. - open func lazy(from data: Data) throws -> Lazy { + public func lazy(from data: Data) throws -> Lazy { let reader = try CSVReader(input: data, configuration: self._configuration.readerConfiguration) let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo) return Lazy(source: source) @@ -85,7 +85,7 @@ extension CSVDecoder { /// Returns a sequence for decoding row-by-row from a CSV file (given as a `String`). /// - parameter string: A Swift string representing a CSV file. /// - throws: `CSVError` exclusively. 
- open func lazy(from string: String) throws -> Lazy { + public func lazy(from string: String) throws -> Lazy { let reader = try CSVReader(input: string, configuration: self._configuration.readerConfiguration) let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo) return Lazy(source: source) @@ -94,7 +94,7 @@ extension CSVDecoder { /// Returns a sequence for decoding row-by-row from a CSV file (being pointed by `url`). /// - parameter url: The URL pointing to the file to decode. /// - throws: `CSVError` exclusively. - open func lazy(from url: URL) throws -> Lazy { + public func lazy(from url: URL) throws -> Lazy { let reader = try CSVReader(input: url, configuration: self._configuration.readerConfiguration) let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo) return Lazy(source: source) diff --git a/sources/declarative/decodable/DecoderConfiguration.swift b/sources/declarative/decodable/DecoderConfiguration.swift index 47e3e79..74901c5 100644 --- a/sources/declarative/decodable/DecoderConfiguration.swift +++ b/sources/declarative/decodable/DecoderConfiguration.swift @@ -19,6 +19,8 @@ extension CSVDecoder { public var dataStrategy: Strategy.DataDecoding /// The amount of CSV rows kept in memory after decoding to allow the random-order jumping exposed by keyed containers. public var bufferingStrategy: Strategy.DecodingBuffer + /// The strategy to use when decoding time zones. + public var timeZoneStrategy: TimeZoneDecodingStrategy /// Designated initializer setting the default values. public init() { @@ -30,6 +32,7 @@ extension CSVDecoder { self.dateStrategy = .deferredToDate self.dataStrategy = .base64 self.bufferingStrategy = .keepAll + self.timeZoneStrategy = .identifier } /// Gives direct access to all CSV reader's configuration values.
diff --git a/sources/declarative/decodable/containers/KeyedDecodingContainer.swift b/sources/declarative/decodable/containers/KeyedDecodingContainer.swift index f8fff2e..18df373 100644 --- a/sources/declarative/decodable/containers/KeyedDecodingContainer.swift +++ b/sources/declarative/decodable/containers/KeyedDecodingContainer.swift @@ -185,6 +185,8 @@ extension ShadowDecoder.KeyedContainer { func decode(_ type: T.Type, forKey key: Key) throws -> T where T:Decodable { if T.self == Date.self { return try self._fieldContainer(forKey: key).decode(Date.self) as! T + } else if T.self == TimeZone.self { + return try self._fieldContainer(forKey: key).decode(TimeZone.self) as! T } else if T.self == Data.self { return try self._fieldContainer(forKey: key).decode(Data.self) as! T } else if T.self == Decimal.self { diff --git a/sources/declarative/decodable/containers/SingleValueDecodingContainer.swift b/sources/declarative/decodable/containers/SingleValueDecodingContainer.swift index 545dd5a..35bb81c 100644 --- a/sources/declarative/decodable/containers/SingleValueDecodingContainer.swift +++ b/sources/declarative/decodable/containers/SingleValueDecodingContainer.swift @@ -1,5 +1,9 @@ import Foundation +fileprivate extension JSONDecoder { + static let shared = JSONDecoder() +} + extension ShadowDecoder { /// Single value container for the CSV shadow decoder. struct SingleValueContainer: SingleValueDecodingContainer { @@ -167,10 +171,11 @@ extension ShadowDecoder.SingleValueContainer { func decode(_ type: T.Type) throws -> T where T:Decodable { switch type { - case is Date.Type: return try self.decode(Date.self) as! T - case is Data.Type: return try self.decode(Data.self) as! T - case is Decimal.Type: return try self.decode(Decimal.self) as! T - case is URL.Type: return try self.decode(URL.self) as! T + case is Date.Type: return try self.decode(Date.self) as! T + case is TimeZone.Type: return try self.decode(TimeZone.self) as! 
T + case is Data.Type: return try self.decode(Data.self) as! T + case is Decimal.Type: return try self.decode(Decimal.self) as! T + case is URL.Type: return try self.decode(URL.self) as! T default: return try T(from: self._decoder) } } @@ -200,6 +205,42 @@ extension ShadowDecoder.SingleValueContainer { return try closure(self._decoder) } } + + /// Decodes a single value of the given type. + /// - parameter type: The type to decode as. + /// - returns: A value of the requested type. + func decode(_ type: TimeZone.Type) throws -> TimeZone { + switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.timeZoneStrategy }) { + case .identifier: + let string = try self.decode(String.self) + guard let timezone = Foundation.TimeZone.init(identifier: string) else { + throw CSVDecoder.Error._invalidTimeZoneIdentifier(string: string, codingPath: self.codingPath) + } + return timezone + case .abbreviation: + let string = try self.decode(String.self) + guard let timezone = Foundation.TimeZone.init(abbreviation: string) else { + throw CSVDecoder.Error._invalidTimeZoneAbbreviation(string: string, codingPath: self.codingPath) + } + return timezone + case .secondsFromGMT: + let number = try self.decode(Int.self) + guard let timezone = Foundation.TimeZone.init(secondsFromGMT: number) else { + throw CSVDecoder.Error._invalidTimeZoneSecondsFromGMT(number: number, codingPath: self.codingPath) + } + return timezone + case .json: + let string = try self.decode(String.self) + do { + let timezone = try JSONDecoder.shared.decode(TimeZone.self, from: Data(string.utf8)) + return timezone + } catch { + throw CSVDecoder.Error._invalidTimeZoneJSON(string: string, codingPath: self.codingPath) + } + case .custom(let closure): + return try closure(self._decoder) + } + } /// Decodes a single value of the given type. /// - parameter type: The type to decode as. 
@@ -320,6 +361,30 @@ fileprivate extension CSVDecoder.Error { codingPath: codingPath, debugDescription: "The field '\(string)' couldn't be transformed into a Date using the '.formatted' strategy.")) } + /// Error raised when a string value cannot be transformed into a `TimeZone` using the TimeZone identifier initializer. + static func _invalidTimeZoneIdentifier(string: String, codingPath: [CodingKey]) -> DecodingError { + .dataCorrupted(DecodingError.Context( + codingPath: codingPath, + debugDescription: "The field '\(string)' couldn't be transformed into a Timezone using the '.identifier' strategy.")) + } + /// Error raised when a string value cannot be transformed into a `TimeZone` using the TimeZone abbreviation initializer. + static func _invalidTimeZoneAbbreviation(string: String, codingPath: [CodingKey]) -> DecodingError { + .dataCorrupted(DecodingError.Context( + codingPath: codingPath, + debugDescription: "The field '\(string)' couldn't be transformed into a Timezone using the '.abbreviation' strategy.")) + } + /// Error raised when an integer value cannot be transformed into a `TimeZone` using the TimeZone seconds-from-GMT initializer. + static func _invalidTimeZoneSecondsFromGMT(number: Int, codingPath: [CodingKey]) -> DecodingError { + .dataCorrupted(DecodingError.Context( + codingPath: codingPath, + debugDescription: "The field '\(number)' couldn't be transformed into a Timezone using the '.secondsFromGMT' strategy.")) + } + /// Error raised when a JSON string value cannot be transformed into a `TimeZone` using JSONDecoder. + static func _invalidTimeZoneJSON(string: String, codingPath: [CodingKey]) -> DecodingError { + .dataCorrupted(DecodingError.Context( + codingPath: codingPath, + debugDescription: "The field '\(string)' couldn't be transformed into a Timezone using the '.json' strategy.")) + } /// Error raised when a string value cannot be transformed into a Base64 data blob.
static func _invalidData64(string: String, codingPath: [CodingKey]) -> DecodingError { .dataCorrupted(DecodingError.Context( diff --git a/sources/declarative/encodable/Encoder.swift b/sources/declarative/encodable/Encoder.swift index 52b86fe..128b874 100644 --- a/sources/declarative/encodable/Encoder.swift +++ b/sources/declarative/encodable/Encoder.swift @@ -35,11 +35,15 @@ extension CSVEncoder { /// - parameter value: The value to encode as CSV. /// - parameter type: The Swift type for a data blob. /// - returns: `Data` blob with the CSV representation of `value`. - open func encode(_ value: T, into type: Data.Type) throws -> Data { + public func encode(_ value: T, into type: Data.Type) throws -> Data { let writer = try CSVWriter(configuration: self._configuration.writerConfiguration) try withExtendedLifetime(try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)) { - try value.encode(to: ShadowEncoder(sink: .passUnretained($0), codingPath: [])) - try $0.completeEncoding() + do { + try value.encode(to: ShadowEncoder(sink: .passUnretained($0), codingPath: [])) + try $0.completeEncoding() + } catch { + throw error + } } return try writer.data() } @@ -48,7 +52,7 @@ extension CSVEncoder { /// - parameter value: The value to encode as CSV. /// - parameter type: The Swift type for a string. /// - returns: `String` with the CSV representation of `value`. - open func encode(_ value: T, into type: String.Type) throws -> String { + public func encode(_ value: T, into type: String.Type) throws -> String { let data = try self.encode(value, into: Data.self) let encoding = self._configuration.writerConfiguration.encoding ?? .utf8 return String(data: data, encoding: encoding)! @@ -58,7 +62,7 @@ extension CSVEncoder { /// - parameter value: The value to encode as CSV. /// - parameter fileURL: The file receiving the encoded values. 
/// - parameter append: In case an existing file is under the given URL, this Boolean indicates that the information will be appended to the file (`true`), or the file will be overwritten (`false`). - open func encode(_ value: T, into fileURL: URL, append: Bool = false) throws { + public func encode(_ value: T, into fileURL: URL, append: Bool = false) throws { let writer = try CSVWriter(fileURL: fileURL, append: append, configuration: self._configuration.writerConfiguration) try withExtendedLifetime(try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)) { try value.encode(to: ShadowEncoder(sink: .passUnretained($0), codingPath: [])) @@ -71,7 +75,7 @@ extension CSVEncoder { /// Returns an instance to encode row-by-row the feeded values. /// - parameter type: The Swift type for a data blob. /// - returns: Instance used for _on demand_ encoding. - open func lazy(into type: Data.Type) throws -> Lazy { + public func lazy(into type: Data.Type) throws -> Lazy { let writer = try CSVWriter(configuration: self._configuration.writerConfiguration) let sink = try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo) return Lazy(sink: sink) @@ -80,7 +84,7 @@ extension CSVEncoder { /// Returns an instance to encode row-by-row the feeded values. /// - parameter type: The Swift type for a data blob. /// - returns: Instance used for _on demand_ encoding. - open func lazy(into type: String.Type) throws -> Lazy { + public func lazy(into type: String.Type) throws -> Lazy { let writer = try CSVWriter(configuration: self._configuration.writerConfiguration) let sink = try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo) return Lazy(sink: sink) @@ -90,7 +94,7 @@ extension CSVEncoder { /// - parameter fileURL: The file receiving the encoded values. 
/// - parameter append: In case an existing file is under the given URL, this Boolean indicates that the information will be appended to the file (`true`), or the file will be overwritten (`false`). /// - returns: Instance used for _on demand_ encoding. - open func lazy(into fileURL: URL, append: Bool = false) throws -> Lazy { + public func lazy(into fileURL: URL, append: Bool = false) throws -> Lazy { let writer = try CSVWriter(fileURL: fileURL, append: append, configuration: self._configuration.writerConfiguration) let sink = try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo) return Lazy(sink: sink) diff --git a/sources/declarative/encodable/EncoderConfiguration.swift b/sources/declarative/encodable/EncoderConfiguration.swift index d6eaef1..94566a6 100644 --- a/sources/declarative/encodable/EncoderConfiguration.swift +++ b/sources/declarative/encodable/EncoderConfiguration.swift @@ -19,6 +19,10 @@ extension CSVEncoder { public var dataStrategy: Strategy.DataEncoding /// Indication on how encoded CSV rows are cached and actually written to the output target. public var bufferingStrategy: Strategy.EncodingBuffer + /// The strategy to use for encoding keys. Defaults to `.useDefaultKeys`. + public var keyEncodingStrategy: KeyEncodingStrategy + /// The strategy to use when encoding time zones. + public var timeZoneStrategy: TimeZoneEncodingStrategy /// Designated initializer setting the default values. public init() { @@ -30,6 +34,8 @@ extension CSVEncoder { self.dateStrategy = .deferredToDate self.dataStrategy = .base64 self.bufferingStrategy = .keepAll + self.keyEncodingStrategy = .useDefaultKeys + self.timeZoneStrategy = .identifier } /// Gives direct access to all CSV writer's configuration values.
diff --git a/sources/declarative/encodable/containers/KeyedEncodingContainer.swift b/sources/declarative/encodable/containers/KeyedEncodingContainer.swift index 24aca09..2c1e6fd 100644 --- a/sources/declarative/encodable/containers/KeyedEncodingContainer.swift +++ b/sources/declarative/encodable/containers/KeyedEncodingContainer.swift @@ -170,6 +170,9 @@ extension ShadowEncoder.KeyedContainer { case let date as Date: var container = try self._fieldContainer(forKey: key) try container.encode(date) + case let timeZone as TimeZone: + var container = try self._fieldContainer(forKey: key) + try container.encode(timeZone) case let data as Data: var container = try self._fieldContainer(forKey: key) try container.encode(data) @@ -302,10 +305,17 @@ private extension ShadowEncoder.KeyedContainer { let index: (row: Int, field: Int) var codingPath = self._encoder.codingPath codingPath.append(key) + + let encodedKey = key.stringValue + //print("encodedKey: \(encodedKey)") + switch self._focus { case .row(let rowIndex): - index = (rowIndex, try self._encoder.sink._withUnsafeGuaranteedRef({ try $0.fieldIndex(forKey: key, codingPath: self.codingPath) })) + index = (rowIndex, try self._encoder.sink._withUnsafeGuaranteedRef({ + try $0.addParsedHeader(encodedKey) + return try $0.fieldIndex(forKey: key, codingPath: self.codingPath) + })) case .file: guard let rowIndex = key.intValue else { throw CSVEncoder.Error._invalidRowKey(forKey: key, codingPath: codingPath) } // Values are only allowed to be decoded directly from a nested container in "file level" if the CSV rows have a single column. 
diff --git a/sources/declarative/encodable/containers/SingleValueEncodingContainer.swift b/sources/declarative/encodable/containers/SingleValueEncodingContainer.swift index d20ff65..1d48b7a 100644 --- a/sources/declarative/encodable/containers/SingleValueEncodingContainer.swift +++ b/sources/declarative/encodable/containers/SingleValueEncodingContainer.swift @@ -1,5 +1,9 @@ import Foundation +fileprivate extension JSONEncoder { + static let shared = JSONEncoder() +} + extension ShadowEncoder { /// Single value container for the CSV shadow encoder. struct SingleValueContainer: SingleValueEncodingContainer { @@ -143,6 +147,7 @@ extension ShadowEncoder.SingleValueContainer { mutating func encode(_ value: T) throws where T:Encodable { switch value { case let date as Date: try self.encode(date) + case let timeZone as TimeZone: try self.encode(timeZone) case let data as Data: try self.encode(data) case let num as Decimal: try self.encode(num) case let url as URL: try self.encode(url) @@ -172,6 +177,25 @@ extension ShadowEncoder.SingleValueContainer { try closure(value, self._encoder) } } + + /// Encodes a single value of the given type. + /// - parameter value: The value to encode. + mutating func encode(_ value: TimeZone) throws { + switch self._encoder.sink._withUnsafeGuaranteedRef({ $0.configuration.timeZoneStrategy }) { + case .identifier: + try self.encode(value.identifier) + case .abbreviation: + try self.encode(value.abbreviation()) + case .secondsFromGMT: + try self.encode(value.secondsFromGMT()) + case .json: + let jsonData = try JSONEncoder.shared.encode(value) + let jsonString = String(data: jsonData, encoding: .utf8)! + try self.encode(jsonString) + case .custom(let closure): + try closure(value, self._encoder) + } + } /// Encodes a single value of the given type. /// - parameter value: The value to encode. 
diff --git a/sources/declarative/encodable/containers/UnkeyedEncodingContainer.swift b/sources/declarative/encodable/containers/UnkeyedEncodingContainer.swift index 6fd2fd4..caeeb37 100644 --- a/sources/declarative/encodable/containers/UnkeyedEncodingContainer.swift +++ b/sources/declarative/encodable/containers/UnkeyedEncodingContainer.swift @@ -193,6 +193,9 @@ extension ShadowEncoder.UnkeyedContainer { case let date as Date: var container = try self._fieldContainer() try container.encode(date) + case let timeZone as TimeZone: + var container = try self._fieldContainer() + try container.encode(timeZone) case let data as Data: var container = try self._fieldContainer() try container.encode(data) @@ -208,6 +211,16 @@ extension ShadowEncoder.UnkeyedContainer { let encoder = ShadowEncoder(sink: self._encoder.sink, codingPath: codingPath) try value.encode(to: encoder) } + + // if the first row has completed encoding and the headerStrategy is parseFromValue then add the parsed headers to the first line + if currentIndex == 0 { + try self._encoder.sink._withUnsafeGuaranteedRef({ + if $0.configuration.writerConfiguration.headerStrategy == .parseFromValue { + try $0.writeHeaderRow() + } + }) + } + //print("self.currentIndex: \(self.currentIndex)") self.currentIndex += 1 } diff --git a/sources/declarative/encodable/internal/Sink.swift b/sources/declarative/encodable/internal/Sink.swift index 43d2bf5..64ebe44 100644 --- a/sources/declarative/encodable/internal/Sink.swift +++ b/sources/declarative/encodable/internal/Sink.swift @@ -1,4 +1,5 @@ import Foundation +import OrderedCollections extension ShadowEncoder { /// Sink of all CSV data. @@ -8,13 +9,48 @@ extension ShadowEncoder { /// The rows buffer. private let _buffer: Buffer /// The decoding configuration. - let configuration: CSVEncoder.Configuration + var configuration: CSVEncoder.Configuration /// Any contextual information set by the user for decoding. 
let userInfo: [CodingUserInfoKey:Any] /// Lookup dictionary providing fast index discovery for header names. private var _headerLookup: [Int:Int] /// Encodes the given field in the given position. let fieldValue: (_ value: String, _ rowIndex: Int, _ fieldIndex: Int) throws -> Void + + /// write the header row + func writeHeaderRow() throws { + let sourceHeaders = parsedHeaders.elements + + let convertedHeaders = switch configuration.keyEncodingStrategy { + case .useDefaultKeys: + sourceHeaders + case .convertToSnakeCase: + sourceHeaders.map({ KeyEncodingStrategy.convertToSnakeCase($0) }) + case .custom(let converter): + sourceHeaders.map({ converter($0) }) + } + + try _writer.write(row: convertedHeaders) + _writer.resetRowIndex() + } + + /// An ordered set of headers parsed from the value during encoding + /// Only populated and used when headerStrategy is set to .parseFromValue + private var parsedHeaders = OrderedSet() + + /// set to true when addParsedHeader attempts to insert a header that the parsedHeaders OrderedSet already contains + private var hasParsedAllHeaders = false + + /// attempts to append a parsed header to the parsedHeaders OrderedSet + func addParsedHeader(_ header: String) throws { + if !hasParsedAllHeaders { + if parsedHeaders.contains(header) { + hasParsedAllHeaders = true + } else { + parsedHeaders.append(header) + } + } + } /// Creates the unique data sink for the encoding process. init(writer: CSVWriter, configuration: CSVEncoder.Configuration, userInfo: [CodingUserInfoKey:Any]) throws { @@ -158,9 +194,25 @@ extension ShadowEncoder.Sink { let name = key.stringValue // 3. Get the header lookup dictionary (building it if it is the first time accessing it). 
if self._headerLookup.isEmpty { - guard !self.configuration.headers.isEmpty else { throw CSVEncoder.Error._emptyHeader(forKey: key, codingPath: codingPath) } - self._headerLookup = try self.configuration.headers.lookupDictionary(onCollision: CSVEncoder.Error._invalidHashableHeader) + // if empty use parsed headers + switch self.configuration.headerStrategy { + case .automatic: + guard !self.configuration.headers.isEmpty else { + throw CSVEncoder.Error._emptyHeader(forKey: key, codingPath: codingPath) + } + self._headerLookup = try self.configuration.headers.lookupDictionary(onCollision: CSVEncoder.Error._invalidHashableHeader) + case .parseFromValue: + self._headerLookup = try self.parsedHeaders.elements.lookupDictionary(onCollision: CSVEncoder.Error._invalidHashableHeader) + } } + + if self.configuration.headerStrategy == .parseFromValue { + if self.configuration.headers.isEmpty && _headerLookup.count != self.parsedHeaders.count { + self._headerLookup = try self.parsedHeaders.elements.lookupDictionary(onCollision: CSVEncoder.Error._invalidHashableHeader) + } + } + + // 4. Get the index from the header lookup up and the header name. guard let index = self._headerLookup[name.hashValue] else { throw CSVEncoder.Error._unmatchedHeader(forKey: key, codingPath: codingPath) @@ -177,7 +229,9 @@ extension ShadowEncoder.Sink { // 2. Check whether there is any remaining row whatsoever. if let firstIndex = remainings.firstIndex { // 3. The first indeces must be the same or greater than the writer ones. - guard firstIndex.row >= self._writer.rowIndex, firstIndex.field >= self._writer.fieldIndex else { throw CSVEncoder.Error._corruptedBuffer() } + guard firstIndex.row >= self._writer.rowIndex, firstIndex.field >= self._writer.fieldIndex else { + throw CSVEncoder.Error._corruptedBuffer() + } // 4. Iterate through all the remaining rows. while var row = remainings.next() { // 5. If the writer is further back from the next remaining row. Fill the writer with empty rows. 
@@ -188,6 +242,7 @@ extension ShadowEncoder.Sink { while self._writer.fieldIndex < field.index { try self._writer.write(field: "") } // 8. Write the targeted field. try self._writer.write(field: field.value) + } // 9. Finish the targeted row. try self._writer.endRow() diff --git a/sources/imperative/reader/Reader.swift b/sources/imperative/reader/Reader.swift index ee0d9ff..c79f007 100644 --- a/sources/imperative/reader/Reader.swift +++ b/sources/imperative/reader/Reader.swift @@ -50,7 +50,15 @@ public final class CSVReader: IteratorProtocol, Sequence { case .firstLine: guard let headers = try self._parseLine(rowIndex: 0) else { self.status = .finished; return } guard !headers.isEmpty else { throw Error._invalidEmptyHeader() } - self.headers = headers + self.headers = switch configuration.keyDecodingStrategy { + case .useDefaultKeys: + headers + case .convertFromSnakeCase: + headers.map({ KeyDecodingStrategy._convertFromSnakeCase($0) }) + case .custom(let converter): + headers.map({ converter($0) }) + } + self.count = (rows: 1, fields: headers.count) // case .unknown: #warning("TODO") } diff --git a/sources/imperative/reader/ReaderConfiguration.swift b/sources/imperative/reader/ReaderConfiguration.swift index 1e8920c..814570e 100644 --- a/sources/imperative/reader/ReaderConfiguration.swift +++ b/sources/imperative/reader/ReaderConfiguration.swift @@ -17,7 +17,9 @@ extension CSVReader { public var trimStrategy: CharacterSet /// Boolean indicating whether the data/file/string should be completely parsed at reader's initialization. public var presample: Bool - + /// The strategy to use for decoding keys (i.e headers). Defaults to `.useDefaultKeys`. + public var keyDecodingStrategy: KeyDecodingStrategy + /// Designated initializer setting the default values. 
public init() { self.encoding = nil @@ -26,6 +28,7 @@ extension CSVReader { self.headerStrategy = .none self.trimStrategy = CharacterSet() self.presample = false + self.keyDecodingStrategy = .useDefaultKeys } } } diff --git a/sources/imperative/writer/Writer.swift b/sources/imperative/writer/Writer.swift index 46f2538..cfad679 100644 --- a/sources/imperative/writer/Writer.swift +++ b/sources/imperative/writer/Writer.swift @@ -45,6 +45,10 @@ public final class CSVWriter { self.rowIndex = 0 } } + + func resetRowIndex() { + self.rowIndex = 0 + } deinit { try? self.endEncoding() diff --git a/sources/imperative/writer/WriterConfiguration.swift b/sources/imperative/writer/WriterConfiguration.swift index 7f7b0ae..16b7325 100644 --- a/sources/imperative/writer/WriterConfiguration.swift +++ b/sources/imperative/writer/WriterConfiguration.swift @@ -1,3 +1,5 @@ +import Foundation + extension CSVWriter { /// Configuration for how to write CSV data. public struct Configuration { @@ -13,8 +15,10 @@ extension CSVWriter { public var delimiters: Delimiter.Pair /// The strategy to allow/disable escaped fields and how. public var escapingStrategy: Strategy.Escaping + /// The strategy determining whether to use the given headers or parse them from the value. + public var headerStrategy: HeaderEncodingStrategy + /// The row of headers to write at the beginning of the CSV data. - /// /// If empty, no row will be written.
public var headers: [String] @@ -24,6 +28,7 @@ extension CSVWriter { self.bomStrategy = .convention self.delimiters = (field: ",", row: "\n") self.escapingStrategy = .doubleQuote + self.headerStrategy = .automatic self.headers = Array() } } diff --git a/tests/declarative/DecodingRegularUsageTests.swift b/tests/declarative/DecodingRegularUsageTests.swift index 4623e0b..4aab2a0 100644 --- a/tests/declarative/DecodingRegularUsageTests.swift +++ b/tests/declarative/DecodingRegularUsageTests.swift @@ -22,6 +22,29 @@ extension DecodingRegularUsageTests { ["4", "Chum" , "0.2" , "feminine" , "hamster" , "true" ], ["5", "Bacterio", "999.9", "" , "bacteria", "false" ] ] + + static let headersStudent: [String] = ["firstName", "lastName", "age", "countryOfStudy", "hasPet", "timeZone"] + static let headersStudentSnakeCase: [String] = ["first_name", "last_name", "age", "country_of_study", "has_pet", "time_zone"] + + /// List of students (time zones given as abbreviations) used as test data. + static let contentStudent: [[String]] = [ + ["Marcos", "aaa" , "1" , "Spain", "true" , "EST" ], + ["Anaïs", "bbb" , "2" , "France", "false" , "PST" ], + ["Alex", "ccc" , "3" , "", "false" , "NST" ], + ["家豪", "ddd" , "4" , "China", "true" , "AST" ], + ["Дэниел", "eee" , "5" , "Russia", "true" , "MST" ], + ["ももこ", "fff" , "6" , "Japan", "false" , "CST" ], + ] + + // Note: the CSV would look like the below: + // Marcos,aaa,1,Spain,true,"{""identifier"": ""America/New_York""}" + // Anaïs,bbb,2,France,false,"{""identifier"": ""America/Los_Angeles""}" + static let contentStudentJsonTimeZone: [[String]] = [ + ["Marcos", "aaa" , "1" , "Spain", "true" , "\"{\"\"identifier\"\": \"\"America/New_York\"\"}\"" ], + ["Anaïs", "bbb" , "2" , "France", "false" , "\"{\"\"identifier\"\": \"\"America/Los_Angeles\"\"}\"" ] + ] + + /// Encodes the test data into a Swift `String`. /// - parameter sample: /// - parameter delimiters: Unicode scalars to use to mark fields and rows.
@@ -30,6 +53,15 @@ extension DecodingRegularUsageTests { let (f, r) = (delimiters.field.description, delimiters.row.description) return sample.map { $0.joined(separator: f) }.joined(separator: r).appending(r) } + + struct KeyedStudentCamelCaseTimeZone: Codable, Equatable { + var firstName: String + var lastName: String + var age: Int + var countryOfStudy: String? + var hasPet: Bool + var timeZone: TimeZone + } } } @@ -192,4 +224,53 @@ extension DecodingRegularUsageTests { XCTAssertEqual(store.mammals, _TestData.content.filter { $0[5] == "true" }.map { $0[1] }) } } + + + /// Decodes students from snake-case headers with time zones given as abbreviations. + func testTimeZone() throws { + let delimiters: Delimiter.Pair = (",", "\n") + let encoding: String.Encoding = .utf8 + + let csvString = _TestData.toCSV([_TestData.headersStudentSnakeCase] + _TestData.contentStudent, delimiters: delimiters) + let csvData = csvString.data(using: encoding)! + + let decoder = CSVDecoder() + decoder.delimiters = delimiters + decoder.encoding = .utf8 + decoder.headerStrategy = .firstLine + decoder.keyDecodingStrategy = .convertFromSnakeCase + decoder.timeZoneStrategy = .abbreviation + + let studentsRes = try XCTUnwrap(try decoder.decode([_TestData.KeyedStudentCamelCaseTimeZone].self, from: csvData)) + XCTAssertEqual(studentsRes.count, 6) + + let studentRes3 = try XCTUnwrap(studentsRes[2]) + XCTAssertEqual(studentRes3.age, 3) + XCTAssertEqual(studentRes3.hasPet, false) + XCTAssertEqual(studentRes3.timeZone.identifier, "America/St_Johns") + } + + func testTimeZoneJSON() throws { + let delimiters: Delimiter.Pair = (",", "\n") + let encoding: String.Encoding = .utf8 + + let csvString = _TestData.toCSV([_TestData.headersStudentSnakeCase] + _TestData.contentStudentJsonTimeZone, delimiters: delimiters) + let csvData = csvString.data(using: encoding)!
+ + let decoder = CSVDecoder() + decoder.delimiters = delimiters + decoder.encoding = .utf8 + decoder.headerStrategy = .firstLine + decoder.keyDecodingStrategy = .convertFromSnakeCase + decoder.timeZoneStrategy = .json + + let studentsRes = try XCTUnwrap(try decoder.decode([_TestData.KeyedStudentCamelCaseTimeZone].self, from: csvData)) + XCTAssertEqual(studentsRes.count, 2) + + let studentRes1 = try XCTUnwrap(studentsRes[1]) + XCTAssertEqual(studentRes1.firstName, "Anaïs") + XCTAssertEqual(studentRes1.age, 2) + XCTAssertEqual(studentRes1.hasPet, false) + XCTAssertEqual(studentRes1.timeZone.identifier, "America/Los_Angeles") + } } diff --git a/tests/declarative/EncodingRegularUsageTests.swift b/tests/declarative/EncodingRegularUsageTests.swift index 103407d..44bdb69 100644 --- a/tests/declarative/EncodingRegularUsageTests.swift +++ b/tests/declarative/EncodingRegularUsageTests.swift @@ -17,6 +17,15 @@ extension EncodingRegularUsageTests { var country: String var hasPet: Bool } + + struct KeyedStudentCamelCaseTimeZone: Codable { + var firstName: String + var lastName: String + var age: Int + var countryOfStudy: String? + var hasPet: Bool + var timeZone: TimeZone + } struct UnkeyedStudent: Encodable { var name: String @@ -331,4 +340,42 @@ extension EncodingRegularUsageTests { } } } + + /// Tests encoding with snake-case key conversion and headers parsed from the value. + func testKeyEncodingStrategy() throws { + // The configuration values to be tested. + let encoding: String.Encoding = .utf8 + //let bomStrategy: Strategy.BOM = .never + let delimiters: Delimiter.Pair = (",", "\n") + let headers = ["name", "age", "country", "hasPet"] + // The data used for testing.
+ typealias Student = _TestData.KeyedStudentCamelCaseTimeZone + let students: [Student] = [ + Student(firstName: "Marcos", lastName: "aaa", age: 1, countryOfStudy: "Spain", hasPet: true, timeZone: .init(abbreviation: "EST")!), + Student(firstName: "Anaïs", lastName: "bbb", age: 2, countryOfStudy: "France", hasPet: false, timeZone: .init(abbreviation: "PST")!), + Student(firstName: "Alex", lastName: "ccc", age: 3, countryOfStudy: nil, hasPet: false, timeZone: .init(abbreviation: "NST")!), + Student(firstName: "家豪", lastName: "ddd", age: 4, countryOfStudy: "China", hasPet: true, timeZone: .init(abbreviation: "AST")!), + Student(firstName: "Дэниел", lastName: "eee", age: 5, countryOfStudy: "Russia", hasPet: true, timeZone: .init(abbreviation: "MST")!), + Student(firstName: "ももこ", lastName: "fff", age: 6, countryOfStudy: "Japan", hasPet: false, timeZone: .init(abbreviation: "CST")!) + ] + + let jsonEncoder = JSONEncoder() + let res = try jsonEncoder.encode(students) + let rs = String(data: res, encoding: encoding)! + print(rs) + + let encoder = CSVEncoder() + //encoder.headers = headers + encoder.encoding = encoding + encoder.keyEncodingStrategy = .convertToSnakeCase + encoder.timeZoneStrategy = .json + encoder.delimiters = delimiters + encoder.headerStrategy = .parseFromValue + + let string = try XCTUnwrap(try encoder.encode(students, into: String.self)) + let content = string.split(separator: delimiters.row.description.first!).map { String($0) } + XCTAssertEqual(content.count, 1+students.count) + } + + }