diff --git a/.changes/next-release/feature-bfffb04affbc8abaf3d60265eeb6705d0fd84d0b.json b/.changes/next-release/feature-bfffb04affbc8abaf3d60265eeb6705d0fd84d0b.json new file mode 100644 index 00000000000..8f438461546 --- /dev/null +++ b/.changes/next-release/feature-bfffb04affbc8abaf3d60265eeb6705d0fd84d0b.json @@ -0,0 +1,7 @@ +{ + "type": "feature", + "description": "Added byte strings and byte text blocks to the IDL to support encoding human readable text as blob values", + "pull_requests": [ + "[#2853](https://github.com/smithy-lang/smithy/pull/2853)" + ] +} diff --git a/docs/source-2.0/spec/idl.rst b/docs/source-2.0/spec/idl.rst index 2a9bf1be523..e527696d132 100644 --- a/docs/source-2.0/spec/idl.rst +++ b/docs/source-2.0/spec/idl.rst @@ -117,13 +117,15 @@ string support defined in :rfc:`7405`. .. productionlist:: smithy ControlSection :*(`ControlStatement`) - ControlStatement :"$" `NodeObjectKey` [`SP`] ":" [`SP`] `NodeValue` `BR` + ControlStatement :"$" `ControlKey` [`SP`] ":" [`SP`] `NodeValue` `BR` + ControlKey :`QuotedText` / `Identifier` .. rubric:: Metadata .. productionlist:: smithy MetadataSection :*(`MetadataStatement`) - MetadataStatement :%s"metadata" `SP` `NodeObjectKey` [`SP`] "=" [`SP`] `NodeValue` `BR` + MetadataStatement :%s"metadata" `SP` `MetadataKey` [`SP`] "=" [`SP`] `NodeValue` `BR` + MetadataKey :`QuotedText` / `Identifier` .. rubric:: Node values @@ -136,7 +138,7 @@ string support defined in :rfc:`7405`. NodeArray :"[" [`WS`] *(`NodeValue` [`WS`]) "]" NodeObject :"{" [`WS`] [`NodeObjectKvp` *(`WS` `NodeObjectKvp`)] [`WS`] "}" NodeObjectKvp :`NodeObjectKey` [`WS`] ":" [`WS`] `NodeValue` - NodeObjectKey :`QuotedText` / `Identifier` + NodeObjectKey :`QuotedText` / `ByteString` / `Identifier` Number :[`Minus`] `Int` [`Frac`] [`Exp`] DecimalPoint :%x2E ; . DigitOneToNine :%x31-39 ; 1-9 @@ -148,7 +150,8 @@ string support defined in :rfc:`7405`. 
Plus :%x2B ; + Zero :%x30 ; 0 NodeKeyword :%s"true" / %s"false" / %s"null" - NodeStringValue :`ShapeId` / `TextBlock` / `QuotedText` + NodeStringValue :`ShapeId` / `TextBlock` / `ByteTextBlock` / `QuotedText` / `ByteString` + ByteString :%s"b" `QuotedText` QuotedText :DQUOTE *`QuotedChar` DQUOTE QuotedChar :%x09 ; tab :/ %x20-21 ; space - "!" @@ -162,6 +165,7 @@ string support defined in :rfc:`7405`. UnicodeEscape :%s"u" `Hex` `Hex` `Hex` `Hex` Hex :DIGIT / %x41-46 / %x61-66 Escape :%x5C ; backslash + ByteTextBlock :%s"b" `TextBlock` TextBlock :`ThreeDquotes` [`SP`] `NL` *`TextBlockContent` `ThreeDquotes` TextBlockContent :`QuotedChar` / (1*2DQUOTE 1*`QuotedChar`) ThreeDquotes :DQUOTE DQUOTE DQUOTE @@ -2398,4 +2402,68 @@ example is interpreted as ``Foo\nBaz Bam``: Baz \ Bam""" +Byte Strings +============ + +The byte string and byte text block productions are used to encode binary +values as human readable strings. These offer an alternative to having to +embed opaque base64 strings in places where binary values are required. + +Byte strings follow the same high-level parsing logic as standard strings. +The escape sequences, line normalization, and incidental whitespace behaviors +that exist in standard strings also work the same way in byte strings. +Converting a valid standard string into a byte string is equivalent to encoding +the original string into its UTF-8 bytes and then base64 encoding those bytes. + +The following values are all logically equivalent after parsing: + +.. tab:: Smithy + + .. code-block:: smithy + + $version: "2" + metadata foo = { + byteString: b"Hello\nWorld" + byteTextBlock: b""" + Hello + World""" + string: "SGVsbG8KV29ybGQ=" + textBlock: """ + SGVsbG8KV29ybGQ=""" + } + +.. tab:: JSON + + .. 
code-block:: json + + { + "smithy": "2", + "metadata": { + "foo": { + "byteString": "SGVsbG8KV29ybGQ=", + "byteTextBlock": "SGVsbG8KV29ybGQ=", + "string": "SGVsbG8KV29ybGQ=", + "textBlock": "SGVsbG8KV29ybGQ=" + } + } + } + +In addition to the :ref:`string escape characters `, +byte strings support additional escape characters to make encoding arbitrary +byte sequences possible: + +.. list-table:: + :header-rows: 1 + :widths: 20 30 50 + + * - Byte value + - Escape + - Meaning + * - ``00`` + - ``\0`` + - NULL byte + * - ``HH`` + - ``\xHH`` + - 2-digit hexadecimal byte value + .. _CommonMark: https://spec.commonmark.org/ diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java index 2ff17be69b4..0926e96c2a9 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java @@ -21,6 +21,7 @@ class DefaultTokenizer implements IdlTokenizer { private int currentTokenColumn = -1; private Number currentTokenNumber; private CharSequence currentTokenStringSlice; + private byte[] currentTokenBytes; private String currentTokenError; DefaultTokenizer(String filename, CharSequence model) { @@ -97,6 +98,17 @@ public final CharSequence getCurrentTokenStringSlice() { } } + @Override + public final byte[] getCurrentTokenBytes() { + getCurrentToken(); + if (currentTokenBytes == null) { + throw syntax("The current token must be a byte string but found: " + + currentTokenType.getDebug(getCurrentTokenLexeme()), getCurrentTokenLocation()); + } + + return currentTokenBytes; + } + @Override public final Number getCurrentTokenNumberValue() { getCurrentToken(); @@ -125,6 +137,7 @@ public final boolean hasNext() { @Override public IdlToken next() { currentTokenStringSlice = null; + currentTokenBytes = null; currentTokenNumber = null; 
currentTokenColumn = parser.column(); currentTokenLine = parser.line(); @@ -175,6 +188,11 @@ public IdlToken next() { return parseString(); case '/': return parseComment(); + case 'b': + if (parser.peek(1) == '"') { + return parseByteString(); + } + return parseIdentifier(); case '-': case '0': case '1': @@ -215,7 +233,6 @@ public IdlToken next() { case 'Z': case '_': case 'a': - case 'b': case 'c': case 'd': case 'e': @@ -388,6 +405,35 @@ private IdlToken parseString() { } } + private IdlToken parseByteString() { + parser.expect('b'); + parser.expect('"'); // skip first quote. + + if (parser.peek() == '"') { + parser.skip(); // skip second quote. + if (parser.peek() == '"') { // A third consecutive quote is a BYTE_TEXT_BLOCK. + parser.skip(); + return parseByteTextBlock(); + } else { + // Empty byte string. + currentTokenEnd = parser.position(); + currentTokenBytes = new byte[0]; + return currentTokenType = IdlToken.BYTE_STRING; + } + } + + try { + // Parse the contents of a byte string. 
+ currentTokenBytes = parseByteStringAndTextBlock(false); + currentTokenEnd = parser.position(); + return currentTokenType = IdlToken.BYTE_STRING; + } catch (RuntimeException e) { + currentTokenEnd = parser.position(); + currentTokenError = "Error parsing byte string: " + e.getMessage(); + return currentTokenType = IdlToken.ERROR; + } + } + private IdlToken parseTextBlock() { try { currentTokenStringSlice = parseQuotedTextAndTextBlock(true); @@ -400,14 +446,26 @@ private IdlToken parseTextBlock() { } } - // Parses both quoted_text and text_block + private IdlToken parseByteTextBlock() { + try { + currentTokenBytes = parseByteStringAndTextBlock(true); + currentTokenEnd = parser.position(); + return currentTokenType = IdlToken.BYTE_TEXT_BLOCK; + } catch (RuntimeException e) { + currentTokenEnd = parser.position(); + currentTokenError = "Error parsing byte text block: " + e.getMessage(); + return currentTokenType = IdlToken.ERROR; + } + } + + // Parses quoted_text and text_block body private CharSequence parseQuotedTextAndTextBlock(boolean triple) { int start = parser.position(); while (!parser.eof()) { char next = parser.peek(); if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) { - // Found closing quotes of quoted_text and/or text_block + // Found closing quotes break; } parser.skip(); @@ -427,4 +485,32 @@ private CharSequence parseQuotedTextAndTextBlock(boolean triple) { return IdlStringLexer.scanStringContents(result, triple); } + + // Parses byte_string and byte_text_block body + private byte[] parseByteStringAndTextBlock(boolean triple) { + int start = parser.position(); + + while (!parser.eof()) { + char next = parser.peek(); + if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) { + // Found closing quotes + break; + } + parser.skip(); + if (next == '\\') { + parser.skip(); + } + } + + // Strip the ending '"'. 
+ CharSequence result = parser.borrowSliceFrom(start); + parser.expect('"'); + + if (triple) { + parser.expect('"'); + parser.expect('"'); + } + + return IdlStringLexer.scanByteStringContents(result, triple); + } } diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlNodeParser.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlNodeParser.java index 3972b4ba087..e8e31fe9ba1 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlNodeParser.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlNodeParser.java @@ -4,6 +4,7 @@ */ package software.amazon.smithy.model.loader; +import java.util.Base64; import java.util.function.Consumer; import software.amazon.smithy.model.SourceLocation; import software.amazon.smithy.model.node.ArrayNode; @@ -53,7 +54,9 @@ static Node expectAndSkipNode(IdlModelLoader loader) { static Node expectAndSkipNode(IdlModelLoader loader, SourceLocation location) { IdlInternalTokenizer tokenizer = loader.getTokenizer(); IdlToken token = tokenizer.expect(IdlToken.STRING, + IdlToken.BYTE_STRING, IdlToken.TEXT_BLOCK, + IdlToken.BYTE_TEXT_BLOCK, IdlToken.NUMBER, IdlToken.IDENTIFIER, IdlToken.LBRACE, @@ -61,17 +64,26 @@ static Node expectAndSkipNode(IdlModelLoader loader, SourceLocation location) { switch (token) { case STRING: - case TEXT_BLOCK: - Node result = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location); + case TEXT_BLOCK: { + String value = tokenizer.getCurrentTokenStringSlice().toString(); tokenizer.next(); - return result; - case IDENTIFIER: + return new StringNode(value, location); + } + case BYTE_STRING: + case BYTE_TEXT_BLOCK: { + String value = Base64.getEncoder().encodeToString(tokenizer.getCurrentTokenBytes()); + tokenizer.next(); + return new StringNode(value, location); + } + case IDENTIFIER: { String shapeId = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer)); return createIdentifier(loader, 
shapeId, location); - case NUMBER: - Number number = tokenizer.getCurrentTokenNumberValue(); + } + case NUMBER: { + Number value = tokenizer.getCurrentTokenNumberValue(); tokenizer.next(); - return new NumberNode(number, location); + return new NumberNode(value, location); + } case LBRACE: return parseObjectNode(loader, location); case LBRACKET: @@ -191,7 +203,9 @@ private static ObjectNode parseObjectNode(IdlModelLoader loader, SourceLocation ObjectNode.Builder builder = ObjectNode.builder().sourceLocation(location); while (tokenizer.hasNext()) { - if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) { + IdlToken token = + tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.BYTE_STRING, IdlToken.IDENTIFIER); + if (token == IdlToken.RBRACE) { break; } diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlStringLexer.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlStringLexer.java index eba1a2fda28..e59d0fb467c 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlStringLexer.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlStringLexer.java @@ -4,16 +4,28 @@ */ package software.amazon.smithy.model.loader; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import software.amazon.smithy.utils.MapUtils; final class IdlStringLexer { - private IdlStringLexer() {} + private static final Map STANDARD_ESCAPE_CHARACTERS = MapUtils.ofEntries( + MapUtils.entry('"', '"'), + MapUtils.entry('\\', '\\'), + MapUtils.entry('/', '/'), + MapUtils.entry('b', '\b'), + MapUtils.entry('f', '\f'), + MapUtils.entry('n', '\n'), + MapUtils.entry('r', '\r'), + MapUtils.entry('t', '\t')); - private enum State { - NORMAL, AFTER_ESCAPE, UNICODE - } + private IdlStringLexer() {} 
// Use the original lexeme of a string when possible, but creates a new string when escapes are used. private static final class StringBuilderProxy { @@ -57,88 +69,135 @@ static CharSequence scanStringContents(CharSequence lexeme, boolean scanningText //StringBuilder result = new StringBuilder(lexeme.length()); StringBuilderProxy result = new StringBuilderProxy(lexeme); - State state = State.NORMAL; - int hexCount = 0; - int unicode = 0; - - // Skip quotes from the start and end. for (int i = 0; i < lexeme.length(); i++) { char c = lexeme.charAt(i); - switch (state) { - case NORMAL: - if (c == '\\') { - state = State.AFTER_ESCAPE; - result.capture(); - } else if (isValidNormalCharacter(c, scanningTextBlock)) { - result.append(c); - } else { - throw new RuntimeException("Invalid string character: `" + c + "`"); - } - break; - case AFTER_ESCAPE: - state = State.NORMAL; - switch (c) { - case '"': - result.append('"'); - continue; - case '\\': - result.append('\\'); - break; - case '/': - result.append('/'); - break; - case 'b': - result.append('\b'); - break; - case 'f': - result.append('\f'); - break; - case 'n': - result.append('\n'); - break; - case 'r': - result.append('\r'); - break; - case 't': - result.append('\t'); - break; - case 'u': - state = State.UNICODE; - break; - case '\n': - // Skip writing the escaped new line. 
- break; - default: - throw new RuntimeException("Invalid escape found in string: `\\" + c + "`"); + + if (c == '\\') { + result.capture(); + + // Next character is guaranteed to exist + i += 1; + char escapeChar = lexeme.charAt(i); + + if (STANDARD_ESCAPE_CHARACTERS.containsKey(escapeChar)) { + result.append(STANDARD_ESCAPE_CHARACTERS.get(escapeChar)); + continue; + } + + switch (escapeChar) { + case 'u': + if (i + 5 > lexeme.length()) { + throw new RuntimeException("Invalid unclosed unicode escape found in string"); + } + + result.append((char) parseHex(lexeme, i + 1, i + 5)); + i += 4; + break; + + case '\n': + // Skip writing the escaped new line. + break; + + default: + throw new RuntimeException("Invalid escape found in string: `\\" + escapeChar + "`"); + } + + } else if (isValidNormalCharacter(c, scanningTextBlock)) { + result.append(c); + } else { + throw new RuntimeException("Invalid string character: `" + c + "`"); + } + } + + return result.getResult(); + } + + static byte[] scanByteStringContents(CharSequence lexeme, boolean scanningByteTextBlock) { + lexeme = normalizeLineEndings(lexeme); + + // Format the byte text block and remove incidental whitespace. + if (scanningByteTextBlock) { + lexeme = formatTextBlock(lexeme); + } + + ByteArrayOutputStream result = new ByteArrayOutputStream(lexeme.length()); + try (OutputStreamWriter resultWriter = new OutputStreamWriter(result, StandardCharsets.UTF_8)) { + + // The lexeme length will be close to the required final byte array length unless it contains a significant + // portion of Unicode codepoints. 
+ int spanStart = 0; + + for (int i = 0; i < lexeme.length(); i++) { + char c = lexeme.charAt(i); + + if (c == '\\') { + // Delay encoding a standard span until an escaped value is encountered + if (spanStart != i) { + resultWriter.append(lexeme, spanStart, i); } - break; - case UNICODE: - if (c >= '0' && c <= '9') { - unicode = (unicode << 4) | (c - '0'); - } else if (c >= 'a' && c <= 'f') { - unicode = (unicode << 4) | (10 + c - 'a'); - } else if (c >= 'A' && c <= 'F') { - unicode = (unicode << 4) | (10 + c - 'A'); + + // Next character is guaranteed to exist + i += 1; + char escapeChar = lexeme.charAt(i); + + if (STANDARD_ESCAPE_CHARACTERS.containsKey(escapeChar)) { + resultWriter.append(STANDARD_ESCAPE_CHARACTERS.get(escapeChar)); + } else { - throw new RuntimeException("Invalid unicode escape character: `" + c + "`"); - } + switch (escapeChar) { + case 'u': + if (i + 5 > lexeme.length()) { + throw new RuntimeException("Invalid unclosed unicode escape found in string"); + } + + resultWriter.append((char) parseHex(lexeme, i + 1, i + 5)); + + i += 4; + break; + + case '0': + // Flush writer prior to attempting to write bytes to the underlying stream + resultWriter.flush(); - if (++hexCount == 4) { - result.append((char) unicode); - hexCount = 0; - state = State.NORMAL; + result.write((byte) 0); + break; + + case 'x': + if (i + 3 > lexeme.length()) { + throw new RuntimeException("Invalid unclosed hex escape found in string"); + } + + // Flush writer prior to attempting to write bytes to the underlying stream + resultWriter.flush(); + + result.write((byte) parseHex(lexeme, i + 1, i + 3)); + i += 2; + break; + + case '\n': + // Skip writing the escaped new line. 
+ break; + + default: + throw new RuntimeException("Invalid escape found in string: `\\" + escapeChar + "`"); + } } - break; - default: - throw new IllegalStateException("Unreachable"); + + spanStart = i + 1; + + } else if (!isValidNormalCharacter(c, scanningByteTextBlock)) { + throw new RuntimeException("Invalid string character: `" + c + "`"); + } } - } - if (state == State.UNICODE) { - throw new RuntimeException("Invalid unclosed unicode escape found in string"); + if (spanStart != lexeme.length()) { + resultWriter.append(lexeme, spanStart, lexeme.length()); + } + } catch (IOException e) { + throw new IllegalStateException("Unreachable", e); } - return result.getResult(); + return result.toByteArray(); } // New lines in strings are normalized from CR (u000D) and CRLF (u000Du000A) to @@ -279,8 +338,37 @@ private static boolean isValidNormalCharacter(char c, boolean isTextBlock) { || c == '\n' || c == '\r' || (c >= 0x20 && c <= 0x21) // space - "!" - || (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block + || (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block and byte_text_block || (c >= 0x23 && c <= 0x5b) // "#" - "[" || c >= 0x5d; // "]"+ } + + /** + * Parses up to 8 hex characters; longer subsequences overflow the int and produce incorrect output. + * + * @param lexeme source character sequence + * @param start position in the sequence to start at (inclusive) + * @param end position in the sequence to stop at (exclusive) + * @return the parsed hex value + */ + private static int parseHex(CharSequence lexeme, int start, int end) { + int hex = 0; + + for (int i = start; i < end; i++) { + char c = lexeme.charAt(i); + hex <<= 4; + + if (c >= '0' && c <= '9') { + hex |= c - '0'; + } else if (c >= 'a' && c <= 'f') { + hex |= 10 + c - 'a'; + } else if (c >= 'A' && c <= 'F') { + hex |= 10 + c - 'A'; + } else { + throw new RuntimeException("Invalid hex character: `" + c + "`"); + } + } + + return hex; + } } diff --git 
a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlToken.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlToken.java index bdbfcac1b88..d69c11cc436 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlToken.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlToken.java @@ -39,7 +39,9 @@ public boolean isWhitespace() { DOC_COMMENT("///"), AT("@"), STRING("\""), + BYTE_STRING("b\""), TEXT_BLOCK("\"\"\""), + BYTE_TEXT_BLOCK("b\"\"\""), COLON(":"), WALRUS(":="), IDENTIFIER(""), diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java index 590f58649e3..7c95f7a18ba 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java @@ -151,6 +151,14 @@ default CharSequence getCurrentTokenLexeme() { */ CharSequence getCurrentTokenStringSlice(); + /** + * If the current token is a byte string or byte text block, get the parsed content as a byte array. + * + * @return Returns the byte array associated with the current token. + * @throws ModelSyntaxException if the current token is not a byte string or byte text block. + */ + byte[] getCurrentTokenBytes(); + /** * If the current token is a number, get the associated parsed number. 
* diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTraitParser.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTraitParser.java index 5e077d16ccf..2f1efb470fe 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTraitParser.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTraitParser.java @@ -5,6 +5,7 @@ package software.amazon.smithy.model.loader; import java.util.ArrayList; +import java.util.Base64; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -158,29 +159,53 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo tokenizer.expect(IdlToken.LBRACE, IdlToken.LBRACKET, IdlToken.TEXT_BLOCK, + IdlToken.BYTE_TEXT_BLOCK, IdlToken.STRING, + IdlToken.BYTE_STRING, IdlToken.NUMBER, IdlToken.IDENTIFIER); switch (tokenizer.getCurrentToken()) { case LBRACE: - case LBRACKET: + case LBRACKET: { Node result = IdlNodeParser.expectAndSkipNode(loader, location); tokenizer.skipWsAndDocs(); return result; - case TEXT_BLOCK: - Node textBlockResult = new StringNode(tokenizer.getCurrentTokenStringSlice().toString(), location); + } + case TEXT_BLOCK: { + String value = tokenizer.getCurrentTokenStringSlice().toString(); tokenizer.next(); tokenizer.skipWsAndDocs(); - return textBlockResult; - case NUMBER: - Number number = tokenizer.getCurrentTokenNumberValue(); + return new StringNode(value, location); + } + case BYTE_TEXT_BLOCK: { + String value = Base64.getEncoder().encodeToString(tokenizer.getCurrentTokenBytes()); + tokenizer.next(); + tokenizer.skipWsAndDocs(); + return new StringNode(value, location); + } + case NUMBER: { + Number value = tokenizer.getCurrentTokenNumberValue(); tokenizer.next(); tokenizer.skipWsAndDocs(); - return new NumberNode(number, location); - case STRING: - String stringValue = tokenizer.getCurrentTokenStringSlice().toString(); - StringNode stringNode = new StringNode(stringValue, location); 
+ return new NumberNode(value, location); + } + case STRING: { + String value = tokenizer.getCurrentTokenStringSlice().toString(); + StringNode stringNode = new StringNode(value, location); + tokenizer.next(); + tokenizer.skipWsAndDocs(); + if (tokenizer.getCurrentToken() == IdlToken.COLON) { + tokenizer.next(); + tokenizer.skipWsAndDocs(); + return parseStructuredTrait(loader, stringNode); + } else { + return stringNode; + } + } + case BYTE_STRING: { + String value = Base64.getEncoder().encodeToString(tokenizer.getCurrentTokenBytes()); + StringNode stringNode = new StringNode(value, location); tokenizer.next(); tokenizer.skipWsAndDocs(); if (tokenizer.getCurrentToken() == IdlToken.COLON) { @@ -190,8 +215,9 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo } else { return stringNode; } + } case IDENTIFIER: - default: + default: { // Handle: `foo`, `foo$bar`, `foo.bar#baz`, `foo.bar#baz$bam`, `foo: bam` String identifier = loader.internString(IdlShapeIdParser.expectAndSkipShapeId(tokenizer)); tokenizer.skipWsAndDocs(); @@ -203,6 +229,7 @@ private static Node parseTraitValueBody(IdlModelLoader loader, SourceLocation lo tokenizer.skipWsAndDocs(); return parseStructuredTrait(loader, new StringNode(identifier, location)); } + } } } @@ -228,7 +255,7 @@ private static ObjectNode parseStructuredTrait(IdlModelLoader loader, StringNode tokenizer.skipWsAndDocs(); while (tokenizer.getCurrentToken() != IdlToken.RPAREN) { - tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING); + tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING, IdlToken.BYTE_STRING); String key = loader.internString(tokenizer.getCurrentTokenStringSlice()); StringNode keyNode = new StringNode(key, tokenizer.getCurrentTokenLocation()); tokenizer.next(); diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlInternalTokenizerTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlInternalTokenizerTest.java index 
daed768697c..725561183d4 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlInternalTokenizerTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlInternalTokenizerTest.java @@ -10,6 +10,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.startsWith; +import java.nio.charset.StandardCharsets; import java.util.stream.Stream; import org.hamcrest.MatcherAssert; import org.junit.jupiter.api.Assertions; @@ -246,4 +247,71 @@ public void parsesTextBlocks(String model, String stringValue) { assertThat(tokenizer.getCurrentTokenStringSlice().toString(), equalTo(stringValue)); } + + public static Stream byteTextBlockTests() { + return Stream.of( + Arguments.of( + "b\"\"\"\n" + + " Hello\n" + + " - Indented\"\"\"\n", + "Hello\n - Indented"), + Arguments.of( + "b\"\"\"\n" + + " Hello\n" + + " - Indented\n" + + " \"\"\"\n", + "Hello\n - Indented\n"), + Arguments.of( + "b\"\"\"\n" + + " Hello\n" + + " - Indented\n" + + "\"\"\"\n", + " Hello\n - Indented\n"), + Arguments.of( + "b\"\"\"\n" + + " Hello\"\"\"\n", + "Hello"), + Arguments.of( + "b\"\"\"\n" + + " Hello\n" + + "\n" + + " - Indented\n" + + "\"\"\"\n", + " Hello\n\n - Indented\n"), + Arguments.of( + "b\"\"\"\n" + + " \n" // only WS doesn't influence line length calculations. + + " Hello\n" + + " \n" // only WS doesn't influence line length calculations. + + " \"\"\"", + "\nHello\n\n"), + Arguments.of( + "b\"\"\"\n" + + "\n" // empty lines are incidental whitespace. + + " Hello\n" + + " \n" // only WS doesn't influence line length calculations. + + " \"\"\"", + "\nHello\n\n"), + Arguments.of( + "b\"\"\"\n" + + "\n" // empty lines are incidental whitespace. 
+ + "Hello\n" + + "\n" + + "\n" + + "\"\"\"", + "\nHello\n\n\n"), + Arguments.of( + "b\"\"\"\n" + + "\"\"\"", + "")); + } + + @ParameterizedTest + @MethodSource("byteTextBlockTests") + public void parsesByteTextBlocks(String model, String stringValue) { + IdlInternalTokenizer tokenizer = new IdlInternalTokenizer("a.smithy", model); + tokenizer.expect(IdlToken.BYTE_TEXT_BLOCK); + + assertThat(tokenizer.getCurrentTokenBytes(), equalTo(stringValue.getBytes(StandardCharsets.UTF_8))); + } } diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/TokenizerTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/TokenizerTest.java index 338d1a210b7..43ef83c9e56 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/TokenizerTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/TokenizerTest.java @@ -8,6 +8,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import java.nio.charset.StandardCharsets; import java.util.NoSuchElementException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -214,6 +215,30 @@ public void storesCurrentTokenString() { assertThat(tokenizer.getCurrentTokenStringSlice().toString(), equalTo("hello")); } + @Test + public void storesCurrentTokenByteString() { + IdlTokenizer tokenizer = IdlTokenizer.create("b\"hello\""); + + assertThat(tokenizer.next(), is(IdlToken.BYTE_STRING)); + assertThat(tokenizer.getCurrentTokenBytes(), equalTo("hello".getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void storesCurrentTokenTextBlock() { + IdlTokenizer tokenizer = IdlTokenizer.create("\"\"\"\nhello\"\"\""); + + assertThat(tokenizer.next(), is(IdlToken.TEXT_BLOCK)); + assertThat(tokenizer.getCurrentTokenStringSlice().toString(), equalTo("hello")); + } + + @Test + public void storesCurrentTokenByteTextBlock() { + IdlTokenizer tokenizer = IdlTokenizer.create("b\"\"\"\nhello\"\"\""); + + 
assertThat(tokenizer.next(), is(IdlToken.BYTE_TEXT_BLOCK)); + assertThat(tokenizer.getCurrentTokenBytes(), equalTo("hello".getBytes(StandardCharsets.UTF_8))); + } + @Test public void storesCurrentTokenStringForIdentifier() { IdlTokenizer tokenizer = IdlTokenizer.create("hello"); @@ -269,4 +294,33 @@ public void tokenizesEmptyStrings() { assertThat(tokenizer.getCurrentTokenLexeme().toString(), equalTo("\"\"")); assertThat(tokenizer.getCurrentTokenSpan(), is(2)); } + + @Test + public void tokenizesByteStringWithNewlines() { + IdlTokenizer tokenizer = IdlTokenizer.create("b\"hi\nthere\""); + + tokenizer.next(); + + assertThat(tokenizer.getCurrentToken(), is(IdlToken.BYTE_STRING)); + assertThat(tokenizer.getCurrentTokenBytes(), equalTo("hi\nthere".getBytes(StandardCharsets.UTF_8))); + assertThat(tokenizer.getCurrentTokenLexeme().toString(), equalTo("b\"hi\nthere\"")); + assertThat(tokenizer.getCurrentTokenSpan(), is(11)); + + tokenizer.next(); + assertThat(tokenizer.getCurrentToken(), is(IdlToken.EOF)); + assertThat(tokenizer.getCurrentTokenLine(), is(2)); + assertThat(tokenizer.getCurrentTokenColumn(), is(7)); + } + + @Test + public void tokenizesEmptyByteStrings() { + IdlTokenizer tokenizer = IdlTokenizer.create("b\"\""); + + tokenizer.next(); + + assertThat(tokenizer.getCurrentToken(), is(IdlToken.BYTE_STRING)); + assertThat(tokenizer.getCurrentTokenBytes(), equalTo(new byte[0])); + assertThat(tokenizer.getCurrentTokenLexeme().toString(), equalTo("b\"\"")); + assertThat(tokenizer.getCurrentTokenSpan(), is(3)); + } } diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-apostrophe.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-apostrophe.smithy new file mode 100644 index 00000000000..430aafcad68 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-apostrophe.smithy @@ 
-0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid escape found in string: `\'` | Model +namespace smithy.example + +@documentation(b"\'") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-space.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-space.smithy new file mode 100644 index 00000000000..76f38186459 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-escape-space.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid escape found in string: `\ ` | Model +namespace smithy.example + +@documentation(b"\ ") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape.smithy new file mode 100644 index 00000000000..e85c9ca4159 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid unclosed hex escape found in string | Model +namespace smithy.example + +@documentation(b"\x0") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape2.smithy new file mode 100644 index 00000000000..4d355c33fe9 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-hex-escape2.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid hex character: 
`t` | Model +namespace smithy.example + +@documentation(b"\xt0") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape.smithy new file mode 100644 index 00000000000..e8414ab3ead --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid unclosed unicode escape found in string | Model +namespace smithy.example + +@documentation(b"\ua") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape2.smithy new file mode 100644 index 00000000000..f6bdda0ff1a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape2.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid unclosed unicode escape found in string | Model +namespace smithy.example + +@documentation(b"\uaa") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape3.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape3.smithy new file mode 100644 index 00000000000..fca62f1746b --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape3.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid unclosed unicode escape found in string | Model +namespace smithy.example 
+ +@documentation(b"\uaaa") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape4.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape4.smithy new file mode 100644 index 00000000000..25e7ff3faca --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape4.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid hex character: `t` | Model +namespace smithy.example + +@documentation(b"\uaaat") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape5.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape5.smithy new file mode 100644 index 00000000000..81f18f5cbae --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/invalid-unicode-escape5.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte string: Invalid hex character: `t` | Model +namespace smithy.example + +@documentation(b"\uataa") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-empty.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-empty.smithy new file mode 100644 index 00000000000..4a9375761bd --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-empty.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte text block: Text block is empty | Model +namespace smithy.example + +@documentation(b"""""") +string MyString diff --git 
a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-invalid-unicode.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-invalid-unicode.smithy new file mode 100644 index 00000000000..bab43ca707a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-invalid-unicode.smithy @@ -0,0 +1,6 @@ +// Syntax error at line 4, column 16: Error parsing byte text block: Invalid unclosed unicode escape found in string | Model +namespace smithy.example + +@documentation(b""" + \ua""") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-missing-newline.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-missing-newline.smithy new file mode 100644 index 00000000000..970f8f2db3a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/text-block-missing-newline.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing byte text block: Text block must start with a new line | Model +namespace smithy.example + +@documentation(b"""Hi""") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string.smithy new file mode 100644 index 00000000000..048911cc721 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 6, column 1: Expected: '"', but found '[EOF]' +namespace smithy.example + +@documentation(b") +string MyString diff --git 
a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string2.smithy new file mode 100644 index 00000000000..a10aac0eaeb --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string2.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 6, column 1: Expected: '"', but found '[EOF]' +namespace smithy.example + +@documentation(b"\") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string3.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string3.smithy new file mode 100644 index 00000000000..9a4dc829afc --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-string3.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 6, column 1: Expected: '"', but found '[EOF]' +namespace smithy.example + +@documentation(b"\\\") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-text-block.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-text-block.smithy new file mode 100644 index 00000000000..f88971d1692 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/byte-strings/unclosed-text-block.smithy @@ -0,0 +1,6 @@ +// Syntax error at line 7, column 1: Expected: '"', but found '[EOF]' +namespace smithy.example + +@documentation(b""" +) +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-string-key.smithy 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-string-key.smithy new file mode 100644 index 00000000000..76cf1e33e49 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-string-key.smithy @@ -0,0 +1,2 @@ +// Syntax error at line 2, column 2: Expected one of IDENTIFIER, STRING('"'); but found BYTE_STRING('b"foo"') | Model +$b"foo": "1.0" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-text-block-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-text-block-key.smithy new file mode 100644 index 00000000000..7632daa3f97 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-byte-text-block-key.smithy @@ -0,0 +1,3 @@ +// Syntax error at line 2, column 2: Expected one of IDENTIFIER, STRING('"'); but found BYTE_TEXT_BLOCK('b"""\nfoo"""') | Model +$b""" +foo""": "1.0" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-key2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-identifier-key2.smithy similarity index 100% rename from smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-key2.smithy rename to smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-identifier-key2.smithy diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-identifier-key.smithy similarity index 100% rename from 
smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-key.smithy rename to smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-identifier-key.smithy diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-text-block-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-text-block-key.smithy new file mode 100644 index 00000000000..167974e00c5 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/control/control-with-invalid-text-block-key.smithy @@ -0,0 +1,3 @@ +// Syntax error at line 2, column 2: Expected one of IDENTIFIER, STRING('"'); but found TEXT_BLOCK('"""\nfoo"""') | Model +$""" +foo""": "1.0" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/defaults/default-incomplete-node.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/defaults/default-incomplete-node.smithy index 2a9d91db3e9..fce4b48bd23 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/defaults/default-incomplete-node.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/defaults/default-incomplete-node.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 8, column 1: Expected one of STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model +// Syntax error at line 8, column 1: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model $version: "2.0" namespace com.foo diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/invalid-object-text-block-key.smithy 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/invalid-object-text-block-key.smithy index 5ecb7af8fc7..4f73d301142 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/invalid-object-text-block-key.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/invalid-object-text-block-key.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 3, column 3: Expected one of RBRACE('}'), STRING('"'), IDENTIFIER; but found TEXT_BLOCK('"""\n Key"""') | Model +// Syntax error at line 3, column 3: Expected one of RBRACE('}'), STRING('"'), BYTE_STRING('b"'), IDENTIFIER; but found TEXT_BLOCK('"""\n Key"""') | Model metadata foo = { """ Key""": "hello" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-multiple-lines.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-multiple-lines.smithy index c4853f8b8ff..4a7aa33b19d 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-multiple-lines.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-multiple-lines.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 3, column 16: Expected one of STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found COMMENT('// this is not allowed\n') | Model +// Syntax error at line 3, column 16: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found COMMENT('// this is not allowed\n') | Model $version: "2.0" metadata foo = // this is not allowed "bar" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-string-key.smithy 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-string-key.smithy new file mode 100644 index 00000000000..9d73f677b7c --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-string-key.smithy @@ -0,0 +1,2 @@ +// Syntax error at line 2, column 10: Expected one of IDENTIFIER, STRING('"'); but found BYTE_STRING('b"foo"') | Model +metadata b"foo" = "baz" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-text-block-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-text-block-key.smithy new file mode 100644 index 00000000000..98ea10eec59 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-byte-text-block-key.smithy @@ -0,0 +1,3 @@ +// Syntax error at line 2, column 10: Expected one of IDENTIFIER, STRING('"'); but found BYTE_TEXT_BLOCK('b"""\nfoo"""') | Model +metadata b""" +foo""" = "baz" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-invalid-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-invalid-identifier-key.smithy similarity index 100% rename from smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-invalid-key.smithy rename to smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-invalid-identifier-key.smithy diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-text-block-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-text-block-key.smithy new file mode 100644 index 00000000000..0dd44487c6a --- /dev/null 
+++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/metadata/metadata-with-text-block-key.smithy @@ -0,0 +1,3 @@ +// Syntax error at line 2, column 10: Expected one of IDENTIFIER, STRING('"'); but found TEXT_BLOCK('"""\nfoo"""') | Model +metadata """ +foo""" = "baz" diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/numbers/leading-decimal.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/numbers/leading-decimal.smithy index 19556cdbb33..335a668448f 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/numbers/leading-decimal.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/numbers/leading-decimal.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 7, column 7: Expected one of LBRACE('{'), LBRACKET('['), TEXT_BLOCK('"""'), STRING('"'), NUMBER, IDENTIFIER; but found DOT('.') | Model +// Syntax error at line 7, column 7: Expected one of LBRACE('{'), LBRACKET('['), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), STRING('"'), BYTE_STRING('b"'), NUMBER, IDENTIFIER; but found DOT('.') | Model namespace smithy.example @trait diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-escape-hex.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-escape-hex.smithy new file mode 100644 index 00000000000..f4955d21f9a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-escape-hex.smithy @@ -0,0 +1,5 @@ +// Syntax error at line 4, column 16: Error parsing quoted string: Invalid escape found in string: `\x` | Model +namespace smithy.example + +@documentation("\x00") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape4.smithy 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape4.smithy index 0b2e9770b0e..99f23c782df 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape4.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape4.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 4, column 16: Error parsing quoted string: Invalid unicode escape character: `t` | Model +// Syntax error at line 4, column 16: Error parsing quoted string: Invalid hex character: `t` | Model namespace smithy.example @documentation("\uaaat") diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape5.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape5.smithy index 98de13ddb08..b0fca656f69 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape5.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/strings/invalid-unicode-escape5.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 4, column 16: Error parsing quoted string: Invalid unicode escape character: `t` | Model +// Syntax error at line 4, column 16: Error parsing quoted string: Invalid hex character: `t` | Model namespace smithy.example @documentation("\uataa") diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-invalid-object-key3.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-invalid-object-key3.smithy index 4f960f34a80..965e044d6e3 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-invalid-object-key3.smithy +++ 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-invalid-object-key3.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 3, column 17: Expected one of IDENTIFIER, STRING('"'); but found LBRACE('{') | Model +// Syntax error at line 3, column 17: Expected one of IDENTIFIER, STRING('"'), BYTE_STRING('b"'); but found LBRACE('{') | Model namespace com.foo @foo(foo: true, {}: true) string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list1.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list1.smithy index ca03d073f7f..782941f0180 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list1.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list1.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 5, column 1: Expected one of STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model +// Syntax error at line 5, column 1: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model namespace com.foo @tags(["foo", "bar" string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list2.smithy index 3c29b82cac4..2543caa3405 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list2.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list2.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 5, column 1: Expected one of 
STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model +// Syntax error at line 5, column 1: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model namespace com.foo @tags(["foo", "bar", string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list3.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list3.smithy index 5b8468d8478..dd95e00b5e5 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list3.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list3.smithy @@ -1,4 +1,4 @@ -// Syntax error at line 5, column 1: Expected one of STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model +// Syntax error at line 5, column 1: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model namespace com.foo @tags([ string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list4.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list4.smithy index 3fffd8a6d07..5a9b9656bf5 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list4.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/annotation-unclosed-list4.smithy @@ -1,3 +1,3 @@ -// Syntax error at line 4, column 1: Expected one of STRING('"'), TEXT_BLOCK('"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | 
Model +// Syntax error at line 4, column 1: Expected one of STRING('"'), BYTE_STRING('b"'), TEXT_BLOCK('"""'), BYTE_TEXT_BLOCK('b"""'), NUMBER, IDENTIFIER, LBRACE('{'), LBRACKET('['); but found EOF | Model namespace com.foo @tags([[[[[[[[[[[[ diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/invalid-trait-bytetextblock-key.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/invalid-trait-bytetextblock-key.smithy new file mode 100644 index 00000000000..9ccff13cf1e --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/traits/invalid-trait-bytetextblock-key.smithy @@ -0,0 +1,7 @@ +// Syntax error at line 6, column 11: Expected RPAREN(')') but found COLON(':') | Model +namespace smithy.example + +@externalDocumentation( + b""" + Foo""": "bar") +string MyString diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.json b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.json new file mode 100644 index 00000000000..8db162e7ea8 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.json @@ -0,0 +1,77 @@ +{ + "smithy": "1.0", + "shapes": { + "smithy.example#DiscardLastLineOffset": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGVsbG8KVGhlcmUKT2sK" + } + }, + "smithy.example#EmbeddedQuotesTextBlock": { + "type": "string", + "traits": { + "smithy.api#documentation": "IkhpIg==" + } + }, + "smithy.example#EmptyLinesAreIncidentalWs": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGVsbG8KCnRoZXJlCg==" + } + }, + "smithy.example#EmptyString": { + "type": "string", + "traits": { + "smithy.api#documentation": "" + } + }, + "smithy.example#EscapedNewlineString": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGkgdGhlcmUgYnll" + } + }, + 
"smithy.example#NewlineString": { + "type": "string", + "traits": { + "smithy.api#documentation": "Cg==" + } + }, + "smithy.example#NoTrailingNewlineString": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGVsbG8=" + } + }, + "smithy.example#SingleCharacterTextBlock": { + "type": "string", + "traits": { + "smithy.api#documentation": "Zg==" + } + }, + "smithy.example#TrailingNewlineString": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGVsbG8K" + } + }, + "smithy.example#WithHex": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGkgdGhlcmUK" + } + }, + "smithy.example#WithNullByte": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGkACg==" + } + }, + "smithy.example#WithUnicode": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGkg7oGaLgo=" + } + } + } +} diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.smithy new file mode 100644 index 00000000000..5cf54327e5a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/byte-text-blocks.smithy @@ -0,0 +1,64 @@ +namespace smithy.example + +@documentation(b""" + """) +string EmptyString + +@documentation(b""" + + """) +string NewlineString + +@documentation(b""" + Hello + """) +string TrailingNewlineString + +@documentation(b""" + Hello""") +string NoTrailingNewlineString + +@documentation(b""" + Hi \ + there \ + bye""") +string EscapedNewlineString + +@documentation(b""" + Hi \uE05A. + """) +string WithUnicode + +@documentation(b""" + Hi\x20there + """) +string WithHex + +@documentation(b""" + Hi\0 + """) +string WithNullByte + +// If the last line is offset to the right, it's discarded since it's all whitespace. 
+@documentation(b""" + Hello + There + Ok + """) +string DiscardLastLineOffset + +// Empty lines and lines with only ws do not contribute to incidental ws. +@documentation(b""" + Hello + + there + """) +string EmptyLinesAreIncidentalWs + +@documentation(b""" + f""") +string SingleCharacterTextBlock + +@documentation(b""" + "Hi\"""") +string EmbeddedQuotesTextBlock diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.json b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.json index 78594e2a20c..fd3b8393cff 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.json +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.json @@ -117,6 +117,12 @@ "traits": { "smithy.api#default": 0 } + }, + "t": { + "target": "smithy.api#Blob", + "traits": { + "smithy.api#default": "Zm9v" + } } } }, diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.smithy index 872e45e99e8..7e1ea615594 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/defaults/valid-defaults.smithy @@ -22,6 +22,7 @@ structure Foo { q: StringMap = {} // comment r: BigInteger = 0 s: BigDecimal = 0 + t: Blob = b"foo" } list StringList { diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/trait-locations.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/trait-locations.smithy index a04e8b4dc6a..96ddc69f2ff 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/trait-locations.smithy +++ 
b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/trait-locations.smithy @@ -14,6 +14,12 @@ document documentTrait @trait string stringTrait +@trait +string byteStringTrait + +@trait +string byteTextBlockTrait + @trait structure annotationTrait {} @@ -26,33 +32,37 @@ structure annotationTrait {} // variation. /// Documentation trait using the doc comment syntax -@annotationTrait // Annotation trait without parens -@internal() // Annotation trait with parens -@deprecated( // Structured trait with no braces +@annotationTrait // Annotation trait without parens +@internal() // Annotation trait with parens +@deprecated( // Structured trait with no braces since: "1.0" ) -@tags(["foo"]) // List trait -@unstable({}) // Structured trait with braces -@stringTrait("foo") // String trait using normal string syntax +@tags(["foo"]) // List trait +@unstable({}) // Structured trait with braces +@stringTrait("foo") // String trait using normal string syntax +@byteStringTrait(b"foo") // String trait using byte string syntax @since(""" - 0.9""") // String trait using block syntax -@numberTrait(1) // Number trait -@boolTrait(true) // Boolean trait -@documentTrait(null) // Null value trait + 0.9""") // String trait using block syntax +@byteTextBlockTrait(b"foo") // String trait using byte text block syntax +@numberTrait(1) // Number trait +@boolTrait(true) // Boolean trait +@documentTrait(null) // Null value trait structure TraitBearer { /// Documentation trait using the doc comment syntax - @annotationTrait // Annotation trait without parens - @internal() // Annotation trait with parens - @deprecated( // Structured trait with no braces + @annotationTrait // Annotation trait without parens + @internal() // Annotation trait with parens + @deprecated( // Structured trait with no braces since: "1.0" ) - @tags(["foo"]) // List trait - @unstable({}) // Structured trait with braces - @stringTrait("foo") // String trait using normal string syntax + @tags(["foo"]) // 
List trait + @unstable({}) // Structured trait with braces + @stringTrait("foo") // String trait using normal string syntax + @byteStringTrait(b"foo") // String trait using byte string syntax @since(""" - 0.9""") // String trait using block syntax - @numberTrait(1) // Number trait - @boolTrait(true) // Boolean trait - @documentTrait(null) // Null value trait + 0.9""") // String trait using block syntax + @byteTextBlockTrait(b"foo") // String trait using byte text block syntax + @numberTrait(1) // Number trait + @boolTrait(true) // Boolean trait + @documentTrait(null) // Null value trait member: String } diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.json b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.json index 0a1b75dc678..a0024513e18 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.json +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.json @@ -201,6 +201,24 @@ "com.example#bInteger": 9223372036854775808 } }, + "com.example#ZB": { + "type": "string", + "traits": { + "smithy.api#documentation": "SGVsbG8h" + } + }, + "com.example#ZC": { + "type": "string", + "traits": { + "smithy.api#documentation": "CkhlbGxvISBUaGlzIGlzIGEgdGVzdC4KCklnbm9yZSB0aGVzZSB0b2tlbnM6IHt9W10oKSw6LT4kdmVyc2lvbi8vPD4gKzEwIC0xMCA9CgpJcyBpdCB3b3JraW5nPyBJcyAiVGhpcyIgdGhlICdleHBlY3RlZCcgcmVzdWx0PwpJcyB0aGlzIGEgYmFja3NsYXNoPyAiXCIu" + } + }, + "com.example#ZD": { + "type": "string", + "traits": { + "smithy.api#documentation": "VGhpcyBpcyBhCnN0cmluZyBkZWZpbmVkIG9uIG11bHRpcGxlIGxpbmVzLgpJdCBjYW4gc3BhbgphCmdyZWF0Cm1hbnkKbGluZXMu" + } + }, "com.example#bDecimal": { "type": "bigDecimal", "traits": { diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.smithy index 6b7d1b6a1cf..0d5646aa34e 100644 --- 
a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/traits.smithy @@ -29,6 +29,9 @@ string D @documentation("Hello!") string E +@documentation(b"Hello!") +string ZB + // Multiple lines and escapes @documentation(" Hello! This is a test. @@ -39,6 +42,15 @@ Is it working? Is \"This\" the 'expected' result? Is this a backslash? \"\\\".") string F +@documentation(b" +Hello! This is a test. + +Ignore these tokens: {}[](),:->$version//<> +10 -10 = + +Is it working? Is \"This\" the 'expected' result? +Is this a backslash? \"\\\".") +string ZC + // Unquoted string resolves to a shape ID @documentation(H) @@ -130,6 +142,18 @@ string T lines.""") string U +@documentation(b""" + This is a + string defined on multiple lines. + It \ + can \ + span + a + great + many + lines.""") +string ZD + apply E @deprecated @documentation("") diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/BracketFormatter.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/BracketFormatter.java index 59f99f849a2..958924a8487 100644 --- a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/BracketFormatter.java +++ b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/BracketFormatter.java @@ -98,11 +98,14 @@ private boolean hasHardLine(TreeCursor cursor) { List children = cursor.findChildrenByType( TreeType.COMMENT, TreeType.TEXT_BLOCK, + TreeType.BYTE_TEXT_BLOCK, TreeType.NODE_ARRAY, TreeType.NODE_OBJECT, - TreeType.QUOTED_TEXT); + TreeType.QUOTED_TEXT, + TreeType.BYTE_STRING); for (TreeCursor child : children) { - if (child.getTree().getType() != TreeType.QUOTED_TEXT) { + if (child.getTree().getType() != TreeType.QUOTED_TEXT + && child.getTree().getType() != TreeType.BYTE_STRING) { return true; } else if (child.getTree().getStartLine() != child.getTree().getEndLine()) { // Detect strings with line breaks. 
diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java index 48a0904123a..f0dfc64a107 100644 --- a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java +++ b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java @@ -33,6 +33,7 @@ public final class CapturedToken implements FromSourceLocation, ToSmithyBuilder< private final int endColumn; private final CharSequence lexeme; private final String stringContents; + private final byte[] byteContents; private final String errorMessage; private final Number numberValue; @@ -46,6 +47,7 @@ private CapturedToken( int endColumn, CharSequence lexeme, String stringContents, + byte[] byteContents, Number numberValue, String errorMessage ) { @@ -57,9 +59,11 @@ private CapturedToken( this.startColumn = startColumn; this.endLine = endLine; this.endColumn = endColumn; + this.byteContents = byteContents; if (stringContents == null - && (token == IdlToken.IDENTIFIER || token == IdlToken.STRING || token == IdlToken.TEXT_BLOCK)) { + && (token == IdlToken.IDENTIFIER || token == IdlToken.STRING + || token == IdlToken.TEXT_BLOCK)) { this.stringContents = lexeme.toString(); } else { this.stringContents = stringContents; @@ -92,6 +96,7 @@ public static final class Builder implements SmithyBuilder { private int endColumn; private CharSequence lexeme; private String stringContents; + private byte[] byteContents; private String errorMessage; private Number numberValue; @@ -109,6 +114,7 @@ public CapturedToken build() { endColumn, lexeme, stringContents, + byteContents, numberValue, errorMessage); } @@ -158,6 +164,11 @@ public Builder stringContents(String stringContents) { return this; } + public Builder byteContents(byte[] byteContents) { + this.byteContents = byteContents; + return this; + } + public Builder errorMessage(String errorMessage) { this.errorMessage = errorMessage; return 
this; @@ -197,9 +208,13 @@ public static CapturedToken from(IdlTokenizer tokenizer, Function new RuntimeException("TEXT_BLOCK cursor does not have an IDL token")) + .orElseThrow(() -> new RuntimeException( + "TEXT_BLOCK or BYTE_TEXT_BLOCK cursor does not have an IDL token")) .getStringContents(); // If the last character is a newline, then the closing triple quote must be on the next line. boolean endQuoteOnNextLine = stringValue.endsWith("\n") || stringValue.endsWith("\r"); List resultLines = new ArrayList<>(); - resultLines.add(Doc.text("\"\"\"")); + if (treeType == TreeType.TEXT_BLOCK) { + resultLines.add(Doc.text("\"\"\"")); + } else { + resultLines.add(Doc.text("b\"\"\"")); + } String[] inputLines = stringValue.split("\\r?\\n", -1); for (int i = 0; i < inputLines.length; i++) { @@ -502,6 +509,7 @@ Doc visit(TreeCursor cursor) { case TOKEN: case QUOTED_TEXT: + case BYTE_STRING: case NUMBER: case SHAPE_ID: case ROOT_SHAPE_ID: @@ -703,7 +711,8 @@ private static Doc formatNodeObjectKvp( // have a NODE_VALUE child. TreeCursor nodeValue = cursor.getFirstChild(TreeType.NODE_VALUE); boolean isTextBlock = Optional.ofNullable(nodeValue.getFirstChild(TreeType.NODE_STRING_VALUE)) - .map(nodeString -> nodeString.getFirstChild(TreeType.TEXT_BLOCK)) + .map(nodeString -> Optional.ofNullable(nodeString.getFirstChild(TreeType.TEXT_BLOCK)) + .orElseGet(() -> nodeString.getFirstChild(TreeType.BYTE_TEXT_BLOCK))) .isPresent(); Doc nodeValueDoc = valueVisitor.apply(nodeValue); diff --git a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java index ebe15ae9045..c4f33fee80c 100644 --- a/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java +++ b/smithy-syntax/src/main/java/software/amazon/smithy/syntax/TreeType.java @@ -23,7 +23,9 @@ *
  • The {@code Number} production is combined into a single {@link #NUMBER} node. Productions like * {@code DecimalPoint}, {@code Exp}, etc are not exposed in the token tree.
  • *
  • The {@code QuotedText} production is combined into a single {@link #QUOTED_TEXT} node. + *
  • The {@code ByteString} production is combined into a single {@link #BYTE_STRING} node. *
  • The {@code TextBlock} production is combined into a single {@link #TEXT_BLOCK} node. + *
  • The {@code ByteTextBlock} production is combined into a single {@link #BYTE_TEXT_BLOCK} node. * */ public enum TreeType { @@ -604,17 +606,21 @@ void parse(CapturingTokenizer tokenizer) { tokenizer.expect(IdlToken.LBRACE, IdlToken.LBRACKET, IdlToken.TEXT_BLOCK, + IdlToken.BYTE_TEXT_BLOCK, IdlToken.STRING, + IdlToken.BYTE_STRING, IdlToken.NUMBER, IdlToken.IDENTIFIER); switch (tokenizer.getCurrentToken()) { case LBRACE: case LBRACKET: case TEXT_BLOCK: + case BYTE_TEXT_BLOCK: case NUMBER: TRAIT_NODE.parse(tokenizer); break; case STRING: + case BYTE_STRING: case IDENTIFIER: default: CapturedToken nextPastWs = tokenizer.peekWhile(1, @@ -710,7 +716,9 @@ void parse(CapturingTokenizer tokenizer) { void parse(CapturingTokenizer tokenizer) { tokenizer.withState(this, () -> { IdlToken token = tokenizer.expect(IdlToken.STRING, + IdlToken.BYTE_STRING, IdlToken.TEXT_BLOCK, + IdlToken.BYTE_TEXT_BLOCK, IdlToken.NUMBER, IdlToken.IDENTIFIER, IdlToken.LBRACE, @@ -725,7 +733,9 @@ void parse(CapturingTokenizer tokenizer) { } break; case STRING: + case BYTE_STRING: case TEXT_BLOCK: + case BYTE_TEXT_BLOCK: NODE_STRING_VALUE.parse(tokenizer); break; case NUMBER: @@ -772,7 +782,9 @@ void parse(CapturingTokenizer tokenizer) { optionalWs(tokenizer); while (tokenizer.hasNext()) { - if (tokenizer.expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.IDENTIFIER) == IdlToken.RBRACE) { + IdlToken token = tokenizer + .expect(IdlToken.RBRACE, IdlToken.STRING, IdlToken.BYTE_STRING, IdlToken.IDENTIFIER); + if (token == IdlToken.RBRACE) { break; } NODE_OBJECT_KVP.parse(tokenizer); @@ -803,10 +815,18 @@ void parse(CapturingTokenizer tokenizer) { @Override void parse(CapturingTokenizer tokenizer) { tokenizer.withState(this, () -> { - if (tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING) == IdlToken.IDENTIFIER) { - IDENTIFIER.parse(tokenizer); - } else { - QUOTED_TEXT.parse(tokenizer); + IdlToken token = tokenizer.expect(IdlToken.IDENTIFIER, IdlToken.STRING, IdlToken.BYTE_STRING); + switch (token) { 
+ case STRING: + QUOTED_TEXT.parse(tokenizer); + break; + case BYTE_STRING: + BYTE_STRING.parse(tokenizer); + break; + case IDENTIFIER: + default: + IDENTIFIER.parse(tokenizer); + break; } }); } @@ -824,13 +844,25 @@ void parse(CapturingTokenizer tokenizer) { @Override void parse(CapturingTokenizer tokenizer) { tokenizer.withState(this, () -> { - switch (tokenizer.expect(IdlToken.STRING, IdlToken.TEXT_BLOCK, IdlToken.IDENTIFIER)) { + IdlToken token = tokenizer.expect(IdlToken.STRING, + IdlToken.BYTE_STRING, + IdlToken.TEXT_BLOCK, + IdlToken.BYTE_TEXT_BLOCK, + IdlToken.IDENTIFIER); + + switch (token) { case STRING: QUOTED_TEXT.parse(tokenizer); break; + case BYTE_STRING: + BYTE_STRING.parse(tokenizer); + break; case TEXT_BLOCK: TEXT_BLOCK.parse(tokenizer); break; + case BYTE_TEXT_BLOCK: + BYTE_TEXT_BLOCK.parse(tokenizer); + break; case IDENTIFIER: default: SHAPE_ID.parse(tokenizer); @@ -849,6 +881,16 @@ void parse(CapturingTokenizer tokenizer) { } }, + BYTE_STRING { + @Override + void parse(CapturingTokenizer tokenizer) { + tokenizer.withState(this, () -> { + tokenizer.expect(IdlToken.BYTE_STRING); + tokenizer.next(); + }); + } + }, + TEXT_BLOCK { @Override void parse(CapturingTokenizer tokenizer) { @@ -859,6 +901,16 @@ void parse(CapturingTokenizer tokenizer) { } }, + BYTE_TEXT_BLOCK { + @Override + void parse(CapturingTokenizer tokenizer) { + tokenizer.withState(this, () -> { + tokenizer.expect(IdlToken.BYTE_TEXT_BLOCK); + tokenizer.next(); + }); + } + }, + NUMBER { @Override void parse(CapturingTokenizer tokenizer) { diff --git a/smithy-syntax/src/test/java/software/amazon/smithy/syntax/TreeTypeTest.java b/smithy-syntax/src/test/java/software/amazon/smithy/syntax/TreeTypeTest.java index ec80fc06d17..121dec5fbc6 100644 --- a/smithy-syntax/src/test/java/software/amazon/smithy/syntax/TreeTypeTest.java +++ b/smithy-syntax/src/test/java/software/amazon/smithy/syntax/TreeTypeTest.java @@ -53,26 +53,6 @@ public void controlStatement() { TreeType.BR); } - @Test - public 
void identifierNodeObjectKey() { - String identifier = "version"; - TokenTree tree = getTree(TreeType.NODE_OBJECT_KEY, identifier); - assertTreeIsValid(tree); - rootAndChildTypesEqual(tree, - TreeType.NODE_OBJECT_KEY, - TreeType.IDENTIFIER); - } - - @Test - public void stringNodeObjectKey() { - String string = "\"foo bar\""; - TokenTree tree = getTree(TreeType.NODE_OBJECT_KEY, string); - assertTreeIsValid(tree); - rootAndChildTypesEqual(tree, - TreeType.NODE_OBJECT_KEY, - TreeType.QUOTED_TEXT); - } - @Test public void metadataSection() { String metadataSection = "metadata foo = bar\nmetadata bar=baz\n"; @@ -1137,12 +1117,26 @@ public void nodeValue() { TreeType.NODE_VALUE, TreeType.NODE_STRING_VALUE); + String byteString = "b\"foo\""; + TokenTree byteStringTree = getTree(TreeType.NODE_VALUE, byteString); + assertTreeIsValid(byteStringTree); + rootAndChildTypesEqual(byteStringTree, + TreeType.NODE_VALUE, + TreeType.NODE_STRING_VALUE); + String textBlock = "\"\"\"\nfoo\"\"\""; TokenTree textBlockTree = getTree(TreeType.NODE_VALUE, textBlock); assertTreeIsValid(textBlockTree); rootAndChildTypesEqual(textBlockTree, TreeType.NODE_VALUE, TreeType.NODE_STRING_VALUE); + + String byteTextBlock = "b\"\"\"\nfoo\"\"\""; + TokenTree byteTextBlockTree = getTree(TreeType.NODE_VALUE, byteTextBlock); + assertTreeIsValid(byteTextBlockTree); + rootAndChildTypesEqual(byteTextBlockTree, + TreeType.NODE_VALUE, + TreeType.NODE_STRING_VALUE); } @Test @@ -1263,6 +1257,11 @@ public void nodeObjectKey() { assertTreeIsValid(quotedTree); rootAndChildTypesEqual(quotedTree, TreeType.NODE_OBJECT_KEY, TreeType.QUOTED_TEXT); + String byteString = "b\"foo bar\""; + TokenTree byteStringTree = getTree(TreeType.NODE_OBJECT_KEY, byteString); + assertTreeIsValid(byteStringTree); + rootAndChildTypesEqual(byteStringTree, TreeType.NODE_OBJECT_KEY, TreeType.BYTE_STRING); + String identifier = "foo"; TokenTree idTree = getTree(TreeType.NODE_OBJECT_KEY, identifier); assertTreeIsValid(idTree); @@ -1288,10 
+1287,20 @@ public void nodeStringValue() { assertTreeIsValid(quotedTree); rootAndChildTypesEqual(quotedTree, TreeType.NODE_STRING_VALUE, TreeType.QUOTED_TEXT); + String byteString = "b\"foo bar\""; + TokenTree byteStringTree = getTree(TreeType.NODE_STRING_VALUE, byteString); + assertTreeIsValid(byteStringTree); + rootAndChildTypesEqual(byteStringTree, TreeType.NODE_STRING_VALUE, TreeType.BYTE_STRING); + String block = "\"\"\"\nfoo\"\"\""; TokenTree blockTree = getTree(TreeType.NODE_STRING_VALUE, block); assertTreeIsValid(blockTree); rootAndChildTypesEqual(blockTree, TreeType.NODE_STRING_VALUE, TreeType.TEXT_BLOCK); + + String byteTextBlock = "b\"\"\"\nfoo\"\"\""; + TokenTree byteTextBlockTree = getTree(TreeType.NODE_STRING_VALUE, byteTextBlock); + assertTreeIsValid(byteTextBlockTree); + rootAndChildTypesEqual(byteTextBlockTree, TreeType.NODE_STRING_VALUE, TreeType.BYTE_TEXT_BLOCK); } @Test @@ -1307,6 +1316,19 @@ public void textBlock() { rootAndChildTypesEqual(withQuotesTree, TreeType.TEXT_BLOCK, TreeType.TOKEN); } + @Test + public void byteTextBlock() { + String empty = "b\"\"\"\n\"\"\""; + TokenTree emptyTree = getTree(TreeType.BYTE_TEXT_BLOCK, empty); + assertTreeIsValid(emptyTree); + rootAndChildTypesEqual(emptyTree, TreeType.BYTE_TEXT_BLOCK, TreeType.TOKEN); + + String withQuotes = "b\"\"\"\n\"\"foo\"\n\"\"bar\"\"\""; + TokenTree withQuotesTree = getTree(TreeType.BYTE_TEXT_BLOCK, withQuotes); + assertTreeIsValid(withQuotesTree); + rootAndChildTypesEqual(withQuotesTree, TreeType.BYTE_TEXT_BLOCK, TreeType.TOKEN); + } + @Test public void invalidControlSection() { String invalidTrailing = "$foo: bar\n$"; diff --git a/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java b/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java index 85433522726..bf7d33d1daf 100644 --- a/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java +++ b/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java @@ -4,7 
+4,9 @@ */ package software.amazon.smithy.utils; +import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Base64; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern;