diff --git a/addOns/commonlib/commonlib.gradle.kts b/addOns/commonlib/commonlib.gradle.kts index 51165bad48f..68ac70acbdc 100644 --- a/addOns/commonlib/commonlib.gradle.kts +++ b/addOns/commonlib/commonlib.gradle.kts @@ -1,7 +1,12 @@ +import net.ltgt.gradle.errorprone.errorprone import org.zaproxy.gradle.addon.AddOnStatus description = "A common library, for use by other add-ons." +plugins { + antlr +} + zapAddOn { addOnName.set("Common Library") addOnStatus.set(AddOnStatus.RELEASE) @@ -32,9 +37,39 @@ dependencies { api("com.fasterxml.jackson.datatype:jackson-datatype-jdk8") api("com.fasterxml.jackson.datatype:jackson-datatype-jsr310") + val antlrVersion = "4.13.0" + antlr("org.antlr:antlr4:$antlrVersion") + implementation("org.antlr:antlr4-runtime:$antlrVersion") + implementation("commons-io:commons-io:2.16.1") implementation("org.apache.commons:commons-csv:1.10.0") implementation("org.apache.commons:commons-collections4:4.4") testImplementation(project(":testutils")) } + +val jsParserPkg = "org.zaproxy.addon.commonlib.parserapi.impl" +val jsParserDir = jsParserPkg.replace('.', '/') +val generateGrammarSource by tasks.existing(AntlrTask::class) { + val libDir = "$outputDirectory/$jsParserDir" + arguments = arguments + listOf("-package", jsParserPkg, "-lib", libDir) + + doFirst { + mkdir(libDir) + } +} + +tasks.withType().configureEach { + options.errorprone.excludedPaths.set(".*/(generated-src|$jsParserDir)/.*") +} + +tasks.named("generateGrammarSource") { + mustRunAfter(tasks.named("generateEffectiveLombokConfig")) +} + +spotless { + javaWith3rdPartyFormatted( + project, + listOf("src/**/$jsParserDir/*.java"), + ) +} diff --git a/addOns/commonlib/src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexer.g4 b/addOns/commonlib/src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexer.g4 new file mode 100644 index 00000000000..f02948ecae9 --- /dev/null +++ 
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 by Bart Kiers (original author) and Alexandre Vitorelli (contributor -> ported to CSharp)
 * Copyright (c) 2017-2020 by Ivan Kochurkin (Positive Technologies):
    added ECMAScript 6 support, cleared and transformed to the universal grammar.
 * Copyright (c) 2018 by Juan Alvarez (contributor -> ported to Go)
 * Copyright (c) 2019 by Student Main (contributor -> ES2020)
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

// Lexer for JavaScript source. The embedded actions and predicates (IsStartOfFile, IsRegexPossible,
// IsStrictMode, Process*Brace, ...) are methods of the hand-written superclass JavaScriptLexerBase.
// Rule order matters: when two rules match the same text, ANTLR picks the one defined first.

lexer grammar JavaScriptLexer;

channels {
    ERROR
}

options {
    superClass = JavaScriptLexerBase;
}

// Insert here @header for C++ lexer.

HashBangLine      : { this.IsStartOfFile()}? '#!' ~[\r\n\u2028\u2029]*; // only allowed at start
MultiLineComment  : '/*' .*? '*/'             -> channel(HIDDEN);
SingleLineComment : '//' ~[\r\n\u2028\u2029]* -> channel(HIDDEN);
RegularExpressionLiteral:
    '/' RegularExpressionFirstChar RegularExpressionChar* {this.IsRegexPossible()}? '/' IdentifierPart*
;

OpenBracket                : '[';
CloseBracket               : ']';
OpenParen                  : '(';
CloseParen                 : ')';
OpenBrace                  : '{' {this.ProcessOpenBrace();};
TemplateCloseBrace         : {this.IsInTemplateString()}? '}' // Break lines here to ensure proper transformation by Go/transformGrammar.py
    {this.ProcessTemplateCloseBrace();} -> popMode;
CloseBrace                 : '}' {this.ProcessCloseBrace();};
SemiColon                  : ';';
Comma                      : ',';
Assign                     : '=';
QuestionMark               : '?';
QuestionMarkDot            : '?.';
Colon                      : ':';
Ellipsis                   : '...';
Dot                        : '.';
PlusPlus                   : '++';
MinusMinus                 : '--';
Plus                       : '+';
Minus                      : '-';
BitNot                     : '~';
Not                        : '!';
Multiply                   : '*';
Divide                     : '/';
Modulus                    : '%';
Power                      : '**';
NullCoalesce               : '??';
Hashtag                    : '#';
RightShiftArithmetic       : '>>';
LeftShiftArithmetic        : '<<';
RightShiftLogical          : '>>>';
LessThan                   : '<';
MoreThan                   : '>';
LessThanEquals             : '<=';
GreaterThanEquals          : '>=';
Equals_                    : '==';
NotEquals                  : '!=';
IdentityEquals             : '===';
IdentityNotEquals          : '!==';
BitAnd                     : '&';
BitXOr                     : '^';
BitOr                      : '|';
And                        : '&&';
Or                         : '||';
MultiplyAssign             : '*=';
DivideAssign               : '/=';
ModulusAssign              : '%=';
PlusAssign                 : '+=';
MinusAssign                : '-=';
LeftShiftArithmeticAssign  : '<<=';
RightShiftArithmeticAssign : '>>=';
RightShiftLogicalAssign    : '>>>=';
BitAndAssign               : '&=';
BitXorAssign               : '^=';
BitOrAssign                : '|=';
PowerAssign                : '**=';
NullishCoalescingAssign    : '??=';
ARROW                      : '=>';

/// Null Literals

NullLiteral: 'null';

/// Boolean Literals

BooleanLiteral: 'true' | 'false';

/// Numeric Literals

DecimalLiteral:
    DecimalIntegerLiteral '.' [0-9] [0-9_]* ExponentPart?
    | '.' [0-9] [0-9_]* ExponentPart?
    | DecimalIntegerLiteral ExponentPart?
;

/// Numeric Literals

HexIntegerLiteral    : '0' [xX] [0-9a-fA-F] HexDigit*;
OctalIntegerLiteral  : '0' [0-7]+ {!this.IsStrictMode()}?;
OctalIntegerLiteral2 : '0' [oO] [0-7] [_0-7]*;
BinaryIntegerLiteral : '0' [bB] [01] [_01]*;

BigHexIntegerLiteral     : '0' [xX] [0-9a-fA-F] HexDigit* 'n';
BigOctalIntegerLiteral   : '0' [oO] [0-7] [_0-7]* 'n';
BigBinaryIntegerLiteral  : '0' [bB] [01] [_01]* 'n';
BigDecimalIntegerLiteral : DecimalIntegerLiteral 'n';

/// Keywords

Break      : 'break';
Do         : 'do';
Instanceof : 'instanceof';
Typeof     : 'typeof';
Case       : 'case';
Else       : 'else';
New        : 'new';
Var        : 'var';
Catch      : 'catch';
Finally    : 'finally';
Return     : 'return';
Void       : 'void';
Continue   : 'continue';
For        : 'for';
Switch     : 'switch';
While      : 'while';
Debugger   : 'debugger';
Function_  : 'function';
This       : 'this';
With       : 'with';
Default    : 'default';
If         : 'if';
Throw      : 'throw';
Delete     : 'delete';
In         : 'in';
Try        : 'try';
As         : 'as';
From       : 'from';
Of         : 'of';
Yield      : 'yield';
YieldStar  : 'yield*';

/// Future Reserved Words

Class   : 'class';
Enum    : 'enum';
Extends : 'extends';
Super   : 'super';
Const   : 'const';
Export  : 'export';
Import  : 'import';

Async : 'async';
Await : 'await';

/// The following tokens are also considered to be FutureReservedWords
/// when parsing strict mode

Implements   : 'implements' {this.IsStrictMode()}?;
StrictLet    : 'let' {this.IsStrictMode()}?;
NonStrictLet : 'let' {!this.IsStrictMode()}?;
Private      : 'private' {this.IsStrictMode()}?;
Public       : 'public' {this.IsStrictMode()}?;
Interface    : 'interface' {this.IsStrictMode()}?;
Package      : 'package' {this.IsStrictMode()}?;
Protected    : 'protected' {this.IsStrictMode()}?;
Static       : 'static' {this.IsStrictMode()}?;

/// Identifier Names and Identifiers

Identifier: IdentifierStart IdentifierPart*;
/// String Literals
StringLiteral:
    ('"' DoubleStringCharacter* '"' | '\'' SingleStringCharacter* '\'') {this.ProcessStringLiteral();}
;

BackTick: '`' -> pushMode(TEMPLATE);

WhiteSpaces: [\t\u000B\u000C\u0020\u00A0]+ -> channel(HIDDEN);

LineTerminator: [\r\n\u2028\u2029] -> channel(HIDDEN);

/// Comments

// HTML-style comment and CDATA wrappers that can surround inline script content. Both literals
// must be non-empty: ANTLR rejects '' and two empty rules would be indistinguishable.
HtmlComment         : '<!--' .*? '-->'      -> channel(HIDDEN);
CDataComment        : '<![CDATA[' .*? ']]>' -> channel(HIDDEN);
// Catch-all so that every input character yields a token; kept last so it only wins when nothing else matches.
UnexpectedCharacter : .                     -> channel(ERROR);

mode TEMPLATE;

BackTickInside                : '`' -> type(BackTick), popMode;
TemplateStringStartExpression : '${' {this.ProcessTemplateOpenBrace();} -> pushMode(DEFAULT_MODE);
TemplateStringAtom            : ~[`];

// Fragment rules

fragment DoubleStringCharacter: ~["\\\r\n] | '\\' EscapeSequence | LineContinuation;

fragment SingleStringCharacter: ~['\\\r\n] | '\\' EscapeSequence | LineContinuation;

fragment EscapeSequence:
    CharacterEscapeSequence
    | '0' // no digit ahead! TODO
    | HexEscapeSequence
    | UnicodeEscapeSequence
    | ExtendedUnicodeEscapeSequence
;

fragment CharacterEscapeSequence: SingleEscapeCharacter | NonEscapeCharacter;

fragment HexEscapeSequence: 'x' HexDigit HexDigit;

fragment UnicodeEscapeSequence:
    'u' HexDigit HexDigit HexDigit HexDigit
    | 'u' '{' HexDigit HexDigit+ '}'
;

fragment ExtendedUnicodeEscapeSequence: 'u' '{' HexDigit+ '}';

fragment SingleEscapeCharacter: ['"\\bfnrtv];

fragment NonEscapeCharacter: ~['"\\bfnrtv0-9xu\r\n];

fragment EscapeCharacter: SingleEscapeCharacter | [0-9] | [xu];

fragment LineContinuation: '\\' [\r\n\u2028\u2029]+;

fragment HexDigit: [_0-9a-fA-F];

fragment DecimalIntegerLiteral: '0' | [1-9] [0-9_]*;

fragment ExponentPart: [eE] [+-]? [0-9_]+;

fragment IdentifierPart: IdentifierStart | [\p{Mn}] | [\p{Nd}] | [\p{Pc}] | '\u200C' | '\u200D';

fragment IdentifierStart: [\p{L}] | [$_] | '\\' UnicodeEscapeSequence;

fragment RegularExpressionFirstChar:
    ~[*\r\n\u2028\u2029\\/[]
    | RegularExpressionBackslashSequence
    | '[' RegularExpressionClassChar* ']'
;

fragment RegularExpressionChar:
    ~[\r\n\u2028\u2029\\/[]
    | RegularExpressionBackslashSequence
    | '[' RegularExpressionClassChar* ']'
;

fragment RegularExpressionClassChar: ~[\r\n\u2028\u2029\]\\] | RegularExpressionBackslashSequence;

fragment RegularExpressionBackslashSequence: '\\' ~[\r\n\u2028\u2029];
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 by Bart Kiers (original author) and Alexandre Vitorelli (contributor -> ported to CSharp)
 * Copyright (c) 2017-2020 by Ivan Kochurkin (Positive Technologies):
    added ECMAScript 6 support, cleared and transformed to the universal grammar.
 * Copyright (c) 2018 by Juan Alvarez (contributor -> ported to Go)
 * Copyright (c) 2019 by Student Main (contributor -> ES2020)
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

// Parser for JavaScript source; the entry rule is `program`. Token types come from
// JavaScriptLexer (tokenVocab). The semantic predicates written as {this.xxx()}? call methods
// expected on the superclass JavaScriptParserBase, which is not part of this grammar file.
// The order of alternatives inside a rule is significant (it sets precedence in
// `singleExpression` and resolves ambiguities elsewhere) - do not reorder.

parser grammar JavaScriptParser;

// Insert here @header for C++ parser.

options {
    tokenVocab = JavaScriptLexer;
    superClass = JavaScriptParserBase;
}

// Entry point: a whole script, which may be empty.
program
    : HashBangLine? sourceElements? EOF
    ;

sourceElement
    : statement
    ;

statement
    : block
    | variableStatement
    | importStatement
    | exportStatement
    | emptyStatement_
    | classDeclaration
    | functionDeclaration
    | expressionStatement
    | ifStatement
    | iterationStatement
    | continueStatement
    | breakStatement
    | returnStatement
    | yieldStatement
    | withStatement
    | labelledStatement
    | switchStatement
    | throwStatement
    | tryStatement
    | debuggerStatement
    ;

block
    : '{' statementList? '}'
    ;

statementList
    : statement+
    ;

importStatement
    : Import importFromBlock
    ;

importFromBlock
    : importDefault? (importNamespace | importModuleItems) importFrom eos
    | StringLiteral eos
    ;

importModuleItems
    : '{' (importAliasName ',')* (importAliasName ','?)? '}'
    ;

importAliasName
    : moduleExportName (As importedBinding)?
    ;

moduleExportName
    : identifierName
    | StringLiteral
    ;

// yield and await are permitted as BindingIdentifier in the grammar
importedBinding
    : Identifier
    | Yield
    | Await
    ;

importDefault
    : aliasName ','
    ;

importNamespace
    : ('*' | identifierName) (As identifierName)?
    ;

importFrom
    : From StringLiteral
    ;

aliasName
    : identifierName (As identifierName)?
    ;

exportStatement
    : Export Default? (exportFromBlock | declaration) eos # ExportDeclaration
    | Export Default singleExpression eos                 # ExportDefaultDeclaration
    ;

exportFromBlock
    : importNamespace importFrom eos
    | exportModuleItems importFrom? eos
    ;

exportModuleItems
    : '{' (exportAliasName ',')* (exportAliasName ','?)? '}'
    ;

exportAliasName
    : moduleExportName (As moduleExportName)?
    ;

declaration
    : variableStatement
    | classDeclaration
    | functionDeclaration
    ;

variableStatement
    : variableDeclarationList eos
    ;

variableDeclarationList
    : varModifier variableDeclaration (',' variableDeclaration)*
    ;

variableDeclaration
    : assignable ('=' singleExpression)? // ECMAScript 6: Array & Object Matching
    ;

emptyStatement_
    : SemiColon
    ;

expressionStatement
    : {this.notOpenBraceAndNotFunction()}? expressionSequence eos
    ;

ifStatement
    : If '(' expressionSequence ')' statement (Else statement)?
    ;

iterationStatement
    : Do statement While '(' expressionSequence ')' eos                                                                     # DoStatement
    | While '(' expressionSequence ')' statement                                                                            # WhileStatement
    | For '(' (expressionSequence | variableDeclarationList)? ';' expressionSequence? ';' expressionSequence? ')' statement # ForStatement
    | For '(' (singleExpression | variableDeclarationList) In expressionSequence ')' statement                              # ForInStatement
    | For Await? '(' (singleExpression | variableDeclarationList) Of expressionSequence ')' statement                       # ForOfStatement
    ;

varModifier // let, const - ECMAScript 6
    : Var
    | let_
    | Const
    ;

continueStatement
    : Continue ({this.notLineTerminator()}? identifier)? eos
    ;

breakStatement
    : Break ({this.notLineTerminator()}? identifier)? eos
    ;

returnStatement
    : Return ({this.notLineTerminator()}? expressionSequence)? eos
    ;

yieldStatement
    : (Yield | YieldStar) ({this.notLineTerminator()}? expressionSequence)? eos
    ;

withStatement
    : With '(' expressionSequence ')' statement
    ;

switchStatement
    : Switch '(' expressionSequence ')' caseBlock
    ;

caseBlock
    : '{' caseClauses? (defaultClause caseClauses?)? '}'
    ;

caseClauses
    : caseClause+
    ;

caseClause
    : Case expressionSequence ':' statementList?
    ;

defaultClause
    : Default ':' statementList?
    ;

labelledStatement
    : identifier ':' statement
    ;

throwStatement
    : Throw {this.notLineTerminator()}? expressionSequence eos
    ;

tryStatement
    : Try block (catchProduction finallyProduction? | finallyProduction)
    ;

catchProduction
    : Catch ('(' assignable? ')')? block
    ;

finallyProduction
    : Finally block
    ;

debuggerStatement
    : Debugger eos
    ;

functionDeclaration
    : Async? Function_ '*'? identifier '(' formalParameterList? ')' functionBody
    ;

classDeclaration
    : Class identifier classTail
    ;

classTail
    : (Extends singleExpression)? '{' classElement* '}'
    ;

// `static` is matched either as the Static token or, through the n("static") predicate, as a
// plain identifier (the lexer only emits Static when its strict-mode predicate holds).
classElement
    : (Static | {this.n("static")}? identifier)? methodDefinition
    | (Static | {this.n("static")}? identifier)? fieldDefinition
    | (Static | {this.n("static")}? identifier) block
    | emptyStatement_
    ;

methodDefinition
    : (Async {this.notLineTerminator()}?)? '*'? classElementName '(' formalParameterList? ')' functionBody
    | '*'? getter '(' ')' functionBody
    | '*'? setter '(' formalParameterList? ')' functionBody
    ;

fieldDefinition
    : classElementName initializer?
    ;

classElementName
    : propertyName
    | privateIdentifier
    ;

privateIdentifier
    : '#' identifierName
    ;

formalParameterList
    : formalParameterArg (',' formalParameterArg)* (',' lastFormalParameterArg)?
    | lastFormalParameterArg
    ;

formalParameterArg
    : assignable ('=' singleExpression)? // ECMAScript 6: Initialization
    ;

lastFormalParameterArg // ECMAScript 6: Rest Parameter
    : Ellipsis singleExpression
    ;

functionBody
    : '{' sourceElements? '}'
    ;

sourceElements
    : sourceElement+
    ;

arrayLiteral
    : ('[' elementList ']')
    ;

// JavaScript supports arrays like [,,1,2,,].
elementList
    : ','* arrayElement? (','+ arrayElement) * ','* // Yes, everything is optional
    ;

arrayElement
    : Ellipsis? singleExpression
    ;

propertyAssignment
    : propertyName ':' singleExpression                                  # PropertyExpressionAssignment
    | '[' singleExpression ']' ':' singleExpression                      # ComputedPropertyExpressionAssignment
    | Async? '*'? propertyName '(' formalParameterList? ')' functionBody # FunctionProperty
    | getter '(' ')' functionBody                                        # PropertyGetter
    | setter '(' formalParameterArg ')' functionBody                     # PropertySetter
    | Ellipsis? singleExpression                                         # PropertyShorthand
    ;

propertyName
    : identifierName
    | StringLiteral
    | numericLiteral
    | '[' singleExpression ']'
    ;

arguments
    : '(' (argument (',' argument)* ','?)? ')'
    ;

argument
    : Ellipsis? (singleExpression | identifier)
    ;

expressionSequence
    : singleExpression (',' singleExpression)*
    ;

// Left-recursive expression rule: ANTLR gives earlier alternatives higher precedence, so the
// order below encodes operator precedence.
singleExpression
    : anonymousFunction                                    # FunctionExpression
    | Class identifier? classTail                          # ClassExpression
    | singleExpression '?.' singleExpression               # OptionalChainExpression
    | singleExpression '?.'? '[' expressionSequence ']'    # MemberIndexExpression
    | singleExpression '?'? '.' '#'? identifierName        # MemberDotExpression
    // Split to try `new Date()` first, then `new Date`.
    | New identifier arguments                                   # NewExpression
    | New singleExpression arguments                             # NewExpression
    | New singleExpression                                       # NewExpression
    | singleExpression arguments                                 # ArgumentsExpression
    | New '.' identifier                                         # MetaExpression // new.target
    | singleExpression {this.notLineTerminator()}? '++'          # PostIncrementExpression
    | singleExpression {this.notLineTerminator()}? '--'          # PostDecreaseExpression
    | Delete singleExpression                                    # DeleteExpression
    | Void singleExpression                                      # VoidExpression
    | Typeof singleExpression                                    # TypeofExpression
    | '++' singleExpression                                      # PreIncrementExpression
    | '--' singleExpression                                      # PreDecreaseExpression
    | '+' singleExpression                                       # UnaryPlusExpression
    | '-' singleExpression                                       # UnaryMinusExpression
    | '~' singleExpression                                       # BitNotExpression
    | '!' singleExpression                                       # NotExpression
    | Await singleExpression                                     # AwaitExpression
    | singleExpression '**' singleExpression                     # PowerExpression
    | singleExpression ('*' | '/' | '%') singleExpression        # MultiplicativeExpression
    | singleExpression ('+' | '-') singleExpression              # AdditiveExpression
    | singleExpression '??' singleExpression                     # CoalesceExpression
    | singleExpression ('<<' | '>>' | '>>>') singleExpression    # BitShiftExpression
    | singleExpression ('<' | '>' | '<=' | '>=') singleExpression # RelationalExpression
    | singleExpression Instanceof singleExpression               # InstanceofExpression
    | singleExpression In singleExpression                       # InExpression
    | singleExpression ('==' | '!=' | '===' | '!==') singleExpression # EqualityExpression
    | singleExpression '&' singleExpression                      # BitAndExpression
    | singleExpression '^' singleExpression                      # BitXOrExpression
    | singleExpression '|' singleExpression                      # BitOrExpression
    | singleExpression '&&' singleExpression                     # LogicalAndExpression
    | singleExpression '||' singleExpression                     # LogicalOrExpression
    | singleExpression '?' singleExpression ':' singleExpression # TernaryExpression
    | singleExpression '=' singleExpression                      # AssignmentExpression
    | singleExpression assignmentOperator singleExpression       # AssignmentOperatorExpression
    | Import '(' singleExpression ')'                            # ImportExpression
    | singleExpression templateStringLiteral                     # TemplateStringExpression // ECMAScript 6
    | yieldStatement                                             # YieldExpression          // ECMAScript 6
    | This                                                       # ThisExpression
    | identifier                                                 # IdentifierExpression
    | Super                                                      # SuperExpression
    | literal                                                    # LiteralExpression
    | arrayLiteral                                               # ArrayLiteralExpression
    | objectLiteral                                              # ObjectLiteralExpression
    | '(' expressionSequence ')'                                 # ParenthesizedExpression
    ;

initializer
    // TODO: must be `= AssignmentExpression` and we have such label already but it doesn't respect the specification.
    // See https://tc39.es/ecma262/multipage/ecmascript-language-expressions.html#prod-Initializer
    : '=' singleExpression
    ;

assignable
    : identifier
    | keyword
    | arrayLiteral
    | objectLiteral
    ;

objectLiteral
    : '{' (propertyAssignment (',' propertyAssignment)* ','?)? '}'
    ;

anonymousFunction
    : functionDeclaration                                                # NamedFunction
    | Async? Function_ '*'? '(' formalParameterList? ')' functionBody    # AnonymousFunctionDecl
    | Async? arrowFunctionParameters '=>' arrowFunctionBody              # ArrowFunction
    ;

arrowFunctionParameters
    : propertyName
    | '(' formalParameterList? ')'
    ;

arrowFunctionBody
    : singleExpression
    | functionBody
    ;

assignmentOperator
    : '*='
    | '/='
    | '%='
    | '+='
    | '-='
    | '<<='
    | '>>='
    | '>>>='
    | '&='
    | '^='
    | '|='
    | '**='
    | '??='
    ;

literal
    : NullLiteral
    | BooleanLiteral
    | StringLiteral
    | templateStringLiteral
    | RegularExpressionLiteral
    | numericLiteral
    | bigintLiteral
    ;

templateStringLiteral
    : BackTick templateStringAtom* BackTick
    ;

templateStringAtom
    : TemplateStringAtom
    | TemplateStringStartExpression singleExpression TemplateCloseBrace
    ;

numericLiteral
    : DecimalLiteral
    | HexIntegerLiteral
    | OctalIntegerLiteral
    | OctalIntegerLiteral2
    | BinaryIntegerLiteral
    ;

bigintLiteral
    : BigDecimalIntegerLiteral
    | BigHexIntegerLiteral
    | BigOctalIntegerLiteral
    | BigBinaryIntegerLiteral
    ;

// `get` / `set` have no token of their own; they are recognised as identifiers guarded by the
// n("get") / n("set") predicates.
getter
    : {this.n("get")}? identifier classElementName
    ;

setter
    : {this.n("set")}? identifier classElementName
    ;

identifierName
    : identifier
    | reservedWord
    ;

identifier
    : Identifier
    | NonStrictLet
    | Async
    | As
    | From
    | Yield
    | Of
    ;

reservedWord
    : keyword
    | NullLiteral
    | BooleanLiteral
    ;

keyword
    : Break
    | Do
    | Instanceof
    | Typeof
    | Case
    | Else
    | New
    | Var
    | Catch
    | Finally
    | Return
    | Void
    | Continue
    | For
    | Switch
    | While
    | Debugger
    | Function_
    | This
    | With
    | Default
    | If
    | Throw
    | Delete
    | In
    | Try
    | Class
    | Enum
    | Extends
    | Super
    | Const
    | Export
    | Import
    | Implements
    | let_
    | Private
    | Public
    | Interface
    | Package
    | Protected
    | Static
    | Yield
    | YieldStar
    | Async
    | Await
    | From
    | As
    | Of
    ;

let_
    : NonStrictLet
    | StrictLet
    ;

// End of statement: an explicit ';', end of input, or one of two predicate-only alternatives that
// consume no token. NOTE(review): the predicates live in JavaScriptParserBase (not shown here);
// presumably they implement automatic semicolon insertion - confirm against that class.
eos
    : SemiColon
    | EOF
    | {this.lineTerminatorAhead()}?
    | {this.closeBrace()}?
    ;
+ */ +package org.zaproxy.addon.commonlib.parserapi; + +import java.io.IOException; +import java.util.List; +import net.htmlparser.jericho.Element; +import net.htmlparser.jericho.HTMLElementName; +import net.htmlparser.jericho.Source; +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.Token; +import org.parosproxy.paros.network.HttpMessage; +import org.zaproxy.addon.commonlib.parserapi.impl.JavaScriptLexer; +import org.zaproxy.addon.commonlib.parserapi.impl.JavaScriptParser; + +public class ParserApi { + + private static final char SINGLE_QUOTE_CHARACTER = '\''; + private static final char DOUBLE_QUOTE_CHARACTER = '"'; + private static final char FORWARD_SLASH_CHARACTER = '/'; + + public enum Context { + NO_QUOTE, + SINGLE_QUOTE, + DOUBLE_QUOTE, + SLASH_QUOTE + } + + private String scriptCode; + private int targetBlockNumber; + + public void getTargetScriptBlock(HttpMessage msg, String target) { + String htmlCode = msg.getResponseBody().toString(); + Source htmlSrc = new Source(htmlCode); + List scripts = htmlSrc.getAllElements(HTMLElementName.SCRIPT); + for (Element ele : scripts) { + String code = ele.getContent().toString(); + if (code.contains(target)) { + break; + } + targetBlockNumber += 1; + } + } + + public void getTargetScriptCode(HttpMessage msg, String target) { + String htmlCode = msg.getResponseBody().toString(); + Source htmlSrc = new Source(htmlCode); + scriptCode = + htmlSrc.getAllElements(HTMLElementName.SCRIPT) + .get(targetBlockNumber) + .getContent() + .toString(); + } + + public boolean parseScript() throws IOException { + CharStream charStream = CharStreams.fromString(scriptCode); + JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); + CommonTokenStream cts = new CommonTokenStream(jsLexer); + 
JavaScriptParser jsParser = new JavaScriptParser(cts); + jsParser.removeErrorListeners(); + jsParser.addErrorListener(ThrowOnSyntaxErrorListener.INSTANCE); + + try { + jsParser.program(); + } catch (Exception e) { + return false; + } + + return true; + } + + public boolean inExecutionContext(String target) throws IOException { + CharStream charStream = CharStreams.fromString(scriptCode); + JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); + + Token token = jsLexer.nextToken(); + while (token.getType() != -1) { + if (token.getType() == JavaScriptLexer.Identifier && token.getText().equals(target)) { + return true; + } + token = jsLexer.nextToken(); + } + + return false; + } + + public Context getContext(String target) throws IOException { + CharStream charStream = CharStreams.fromString(scriptCode); + JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); + + Token token = jsLexer.nextToken(); + while (token.getType() != -1) { + String tokenText = token.getText(); + if (tokenText.contains(target)) { + switch (tokenText.charAt(0)) { + case DOUBLE_QUOTE_CHARACTER: + return Context.DOUBLE_QUOTE; + case SINGLE_QUOTE_CHARACTER: + return Context.SINGLE_QUOTE; + case FORWARD_SLASH_CHARACTER: + return Context.SLASH_QUOTE; + default: + return Context.NO_QUOTE; + } + } + + token = jsLexer.nextToken(); + } + + return Context.NO_QUOTE; + } + + private static class ThrowOnSyntaxErrorListener extends BaseErrorListener { + + static final ThrowOnSyntaxErrorListener INSTANCE = new ThrowOnSyntaxErrorListener(); + + // Reuse the exception, used just for control flow. 
+ private static final RuntimeException SYNTAX_EXCEPTION = + new IllegalArgumentException("Syntax Error"); + + private ThrowOnSyntaxErrorListener() {} + + @Override + public void syntaxError( + Recognizer recognizer, + Object offendingSymbol, + int line, + int charPositionInLine, + String msg, + RecognitionException e) { + throw SYNTAX_EXCEPTION; + } + } +} diff --git a/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/README.md b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/README.md new file mode 100644 index 00000000000..1fe12cb17d2 --- /dev/null +++ b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/README.md @@ -0,0 +1,11 @@ +## JavaScript Parser + +The JavaScript parser used in this add-on is generated using ANTLR with their [JavaScript grammar](https://github.com/antlr/grammars-v4/blob/14fc51dfd712a99663497035f1f63fa8eac1a225/javascript/javascript/). + +The following files were copied from the referenced repository: + - [`JavaScriptLexerBase.java`](src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexerBase.java); + - [`JavaScriptParserBase.java`](src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParserBase.java); + - [`JavaScriptLexer.g4`](src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexer.g4); + - [`JavaScriptParser.g4`](src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParser.g4). + +The parser is automatically generated when the code is compiled through the [`antlr` Gradle plugin](https://docs.gradle.org/current/userguide/antlr_plugin.html). 
diff --git a/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexerBase.java b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexerBase.java
new file mode 100644
index 00000000000..d8082152e91
--- /dev/null
+++ b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexerBase.java
@@ -0,0 +1,178 @@
+// Refer to README.md
+package org.zaproxy.addon.commonlib.parserapi.impl;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Token;
+
+/**
+ * Lexer support code that tracks strict mode, brace depth and the last default-channel token.
+ *
+ * <p>All lexer methods that are used in the grammar (IsStrictMode) should start with an upper case
+ * char, similar to lexer rules.
+ */
+public abstract class JavaScriptLexerBase extends Lexer {
+    /**
+     * Stores the strict mode values of the nested scopes. While it is empty the mode is taken from
+     * {@code useStrictDefault}.
+     */
+    private final Deque<Boolean> scopeStrictModes = new ArrayDeque<>();
+
+    /** Last token seen on the default channel, or {@code null} if there was none yet. */
+    private Token lastToken = null;
+
+    /** Default value of strict mode. Can be defined externally by setUseStrictDefault. */
+    private boolean useStrictDefault = false;
+
+    /**
+     * Current value of strict mode. Can be defined during parsing, see StringFunctions.js and
+     * StringGlobal.js samples.
+     */
+    private boolean useStrictCurrent = false;
+
+    /** Preserves depth due to braces including template literals. */
+    private int currentDepth = 0;
+
+    /**
+     * Preserves the starting depth of template literals to correctly handle braces inside template
+     * literals.
+     */
+    private final Deque<Integer> templateDepthStack = new ArrayDeque<>();
+
+    public JavaScriptLexerBase(CharStream input) {
+        super(input);
+    }
+
+    /** Returns {@code true} while no token has been recorded on the default channel. */
+    public boolean IsStartOfFile() {
+        return lastToken == null;
+    }
+
+    /** Returns the default strict mode value. */
+    public boolean getStrictDefault() {
+        return useStrictDefault;
+    }
+
+    /** Sets both the default and the current strict mode to {@code value}. */
+    public void setUseStrictDefault(boolean value) {
+        useStrictDefault = value;
+        useStrictCurrent = value;
+    }
+
+    /** Returns the current strict mode value. */
+    public boolean IsStrictMode() {
+        return useStrictCurrent;
+    }
+
+    /** Returns {@code true} if the most recently recorded template depth is the current depth. */
+    public boolean IsInTemplateString() {
+        return !templateDepthStack.isEmpty() && templateDepthStack.peek() == currentDepth;
+    }
+
+    /**
+     * Return the next token from the character stream and records this last token in case it
+     * resides on the default channel. This recorded token is used to determine when the lexer could
+     * possibly match a regex literal.
+     *
+     * @return the next token from the character stream.
+     */
+    @Override
+    public Token nextToken() {
+        Token next = super.nextToken();
+
+        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+            // Keep track of the last token on the default channel.
+            this.lastToken = next;
+        }
+
+        return next;
+    }
+
+    /** Enters a brace scope, inheriting strict mode from the enclosing scope or the default. */
+    protected void ProcessOpenBrace() {
+        currentDepth++;
+        boolean enclosingStrict = !scopeStrictModes.isEmpty() && scopeStrictModes.peek();
+        useStrictCurrent = enclosingStrict || useStrictDefault;
+        scopeStrictModes.push(useStrictCurrent);
+    }
+
+    /** Leaves a brace scope, taking the strict mode from the popped scope or the default. */
+    protected void ProcessCloseBrace() {
+        useStrictCurrent = scopeStrictModes.isEmpty() ? useStrictDefault : scopeStrictModes.pop();
+        currentDepth--;
+    }
+
+    /** Increments the depth and records it as the starting depth of a template literal. */
+    protected void ProcessTemplateOpenBrace() {
+        currentDepth++;
+        templateDepthStack.push(currentDepth);
+    }
+
+    /** Discards the most recently recorded template depth and decrements the depth. */
+    protected void ProcessTemplateCloseBrace() {
+        templateDepthStack.pop();
+        currentDepth--;
+    }
+
+    /**
+     * Switches the current scope to strict mode when the current text is {@code "use strict"} or
+     * {@code 'use strict'} and either no default-channel token precedes it or the last such token
+     * is an open brace.
+     */
+    protected void ProcessStringLiteral() {
+        if (lastToken != null && lastToken.getType() != JavaScriptLexer.OpenBrace) {
+            return;
+        }
+        String text = getText();
+        if ("\"use strict\"".equals(text) || "'use strict'".equals(text)) {
+            if (!scopeStrictModes.isEmpty()) {
+                scopeStrictModes.pop();
+            }
+            useStrictCurrent = true;
+            scopeStrictModes.push(useStrictCurrent);
+        }
+    }
+
+    /** Returns {@code true} if the lexer can match a regex literal. */
+    protected boolean IsRegexPossible() {
+        if (this.lastToken == null) {
+            // No token has been produced yet: at the start of the input,
+            // no division is possible, so a regex literal _is_ possible.
+            return true;
+        }
+
+        switch (this.lastToken.getType()) {
+            case JavaScriptLexer.Identifier:
+            case JavaScriptLexer.NullLiteral:
+            case JavaScriptLexer.BooleanLiteral:
+            case JavaScriptLexer.This:
+            case JavaScriptLexer.CloseBracket:
+            case JavaScriptLexer.CloseParen:
+            case JavaScriptLexer.OctalIntegerLiteral:
+            case JavaScriptLexer.DecimalLiteral:
+            case JavaScriptLexer.HexIntegerLiteral:
+            case JavaScriptLexer.StringLiteral:
+            case JavaScriptLexer.PlusPlus:
+            case JavaScriptLexer.MinusMinus:
+                // After any of the tokens above, no regex literal can follow.
+                return false;
+            default:
+                // In all other cases, a regex literal _is_ possible.
+                return true;
+        }
+    }
+
+    /** Clears all state tracked by this class, then resets the underlying lexer. */
+    @Override
+    public void reset() {
+        this.scopeStrictModes.clear();
+        this.lastToken = null;
+        this.useStrictDefault = false;
+        this.useStrictCurrent = false;
+        this.currentDepth = 0;
+        this.templateDepthStack.clear();
+        super.reset();
+    }
+}
diff --git a/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParserBase.java b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParserBase.java
new file mode 100644
index 00000000000..f71439087b7
--- /dev/null
+++ b/addOns/commonlib/src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParserBase.java
@@ -0,0 +1,106 @@
+// Refer to README.md
+package org.zaproxy.addon.commonlib.parserapi.impl;
+
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.TokenStream;
+
+/**
+ * All parser methods that are used in the grammar (p, prev, notLineTerminator, etc.) should start
+ * with a lower case char, similar to parser rules.
+ */
+public abstract class JavaScriptParserBase extends Parser {
+    public JavaScriptParserBase(TokenStream input) {
+        super(input);
+    }
+
+    /** Short form for prev(String str) */
+    protected boolean p(String str) {
+        return prev(str);
+    }
+
+    /**
+     * Whether the previous token value equals to {@code str}.
+     *
+     * @return {@code false} if there is no previous token, e.g. at the start of the token stream.
+     */
+    protected boolean prev(String str) {
+        // LT(-1) may yield null when no token precedes the current one; avoid the NPE.
+        Token previous = _input.LT(-1);
+        return previous != null && previous.getText().equals(str);
+    }
+
+    /** Short form for next(String str) */
+    protected boolean n(String str) {
+        return next(str);
+    }
+
+    /** Whether the next token value equals to {@code str}. */
+    protected boolean next(String str) {
+        return _input.LT(1).getText().equals(str);
+    }
+
+    /** Negation of {@link #lineTerminatorAhead()}. */
+    protected boolean notLineTerminator() {
+        return !lineTerminatorAhead();
+    }
+
+    /** Whether the next token is neither an {@code OpenBrace} nor a {@code Function_} token. */
+    protected boolean notOpenBraceAndNotFunction() {
+        int nextTokenType = _input.LT(1).getType();
+        return nextTokenType != JavaScriptParser.OpenBrace
+                && nextTokenType != JavaScriptParser.Function_;
+    }
+
+    /** Whether the next token is a {@code CloseBrace} token. */
+    protected boolean closeBrace() {
+        return _input.LT(1).getType() == JavaScriptParser.CloseBrace;
+    }
+
+    /**
+     * Returns {@code true} iff the token right before the current token is on the {@code HIDDEN}
+     * channel and either is a line terminator, or is a multi line comment that contains a line
+     * terminator. A single whitespace token directly before the current token is skipped first.
+     *
+     * @return {@code true} iff such a line terminator or multi line comment was found.
+     */
+    protected boolean lineTerminatorAhead() {
+        int currentIndex = this.getCurrentToken().getTokenIndex();
+
+        // Get the token ahead of the current index.
+        int possibleIndexEosToken = currentIndex - 1;
+        if (possibleIndexEosToken < 0) {
+            return false;
+        }
+        Token ahead = _input.get(possibleIndexEosToken);
+
+        if (ahead.getChannel() != Lexer.HIDDEN) {
+            // We're only interested in tokens on the HIDDEN channel.
+            return false;
+        }
+
+        if (ahead.getType() == JavaScriptParser.LineTerminator) {
+            // There is definitely a line terminator ahead.
+            return true;
+        }
+
+        if (ahead.getType() == JavaScriptParser.WhiteSpaces) {
+            // Get the token ahead of the current whitespaces.
+            possibleIndexEosToken = currentIndex - 2;
+            if (possibleIndexEosToken < 0) {
+                return false;
+            }
+            ahead = _input.get(possibleIndexEosToken);
+        }
+
+        // Get the token's text and type.
+        String text = ahead.getText();
+        int type = ahead.getType();
+
+        // Check if the token is, or contains a line terminator.
+        return (type == JavaScriptParser.MultiLineComment
+                        && (text.contains("\r") || text.contains("\n")))
+                || (type == JavaScriptParser.LineTerminator);
+    }
+}