From 4cc1d9edcf4d6791cfeb223ba07da531a87fdac7 Mon Sep 17 00:00:00 2001 From: Tomasz Kowalczyk Date: Sun, 25 Feb 2018 03:23:50 +0100 Subject: [PATCH 1/2] parameter and bbcode simple values now allow non-conflicting syntax tokens inside --- src/Parser/RegularParser.php | 35 +++++++++++++++++++++++++---------- tests/ParserTest.php | 6 ++++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/Parser/RegularParser.php b/src/Parser/RegularParser.php index d46a36a..71f6111 100644 --- a/src/Parser/RegularParser.php +++ b/src/Parser/RegularParser.php @@ -238,10 +238,9 @@ private function value() return $this->match(self::TOKEN_DELIMITER, false) ? $value : false; } - if('' !== $tmp = $this->match(self::TOKEN_STRING, false)) { - $value .= $tmp; - while('' !== $tmp = $this->match(self::TOKEN_STRING, false)) { - $value .= $tmp; + if($this->lookahead(self::TOKEN_STRING) || $this->lookahead(self::TOKEN_MARKER)) { + while(false === ($this->lookahead(self::TOKEN_WS) || $this->lookahead(self::TOKEN_CLOSE) || $this->lookaheadN(array(self::TOKEN_MARKER, self::TOKEN_CLOSE)))) { + $value .= $this->match(null, false); } return $value; @@ -302,12 +301,28 @@ private function lookahead($type) return $this->position < $this->tokensCount && $this->tokens[$this->position][0] === $type; } - /** - * @param int|null $type - * @param bool $ws - * - * @return string - */ + private function lookaheadN(array $types) + { + $count = count($types); + if($this->position + $count > $this->tokensCount) { + return false; + } + + $position = $this->position; + foreach($types as $type) { + // note: automatically skips whitespace tokens + if($this->tokens[$position][0] === self::TOKEN_WS) { + $position++; + } + if($type !== $this->tokens[$position][0]) { + return false; + } + $position++; + } + + return true; + } + private function match($type, $ws) { if($this->position >= $this->tokensCount) { diff --git a/tests/ParserTest.php b/tests/ParserTest.php index a4b5df9..6a04f60 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -232,6 +232,12 @@ public function provideShortcodes() array($s, '[a=0 b=0]0[/a]', array( new ParsedShortcode(new Shortcode('a', array('b' => '0'), '0', '0'), '[a=0 b=0]0[/a]', 0), )), + array($s, '[x=/[/] [y a=/"//] [z=http://url/] [a=http://url ]', array( + new ParsedShortcode(new Shortcode('x', array(), null, '/['), '[x=/[/]', 0), + new ParsedShortcode(new Shortcode('y', array('a' => '/"/'), null, null), '[y a=/"//]', 8), + new ParsedShortcode(new Shortcode('z', array(), null, 'http://url'), '[z=http://url/]', 19), + new ParsedShortcode(new Shortcode('a', array(), null, 'http://url'), '[a=http://url ]', 35), + )), ); /** From 63cafc5c3b60d6c916dbd899f8259bcd17f3b3cf Mon Sep 17 00:00:00 2001 From: Tomasz Kowalczyk Date: Mon, 10 Dec 2018 00:54:33 +0100 Subject: [PATCH 2/2] WIP standard / aggressive value matching modes --- Makefile | 2 +- src/Parser/RegularParser.php | 28 +++++++++++++++++++++++++++- tests/ParserTest.php | 27 +++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6b23fcd..017c195 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ composer-update: test: docker-compose run --rm php-${PHP} php -v - docker-compose run --rm php-${PHP} php /app/vendor/bin/phpunit -c /app/phpunit.xml.dist + docker-compose run --rm php-${PHP} php /app/vendor/bin/phpunit -c /app/phpunit.xml.dist --filter "${TEST}" test-local: php -v php vendor/bin/phpunit diff --git a/src/Parser/RegularParser.php b/src/Parser/RegularParser.php index 71f6111..fb0b853 100644 --- a/src/Parser/RegularParser.php +++ b/src/Parser/RegularParser.php @@ -35,6 +35,12 @@ final class RegularParser implements ParserInterface const TOKEN_STRING = 6; const TOKEN_WS = 7; + const VALUE_REGULAR = 0x01; + const VALUE_AGGRESSIVE = 0x02; + + /** @var int */ + public $valueMode = self::VALUE_REGULAR; + public function __construct(SyntaxInterface $syntax = null) { $this->lexerRegex = $this->prepareLexer($syntax ?: new CommonSyntax()); @@ -239,7 +245,16 @@ private function value() } if($this->lookahead(self::TOKEN_STRING) || $this->lookahead(self::TOKEN_MARKER)) { - while(false === ($this->lookahead(self::TOKEN_WS) || $this->lookahead(self::TOKEN_CLOSE) || $this->lookaheadN(array(self::TOKEN_MARKER, self::TOKEN_CLOSE)))) { + while(true) { + if($this->lookahead(self::TOKEN_WS) || $this->lookahead(self::TOKEN_CLOSE)) { + break; + } + if($this->lookaheadN(array(self::TOKEN_MARKER, self::TOKEN_CLOSE))) { + if($this->valueMode === self::VALUE_AGGRESSIVE) { + $value .= $this->match(null, false); + } + break; + } $value .= $this->match(null, false); } @@ -301,6 +316,11 @@ private function lookahead($type) return $this->position < $this->tokensCount && $this->tokens[$this->position][0] === $type; } + /** + * @param int[] $types + * + * @return bool + */ private function lookaheadN(array $types) { $count = count($types); @@ -323,6 +343,12 @@ private function lookaheadN(array $types) return true; } + /** + * @param int|null $type + * @param bool $ws + * + * @return string + */ private function match($type, $ws) { if($this->position >= $this->tokensCount) { diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 6a04f60..d9840fe 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -252,7 +252,7 @@ public function provideShortcodes() * * Tests cases from array above with identifiers in the array below must be skipped. */ - $wordpressSkip = array(3, 6, 16, 21, 22, 23, 25, 32, 33, 34, 46, 47, 49, 51); + $wordpressSkip = array(3, 6, 16, 21, 22, 23, 25, 32, 33, 34, 46, 47, 49, 51, 52); $result = array(); foreach($tests as $key => $test) { $syntax = array_shift($test); @@ -276,12 +276,35 @@ public function testIssue77() new ParsedShortcode(new Shortcode('x', array(), '', null), '[x][/x]', 3), new ParsedShortcode(new Shortcode('y', array(), 'x', null), '[y]x[/y]', 22), )); - $this->assertShortcodes($parser->parse('[a k="v][x][/x]'), array( new ParsedShortcode(new Shortcode('x', array(), '', null), '[x][/x]', 8), )); } + public function testValueModeAggressive() + { + $parser = new RegularParser(new CommonSyntax()); + $parser->valueMode = RegularParser::VALUE_AGGRESSIVE; + $parsed = $parser->parse('[x=/[/] [y a=/"//] [z=http://url/] [a=http://url ]'); + $tested = array( + new ParsedShortcode(new Shortcode('x', array(), null, '/[/'), '[x=/[/]', 0), + new ParsedShortcode(new Shortcode('y', array('a' => '/"//'), null, null), '[y a=/"//]', 8), + new ParsedShortcode(new Shortcode('z', array(), null, 'http://url/'), '[z=http://url/]', 19), + new ParsedShortcode(new Shortcode('a', array(), null, 'http://url'), '[a=http://url ]', 35), + ); + + $count = count($tested); + static::assertCount($count, $parsed, 'counts'); + for ($i = 0; $i < $count; $i++) { + static::assertSame($tested[$i]->getName(), $parsed[$i]->getName(), 'name'); + static::assertSame($tested[$i]->getParameters(), $parsed[$i]->getParameters(), 'parameters'); + static::assertSame($tested[$i]->getContent(), $parsed[$i]->getContent(), 'content'); + static::assertSame($tested[$i]->getText(), $parsed[$i]->getText(), 'text'); + static::assertSame($tested[$i]->getOffset(), $parsed[$i]->getOffset(), 'offset'); + static::assertSame($tested[$i]->getBbCode(), $parsed[$i]->getBbCode(), 'bbCode'); + } + } + public function testWordPress() { $parser = new WordpressParser();