From 03f531061733702aaf423e973976b898ca33173d Mon Sep 17 00:00:00 2001 From: Joschi Kuphal <joschi@tollwerk.de> Date: Tue, 5 May 2020 15:28:03 +0200 Subject: [PATCH] [BUGFIX] Correctly consider nested tags in frontend HTML parser Nested tags starting with the same literals, like `<s><span>...` or `<a ...><abbr>...` are not correctly nested due to a flaw in identifying proper start and end of HTML tags. Resolves: #91194 Releases: master Change-Id: I2029c8e01d66e5286790fd04a259153cd130c753 Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/64409 Tested-by: TYPO3com <noreply@typo3.com> Tested-by: Oliver Hader <oliver.hader@typo3.org> Tested-by: Markus Klein <markus.klein@typo3.org> Tested-by: Susanne Moog <look@susi.dev> Reviewed-by: Markus Klein <markus.klein@typo3.org> Reviewed-by: Helmut Hummel <typo3@helhum.io> Reviewed-by: Daniel Goerz <daniel.goerz@posteo.de> Reviewed-by: Oliver Hader <oliver.hader@typo3.org> Reviewed-by: Susanne Moog <look@susi.dev> --- .../ContentObject/ContentObjectRenderer.php | 11 +- .../ContentObjectRendererTest.php | 266 +++++++----------- 2 files changed, 116 insertions(+), 161 deletions(-) diff --git a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php index b1a7c11b1ef3..96e756eb25b9 100644 --- a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php +++ b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php @@ -3932,7 +3932,8 @@ class ContentObjectRenderer implements LoggerAwareInterface // call this method recursively if found if (strpos($data, '<') !== false && isset($conf['tags.']) && is_array($conf['tags.'])) { foreach ($conf['tags.'] as $tag => $tagConfig) { - if (strpos($data, '<' . $tag) !== false) { + // only match tag `a` in `<a href"...">` but not in `<abbr>` + if (preg_match('#<' . $tag . '[\s/>]#', $data)) { $data = $this->_parseFunc($data, $conf); break; } @@ -7113,7 +7114,13 @@ class ContentObjectRenderer implements LoggerAwareInterface // Take care for nested tags do { $nextMatchingEndTagPosition = strpos($tempContent, $endTag); - $nextSameTypeTagPosition = strpos($tempContent, $startTag); + // only match tag `a` in `<a href"...">` but not in `<abbr>` + $nextSameTypeTagPosition = preg_match( + '#' . $startTag . '[\s/>]#', + $tempContent, + $nextSameStartTagMatches, + PREG_OFFSET_CAPTURE + ) ? $nextSameStartTagMatches[0][1] : false; // filter out nested tag contents to help getting the correct closing tag if ($nextSameTypeTagPosition !== false && $nextSameTypeTagPosition < $nextMatchingEndTagPosition) { diff --git a/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php b/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php index b78fab81b344..55f8240ad751 100644 --- a/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php +++ b/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php @@ -2295,83 +2295,97 @@ class ContentObjectRendererTest extends UnitTestCase ]; } + /** + * @test + * @dataProvider _parseFuncReturnsCorrectHtmlDataProvider + * @param string $value + * @param array $configuration + * @param string $expectedResult + */ + public function stdWrap_parseFuncReturnsParsedHtml($value, $configuration, $expectedResult): void + { + self::assertEquals($expectedResult, $this->subject->stdWrap_parseFunc($value, $configuration)); + } + /** * Data provider for the parseFuncParsesNestedTagsProperly test * - * @return \Generator multi-dimensional array with test data + * @return array multi-dimensional array with test data * @see parseFuncParsesNestedTagsProperly */ - public function _parseFuncParsesNestedTagsProperlyDataProvider(): \Generator + public function _parseFuncParsesNestedTagsProperlyDataProvider(): array { - yield 'list with empty and filled li' => [ - '<ul> - <li></li> - <li>second</li> -</ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] + $defaultListItemParseFunc = [ + 'parseFunc' => '', + 'parseFunc.' => [ + 'tags.' => [ + 'li' => 'TEXT', + 'li.' => [ + 'wrap' => '<li>LI:|</li>', + 'current' => '1' ] ] + ] + ]; + + return [ + 'parent & child tags with same beginning are processed' => [ + '<div><any data-skip><anyother data-skip>content</anyother></any>', + [ + 'parseFunc' => '', + 'parseFunc.' => [ + 'tags.' => [ + 'any' => 'TEXT', + 'any.' => [ + 'wrap' => '<any data-processed>|</any>', + 'current' => 1, + ], + 'anyother' => 'TEXT', + 'anyother.' => [ + 'wrap' => '<anyother data-processed>|</anyother>', + 'current' => 1, + ], + ], + ], + ], + '<div><any data-processed><anyother data-processed>content</anyother></any>', ], - '<ul> + 'list with empty and filled li' => [ + '<ul> + <li></li> + <li>second</li> +</ul>', + $defaultListItemParseFunc, + '<ul> <li>LI:</li> <li>LI:second</li> </ul>', - ]; - yield 'list with filled li wrapped by a div containing text' => [ - '<div>text<ul><li></li><li>second</li></ul></div>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] ], - '<div>text<ul><li>LI:</li><li>LI:second</li></ul></div>', - ]; - yield 'link list with empty li modification' => [ - '<ul> + 'list with filled li wrapped by a div containing text' => [ + '<div>text<ul><li></li><li>second</li></ul></div>', + $defaultListItemParseFunc, + '<div>text<ul><li>LI:</li><li>LI:second</li></ul></div>', + ], + 'link list with empty li modification' => [ + '<ul> <li> <ul> <li></li> </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI: <ul> <li>LI:</li> </ul> </li> </ul>', - ]; + ], - yield 'link list with li modifications' => [ - '<ul> + 'link list with li modifications' => [ + '<ul> <li>first</li> <li>second <ul> @@ -2380,19 +2394,8 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI:first</li> <li>LI:second <ul> @@ -2401,9 +2404,9 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>' - ]; - yield 'link list with li modifications and no text' => [ - '<ul> + ], + 'link list with li modifications and no text' => [ + '<ul> <li>first</li> <li> <ul> @@ -2412,19 +2415,8 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI:first</li> <li>LI: <ul> @@ -2433,9 +2425,9 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - ]; - yield 'link list with li modifications on third level' => [ - '<ul> + ], + 'link list with li modifications on third level' => [ + '<ul> <li>first</li> <li>second <ul> @@ -2449,19 +2441,8 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI:first</li> <li>LI:second <ul> @@ -2475,9 +2456,9 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - ]; - yield 'link list with li modifications on third level no text' => [ - '<ul> + ], + 'link list with li modifications on third level no text' => [ + '<ul> <li>first</li> <li> <ul> @@ -2491,19 +2472,8 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI:first</li> <li>LI: <ul> @@ -2517,9 +2487,9 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - ]; - yield 'link list with ul and li modifications' => [ - '<ul> + ], + 'link list with ul and li modifications' => [ + '<ul> <li>first</li> <li>second <ul> @@ -2528,24 +2498,24 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'ul' => 'TEXT', - 'ul.' => [ - 'wrap' => '<ul><li>intro</li>|<li>outro</li></ul>', - 'current' => '1' - ], - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' + [ + 'parseFunc' => '', + 'parseFunc.' => [ + 'tags.' => [ + 'ul' => 'TEXT', + 'ul.' => [ + 'wrap' => '<ul><li>intro</li>|<li>outro</li></ul>', + 'current' => '1' + ], + 'li' => 'TEXT', + 'li.' => [ + 'wrap' => '<li>LI:|</li>', + 'current' => '1' + ] ] ] - ] - ], - '<ul><li>intro</li> + ], + '<ul><li>intro</li> <li>LI:first</li> <li>LI:second <ul><li>intro</li> @@ -2554,10 +2524,10 @@ class ContentObjectRendererTest extends UnitTestCase <li>outro</li></ul> </li> <li>outro</li></ul>', - ]; + ], - yield 'link list with li containing p tag and sub list' => [ - '<ul> + 'link list with li containing p tag and sub list' => [ + '<ul> <li>first</li> <li> <ul> @@ -2573,19 +2543,8 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', - [ - 'parseFunc' => '', - 'parseFunc.' => [ - 'tags.' => [ - 'li' => 'TEXT', - 'li.' => [ - 'wrap' => '<li>LI:|</li>', - 'current' => '1' - ] - ] - ] - ], - '<ul> + $defaultListItemParseFunc, + '<ul> <li>LI:first</li> <li>LI: <ul> @@ -2601,21 +2560,10 @@ class ContentObjectRendererTest extends UnitTestCase </ul> </li> </ul>', + ] ]; } - /** - * @test - * @dataProvider _parseFuncReturnsCorrectHtmlDataProvider - * @param string $value - * @param array $configuration - * @param string $expectedResult - */ - public function stdWrap_parseFuncReturnsParsedHtml($value, $configuration, $expectedResult): void - { - self::assertEquals($expectedResult, $this->subject->stdWrap_parseFunc($value, $configuration)); - } - /** * @test * @dataProvider _parseFuncParsesNestedTagsProperlyDataProvider -- GitLab