diff --git a/typo3/sysext/core/Classes/Html/HtmlParser.php b/typo3/sysext/core/Classes/Html/HtmlParser.php
index 15aab06f85909b1d1c36ece6eb83a2f5958ff89c..8053d1da8a175dc707f10b824aa400accccb5440 100644
--- a/typo3/sysext/core/Classes/Html/HtmlParser.php
+++ b/typo3/sysext/core/Classes/Html/HtmlParser.php
@@ -986,19 +986,30 @@ class HtmlParser
      * @param string $tagList The comma separated list of tags to be stripped.
      * If empty, all empty tags will be stripped
      * @param bool $treatNonBreakingSpaceAsEmpty If TRUE tags containing only entities will be treated as empty.
+     * @param bool $keepTags If true, the tags in $tagList are kept and all other empty tags are stripped.
+     * Ignored when $tagList contains no tag names.
      * @return string the stripped content
      */
-    public function stripEmptyTags($content, $tagList = null, $treatNonBreakingSpaceAsEmpty = false)
+    public function stripEmptyTags($content, $tagList = '', $treatNonBreakingSpaceAsEmpty = false, $keepTags = false)
     {
-        $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
-        if ($tagList) {
-            $tags = preg_split('/,/', $tagList);
-            $tagRegEx = preg_replace('/ */', '', join('|', $tags));
+        $tagNames = GeneralUtility::trimExplode(',', (string)$tagList, true);
+        if (!empty($tagNames)) {
+            // The lookahead ends each name at whitespace, ">" or "/", so that a listed
+            // "b" does not also match longer names such as "br" or "body".
+            $tagRegEx = '(?:' . join('|', $tagNames) . ')(?=[\s>\/])';
+            if ($keepTags) {
+                // Invert the list: match any tag name except the listed ones.
+                $tagRegEx = '(?!' . $tagRegEx . ')[^ >]+';
+            }
+        } else {
+            $tagRegEx = '[^ >]+'; // all characters until you reach a > or space;
         }
         $count = 1;
         $nbspRegex = $treatNonBreakingSpaceAsEmpty ? '|( )' : '';
-        while ($count != 0) {
-            $content = preg_replace(sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex), '', $content, -1, $count);
+        $finalRegex = sprintf('/<(%s)[^>]*>( %s)*<\/\\1[^>]*>/i', $tagRegEx, $nbspRegex);
+        // Repeat until a pass removes nothing: stripping an inner tag can leave its parent empty.
+        while ($count !== 0) {
+            $content = preg_replace($finalRegex, '', $content, -1, $count);
         }
         return $content;
     }
@@ -1012,26 +1023,22 @@ class HtmlParser
      */
     protected function stripEmptyTagsIfConfigured($value, $configuration)
     {
-        if (isset($configuration['stripEmptyTags']) && $configuration['stripEmptyTags']) {
-            $tags = null;
-            if (
-                isset($configuration['stripEmptyTags.']['tags'])
-                && $configuration['stripEmptyTags.']['tags'] !== ''
-            ) {
-                $tags = $configuration['stripEmptyTags.']['tags'];
-            }
-
-            $treatNonBreakingSpaceAsEmpty = false;
-            if (
-                isset($configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty'])
-                && $configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty']
-            ) {
-                $treatNonBreakingSpaceAsEmpty = (bool)$configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty'];
-            }
+        if (empty($configuration['stripEmptyTags'])) {
+            return $value;
+        }
 
-            $value = $this->stripEmptyTags($value, $tags, $treatNonBreakingSpaceAsEmpty);
+        // keepTags takes precedence: when it is set, the tags list is not applied.
+        $tags = '';
+        $keepTags = false;
+        if (!empty($configuration['stripEmptyTags.']['keepTags'])) {
+            $tags = $configuration['stripEmptyTags.']['keepTags'];
+            $keepTags = true;
+        } elseif (!empty($configuration['stripEmptyTags.']['tags'])) {
+            $tags = $configuration['stripEmptyTags.']['tags'];
         }
 
-        return $value;
+        $treatNonBreakingSpaceAsEmpty = !empty($configuration['stripEmptyTags.']['treatNonBreakingSpaceAsEmpty']);
+
+        return $this->stripEmptyTags($value, $tags, $treatNonBreakingSpaceAsEmpty, $keepTags);
     }
 }
diff --git a/typo3/sysext/core/Documentation/Changelog/master/Feature-72045-KeepTagsInHtmlParserWhenStrippingEmptyTags.rst b/typo3/sysext/core/Documentation/Changelog/master/Feature-72045-KeepTagsInHtmlParserWhenStrippingEmptyTags.rst
new file mode 100644
index
0000000000000000000000000000000000000000..77571fb69fc0703f9f9978e8a7ea6f0b0a3130b0 --- /dev/null +++ b/typo3/sysext/core/Documentation/Changelog/master/Feature-72045-KeepTagsInHtmlParserWhenStrippingEmptyTags.rst @@ -0,0 +1,28 @@ +==================================================== +Feature: #72045 - HTMLparser.stripEmptyTags.keepTags +==================================================== + +Description +=========== + +A new option for the HTMLparser.stripEmptyTags configuration is added. +It allows keeping configured tags. Before this change only a list of tags +could be provided that should be removed. + +The following example will strip all empty tags **except** ``tr`` and ``td`` tags. + +:: + + HTMLparser.stripEmptyTags = 1 + HTMLparser.stripEmptyTags.keepTags = tr,td + + +**Important!** If this setting is used the stripEmptyTags.tags configuration will +have no effect any more. You can only use one option at a time. + + +Impact +====== + +Unless the configuration of the HTMLparser is changed, the stripEmptyTags +feature will work as before. 
\ No newline at end of file
diff --git a/typo3/sysext/core/Tests/Unit/Html/HtmlParserTest.php b/typo3/sysext/core/Tests/Unit/Html/HtmlParserTest.php
index eaa7eb7f188dd1d3bdd1a9527a6bf04acfbe484d..01711fc020aa0a1fd6558e3fa2e48bdca8f3e3c4 100644
--- a/typo3/sysext/core/Tests/Unit/Html/HtmlParserTest.php
+++ b/typo3/sysext/core/Tests/Unit/Html/HtmlParserTest.php
@@ -294,23 +294,39 @@ class HtmlParserTest extends \TYPO3\CMS\Core\Tests\UnitTestCase
     public function emptyTagsDataProvider()
     {
         return array(
-            array(0 , null, false, '<h1></h1>', '<h1></h1>'),
-            array(1 , null, false, '<h1></h1>', ''),
-            array(1 , null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'),
-            array(1 , null, false, '<h1 class="something"></h1>', ''),
-            array(1 , null, false, '<h1 class="something"></h1><h2></h2>', ''),
-            array(1 , 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'),
-            array(1 , 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''),
-            array(1 , null, false, '<div><p></p></div>', ''),
-            array(1 , null, false, '<div><p> </p></div>', '<div><p> </p></div>'),
-            array(1 , null, true, '<div><p> </p></div>', ''),
-            array(1 , null, true, '<div> <p></p></div>', ''),
-            array(1 , null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'),
-            array(1 , null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'),
-            array(1 , null, false, '<div>Some content</div>', '<div>Some content</div>'),
-            array(1 , null, true, '<div>Some content</div>', '<div>Some content</div>'),
-            array(1 , null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
-            array(1 , null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
+            array(0, null, false, '<h1></h1>', '<h1></h1>'),
+            array(1, null, false, '<h1></h1>', ''),
+            array(1, null, false, '<h1>hallo</h1>', '<h1>hallo</h1>'),
+            array(1, null, false, '<h1 class="something"></h1>', ''),
+            array(1, null, false, '<h1 class="something"></h1><h2></h2>', ''),
+            array(1, 'h2', false, '<h1 class="something"></h1><h2></h2>', '<h1 class="something"></h1>'),
+            array(1, 'h2, h1', false, '<h1 class="something"></h1><h2></h2>', ''),
+            array(1, null, false, '<div><p></p></div>', ''),
+            array(1, null, false, '<div><p> </p></div>', '<div><p> </p></div>'),
+            array(1, null, true, '<div><p> </p></div>', ''),
+            array(1, null, true, '<div> <p></p></div>', ''),
+            array(1, null, false, '<div>Some content<p></p></div>', '<div>Some content</div>'),
+            array(1, null, true, '<div>Some content<p></p></div>', '<div>Some content</div>'),
+            array(1, null, false, '<div>Some content</div>', '<div>Some content</div>'),
+            array(1, null, true, '<div>Some content</div>', '<div>Some content</div>'),
+            array(1, null, false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
+            array(1, null, true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
+            // The null cases above, repeated with '' in the second column.
+            array(0, '', false, '<h1></h1>', '<h1></h1>'),
+            array(1, '', false, '<h1></h1>', ''),
+            array(1, '', false, '<h1>hallo</h1>', '<h1>hallo</h1>'),
+            array(1, '', false, '<h1 class="something"></h1>', ''),
+            array(1, '', false, '<h1 class="something"></h1><h2></h2>', ''),
+            array(1, '', false, '<div><p></p></div>', ''),
+            array(1, '', false, '<div><p> </p></div>', '<div><p> </p></div>'),
+            array(1, '', true, '<div><p> </p></div>', ''),
+            array(1, '', true, '<div> <p></p></div>', ''),
+            array(1, '', false, '<div>Some content<p></p></div>', '<div>Some content</div>'),
+            array(1, '', true, '<div>Some content<p></p></div>', '<div>Some content</div>'),
+            array(1, '', false, '<div>Some content</div>', '<div>Some content</div>'),
+            array(1, '', true, '<div>Some content</div>', '<div>Some content</div>'),
+            array(1, '', false, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
+            array(1, '', true, '<a href="#skiplinks">Skiplinks </a><b></b>', '<a href="#skiplinks">Skiplinks </a>'),
         );
     }
 
@@ -338,6 +354,54 @@ class HtmlParserTest extends \TYPO3\CMS\Core\Tests\UnitTestCase
         $this->assertEquals($expectedResult, $result);
     }
 
+    /**
+     * Data sets for stripEmptyTagsKeepsConfiguredTags(): keepTags list,
+     * treatNonBreakingSpaceAsEmpty flag, input HTML and expected output.
+     *
+     * @return array
+     */
+    public function stripEmptyTagsKeepsConfiguredTagsDataProvider() {
+        return [
+            array(
+                'tr,td',
+                false,
+                '<div><p><tr><td></td></tr></p></div><div class="test"></div><tr></tr><p></p><td></td><i></i>',
+                '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>'
+            ),
+            array(
+                'tr,td',
+                true,
+                '<div><p><tr><td></td></tr></p></div><p class="test"> </p><tr></tr><p></p><td></td><i></i>',
+                '<div><p><tr><td></td></tr></p></div><tr></tr><td></td>'
+            ),
+        ];
+    }
+
+    /**
+     * Passes a stripEmptyTags configuration with keepTags to
+     * parseConfigAndCleanHtml() and compares the cleaned HTML.
+     *
+     * @test
+     * @dataProvider stripEmptyTagsKeepsConfiguredTagsDataProvider
+     * @param string $tagList List of tags that should be kept, even if they are empty.
+     * @param bool $treatNonBreakingSpaceAsEmpty If true, tags containing only non-breaking spaces are considered empty.
+     * @param string $content The HTML content that should be parsed.
+     * @param string $expectedResult The expected HTML code result.
+     */
+    public function stripEmptyTagsKeepsConfiguredTags($tagList, $treatNonBreakingSpaceAsEmpty, $content, $expectedResult) {
+        $tsConfig = array(
+            'keepNonMatchedTags' => 1,
+            'stripEmptyTags' => 1,
+            'stripEmptyTags.' => array(
+                'keepTags' => $tagList,
+                'treatNonBreakingSpaceAsEmpty' => $treatNonBreakingSpaceAsEmpty
+            ),
+        );
+
+        $result = $this->parseConfigAndCleanHtml($tsConfig, $content);
+        $this->assertEquals($expectedResult, $result);
+    }
+
     /**
      * Calls HTMLparserConfig() and passes the generated config to the HTMLcleaner() method on the current subject.
      *