diff --git a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst index 4658a2058b647094cd05eee4a093f97371bf7333..517e570bc62c0a72cc45a283182c8462905416a2 100644 --- a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst +++ b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst @@ -40,3 +40,5 @@ Rules: until that point is excluded and preceding content until next "end" marker is included. +#. If there are multiple marker pairs in HTML, content from in between + all pairs is included. \ No newline at end of file diff --git a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php index 6c6cc404a258422339f9f371c49322c5b41aeb07..5bcad4e15a02f2cc3c5429d9f1a99f6403ca2381 100644 --- a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php +++ b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php @@ -120,4 +120,102 @@ class IndexerTest extends \TYPO3\CMS\Core\Tests\UnitTestCase $result = $this->subject->extractBaseHref($html); $this->assertEquals($baseHref, $result); } + + /** + * Checks that typoSearchTags() returns TRUE and leaves in $body only the content found between the "TYPO3SEARCH_begin" and "TYPO3SEARCH_end" markers + * + * @test + */ + public function typoSearchTagsRemovesBodyContentOutsideMarkers() + { + $body = <<<EOT +<html> +<head> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> +<title>Some Title</title> +<link href='css/normalize.css' rel='stylesheet' type='text/css'/> +</head> +<body> +<div> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +</body> +</html> +EOT; + $expected = <<<EOT + +<div class="searchable"> + lorem ipsum +</div> + +EOT; + + $result = 
$this->subject->typoSearchTags($body); + $this->assertTrue($result); + $this->assertEquals($expected, $body); + } + + /** + * Checks that typoSearchTags() returns TRUE and leaves in $body only the content of every "TYPO3SEARCH" marker pair, joined in document order + * + * @test + */ + public function typoSearchTagsHandlesMultipleMarkerPairs() + { + $body = <<<EOT +<html> +<head> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> +<title>Some Title</title> +<link href='css/normalize.css' rel='stylesheet' type='text/css'/> +</head> +<body> +<div> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum2 +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +</body> +</html> +EOT; + $expected = <<<EOT + +<div class="searchable"> + lorem ipsum +</div> + +<div class="searchable"> + lorem ipsum2 +</div> + +EOT; + + $result = $this->subject->typoSearchTags($body); + $this->assertTrue($result); + $this->assertEquals($expected, $body); + } }