From 62c07455b72f56198e9a74a0ab8fc34b0d0b12ab Mon Sep 17 00:00:00 2001 From: Tymoteusz Motylewski <t.motylewski@gmail.com> Date: Thu, 10 Mar 2016 14:44:22 +0100 Subject: [PATCH] [TASK] Add unit tests for TYPO3SEARCH markers Add two unit tests for Indexer, covering content extraction from between TYPO3SEARCH_begin and TYPO3SEARCH_end markers. Add note to documentation that it's possible to have multiple TYPO3SEARCH marker pairs. Resolves: #74815 Releases: master, 7.6, 6.2 Change-Id: I37c67dfc055dc30698831eef6d0231d929fef957 Reviewed-on: https://review.typo3.org/47175 Reviewed-by: Daniel Goerz <ervaude@gmail.com> Tested-by: Daniel Goerz <ervaude@gmail.com> Reviewed-by: Andreas Fernandez <typo3@scripting-base.de> Tested-by: Andreas Fernandez <typo3@scripting-base.de> --- .../TechnicalDetails/HtmlContent/Index.rst | 2 + .../indexed_search/Tests/Unit/IndexerTest.php | 98 +++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst index 4658a2058b64..517e570bc62c 100644 --- a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst +++ b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst @@ -40,3 +40,5 @@ Rules: until that point is excluded and preceding content until next "end" marker is included. +#. If there are multiple marker pairs in HTML, content from in between + all pairs is included. \ No newline at end of file diff --git a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php index 6c6cc404a258..5bcad4e15a02 100644 --- a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php +++ b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php @@ -120,4 +120,102 @@ class IndexerTest extends \TYPO3\CMS\Core\Tests\UnitTestCase $result = $this->subject->extractBaseHref($html); $this->assertEquals($baseHref, $result); } + + /** + * Tests whether indexer can extract content between "TYPO3SEARCH_begin" and "TYPO3SEARCH_end" markers + * + * @test + */ + public function typoSearchTagsRemovesBodyContentOutsideMarkers() + { + $body = <<<EOT +<html> +<head> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> +<title>Some Title</title> +<link href='css/normalize.css' rel='stylesheet' type='text/css'/> +</head> +<body> +<div> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +</body> +</html> +EOT; + $expected = <<<EOT + +<div class="searchable"> + lorem ipsum +</div> + +EOT; + + $result = $this->subject->typoSearchTags($body); + $this->assertTrue($result); + $this->assertEquals($expected, $body); + } + + /** + * Tests whether indexer can extract content between multiple pairs of "TYPO3SEARCH" markers + * + * @test + */ + public function typoSearchTagsHandlesMultipleMarkerPairs() + { + $body = <<<EOT +<html> +<head> +<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> +<title>Some Title</title> +<link href='css/normalize.css' rel='stylesheet' type='text/css'/> +</head> +<body> +<div> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +<!--TYPO3SEARCH_begin--> +<div class="searchable"> + lorem ipsum2 +</div> +<!--TYPO3SEARCH_end--> +<div class="non_searchable"> + not searchable content +</div> +</body> +</html> +EOT; + $expected = <<<EOT + +<div class="searchable"> + lorem ipsum +</div> + +<div class="searchable"> + lorem ipsum2 +</div> + +EOT; + + $result = $this->subject->typoSearchTags($body); + $this->assertTrue($result); + $this->assertEquals($expected, $body); + } } -- GitLab