From 62c07455b72f56198e9a74a0ab8fc34b0d0b12ab Mon Sep 17 00:00:00 2001
From: Tymoteusz Motylewski <t.motylewski@gmail.com>
Date: Thu, 10 Mar 2016 14:44:22 +0100
Subject: [PATCH] [TASK] Add unit tests for TYPO3SEARCH markers

Add two unit tests for Indexer, covering content extraction
from between TYPO3SEARCH_begin and TYPO3SEARCH_end markers.

Add note to documentation that it's possible to have multiple
TYPO3SEARCH marker pairs.

Resolves: #74815
Releases: master, 7.6, 6.2
Change-Id: I37c67dfc055dc30698831eef6d0231d929fef957
Reviewed-on: https://review.typo3.org/47175
Reviewed-by: Daniel Goerz <ervaude@gmail.com>
Tested-by: Daniel Goerz <ervaude@gmail.com>
Reviewed-by: Andreas Fernandez <typo3@scripting-base.de>
Tested-by: Andreas Fernandez <typo3@scripting-base.de>
---
 .../TechnicalDetails/HtmlContent/Index.rst    |  2 +
 .../indexed_search/Tests/Unit/IndexerTest.php | 98 +++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst
index 4658a2058b64..517e570bc62c 100644
--- a/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst
+++ b/typo3/sysext/indexed_search/Documentation/TechnicalDetails/HtmlContent/Index.rst
@@ -40,3 +40,5 @@ Rules:
    until that point is excluded and preceding content until next "end"
    marker is included.
 
+#. If the HTML contains multiple marker pairs, the content between each
+   pair is included.
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php
index 6c6cc404a258..5bcad4e15a02 100644
--- a/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php
+++ b/typo3/sysext/indexed_search/Tests/Unit/IndexerTest.php
@@ -120,4 +120,102 @@ class IndexerTest extends \TYPO3\CMS\Core\Tests\UnitTestCase
         $result = $this->subject->extractBaseHref($html);
         $this->assertEquals($baseHref, $result);
     }
+
+    /**
+     * Asserts that typoSearchTags() returns TRUE and reduces $body to the markup between one marker pair
+     *
+     * @test
+     */
+    public function typoSearchTagsRemovesBodyContentOutsideMarkers()
+    {
+        $body = <<<EOT
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
+<title>Some Title</title>
+<link href='css/normalize.css' rel='stylesheet' type='text/css'/>
+</head>
+<body>
+<div>
+<div class="non_searchable">
+    not searchable content
+</div>
+<!--TYPO3SEARCH_begin-->
+<div class="searchable">
+    lorem ipsum
+</div>
+<!--TYPO3SEARCH_end-->
+<div class="non_searchable">
+    not searchable content
+</div>
+</body>
+</html>
+EOT;
+        $expected = <<<EOT
+
+<div class="searchable">
+    lorem ipsum
+</div>
+
+EOT;
+
+        $result = $this->subject->typoSearchTags($body); // $body is checked below, i.e. expected to be rewritten in place
+        $this->assertTrue($result);
+        $this->assertEquals($expected, $body); // marker comments are gone, the line breaks next to them remain
+    }
+
+    /**
+     * Asserts that typoSearchTags() returns TRUE and keeps the markup of every marker pair, in document order
+     *
+     * @test
+     */
+    public function typoSearchTagsHandlesMultipleMarkerPairs()
+    {
+        $body = <<<EOT
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
+<title>Some Title</title>
+<link href='css/normalize.css' rel='stylesheet' type='text/css'/>
+</head>
+<body>
+<div>
+<div class="non_searchable">
+    not searchable content
+</div>
+<!--TYPO3SEARCH_begin-->
+<div class="searchable">
+    lorem ipsum
+</div>
+<!--TYPO3SEARCH_end-->
+<div class="non_searchable">
+    not searchable content
+</div>
+<!--TYPO3SEARCH_begin-->
+<div class="searchable">
+    lorem ipsum2
+</div>
+<!--TYPO3SEARCH_end-->
+<div class="non_searchable">
+    not searchable content
+</div>
+</body>
+</html>
+EOT;
+        $expected = <<<EOT
+
+<div class="searchable">
+    lorem ipsum
+</div>
+
+<div class="searchable">
+    lorem ipsum2
+</div>
+
+EOT;
+
+        $result = $this->subject->typoSearchTags($body); // $body is checked below, i.e. expected to be rewritten in place
+        $this->assertTrue($result);
+        $this->assertEquals($expected, $body); // both marked sections are joined; the markup between the pairs is dropped
+    }
 }
-- 
GitLab