From 6d47cc477bcd248d35da85c4f8ab190b7dcb6c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20Gro=C3=9Fberndt?= <stephan.grossberndt@typo3.org>
Date: Tue, 19 Oct 2021 15:37:10 +0200
Subject: [PATCH] [BUGFIX] Keep hyphens when lexing words for indexed_search

This change makes sure hyphens are kept as part of a word instead of
being removed. Removing them caused "case-sensitive" to be lexed and
added to the index as "casesensitive", so that occurrence was not found
when searching for "case-sensitive".

Releases: main, 11.5
Resolves: #93401
Resolves: #77644
Change-Id: I72f8d297384cea002e1ca6cb8e3b1973774199f2
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/71885
Tested-by: core-ci <typo3@b13.com>
Tested-by: Tomas Norre Mikkelsen <tomasnorre@gmail.com>
Tested-by: Benni Mack <benni@typo3.org>
Reviewed-by: Tomas Norre Mikkelsen <tomasnorre@gmail.com>
Reviewed-by: Benni Mack <benni@typo3.org>
---
 typo3/sysext/indexed_search/Classes/Lexer.php | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/typo3/sysext/indexed_search/Classes/Lexer.php b/typo3/sysext/indexed_search/Classes/Lexer.php
index 28b1c8299f25..75eb802fcae7 100644
--- a/typo3/sysext/indexed_search/Classes/Lexer.php
+++ b/typo3/sysext/indexed_search/Classes/Lexer.php
@@ -50,11 +50,16 @@ class Lexer
      * @var array
      */
     public $lexerConf = [
-        //Characters: . - _ : / '
-        'printjoins' => [46, 45, 95, 58, 47, 39],
-        'casesensitive' => false,
-        // Set, if case sensitive indexing is wanted.
-        'removeChars' => [45],
+        'printjoins' => [
+            46, // .
+            45, // -
+            95, // _
+            58, // :
+            47, // /
+            39, // '
+        ],
+        'casesensitive' => false, // Set, if case-sensitive indexing is wanted
+        'removeChars' => [],
     ];
 
     /**
-- 
GitLab