From 9b1350159d605cabe69ec18da25273447e88681f Mon Sep 17 00:00:00 2001
From: Benni Mack <benni@typo3.org>
Date: Thu, 3 Mar 2016 12:05:56 +0100
Subject: [PATCH] [!!!][TASK] Remove unused csConv parameters

There are several additional parameters that are not used
anymore.

CharsetConverter->entities_to_utf8() now always converts
string-HTML entities (like &amp; or &pound;) as well.
CharsetConverter->utf8_to_numberarray() now always converts
entities and always returns the real UTF-8 characters.

Resolves: #74031
Releases: master
Change-Id: I6ee4e74bb9122a2ac8b9843a7026c933d7199381
Reviewed-on: https://review.typo3.org/47004
Reviewed-by: Daniel Maier <dani-maier@gmx.de>
Tested-by: Daniel Maier <dani-maier@gmx.de>
Reviewed-by: Benjamin Kott <info@bk2k.info>
Tested-by: Benjamin Kott <info@bk2k.info>
---
 .../core/Classes/Charset/CharsetConverter.php | 34 ++++++++-----------
 .../Classes/Imaging/GraphicalFunctions.php    |  6 ++--
 ...4031-CharsetConverterParametersRemoved.rst | 32 +++++++++++++++++
 .../Classes/Controller/SearchController.php   |  2 +-
 .../Controller/SearchFormController.php       |  2 +-
 .../sysext/indexed_search/Classes/Indexer.php |  4 +--
 6 files changed, 54 insertions(+), 26 deletions(-)
 create mode 100644 typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst

diff --git a/typo3/sysext/core/Classes/Charset/CharsetConverter.php b/typo3/sysext/core/Classes/Charset/CharsetConverter.php
index 65ff728200bd..6c98ca59c48c 100644
--- a/typo3/sysext/core/Classes/Charset/CharsetConverter.php
+++ b/typo3/sysext/core/Classes/Charset/CharsetConverter.php
@@ -551,17 +551,14 @@ class CharsetConverter implements SingletonInterface
     }
 
     /**
-     * Converts numeric entities (UNICODE, eg. decimal (&#1234;) or hexadecimal (&#x1b;)) to UTF-8 multibyte chars
-     *
+     * Converts numeric entities (UNICODE, eg. decimal (&#1234;) or hexadecimal (&#x1b;)) to UTF-8 multibyte chars.
+     * All string-HTML entities (like &amp; or &pound;) will be converted as well
      * @param string $str Input string, UTF-8
-     * @param bool $alsoStdHtmlEnt If set, then all string-HTML entities (like &amp; or &pound; will be converted as well)
      * @return string Output string
      */
-    public function entities_to_utf8($str, $alsoStdHtmlEnt = false)
+    public function entities_to_utf8($str)
     {
-        if ($alsoStdHtmlEnt) {
-            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
-        }
+        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
         $token = md5(microtime());
         $parts = explode($token, preg_replace('/(&([#[:alnum:]]*);)/', $token . '${2}' . $token, $str));
         foreach ($parts as $k => $v) {
@@ -579,7 +576,7 @@ class CharsetConverter implements SingletonInterface
                     $v = substr($v, $position);
                 }
                 $parts[$k] = $this->UnumberToChar($v);
-            } elseif ($alsoStdHtmlEnt && isset($trans_tbl['&' . $v . ';'])) {
+            } elseif (isset($trans_tbl['&' . $v . ';'])) {
                 // Other entities:
                 $v = $trans_tbl['&' . $v . ';'];
                 $parts[$k] = $v;
@@ -592,19 +589,18 @@ class CharsetConverter implements SingletonInterface
     }
 
     /**
-     * Converts all chars in the input UTF-8 string into integer numbers returned in an array
+     * Splits the input UTF-8 string into its single characters, which are returned in an array.
+     * All HTML entities (like &amp; or &pound; or &#123; or &#x3f5d;) will be detected as characters.
+     * Each array item is the real UTF-8 char, not its integer number.
      *
      * @param string $str Input string, UTF-8
-     * @param bool $convEntities If set, then all HTML entities (like &amp; or &pound; or &#123; or &#x3f5d;) will be detected as characters.
-     * @param bool $retChar If set, then instead of integer numbers the real UTF-8 char is returned.
      * @return array Output array with the char numbers
      */
-    public function utf8_to_numberarray($str, $convEntities = false, $retChar = false)
+    public function utf8_to_numberarray($str)
     {
-        // If entities must be registered as well...:
-        if ($convEntities) {
-            $str = $this->entities_to_utf8($str, 1);
-        }
+        // Entities must be registered as well
+        $str = $this->entities_to_utf8($str);
+
         // Do conversion:
         $strLen = strlen($str);
         $outArr = array();
@@ -631,12 +627,12 @@ class CharsetConverter implements SingletonInterface
                             break;
                         }
                     }
-                    $outArr[] = $retChar ? $buf : $this->utf8CharToUnumber($buf);
+                    $outArr[] = $buf;
                 } else {
-                    $outArr[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal;
+                    $outArr[] = chr($this->noCharByteVal);
                 }
             } else {
-                $outArr[] = $retChar ? chr($ord) : $ord;
+                $outArr[] = chr($ord);
             }
         }
         return $outArr;
diff --git a/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php b/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php
index 980c1b4fc8e8..b20f5cf748f4 100644
--- a/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php
+++ b/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php
@@ -811,7 +811,7 @@ class GraphicalFunctions
                     $x += $wordW + $wordSpacing;
                 }
             } else {
-                $utf8Chars = $this->csConvObj->utf8_to_numberarray($theText, true, true);
+                $utf8Chars = $this->csConvObj->utf8_to_numberarray($theText);
                 // For each UTF-8 char, do:
                 foreach ($utf8Chars as $char) {
                     $charInf = $this->ImageTTFBBoxWrapper($conf['fontSize'], $conf['angle'], $conf['fontFile'], $char, $conf['splitRendering.'], $sF);
@@ -928,7 +928,7 @@ class GraphicalFunctions
                 $x += $wordW + $wordSpacing;
             }
         } else {
-            $utf8Chars = $this->csConvObj->utf8_to_numberarray($text, true, true);
+            $utf8Chars = $this->csConvObj->utf8_to_numberarray($text);
             // For each UTF-8 char, do:
             foreach ($utf8Chars as $char) {
                 $charInf = $this->ImageTTFBBoxWrapper($fontSize, $angle, $fontFile, $char, $splitRenderingConf, $sF);
@@ -1152,7 +1152,7 @@ class GraphicalFunctions
                                 $currentState = -1;
                                 $bankAccum = '';
                                 // Explode the string value by the word value to highlight:
-                                $utf8Chars = $this->csConvObj->utf8_to_numberarray($part['str'], true, true);
+                                $utf8Chars = $this->csConvObj->utf8_to_numberarray($part['str']);
                                 foreach ($utf8Chars as $utfChar) {
                                     // Find number and evaluate position:
                                     $uNumber = (int)$this->csConvObj->utf8CharToUnumber($utfChar);
diff --git a/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst b/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst
new file mode 100644
index 000000000000..e88167187f4c
--- /dev/null
+++ b/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst
@@ -0,0 +1,32 @@
+======================================================
+Breaking: #74031 - CharsetConverter parameters removed
+======================================================
+
+Description
+===========
+
+The second parameter for the method ``CharsetConverter->entities_to_utf8()`` has been removed.
+
+The second and the third parameters of ``CharsetConverter->utf8_to_numberarray()`` have been removed.
+
+
+Impact
+======
+
+When calling ``CharsetConverter->entities_to_utf8()``, string-HTML entities (like &amp; or &pound;) will be converted
+to UTF-8 as well at all times. Previously this behaviour was configurable.
+
+When calling ``CharsetConverter->utf8_to_numberarray()``, string-HTML entities (like &amp; or &pound;) will be converted to UTF-8 as well at all times. Additionally, the real UTF-8 char is returned instead of integer numbers
+at all times. Previously these behaviours were configurable.
+
+
+Affected Installations
+======================
+
+Installations with custom extensions that used these methods directly in PHP code.
+
+
+Migration
+=========
+
+Remove these parameters from the calling PHP code.
\ No newline at end of file
diff --git a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php
index 937116aa0b21..c68e1d403e13 100644
--- a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php
+++ b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php
@@ -812,7 +812,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle
         $searchWords = substr($this->sword, 0, 200);
         // Convert to UTF-8 + conv. entities (was also converted during indexing!)
         $searchWords = $this->charsetConverter->conv($searchWords, $GLOBALS['TSFE']->metaCharset, 'utf-8');
-        $searchWords = $this->charsetConverter->entities_to_utf8($searchWords, true);
+        $searchWords = $this->charsetConverter->entities_to_utf8($searchWords);
         $sWordArray = false;
         if ($hookObj = $this->hookRequest('getSearchWords')) {
             $sWordArray = $hookObj->getSearchWords_splitSWords($searchWords, $defaultOperator);
diff --git a/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php b/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php
index 27afe22865c0..037c742988df 100755
--- a/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php
+++ b/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php
@@ -434,7 +434,7 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin
         $inSW = substr($this->piVars['sword'], 0, 200);
         // Convert to UTF-8 + conv. entities (was also converted during indexing!)
         $inSW = $this->charsetConverter->conv($inSW, $this->frontendController->metaCharset, 'utf-8');
-        $inSW = $this->charsetConverter->entities_to_utf8($inSW, true);
+        $inSW = $this->charsetConverter->entities_to_utf8($inSW);
         $sWordArray = false;
         if ($hookObj = $this->hookRequest('getSearchWords')) {
             $sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp);
diff --git a/typo3/sysext/indexed_search/Classes/Indexer.php b/typo3/sysext/indexed_search/Classes/Indexer.php
index c82bd6f68daf..18f043190c3d 100644
--- a/typo3/sysext/indexed_search/Classes/Indexer.php
+++ b/typo3/sysext/indexed_search/Classes/Indexer.php
@@ -687,7 +687,7 @@ class Indexer
             $content = $this->csObj->conv($content, $charset, 'utf-8');
         }
         // Convert entities, assuming document is now UTF-8:
-        return $this->csObj->entities_to_utf8($content, true);
+        return $this->csObj->entities_to_utf8($content);
     }
 
     /**
@@ -1289,7 +1289,7 @@ class Indexer
                     $contentArr[$key] = $this->csObj->conv($contentArr[$key], $charset, 'utf-8');
                 }
                 // decode all numeric / html-entities in the string to real characters:
-                $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key], true);
+                $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key]);
             }
         }
     }
-- 
GitLab