diff --git a/typo3/sysext/core/Classes/Charset/CharsetConverter.php b/typo3/sysext/core/Classes/Charset/CharsetConverter.php index 65ff728200bd458461bcc659e317d06b6f21be5c..6c98ca59c48caca0d5cfb657d52a9b24102ea07a 100644 --- a/typo3/sysext/core/Classes/Charset/CharsetConverter.php +++ b/typo3/sysext/core/Classes/Charset/CharsetConverter.php @@ -551,17 +551,14 @@ class CharsetConverter implements SingletonInterface } /** - * Converts numeric entities (UNICODE, eg. decimal (Ӓ) or hexadecimal ()) to UTF-8 multibyte chars - * + * Converts numeric entities (UNICODE, eg. decimal (Ӓ) or hexadecimal ()) to UTF-8 multibyte chars. + * All string-HTML entities (like & or £) will be converted as well * @param string $str Input string, UTF-8 - * @param bool $alsoStdHtmlEnt If set, then all string-HTML entities (like & or £ will be converted as well) * @return string Output string */ - public function entities_to_utf8($str, $alsoStdHtmlEnt = false) + public function entities_to_utf8($str) { - if ($alsoStdHtmlEnt) { - $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8')); - } + $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT)); $token = md5(microtime()); $parts = explode($token, preg_replace('/(&([#[:alnum:]]*);)/', $token . '${2}' . $token, $str)); foreach ($parts as $k => $v) { @@ -579,7 +576,7 @@ class CharsetConverter implements SingletonInterface $v = substr($v, $position); } $parts[$k] = $this->UnumberToChar($v); - } elseif ($alsoStdHtmlEnt && isset($trans_tbl['&' . $v . ';'])) { + } elseif (isset($trans_tbl['&' . $v . ';'])) { // Other entities: $v = $trans_tbl['&' . $v . ';']; $parts[$k] = $v; @@ -592,19 +589,18 @@ class CharsetConverter implements SingletonInterface } /** - * Converts all chars in the input UTF-8 string into integer numbers returned in an array + * Converts all chars in the input UTF-8 string into integer numbers returned in an array. 
+ * All HTML entities (like & or £ or { or 㽝) will be detected as characters. + * Also, instead of integer numbers the real UTF-8 char is returned. * * @param string $str Input string, UTF-8 - * @param bool $convEntities If set, then all HTML entities (like & or £ or { or 㽝) will be detected as characters. - * @param bool $retChar If set, then instead of integer numbers the real UTF-8 char is returned. * @return array Output array with the char numbers */ - public function utf8_to_numberarray($str, $convEntities = false, $retChar = false) + public function utf8_to_numberarray($str) { - // If entities must be registered as well...: - if ($convEntities) { - $str = $this->entities_to_utf8($str, 1); - } + // Entities must be registered as well + $str = $this->entities_to_utf8($str); + // Do conversion: $strLen = strlen($str); $outArr = array(); @@ -631,12 +627,12 @@ class CharsetConverter implements SingletonInterface break; } } - $outArr[] = $retChar ? $buf : $this->utf8CharToUnumber($buf); + $outArr[] = $buf; } else { - $outArr[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal; + $outArr[] = chr($this->noCharByteVal); } } else { - $outArr[] = $retChar ? 
chr($ord) : $ord; + $outArr[] = chr($ord); } } return $outArr; diff --git a/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php b/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php index 980c1b4fc8e8e1e6699fb5bea902c0232591e24d..b20f5cf748f4a4f40f07fb00301276f00021a6b6 100644 --- a/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php +++ b/typo3/sysext/core/Classes/Imaging/GraphicalFunctions.php @@ -811,7 +811,7 @@ class GraphicalFunctions $x += $wordW + $wordSpacing; } } else { - $utf8Chars = $this->csConvObj->utf8_to_numberarray($theText, true, true); + $utf8Chars = $this->csConvObj->utf8_to_numberarray($theText); // For each UTF-8 char, do: foreach ($utf8Chars as $char) { $charInf = $this->ImageTTFBBoxWrapper($conf['fontSize'], $conf['angle'], $conf['fontFile'], $char, $conf['splitRendering.'], $sF); @@ -928,7 +928,7 @@ class GraphicalFunctions $x += $wordW + $wordSpacing; } } else { - $utf8Chars = $this->csConvObj->utf8_to_numberarray($text, true, true); + $utf8Chars = $this->csConvObj->utf8_to_numberarray($text); // For each UTF-8 char, do: foreach ($utf8Chars as $char) { $charInf = $this->ImageTTFBBoxWrapper($fontSize, $angle, $fontFile, $char, $splitRenderingConf, $sF); @@ -1152,7 +1152,7 @@ class GraphicalFunctions $currentState = -1; $bankAccum = ''; // Explode the string value by the word value to highlight: - $utf8Chars = $this->csConvObj->utf8_to_numberarray($part['str'], true, true); + $utf8Chars = $this->csConvObj->utf8_to_numberarray($part['str']); foreach ($utf8Chars as $utfChar) { // Find number and evaluate position: $uNumber = (int)$this->csConvObj->utf8CharToUnumber($utfChar); diff --git a/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst b/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst new file mode 100644 index 0000000000000000000000000000000000000000..e88167187f4cb73ba996ade7a28eeb61e3493a3f --- /dev/null +++ 
b/typo3/sysext/core/Documentation/Changelog/master/Breaking-74031-CharsetConverterParametersRemoved.rst @@ -0,0 +1,32 @@ +====================================================== +Breaking: #74031 - CharsetConverter parameters removed +====================================================== + +Description +=========== + +The second parameter for the method ``CharsetConverter->entities_to_utf8()`` has been removed. + +The second and third parameters of ``CharsetConverter->utf8_to_numberarray()`` have been removed. + + +Impact +====== + +When calling ``CharsetConverter->entities_to_utf8()``, string-HTML entities (like &amp; or &pound;) will be converted +to UTF-8 as well at all times. Previously this behaviour was configurable. + +When calling ``CharsetConverter->utf8_to_numberarray()``, string-HTML entities (like &amp; or &pound;) will be converted to UTF-8 as well at all times. Additionally, instead of integer numbers, the real UTF-8 characters are returned +at all times. Previously these behaviours were configurable. + + +Affected Installations +====================== + +Installations with custom extensions that used these methods directly in PHP code. + + +Migration +========= + +Remove these parameters from the calling PHP code. \ No newline at end of file diff --git a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php index 937116aa0b2163f950b8bb673683272a29063fbd..c68e1d403e135eb2e0bf1d185f6b600ba7ec780a 100644 --- a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php +++ b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php @@ -812,7 +812,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle $searchWords = substr($this->sword, 0, 200); // Convert to UTF-8 + conv. entities (was also converted during indexing!) 
$searchWords = $this->charsetConverter->conv($searchWords, $GLOBALS['TSFE']->metaCharset, 'utf-8'); - $searchWords = $this->charsetConverter->entities_to_utf8($searchWords, true); + $searchWords = $this->charsetConverter->entities_to_utf8($searchWords); $sWordArray = false; if ($hookObj = $this->hookRequest('getSearchWords')) { $sWordArray = $hookObj->getSearchWords_splitSWords($searchWords, $defaultOperator); diff --git a/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php b/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php index 27afe22865c02278659b706b62a06f23014eda06..037c742988df8142e0cc23fb8ef91fdb42acca6f 100755 --- a/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php +++ b/typo3/sysext/indexed_search/Classes/Controller/SearchFormController.php @@ -434,7 +434,7 @@ class SearchFormController extends \TYPO3\CMS\Frontend\Plugin\AbstractPlugin $inSW = substr($this->piVars['sword'], 0, 200); // Convert to UTF-8 + conv. entities (was also converted during indexing!) 
$inSW = $this->charsetConverter->conv($inSW, $this->frontendController->metaCharset, 'utf-8'); - $inSW = $this->charsetConverter->entities_to_utf8($inSW, true); + $inSW = $this->charsetConverter->entities_to_utf8($inSW); $sWordArray = false; if ($hookObj = $this->hookRequest('getSearchWords')) { $sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp); diff --git a/typo3/sysext/indexed_search/Classes/Indexer.php b/typo3/sysext/indexed_search/Classes/Indexer.php index c82bd6f68dafee86a2cbc84e1799530e4b64e6c6..18f043190c3dd68735aaf283e368629bfedf3ec8 100644 --- a/typo3/sysext/indexed_search/Classes/Indexer.php +++ b/typo3/sysext/indexed_search/Classes/Indexer.php @@ -687,7 +687,7 @@ class Indexer $content = $this->csObj->conv($content, $charset, 'utf-8'); } // Convert entities, assuming document is now UTF-8: - return $this->csObj->entities_to_utf8($content, true); + return $this->csObj->entities_to_utf8($content); } /** @@ -1289,7 +1289,7 @@ class Indexer $contentArr[$key] = $this->csObj->conv($contentArr[$key], $charset, 'utf-8'); } // decode all numeric / html-entities in the string to real characters: - $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key], true); + $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key]); } } }