From 1a1ea9e5772ae2a2bf7633cc954b73cabb58dff1 Mon Sep 17 00:00:00 2001 From: Benni Mack <benni@typo3.org> Date: Wed, 14 Jun 2017 14:11:47 +0200 Subject: [PATCH] [BUGFIX] Use mb_* methods directly instead of CharsetConverter There are several places where mb_* methods are still not used, although they are preferable to, for instance, the $csConv->parse_charset() method, because mbstring already has more charset aliases defined. The same goes for "entities_to_utf8", which is a PHP user-space implementation of html_entity_decode() dating back to Kasper's change in 2003. Using native PHP methods should also increase performance marginally. Resolves: #81575 Releases: master, 8.7 Change-Id: I5e97881cc7107883a5ff53d534161a7ec17e1ee5 Reviewed-on: https://review.typo3.org/53208 Tested-by: TYPO3com <no-reply@typo3.com> Reviewed-by: Markus Klein <markus.klein@typo3.org> Tested-by: Markus Klein <markus.klein@typo3.org> Reviewed-by: Wouter Wolters <typo3@wouterwolters.nl> Reviewed-by: Anja Leichsenring <aleichsenring@ab-softlab.de> Tested-by: Anja Leichsenring <aleichsenring@ab-softlab.de> Reviewed-by: Benni Mack <benni@typo3.org> Tested-by: Benni Mack <benni@typo3.org> --- .../Wizard/SuggestWizardDefaultReceiver.php | 9 +++---- .../sysext/core/Classes/Page/PageRenderer.php | 21 +++++++++++++--- .../core/Classes/Utility/GeneralUtility.php | 13 +++++++--- .../ViewHelpers/Format/CaseViewHelper.php | 12 ++++++--- .../ContentObject/ContentObjectRenderer.php | 19 +++++++------- .../TypoScriptFrontendController.php | 25 ++++++++++++++----- .../ContentObjectRendererTest.php | 6 ++--- .../Classes/Controller/SearchController.php | 16 ++++++------ .../Classes/FileContentParser.php | 2 +- .../sysext/indexed_search/Classes/Indexer.php | 9 +++---- 10 files changed, 83 insertions(+), 49 deletions(-) diff --git a/typo3/sysext/backend/Classes/Form/Wizard/SuggestWizardDefaultReceiver.php b/typo3/sysext/backend/Classes/Form/Wizard/SuggestWizardDefaultReceiver.php index a967c83b4497..abe989dc6743 100644 --- 
a/typo3/sysext/backend/Classes/Form/Wizard/SuggestWizardDefaultReceiver.php +++ b/typo3/sysext/backend/Classes/Form/Wizard/SuggestWizardDefaultReceiver.php @@ -15,7 +15,6 @@ namespace TYPO3\CMS\Backend\Form\Wizard; */ use TYPO3\CMS\Backend\Utility\BackendUtility; -use TYPO3\CMS\Core\Charset\CharsetConverter; use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\ConnectionPool; use TYPO3\CMS\Core\Database\Query\QueryBuilder; @@ -156,8 +155,6 @@ class SuggestWizardDefaultReceiver ->execute(); $allRowsCount = $result->rowCount(); if ($allRowsCount) { - /** @var CharsetConverter $charsetConverter */ - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); while ($row = $result->fetch()) { // check if we already have collected the maximum number of records if (count($rows) > $this->maxItems) { @@ -172,12 +169,12 @@ class SuggestWizardDefaultReceiver $spriteIcon = $this->iconFactory->getIconForRecord($this->table, $row, Icon::SIZE_SMALL)->render(); $uid = $row['t3ver_oid'] > 0 ? $row['t3ver_oid'] : $row['uid']; $path = $this->getRecordPath($row, $uid); - if (strlen($path) > 30) { + if (mb_strlen($path, 'utf-8') > 30) { $croppedPath = '<abbr title="' . htmlspecialchars($path) . '">' . htmlspecialchars( - $charsetConverter->crop('utf-8', $path, 10) + mb_substr($path, 0, 10, 'utf-8') . '...' - . $charsetConverter->crop('utf-8', $path, -20) + . mb_substr($path, -20, null, 'utf-8') ) . 
'</abbr>'; } else { diff --git a/typo3/sysext/core/Classes/Page/PageRenderer.php b/typo3/sysext/core/Classes/Page/PageRenderer.php index 3d53c6c79992..7b924305de38 100644 --- a/typo3/sysext/core/Classes/Page/PageRenderer.php +++ b/typo3/sysext/core/Classes/Page/PageRenderer.php @@ -2087,9 +2087,24 @@ class PageRenderer implements \TYPO3\CMS\Core\SingletonInterface } $this->inlineLanguageLabelFiles = []; // Convert settings back to UTF-8 since json_encode() only works with UTF-8: - if (TYPO3_MODE === 'FE' && $this->getCharSet() !== 'utf-8') { - if ($this->inlineSettings) { - $this->csConvObj->convArray($this->inlineSettings, $this->getCharSet(), 'utf-8'); + if ($this->getCharSet() && $this->getCharSet() !== 'utf-8' && is_array($this->inlineSettings)) { + $this->convertCharsetRecursivelyToUtf8($this->inlineSettings, $this->getCharSet()); + } + } + + /** + * Small helper function to convert charsets for arrays into utf-8 + * + * @param mixed $data given by reference (string/array usually) + * @param string $fromCharset convert FROM this charset + */ + protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset) + { + foreach ($data as $key => $value) { + if (is_array($data[$key])) { + $this->convertCharsetRecursivelyToUtf8($data[$key], $fromCharset); + } elseif (is_string($data[$key])) { + $data[$key] = mb_convert_encoding($data[$key], 'utf-8', $fromCharset); } } } diff --git a/typo3/sysext/core/Classes/Utility/GeneralUtility.php b/typo3/sysext/core/Classes/Utility/GeneralUtility.php index bd44ca74fe82..812c8330eeff 100644 --- a/typo3/sysext/core/Classes/Utility/GeneralUtility.php +++ b/typo3/sysext/core/Classes/Utility/GeneralUtility.php @@ -15,7 +15,6 @@ namespace TYPO3\CMS\Core\Utility; */ use GuzzleHttp\Exception\RequestException; -use TYPO3\CMS\Core\Charset\CharsetConverter; use TYPO3\CMS\Core\Core\ApplicationContext; use TYPO3\CMS\Core\Core\ClassLoadingInformation; use TYPO3\CMS\Core\Database\ConnectionPool; @@ -256,9 +255,15 @@ class 
GeneralUtility */ public static function fixed_lgd_cs($string, $chars, $appendString = '...') { - /** @var CharsetConverter $charsetConverter */ - $charsetConverter = self::makeInstance(\TYPO3\CMS\Core\Charset\CharsetConverter::class); - return $charsetConverter->crop('utf-8', $string, $chars, $appendString); + if ((int)$chars === 0 || mb_strlen($string, 'utf-8') <= abs($chars)) { + return $string; + } + if ($chars > 0) { + $string = mb_substr($string, 0, $chars, 'utf-8') . $appendString; + } else { + $string = $appendString . mb_substr($string, $chars, mb_strlen($string, 'utf-8'), 'utf-8'); + } + return $string; } /** diff --git a/typo3/sysext/fluid/Classes/ViewHelpers/Format/CaseViewHelper.php b/typo3/sysext/fluid/Classes/ViewHelpers/Format/CaseViewHelper.php index d46f3888da65..09f84ed7f437 100644 --- a/typo3/sysext/fluid/Classes/ViewHelpers/Format/CaseViewHelper.php +++ b/typo3/sysext/fluid/Classes/ViewHelpers/Format/CaseViewHelper.php @@ -14,8 +14,6 @@ namespace TYPO3\CMS\Fluid\ViewHelpers\Format; * The TYPO3 project - inspiring people to share! */ -use TYPO3\CMS\Core\Charset\CharsetConverter; -use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Fluid\Core\ViewHelper\AbstractViewHelper; use TYPO3\CMS\Fluid\Core\ViewHelper\Exception\InvalidVariableException; use TYPO3Fluid\Fluid\Core\Rendering\RenderingContextInterface; @@ -136,10 +134,16 @@ class CaseViewHelper extends AbstractViewHelper $output = mb_strtoupper($value, 'utf-8'); break; case self::CASE_CAPITAL: - $output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toUpper'); + $firstChar = mb_substr($value, 0, 1, 'utf-8'); + $firstChar = mb_strtoupper($firstChar, 'utf-8'); + $remainder = mb_substr($value, 1, null, 'utf-8'); + $output = $firstChar . 
$remainder; break; case self::CASE_UNCAPITAL: - $output = GeneralUtility::makeInstance(CharsetConverter::class)->convCaseFirst('utf-8', $value, 'toLower'); + $firstChar = mb_substr($value, 0, 1, 'utf-8'); + $firstChar = mb_strtolower($firstChar, 'utf-8'); + $remainder = mb_substr($value, 1, null, 'utf-8'); + $output = $firstChar . $remainder; break; case self::CASE_CAPITAL_WORDS: // @todo: Implement method once there is a proper solution with using the CharsetConverter diff --git a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php index d32f4883d878..65b400e0c30f 100644 --- a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php +++ b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php @@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\ContentObject; use Doctrine\DBAL\DBALException; use Doctrine\DBAL\Driver\Statement; use TYPO3\CMS\Core\Cache\CacheManager; -use TYPO3\CMS\Core\Charset\CharsetConverter; use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\ConnectionPool; use TYPO3\CMS\Core\Database\Query\QueryBuilder; @@ -2090,8 +2089,7 @@ class ContentObjectRenderer public function stdWrap_csConv($content = '', $conf = []) { if (!empty($conf['csConv'])) { - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); - $output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['csConv']), 'utf-8'); + $output = mb_convert_encoding($content, 'utf-8', $conf['csConv']); return $output !== false && $output !== '' ? $output : $content; } else { return $content; @@ -2290,9 +2288,7 @@ class ContentObjectRenderer $content = (string)$content === '' ? $GLOBALS['EXEC_TIME'] : (int)$content; $content = $conf['strftime.']['GMT'] ? 
gmstrftime($conf['strftime'], $content) : strftime($conf['strftime'], $content); if (!empty($conf['strftime.']['charset'])) { - /** @var CharsetConverter $charsetConverter */ - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); - $output = $charsetConverter->conv($content, $charsetConverter->parse_charset($conf['strftime.']['charset']), 'utf-8'); + $output = mb_convert_encoding($content, 'utf-8', $conf['strftime.']['charset']); return $output ?: $content; } return $content; @@ -5928,7 +5924,6 @@ class ContentObjectRenderer */ public function caseshift($theValue, $case) { - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); switch (strtolower($case)) { case 'upper': $theValue = mb_strtoupper($theValue, 'utf-8'); @@ -5940,10 +5935,16 @@ class ContentObjectRenderer $theValue = mb_convert_case($theValue, MB_CASE_TITLE, 'utf-8'); break; case 'ucfirst': - $theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toUpper'); + $firstChar = mb_substr($theValue, 0, 1, 'utf-8'); + $firstChar = mb_strtoupper($firstChar, 'utf-8'); + $remainder = mb_substr($theValue, 1, null, 'utf-8'); + $theValue = $firstChar . $remainder; break; case 'lcfirst': - $theValue = $charsetConverter->convCaseFirst('utf-8', $theValue, 'toLower'); + $firstChar = mb_substr($theValue, 0, 1, 'utf-8'); + $firstChar = mb_strtolower($firstChar, 'utf-8'); + $remainder = mb_substr($theValue, 1, null, 'utf-8'); + $theValue = $firstChar . 
$remainder; break; case 'uppercamelcase': $theValue = GeneralUtility::underscoredToUpperCamelCase($theValue); diff --git a/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php b/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php index 8d7c34c6820e..dffd47e9409a 100644 --- a/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php +++ b/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php @@ -4172,9 +4172,7 @@ class TypoScriptFrontendController // Rendering charset of HTML page. if ($this->config['config']['metaCharset']) { - /** @var CharsetConverter $charsetConverter */ - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); - $this->metaCharset = $charsetConverter->parse_charset($this->config['config']['metaCharset']); + $this->metaCharset = $this->config['config']['metaCharset']; } } @@ -4200,13 +4198,28 @@ class TypoScriptFrontendController public function convPOSTCharset() { if ($this->metaCharset !== 'utf-8' && is_array($_POST) && !empty($_POST)) { - /** @var CharsetConverter $charsetConverter */ - $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class); - $charsetConverter->convArray($_POST, $this->metaCharset, 'utf-8'); + $this->convertCharsetRecursivelyToUtf8($_POST, $this->metaCharset); $GLOBALS['HTTP_POST_VARS'] = $_POST; } } + /** + * Small helper function to convert charsets for arrays to UTF-8 + * + * @param mixed $data given by reference (string/array usually) + * @param string $fromCharset convert FROM this charset + */ + protected function convertCharsetRecursivelyToUtf8(&$data, string $fromCharset) + { + foreach ($data as $key => $value) { + if (is_array($data[$key])) { + $this->convertCharsetRecursivelyToUtf8($data[$key], $fromCharset); + } elseif (is_string($data[$key])) { + $data[$key] = mb_convert_encoding($data[$key], 'utf-8', $fromCharset); + } + } + } + /** * Calculates page cache timeout according to the records with 
starttime/endtime on the page. * diff --git a/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php b/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php index 270e0176bed1..ec54c04ff890 100644 --- a/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php +++ b/typo3/sysext/frontend/Tests/Unit/ContentObject/ContentObjectRendererTest.php @@ -17,7 +17,6 @@ namespace TYPO3\CMS\Frontend\Tests\Unit\ContentObject; use Psr\Log\LoggerInterface; use TYPO3\CMS\Core\Cache\CacheManager; use TYPO3\CMS\Core\Cache\Frontend\FrontendInterface as CacheFrontendInterface; -use TYPO3\CMS\Core\Charset\CharsetConverter; use TYPO3\CMS\Core\Core\ApplicationContext; use TYPO3\CMS\Core\Log\LogManager; use TYPO3\CMS\Core\Resource\File; @@ -184,9 +183,8 @@ class ContentObjectRendererTest extends \TYPO3\TestingFramework\Core\Unit\UnitTe */ protected function handleCharset(&$subject, &$expected) { - $charsetConverter = new CharsetConverter(); - $subject = $charsetConverter->conv($subject, 'iso-8859-1', 'utf-8'); - $expected = $charsetConverter->conv($expected, 'iso-8859-1', 'utf-8'); + $subject = mb_convert_encoding($subject, 'utf-8', 'iso-8859-1'); + $expected = mb_convert_encoding($expected, 'utf-8', 'iso-8859-1'); } ///////////////////////////////////////////// diff --git a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php index 5edf1f0f6846..1ab3fba3b63c 100644 --- a/typo3/sysext/indexed_search/Classes/Controller/SearchController.php +++ b/typo3/sysext/indexed_search/Classes/Controller/SearchController.php @@ -456,7 +456,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle } } $title = $resultData['item_title'] . 
$resultData['titleaddition']; - $title = $this->charsetConverter->crop('utf-8', $title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']); + $title = GeneralUtility::fixed_lgd_cs($title, $this->settings['results.']['titleCropAfter'], $this->settings['results.']['titleCropSignifier']); // If external media, link to the media-file instead. if ($row['item_type']) { if ($row['show_resume']) { @@ -712,7 +712,7 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle } } if (!trim($markedSW)) { - $outputStr = $this->charsetConverter->crop('utf-8', $row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']); + $outputStr = GeneralUtility::fixed_lgd_cs($row['item_description'], $length, $this->settings['results.']['summaryCropSignifier']); $outputStr = htmlspecialchars($outputStr); } $output = $outputStr ?: $markedSW; @@ -764,16 +764,16 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle if (!$k) { // First entry at all (only cropped on the frontside) if ($strLen > $postPreLgd) { - $output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset))); + $output[$k] = $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset))); } } elseif ($summaryLgd > $summaryMax || !isset($parts[$k + 1])) { // In case summary length is exceed OR if there are no more entries at all: if ($strLen > $postPreLgd) { - $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider; + $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . 
$divider; } } else { if ($strLen > $postPreLgd * 2) { - $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', $this->charsetConverter->crop('utf-8', $parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', $this->charsetConverter->crop('utf-8', $parts[$k], -($postPreLgd - $postPreLgd_offset))); + $output[$k] = preg_replace('/[[:space:]][^[:space:]]+$/', '', GeneralUtility::fixed_lgd_cs($parts[$k], ($postPreLgd - $postPreLgd_offset))) . $divider . preg_replace('/^[^[:space:]]+[[:space:]]/', '', GeneralUtility::fixed_lgd_cs($parts[$k], -($postPreLgd - $postPreLgd_offset))); } } $summaryLgd += mb_strlen($output[$k], 'utf-8'); @@ -860,8 +860,10 @@ class SearchController extends \TYPO3\CMS\Extbase\Mvc\Controller\ActionControlle // shortening the string here is only a run-away feature!) $searchWords = substr($this->getSword(), 0, 200); // Convert to UTF-8 + conv. entities (was also converted during indexing!) - $searchWords = $this->charsetConverter->conv($searchWords, $GLOBALS['TSFE']->metaCharset, 'utf-8'); - $searchWords = $this->charsetConverter->entities_to_utf8($searchWords); + if ($GLOBALS['TSFE']->metaCharset && $GLOBALS['TSFE']->metaCharset !== 'utf-8') { + $searchWords = mb_convert_encoding($searchWords, 'utf-8', $GLOBALS['TSFE']->metaCharset); + $searchWords = html_entity_decode($searchWords); + } $sWordArray = false; if ($hookObj = $this->hookRequest('getSearchWords')) { $sWordArray = $hookObj->getSearchWords_splitSWords($searchWords, $defaultOperator); diff --git a/typo3/sysext/indexed_search/Classes/FileContentParser.php b/typo3/sysext/indexed_search/Classes/FileContentParser.php index c40e6c3ddcf8..da964e932e62 100644 --- a/typo3/sysext/indexed_search/Classes/FileContentParser.php +++ b/typo3/sysext/indexed_search/Classes/FileContentParser.php @@ -660,7 +660,7 @@ class FileContentParser $fileContent = GeneralUtility::getUrl($absFile); // Finding charset: 
preg_match('/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg); - $charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8'; + $charset = $reg[1] ?: 'utf-8'; // Converting content: $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<', ' <', $fileContent)), $charset); $contentArr = $this->pObj->splitRegularContent($fileContent); diff --git a/typo3/sysext/indexed_search/Classes/Indexer.php b/typo3/sysext/indexed_search/Classes/Indexer.php index 1012c0d151b9..20704283b0b0 100644 --- a/typo3/sysext/indexed_search/Classes/Indexer.php +++ b/typo3/sysext/indexed_search/Classes/Indexer.php @@ -674,13 +674,12 @@ class Indexer { // Find charset: $charset = $charset ?: $this->getHTMLcharset($content); - $charset = $this->csObj->parse_charset($charset); // Convert charset: if ($charset && $charset !== 'utf-8') { - $content = $this->csObj->conv($content, $charset, 'utf-8'); + $content = mb_convert_encoding($content, 'utf-8', $charset); } // Convert entities, assuming document is now UTF-8: - return $this->csObj->entities_to_utf8($content); + return html_entity_decode($content); } /** @@ -1270,10 +1269,10 @@ class Indexer foreach ($contentArr as $key => $value) { if ((string)$contentArr[$key] !== '') { if ($charset !== 'utf-8') { - $contentArr[$key] = $this->csObj->conv($contentArr[$key], $charset, 'utf-8'); + $contentArr[$key] = mb_convert_encoding($contentArr[$key], 'utf-8', $charset); } // decode all numeric / html-entities in the string to real characters: - $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key]); + $contentArr[$key] = html_entity_decode($contentArr[$key]); } } } -- GitLab