From a543c479f6a988bc4d81d568bb3a1aed900d7c41 Mon Sep 17 00:00:00 2001 From: Benni Mack <benni@typo3.org> Date: Sat, 16 Jan 2016 14:33:34 +0100 Subject: [PATCH] [!!!][TASK] Remove charset functionality for locales MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TYPO3 frontend resolves config.locale_all (e.g. set to de_AT.UTF-8) and stores it in $TSFE->localeCharset by using a "best guess" based on decade-old mappings. $TSFE->localeCharset is only used in stdWrap.strftime to convert the result from the localeCharset to the ->renderCharset. However, as it is obvious that a misconfiguration of the system, which happens only when config.set_locale = de_AT@iso-8859-15 config.renderCharset = utf-8 is set, this exception needs to be configured manually when using stdWrap.strftime.charset = iso-8859-15 Resolves: #72826 Releases: master Change-Id: I1bba231879ebaf8e8700099bb87a03aba5d1b562 Reviewed-on: https://review.typo3.org/46011 Reviewed-by: Georg Ringer <georg.ringer@gmail.com> Tested-by: Georg Ringer <georg.ringer@gmail.com> Reviewed-by: Frank Nägler <frank.naegler@typo3.org> Tested-by: Frank Nägler <frank.naegler@typo3.org> --- .../core/Classes/Charset/CharsetConverter.php | 281 ------------------ ...edCustomCharsetConfigurationForLocales.rst | 43 +++ .../ContentObject/ContentObjectRenderer.php | 6 +- .../TypoScriptFrontendController.php | 7 - 4 files changed, 45 insertions(+), 292 deletions(-) create mode 100644 typo3/sysext/core/Documentation/Changelog/master/Breaking-72826-RemovedCustomCharsetConfigurationForLocales.rst diff --git a/typo3/sysext/core/Classes/Charset/CharsetConverter.php b/typo3/sysext/core/Classes/Charset/CharsetConverter.php index be81939a8778..efd6c8d2d942 100644 --- a/typo3/sysext/core/Classes/Charset/CharsetConverter.php +++ b/typo3/sysext/core/Classes/Charset/CharsetConverter.php @@ -220,245 +220,6 @@ class CharsetConverter implements SingletonInterface 'ucs4' => 'ucs-4' ); - /** - * Mapping of 
iso-639-1 language codes to script names - * - * @var array - */ - public $lang_to_script = array( - // iso-639-1 language codes, see http://www.loc.gov/standards/iso639-2/php/code_list.php - 'af' => 'west_european', // Afrikaans - 'ar' => 'arabic', - 'bg' => 'cyrillic', // Bulgarian - 'bs' => 'east_european', // Bosnian - 'cs' => 'east_european', // Czech - 'da' => 'west_european', // Danish - 'de' => 'west_european', // German - 'es' => 'west_european', // Spanish - 'et' => 'estonian', - 'eo' => 'unicode', // Esperanto - 'eu' => 'west_european', // Basque - 'fa' => 'arabic', // Persian - 'fi' => 'west_european', // Finish - 'fo' => 'west_european', // Faroese - 'fr' => 'west_european', // French - 'ga' => 'west_european', // Irish - 'gl' => 'west_european', // Galician - 'gr' => 'greek', - 'he' => 'hebrew', // Hebrew (since 1998) - 'hi' => 'unicode', // Hindi - 'hr' => 'east_european', // Croatian - 'hu' => 'east_european', // Hungarian - 'iw' => 'hebrew', // Hebrew (til 1998) - 'is' => 'west_european', // Icelandic - 'it' => 'west_european', // Italian - 'ja' => 'japanese', - 'ka' => 'unicode', // Georgian - 'kl' => 'west_european', // Greenlandic - 'km' => 'unicode', // Khmer - 'ko' => 'korean', - 'lt' => 'lithuanian', - 'lv' => 'west_european', // Latvian/Lettish - 'nl' => 'west_european', // Dutch - 'no' => 'west_european', // Norwegian - 'nb' => 'west_european', // Norwegian Bokmal - 'nn' => 'west_european', // Norwegian Nynorsk - 'pl' => 'east_european', // Polish - 'pt' => 'west_european', // Portuguese - 'ro' => 'east_european', // Romanian - 'ru' => 'cyrillic', // Russian - 'sk' => 'east_european', // Slovak - 'sl' => 'east_european', // Slovenian - 'sr' => 'cyrillic', // Serbian - 'sv' => 'west_european', // Swedish - 'sq' => 'albanian', // Albanian - 'th' => 'thai', - 'uk' => 'cyrillic', // Ukranian - 'vi' => 'vietnamese', - 'zh' => 'chinese', - - // MS language codes, see 
http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclib/html/_crt_language_strings.asp - // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/wceinternational5/html/wce50conLanguageIdentifiersandLocales.asp - 'afk' => 'west_european', // Afrikaans - 'ara' => 'arabic', - 'bgr' => 'cyrillic', // Bulgarian - 'cat' => 'west_european', // Catalan - 'chs' => 'simpl_chinese', - 'cht' => 'trad_chinese', - 'csy' => 'east_european', // Czech - 'dan' => 'west_european', // Danish - 'deu' => 'west_european', // German - 'dea' => 'west_european', // German (Austrian) - 'des' => 'west_european', // German (Swiss) - 'ena' => 'west_european', // English (Australian) - 'enc' => 'west_european', // English (Canadian) - 'eng' => 'west_european', // English - 'enz' => 'west_european', // English (New Zealand) - 'enu' => 'west_european', // English (United States) - 'euq' => 'west_european', // Basque - 'fos' => 'west_european', // Faroese - 'far' => 'arabic', // Persian - 'fin' => 'west_european', // Finish - 'fra' => 'west_european', // French - 'frb' => 'west_european', // French (Belgian) - 'frc' => 'west_european', // French (Canadian) - 'frs' => 'west_european', // French (Swiss) - 'geo' => 'unicode', // Georgian - 'glg' => 'west_european', // Galician - 'ell' => 'greek', - 'heb' => 'hebrew', - 'hin' => 'unicode', // Hindi - 'hun' => 'east_european', // Hungarian - 'isl' => 'west_european', // Icelandic - 'ita' => 'west_european', // Italian - 'its' => 'west_european', // Italian (Swiss) - 'jpn' => 'japanese', - 'khm' => 'unicode', // Khmer - 'kor' => 'korean', - 'lth' => 'lithuanian', - 'lvi' => 'west_european', // Latvian/Lettish - 'msl' => 'west_european', // Malay - 'nlb' => 'west_european', // Dutch (Belgian) - 'nld' => 'west_european', // Dutch - 'nor' => 'west_european', // Norwegian (bokmal) - 'non' => 'west_european', // Norwegian (nynorsk) - 'plk' => 'east_european', // Polish - 'ptg' => 'west_european', // Portuguese - 'ptb' => 
'west_european', // Portuguese (Brazil) - 'rom' => 'east_european', // Romanian - 'rus' => 'cyrillic', // Russian - 'slv' => 'east_european', // Slovenian - 'sky' => 'east_european', // Slovak - 'srl' => 'east_european', // Serbian (Latin) - 'srb' => 'cyrillic', // Serbian (Cyrillic) - 'esp' => 'west_european', // Spanish (trad. sort) - 'esm' => 'west_european', // Spanish (Mexican) - 'esn' => 'west_european', // Spanish (internat. sort) - 'sve' => 'west_european', // Swedish - 'sqi' => 'albanian', // Albanian - 'tha' => 'thai', - 'trk' => 'turkish', - 'ukr' => 'cyrillic', // Ukrainian - - // English language names - 'afrikaans' => 'west_european', - 'albanian' => 'albanian', - 'arabic' => 'arabic', - 'basque' => 'west_european', - 'bosnian' => 'east_european', - 'bulgarian' => 'east_european', - 'catalan' => 'west_european', - 'croatian' => 'east_european', - 'czech' => 'east_european', - 'danish' => 'west_european', - 'dutch' => 'west_european', - 'english' => 'west_european', - 'esperanto' => 'unicode', - 'estonian' => 'estonian', - 'faroese' => 'west_european', - 'farsi' => 'arabic', - 'finnish' => 'west_european', - 'french' => 'west_european', - 'galician' => 'west_european', - 'georgian' => 'unicode', - 'german' => 'west_european', - 'greek' => 'greek', - 'greenlandic' => 'west_european', - 'hebrew' => 'hebrew', - 'hindi' => 'unicode', - 'hungarian' => 'east_european', - 'icelandic' => 'west_european', - 'italian' => 'west_european', - 'khmer' => 'unicode', - 'latvian' => 'west_european', - 'lettish' => 'west_european', - 'lithuanian' => 'lithuanian', - 'malay' => 'west_european', - 'norwegian' => 'west_european', - 'persian' => 'arabic', - 'polish' => 'east_european', - 'portuguese' => 'west_european', - 'russian' => 'cyrillic', - 'romanian' => 'east_european', - 'serbian' => 'cyrillic', - 'slovak' => 'east_european', - 'slovenian' => 'east_european', - 'spanish' => 'west_european', - 'svedish' => 'west_european', - 'that' => 'thai', - 'turkish' => 
'turkish', - 'ukrainian' => 'cyrillic' - ); - - /** - * Mapping of language (family) names to charsets on Unix - * - * @var array - */ - public $script_to_charset_unix = array( - 'west_european' => 'iso-8859-1', - 'estonian' => 'iso-8859-1', - 'east_european' => 'iso-8859-2', - 'baltic' => 'iso-8859-4', - 'cyrillic' => 'iso-8859-5', - 'arabic' => 'iso-8859-6', - 'greek' => 'iso-8859-7', - 'hebrew' => 'iso-8859-8', - 'turkish' => 'iso-8859-9', - 'thai' => 'iso-8859-11', // = TIS-620 - 'lithuanian' => 'iso-8859-13', - 'chinese' => 'gb2312', // = euc-cn - 'japanese' => 'euc-jp', - 'korean' => 'euc-kr', - 'simpl_chinese' => 'gb2312', - 'trad_chinese' => 'big5', - 'vietnamese' => '', - 'unicode' => 'utf-8', - 'albanian' => 'utf-8' - ); - - /** - * Mapping of language (family) names to charsets on Windows - * - * @var array - */ - public $script_to_charset_windows = array( - 'east_european' => 'windows-1250', - 'cyrillic' => 'windows-1251', - 'west_european' => 'windows-1252', - 'greek' => 'windows-1253', - 'turkish' => 'windows-1254', - 'hebrew' => 'windows-1255', - 'arabic' => 'windows-1256', - 'baltic' => 'windows-1257', - 'estonian' => 'windows-1257', - 'lithuanian' => 'windows-1257', - 'vietnamese' => 'windows-1258', - 'thai' => 'cp874', - 'korean' => 'cp949', - 'chinese' => 'gb2312', - 'japanese' => 'shift_jis', - 'simpl_chinese' => 'gb2312', - 'trad_chinese' => 'big5', - 'albanian' => 'windows-1250', - 'unicode' => 'utf-8' - ); - - /** - * Mapping of locale names to charsets - * - * @var array - */ - public $locale_to_charset = array( - 'japanese.euc' => 'euc-jp', - 'ja_jp.ujis' => 'euc-jp', - 'korean.euc' => 'euc-kr', - 'sr@Latn' => 'iso-8859-2', - 'zh_cn' => 'gb2312', - 'zh_hk' => 'big5', - 'zh_tw' => 'big5' - ); - /** * TYPO3 specific: Array with the system charsets used for each system language in TYPO3: * Empty values means "utf-8" @@ -549,48 +310,6 @@ class CharsetConverter implements SingletonInterface return $charset; } - /** - * Get the charset of a 
locale. - * - * ln language - * ln_CN language / country - * ln_CN.cs language / country / charset - * ln_CN.cs@mod language / country / charset / modifier - * - * @param string $locale Locale string - * @return string Charset resolved for locale string - */ - public function get_locale_charset($locale) - { - $locale = strtolower($locale); - // Exact locale specific charset? - if (isset($this->locale_to_charset[$locale])) { - return $this->locale_to_charset[$locale]; - } - // Get modifier - list($locale, $modifier) = explode('@', $locale); - // Locale contains charset: use it - list($locale, $charset) = explode('.', $locale); - if ($charset) { - return $this->parse_charset($charset); - } - // Modifier is 'euro' (after charset check, because of xx.utf-8@euro) - if ($modifier === 'euro') { - return 'iso-8859-15'; - } - // Get language - list($language, ) = explode('_', $locale); - if (isset($this->lang_to_script[$language])) { - $script = $this->lang_to_script[$language]; - } - if (TYPO3_OS === 'WIN') { - $cs = $this->script_to_charset_windows[$script] ?: 'windows-1252'; - } else { - $cs = $this->script_to_charset_unix[$script] ?: 'utf-8'; - } - return $cs; - } - /******************************************** * * Charset Conversion functions diff --git a/typo3/sysext/core/Documentation/Changelog/master/Breaking-72826-RemovedCustomCharsetConfigurationForLocales.rst b/typo3/sysext/core/Documentation/Changelog/master/Breaking-72826-RemovedCustomCharsetConfigurationForLocales.rst new file mode 100644 index 000000000000..95a1a46416c1 --- /dev/null +++ b/typo3/sysext/core/Documentation/Changelog/master/Breaking-72826-RemovedCustomCharsetConfigurationForLocales.rst @@ -0,0 +1,43 @@ +=================================================================== +Breaking: #72826 - Removed custom charset configuration for locales +=================================================================== + +Description +=========== + +The TYPO3 Frontend resolves the TypoScript option 
``config.locale_all`` and stores the charset part within ``$TSFE->localeCharset``. If the option ``locale_all`` does not provide a charset (e.g. when it is set to ``de_AT`` instead of ``de_AT.UTF-8``), a "best guess" was made based on a static list set up in 2004. + +The option ``$TSFE->localeCharset`` was removed, along with the following calculation options and methods +available in the CharsetConverter class: + + * CharsetConverter->lang_to_script + * CharsetConverter->script_to_charset_unix + * CharsetConverter->script_to_charset_windows + * CharsetConverter->locale_to_charset + * CharsetConverter->get_locale_charset() + +The localeCharset option was solely used within the TypoScript functionality ``stdWrap.strftime`` when no +custom character set was given, to perform a character set conversion from the "localeCharset" (based on the best guess +or explicitly set via ``config.locale_all = de_AT.UTF-8``) if it was different than the renderCharset option of the TYPO3 Frontend. + + +Impact +====== + +When custom locales are configured in TypoScript which are not present on the server, or the character set of ``config.locale_all`` differs from the ``config.renderCharset``, or ``config.locale_all`` does not set a character set, this could lead to unexpected output in the TYPO3 Frontend. + + +Affected Installations +====================== + +Instances which have a different ``config.locale_all`` character set given than set via ``config.renderCharset``, or on +servers that don't have the charset of the locale available but the output should be a certain but not given character set. + + +Migration +========= + +As this is a misconfiguration and only necessary if e.g. the server cannot handle UTF-8 locales, ``config.locale_all`` can explicitly be set to ``de_AT@iso-8859-15`` and the output should be renderCharset. On instances where ``stdWrap.strftime`` is used, the subproperty ``charset`` can be set to the custom character set (e.g. ``iso-8859-15``). 
+ +In each case, it should be configured that the ``config.locale_all`` option should have a character set given, to avoid +any side-effects with the TypoScript stdWrap option ``strftime``. \ No newline at end of file diff --git a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php index 164c189df9fd..ef2b26ed60ab 100644 --- a/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php +++ b/typo3/sysext/frontend/Classes/ContentObject/ContentObjectRenderer.php @@ -2728,10 +2728,8 @@ class ContentObjectRenderer // Check for zero length string to mimic default case of strtime/gmstrftime $content = (string)$content === '' ? $GLOBALS['EXEC_TIME'] : (int)$content; $content = $conf['strftime.']['GMT'] ? gmstrftime($conf['strftime'], $content) : strftime($conf['strftime'], $content); - $tsfe = $this->getTypoScriptFrontendController(); - $tmp_charset = $conf['strftime.']['charset'] ? $conf['strftime.']['charset'] : $tsfe->localeCharset; - if ($tmp_charset) { - $content = $tsfe->csConv($content, $tmp_charset); + if (!empty($conf['strftime.']['charset'])) { + $content = $this->getTypoScriptFrontendController()->csConv($content, $conf['strftime.']['charset']); } return $content; } diff --git a/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php b/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php index 898a2e81ba86..7880d77b002c 100644 --- a/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php +++ b/typo3/sysext/frontend/Classes/Controller/TypoScriptFrontendController.php @@ -747,12 +747,6 @@ class TypoScriptFrontendController */ public $metaCharset = 'utf-8'; - /** - * Assumed charset of locale strings. 
- * @var string - */ - public $localeCharset = ''; - /** * Set to the system language key (used on the site) * @var string @@ -2763,7 +2757,6 @@ class TypoScriptFrontendController setlocale(LC_CTYPE, $this->config['config']['locale_all']); setlocale(LC_MONETARY, $this->config['config']['locale_all']); setlocale(LC_TIME, $this->config['config']['locale_all']); - $this->localeCharset = $this->csConvObj->get_locale_charset($this->config['config']['locale_all']); } else { $this->getTimeTracker()->setTSlogMessage('Locale "' . htmlspecialchars($this->config['config']['locale_all']) . '" not found.', 3); } -- GitLab