diff --git a/typo3/sysext/backend/Classes/Configuration/TCA/UserFunctions.php b/typo3/sysext/backend/Classes/Configuration/TCA/UserFunctions.php
index 82a0965e83b3ed098d87c8dacc2729d9117e7269..d64c8c5b6ab879f50fd5fd3a54b74cbe900e7a56 100644
--- a/typo3/sysext/backend/Classes/Configuration/TCA/UserFunctions.php
+++ b/typo3/sysext/backend/Classes/Configuration/TCA/UserFunctions.php
@@ -18,6 +18,7 @@ declare(strict_types=1);
 namespace TYPO3\CMS\Backend\Configuration\TCA;
 
 use TYPO3\CMS\Core\Localization\LanguageService;
+use TYPO3\CMS\Core\Localization\Locale;
 use TYPO3\CMS\Core\Utility\CommandUtility;
 use TYPO3\CMS\Core\Utility\GeneralUtility;
 
@@ -109,10 +110,22 @@ class UserFunctions
         $rawOutput = [];
         CommandUtility::exec('locale -a', $rawOutput);
 
-        ksort($rawOutput, SORT_NATURAL);
+        sort($rawOutput, SORT_NATURAL);
         $locales = [];
+        $usedLocales = [];
         foreach ($rawOutput as $item) {
+            // do not show C/POSIX in the list of locales, as this is the default anyway
+            $obj = new Locale($item);
+            if ($obj->getPosixCodeSet() === 'C' || $obj->getPosixCodeSet() === 'POSIX') {
+                continue;
+            }
+            // Skip locales that only differ by an appended codeset or modifier (e.g. "de_DE.UTF-8", "de_DE@euro")
+            // from one that is already listed, so each language/country combination is offered only once.
+            if (in_array($obj->getName(), $usedLocales, true)) {
+                continue;
+            }
             $locales[] = [$item, $item];
+            $usedLocales[] = $obj->getName();
         }
 
         return $locales;
diff --git a/typo3/sysext/core/Classes/Localization/Locale.php b/typo3/sysext/core/Classes/Localization/Locale.php
new file mode 100644
index 0000000000000000000000000000000000000000..69c4772f82825985df5908acaf3a8bc26be0a510
--- /dev/null
+++ b/typo3/sysext/core/Classes/Localization/Locale.php
@@ -0,0 +1,207 @@
+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+namespace TYPO3\CMS\Core\Localization;
+
+/**
+ * A representation of a locale, consisting of
+ * - the language key (based on ISO 639-1 / ISO 639-2)
+ * - the optional four-letter script code that can follow the language code according to the Unicode ISO 15924 Registry (e.g. Hans in zh_Hans)
+ * - the optional region / country (based on ISO 3166-1)
+ * separated with a "-".
+ *
+ * This conforms to IETF - RFC 5646 (see https://datatracker.ietf.org/doc/rfc5646/) in a simplified form.
+ *
+ * POSIX locale names such as "de_DE.UTF-8" or "de_DE@euro" are accepted as well. Their codeset and
+ * modifier are kept separately and are only part of the result of posixFormatted().
+ */
+class Locale implements \Stringable
+{
+    protected string $locale;
+    protected string $languageCode;
+    protected ?string $languageScript = null;
+    protected ?string $countryCode = null;
+    protected ?string $codeSet = null;
+    // see https://wiki.archlinux.org/title/locale#Generating_locales
+    protected ?string $charsetModifier = null;
+
+    // taken from https://meta.wikimedia.org/wiki/Template:List_of_language_names_ordered_by_code
+    protected const RIGHT_TO_LEFT_LANGUAGE_CODES = [
+        'ar', // Arabic
+        'arc', // Aramaic
+        'arz', // Egyptian Arabic
+        'ckb', // Kurdish (Sorani)
+        'dv', // Divehi
+        'fa', // Persian
+        'ha', // Hausa
+        'he', // Hebrew
+        'khw', // Khowar
+        'ks', // Kashmiri
+        'ps', // Pashto
+        'sd', // Sindhi
+        'ur', // Urdu
+        'uz-AF', // Uzbeki Afghanistan
+        'yi', // Yiddish
+    ];
+
+    /**
+     * List of language dependencies for an actual language. This setting is used for local variants of a language
+     * that depend on their "main" language, like Brazilian Portuguese or Canadian French.
+     *
+     * @var array<int, string>
+     */
+    protected array $dependencies = [];
+
+    /**
+     * @param string $locale RFC 5646 style ("de-AT") or POSIX style ("de_AT.UTF-8@euro") name, "default" maps to "en"
+     * @param array<int, string> $dependencies locales this locale depends on, each passed through normalize()
+     */
+    public function __construct(
+        string $locale = 'en',
+        array $dependencies = []
+    ) {
+        // Split off the POSIX modifier and codeset before normalizing, so that underscores
+        // within them (e.g. "en_US.ISO_8859-1") are not rewritten to dashes.
+        if (str_contains($locale, '@')) {
+            [$locale, $this->charsetModifier] = explode('@', $locale);
+        }
+        if (str_contains($locale, '.')) {
+            [$locale, $this->codeSet] = explode('.', $locale);
+        }
+        $locale = $this->normalize($locale);
+        // "C" and "POSIX" are stored as codeset, the language itself is then handled as "en"
+        if (strtolower($locale) === 'c') {
+            $this->codeSet = 'C';
+            $locale = 'en';
+        } elseif (strtolower($locale) === 'posix') {
+            $this->codeSet = 'POSIX';
+            $locale = 'en';
+        }
+        if (str_contains($locale, '-')) {
+            [$this->languageCode, $tail] = explode('-', $locale, 2);
+            if (str_contains($tail, '-')) {
+                [$this->languageScript, $this->countryCode] = explode('-', $tail);
+            } elseif (strlen($tail) === 4) {
+                // a single four-letter subtag is taken as script code ("zh-Hans"), anything else as country code
+                $this->languageScript = $tail;
+            } else {
+                $this->countryCode = $tail ?: null;
+            }
+            $this->languageCode = strtolower($this->languageCode);
+            $this->languageScript = $this->languageScript ? ucfirst(strtolower($this->languageScript)) : null;
+            $this->countryCode = $this->countryCode ? strtoupper($this->countryCode) : null;
+        } else {
+            $this->languageCode = strtolower($locale);
+        }
+
+        $this->locale = $this->languageCode . ($this->languageScript ? '-' . $this->languageScript : '') . ($this->countryCode ? '-' . $this->countryCode : '');
+        $this->dependencies = array_map(fn ($dep) => $this->normalize($dep), $dependencies);
+    }
+
+    /**
+     * Returns the locale as "language[-Script][-COUNTRY]", without codeset or modifier.
+     */
+    public function getName(): string
+    {
+        return $this->locale;
+    }
+
+    public function getLanguageCode(): string
+    {
+        return $this->languageCode;
+    }
+
+    /**
+     * Whether the language code or the full locale name is listed in RIGHT_TO_LEFT_LANGUAGE_CODES.
+     */
+    public function isRightToLeftLanguageDirection(): bool
+    {
+        return in_array($this->languageCode, self::RIGHT_TO_LEFT_LANGUAGE_CODES, true) || in_array($this->locale, self::RIGHT_TO_LEFT_LANGUAGE_CODES, true);
+    }
+
+    public function getLanguageScriptCode(): ?string
+    {
+        return $this->languageScript;
+    }
+
+    public function getCountryCode(): ?string
+    {
+        return $this->countryCode;
+    }
+
+    /**
+     * Returns the locale in ISO/IEC 15897 (POSIX) format, including a possible codeset and
+     * modifier, e.g. "cs_CZ.UTF-8" or "de_DE@euro". The script code is not part of the result.
+     *
+     * see https://en.wikipedia.org/wiki/ISO/IEC_15897
+     * https://en.wikipedia.org/wiki/Locale_(computer_software)#POSIX_platforms
+     * @internal
+     */
+    public function posixFormatted(): string
+    {
+        $charsetModifier = $this->charsetModifier ? '@' . $this->charsetModifier : '';
+        if ($this->codeSet === 'C' || $this->codeSet === 'POSIX') {
+            return $this->codeSet . $charsetModifier;
+        }
+        $formatted = $this->languageCode;
+        if ($this->countryCode) {
+            $formatted .= '_' . $this->countryCode;
+        }
+        if ($this->codeSet) {
+            $formatted .= '.' . $this->codeSet;
+        }
+        return $formatted . $charsetModifier;
+    }
+
+    /**
+     * Returns the POSIX codeset (e.g. "UTF-8"), "C" / "POSIX" for these special locales, or null if none was given.
+     *
+     * @internal
+     */
+    public function getPosixCodeSet(): ?string
+    {
+        return $this->codeSet;
+    }
+
+    /**
+     * @return array<int, string>
+     */
+    public function getDependencies(): array
+    {
+        return $this->dependencies;
+    }
+
+    /**
+     * Maps "default" to "en" and replaces underscores with dashes ("de_AT" becomes "de-AT").
+     */
+    protected function normalize(string $locale): string
+    {
+        if ($locale === 'default') {
+            return 'en';
+        }
+        if (str_contains($locale, '_')) {
+            $locale = str_replace('_', '-', $locale);
+        }
+
+        return $locale;
+    }
+
+    public function __toString(): string
+    {
+        return $this->locale;
+    }
+}
diff --git a/typo3/sysext/core/Tests/Unit/Localization/LocaleTest.php b/typo3/sysext/core/Tests/Unit/Localization/LocaleTest.php
new file mode 100644
index 0000000000000000000000000000000000000000..5309c9012bc48a5c68860b6b2a3ae51d318b77a7
--- /dev/null
+++ b/typo3/sysext/core/Tests/Unit/Localization/LocaleTest.php
@@ -0,0 +1,125 @@
+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+namespace TYPO3\CMS\Core\Tests\Unit\Localization;
+
+use TYPO3\CMS\Core\Localization\Locale;
+use TYPO3\TestingFramework\Core\Unit\UnitTestCase;
+
+class LocaleTest extends UnitTestCase
+{
+    /**
+     * @test
+     */
+    public function localeWithJustLanguageCodeSanitizesIncomingValuesProperly(): void
+    {
+        $subject = new Locale('en');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertNull($subject->getCountryCode());
+        self::assertSame('en', $subject->getLanguageCode());
+        self::assertSame('en', (string)$subject);
+
+        $subject = new Locale('C');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertNull($subject->getCountryCode());
+        self::assertSame('en', $subject->getLanguageCode());
+        self::assertSame('C', $subject->getPosixCodeSet());
+        self::assertSame('C', $subject->posixFormatted());
+        self::assertSame('en', (string)$subject);
+
+        $subject = new Locale('de_DE.UTF-8');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('DE', $subject->getCountryCode());
+        self::assertSame('de', $subject->getLanguageCode());
+        self::assertSame('de-DE', (string)$subject);
+        self::assertSame('de_DE.UTF-8', $subject->posixFormatted());
+
+        $subject = new Locale('de_DE@euro');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('DE', $subject->getCountryCode());
+        self::assertSame('de', $subject->getLanguageCode());
+        self::assertSame('de-DE', (string)$subject);
+        self::assertSame('de_DE@euro', $subject->posixFormatted());
+
+        // Mixed-case input is lowercased for the language code
+        $subject = new Locale('eN');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertNull($subject->getCountryCode());
+        self::assertSame('en', $subject->getLanguageCode());
+        self::assertSame('en', (string)$subject);
+    }
+
+    /**
+     * @test
+     */
+    public function localeWithLanguageAndScriptCodeSanitizesIncomingValuesProperly(): void
+    {
+        $subject = new Locale('zh_HANS');
+        self::assertSame('Hans', $subject->getLanguageScriptCode());
+        self::assertNull($subject->getCountryCode());
+        self::assertSame('zh', $subject->getLanguageCode());
+        self::assertSame('zh-Hans', (string)$subject);
+    }
+
+    /**
+     * @test
+     */
+    public function localeWithLanguageAndScriptCodeAndCountryCodeSanitizesIncomingValuesProperly(): void
+    {
+        $subject = new Locale('zh_HANS_CN');
+        self::assertSame('Hans', $subject->getLanguageScriptCode());
+        self::assertSame('CN', $subject->getCountryCode());
+        self::assertSame('zh', $subject->getLanguageCode());
+        self::assertSame('zh-Hans-CN', (string)$subject);
+    }
+
+    /**
+     * @test
+     */
+    public function variousCombinationsOfLanguageAndCountryCodeReturnsSanitizedValues(): void
+    {
+        $subject = new Locale('fr_CA');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('CA', $subject->getCountryCode());
+        self::assertSame('fr', $subject->getLanguageCode());
+        self::assertSame('fr-CA', (string)$subject);
+        $subject = new Locale('de-AT');
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('AT', $subject->getCountryCode());
+        self::assertSame('de', $subject->getLanguageCode());
+        self::assertSame('de-AT', (string)$subject);
+    }
+
+    /**
+     * @test
+     */
+    public function dependenciesAreSetAndRetrievedCorrectly(): void
+    {
+        $subject = new Locale('fr_CA', ['fr', 'en']);
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('CA', $subject->getCountryCode());
+        self::assertSame('fr', $subject->getLanguageCode());
+        self::assertSame(['fr', 'en'], $subject->getDependencies());
+        self::assertSame('fr-CA', (string)$subject);
+        $subject = new Locale('en-US', ['en-UK', 'en']);
+        self::assertNull($subject->getLanguageScriptCode());
+        self::assertSame('US', $subject->getCountryCode());
+        self::assertSame('en', $subject->getLanguageCode());
+        self::assertSame(['en-UK', 'en'], $subject->getDependencies());
+        self::assertSame('en-US', (string)$subject);
+    }
+}