diff --git a/typo3/sysext/core/Classes/Database/Schema/ConnectionMigrator.php b/typo3/sysext/core/Classes/Database/Schema/ConnectionMigrator.php index 990fb6cc4afc89645074fa576ba1f97b971145c2..68b1343b33d2cd7e562e77b01cad6b2342009ad5 100644 --- a/typo3/sysext/core/Classes/Database/Schema/ConnectionMigrator.php +++ b/typo3/sysext/core/Classes/Database/Schema/ConnectionMigrator.php @@ -311,6 +311,9 @@ class ConnectionMigrator $schemaConfig = GeneralUtility::makeInstance(SchemaConfig::class); $schemaConfig->setName($this->connection->getDatabase()); + if (isset($this->connection->getParams()['tableoptions'])) { + $schemaConfig->setDefaultTableOptions($this->connection->getParams()['tableoptions']); + } return GeneralUtility::makeInstance(Schema::class, $tablesForConnection, [], $schemaConfig); } @@ -1097,6 +1100,7 @@ class ConnectionMigrator */ protected function transformTablesForDatabasePlatform(array $tables, Connection $connection): array { + $defaultTableOptions = $connection->getParams()['tableoptions'] ?? []; foreach ($tables as &$table) { $indexes = []; foreach ($table->getIndexes() as $key => $index) { @@ -1141,7 +1145,7 @@ class ConnectionMigrator $indexes, $table->getForeignKeys(), 0, - $table->getOptions() + array_merge($defaultTableOptions, $table->getOptions()) ); } diff --git a/typo3/sysext/core/Documentation/Changelog/master/Feature-80398-Utf8mb4OnMysqlByDefaultForNewInstances.rst b/typo3/sysext/core/Documentation/Changelog/master/Feature-80398-Utf8mb4OnMysqlByDefaultForNewInstances.rst new file mode 100644 index 0000000000000000000000000000000000000000..5c3dfe4d45b9940d131b1394ac1e4fa1a25edbd7 --- /dev/null +++ b/typo3/sysext/core/Documentation/Changelog/master/Feature-80398-Utf8mb4OnMysqlByDefaultForNewInstances.rst @@ -0,0 +1,59 @@ +.. 
include:: ../../Includes.txt + +=============================================================== +Feature: #80398 - utf8mb4 on MySQL by default for new instances +=============================================================== + +See :issue:`80398` + +Description +=========== + +New instances created by the TYPO3 installer now set `utf8mb4` as charset and `utf8mb4_unicode_ci` +collation by default for instances running on MySQL. This allows 4-byte Unicode characters +like emojis in MySQL. + +If upgrading instances, admins may change :file:`LocalConfiguration.php` to use this feature. +The core does not provide mechanisms to update the collation of existing tables +from `utf8_unicode_ci` to `utf8mb4_unicode_ci` for existing instances, though. Admins need +to manage that on their own if needed; the reports module shows a message if the +table schema uses mixed collations. This should be avoided and fixed after manually configuring +`utf8mb4` to avoid SQL errors when joining tables having different collations. + +Also note that manually upgrading to `utf8mb4` may lead to index length issues: The maximum key +length on InnoDB tables is often 767 bytes and options to increase that have even been actively +removed, for instance in recent MariaDB versions. +A typical case is an index on a varchar(255) field: The DBMS assumes the worst case for the index +length, which is 3 bytes per character for utf8 (utf8mb3), but 4 bytes for utf8mb4: With utf8, +the maximum index length is 3*255 + 1 = 766 bytes which fits into 767, but with utf8mb4, this +is 4*255 + 1 = 1021 bytes, which exceeds the maximum length and leads to SQL errors when setting +such an index. +This scenario gets more complex with combined indexes and may need manual investigation when +upgrading an existing instance from `utf8` to `utf8mb4`.
One solution is to restrict the +index length in ext_tables.sql of the affected extension: :php:`KEY myKey (myField(191))`, which +in this case leads to 4*191 + 1 = 764 bytes as the maximum used length. + +The basic settings to use `utf8mb4` in :file:`LocalConfiguration.php` are:: + +'DB' => [ + 'Connections' => [ + 'Default' => [ + 'driver' => 'mysqli', + ... + 'charset' => 'utf8mb4', + 'tableoptions' => [ + 'charset' => 'utf8mb4', + 'collate' => 'utf8mb4_unicode_ci', + ], + ], + ], +], + + +Impact +====== + +`utf8mb4` is an allowed charset and `utf8mb4_unicode_ci` is an allowed collation and +used by default for new instances running on MySQL. + +.. index:: PHP-API, LocalConfiguration, Database diff --git a/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendCache.sql b/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendCache.sql index 386bbcedd04476f304b460ec111424b0d7804cb1..e3754e43e2a9114a65b7b01bca98043a9a01f758 100644 --- a/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendCache.sql +++ b/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendCache.sql @@ -4,5 +4,5 @@ CREATE TABLE ###CACHE_TABLE### ( expires int(11) unsigned DEFAULT '0' NOT NULL, content longblob, PRIMARY KEY (id), - KEY cache_id (identifier,expires) + KEY cache_id (identifier(180),expires) ) ENGINE=InnoDB; \ No newline at end of file diff --git a/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendTags.sql b/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendTags.sql index d9b3672daa280e10d29829ab7b730371a5ed139e..00eb9f269d838452d14f7477b18a5302398a4133 100644 --- a/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendTags.sql +++ b/typo3/sysext/core/Resources/Private/Sql/Cache/Backend/Typo3DatabaseBackendTags.sql @@ -3,6 +3,6 @@ CREATE TABLE ###TAGS_TABLE### ( identifier varchar(250) DEFAULT '' NOT NULL, tag varchar(250) DEFAULT '' NOT NULL,
PRIMARY KEY (id), - KEY cache_id (identifier), - KEY cache_tag (tag) + KEY cache_id (identifier(191)), + KEY cache_tag (tag(191)) ) ENGINE=InnoDB; \ No newline at end of file diff --git a/typo3/sysext/core/ext_tables.sql b/typo3/sysext/core/ext_tables.sql index b84d29a5ca820616c0561c18a54f54e4115e5cc1..de02f5c2244dc2806c424015b98b0e54803c844d 100644 --- a/typo3/sysext/core/ext_tables.sql +++ b/typo3/sysext/core/ext_tables.sql @@ -254,8 +254,8 @@ CREATE TABLE sys_file_processedfile ( height int(11) DEFAULT '0', PRIMARY KEY (uid), - KEY combined_1 (original,task_type,configurationsha1), - KEY identifier (storage,identifier(199)) + KEY combined_1 (original,task_type(100),configurationsha1), + KEY identifier (storage,identifier(180)) ); # @@ -344,8 +344,8 @@ CREATE TABLE sys_history ( history_data mediumtext, workspace int(11) DEFAULT '0', - KEY recordident_1 (tablename,recuid), - KEY recordident_2 (tablename,tstamp) + KEY recordident_1 (tablename(100),recuid), + KEY recordident_2 (tablename(100),tstamp) ) ENGINE=InnoDB; # @@ -383,9 +383,9 @@ CREATE TABLE sys_refindex ( ref_string varchar(1024) DEFAULT '' NOT NULL, PRIMARY KEY (hash), - KEY lookup_rec (tablename(240),recuid), - KEY lookup_uid (ref_table(240),ref_uid), - KEY lookup_string (ref_string(255)) + KEY lookup_rec (tablename(100),recuid), + KEY lookup_uid (ref_table(100),ref_uid), + KEY lookup_string (ref_string(191)) ); # diff --git a/typo3/sysext/frontend/ext_tables.sql b/typo3/sysext/frontend/ext_tables.sql index 317be4cd5a2d7cea357735719e212d71b530e4b7..419869d6e1e67a0003b22a8e83168c258dc2a054 100644 --- a/typo3/sysext/frontend/ext_tables.sql +++ b/typo3/sysext/frontend/ext_tables.sql @@ -66,8 +66,8 @@ CREATE TABLE fe_users ( lastlogin int(10) unsigned DEFAULT '0' NOT NULL, is_online int(10) unsigned DEFAULT '0' NOT NULL, - KEY parent (pid,username), - KEY username (username), + KEY parent (pid,username(100)), + KEY username (username(100)), KEY is_online (is_online) ); @@ -124,7 +124,7 @@ CREATE TABLE 
sys_domain ( domainName varchar(255) DEFAULT '' NOT NULL, KEY getSysDomain (hidden), - KEY getDomainStartPage (pid,hidden,domainName) + KEY getDomainStartPage (pid,hidden,domainName(100)) ); # diff --git a/typo3/sysext/install/Classes/Controller/InstallerController.php b/typo3/sysext/install/Classes/Controller/InstallerController.php index 4bd732e17d141420054a8d2580098da3e8635e20..192ae5264f93f8899ce5d428065a752cfb3a1b8e 100644 --- a/typo3/sysext/install/Classes/Controller/InstallerController.php +++ b/typo3/sysext/install/Classes/Controller/InstallerController.php @@ -442,6 +442,14 @@ class InstallerController $defaultConnectionSettings['path'] = Environment::getConfigPath() . $dbFilename; } } + // For mysql, set utf8mb4 as default charset + if (isset($postValues['driver']) && in_array($postValues['driver'], ['mysqli', 'pdo_mysql'])) { + $defaultConnectionSettings['charset'] = 'utf8mb4'; + $defaultConnectionSettings['tableoptions'] = [ + 'charset' => 'utf8mb4', + 'collate' => 'utf8mb4_unicode_ci', + ]; + } } $success = false; @@ -450,7 +458,10 @@ class InstallerController try { $connectionParams = $defaultConnectionSettings; $connectionParams['wrapperClass'] = Connection::class; - $connectionParams['charset'] = 'utf-8'; + if (!isset($connectionParams['charset'])) { + // utf-8 as default for non mysql + $connectionParams['charset'] = 'utf-8'; + } DriverManager::getConnection($connectionParams)->ping(); $success = true; } catch (DBALException $e) { @@ -1054,7 +1065,7 @@ For each website you need a TypoScript template on the main page of your website if (strpos($defaultDatabaseCharset, 'utf8') !== 0) { $result = new FlashMessage( 'Your database uses character set "' . $defaultDatabaseCharset . '", ' - . 'but only "utf8" is supported with TYPO3. You probably want to change this before proceeding.', + . 'but only "utf8" and "utf8mb4" are supported with TYPO3. 
You probably want to change this before proceeding.', 'Invalid Charset', FlashMessage::ERROR ); diff --git a/typo3/sysext/redirects/ext_tables.sql b/typo3/sysext/redirects/ext_tables.sql index ed440fe37abb858ee3f4b27801aca789125585d5..5b6d6314f716e3c423f41573c4c654c2d54c6c57 100644 --- a/typo3/sysext/redirects/ext_tables.sql +++ b/typo3/sysext/redirects/ext_tables.sql @@ -15,5 +15,5 @@ CREATE TABLE sys_redirect ( lasthiton int(11) DEFAULT '0' NOT NULL, disable_hitcount tinyint(1) unsigned DEFAULT '0' NOT NULL, - KEY index_source (source_host,source_path) + KEY index_source (source_host(80),source_path(80)) ); diff --git a/typo3/sysext/reports/Classes/Report/Status/ConfigurationStatus.php b/typo3/sysext/reports/Classes/Report/Status/ConfigurationStatus.php index c3f280eea638fcb3071e9f2244523d9372fa5fe1..c31527d60284492433fdb41d1b0e563f1cfe648a 100644 --- a/typo3/sysext/reports/Classes/Report/Status/ConfigurationStatus.php +++ b/typo3/sysext/reports/Classes/Report/Status/ConfigurationStatus.php @@ -244,6 +244,7 @@ class ConfigurationStatus implements StatusProviderInterface if (strpos($defaultDatabaseCharset, 'utf8') !== 0) { // If the default character set is e.g. 
latin1, BUT all tables in the system are UTF-8, // we assume that TYPO3 has the correct charset for adding tables, and everything is fine + $queryBuilder = $connection->createQueryBuilder(); $nonUtf8TableCollationsFound = $queryBuilder->select('table_collation') ->from('information_schema.tables') ->where( @@ -265,6 +266,61 @@ class ConfigurationStatus implements StatusProviderInterface $severity = ReportStatus::INFO; $statusValue = $this->getLanguageService()->getLL('status_info'); } + } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['DB']['Connections'][ConnectionPool::DEFAULT_CONNECTION_NAME]['tableoptions'])) { + $message = $this->getLanguageService()->getLL('status_MysqlDatabaseCharacterSet_Ok'); + + $tableOptions = $GLOBALS['TYPO3_CONF_VARS']['DB']['Connections'][ConnectionPool::DEFAULT_CONNECTION_NAME]['tableoptions']; + if (isset($tableOptions['collate'])) { + $collationConstraint = $queryBuilder->expr()->neq('table_collation', $queryBuilder->quote($tableOptions['collate'])); + $charset = $tableOptions['collate']; + } elseif (isset($tableOptions['charset'])) { + $collationConstraint = $queryBuilder->expr()->notLike('table_collation', $queryBuilder->quote($tableOptions['charset'] . 
'%')); + $charset = $tableOptions['charset']; + } + + if (isset($collationConstraint)) { + $queryBuilder = $connection->createQueryBuilder(); + $wrongCollationTablesFound = $queryBuilder->select('table_collation') + ->from('information_schema.tables') + ->where( + $queryBuilder->expr()->andX( + $queryBuilder->expr()->eq('table_schema', $queryBuilder->quote($connection->getDatabase())), + $collationConstraint + ) + ) + ->setMaxResults(1) + ->execute(); + + if ($wrongCollationTablesFound->rowCount() > 0) { + $message = sprintf($this->getLanguageService()->getLL('status_MysqlDatabaseCharacterSet_MixedCollations'), $charset); + $severity = ReportStatus::ERROR; + $statusValue = $this->getLanguageService()->getLL('status_checkFailed'); + } else { + if (isset($tableOptions['collate'])) { + $collationConstraint = $queryBuilder->expr()->neq('collation_name', $queryBuilder->quote($tableOptions['collate'])); + } elseif (isset($tableOptions['charset'])) { + $collationConstraint = $queryBuilder->expr()->notLike('collation_name', $queryBuilder->quote($tableOptions['charset'] . 
'%')); + } + + $queryBuilder = $connection->createQueryBuilder(); + $wrongCollationColumnsFound = $queryBuilder->select('collation_name') + ->from('information_schema.columns') + ->where( + $queryBuilder->expr()->andX( + $queryBuilder->expr()->eq('table_schema', $queryBuilder->quote($connection->getDatabase())), + $collationConstraint + ) + ) + ->setMaxResults(1) + ->execute(); + + if ($wrongCollationColumnsFound->rowCount() > 0) { + $message = sprintf($this->getLanguageService()->getLL('status_MysqlDatabaseCharacterSet_MixedCollations'), $charset); + $severity = ReportStatus::ERROR; + $statusValue = $this->getLanguageService()->getLL('status_checkFailed'); + } + } + } } else { $message = $this->getLanguageService()->getLL('status_MysqlDatabaseCharacterSet_Ok'); } diff --git a/typo3/sysext/reports/Resources/Private/Language/locallang_reports.xlf b/typo3/sysext/reports/Resources/Private/Language/locallang_reports.xlf index 9d383f592aa01beebe8c5e292615ce33f533937a..a02dea11deef0d208df8b52cddabfdd22abeb1c1 100644 --- a/typo3/sysext/reports/Resources/Private/Language/locallang_reports.xlf +++ b/typo3/sysext/reports/Resources/Private/Language/locallang_reports.xlf @@ -123,6 +123,9 @@ <trans-unit id="status_MysqlDatabaseCharacterSet_Info"> <source>Your default database uses a different charset, but all tables uses utf-8. All good. But consider fixing your database collation and check the table creation settings.</source> </trans-unit> + <trans-unit id="status_MysqlDatabaseCharacterSet_MixedCollations"> + <source>Your default database is set to create tables with character set "%1$s", but contains tables or columns with different collations. Please fix these tables to avoid "illegal mix of collations" errors.</source> + </trans-unit> <trans-unit id="status_encryptedConnectionStatus"> <source>Encrypted backend connection (HTTPS)</source> </trans-unit>