From 1075c03e1fdf86204ac5fbefbe6cb8aa9e26c507 Mon Sep 17 00:00:00 2001 From: Jigal van Hemert <jigal.van.hemert@typo3.org> Date: Mon, 9 Sep 2019 15:55:54 +0200 Subject: [PATCH] [BUGFIX] Make wizard SeparateSysHistoryFromSysLogUpdate repeatable To prevent memory issues with huge sys_history tables and to prevent timeouts when handling large amounts of data the wizard processes small batches of records at a time, keeps track of its progress in the registry and this way allows repeated runs without losing data. If both the registry and sys_history share the same connection a database transaction is used to make sure that modifying data and progress tracking are in sync. Resolves: #88487 Releases: master, 9.5 Change-Id: I7d323ac3dae1d416f5883b2e52f94ee50bd66327 Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/61654 Tested-by: TYPO3com <noreply@typo3.com> Tested-by: Roland Golla <rolandgolla@gmail.com> Tested-by: Susanne Moog <look@susi.dev> Reviewed-by: Benni Mack <benni@typo3.org> Reviewed-by: Susanne Moog <look@susi.dev> --- .../SeparateSysHistoryFromSysLogUpdate.php | 345 ++++++++++++++---- 1 file changed, 273 insertions(+), 72 deletions(-) diff --git a/typo3/sysext/install/Classes/Updates/SeparateSysHistoryFromSysLogUpdate.php b/typo3/sysext/install/Classes/Updates/SeparateSysHistoryFromSysLogUpdate.php index ab777f3ef928..017e0d691398 100644 --- a/typo3/sysext/install/Classes/Updates/SeparateSysHistoryFromSysLogUpdate.php +++ b/typo3/sysext/install/Classes/Updates/SeparateSysHistoryFromSysLogUpdate.php @@ -18,14 +18,25 @@ namespace TYPO3\CMS\Install\Updates; use TYPO3\CMS\Core\Database\Connection; use TYPO3\CMS\Core\Database\ConnectionPool; use TYPO3\CMS\Core\DataHandling\History\RecordHistoryStore; +use TYPO3\CMS\Core\Registry; use TYPO3\CMS\Core\Utility\GeneralUtility; /** * Merge data stored in sys_log that belongs to sys_history * @internal This class is only meant to be used within EXT:install and is not part of the TYPO3 Core API. */ -class SeparateSysHistoryFromSysLogUpdate implements UpgradeWizardInterface +class SeparateSysHistoryFromSysLogUpdate implements UpgradeWizardInterface, RepeatableInterface { + + /** @var int Number of records to process in a single query to reduce memory footprint */ + private const BATCH_SIZE = 100; + + /** @var int Phase that copies data from sys_log to sys_history */ + private const MOVE_DATA = 0; + + /** @var int Phase that adds history records for inserts and deletes */ + private const UPDATE_HISTORY = 1; + /** * @return string Unique identifier of this updater */ @@ -91,96 +102,265 @@ class SeparateSysHistoryFromSysLogUpdate implements UpgradeWizardInterface * where a reference is still there: sys_history.sys_log_uid > 0 * * @return bool + * @throws \Doctrine\DBAL\ConnectionException + * @throws \Exception */ public function executeUpdate(): bool { - // update "modify" statements (= decoupling) + // If rows from the target table that is updated and the sys_registry table are on the + // same connection, the update statement and sys_registry position update will be + // handled in a transaction to have an atomic operation in case of errors during execution. + $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class); $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('sys_history'); - $queryBuilder = $connection->createQueryBuilder(); - $rows = $queryBuilder - ->select('sys_history.uid AS history_uid', 'sys_history.history_data', 'sys_log.*') - ->from('sys_history') - ->leftJoin( - 'sys_history', - 'sys_log', - 'sys_log', - $queryBuilder->expr()->eq('sys_history.sys_log_uid', $queryBuilder->quoteIdentifier('sys_log.uid')) - ) - ->execute() - ->fetchAll(); - - foreach ($rows as $row) { - $logData = $row['log_data'] !== null ? unserialize($row['log_data'], ['allowed_classes' => false]) : []; - $updateData = [ - 'actiontype' => RecordHistoryStore::ACTION_MODIFY, - 'usertype' => 'BE', - 'userid' => $row['userid'], - 'sys_log_uid' => 0, - 'history_data' => json_encode($row['history_data'] !== null ? unserialize($row['history_data'], ['allowed_classes' => false]) : []), - 'originaluserid' => empty($logData['originalUser']) ? null : $logData['originalUser'] - ]; - $connection->update( - 'sys_history', - $updateData, - ['uid' => (int)$row['history_uid']], - ['uid' => Connection::PARAM_INT] + $connectionForSysRegistry = $connectionPool->getConnectionForTable('sys_registry'); + + // In case the PHP ended for whatever reason, fetch the last position from registry + // and only execute the phase(s) that has/have not been executed yet + $startPositionAndPhase = $this->getStartPositionAndPhase(); + + if ($startPositionAndPhase['phase'] === self::MOVE_DATA) { + $startPositionAndPhase = $this->moveDataFromSysLogToSysHistory( + $connection, + $connectionForSysRegistry, + $startPositionAndPhase ); - // Store information about history entry in sys_log table - $logData['history'] = $row['history_uid']; - $connection->update( - 'sys_log', - ['log_data' => serialize($logData)], - ['uid' => (int)$row['uid']], - ['uid' => Connection::PARAM_INT] + } + + if ($startPositionAndPhase['phase'] === self::UPDATE_HISTORY) { + $this->keepHistoryForInsertAndDeleteActions( + $connectionForSysRegistry, + $startPositionAndPhase ); } - // Add insert/delete calls - $logQueryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_log'); - $result = $logQueryBuilder - ->select('uid', 'userid', 'action', 'tstamp', 'log_data', 'tablename', 'recuid') - ->from('sys_log') - ->where( - $logQueryBuilder->expr()->eq('type', $logQueryBuilder->createNamedParameter(1, \PDO::PARAM_INT)), - $logQueryBuilder->expr()->orX( - $logQueryBuilder->expr()->eq('action', $logQueryBuilder->createNamedParameter(1, \PDO::PARAM_INT)), - $logQueryBuilder->expr()->eq('action', $logQueryBuilder->createNamedParameter(3, \PDO::PARAM_INT)) + return true; + } + + /** + * @param \TYPO3\CMS\Core\Database\Connection $connection + * @param \TYPO3\CMS\Core\Database\Connection $connectionForSysRegistry + * @param array $startPositionAndPhase + * @return array + * @throws \Doctrine\DBAL\ConnectionException + * @throws \Exception + */ + protected function moveDataFromSysLogToSysHistory( + Connection $connection, + Connection $connectionForSysRegistry, + array $startPositionAndPhase + ): array { + do { + $processedRows = 0; + + // update "modify" statements (= decoupling) + $queryBuilder = $connection->createQueryBuilder(); + $rows = $queryBuilder->select('sys_history.uid AS history_uid', 'sys_history.history_data', 'sys_log.*') + ->from('sys_history') + ->leftJoin( + 'sys_history', + 'sys_log', + 'sys_log', + $queryBuilder->expr()->eq('sys_history.sys_log_uid', $queryBuilder->quoteIdentifier('sys_log.uid')) ) - ) - ->orderBy('uid', 'DESC') - ->execute(); - - foreach ($result as $row) { - $logData = (array)unserialize($row['log_data']); - $store = GeneralUtility::makeInstance( - RecordHistoryStore::class, - RecordHistoryStore::USER_BACKEND, - $row['userid'], - (empty($logData['originalUser']) ? null : $logData['originalUser']), - $row['tstamp'] - ); - switch ($row['action']) { - // Insert - case 1: - $store->addRecord($row['tablename'], (int)$row['recuid'], $logData); - break; - case 3: + ->where($queryBuilder->expr()->gt('sys_history.uid', $queryBuilder->createNamedParameter($startPositionAndPhase['uid']))) + ->setMaxResults(self::BATCH_SIZE) + ->orderBy('sys_history.uid', 'ASC') + ->execute() + ->fetchAll(); + + foreach ($rows as $row) { + $logData = $row['log_data'] !== null ? unserialize($row['log_data'], ['allowed_classes' => false]) : []; + $updateData = [ + 'actiontype' => RecordHistoryStore::ACTION_MODIFY, + 'usertype' => 'BE', + 'userid' => $row['userid'], + 'sys_log_uid' => 0, + 'history_data' => json_encode( + $row['history_data'] !== null + ? unserialize($row['history_data'], ['allowed_classes' => false]) + : [] + ), + 'originaluserid' => empty($logData['originalUser']) ? null : $logData['originalUser'] + ]; + + if ($connection === $connectionForSysRegistry) { + // sys_history and sys_registry tables are on the same connection, use a transaction + $connection->beginTransaction(); + try { + $startPositionAndPhase = $this->updateTablesAndTrackProgress( + $connection, + $connection, + $updateData, + $logData, + $row + ); + $connection->commit(); + } catch (\Exception $up) { + $connection->rollBack(); + throw ($up); + } + } else { + // Different connections for sys_history and sys_registry -> execute two + // distinct queries and hope for the best. + $startPositionAndPhase = $this->updateTablesAndTrackProgress( + $connection, + $connectionForSysRegistry, + $updateData, + $logData, + $row + ); + } + + $processedRows++; + } + // repeat until a resultset smaller than the batch size was processed + } while ($processedRows === self::BATCH_SIZE); + + // phase 0 is finished + $registry = GeneralUtility::makeInstance(Registry::class); + $startPositionAndPhase = [ + 'phase' => self::UPDATE_HISTORY, + 'uid' => 0, + ]; + $registry->set('installSeparateHistoryFromSysLog', 'phaseAndPosition', $startPositionAndPhase); + + return $startPositionAndPhase; + } + + /** + * Update sys_history and sys_log tables + * + * Also keep track of progress in sys_registry + * + * @param \TYPO3\CMS\Core\Database\Connection $connection + * @param \TYPO3\CMS\Core\Database\Connection $connectionForSysRegistry + * @param array $updateData + * @param array $logData + * @param array $row + * @return array + */ + protected function updateTablesAndTrackProgress( + Connection $connection, + Connection $connectionForSysRegistry, + array $updateData, + array $logData, + array $row + ): array { + $connection->update( + 'sys_history', + $updateData, + ['uid' => (int)$row['history_uid']], + ['uid' => Connection::PARAM_INT] + ); + + // Store information about history entry in sys_log table + $logData['history'] = $row['history_uid']; + $connection->update( + 'sys_log', + ['log_data' => serialize($logData)], + ['uid' => (int)$row['uid']], + ['uid' => Connection::PARAM_INT] + ); + $startPositionAndPhase = [ + 'phase' => self::MOVE_DATA, + 'uid' => $row['history_uid'], + ]; + $connectionForSysRegistry->update( + 'sys_registry', + [ + 'entry_value' => serialize($startPositionAndPhase) + ], + [ + 'entry_namespace' => 'installSeparateHistoryFromSysLog', + 'entry_key' => 'phaseAndPosition', + ] + ); + + return $startPositionAndPhase; + } + + /** + * Add Insert and Delete actions from sys_log to sys_history + * + * @param \TYPO3\CMS\Core\Database\Connection $connectionForSysRegistry + * @param array $startPositionAndPhase + */ + protected function keepHistoryForInsertAndDeleteActions( + Connection $connectionForSysRegistry, + array $startPositionAndPhase + ) { + do { + $processedRows = 0; + + // Add insert/delete calls + $logQueryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('sys_log'); + $result = $logQueryBuilder->select('uid', 'userid', 'action', 'tstamp', 'log_data', 'tablename', 'recuid') + ->from('sys_log') + ->where( + $logQueryBuilder->expr()->eq('type', $logQueryBuilder->createNamedParameter(1, \PDO::PARAM_INT)), + $logQueryBuilder->expr()->orX( + $logQueryBuilder->expr()->eq('action', $logQueryBuilder->createNamedParameter(1, \PDO::PARAM_INT)), + $logQueryBuilder->expr()->eq('action', $logQueryBuilder->createNamedParameter(3, \PDO::PARAM_INT)) + ) + ) + ->andWhere( + $logQueryBuilder->expr()->gt('uid', $logQueryBuilder->createNamedParameter($startPositionAndPhase['uid'])) + ) + ->orderBy('uid', 'ASC') + ->setMaxResults(self::BATCH_SIZE) + ->execute(); + + foreach ($result as $row) { + $logData = (array)unserialize($row['log_data'], ['allowed_classes' => false]); + + $store = GeneralUtility::makeInstance( + RecordHistoryStore::class, + RecordHistoryStore::USER_BACKEND, + $row['userid'], + (empty($logData['originalUser']) ? null : $logData['originalUser']), + $row['tstamp'] + ); + + switch ($row['action']) { + // Insert + case 1: + $store->addRecord($row['tablename'], (int)$row['recuid'], $logData); + break; // Delete - $store->deleteRecord($row['tablename'], (int)$row['recuid']); - break; + case 3: + $store->deleteRecord($row['tablename'], (int)$row['recuid']); + break; + } + + $startPositionAndPhase = [ + 'phase' => self::UPDATE_HISTORY, + 'uid' => $row['uid'], + ]; + $connectionForSysRegistry->update( + 'sys_registry', + [ + 'entry_value' => serialize($startPositionAndPhase) + ], + [ + 'entry_namespace' => 'installSeparateHistoryFromSysLog', + 'entry_key' => 'phaseAndPosition', + ] + ); + + $processedRows++; } - } - return true; + // repeat until a result set smaller than the batch size was processed + } while ($processedRows === self::BATCH_SIZE); } /** - * Check if given field /column in a table exists + * Checks if given field /column in a table exists * * @param string $table * @param string $fieldName * @return bool */ - protected function checkIfFieldInTableExists($table, $fieldName) + protected function checkIfFieldInTableExists($table, $fieldName): bool { $tableColumns = GeneralUtility::makeInstance(ConnectionPool::class) ->getConnectionForTable($table) @@ -188,4 +368,25 @@ class SeparateSysHistoryFromSysLogUpdate implements UpgradeWizardInterface ->listTableColumns($table); return isset($tableColumns[$fieldName]); } + + /** + * Returns an array with phase / uid combination that specifies the start position the + * update process should start with. + * + * @return array New start position + */ + protected function getStartPositionAndPhase(): array + { + $registry = GeneralUtility::makeInstance(Registry::class); + $startPosition = $registry->get('installSeparateHistoryFromSysLog', 'phaseAndPosition', []); + if (empty($startPosition)) { + $startPosition = [ + 'phase' => self::MOVE_DATA, + 'uid' => 0, + ]; + $registry->set('installSeparateHistoryFromSysLog', 'phaseAndPosition', $startPosition); + } + + return $startPosition; + } } -- GitLab