From c2a9726c4efb23d76c084e3319b29f74c14ccdce Mon Sep 17 00:00:00 2001
From: Claus Due <claus@namelesscoder.net>
Date: Tue, 29 Nov 2016 12:58:38 +0100
Subject: [PATCH] [BUGFIX] Avoid duplicates if ReferenceIndex is unable to
 finish

Solves an issue where DB may end up containing duplicates
if the reference indexer is unable to finish, e.g. if memory
or max execution time are exhausted. Such cases would
leave duplicate indexed references. The patch avoids those
duplicates by ensuring that the hash does not contain a
sorting value, which lets the reference indexer retrieve the
original reference even if it has a new sorting.

Further, an update wizard is added that allows rewriting the hashes of
existing records.

Resolves: #78829
Releases: master
Change-Id: I145aa60460c5904a83b6c8373f107c7bfce8b434
Reviewed-on: https://review.typo3.org/50803
Reviewed-by: Christian Kuhn <lolli@schwarzbu.ch>
Tested-by: Christian Kuhn <lolli@schwarzbu.ch>
Tested-by: TYPO3com <no-reply@typo3.com>
Reviewed-by: Morton Jonuschat <m.jonuschat@mojocode.de>
Tested-by: Morton Jonuschat <m.jonuschat@mojocode.de>
---
 .../core/Classes/Database/ReferenceIndex.php  |  21 +-
 .../Updates/SysRefindexHashUpdater.php        | 179 ++++++++++++++++++
 typo3/sysext/install/ext_localconf.php        |   2 +
 3 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 typo3/sysext/install/Classes/Updates/SysRefindexHashUpdater.php

diff --git a/typo3/sysext/core/Classes/Database/ReferenceIndex.php b/typo3/sysext/core/Classes/Database/ReferenceIndex.php
index 27f70950e98b..879f5b383dce 100644
--- a/typo3/sysext/core/Classes/Database/ReferenceIndex.php
+++ b/typo3/sysext/core/Classes/Database/ReferenceIndex.php
@@ -122,7 +122,7 @@ class ReferenceIndex
      * @var int
      * @see updateRefIndexTable()
      */
-    public $hashVersion = 1;
+    public $hashVersion = 2;
 
     /**
      * Current workspace id
@@ -209,8 +209,8 @@ class ReferenceIndex
 
         $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('sys_refindex');
 
-        // Get current index from Database with hash as index using $uidIndexField
-        // no restrictions are needed, since sys_refindex is not a TCA table
+        // Get current index from database with hash as index using $uidIndexField
+        // No restrictions are needed, since sys_refindex is not a TCA table
         $queryBuilder = $connection->createQueryBuilder();
         $queryBuilder->getRestrictions()->removeAll();
         $queryResult = $queryBuilder->select('*')->from('sys_refindex')->where(
@@ -236,14 +236,23 @@ class ReferenceIndex
                     if (!is_array($relation)) {
                         continue;
                     }
-                    $relation['hash'] = md5(implode('///', $relation) . '///' . $this->hashVersion);
-                    // First, check if already indexed and if so, unset that row (so in the end we know which rows to remove!)
+
+                    // Exclude sorting from the list of hashed fields as generateRefIndexData()
+                    // can generate arbitrary sorting values
+                    // @see createEntryData_dbRels and createEntryData_fileRels
+                    $relation['hash'] = md5(
+                        implode('///', array_diff_key($relation, ['sorting' => true]))
+                        . '///'
+                        . $this->hashVersion
+                    );
+
+                    // First, check if already indexed and if so, unset that row
+                    // (so in the end we know which rows to remove!)
                     if (isset($currentRelations[$relation['hash']])) {
                         unset($currentRelations[$relation['hash']]);
                         $result['keptNodes']++;
                         $relation['_ACTION'] = 'KEPT';
                     } else {
-                        // If new, add it:
                         if (!$testOnly) {
                             $connection->insert('sys_refindex', $relation);
                         }
diff --git a/typo3/sysext/install/Classes/Updates/SysRefindexHashUpdater.php b/typo3/sysext/install/Classes/Updates/SysRefindexHashUpdater.php
new file mode 100644
index 000000000000..35edf65a9f60
--- /dev/null
+++ b/typo3/sysext/install/Classes/Updates/SysRefindexHashUpdater.php
@@ -0,0 +1,179 @@
+<?php
+declare(strict_types=1);
+namespace TYPO3\CMS\Install\Updates;
+
+/*
+ * This file is part of the TYPO3 CMS project.
+ *
+ * It is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License, either version 2
+ * of the License, or any later version.
+ *
+ * For the full copyright and license information, please read the
+ * LICENSE.txt file that was distributed with this source code.
+ *
+ * The TYPO3 project - inspiring people to share!
+ */
+
+use Doctrine\DBAL\DBALException;
+use Doctrine\DBAL\Platforms\SqlitePlatform;
+use Doctrine\DBAL\Platforms\SQLServerPlatform;
+use TYPO3\CMS\Core\Database\ConnectionPool;
+use TYPO3\CMS\Core\Utility\GeneralUtility;
+
+/**
+ * Storing new hashes without sorting column in sys_refindex
+ */
+class SysRefindexHashUpdater extends AbstractUpdate
+{
+    /**
+     * @var string
+     */
+    protected $title = 'Update the hash field of sys_refindex to exclude the sorting field';
+
+    /**
+     * Fields that make up the hash value, in concatenation order (sorting is intentionally absent)
+     *
+     * @var array
+     */
+    protected $hashMemberFields = [
+        'tablename',
+        'recuid',
+        'field',
+        'flexpointer',
+        'softref_key',
+        'softref_id',
+        'deleted',
+        'workspace',
+        'ref_table',
+        'ref_uid',
+        'ref_string'
+    ];
+
+    /**
+     * The new hash version, appended as last element of the string that gets hashed
+     *
+     * @var int
+     */
+    protected $hashVersion = 2;
+
+    /**
+     * Checks if an update is needed (wizard not marked done and rows with an outdated hash exist)
+     *
+     * @param string &$description The description for the update
+     * @return bool Whether an update is needed (true) or not (false)
+     * @throws \InvalidArgumentException
+     */
+    public function checkForUpdate(&$description)
+    {
+        if ($this->isWizardDone()) {
+            return false;
+        }
+
+        $description = 'The hash calculation for records within the table sys_refindex was changed'
+            . ' to exclude the sorting field. The records need to be updated with a newly calculated hash.';
+
+        $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('sys_refindex');
+
+        // SQLite does not have any helpful string/hash functions, unless the wizard is marked done
+        // we need to assume this updater needs to run.
+        if ($connection->getDatabasePlatform() instanceof SqlitePlatform) {
+            return true;
+        }
+
+        $queryBuilder = $connection->createQueryBuilder();
+        $count = (int)$queryBuilder->count('*')
+            ->from('sys_refindex')
+            ->where($queryBuilder->expr()->neq('hash', $this->calculateHashFragment()))
+            ->execute()
+            ->fetchColumn(0);
+
+        return $count !== 0;
+    }
+
+    /**
+     * Recalculates the hash of all sys_refindex rows with an outdated hash inside one transaction
+     *
+     * @param array &$databaseQueries Queries done in this update
+     * @param string &$customMessage Custom messages
+     * @return bool
+     * @throws \InvalidArgumentException
+     * @throws \Doctrine\DBAL\DBALException
+     * @throws \Doctrine\DBAL\ConnectionException
+     */
+    public function performUpdate(array &$databaseQueries, &$customMessage)
+    {
+        $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('sys_refindex');
+        $queryBuilder = $connection->createQueryBuilder();
+
+        $statement = $queryBuilder->select('hash', ...$this->hashMemberFields)
+            ->from('sys_refindex')
+            ->where($queryBuilder->expr()->neq('hash', $this->calculateHashFragment()))
+            ->execute();
+
+        // The SQL reads "SET hash = ? WHERE hash = ?": the new hash is bound first, the old hash second
+        $updateQueryBuilder = $connection->createQueryBuilder();
+        $updateQueryBuilder->update('sys_refindex')
+            ->set('hash', $updateQueryBuilder->createPositionalParameter('', \PDO::PARAM_STR), false)
+            ->where(
+                $updateQueryBuilder->expr()->eq(
+                    'hash',
+                    $updateQueryBuilder->createPositionalParameter('', \PDO::PARAM_STR)
+                )
+            );
+        $databaseQueries[] = $updateQueryBuilder->getSQL();
+        $updateStatement = $connection->prepare($updateQueryBuilder->getSQL());
+
+        $connection->beginTransaction();
+        try {
+            while ($row = $statement->fetch()) {
+                $newHash = md5(implode('///', array_diff_key($row, ['hash' => true])) . '///' . $this->hashVersion);
+                $updateStatement->execute([$newHash, $row['hash']]);
+            }
+            $connection->commit();
+            $this->markWizardAsDone();
+        } catch (DBALException $e) {
+            $connection->rollBack();
+            throw $e;
+        }
+
+        return true;
+    }
+
+    /**
+     * Build the DBMS specific SQL fragment that calculates the MD5 hash for the given fields within the database.
+     *
+     * @return string
+     * @throws \InvalidArgumentException
+     */
+    protected function calculateHashFragment(): string
+    {
+        $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('sys_refindex');
+        $databasePlatform = $connection->getDatabasePlatform();
+
+        $quotedFields = array_map(
+            function ($fieldName) use ($connection) {
+                return sprintf('CAST(%s AS CHAR)', $connection->quoteIdentifier($fieldName));
+            },
+            $this->hashMemberFields
+        );
+
+        // Add the new hash version to the list of fields, taken from the same property the PHP hashing uses
+        $quotedFields[] = $connection->quote((string)$this->hashVersion);
+
+        if ($databasePlatform instanceof SQLServerPlatform) {
+            $concatFragment = sprintf('CONCAT_WS(%s, %s)', $connection->quote('///'), implode(', ', $quotedFields));
+            return sprintf(
+                'LOWER(CONVERT(NVARCHAR(32),HashBytes(%s, %s), 2))',
+                $connection->quote('MD5'),
+                $concatFragment
+            );
+        } elseif ($databasePlatform instanceof SqlitePlatform) {
+            // SQLite cannot do MD5 in database, so update all records which have a hash
+            return $connection->quote('');
+        } else {
+            $concatFragment = sprintf('CONCAT_WS(%s, %s)', $connection->quote('///'), implode(', ', $quotedFields));
+            return sprintf('LOWER(MD5(%s))', $concatFragment);
+        }
+    }
+}
diff --git a/typo3/sysext/install/ext_localconf.php b/typo3/sysext/install/ext_localconf.php
index e005ce688122..afdfd8c5d2d9 100644
--- a/typo3/sysext/install/ext_localconf.php
+++ b/typo3/sysext/install/ext_localconf.php
@@ -64,3 +64,5 @@ $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['ext/install']['update'][\TYPO3\CMS\In
     = \TYPO3\CMS\Install\Updates\MigrateCscStaticTemplateUpdate::class;
 $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['ext/install']['update'][\TYPO3\CMS\Install\Updates\MigrateFscStaticTemplateUpdate::class]
     = \TYPO3\CMS\Install\Updates\MigrateFscStaticTemplateUpdate::class;
+$GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['ext/install']['update'][\TYPO3\CMS\Install\Updates\SysRefindexHashUpdater::class]
+    = \TYPO3\CMS\Install\Updates\SysRefindexHashUpdater::class;
-- 
GitLab