From c746c186013c350f80c5c5b161c562ab3c95c32f Mon Sep 17 00:00:00 2001
From: Oliver Bartsch <bo@cedev.de>
Date: Fri, 8 Oct 2021 19:36:45 +0200
Subject: [PATCH] [BUGFIX] Properly set softRefParams in LinkAnalyzer

This correctly sets "subst" as a value of the
softRefParams array. Previously "subst" was
set as a key, which only worked because of a
non-strict "in_array" check, which also
returned TRUE for this case, but then broke
with the refactoring in #94687.

Additionally, the regular expression of the
UrlSoftReferenceParser, used to find urls
in content, also got extended in #94687.
This is now partially reset to no longer
match links in tag attributes. This would
otherwise lead to duplicate entries, since those
urls are already matched by the
TypolinkTagSoftReferenceParser.

The impact of the changed regular expression
was overlooked beforehand, due to the bug
in LinkAnalyzer.

Resolves: #95449
Related: #94687
Releases: master
Change-Id: I3ca134cd1eed95b8563335708e393f5b5fa79420
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/71511
Tested-by: Nikita Hovratov <nikita.h@live.de>
Tested-by: core-ci <typo3@b13.com>
Tested-by: Georg Ringer <georg.ringer@gmail.com>
Tested-by: Oliver Bartsch <bo@cedev.de>
Reviewed-by: Nikita Hovratov <nikita.h@live.de>
Reviewed-by: Georg Ringer <georg.ringer@gmail.com>
Reviewed-by: Oliver Bartsch <bo@cedev.de>
---
 .../SoftReference/UrlSoftReferenceParser.php      |  2 +-
 .../SoftReference/UrlSoftReferenceParserTest.php  | 15 ++++++++++++---
 .../sysext/linkvalidator/Classes/LinkAnalyzer.php |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/typo3/sysext/core/Classes/DataHandling/SoftReference/UrlSoftReferenceParser.php b/typo3/sysext/core/Classes/DataHandling/SoftReference/UrlSoftReferenceParser.php
index 1ba8ab2cde43..32cc7fb3e6f8 100644
--- a/typo3/sysext/core/Classes/DataHandling/SoftReference/UrlSoftReferenceParser.php
+++ b/typo3/sysext/core/Classes/DataHandling/SoftReference/UrlSoftReferenceParser.php
@@ -22,7 +22,7 @@ namespace TYPO3\CMS\Core\DataHandling\SoftReference;
  */
 class UrlSoftReferenceParser extends AbstractSoftReferenceParser
 {
-    protected const REGEXP = '/([\s\'"]+)((https?|ftp):\\/\\/(?:[!#$&-;=?-\[\]_a-z~]+|%[0-9a-fA-F]{2})+)([\s\'"])?/';
+    protected const REGEXP = '/([^[:alnum:]"\']+)((https?|ftp):\\/\\/(?:[!#$&-;=?-\[\]_a-z~]+|%[0-9a-fA-F]{2})+)([[:space:]])?/';
 
     public function parse(string $table, string $field, int $uid, string $content, string $structurePath = ''): SoftReferenceParserResult
     {
diff --git a/typo3/sysext/core/Tests/Unit/DataHandling/SoftReference/UrlSoftReferenceParserTest.php b/typo3/sysext/core/Tests/Unit/DataHandling/SoftReference/UrlSoftReferenceParserTest.php
index 142d7d0de1e4..f44cc347166d 100644
--- a/typo3/sysext/core/Tests/Unit/DataHandling/SoftReference/UrlSoftReferenceParserTest.php
+++ b/typo3/sysext/core/Tests/Unit/DataHandling/SoftReference/UrlSoftReferenceParserTest.php
@@ -46,10 +46,10 @@ class UrlSoftReferenceParserTest extends AbstractSoftReferenceParserTest
                     ],
                 ],
             ],
-            'URLs in html match' => [
-                '<a href="https://foo-bar.baz">foo</a>',
+            'URLs in content match' => [
+                'Lorem ipsum https://foo-bar.baz dolor sit',
                 [
-                    'content' => '<a href="https://foo-bar.baz">foo</a>',
+                    'content' => 'Lorem ipsum https://foo-bar.baz dolor sit',
                     'elements' => [
                         2 => [
                             'matchString' => 'https://foo-bar.baz',
@@ -101,6 +101,15 @@ class UrlSoftReferenceParserTest extends AbstractSoftReferenceParserTest
                     ],
                 ],
             ],
+            // The two cases below are handled by typolink_tag
+            'URLs in anchor tag attributes do NOT match' => [
+                '<a href="https://foo-bar.baz">some link</a>',
+                null,
+            ],
+            'URLs in link tag attributes do NOT match' => [
+                '<link href="https://foo-bar.baz/style.css" rel="stylesheet">',
+                null,
+            ],
         ];
     }
 
diff --git a/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php b/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php
index 4fdb0e36802e..e6209b0e351c 100644
--- a/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php
+++ b/typo3/sysext/linkvalidator/Classes/LinkAnalyzer.php
@@ -329,7 +329,7 @@ class LinkAnalyzer
             }
             // Traverse soft references
             // set subst such that findRef will return substitutes for urls, emails etc
-            $softRefParams = ['subst' => true];
+            $softRefParams = ['subst'];
             foreach ($this->softReferenceParserFactory->getParsersBySoftRefParserList($conf['softref'], $softRefParams) as $softReferenceParser) {
                 $parserResult = $softReferenceParser->parse($table, $field, $idRecord, $valueField);
                 if (!$parserResult->hasMatched()) {
-- 
GitLab