From adc8b4402393668b6a148b736e46c692e88ed7e5 Mon Sep 17 00:00:00 2001 From: Oliver Hader <oliver@typo3.org> Date: Mon, 7 Nov 2022 23:45:44 +0100 Subject: [PATCH] [BUGFIX] Ensure best matching URL is used during site resolving Symfony's UrlMatcher takes multiple routes and returns the first match. However there might be better matches that are not considered at all. That's why a custom BestUrlMatcher is introduce, which resolves all matches for further reduction. Previously the route collection was sorted by names and identifiers, which now has been adjusted to focus on the actual matches when being compared to the current request. The fallback route (site base URI). This way, overlapping route definitions can be resolved better. Resolves: #93240 Releases: main, 11.5 Change-Id: Ib1a7361dc86ed48a474d5e55583a622df58e8939 Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/76464 Tested-by: Benni Mack <benni@typo3.org> Tested-by: Susanne Moog <look@susi.dev> Tested-by: core-ci <typo3@b13.com> Tested-by: Oliver Hader <oliver.hader@typo3.org> Reviewed-by: Benni Mack <benni@typo3.org> Reviewed-by: Susanne Moog <look@susi.dev> Reviewed-by: Oliver Hader <oliver.hader@typo3.org> --- .../core/Classes/Routing/BestUrlMatcher.php | 151 ++++++++++++++++++ .../core/Classes/Routing/MatchedRoute.php | 83 ++++++++++ .../core/Classes/Routing/SiteMatcher.php | 37 +---- .../Tests/Unit/Routing/SiteMatcherTest.php | 104 ++++++++++-- 4 files changed, 330 insertions(+), 45 deletions(-) create mode 100644 typo3/sysext/core/Classes/Routing/BestUrlMatcher.php create mode 100644 typo3/sysext/core/Classes/Routing/MatchedRoute.php diff --git a/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php b/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php new file mode 100644 index 000000000000..593a7e05aa83 --- /dev/null +++ b/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php @@ -0,0 +1,151 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Routing; + +use Symfony\Component\Routing\Matcher\RedirectableUrlMatcherInterface; +use Symfony\Component\Routing\Matcher\UrlMatcher; +use Symfony\Component\Routing\RouteCollection as SymfonyRouteCollection; +use TYPO3\CMS\Core\Utility\GeneralUtility; + +/** + * @internal + */ +class BestUrlMatcher extends UrlMatcher +{ + protected function matchCollection(string $pathinfo, SymfonyRouteCollection $routes): array + { + $matchedRoutes = $this->preMatchCollection($pathinfo, $routes); + $matches = count($matchedRoutes); + if ($matches === 0) { + return []; + } + if ($matches === 1) { + return $matchedRoutes[0]->getRouteResult(); + } + usort($matchedRoutes, [$this, 'sortMatchedRoutes']); + return array_shift($matchedRoutes)->getRouteResult(); + } + + /** + * Tries to match a URL with a set of routes. + * Basically all code has been duplicated from `UrlMatcher::matchCollection`, the difference is + * it does not just return the first match, but return all possible matches for further reduction. + * + * @param string $pathinfo The path info to be parsed + * @return list<MatchedRoute> + */ + protected function preMatchCollection(string $pathinfo, SymfonyRouteCollection $routes): array + { + $matchedRoutes = []; + + // HEAD and GET are equivalent as per RFC + $method = $this->context->getMethod(); + if ($method === 'HEAD') { + $method = 'GET'; + } + $supportsTrailingSlash = $method === 'GET' && $this instanceof RedirectableUrlMatcherInterface; + $trimmedPathinfo = rtrim($pathinfo, '/') ?: '/'; + + foreach ($routes as $name => $route) { + $compiledRoute = $route->compile(); + $staticPrefix = rtrim($compiledRoute->getStaticPrefix(), '/'); + $requiredMethods = $route->getMethods(); + + // check the static prefix of the URL first. Only use the more expensive preg_match when it matches + if ($staticPrefix !== '' && !str_starts_with($trimmedPathinfo, $staticPrefix)) { + continue; + } + $regex = $compiledRoute->getRegex(); + + $pos = strrpos($regex, '$'); + $hasTrailingSlash = $regex[$pos - 1] === '/'; + $regex = substr_replace($regex, '/?$', $pos - $hasTrailingSlash, 1 + $hasTrailingSlash); + + if (!preg_match($regex, $pathinfo, $matches)) { + continue; + } + + $hasTrailingVar = $trimmedPathinfo !== $pathinfo && preg_match('#\{[\w\x80-\xFF]+\}/?$#', $route->getPath()); + + if ($hasTrailingVar && ($hasTrailingSlash || (null === $m = $matches[\count($compiledRoute->getPathVariables())] ?? null) || '/' !== ($m[-1] ?? '/')) && preg_match($regex, $trimmedPathinfo, $m)) { + if ($hasTrailingSlash) { + $matches = $m; + } else { + $hasTrailingVar = false; + } + } + + $hostMatches = []; + if ($compiledRoute->getHostRegex() && !preg_match($compiledRoute->getHostRegex(), $this->context->getHost(), $hostMatches)) { + continue; + } + + $attributes = $this->getAttributes($route, $name, array_replace($matches, $hostMatches)); + + $status = $this->handleRouteRequirements($pathinfo, $name, $route, $attributes); + + if ($status[0] === self::REQUIREMENT_MISMATCH) { + continue; + } + + if ($pathinfo !== '/' && !$hasTrailingVar && $hasTrailingSlash === ($trimmedPathinfo === $pathinfo)) { + if ($supportsTrailingSlash && (!$requiredMethods || \in_array('GET', $requiredMethods))) { + return $this->allow = $this->allowSchemes = []; + } + continue; + } + + if ($route->getSchemes() && !$route->hasScheme($this->context->getScheme())) { + $this->allowSchemes = array_merge($this->allowSchemes, $route->getSchemes()); + continue; + } + + if ($requiredMethods && !\in_array($method, $requiredMethods)) { + $this->allow = array_merge($this->allow, $requiredMethods); + continue; + } + + $matchedRoute = GeneralUtility::makeInstance( + MatchedRoute::class, + $route, + array_replace($attributes, $status[1] ?? []) + ); + $matchedRoutes[] = $matchedRoute->withPathMatches($matches)->withHostMatches($hostMatches); + } + + return $matchedRoutes; + } + + /** + * Sorts the best matching route result to the beginning + */ + protected function sortMatchedRoutes(MatchedRoute $a, MatchedRoute $b): int + { + if ($a->getFallbackScore() !== $b->getFallbackScore()) { + // sort fallbacks to the end + return $a->getFallbackScore() <=> $b->getFallbackScore(); + } + if ($b->getHostMatchScore() !== $a->getHostMatchScore()) { + // sort more specific host matches to the beginning + return $b->getHostMatchScore() <=> $a->getHostMatchScore(); + } + // index `1` refers to the array index containing the corresponding `tail` match + // @todo not sure, whether `tail` can be defined generic, it's hard coded in `SiteMatcher` + return $b->getPathMatchScore(1) <=> $a->getPathMatchScore(1); + } +} diff --git a/typo3/sysext/core/Classes/Routing/MatchedRoute.php b/typo3/sysext/core/Classes/Routing/MatchedRoute.php new file mode 100644 index 000000000000..4e1c952f50c0 --- /dev/null +++ b/typo3/sysext/core/Classes/Routing/MatchedRoute.php @@ -0,0 +1,83 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Routing; + +use Symfony\Component\Routing\Route as SymfonyRoute; + +/** + * @internal + */ +class MatchedRoute +{ + protected array $hostMatches = []; + protected array $pathMatches = []; + + public function __construct(protected SymfonyRoute $route, protected array $routeResult) + { + } + + public function withPathMatches(array $pathMatches): self + { + $target = clone $this; + $target->pathMatches = $pathMatches; + return $target; + } + + public function withHostMatches(array $hostMatches): self + { + $target = clone $this; + $target->hostMatches = $hostMatches; + return $target; + } + + public function getRoute(): SymfonyRoute + { + return $this->route; + } + + /** + * @return array + */ + public function getRouteResult(): array + { + return $this->routeResult; + } + + public function getFallbackScore(): int + { + return $this->route->getOption('fallback') === true ? 1 : 0; + } + + public function getHostMatchScore(): int + { + return empty($this->hostMatches[0]) ? 0 : 1; + } + + public function getPathMatchScore(int $index): int + { + $completeMatch = $this->pathMatches[0]; + $tailMatch = $this->pathMatches[$index] ?? ''; + // no tail, it's a complete match + if ($tailMatch === '') { + return strlen($completeMatch); + } + // otherwise, find length of complete match that does not contain tail + // example: complete: `/french/other`, tail: `/other` -> `strlen` of `/french` + return strpos($completeMatch, $tailMatch); + } +} diff --git a/typo3/sysext/core/Classes/Routing/SiteMatcher.php b/typo3/sysext/core/Classes/Routing/SiteMatcher.php index 57cca489b317..3428ef9a252e 100644 --- a/typo3/sysext/core/Classes/Routing/SiteMatcher.php +++ b/typo3/sysext/core/Classes/Routing/SiteMatcher.php @@ -20,7 +20,6 @@ namespace TYPO3\CMS\Core\Routing; use Psr\Http\Message\ServerRequestInterface; use Symfony\Component\Routing\Exception\NoConfigurationException; use Symfony\Component\Routing\Exception\ResourceNotFoundException; -use Symfony\Component\Routing\Matcher\UrlMatcher; use Symfony\Component\Routing\RequestContext; use TYPO3\CMS\Core\Cache\CacheManager; use TYPO3\CMS\Core\Exception\SiteNotFoundException; @@ -146,7 +145,7 @@ class SiteMatcher implements SingletonInterface 443, $uri->getPath() ); - $matcher = new UrlMatcher($collection, $context); + $matcher = new BestUrlMatcher($collection, $context); try { $result = $matcher->match($uri->getPath()); return new SiteRouteResult( @@ -194,23 +193,25 @@ class SiteMatcher implements SingletonInterface */ protected function getRouteCollectionForAllSites(): RouteCollection { - $groupedRoutes = []; + $collection = new RouteCollection(); foreach ($this->finder->getAllSites() as $site) { // Add the site as entrypoint // @todo Find a way to test only this basic route against chinese characters, as site languages kicking // always in. Do the rawurldecode() here to to be consistent with language preparations. + $uri = $site->getBase(); $route = new Route( (rawurldecode($uri->getPath()) ?: '/') . '{tail}', ['site' => $site, 'language' => null, 'tail' => ''], array_filter(['tail' => '.*', 'port' => (string)$uri->getPort()]), - ['utf8' => true], + ['utf8' => true, 'fallback' => true], // @todo Verify if host should here covered with idn_to_ascii() to be consistent with preparation for languages. $uri->getHost() ?: '', $uri->getScheme() === '' ? [] : [$uri->getScheme()] ); $identifier = 'site_' . $site->getIdentifier(); - $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route; + $collection->add($identifier, $route); + // Add all languages foreach ($site->getAllLanguages() as $siteLanguage) { $uri = $siteLanguage->getBase(); @@ -223,31 +224,7 @@ class SiteMatcher implements SingletonInterface $uri->getScheme() === '' ? [] : [$uri->getScheme()] ); $identifier = 'site_' . $site->getIdentifier() . '_' . $siteLanguage->getLanguageId(); - $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route; - } - } - return $this->createRouteCollectionFromGroupedRoutes($groupedRoutes); - } - - /** - * As the {tail} parameter is greedy, it needs to be ensured that the one with the - * most specific part matches first. - * - * @param array $groupedRoutes - * @return RouteCollection - */ - protected function createRouteCollectionFromGroupedRoutes(array $groupedRoutes): RouteCollection - { - $collection = new RouteCollection(); - // Ensure more generic routes containing '-' in host identifier, processed at last - krsort($groupedRoutes); - foreach ($groupedRoutes as $groupedRoutesPerHost) { - krsort($groupedRoutesPerHost); - foreach ($groupedRoutesPerHost as $groupedRoutesPerPath) { - krsort($groupedRoutesPerPath); - foreach ($groupedRoutesPerPath as $identifier => $route) { - $collection->add($identifier, $route); - } + $collection->add($identifier, $route); } } return $collection; diff --git a/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php b/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php index 54605caed746..970bf15204f6 100644 --- a/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php +++ b/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php @@ -71,12 +71,7 @@ class SiteMatcherTest extends UnitTestCase ], ], ]); - $finderMock = $this - ->getMockBuilder(SiteFinder::class) - ->onlyMethods(['getAllSites']) - ->disableOriginalConstructor() - ->getMock(); - $finderMock->method('getAllSites')->willReturn(['main' => $site, 'second' => $secondSite]); + $finderMock = $this->createSiteFinderMock($site, $secondSite); $subject = new SiteMatcher($finderMock); $request = new ServerRequest('http://9-5.typo3.test/da/my-page/'); @@ -109,12 +104,14 @@ class SiteMatcherTest extends UnitTestCase // Matches english self::assertEquals(0, $result->getLanguage()->getLanguageId()); + // @todo this is a random result and needs to be refined + // www.example.com is not defined at all, but it actually "matches"... $request = new ServerRequest('http://www.example.com/'); /** @var SiteRouteResult $result */ $result = $subject->matchRequest($request); - // Nothing found, only the empty site, but finds the last site ("second") according to the algorithm + // Nothing found, only the empty site, but finds a random site ("main") according to the algorithm self::assertNull($result->getLanguage()); - self::assertEquals('second', $result->getSite()->getIdentifier()); + self::assertEquals('main', $result->getSite()->getIdentifier()); } /** @@ -169,13 +166,7 @@ class SiteMatcherTest extends UnitTestCase ], ], ]); - /** @var SiteFinder $finderMock */ - $finderMock = $this - ->getMockBuilder(SiteFinder::class) - ->onlyMethods(['getAllSites']) - ->disableOriginalConstructor() - ->getMock(); - $finderMock->method('getAllSites')->willReturn(['main' => $site, 'second' => $secondSite]); + $finderMock = $this->createSiteFinderMock($site, $secondSite); $subject = new SiteMatcher($finderMock); $request = new ServerRequest('https://www.example.com/de'); @@ -198,4 +189,87 @@ class SiteMatcherTest extends UnitTestCase self::assertEquals($secondSite, $result->getSite()); self::assertNull($result->getLanguage()); } + + public static function bestMatchingUrlIsUsedDataProvider(): \Generator + { + yield ['https://example.org/page', '1-main', 'en_US']; + yield ['https://example.org/', '1-main', 'en_US']; + yield ['https://example.org/f', '1-main', 'en_US']; + yield ['https://example.org/fr', '3-fr', 'fr_FR']; + yield ['https://example.org/friendly-page', '1-main', 'en_US']; + yield ['https://example.org/fr/', '3-fr', 'fr_FR']; + yield ['https://example.org/fr/page', '3-fr', 'fr_FR']; + yield ['https://example.org/de', '1-main', 'de_DE']; + yield ['https://example.org/deterministic', '1-main', 'en_US']; + yield ['https://example.org/dk', '2-dk', 'da_DK']; + yield ['https://example.org/dkother', '1-main', 'en_US']; + } + + /** + * @test + * @dataProvider bestMatchingUrlIsUsedDataProvider + */ + public function bestMatchingUrlIsUsed(string $requestUri, string $expectedSite, string $expectedLocale): void + { + $mainSite = new Site('1-main', 31, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/', + 'locale' => 'en_US', + ], + [ + 'languageId' => 2, + 'base' => 'https://example.org/de/', + 'locale' => 'de_DE', + ], + ], + ]); + $dkSite = new Site('2-dk', 21, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/dk/', + 'locale' => 'da_DK', + ], + ], + ]); + $frSite = new Site('3-fr', 11, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/fr/', + 'locale' => 'fr_FR', + ], + ], + ]); + + $finderMock = $this->createSiteFinderMock($mainSite, $dkSite, $frSite); + $subject = new SiteMatcher($finderMock); + + $request = new ServerRequest($requestUri); + /** @var SiteRouteResult $result */ + $result = $subject->matchRequest($request); + + self::assertSame($expectedSite, $result->getSite()->getIdentifier()); + self::assertSame($expectedLocale, $result->getLanguage()->getLocale()); + } + + private function createSiteFinderMock(Site ...$sites): SiteFinder + { + /** @var SiteFinder $finderMock */ + $finderMock = $this + ->getMockBuilder(SiteFinder::class) + ->onlyMethods(['getAllSites']) + ->disableOriginalConstructor() + ->getMock(); + $finderMock->method('getAllSites')->willReturn(array_combine( + array_map(static function (Site $site) { return $site->getIdentifier(); }, $sites), + $sites + )); + return $finderMock; + } } -- GitLab