diff --git a/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php b/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php new file mode 100644 index 0000000000000000000000000000000000000000..593a7e05aa83938bc7448bf0bb27b208084ce53e --- /dev/null +++ b/typo3/sysext/core/Classes/Routing/BestUrlMatcher.php @@ -0,0 +1,151 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Routing; + +use Symfony\Component\Routing\Matcher\RedirectableUrlMatcherInterface; +use Symfony\Component\Routing\Matcher\UrlMatcher; +use Symfony\Component\Routing\RouteCollection as SymfonyRouteCollection; +use TYPO3\CMS\Core\Utility\GeneralUtility; + +/** + * @internal + */ +class BestUrlMatcher extends UrlMatcher +{ + protected function matchCollection(string $pathinfo, SymfonyRouteCollection $routes): array + { + $matchedRoutes = $this->preMatchCollection($pathinfo, $routes); + $matches = count($matchedRoutes); + if ($matches === 0) { + return []; + } + if ($matches === 1) { + return $matchedRoutes[0]->getRouteResult(); + } + usort($matchedRoutes, [$this, 'sortMatchedRoutes']); + return array_shift($matchedRoutes)->getRouteResult(); + } + + /** + * Tries to match a URL with a set of routes. + * Basically all code has been duplicated from `UrlMatcher::matchCollection`, the difference is + * it does not just return the first match, but return all possible matches for further reduction. + * + * @param string $pathinfo The path info to be parsed + * @return list<MatchedRoute> + */ + protected function preMatchCollection(string $pathinfo, SymfonyRouteCollection $routes): array + { + $matchedRoutes = []; + + // HEAD and GET are equivalent as per RFC + $method = $this->context->getMethod(); + if ($method === 'HEAD') { + $method = 'GET'; + } + $supportsTrailingSlash = $method === 'GET' && $this instanceof RedirectableUrlMatcherInterface; + $trimmedPathinfo = rtrim($pathinfo, '/') ?: '/'; + + foreach ($routes as $name => $route) { + $compiledRoute = $route->compile(); + $staticPrefix = rtrim($compiledRoute->getStaticPrefix(), '/'); + $requiredMethods = $route->getMethods(); + + // check the static prefix of the URL first. Only use the more expensive preg_match when it matches + if ($staticPrefix !== '' && !str_starts_with($trimmedPathinfo, $staticPrefix)) { + continue; + } + $regex = $compiledRoute->getRegex(); + + $pos = strrpos($regex, '$'); + $hasTrailingSlash = $regex[$pos - 1] === '/'; + $regex = substr_replace($regex, '/?$', $pos - $hasTrailingSlash, 1 + $hasTrailingSlash); + + if (!preg_match($regex, $pathinfo, $matches)) { + continue; + } + + $hasTrailingVar = $trimmedPathinfo !== $pathinfo && preg_match('#\{[\w\x80-\xFF]+\}/?$#', $route->getPath()); + + if ($hasTrailingVar && ($hasTrailingSlash || (null === $m = $matches[\count($compiledRoute->getPathVariables())] ?? null) || '/' !== ($m[-1] ?? '/')) && preg_match($regex, $trimmedPathinfo, $m)) { + if ($hasTrailingSlash) { + $matches = $m; + } else { + $hasTrailingVar = false; + } + } + + $hostMatches = []; + if ($compiledRoute->getHostRegex() && !preg_match($compiledRoute->getHostRegex(), $this->context->getHost(), $hostMatches)) { + continue; + } + + $attributes = $this->getAttributes($route, $name, array_replace($matches, $hostMatches)); + + $status = $this->handleRouteRequirements($pathinfo, $name, $route, $attributes); + + if ($status[0] === self::REQUIREMENT_MISMATCH) { + continue; + } + + if ($pathinfo !== '/' && !$hasTrailingVar && $hasTrailingSlash === ($trimmedPathinfo === $pathinfo)) { + if ($supportsTrailingSlash && (!$requiredMethods || \in_array('GET', $requiredMethods))) { + return $this->allow = $this->allowSchemes = []; + } + continue; + } + + if ($route->getSchemes() && !$route->hasScheme($this->context->getScheme())) { + $this->allowSchemes = array_merge($this->allowSchemes, $route->getSchemes()); + continue; + } + + if ($requiredMethods && !\in_array($method, $requiredMethods)) { + $this->allow = array_merge($this->allow, $requiredMethods); + continue; + } + + $matchedRoute = GeneralUtility::makeInstance( + MatchedRoute::class, + $route, + array_replace($attributes, $status[1] ?? []) + ); + $matchedRoutes[] = $matchedRoute->withPathMatches($matches)->withHostMatches($hostMatches); + } + + return $matchedRoutes; + } + + /** + * Sorts the best matching route result to the beginning + */ + protected function sortMatchedRoutes(MatchedRoute $a, MatchedRoute $b): int + { + if ($a->getFallbackScore() !== $b->getFallbackScore()) { + // sort fallbacks to the end + return $a->getFallbackScore() <=> $b->getFallbackScore(); + } + if ($b->getHostMatchScore() !== $a->getHostMatchScore()) { + // sort more specific host matches to the beginning + return $b->getHostMatchScore() <=> $a->getHostMatchScore(); + } + // index `1` refers to the array index containing the corresponding `tail` match + // @todo not sure, whether `tail` can be defined generic, it's hard coded in `SiteMatcher` + return $b->getPathMatchScore(1) <=> $a->getPathMatchScore(1); + } +} diff --git a/typo3/sysext/core/Classes/Routing/MatchedRoute.php b/typo3/sysext/core/Classes/Routing/MatchedRoute.php new file mode 100644 index 0000000000000000000000000000000000000000..4e1c952f50c050bc27a6a9546a038344df1911d4 --- /dev/null +++ b/typo3/sysext/core/Classes/Routing/MatchedRoute.php @@ -0,0 +1,83 @@ +<?php + +declare(strict_types=1); + +/* + * This file is part of the TYPO3 CMS project. + * + * It is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, either version 2 + * of the License, or any later version. + * + * For the full copyright and license information, please read the + * LICENSE.txt file that was distributed with this source code. + * + * The TYPO3 project - inspiring people to share! + */ + +namespace TYPO3\CMS\Core\Routing; + +use Symfony\Component\Routing\Route as SymfonyRoute; + +/** + * @internal + */ +class MatchedRoute +{ + protected array $hostMatches = []; + protected array $pathMatches = []; + + public function __construct(protected SymfonyRoute $route, protected array $routeResult) + { + } + + public function withPathMatches(array $pathMatches): self + { + $target = clone $this; + $target->pathMatches = $pathMatches; + return $target; + } + + public function withHostMatches(array $hostMatches): self + { + $target = clone $this; + $target->hostMatches = $hostMatches; + return $target; + } + + public function getRoute(): SymfonyRoute + { + return $this->route; + } + + /** + * @return array + */ + public function getRouteResult(): array + { + return $this->routeResult; + } + + public function getFallbackScore(): int + { + return $this->route->getOption('fallback') === true ? 1 : 0; + } + + public function getHostMatchScore(): int + { + return empty($this->hostMatches[0]) ? 0 : 1; + } + + public function getPathMatchScore(int $index): int + { + $completeMatch = $this->pathMatches[0]; + $tailMatch = $this->pathMatches[$index] ?? ''; + // no tail, it's a complete match + if ($tailMatch === '') { + return strlen($completeMatch); + } + // otherwise, find length of complete match that does not contain tail + // example: complete: `/french/other`, tail: `/other` -> `strlen` of `/french` + return strpos($completeMatch, $tailMatch); + } +} diff --git a/typo3/sysext/core/Classes/Routing/SiteMatcher.php b/typo3/sysext/core/Classes/Routing/SiteMatcher.php index 57cca489b3170fb934c36bfe2fe774cc01d55350..3428ef9a252e72e1cc291007f6324ec5278cc05b 100644 --- a/typo3/sysext/core/Classes/Routing/SiteMatcher.php +++ b/typo3/sysext/core/Classes/Routing/SiteMatcher.php @@ -20,7 +20,6 @@ namespace TYPO3\CMS\Core\Routing; use Psr\Http\Message\ServerRequestInterface; use Symfony\Component\Routing\Exception\NoConfigurationException; use Symfony\Component\Routing\Exception\ResourceNotFoundException; -use Symfony\Component\Routing\Matcher\UrlMatcher; use Symfony\Component\Routing\RequestContext; use TYPO3\CMS\Core\Cache\CacheManager; use TYPO3\CMS\Core\Exception\SiteNotFoundException; @@ -146,7 +145,7 @@ class SiteMatcher implements SingletonInterface 443, $uri->getPath() ); - $matcher = new UrlMatcher($collection, $context); + $matcher = new BestUrlMatcher($collection, $context); try { $result = $matcher->match($uri->getPath()); return new SiteRouteResult( @@ -194,23 +193,25 @@ class SiteMatcher implements SingletonInterface */ protected function getRouteCollectionForAllSites(): RouteCollection { - $groupedRoutes = []; + $collection = new RouteCollection(); foreach ($this->finder->getAllSites() as $site) { // Add the site as entrypoint // @todo Find a way to test only this basic route against chinese characters, as site languages kicking // always in. Do the rawurldecode() here to to be consistent with language preparations. + $uri = $site->getBase(); $route = new Route( (rawurldecode($uri->getPath()) ?: '/') . '{tail}', ['site' => $site, 'language' => null, 'tail' => ''], array_filter(['tail' => '.*', 'port' => (string)$uri->getPort()]), - ['utf8' => true], + ['utf8' => true, 'fallback' => true], // @todo Verify if host should here covered with idn_to_ascii() to be consistent with preparation for languages. $uri->getHost() ?: '', $uri->getScheme() === '' ? [] : [$uri->getScheme()] ); $identifier = 'site_' . $site->getIdentifier(); - $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route; + $collection->add($identifier, $route); + // Add all languages foreach ($site->getAllLanguages() as $siteLanguage) { $uri = $siteLanguage->getBase(); @@ -223,31 +224,7 @@ class SiteMatcher implements SingletonInterface $uri->getScheme() === '' ? [] : [$uri->getScheme()] ); $identifier = 'site_' . $site->getIdentifier() . '_' . $siteLanguage->getLanguageId(); - $groupedRoutes[($uri->getScheme() ?: '-') . ($uri->getHost() ?: '-')][$uri->getPath() ?: '/'][$identifier] = $route; - } - } - return $this->createRouteCollectionFromGroupedRoutes($groupedRoutes); - } - - /** - * As the {tail} parameter is greedy, it needs to be ensured that the one with the - * most specific part matches first. - * - * @param array $groupedRoutes - * @return RouteCollection - */ - protected function createRouteCollectionFromGroupedRoutes(array $groupedRoutes): RouteCollection - { - $collection = new RouteCollection(); - // Ensure more generic routes containing '-' in host identifier, processed at last - krsort($groupedRoutes); - foreach ($groupedRoutes as $groupedRoutesPerHost) { - krsort($groupedRoutesPerHost); - foreach ($groupedRoutesPerHost as $groupedRoutesPerPath) { - krsort($groupedRoutesPerPath); - foreach ($groupedRoutesPerPath as $identifier => $route) { - $collection->add($identifier, $route); - } + $collection->add($identifier, $route); } } return $collection; diff --git a/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php b/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php index 54605caed74627c5ca4d4ecc781d1b01a7378925..970bf15204f67a9f42df6389d149f22aab5a8b80 100644 --- a/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php +++ b/typo3/sysext/core/Tests/Unit/Routing/SiteMatcherTest.php @@ -71,12 +71,7 @@ class SiteMatcherTest extends UnitTestCase ], ], ]); - $finderMock = $this - ->getMockBuilder(SiteFinder::class) - ->onlyMethods(['getAllSites']) - ->disableOriginalConstructor() - ->getMock(); - $finderMock->method('getAllSites')->willReturn(['main' => $site, 'second' => $secondSite]); + $finderMock = $this->createSiteFinderMock($site, $secondSite); $subject = new SiteMatcher($finderMock); $request = new ServerRequest('http://9-5.typo3.test/da/my-page/'); @@ -109,12 +104,14 @@ class SiteMatcherTest extends UnitTestCase // Matches english self::assertEquals(0, $result->getLanguage()->getLanguageId()); + // @todo this is a random result and needs to be refined + // www.example.com is not defined at all, but it actually "matches"... $request = new ServerRequest('http://www.example.com/'); /** @var SiteRouteResult $result */ $result = $subject->matchRequest($request); - // Nothing found, only the empty site, but finds the last site ("second") according to the algorithm + // Nothing found, only the empty site, but finds a random site ("main") according to the algorithm self::assertNull($result->getLanguage()); - self::assertEquals('second', $result->getSite()->getIdentifier()); + self::assertEquals('main', $result->getSite()->getIdentifier()); } /** @@ -169,13 +166,7 @@ class SiteMatcherTest extends UnitTestCase ], ], ]); - /** @var SiteFinder $finderMock */ - $finderMock = $this - ->getMockBuilder(SiteFinder::class) - ->onlyMethods(['getAllSites']) - ->disableOriginalConstructor() - ->getMock(); - $finderMock->method('getAllSites')->willReturn(['main' => $site, 'second' => $secondSite]); + $finderMock = $this->createSiteFinderMock($site, $secondSite); $subject = new SiteMatcher($finderMock); $request = new ServerRequest('https://www.example.com/de'); @@ -198,4 +189,87 @@ class SiteMatcherTest extends UnitTestCase self::assertEquals($secondSite, $result->getSite()); self::assertNull($result->getLanguage()); } + + public static function bestMatchingUrlIsUsedDataProvider(): \Generator + { + yield ['https://example.org/page', '1-main', 'en_US']; + yield ['https://example.org/', '1-main', 'en_US']; + yield ['https://example.org/f', '1-main', 'en_US']; + yield ['https://example.org/fr', '3-fr', 'fr_FR']; + yield ['https://example.org/friendly-page', '1-main', 'en_US']; + yield ['https://example.org/fr/', '3-fr', 'fr_FR']; + yield ['https://example.org/fr/page', '3-fr', 'fr_FR']; + yield ['https://example.org/de', '1-main', 'de_DE']; + yield ['https://example.org/deterministic', '1-main', 'en_US']; + yield ['https://example.org/dk', '2-dk', 'da_DK']; + yield ['https://example.org/dkother', '1-main', 'en_US']; + } + + /** + * @test + * @dataProvider bestMatchingUrlIsUsedDataProvider + */ + public function bestMatchingUrlIsUsed(string $requestUri, string $expectedSite, string $expectedLocale): void + { + $mainSite = new Site('1-main', 31, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/', + 'locale' => 'en_US', + ], + [ + 'languageId' => 2, + 'base' => 'https://example.org/de/', + 'locale' => 'de_DE', + ], + ], + ]); + $dkSite = new Site('2-dk', 21, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/dk/', + 'locale' => 'da_DK', + ], + ], + ]); + $frSite = new Site('3-fr', 11, [ + 'base' => 'https://example.org/', + 'languages' => [ + [ + 'languageId' => 0, + 'base' => 'https://example.org/fr/', + 'locale' => 'fr_FR', + ], + ], + ]); + + $finderMock = $this->createSiteFinderMock($mainSite, $dkSite, $frSite); + $subject = new SiteMatcher($finderMock); + + $request = new ServerRequest($requestUri); + /** @var SiteRouteResult $result */ + $result = $subject->matchRequest($request); + + self::assertSame($expectedSite, $result->getSite()->getIdentifier()); + self::assertSame($expectedLocale, $result->getLanguage()->getLocale()); + } + + private function createSiteFinderMock(Site ...$sites): SiteFinder + { + /** @var SiteFinder $finderMock */ + $finderMock = $this + ->getMockBuilder(SiteFinder::class) + ->onlyMethods(['getAllSites']) + ->disableOriginalConstructor() + ->getMock(); + $finderMock->method('getAllSites')->willReturn(array_combine( + array_map(static function (Site $site) { return $site->getIdentifier(); }, $sites), + $sites + )); + return $finderMock; + } }