} (that forbids the dot) when a dot must follow // after it. This optimization cannot be applied when the next char is no real separator or when the next variable is // directly adjacent, e.g. '/{x}{y}'. $regexp .= '+'; } } else { if (!preg_match('//u', $regexp)) { $useUtf8 = false; } elseif (!$needsUtf8 && preg_match('/[\x80-\xFF]|(?= 0; --$i) { $token = $tokens[$i]; // variable is optional when it is not important and has a default value if ('variable' === $token[0] && !($token[5] ?? false) && $route->hasDefault($token[3])) { $firstOptional = $i; } else { break; } } } // compute the matching regexp $regexp = ''; for ($i = 0, $nbToken = \count($tokens); $i < $nbToken; ++$i) { $regexp .= self::computeRegexp($tokens, $i, $firstOptional); } $regexp = '{^'.$regexp.'$}sD'.($isHost ? 'i' : ''); // enable Utf8 matching if really required if ($needsUtf8) { $regexp .= 'u'; for ($i = 0, $nbToken = \count($tokens); $i < $nbToken; ++$i) { if ('variable' === $tokens[$i][0]) { $tokens[$i][4] = true; } } } return [ 'staticPrefix' => self::determineStaticPrefix($route, $tokens), 'regex' => $regexp, 'tokens' => array_reverse($tokens), 'variables' => $variables, ]; } /** * Determines the longest static prefix possible for a route. */ private static function determineStaticPrefix(Route $route, array $tokens): string { if ('text' !== $tokens[0][0]) { return ($route->hasDefault($tokens[0][3]) || '/' === $tokens[0][1]) ? '' : $tokens[0][1]; } $prefix = $tokens[0][1]; if (isset($tokens[1][1]) && '/' !== $tokens[1][1] && false === $route->hasDefault($tokens[1][3])) { $prefix .= $tokens[1][1]; } return $prefix; } /** * Returns the next static character in the Route pattern that will serve as a separator (or the empty string when none available). */ private static function findNextSeparator(string $pattern, bool $useUtf8): string { if ('' == $pattern) { // return empty string if pattern is empty or false (false which can be returned by substr) return ''; } // first remove all placeholders from the pattern so we can find the next real static character if ('' === $pattern = preg_replace('#\{[\w\x80-\xFF]+\}#', '', $pattern)) { return ''; } if ($useUtf8) { preg_match('/^./u', $pattern, $pattern); } return str_contains(static::SEPARATORS, $pattern[0]) ? $pattern[0] : ''; } /** * Computes the regexp used to match a specific token. It can be static text or a subpattern. * * @param array $tokens The route tokens * @param int $index The index of the current token * @param int $firstOptional The index of the first optional token */ private static function computeRegexp(array $tokens, int $index, int $firstOptional): string { $token = $tokens[$index]; if ('text' === $token[0]) { // Text tokens return preg_quote($token[1]); } else { // Variable tokens if (0 === $index && 0 === $firstOptional) { // When the only token is an optional variable token, the separator is required return sprintf('%s(?P<%s>%s)?', preg_quote($token[1]), $token[3], $token[2]); } else { $regexp = sprintf('%s(?P<%s>%s)', preg_quote($token[1]), $token[3], $token[2]); if ($index >= $firstOptional) { // Enclose each optional token in a subpattern to make it optional. // "?:" means it is non-capturing, i.e. the portion of the subject string that // matched the optional subpattern is not passed back. $regexp = "(?:$regexp"; $nbTokens = \count($tokens); if ($nbTokens - 1 == $index) { // Close the optional subpatterns $regexp .= str_repeat(')?', $nbTokens - $firstOptional - (0 === $firstOptional ? 1 : 0)); } } return $regexp; } } } private static function transformCapturingGroupsToNonCapturings(string $regexp): string { for ($i = 0; $i < \strlen($regexp); ++$i) { if ('\\' === $regexp[$i]) { ++$i; continue; } if ('(' !== $regexp[$i] || !isset($regexp[$i + 2])) { continue; } if ('*' === $regexp[++$i] || '?' === $regexp[$i]) { ++$i; continue; } $regexp = substr_replace($regexp, '?:', $i, 0); ++$i; } return $regexp; } }