Skip to content

Commit

Permalink
TL-21732 scssphp: Improve performance by disabling unicode matching wh…
Browse files Browse the repository at this point in the history
…ere possible
  • Loading branch information
derschatta authored and codyfinegan committed Apr 10, 2024
1 parent 1f22412 commit 9f3e5a4
Showing 1 changed file with 82 additions and 35 deletions.
117 changes: 82 additions & 35 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,7 @@ public function __construct($sourceName, $sourceIndex = 0, $encoding = 'utf-8',
{
$this->sourceName = $sourceName ?: '(stdin)';
$this->sourceIndex = $sourceIndex;
$this->utf8 = ! $encoding || strtolower($encoding) === 'utf-8';
$this->patternModifiers = $this->utf8 ? 'Aisu' : 'Ais';
$this->setUtf8(! $encoding || strtolower($encoding) === 'utf-8');
$this->commentsSeen = [];
$this->allowVars = true;
$this->cssOnly = $cssOnly;
Expand All @@ -159,14 +158,33 @@ public function __construct($sourceName, $sourceIndex = 0, $encoding = 'utf-8',
$commentMultiRight = '\*\/';

static::$commentPattern = $commentMultiLeft . '.*?' . $commentMultiRight;
static::$whitePattern = $this->utf8
? '/' . $commentSingle . '[^\n]*\s*|(' . static::$commentPattern . ')\s*|\s+/AisuS'
: '/' . $commentSingle . '[^\n]*\s*|(' . static::$commentPattern . ')\s*|\s+/AisS';
static::$whitePattern = $commentSingle . '[^\n]*\s*|(' . static::$commentPattern . ')\s*|\s+';
}

$this->cache = $cache;
}

/**
* Report whether utf-8 parsing is currently enabled for this parser.
*
* Returns the value of the internal utf8 flag unchanged; the flag is
* not modified by this call.
*
* @return bool The current value of the utf8 flag.
*/
protected function getUtf8()
{
return $this->utf8;
}

/**
 * Switch utf-8 parsing on or off.
 *
 * Stores the flag and refreshes the regex modifier string to match it:
 * 'Aisu' (with the unicode "u" modifier) when utf-8 parsing is enabled,
 * 'Ais' otherwise. The two must stay in sync, so the flag should only
 * be changed through this method.
 *
 * @param bool $utf8 Whether utf-8 parsing should be enabled.
 */
protected function setUtf8($utf8)
{
    $this->utf8 = $utf8;

    if ($this->utf8) {
        // Unicode-aware matching.
        $this->patternModifiers = 'Aisu';
    } else {
        // Byte-wise matching.
        $this->patternModifiers = 'Ais';
    }
}

/**
* Get source file name
*
Expand Down Expand Up @@ -264,12 +282,26 @@ public function parse($buffer)
$buffer = substr($buffer, 3);
}

$old_utf8 = $this->getUtf8();

$this->buffer = rtrim($buffer, "\x00..\x1f");
$this->count = 0;
$this->env = null;
$this->inParens = false;
$this->eatWhiteDefault = true;

// Disable utf-8 mode if file only contains ASCII characters.
// In versions of PHP below 7.4, when the unicode (u) flag is set PREG
// will validate the entire string is valid utf-8 on every preg_match
// call.
// This is a quadratic time problem: the longer a file is, the more
// characters it has to check on each call and the more regex calls are
// made. This causes parsing to take more than 200x longer with utf-8
// enabled on a large file.
if ($old_utf8) {
$this->setUtf8(! mb_check_encoding($buffer, 'ascii'));
}

$this->saveEncoding();
$this->extractLineNumbers($buffer);

Expand All @@ -278,25 +310,33 @@ public function parse($buffer)
throw new ParserException($message);
}

$this->pushBlock(null); // root block
$this->whitespace();
$this->pushBlock(null);
$this->popBlock();
try {
$this->pushBlock(null); // root block
$this->whitespace();
$this->pushBlock(null);
$this->popBlock();

while ($this->parseChunk()) {
;
}
while ($this->parseChunk()) {
;
}

if ($this->count !== \strlen($this->buffer)) {
throw $this->parseError();
}
if ($this->count !== strlen($this->buffer)) {
throw $this->parseError();
}

if (! empty($this->env->parent)) {
throw $this->parseError('unclosed block');
}
if (! empty($this->env->parent)) {
throw $this->parseError('unclosed block');
}

$this->restoreEncoding();
assert($this->env !== null);
if ($this->charset) {
array_unshift($this->env->children, $this->charset);
}
} finally {
assert($this->env !== null);
$this->restoreEncoding();
$this->setUtf8($old_utf8);
}


if ($this->cache) {
$this->cache->setCache('parse', $cacheKey, $this->env, $parseOptions);
Expand All @@ -323,20 +363,23 @@ public function parseValue($buffer, &$out)
$this->eatWhiteDefault = true;
$this->buffer = (string) $buffer;

$this->setUtf8(! mb_check_encoding($buffer, 'ascii'));
$this->saveEncoding();
$this->extractLineNumbers($this->buffer);

$list = $this->valueList($out);
try {
$list = $this->valueList($out);
} finally {
if ($this->count !== \strlen($this->buffer)) {
$error = $this->parseError('Expected end of value');
$message = 'Passing trailing content after the expression when parsing a value is deprecated since Scssphp 1.12.0 and will be an error in 2.0. ' . $error->getMessage();

if ($this->count !== \strlen($this->buffer)) {
$error = $this->parseError('Expected end of value');
$message = 'Passing trailing content after the expression when parsing a value is deprecated since Scssphp 1.12.0 and will be an error in 2.0. ' . $error->getMessage();
@trigger_error($message, E_USER_DEPRECATED);
}

@trigger_error($message, E_USER_DEPRECATED);
$this->restoreEncoding();
}

$this->restoreEncoding();

return $list;
}

Expand Down Expand Up @@ -367,9 +410,11 @@ public function parseSelector($buffer, &$out, $shouldValidate = true)
$this->whitespace();
$this->discardComments = false;

$selector = $this->selectors($out);

$this->restoreEncoding();
try {
$selector = $this->selectors($out);
} finally {
$this->restoreEncoding();
}

if ($shouldValidate && $this->count !== strlen($buffer)) {
throw $this->parseError("`" . substr($buffer, $this->count) . "` is not a valid Selector in `$buffer`");
Expand Down Expand Up @@ -402,9 +447,11 @@ public function parseMediaQueryList($buffer, &$out)

$this->whitespace();

$isMediaQuery = $this->mediaQueryList($out);

$this->restoreEncoding();
try {
$isMediaQuery = $this->mediaQueryList($out);
} finally {
$this->restoreEncoding();
}

return $isMediaQuery;
}
Expand Down Expand Up @@ -1607,7 +1654,7 @@ protected function whitespace()
{
$gotWhite = false;

while (preg_match(static::$whitePattern, $this->buffer, $m, 0, $this->count)) {
while (preg_match('/' . static::$whitePattern . '/' . $this->patternModifiers . 'S', $this->buffer, $m, 0, $this->count)) {
if (isset($m[1]) && empty($this->commentsSeen[$this->count])) {
// comment that are kept in the output CSS
$comment = [];
Expand Down Expand Up @@ -3411,7 +3458,7 @@ protected function propertyName(&$out)
}

// match comment hack
if (preg_match(static::$whitePattern, $this->buffer, $m, 0, $this->count)) {
if (preg_match('/' . static::$whitePattern . '/' . $this->patternModifiers . 'S', $this->buffer, $m, 0, $this->count)) {
if (! empty($m[0])) {
$parts[] = $m[0];
$this->count += \strlen($m[0]);
Expand Down

0 comments on commit 9f3e5a4

Please sign in to comment.