Use StringScanner instead of String#scan to improve block parsing performance #167

Merged
CHANGELOG.md: 1 change (1 addition & 0 deletions)
@@ -6,6 +6,7 @@
 * Deprecate `add_rule!` (positional arguments) and `add_rule_with_offsets!` for `add_rule!` (keyword argument)
 * RuleSet initialize now takes keyword argument, positional arguments are still supported but deprecated
 * Removed OffsetAwareRuleSet, it's a RuleSet with optional attributes filename and offset
+* Improved performance of block parsing by using StringScanner

 ### Version v1.18.0

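For context on the CHANGELOG entry above: with `String#scan` plus a block, only the matched text is yielded, so the old code had to call `Regexp.last_match.offset(0)` to find out where each token sits in the source, which goes through a MatchData object for every token. `StringScanner` keeps an explicit cursor, so the same information is two integer reads. A minimal sketch of the difference (`TOKEN_RX` and the sample CSS are made up for illustration, not the parser's real tokenizer):

    require 'strscan'

    TOKEN_RX = /\s+|[{}]|[^\s{}]+/ # simplified stand-in for the real tokenizer regex
    css = 'p { color: red }'

    # Before: String#scan yields the token text; offsets come from the per-match MatchData.
    css.scan(TOKEN_RX) do |token|
      start_pos, end_pos = Regexp.last_match.offset(0)
      # ... handle token spanning css[start_pos...end_pos]
    end

    # After: StringScanner advances a cursor; offsets are plain Integer positions.
    scanner = StringScanner.new(css)
    until scanner.eos?
      start_pos = scanner.pos
      token = scanner.scan(TOKEN_RX)
      end_pos = scanner.pos
      # ... handle token spanning css[start_pos...end_pos]
    end
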
lib/css_parser/parser.rb: 19 changes (13 additions & 6 deletions)
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require 'strscan'
+
 module CssParser
   # Exception class used for any errors encountered while downloading remote files.
   class RemoteFileError < IOError; end
@@ -17,6 +19,7 @@ class CircularReferenceError < StandardError; end
   # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
   class Parser
     USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze
+    RULESET_TOKENIZER_RX = /\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/.freeze
     STRIP_CSS_COMMENTS_RX = %r{/\*.*?\*/}m.freeze
     STRIP_HTML_COMMENTS_RX = /<!--|-->/m.freeze
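As a rough illustration of what the newly named RULESET_TOKENIZER_RX (previously an inline literal passed to `String#scan`, see the next hunk) splits a ruleset into; the sample string is invented:

    require 'strscan'

    RULESET_TOKENIZER_RX = /\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/

    scanner = StringScanner.new('p.lead { color: red; }')
    tokens = []
    tokens << scanner.scan(RULESET_TOKENIZER_RX) until scanner.eos?

    p tokens
    # => ["p.lead", " ", "{", " ", "color:", " ", "red;", " ", "}"]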

@@ -362,11 +365,15 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:

       # once we are in a rule, we will use this to store where we started if we are capturing offsets
       rule_start = nil
-      offset = nil
+      start_offset = nil
+      end_offset = nil

-      block.scan(/\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/) do |token|
+      scanner = StringScanner.new(block)
+      until scanner.eos?
         # save the regex offset so that we know where in the file we are
-        offset = Regexp.last_match.offset(0) if options[:capture_offsets]
+        start_offset = scanner.pos
+        token = scanner.scan(RULESET_TOKENIZER_RX)
+        end_offset = scanner.pos

         if token.start_with?('"') # found un-escaped double quote
           in_string = !in_string
@@ -398,7 +405,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
               media_types: current_media_queries
             }
             if options[:capture_offsets]
-              add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
+              add_rule_options.merge!(filename: options[:filename], offset: rule_start..end_offset)
             end
             add_rule!(**add_rule_options)
           end
@@ -459,7 +466,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
            current_selectors << token

            # mark this as the beginning of the selector unless we have already marked it
-           rule_start = offset.first if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
+           rule_start = start_offset if options[:capture_offsets] && rule_start.nil? && token =~ /^[^\s]+$/
          end
        end

@@ -471,7 +478,7 @@ def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
          media_types: current_media_queries
        }
        if options[:capture_offsets]
-         add_rule_options.merge!(filename: options[:filename], offset: rule_start..offset.last)
+         add_rule_options.merge!(filename: options[:filename], offset: rule_start..end_offset)
        end
        add_rule!(**add_rule_options)
      end
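Putting the pieces together: the new loop reads the scanner position before and after each token, remembers the position of the first selector token as rule_start, and passes rule_start..end_offset to add_rule! once the rule is finished. A condensed, standalone sketch of that pattern (not the parser's full state machine; the sample CSS is invented):

    require 'strscan'

    RULESET_TOKENIZER_RX = /\s+|\\{2,}|\\?[{}\s"]|[()]|.[^\s"{}()\\]*/

    css = 'p { color: red }'
    scanner = StringScanner.new(css)
    rule_start = nil
    end_offset = nil

    until scanner.eos?
      start_offset = scanner.pos
      token = scanner.scan(RULESET_TOKENIZER_RX)
      end_offset = scanner.pos # scanner position just past the token

      # remember where the first non-whitespace (selector) token began
      rule_start ||= start_offset if token.match?(/\A\S+\z/)
    end

    p rule_start..end_offset       # => 0..16
    p css[rule_start...end_offset] # => "p { color: red }"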