Improve performance of ignoring files (#1971)

* Determine the type of ignore pattern once per pattern.

Performing this work each time a file was checked against the ignore
patterns accounted for around 4.5 of the 6 seconds that processing
ignored files was taking on a site with ~14,000 files and a small number
of ignore patterns. After this change, processing ignored files takes
less than 1.5 seconds.

* Cache the normalized paths on Resource and SourceFile.

Normalizing the paths is expensive, so avoid doing it multiple
times. `Util::normalize_path` is marked as memoized, but the
memoization is not effective because:

1. `memoize` doesn't appear to work with module functions.
2. Checking whether we have a memoized value is as expensive as
   normalizing the path.

This further drops the time it takes to process ignored files on a site
with ~14,000 files from ~1.5 seconds to ~0.6 seconds.
This commit is contained in:
Mark Rowe 2016-08-12 16:33:18 -07:00 committed by Thomas Reynolds
parent c264b05906
commit 65462cbc43
4 changed files with 57 additions and 20 deletions

View file

@ -14,16 +14,31 @@ module Middleman
Contract Or[String, Regexp, Proc] => RespondTo[:execute_descriptor] Contract Or[String, Regexp, Proc] => RespondTo[:execute_descriptor]
def ignore(path=nil, &block) def ignore(path=nil, &block)
@app.sitemap.invalidate_resources_not_ignored_cache! @app.sitemap.invalidate_resources_not_ignored_cache!
IgnoreDescriptor.new(path, block)
if path.is_a? Regexp
RegexpIgnoreDescriptor.new(path)
elsif path.is_a? String
path_clean = ::Middleman::Util.normalize_path(path)
if path_clean.include?('*') # It's a glob
GlobIgnoreDescriptor.new(path_clean)
else
StringIgnoreDescriptor.new(path_clean)
end
elsif block
BlockIgnoreDescriptor.new(nil, block)
else
IgnoreDescriptor.new(path, block)
end
end end
IgnoreDescriptor = Struct.new(:path, :block) do IgnoreDescriptor = Struct.new(:path, :block) do
def execute_descriptor(_app, resources) def execute_descriptor(_app, resources)
resources.map do |r| resources.map do |r|
# Ignore based on the source path (without template extensions) # Ignore based on the source path (without template extensions)
if ignored?(r.path) if ignored?(r.normalized_path)
r.ignore! r.ignore!
elsif !r.is_a?(ProxyResource) && r.file_descriptor && ignored?(r.file_descriptor[:relative_path].to_s) elsif !r.is_a?(ProxyResource) && r.file_descriptor && ignored?(r.file_descriptor.normalized_relative_path)
# This allows files to be ignored by their source file name (with template extensions) # This allows files to be ignored by their source file name (with template extensions)
r.ignore! r.ignore!
end end
@ -33,27 +48,38 @@ module Middleman
end end
def ignored?(match_path) def ignored?(match_path)
match_path = ::Middleman::Util.normalize_path(match_path) raise NotImplementedError
end
end
if path.is_a? Regexp class RegexpIgnoreDescriptor < IgnoreDescriptor
match_path =~ path def ignored?(match_path)
elsif path.is_a? String match_path =~ path
path_clean = ::Middleman::Util.normalize_path(path) end
end
if path_clean.include?('*') # It's a glob class GlobIgnoreDescriptor < IgnoreDescriptor
if defined?(::File::FNM_EXTGLOB) def ignored?(match_path)
::File.fnmatch(path_clean, match_path, ::File::FNM_EXTGLOB) if defined?(::File::FNM_EXTGLOB)
else ::File.fnmatch(path, match_path, ::File::FNM_EXTGLOB)
::File.fnmatch(path_clean, match_path) else
end ::File.fnmatch(path, match_path)
else
match_path == path_clean
end
elsif block
block.call(match_path)
end end
end end
end end
# Ignore descriptor for a plain String pattern: a candidate path is ignored
# only when it is exactly equal to `path`.
# NOTE(review): the comparison performs no normalization of its own, so
# callers presumably construct this with an already-normalized `path` --
# confirm at each call site.
class StringIgnoreDescriptor < IgnoreDescriptor
# @param match_path the candidate path to test
# @return [Boolean] true when match_path equals this descriptor's path
def ignored?(match_path)
match_path == path
end
end
# Ignore descriptor backed by a user-supplied block: a candidate path is
# ignored when the block returns a truthy value for it.
#
# Must inherit from IgnoreDescriptor: the struct supplies the two-argument
# constructor used as `BlockIgnoreDescriptor.new(nil, block)`, the `block`
# reader called below, and `execute_descriptor`. Without the superclass,
# construction raises ArgumentError.
class BlockIgnoreDescriptor < IgnoreDescriptor
  # @param match_path the candidate path handed to the block
  # @return the block's result; truthy means the path is ignored
  def ignored?(match_path)
    block.call(match_path)
  end
end
end end
end end
end end

View file

@ -47,7 +47,7 @@ module Middleman
) )
if should_ignore if should_ignore
d = ::Middleman::Sitemap::Extensions::Ignores::IgnoreDescriptor.new(target) d = ::Middleman::Sitemap::Extensions::Ignores::StringIgnoreDescriptor.new(target)
d.execute_descriptor(app, resources) d.execute_descriptor(app, resources)
end end

View file

@ -197,6 +197,13 @@ module Middleman
options[:content_type] || ::Rack::Mime.mime_type(ext, nil) options[:content_type] || ::Rack::Mime.mime_type(ext, nil)
end end
# This resource's @path after being passed through
# ::Middleman::Util.normalize_path. The result is computed on first use and
# cached in @normalized_path, so later calls skip the normalization.
# @return [String]
def normalized_path
  @normalized_path ||= begin
    raw_path = @path
    ::Middleman::Util.normalize_path(raw_path)
  end
end
def to_s def to_s
"#<#{self.class} path=#{@path}>" "#<#{self.class} path=#{@path}>"
end end

View file

@ -8,6 +8,10 @@ module Middleman
::Middleman::Sources.file_cache[full_path] ||= {} ::Middleman::Sources.file_cache[full_path] ||= {}
::Middleman::Sources.file_cache[full_path][version] ||= ::File.read(full_path) ::Middleman::Sources.file_cache[full_path][version] ||= ::File.read(full_path)
end end
# The string form of `relative_path` after being passed through
# ::Middleman::Util.normalize_path. Computed on first use and cached in
# @normalized_relative_path so repeated lookups skip the normalization.
def normalized_relative_path
  @normalized_relative_path ||= ::Middleman::Util.normalize_path(relative_path.to_s)
end
end end
# Sources handle multiple on-disk collections of files which make up # Sources handle multiple on-disk collections of files which make up