Improve performance of ignoring files (#1971)

* Determine the type of ignore pattern once per pattern.

Performing this work while checking whether each file was ignored accounted
for around 4.5 of the 6 seconds that processing ignored files was taking
on a site with ~14,000 files and a small number of ignore patterns.
After this change, processing ignored files takes less than 1.5 seconds.

* Cache the normalized paths on Resource and SourceFile.

Normalizing the paths is expensive, so avoid doing it multiple
times. `Util::normalize_path` is marked as memoized, but this
is not effective as:

1. `memoize` doesn't appear to work with module functions.
2. Checking whether we have a memoized value is as expensive as
   normalizing the path.

This further drops the time it takes to process ignored files on a site
with ~14,000 files from ~1.5 seconds to ~0.6 seconds.
This commit is contained in:
Mark Rowe 2016-08-12 16:33:18 -07:00 committed by Thomas Reynolds
parent c264b05906
commit 65462cbc43
4 changed files with 57 additions and 20 deletions

View file

@ -14,16 +14,31 @@ module Middleman
# Register an ignore rule and return a descriptor object for it.
#
# The descriptor class is chosen once, here, from the kind of pattern given:
# a Regexp, a String (glob when it contains '*', exact match otherwise),
# or a block. The sitemap's "resources not ignored" cache is invalidated
# before the descriptor is built.
#
# @param path [String, Regexp, Proc, nil] the pattern to ignore
# @param block [Proc] optional predicate used when `path` is neither a Regexp nor a String
# @return [#execute_descriptor]
Contract Or[String, Regexp, Proc] => RespondTo[:execute_descriptor]
def ignore(path=nil, &block)
@app.sitemap.invalidate_resources_not_ignored_cache!
# NOTE(review): this instance is discarded (the conditional below supplies
# the return value); looks like a leftover from the pre-change code — confirm.
IgnoreDescriptor.new(path, block)
if path.is_a? Regexp
RegexpIgnoreDescriptor.new(path)
elsif path.is_a? String
# Normalize the pattern once here so the descriptor does not repeat it per file.
path_clean = ::Middleman::Util.normalize_path(path)
if path_clean.include?('*') # It's a glob
GlobIgnoreDescriptor.new(path_clean)
else
StringIgnoreDescriptor.new(path_clean)
end
elsif block
BlockIgnoreDescriptor.new(nil, block)
else
# Fallback for anything else (e.g. no pattern and no block): the generic descriptor.
# NOTE(review): its `ignored?` appears to raise NotImplementedError — confirm this is intended.
IgnoreDescriptor.new(path, block)
end
end
IgnoreDescriptor = Struct.new(:path, :block) do
def execute_descriptor(_app, resources)
resources.map do |r|
# Ignore based on the source path (without template extensions)
if ignored?(r.path)
if ignored?(r.normalized_path)
r.ignore!
elsif !r.is_a?(ProxyResource) && r.file_descriptor && ignored?(r.file_descriptor[:relative_path].to_s)
elsif !r.is_a?(ProxyResource) && r.file_descriptor && ignored?(r.file_descriptor.normalized_relative_path)
# This allows files to be ignored by their source file name (with template extensions)
r.ignore!
end
@ -33,27 +48,38 @@ module Middleman
end
def ignored?(match_path)
match_path = ::Middleman::Util.normalize_path(match_path)
raise NotImplementedError
end
end
if path.is_a? Regexp
match_path =~ path
elsif path.is_a? String
path_clean = ::Middleman::Util.normalize_path(path)
# Ignore rule whose `path` is a Regexp (built by `ignore` when it is given a Regexp).
class RegexpIgnoreDescriptor < IgnoreDescriptor
# Test a path against the stored pattern.
# @param match_path [String] path to test (presumably already normalized by the caller — confirm)
# @return [Integer, nil] truthy match offset when the pattern matches, nil otherwise
#   (assuming match_path is a String)
def ignored?(match_path)
match_path =~ path
end
end
if path_clean.include?('*') # It's a glob
if defined?(::File::FNM_EXTGLOB)
::File.fnmatch(path_clean, match_path, ::File::FNM_EXTGLOB)
else
::File.fnmatch(path_clean, match_path)
end
else
match_path == path_clean
end
elsif block
block.call(match_path)
# Ignore rule whose `path` is a glob pattern (a String containing '*').
class GlobIgnoreDescriptor < IgnoreDescriptor
  # Test a path against the stored glob.
  # Extended glob syntax is enabled when this Ruby defines File::FNM_EXTGLOB;
  # otherwise no flags are passed (0 is fnmatch's default).
  # @param match_path [String] path to test
  # @return [Boolean]
  def ignored?(match_path)
    fnmatch_flags = defined?(::File::FNM_EXTGLOB) ? ::File::FNM_EXTGLOB : 0
    ::File.fnmatch(path, match_path, fnmatch_flags)
  end
end
# Ignore rule whose `path` is a plain string that must match exactly.
class StringIgnoreDescriptor < IgnoreDescriptor
# Test a path for equality with the stored string. No normalization is done
# here, so both sides are expected to be in the same (normalized) form.
# @param match_path [String] path to test
# @return [Boolean]
def ignored?(match_path)
match_path == path
end
end
# Ignore rule backed by a caller-supplied block.
#
# Must inherit from IgnoreDescriptor: that is what provides the
# (path, block) constructor used by `BlockIgnoreDescriptor.new(nil, block)`,
# the `block` reader used below, and `execute_descriptor`. Without the
# superclass, construction raises ArgumentError and `block` is undefined.
class BlockIgnoreDescriptor < IgnoreDescriptor
  # Ask the block whether a path should be ignored.
  # @param match_path [String] path to test
  # @return [Object] the block's return value; truthy means "ignore"
  def ignored?(match_path)
    block.call(match_path)
  end
end
end
end
end

View file

@ -47,7 +47,7 @@ module Middleman
)
if should_ignore
d = ::Middleman::Sitemap::Extensions::Ignores::IgnoreDescriptor.new(target)
d = ::Middleman::Sitemap::Extensions::Ignores::StringIgnoreDescriptor.new(target)
d.execute_descriptor(app, resources)
end

View file

@ -197,6 +197,13 @@ module Middleman
options[:content_type] || ::Rack::Mime.mime_type(ext, nil)
end
# The normalized source path of this resource (relative to the source
# directory, without template extensions).
# The value is computed from @path on first use and cached afterwards, so
# the normalization is not repeated on every call.
# @return [String]
def normalized_path
  return @normalized_path if @normalized_path

  @normalized_path = ::Middleman::Util.normalize_path(@path)
end
# Short description of the object: its class followed by its @path.
# @return [String]
def to_s
  klass = self.class
  "#<#{klass} path=#{@path}>"
end

View file

@ -8,6 +8,10 @@ module Middleman
::Middleman::Sources.file_cache[full_path] ||= {}
::Middleman::Sources.file_cache[full_path][version] ||= ::File.read(full_path)
end
# Normalized string form of `relative_path`.
# Computed on first use via ::Middleman::Util.normalize_path and cached,
# so later calls do not normalize again.
def normalized_relative_path
  return @normalized_relative_path if @normalized_relative_path

  @normalized_relative_path = ::Middleman::Util.normalize_path(relative_path.to_s)
end
end
# Sources handle multiple on-disk collections of files which make up