From 3f18efeed92bf79367540cf73188086375d52b80 Mon Sep 17 00:00:00 2001 From: Thomas Reynolds Date: Sun, 31 Jan 2016 14:13:52 -0800 Subject: [PATCH] split up util module, fix some rubocop complaints --- .rubocop.yml | 4 +- middleman-core/lib/middleman-core.rb | 3 - middleman-core/lib/middleman-core/builder.rb | 6 +- .../middleman-core/core_extensions/data.rb | 4 +- .../lib/middleman-core/renderers/haml.rb | 2 + .../middleman-core/sources/source_watcher.rb | 2 +- middleman-core/lib/middleman-core/util.rb | 609 +----------------- .../lib/middleman-core/util/binary.rb | 79 +++ .../lib/middleman-core/util/data.rb | 36 +- .../lib/middleman-core/util/files.rb | 134 ++++ .../lib/middleman-core/util/paths.rb | 251 ++++++++ .../lib/middleman-core/util/rack.rb | 52 ++ .../lib/middleman-core/util/uri_templates.rb | 97 +++ 13 files changed, 660 insertions(+), 619 deletions(-) create mode 100644 middleman-core/lib/middleman-core/util/binary.rb create mode 100644 middleman-core/lib/middleman-core/util/files.rb create mode 100644 middleman-core/lib/middleman-core/util/paths.rb create mode 100644 middleman-core/lib/middleman-core/util/rack.rb create mode 100644 middleman-core/lib/middleman-core/util/uri_templates.rb diff --git a/.rubocop.yml b/.rubocop.yml index 0145ceef..dbd74993 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -39,8 +39,6 @@ AssignmentInCondition: Enabled: false CyclomaticComplexity: Enabled: false -AbcSize: - Enabled: false HandleExceptions: Enabled: false EndAlignment: @@ -54,7 +52,7 @@ FormatString: CaseIndentation: IndentWhenRelativeTo: end TrivialAccessors: - ExactNameMatch: true + Enabled: false SingleLineBlockParams: Enabled: false Metrics/AbcSize: diff --git a/middleman-core/lib/middleman-core.rb b/middleman-core/lib/middleman-core.rb index 8170fefb..687a53e5 100644 --- a/middleman-core/lib/middleman-core.rb +++ b/middleman-core/lib/middleman-core.rb @@ -6,9 +6,6 @@ $LOAD_PATH.unshift(libdir) unless $LOAD_PATH.include?(libdir) # Top-level Middleman namespace module Middleman - # Backwards compatibility namespace - module Features; end - autoload :Application, 'middleman-core/application' end diff --git a/middleman-core/lib/middleman-core/builder.rb b/middleman-core/lib/middleman-core/builder.rb index 3549fc34..9bb3c031 100644 --- a/middleman-core/lib/middleman-core/builder.rb +++ b/middleman-core/lib/middleman-core/builder.rb @@ -113,8 +113,8 @@ module Middleman logger.debug '== Building files' resources = @app.sitemap.resources - .reject { |resource| resource.ext == '.css' } - .sort_by { |resource| SORT_ORDER.index(resource.ext) || 100 } + .reject { |resource| resource.ext == '.css' } + .sort_by { |resource| SORT_ORDER.index(resource.ext) || 100 } if @glob resources = resources.select { |resource| File.fnmatch(@glob, resource.destination_path) } @@ -214,7 +214,7 @@ module Middleman # @return [void] Contract IsA['Middleman::Sitemap::Resource'] => Or[Pathname, Bool] def output_resource(resource) - ::Middleman::Util.instrument "builder.output.resource", path: File.basename(resource.destination_path) do + ::Middleman::Util.instrument 'builder.output.resource', path: File.basename(resource.destination_path) do output_file = @build_dir + resource.destination_path.gsub('%20', ' ') begin diff --git a/middleman-core/lib/middleman-core/core_extensions/data.rb b/middleman-core/lib/middleman-core/core_extensions/data.rb index 7ff60f41..2f2f4280 100644 --- a/middleman-core/lib/middleman-core/core_extensions/data.rb +++ b/middleman-core/lib/middleman-core/core_extensions/data.rb @@ -100,13 +100,13 @@ module Middleman extension = File.extname(data_path) basename = File.basename(data_path, extension) + return unless %w(.yaml .yml .json).include?(extension) + if %w(.yaml .yml).include?(extension) data, postscript = ::Middleman::Util::Data.parse(file, @app.config[:frontmatter_delims], :yaml) data[:postscript] = postscript if !postscript.nil? && data.is_a?(Hash) elsif extension == '.json' data, _postscript = ::Middleman::Util::Data.parse(file, @app.config[:frontmatter_delims], :json) - else - return end data_branch = @local_data diff --git a/middleman-core/lib/middleman-core/renderers/haml.rb b/middleman-core/lib/middleman-core/renderers/haml.rb index c5bf4699..8d3f40bb 100644 --- a/middleman-core/lib/middleman-core/renderers/haml.rb +++ b/middleman-core/lib/middleman-core/renderers/haml.rb @@ -46,6 +46,7 @@ module Middleman ::Haml::Options.defaults[:context] = nil ::Haml::Options.send :attr_accessor, :context + # rubocop:disable NestedMethodDefinition [::Haml::Filters::Sass, ::Haml::Filters::Scss, ::Haml::Filters::Markdown].each do |f| f.class_exec do def self.render_with_options(text, compiler_options) @@ -57,6 +58,7 @@ module Middleman end end end + # rubocop:enable NestedMethodDefinition ::Tilt.prefer(::Middleman::Renderers::HamlTemplate, :haml) diff --git a/middleman-core/lib/middleman-core/sources/source_watcher.rb b/middleman-core/lib/middleman-core/sources/source_watcher.rb index cb163abd..cbcf1fae 100644 --- a/middleman-core/lib/middleman-core/sources/source_watcher.rb +++ b/middleman-core/lib/middleman-core/sources/source_watcher.rb @@ -49,7 +49,7 @@ module Middleman # Reference to lower level listener attr_reader :listener - IGNORED_DIRECTORIES = %w(.git node_modules .sass-cache) + IGNORED_DIRECTORIES = %w(.git node_modules .sass-cache).freeze # Construct a new SourceWatcher # diff --git a/middleman-core/lib/middleman-core/util.rb b/middleman-core/lib/middleman-core/util.rb index f0ca99cd..a0d3fc78 100644 --- a/middleman-core/lib/middleman-core/util.rb +++ b/middleman-core/lib/middleman-core/util.rb @@ -1,621 +1,24 @@ # For instrumenting require 'active_support/notifications' -# Core Pathname library used for traversal -require 'pathname' - -# Template and Mime detection -require 'tilt' -require 'rack/mime' - -# DbC -require 'middleman-core/contracts' require 'middleman-core/application' require 'middleman-core/sources' require 'middleman-core/sitemap/resource' - -# Indifferent Access -require 'hashie' - -# For URI templating -require 'addressable/uri' -require 'addressable/template' -require 'active_support/inflector' -require 'active_support/inflector/transliterate' +require 'middleman-core/util/binary' +require 'middleman-core/util/data' +require 'middleman-core/util/files' +require 'middleman-core/util/paths' +require 'middleman-core/util/rack' +require 'middleman-core/util/uri_templates' module Middleman module Util - include Contracts - module_function - # Whether the source file is binary. - # - # @param [String] filename The file to check. - # @return [Boolean] - Contract Or[String, Pathname] => Bool - def binary?(filename) - @binary_cache ||= {} - - return @binary_cache[filename] if @binary_cache.key?(filename) - - @binary_cache[filename] = begin - path = Pathname(filename) - ext = path.extname - - # We hardcode detecting of gzipped SVG files - if ext == '.svgz' - true - elsif Tilt.registered?(ext.sub('.', '')) - false - else - dot_ext = (ext.to_s[0] == '.') ? ext.dup : ".#{ext}" - - if mime = ::Rack::Mime.mime_type(dot_ext, nil) - !nonbinary_mime?(mime) - else - file_contents_include_binary_bytes?(path.to_s) - end - end - end - end - - # Takes a matcher, which can be a literal string - # or a string containing glob expressions, or a - # regexp, or a proc, or anything else that responds - # to #match or #call, and returns whether or not the - # given path matches that matcher. - # - # @param [String, #match, #call] matcher A matcher String, RegExp, Proc, etc. - # @param [String] path A path as a string - # @return [Boolean] Whether the path matches the matcher - Contract PATH_MATCHER, String => Bool - def path_match(matcher, path) - case - when matcher.is_a?(String) - if matcher.include? '*' - File.fnmatch(matcher, path) - else - path == matcher - end - when matcher.respond_to?(:match) - !!(path =~ matcher) - when matcher.respond_to?(:call) - matcher.call(path) - else - File.fnmatch(matcher.to_s, path) - end - end - - class EnhancedHash < ::Hashie::Mash - # include ::Hashie::Extensions::MergeInitializer - # include ::Hashie::Extensions::MethodReader - # include ::Hashie::Extensions::IndifferentAccess - end - - # Recursively convert a normal Hash into a EnhancedHash - # - # @private - # @param [Hash] data Normal hash - # @return [Hash] - Contract Any => Maybe[Or[Array, EnhancedHash]] - def recursively_enhance(obj) - if obj.is_a? ::Array - obj.map { |e| recursively_enhance(e) } - elsif obj.is_a? ::Hash - ::Hashie::Mash.new(obj) - else - obj - end - end - - # Normalize a path to not include a leading slash - # @param [String] path - # @return [String] - Contract String => String - def normalize_path(path) - # The tr call works around a bug in Ruby's Unicode handling - ::URI.decode(path).sub(%r{^/}, '').tr('', '') - end - - # This is a separate method from normalize_path in case we - # change how we normalize paths - Contract String => String - def strip_leading_slash(path) - path.sub(%r{^/}, '') - end - # Facade for ActiveSupport/Notification def instrument(name, payload={}, &block) suffixed_name = (name =~ /\.middleman$/) ? name.dup : "#{name}.middleman" ::ActiveSupport::Notifications.instrument(suffixed_name, payload, &block) end - - # Extract the text of a Rack response as a string. - # Useful for extensions implemented as Rack middleware. - # @param response The response from #call - # @return [String] The whole response as a string. - Contract RespondTo[:each] => String - def extract_response_text(response) - # The rack spec states all response bodies must respond to each - result = '' - response.each do |part, _| - result << part - end - result - end - - # Get a recusive list of files inside a path. - # Works with symlinks. - # - # @param path Some path string or Pathname - # @param ignore A proc/block that returns true if a given path should be ignored - if a path - # is ignored, nothing below it will be searched either. - # @return [Array] An array of Pathnames for each file (no directories) - Contract Or[String, Pathname], Proc => ArrayOf[Pathname] - def all_files_under(path, &ignore) - path = Pathname(path) - - if ignore && ignore.call(path) - [] - elsif path.directory? - path.children.flat_map do |child| - all_files_under(child, &ignore) - end.compact - elsif path.file? - [path] - else - [] - end - end - - # Get the path of a file of a given type - # - # @param [Middleman::Application] app The app. - # @param [Symbol] kind The type of file - # @param [String, Symbol] source The path to the file - # @param [Hash] options Data to pass through. - # @return [String] - Contract ::Middleman::Application, Symbol, Or[String, Symbol], Hash => String - def asset_path(app, kind, source, options={}) - return source if source.to_s.include?('//') || source.to_s.start_with?('data:') - - asset_folder = case kind - when :css - app.config[:css_dir] - when :js - app.config[:js_dir] - when :images - app.config[:images_dir] - when :fonts - app.config[:fonts_dir] - else - kind.to_s - end - - source = source.to_s.tr(' ', '') - ignore_extension = (kind == :images || kind == :fonts) # don't append extension - source << ".#{kind}" unless ignore_extension || source.end_with?(".#{kind}") - asset_folder = '' if source.start_with?('/') # absolute path - - asset_url(app, source, asset_folder, options) - end - - # Get the URL of an asset given a type/prefix - # - # @param [String] path The path (such as "photo.jpg") - # @param [String] prefix The type prefix (such as "images") - # @param [Hash] options Data to pass through. - # @return [String] The fully qualified asset url - Contract ::Middleman::Application, String, String, Hash => String - def asset_url(app, path, prefix='', options={}) - # Don't touch assets which already have a full path - return path if path.include?('//') || path.start_with?('data:') - - if options[:relative] && !options[:current_resource] - raise ArgumentError, '#asset_url must be run in a context with current_resource if relative: true' - end - - uri = URI(path) - path = uri.path - - result = if resource = app.sitemap.find_resource_by_destination_path(url_for(app, path, options)) - resource.url - else - path = File.join(prefix, path) - if resource = app.sitemap.find_resource_by_path(path) - resource.url - else - File.join(app.config[:http_prefix], path) - end - end - - final_result = ::URI.encode(relative_path_from_resource(options[:current_resource], result, options[:relative])) - - result_uri = URI(final_result) - result_uri.query = uri.query - result_uri.fragment = uri.fragment - result_uri.to_s - end - - # Given a source path (referenced either absolutely or relatively) - # or a Resource, this will produce the nice URL configured for that - # path, respecting :relative_links, directory indexes, etc. - Contract ::Middleman::Application, Or[String, ::Middleman::Sitemap::Resource], Hash => String - def url_for(app, path_or_resource, options={}) - # Handle Resources and other things which define their own url method - url = if path_or_resource.respond_to?(:url) - path_or_resource.url - else - path_or_resource.dup - end - - # Try to parse URL - begin - uri = URI(url) - rescue ::URI::InvalidURIError - # Nothing we can do with it, it's not really a URI - return url - end - - relative = options[:relative] - raise "Can't use the relative option with an external URL" if relative && uri.host - - # Allow people to turn on relative paths for all links with - # set :relative_links, true - # but still override on a case by case basis with the :relative parameter. - effective_relative = relative || false - effective_relative = true if relative.nil? && app.config[:relative_links] - - # Try to find a sitemap resource corresponding to the desired path - this_resource = options[:current_resource] - - if path_or_resource.is_a?(::Middleman::Sitemap::Resource) - resource = path_or_resource - resource_url = url - elsif this_resource && uri.path && !uri.host - # Handle relative urls - url_path = Pathname(uri.path) - current_source_dir = Pathname('/' + this_resource.path).dirname - url_path = current_source_dir.join(url_path) if url_path.relative? - resource = app.sitemap.find_resource_by_path(url_path.to_s) - if resource - resource_url = resource.url - else - # Try to find a resource relative to destination paths - url_path = Pathname(uri.path) - current_source_dir = Pathname('/' + this_resource.destination_path).dirname - url_path = current_source_dir.join(url_path) if url_path.relative? - resource = app.sitemap.find_resource_by_destination_path(url_path.to_s) - resource_url = resource.url if resource - end - elsif options[:find_resource] && uri.path && !uri.host - resource = app.sitemap.find_resource_by_path(uri.path) - resource_url = resource.url if resource - end - - if resource - uri.path = if this_resource - ::URI.encode(relative_path_from_resource(this_resource, resource_url, effective_relative)) - else - resource_url - end - end - - # Support a :query option that can be a string or hash - if query = options[:query] - uri.query = query.respond_to?(:to_param) ? query.to_param : query.to_s - end - - # Support a :fragment or :anchor option just like Padrino - fragment = options[:anchor] || options[:fragment] - uri.fragment = fragment.to_s if fragment - - # Finally make the URL back into a string - uri.to_s - end - - # Expand a path to include the index file if it's a directory - # - # @param [String] path Request path/ - # @param [Middleman::Application] app The requesting app. - # @return [String] Path with index file if necessary. - Contract String, ::Middleman::Application => String - def full_path(path, app) - resource = app.sitemap.find_resource_by_destination_path(path) - - unless resource - # Try it with /index.html at the end - indexed_path = File.join(path.sub(%r{/$}, ''), app.config[:index_file]) - resource = app.sitemap.find_resource_by_destination_path(indexed_path) - end - - if resource - '/' + resource.destination_path - else - '/' + normalize_path(path) - end - end - - Contract String, String, ArrayOf[String], Proc => String - def rewrite_paths(body, _path, exts, &_block) - matcher = /([\'\"\(,]\s*)([^\s\'\"\)>]+(#{Regexp.union(exts)}))/ - - url_fn_prefix = 'url(' - - body.dup.gsub(matcher) do |match| - opening_character = $1 - asset_path = $2 - - if asset_path.start_with?(url_fn_prefix) - opening_character << url_fn_prefix - asset_path = asset_path[url_fn_prefix.length..-1] - end - - begin - uri = ::Addressable::URI.parse(asset_path) - - if uri.relative? && uri.host.nil? && !(asset_path =~ /^[^\/].*[a-z]+\.[a-z]+\/.*/) && (result = yield(asset_path)) - "#{opening_character}#{result}" - else - match - end - rescue ::Addressable::URI::InvalidURIError - match - end - end - end - - # Is mime type known to be non-binary? - # - # @param [String] mime The mimetype to check. - # @return [Boolean] - Contract String => Bool - def nonbinary_mime?(mime) - case - when mime.start_with?('text/') - true - when mime.include?('xml') && !mime.include?('officedocument') - true - when mime.include?('json') - true - when mime.include?('javascript') - true - else - false - end - end - - # Read a few bytes from the file and see if they are binary. - # - # @param [String] filename The file to check. - # @return [Boolean] - Contract String => Bool - def file_contents_include_binary_bytes?(filename) - binary_bytes = [0, 1, 2, 3, 4, 5, 6, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31] - s = File.read(filename, 4096) || '' - s.each_byte do |c| - return true if binary_bytes.include?(c) - end - - false - end - - # Glob a directory and try to keep path encoding consistent. - # - # @param [String] path The glob path. - # @return [Array] - def glob_directory(path) - results = ::Dir[path] - - return results unless RUBY_PLATFORM =~ /darwin/ - - results.map { |r| r.encode('UTF-8', 'UTF-8-MAC') } - end - - # Get the PWD and try to keep path encoding consistent. - # - # @param [String] path The glob path. - # @return [Array] - def current_directory - result = ::Dir.pwd - - return result unless RUBY_PLATFORM =~ /darwin/ - - result.encode('UTF-8', 'UTF-8-MAC') - end - - # Get a relative path to a resource. - # - # @param [Middleman::Sitemap::Resource] curr_resource The resource. - # @param [String] resource_url The target url. - # @param [Boolean] relative If the path should be relative. - # @return [String] - Contract ::Middleman::Sitemap::Resource, String, Bool => String - def relative_path_from_resource(curr_resource, resource_url, relative) - # Switch to the relative path between resource and the given resource - # if we've been asked to. - if relative - # Output urls relative to the destination path, not the source path - current_dir = Pathname('/' + curr_resource.destination_path).dirname - relative_path = Pathname(resource_url).relative_path_from(current_dir).to_s - - # Put back the trailing slash to avoid unnecessary Apache redirects - if resource_url.end_with?('/') && !relative_path.end_with?('/') - relative_path << '/' - end - - relative_path - else - resource_url - end - end - - Contract String => String - def step_through_extensions(path) - while ::Tilt[path] - ext = File.extname(path) - yield ext if block_given? - - # Strip templating extensions as long as Tilt knows them - path = path[0..-(ext.length + 1)] - end - - yield File.extname(path) if block_given? - - path - end - - # Removes the templating extensions, while keeping the others - # @param [String] path - # @return [String] - Contract String => String - def remove_templating_extensions(path) - step_through_extensions(path) - end - - # Removes the templating extensions, while keeping the others - # @param [String] path - # @return [String] - Contract String => ArrayOf[String] - def collect_extensions(path) - return [] if File.basename(path).start_with?('.') - - result = [] - - step_through_extensions(path) { |e| result << e } - - result - end - - # Finds files which should also be considered to be dirty when - # the given file(s) are touched. - # - # @param [Middleman::Application] app The app. - # @param [Pathname] files The original touched file paths. - # @return [Middleman::SourceFile] All related file paths, not including the source file paths. - Contract ::Middleman::Application, ArrayOf[Pathname] => ArrayOf[::Middleman::SourceFile] - def find_related_files(app, files) - return [] if files.empty? - - all_extensions = files.flat_map { |f| collect_extensions(f.to_s) } - - sass_type_aliasing = ['.scss', '.sass'] - erb_type_aliasing = ['.erb', '.haml', '.slim'] - - if (all_extensions & sass_type_aliasing).length > 0 - all_extensions |= sass_type_aliasing - end - - if (all_extensions & erb_type_aliasing).length > 0 - all_extensions |= erb_type_aliasing - end - - all_extensions.uniq! - - app.sitemap.resources.select(&:file_descriptor).select { |r| - local_extensions = collect_extensions(r.file_descriptor[:full_path].to_s) - - if (local_extensions & sass_type_aliasing).length > 0 - local_extensions |= sass_type_aliasing - end - - if (local_extensions & erb_type_aliasing).length > 0 - local_extensions |= erb_type_aliasing - end - - local_extensions.uniq! - - ((all_extensions & local_extensions).length > 0) && files.none? { |f| f == r.file_descriptor[:full_path] } - }.map(&:file_descriptor) - end - - # Handy methods for dealing with URI templates. Mix into whatever class. - module UriTemplates - module_function - - # Given a URI template string, make an Addressable::Template - # This supports the legacy middleman-blog/Sinatra style :colon - # URI templates as well as RFC6570 templates. - # - # @param [String] tmpl_src URI template source - # @return [Addressable::Template] a URI template - def uri_template(tmpl_src) - # Support the RFC6470 templates directly if people use them - if tmpl_src.include?(':') - tmpl_src = tmpl_src.gsub(/:([A-Za-z0-9]+)/, '{\1}') - end - - ::Addressable::Template.new ::Middleman::Util.normalize_path(tmpl_src) - end - - # Apply a URI template with the given data, producing a normalized - # Middleman path. - # - # @param [Addressable::Template] template - # @param [Hash] data - # @return [String] normalized path - def apply_uri_template(template, data) - ::Middleman::Util.normalize_path ::Addressable::URI.unencode(template.expand(data)).to_s - end - - # Use a template to extract parameters from a path, and validate some special (date) - # keys. Returns nil if the special keys don't match. - # - # @param [Addressable::Template] template - # @param [String] path - def extract_params(template, path) - template.extract(path, BlogTemplateProcessor) - end - - # Parameterize a string preserving any multibyte characters - def safe_parameterize(str) - sep = '-' - - # Reimplementation of http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-parameterize that preserves un-transliterate-able multibyte chars. - parameterized_string = ActiveSupport::Inflector.transliterate(str.to_s).downcase - parameterized_string.gsub!(/[^a-z0-9\-_\?]+/, sep) - - parameterized_string.chars.to_a.each_with_index do |char, i| - next unless char == '?' && str[i].bytes.count != 1 - parameterized_string[i] = str[i] - end - - re_sep = Regexp.escape(sep) - # No more than one of the separator in a row. - parameterized_string.gsub!(/#{re_sep}{2,}/, sep) - # Remove leading/trailing separator. - parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/, '') - - parameterized_string - end - - # Convert a date into a hash of components to strings - # suitable for using in a URL template. - # @param [DateTime] date - # @return [Hash] parameters - def date_to_params(date) - { - year: date.year.to_s, - month: date.month.to_s.rjust(2, '0'), - day: date.day.to_s.rjust(2, '0') - } - end - end - - # A special template processor that validates date fields - # and has an extra-permissive default regex. - # - # See https://github.com/sporkmonger/addressable/blob/master/lib/addressable/template.rb#L279 - class BlogTemplateProcessor - def self.match(name) - case name - when 'year' then '\d{4}' - when 'month' then '\d{2}' - when 'day' then '\d{2}' - else '.*?' - end - end - end end end diff --git a/middleman-core/lib/middleman-core/util/binary.rb b/middleman-core/lib/middleman-core/util/binary.rb new file mode 100644 index 00000000..71e584d1 --- /dev/null +++ b/middleman-core/lib/middleman-core/util/binary.rb @@ -0,0 +1,79 @@ +# Template and Mime detection +require 'tilt' +require 'rack/mime' + +require 'middleman-core/contracts' + +module Middleman + module Util + include Contracts + + module_function + + # Whether the source file is binary. + # + # @param [String] filename The file to check. + # @return [Boolean] + Contract Or[String, Pathname] => Bool + def binary?(filename) + @binary_cache ||= {} + + return @binary_cache[filename] if @binary_cache.key?(filename) + + @binary_cache[filename] = begin + path = Pathname(filename) + ext = path.extname + + # We hardcode detecting of gzipped SVG files + if ext == '.svgz' + true + elsif ::Tilt.registered?(ext.sub('.', '')) + false + else + dot_ext = (ext.to_s[0] == '.') ? ext.dup : ".#{ext}" + + if mime = ::Rack::Mime.mime_type(dot_ext, nil) + !nonbinary_mime?(mime) + else + file_contents_include_binary_bytes?(path.to_s) + end + end + end + end + + # Is mime type known to be non-binary? + # + # @param [String] mime The mimetype to check. + # @return [Boolean] + Contract String => Bool + def nonbinary_mime?(mime) + case + when mime.start_with?('text/') + true + when mime.include?('xml') && !mime.include?('officedocument') + true + when mime.include?('json') + true + when mime.include?('javascript') + true + else + false + end + end + + # Read a few bytes from the file and see if they are binary. + # + # @param [String] filename The file to check. + # @return [Boolean] + Contract String => Bool + def file_contents_include_binary_bytes?(filename) + binary_bytes = [0, 1, 2, 3, 4, 5, 6, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31] + s = ::File.read(filename, 4096) || '' + s.each_byte do |c| + return true if binary_bytes.include?(c) + end + + false + end + end +end diff --git a/middleman-core/lib/middleman-core/util/data.rb b/middleman-core/lib/middleman-core/util/data.rb index c18e6975..2bab7fe0 100644 --- a/middleman-core/lib/middleman-core/util/data.rb +++ b/middleman-core/lib/middleman-core/util/data.rb @@ -1,12 +1,40 @@ require 'yaml' require 'json' require 'pathname' -require 'middleman-core/util' -require 'middleman-core/contracts' require 'backports/2.1.0/array/to_h' +require 'hashie' + +require 'middleman-core/util/binary' +require 'middleman-core/contracts' module Middleman module Util + include Contracts + + module_function + + class EnhancedHash < ::Hashie::Mash + # include ::Hashie::Extensions::MergeInitializer + # include ::Hashie::Extensions::MethodReader + # include ::Hashie::Extensions::IndifferentAccess + end + + # Recursively convert a normal Hash into a EnhancedHash + # + # @private + # @param [Hash] data Normal hash + # @return [Hash] + Contract Any => Maybe[Or[Array, EnhancedHash]] + def recursively_enhance(obj) + if obj.is_a? ::Array + obj.map { |e| recursively_enhance(e) } + elsif obj.is_a? ::Hash + ::Hashie::Mash.new(obj) + else + obj + end + end + module Data include Contracts @@ -23,7 +51,7 @@ module Middleman # Avoid weird race condition when a file is renamed begin content = file.read - rescue EOFError, IOError, Errno::ENOENT + rescue EOFError, IOError, ::Errno::ENOENT return [{}, nil] end @@ -31,7 +59,7 @@ module Middleman .values .flatten(1) .transpose - .map(&Regexp.method(:union)) + .map(&::Regexp.method(:union)) match = / \A(?:[^\r\n]*coding:[^\r\n]*\r?\n)? diff --git a/middleman-core/lib/middleman-core/util/files.rb b/middleman-core/lib/middleman-core/util/files.rb new file mode 100644 index 00000000..f8fb1316 --- /dev/null +++ b/middleman-core/lib/middleman-core/util/files.rb @@ -0,0 +1,134 @@ +module Middleman + module Util + include Contracts + + module_function + + # Get a recusive list of files inside a path. + # Works with symlinks. + # + # @param path Some path string or Pathname + # @param ignore A proc/block that returns true if a given path should be ignored - if a path + # is ignored, nothing below it will be searched either. + # @return [Array] An array of Pathnames for each file (no directories) + Contract Or[String, Pathname], Proc => ArrayOf[Pathname] + def all_files_under(path, &ignore) + path = Pathname(path) + + if ignore && yield(path) + [] + elsif path.directory? + path.children.flat_map do |child| + all_files_under(child, &ignore) + end.compact + elsif path.file? + [path] + else + [] + end + end + + # Glob a directory and try to keep path encoding consistent. + # + # @param [String] path The glob path. + # @return [Array] + def glob_directory(path) + results = ::Dir[path] + + return results unless RUBY_PLATFORM =~ /darwin/ + + results.map { |r| r.encode('UTF-8', 'UTF-8-MAC') } + end + + # Get the PWD and try to keep path encoding consistent. + # + # @param [String] path The glob path. + # @return [Array] + def current_directory + result = ::Dir.pwd + + return result unless RUBY_PLATFORM =~ /darwin/ + + result.encode('UTF-8', 'UTF-8-MAC') + end + + Contract String => String + def step_through_extensions(path) + while ::Tilt[path] + ext = ::File.extname(path) + yield ext if block_given? + + # Strip templating extensions as long as Tilt knows them + path = path[0..-(ext.length + 1)] + end + + yield ::File.extname(path) if block_given? + + path + end + + # Removes the templating extensions, while keeping the others + # @param [String] path + # @return [String] + Contract String => String + def remove_templating_extensions(path) + step_through_extensions(path) + end + + # Removes the templating extensions, while keeping the others + # @param [String] path + # @return [String] + Contract String => ArrayOf[String] + def collect_extensions(path) + return [] if ::File.basename(path).start_with?('.') + + result = [] + + step_through_extensions(path) { |e| result << e } + + result + end + + # Finds files which should also be considered to be dirty when + # the given file(s) are touched. + # + # @param [Middleman::Application] app The app. + # @param [Pathname] files The original touched file paths. + # @return [Middleman::SourceFile] All related file paths, not including the source file paths. + Contract ::Middleman::Application, ArrayOf[Pathname] => ArrayOf[::Middleman::SourceFile] + def find_related_files(app, files) + return [] if files.empty? + + all_extensions = files.flat_map { |f| collect_extensions(f.to_s) } + + sass_type_aliasing = ['.scss', '.sass'] + erb_type_aliasing = ['.erb', '.haml', '.slim'] + + if (all_extensions & sass_type_aliasing).length > 0 + all_extensions |= sass_type_aliasing + end + + if (all_extensions & erb_type_aliasing).length > 0 + all_extensions |= erb_type_aliasing + end + + all_extensions.uniq! + + app.sitemap.resources.select(&:file_descriptor).select { |r| + local_extensions = collect_extensions(r.file_descriptor[:full_path].to_s) + + if (local_extensions & sass_type_aliasing).length > 0 + local_extensions |= sass_type_aliasing + end + + if (local_extensions & erb_type_aliasing).length > 0 + local_extensions |= erb_type_aliasing + end + + local_extensions.uniq! + + ((all_extensions & local_extensions).length > 0) && files.none? { |f| f == r.file_descriptor[:full_path] } + }.map(&:file_descriptor) + end + end +end diff --git a/middleman-core/lib/middleman-core/util/paths.rb b/middleman-core/lib/middleman-core/util/paths.rb new file mode 100644 index 00000000..61abec59 --- /dev/null +++ b/middleman-core/lib/middleman-core/util/paths.rb @@ -0,0 +1,251 @@ +# Core Pathname library used for traversal +require 'pathname' +require 'uri' + +require 'middleman-core/contracts' + +# rubocop:disable ModuleLength +module Middleman + module Util + include Contracts + + module_function + + # Normalize a path to not include a leading slash + # @param [String] path + # @return [String] + Contract String => String + def normalize_path(path) + # The tr call works around a bug in Ruby's Unicode handling + ::URI.decode(path).sub(%r{^/}, '').tr('', '') + end + + # This is a separate method from normalize_path in case we + # change how we normalize paths + Contract String => String + def strip_leading_slash(path) + path.sub(%r{^/}, '') + end + + # Get the path of a file of a given type + # + # @param [Middleman::Application] app The app. + # @param [Symbol] kind The type of file + # @param [String, Symbol] source The path to the file + # @param [Hash] options Data to pass through. + # @return [String] + Contract ::Middleman::Application, Symbol, Or[String, Symbol], Hash => String + def asset_path(app, kind, source, options={}) + return source if source.to_s.include?('//') || source.to_s.start_with?('data:') + + asset_folder = case kind + when :css + app.config[:css_dir] + when :js + app.config[:js_dir] + when :images + app.config[:images_dir] + when :fonts + app.config[:fonts_dir] + else + kind.to_s + end + + source = source.to_s.tr(' ', '') + ignore_extension = (kind == :images || kind == :fonts) # don't append extension + source << ".#{kind}" unless ignore_extension || source.end_with?(".#{kind}") + asset_folder = '' if source.start_with?('/') # absolute path + + asset_url(app, source, asset_folder, options) + end + + # Get the URL of an asset given a type/prefix + # + # @param [String] path The path (such as "photo.jpg") + # @param [String] prefix The type prefix (such as "images") + # @param [Hash] options Data to pass through. + # @return [String] The fully qualified asset url + Contract ::Middleman::Application, String, String, Hash => String + def asset_url(app, path, prefix='', options={}) + # Don't touch assets which already have a full path + return path if path.include?('//') || path.start_with?('data:') + + if options[:relative] && !options[:current_resource] + raise ArgumentError, '#asset_url must be run in a context with current_resource if relative: true' + end + + uri = URI(path) + path = uri.path + + result = if resource = app.sitemap.find_resource_by_destination_path(url_for(app, path, options)) + resource.url + else + path = ::File.join(prefix, path) + if resource = app.sitemap.find_resource_by_path(path) + resource.url + else + ::File.join(app.config[:http_prefix], path) + end + end + + final_result = ::URI.encode(relative_path_from_resource(options[:current_resource], result, options[:relative])) + + result_uri = URI(final_result) + result_uri.query = uri.query + result_uri.fragment = uri.fragment + result_uri.to_s + end + + # Given a source path (referenced either absolutely or relatively) + # or a Resource, this will produce the nice URL configured for that + # path, respecting :relative_links, directory indexes, etc. + Contract ::Middleman::Application, Or[String, ::Middleman::Sitemap::Resource], Hash => String + def url_for(app, path_or_resource, options={}) + # Handle Resources and other things which define their own url method + url = if path_or_resource.respond_to?(:url) + path_or_resource.url + else + path_or_resource.dup + end + + # Try to parse URL + begin + uri = URI(url) + rescue ::URI::InvalidURIError + # Nothing we can do with it, it's not really a URI + return url + end + + relative = options[:relative] + raise "Can't use the relative option with an external URL" if relative && uri.host + + # Allow people to turn on relative paths for all links with + # set :relative_links, true + # but still override on a case by case basis with the :relative parameter. + effective_relative = relative || false + effective_relative = true if relative.nil? && app.config[:relative_links] + + # Try to find a sitemap resource corresponding to the desired path + this_resource = options[:current_resource] + + if path_or_resource.is_a?(::Middleman::Sitemap::Resource) + resource = path_or_resource + resource_url = url + elsif this_resource && uri.path && !uri.host + # Handle relative urls + url_path = Pathname(uri.path) + current_source_dir = Pathname('/' + this_resource.path).dirname + url_path = current_source_dir.join(url_path) if url_path.relative? + resource = app.sitemap.find_resource_by_path(url_path.to_s) + if resource + resource_url = resource.url + else + # Try to find a resource relative to destination paths + url_path = Pathname(uri.path) + current_source_dir = Pathname('/' + this_resource.destination_path).dirname + url_path = current_source_dir.join(url_path) if url_path.relative? + resource = app.sitemap.find_resource_by_destination_path(url_path.to_s) + resource_url = resource.url if resource + end + elsif options[:find_resource] && uri.path && !uri.host + resource = app.sitemap.find_resource_by_path(uri.path) + resource_url = resource.url if resource + end + + if resource + uri.path = if this_resource + ::URI.encode(relative_path_from_resource(this_resource, resource_url, effective_relative)) + else + resource_url + end + end + + # Support a :query option that can be a string or hash + if query = options[:query] + uri.query = query.respond_to?(:to_param) ? query.to_param : query.to_s + end + + # Support a :fragment or :anchor option just like Padrino + fragment = options[:anchor] || options[:fragment] + uri.fragment = fragment.to_s if fragment + + # Finally make the URL back into a string + uri.to_s + end + + # Expand a path to include the index file if it's a directory + # + # @param [String] path Request path/ + # @param [Middleman::Application] app The requesting app. + # @return [String] Path with index file if necessary. + Contract String, ::Middleman::Application => String + def full_path(path, app) + resource = app.sitemap.find_resource_by_destination_path(path) + + unless resource + # Try it with /index.html at the end + indexed_path = ::File.join(path.sub(%r{/$}, ''), app.config[:index_file]) + resource = app.sitemap.find_resource_by_destination_path(indexed_path) + end + + if resource + '/' + resource.destination_path + else + '/' + normalize_path(path) + end + end + + # Get a relative path to a resource. + # + # @param [Middleman::Sitemap::Resource] curr_resource The resource. + # @param [String] resource_url The target url. + # @param [Boolean] relative If the path should be relative. + # @return [String] + Contract ::Middleman::Sitemap::Resource, String, Bool => String + def relative_path_from_resource(curr_resource, resource_url, relative) + # Switch to the relative path between resource and the given resource + # if we've been asked to. + if relative + # Output urls relative to the destination path, not the source path + current_dir = Pathname('/' + curr_resource.destination_path).dirname + relative_path = Pathname(resource_url).relative_path_from(current_dir).to_s + + # Put back the trailing slash to avoid unnecessary Apache redirects + if resource_url.end_with?('/') && !relative_path.end_with?('/') + relative_path << '/' + end + + relative_path + else + resource_url + end + end + + # Takes a matcher, which can be a literal string + # or a string containing glob expressions, or a + # regexp, or a proc, or anything else that responds + # to #match or #call, and returns whether or not the + # given path matches that matcher. + # + # @param [String, #match, #call] matcher A matcher String, RegExp, Proc, etc. + # @param [String] path A path as a string + # @return [Boolean] Whether the path matches the matcher + Contract PATH_MATCHER, String => Bool + def path_match(matcher, path) + case + when matcher.is_a?(String) + if matcher.include? '*' + ::File.fnmatch(matcher, path) + else + path == matcher + end + when matcher.respond_to?(:match) + !!(path =~ matcher) + when matcher.respond_to?(:call) + matcher.call(path) + else + ::File.fnmatch(matcher.to_s, path) + end + end + end +end diff --git a/middleman-core/lib/middleman-core/util/rack.rb b/middleman-core/lib/middleman-core/util/rack.rb new file mode 100644 index 00000000..8399b3af --- /dev/null +++ b/middleman-core/lib/middleman-core/util/rack.rb @@ -0,0 +1,52 @@ +require 'middleman-core/contracts' + +module Middleman + module Util + include Contracts + + module_function + + # Extract the text of a Rack response as a string. + # Useful for extensions implemented as Rack middleware. + # @param response The response from #call + # @return [String] The whole response as a string. + Contract RespondTo[:each] => String + def extract_response_text(response) + # The rack spec states all response bodies must respond to each + result = '' + response.each do |part, _| + result << part + end + result + end + + Contract String, String, ArrayOf[String], Proc => String + def rewrite_paths(body, _path, exts, &_block) + matcher = /([\'\"\(,]\s*)([^\s\'\"\)>]+(#{::Regexp.union(exts)}))/ + + url_fn_prefix = 'url(' + + body.dup.gsub(matcher) do |match| + opening_character = $1 + asset_path = $2 + + if asset_path.start_with?(url_fn_prefix) + opening_character << url_fn_prefix + asset_path = asset_path[url_fn_prefix.length..-1] + end + + begin + uri = ::Addressable::URI.parse(asset_path) + + if uri.relative? && uri.host.nil? && !(asset_path =~ /^[^\/].*[a-z]+\.[a-z]+\/.*/) && (result = yield(asset_path)) + "#{opening_character}#{result}" + else + match + end + rescue ::Addressable::URI::InvalidURIError + match + end + end + end + end +end diff --git a/middleman-core/lib/middleman-core/util/uri_templates.rb b/middleman-core/lib/middleman-core/util/uri_templates.rb new file mode 100644 index 00000000..c13d504d --- /dev/null +++ b/middleman-core/lib/middleman-core/util/uri_templates.rb @@ -0,0 +1,97 @@ +# For URI templating +require 'addressable/uri' +require 'addressable/template' +require 'active_support/inflector' +require 'active_support/inflector/transliterate' + +module Middleman + module Util + # Handy methods for dealing with URI templates. Mix into whatever class. + module UriTemplates + module_function + + # Given a URI template string, make an Addressable::Template + # This supports the legacy middleman-blog/Sinatra style :colon + # URI templates as well as RFC6570 templates. + # + # @param [String] tmpl_src URI template source + # @return [Addressable::Template] a URI template + def uri_template(tmpl_src) + # Support the RFC6470 templates directly if people use them + if tmpl_src.include?(':') + tmpl_src = tmpl_src.gsub(/:([A-Za-z0-9]+)/, '{\1}') + end + + ::Addressable::Template.new(::Middleman::Util.normalize_path(tmpl_src)) + end + + # Apply a URI template with the given data, producing a normalized + # Middleman path. + # + # @param [Addressable::Template] template + # @param [Hash] data + # @return [String] normalized path + def apply_uri_template(template, data) + ::Middleman::Util.normalize_path(::Addressable::URI.unencode(template.expand(data)).to_s) + end + + # Use a template to extract parameters from a path, and validate some special (date) + # keys. Returns nil if the special keys don't match. + # + # @param [Addressable::Template] template + # @param [String] path + def extract_params(template, path) + template.extract(path, BlogTemplateProcessor) + end + + # Parameterize a string preserving any multibyte characters + def safe_parameterize(str) + sep = '-' + + # Reimplementation of http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-parameterize that preserves un-transliterate-able multibyte chars. + parameterized_string = ::ActiveSupport::Inflector.transliterate(str.to_s).downcase + parameterized_string.gsub!(/[^a-z0-9\-_\?]+/, sep) + + parameterized_string.chars.to_a.each_with_index do |char, i| + next unless char == '?' && str[i].bytes.count != 1 + parameterized_string[i] = str[i] + end + + re_sep = ::Regexp.escape(sep) + # No more than one of the separator in a row. + parameterized_string.gsub!(/#{re_sep}{2,}/, sep) + # Remove leading/trailing separator. + parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/, '') + + parameterized_string + end + + # Convert a date into a hash of components to strings + # suitable for using in a URL template. + # @param [DateTime] date + # @return [Hash] parameters + def date_to_params(date) + { + year: date.year.to_s, + month: date.month.to_s.rjust(2, '0'), + day: date.day.to_s.rjust(2, '0') + } + end + end + + # A special template processor that validates date fields + # and has an extra-permissive default regex. + # + # See https://github.com/sporkmonger/addressable/blob/master/lib/addressable/template.rb#L279 + class BlogTemplateProcessor + def self.match(name) + case name + when 'year' then '\d{4}' + when 'month' then '\d{2}' + when 'day' then '\d{2}' + else '.*?' + end + end + end + end +end