URL autolinking bypasses inlined HTML (or any other URL preceded by double or single quote)

This commit is contained in:
Alexey Verkhovsky 2005-02-20 09:28:41 +00:00
parent 3e7610c0cd
commit b1b04e1af4
4 changed files with 44 additions and 28 deletions

View file

@ -61,19 +61,20 @@ class URIChunk < Chunk::Abstract
PORT = '\\d*' PORT = '\\d*'
INTERNET_URI = INTERNET_URI =
"(?:(#{SCHEME}):/{0,2})?" + # Optional scheme: (\1) "(?:(#{SCHEME}):/{0,2})?" + # Optional scheme: (\1)
"(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2) "(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2)
"(#{INTERNET_HOSTNAME})" + # Mandatory hostname (\3) "(#{INTERNET_HOSTNAME})" + # Mandatory hostname (\3)
"(?::(#{PORT}))?" + # Optional :port (\4) "(?::(#{PORT}))?" + # Optional :port (\4)
"(#{ABS_PATH})?" + # Optional absolute path (\5) "(#{ABS_PATH})?" + # Optional absolute path (\5)
"(?:\\?(#{QUERY}))?" + # Optional ?query (\6) "(?:\\?(#{QUERY}))?" + # Optional ?query (\6)
"(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7) "(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7)
'(?=\.?(?:\s|\)|\z))' # ends only with '(?=\.?(?:\s|\)|\z))' # ends only with optional dot + space or ")"
# optional dot + space or ")" or end of string # or end of the string
TEXTILE_SYNTAX_PREFIX = '(!|\"\:)?' # ! or ": SUSPICIOUS_PRECEDING_CHARACTER = '(!|\"\:|\"|\\\')?' # any of !, ":, ", '
INTERNET_URI_REGEXP = Regexp.new(TEXTILE_SYNTAX_PREFIX + INTERNET_URI, Regexp::EXTENDED, 'N') INTERNET_URI_REGEXP =
Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + INTERNET_URI, Regexp::EXTENDED, 'N')
end end
@ -86,7 +87,7 @@ class URIChunk < Chunk::Abstract
def self.apply_to(content) def self.apply_to(content)
content.gsub!( self.pattern ) do |matched_text| content.gsub!( self.pattern ) do |matched_text|
chunk = self.new($~) chunk = self.new($~)
if chunk.textile_url? or chunk.textile_image? if chunk.avoid_autolinking?
# do not substitute # do not substitute
matched_text matched_text
else else
@ -99,17 +100,13 @@ class URIChunk < Chunk::Abstract
def initialize(match_data) def initialize(match_data)
super(match_data) super(match_data)
@link_text = match_data[0] @link_text = match_data[0]
@textile_prefix, @original_scheme, @user, @host, @port, @path, @query, @fragment = @suspicious_preceding_character = match_data[1]
match_data[1..-1] @original_scheme, @user, @host, @port, @path, @query, @fragment = match_data[2..-1]
treat_trailing_character treat_trailing_character
end end
def textile_url? def avoid_autolinking?
@textile_prefix == '":' not @suspicious_preceding_character.nil?
end
def textile_image?
@textile_prefix == '!' and @trailing_punctuation == '!'
end end
def treat_trailing_character def treat_trailing_character
@ -121,6 +118,8 @@ class URIChunk < Chunk::Abstract
@trailing_punctuation = last_char @trailing_punctuation = last_char
@link_text.chop! @link_text.chop!
[@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop! [@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop!
else
@trailing_punctuation = nil
end end
end end
@ -174,15 +173,17 @@ class LocalURIChunk < URIChunk
# The basic URI expression as a string # The basic URI expression as a string
# Scheme and hostname are mandatory # Scheme and hostname are mandatory
LOCAL_URI = LOCAL_URI =
"(?:(#{SCHEME})://)+" + # Mandatory scheme:// (\1) "(?:(#{SCHEME})://)+" + # Mandatory scheme:// (\1)
"(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2) "(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2)
"(#{ANY_HOSTNAME})" + # Mandatory hostname (\3) "(#{ANY_HOSTNAME})" + # Mandatory hostname (\3)
"(?::(#{PORT}))?" + # Optional :port (\4) "(?::(#{PORT}))?" + # Optional :port (\4)
"(#{ABS_PATH})?" + # Optional absolute path (\5) "(#{ABS_PATH})?" + # Optional absolute path (\5)
"(?:\\?(#{QUERY}))?" + # Optional ?query (\6) "(?:\\?(#{QUERY}))?" + # Optional ?query (\6)
"(?:\\#(#{FRAGMENT}))?" # Optional #fragment (\7) "(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7)
'(?=\.?(?:\s|\)|\z))' # ends only with optional dot + space or ")"
# or end of the string
LOCAL_URI_REGEXP = Regexp.new(TEXTILE_SYNTAX_PREFIX + LOCAL_URI, Regexp::EXTENDED, 'N') LOCAL_URI_REGEXP = Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + LOCAL_URI, Regexp::EXTENDED, 'N')
end end
def LocalURIChunk.pattern def LocalURIChunk.pattern

View file

@ -12,6 +12,7 @@ class Revision
def initialize(page, number, content, created_at, author) def initialize(page, number, content, created_at, author)
@page, @number, @created_at, @author = page, number, created_at, author @page, @number, @created_at, @author = page, number, created_at, author
self.content = content self.content = content
@display_cache = nil
end end
def created_on def created_on

View file

@ -139,6 +139,15 @@ class RevisionTest < Test::Unit::TestCase
'This !http://hobix.com/sample.jpg! is a Textile image link.') 'This !http://hobix.com/sample.jpg! is a Textile image link.')
end end
# Inline <img> tags (lower- and upper-case) are expected to come out of the
# renderer unchanged apart from the surrounding <p>...</p>; in particular the
# quoted src URL is not turned into a link.
# NOTE(review): argument order is presumably (expected HTML, source markup) --
# confirm against the assert_markup_parsed_as helper.
def test_content_with_inlined_img_tag
  cases = [
    ['<p>This <img src="http://hobix.com/sample.jpg" alt="" /> is an inline image link.</p>',
     'This <img src="http://hobix.com/sample.jpg" alt="" /> is an inline image link.'],
    ['<p>This <IMG SRC="http://hobix.com/sample.jpg" alt=""> is an inline image link.</p>',
     'This <IMG SRC="http://hobix.com/sample.jpg" alt=""> is an inline image link.']
  ]
  cases.each do |rendered, markup|
    assert_markup_parsed_as(rendered, markup)
  end
end
def test_content_with_nowiki_text def test_content_with_nowiki_text
assert_markup_parsed_as( assert_markup_parsed_as(
'<p>Do not mark up [[this text]] or http://www.thislink.com.</p>', '<p>Do not mark up [[this text]] or http://www.thislink.com.</p>',

View file

@ -131,6 +131,11 @@ class URITest < Test::Unit::TestCase
match(URIChunk, 'This http://hobix.com/sample.jpg should match', match(URIChunk, 'This http://hobix.com/sample.jpg should match',
:link_text => 'http://hobix.com/sample.jpg') :link_text => 'http://hobix.com/sample.jpg')
end end
# A URL that sits inside a quoted HTML attribute -- double-quoted or
# single-quoted -- must be left alone by URIChunk.
def test_inline_html
  inline_tags = [
    '<IMG SRC="http://hobix.com/sample.jpg">',
    "<img src='http://hobix.com/sample.jpg'/>"
  ]
  inline_tags.each do |tag|
    assert_conversion_does_not_apply(URIChunk, tag)
  end
end
def test_non_uri def test_non_uri
# "so" is a valid country code; "libproxy.so" is a valid url # "so" is a valid country code; "libproxy.so" is a valid url