Sanitize url refs in SVG attributes

Add some tests.
Sync with latest HTML5lib (includes above sanitization improvements).
This commit is contained in:
Jacques Distler 2007-10-27 17:34:29 -05:00
parent ae82f1be49
commit 5208bbf0af
28 changed files with 1277 additions and 735 deletions

View file

@ -999,7 +999,7 @@ module HTML5
_("Unexpected select start tag in the select phase " +
"implies select start tag."),
"unexpected-start-tag-in-select" =>
_("Unexpected start tag token (%(name) in the select phase. " +
_("Unexpected start tag token (%(name)) in the select phase. " +
"Ignored."),
"unexpected-end-tag-in-select" =>
_("Unexpected end tag (%(name)) in the select phase. Ignored."),
@ -1039,6 +1039,8 @@ module HTML5
"expected-eof-but-got-end-tag" =>
_("Unexpected end tag (%(name))" +
". Expected end of file."),
"unexpected-end-table-in-caption" =>
_("Unexpected end table tag in caption. Generates implied end caption.")
}
end

View file

@ -1,4 +1,5 @@
require 'html5/html5parser/phase'
require 'core_ext/kernel'
module HTML5
class InBodyPhase < Phase
@ -50,13 +51,12 @@ module HTML5
super(parser, tree)
# for special handling of whitespace in <pre>
@processSpaceCharactersDropNewline = false
if $-w
$-w = false
alias processSpaceCharactersNonPre processSpaceCharacters
class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end
$-w = true
else
alias processSpaceCharactersNonPre processSpaceCharacters
class << self; alias processSpaceCharactersNonPre processSpaceCharacters; end
end
end
@ -65,10 +65,18 @@ module HTML5
if $-w
$-w = false
alias processSpaceCharacters processSpaceCharactersNonPre
class << self
silence do
alias processSpaceCharacters processSpaceCharactersNonPre
end
end
$-w = true
else
alias processSpaceCharacters processSpaceCharactersNonPre
class << self
silence do
alias processSpaceCharacters processSpaceCharactersNonPre
end
end
end
if (data.length > 0 and data[0] == ?\n &&
@ -121,7 +129,13 @@ module HTML5
def startTagCloseP(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insert_element(name, attributes)
@processSpaceCharactersDropNewline = true if name == 'pre'
if name == 'pre'
class << self
silence do
alias processSpaceCharacters processSpaceCharactersDropNewline
end
end
end
end
def startTagForm(name, attributes)
@ -291,8 +305,7 @@ module HTML5
# XXX Form element pointer checking here as well...
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :RCDATA
@processSpaceCharactersDropNewline = true
alias processSpaceCharacters processSpaceCharactersDropNewline
class << self; alias processSpaceCharacters processSpaceCharactersDropNewline; end
end
# iframe, noembed, noframes, noscript (if scripting enabled)
@ -344,9 +357,9 @@ module HTML5
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
unless @tree.open_elements[1].name == 'body'
unless @tree.open_elements[1] && @tree.open_elements[1].name == 'body'
# inner_html case
parse_error
parse_error "unexpected-end-tag", {:name => 'body'}
return
end
unless @tree.open_elements.last.name == 'body'
@ -364,7 +377,14 @@ module HTML5
def endTagBlock(name)
#Put us back in the right whitespace handling mode
@processSpaceCharactersDropNewline = false if name == 'pre'
if name == 'pre'
class << self;
silence do
alias processSpaceCharacters processSpaceCharactersNonPre;
end
end
end
@tree.generateImpliedEndTags if in_scope?(name)

View file

@ -5,9 +5,9 @@ module HTML5
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableElement'
handle_start 'html', %w(caption col colgroup tbody td tfoot th thead tr) => 'TableElement'
handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
handle_end 'caption', 'table', %w(body col colgroup html tbody td tfoot th thead tr) => 'Ignore'
def ignoreEndTagCaption
!in_scope?('caption', true)
@ -18,7 +18,7 @@ module HTML5
end
def startTagTableElement(name, attributes)
parse_error
parse_error "unexpected-end-tag", {"name" => name}
#XXX Have to duplicate logic here to find out if the tag is ignored
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@ -33,7 +33,7 @@ module HTML5
if ignoreEndTagCaption
# inner_html case
assert @parser.inner_html
parse_error
parse_error "unexpected-end-tag", {"name" => name}
else
# AT this code is quite similar to endTagTable in "InTable"
@tree.generateImpliedEndTags
@ -41,7 +41,7 @@ module HTML5
unless @tree.open_elements[-1].name == 'caption'
parse_error("expected-one-end-tag-but-got-another",
{"gotName" => "caption",
"expectedNmae" => @tree.open_elements.last.name})
"expectedName" => @tree.open_elements.last.name})
end
remove_open_elements_until('caption')
@ -52,7 +52,7 @@ module HTML5
end
def endTagTable(name)
parse_error
parse_error "unexpected-end-table-in-caption"
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@parser.phase.processEndTag(name) unless ignoreEndTag
@ -65,6 +65,5 @@ module HTML5
def endTagOther(name)
@parser.phases[:inBody].processEndTag(name)
end
end
end

View file

@ -56,7 +56,7 @@ module HTML5
@parser.phase.processEndTag(name)
else
# sometimes inner_html case
parse_error
parse_error "unexpected-end-tag", {:name => name}
end
end

View file

@ -34,7 +34,7 @@ module HTML5
if ignoreEndTagColgroup
# inner_html case
assert @parser.inner_html
parse_error
parse_error "unexpected-end-tag", {:name => name}
else
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]

View file

@ -27,8 +27,7 @@ module HTML5
end
def startTagOther(name, attributes)
parse_error("unexpected-start-tag-in-frameset",
{"name" => name})
parse_error("unexpected-start-tag-in-frameset", {"name" => name})
end
def endTagFrameset(name)

View file

@ -35,7 +35,7 @@ module HTML5
if ignoreEndTagTr
# inner_html case
assert @parser.inner_html
parse_error
parse_error "unexpected-end-tag", {:name => name}
else
clearStackToTableRowContext
@tree.open_elements.pop
@ -57,7 +57,7 @@ module HTML5
@parser.phase.processEndTag(name)
else
# inner_html case
parse_error
parse_error "unexpected-end-tag", {:name => name}
end
end
@ -75,8 +75,7 @@ module HTML5
# XXX unify this with other table helper methods
def clearStackToTableRowContext
until %w[tr html].include?(name = @tree.open_elements.last.name)
parse_error("unexpected-implied-end-tag-in-table-row",
{"name" => @tree.open_elements.last.name})
parse_error("unexpected-implied-end-tag-in-table-row", {"name" => @tree.open_elements.last.name})
@tree.open_elements.pop
end
end

View file

@ -33,7 +33,7 @@ module HTML5
@parser.phase.processStartTag(name, attributes)
else
# inner_html case
parse_error
parse_error "unexpected-start-tag", {:name => name}
end
end
@ -47,8 +47,7 @@ module HTML5
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
else
parse_error("unexpected-end-tag-in-table-body",
{"name" => name})
parse_error("unexpected-end-tag-in-table-body", {"name" => name})
end
end
@ -59,13 +58,12 @@ module HTML5
@parser.phase.processEndTag(name)
else
# inner_html case
parse_error
parse_error "unexpected-end-tag", {:name => name}
end
end
def endTagIgnore(name)
parse_error("unexpected-end-tag-in-table-body",
{"name" => name})
parse_error("unexpected-end-tag-in-table-body", {"name" => name})
end
def endTagOther(name)

View file

@ -82,7 +82,7 @@ module HTML5
else
# inner_html case
assert @parser.inner_html
parse_error
parse_error "unexpected-end-tag", {:name => name}
end
end

View file

@ -47,7 +47,7 @@ module HTML5
#Encoding to use if no other information can be found
@DEFAULT_ENCODING = 'windows-1252'
#Detect encoding iff no explicit "transport level" encoding is supplied
if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
@char_encoding = detect_encoding
@ -235,14 +235,16 @@ module HTML5
# Returns (line, col) of the current position in the stream.
def position
line, col = @line, @col
@queue.reverse.each do |c|
if c == "\n"
line -= 1
raise RuntimeError.new("col=#{col}") unless col == 0
col = @line_lengths[line]
else
col -= 1
end
if @queue and @queue.last != :EOF
@queue.reverse.each do |c|
if c == "\n"
line -= 1
raise RuntimeError.new("col=#{col}") unless col == 0
col = @line_lengths[line]
else
col -= 1
end
end
end
return [line + 1, col]
end
@ -434,7 +436,13 @@ module HTML5
end
break unless keep_parsing
end
@encoding = @encoding.strip unless @encoding.nil?
unless @encoding.nil?
@encoding = @encoding.strip
if ["UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE"].include?(@encoding.upcase)
@encoding = 'utf-8'
end
end
return @encoding
end

View file

@ -78,6 +78,9 @@ module HTML5
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path fill filter marker marker-start
marker-mid marker-end mask stroke textpath]
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
border-bottom-color border-collapse border-color border-left-color
border-right-color border-top-color clear color cursor direction
@ -120,6 +123,9 @@ module HTML5
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0])
attrs.delete attr
end
SVG_ATTR_VAL_ALLOWS_REF.each do |attr|
attrs.delete attr if attrs[attr].to_s.downcase =~ /url\(\s*[^#]/m
end
end
if attrs['style']
attrs['style'] = sanitize_css(attrs['style'])

View file

@ -144,7 +144,7 @@ module HTML5
elsif type == :Comment
data = token[:data]
serialize_error(_("Comment contains --")) if data.index("--")
serialize_error("Comment contains --") if data.index("--")
comment = "<!--%s-->" % token[:data]
result << comment

View file

@ -1,3 +1,3 @@
module HTML5
VERSION = '0.1.0'
VERSION = '0.10.0'
end