HTML5lib is Back.

Synced with the latest version of HTML5lib, which fixes a problem with astral-plane characters.
I should really run some benchmarks, but the HTML5lib Sanitizer seems to be 2-5 times slower than the old sanitizer.
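
(A rough way to pin that figure down would be a comparison with Ruby's stdlib Benchmark. The sketch below is only an illustration, not part of this commit: old_sanitize and html5lib_sanitize are hypothetical wrappers around the old and new sanitizers, and sample_page.html stands in for any representative wiki page.)

require 'benchmark'

# Hypothetical wrappers -- fill in a call to the old regexp-based
# sanitizer and to the HTML5lib-based sanitizer respectively.
def old_sanitize(html)
end

def html5lib_sanitize(html)
end

html = File.read('sample_page.html')   # any representative wiki page

Benchmark.bm(10) do |bm|
  bm.report('old:')      { 100.times { old_sanitize(html) } }
  bm.report('html5lib:') { 100.times { html5lib_sanitize(html) } }
end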
Jacques Distler 2007-05-30 10:45:52 -05:00
parent e1a6827f1f
commit 4dd70af5ae
39 changed files with 4843 additions and 5576 deletions


@@ -1,62 +1,62 @@
require 'html5lib/constants'

# Replays a tokenizer's token stream in the flat array format used by
# the html5lib tokenizer test fixtures.
class TokenizerTestParser
  def initialize(tokenizer)
    @tokenizer = tokenizer
  end

  def parse
    @outputTokens = []

    debug = nil
    for token in @tokenizer
      debug = token.inspect if token[:type] == :ParseError
      send('process' + token[:type].to_s, token)
    end

    return @outputTokens
  end

  def processDoctype(token)
    @outputTokens.push(["DOCTYPE", token[:name], token[:data]])
  end

  def processStartTag(token)
    @outputTokens.push(["StartTag", token[:name], token[:data]])
  end

  def processEmptyTag(token)
    # An empty (self-closing) tag on a non-void element is a parse error.
    if not HTML5lib::VOID_ELEMENTS.include? token[:name]
      @outputTokens.push("ParseError")
    end
    @outputTokens.push(["StartTag", token[:name], token[:data]])
  end

  def processEndTag(token)
    # End tags must not carry attributes.
    if token[:data].length > 0
      self.processParseError(token)
    end
    @outputTokens.push(["EndTag", token[:name]])
  end

  def processComment(token)
    @outputTokens.push(["Comment", token[:data]])
  end

  def processCharacters(token)
    @outputTokens.push(["Character", token[:data]])
  end

  alias processSpaceCharacters processCharacters

  def processEOF(token)
  end

  def processParseError(token)
    @outputTokens.push("ParseError")
  end
end
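
For reference, a minimal usage sketch (not part of this commit): parse only needs something enumerable that yields token hashes, so a plain array can stand in for a real tokenizer here; the tokens below are made up for illustration.

tokens = [
  {:type => :StartTag,   :name => 'p', :data => []},
  {:type => :Characters, :data => 'Hello'},
  {:type => :EndTag,     :name => 'p', :data => []},
  {:type => :EOF}
]

p TokenizerTestParser.new(tokens).parse
# => [["StartTag", "p", []], ["Character", "Hello"], ["EndTag", "p"]]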