instiki/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb

# Warning: this module is experimental and subject to change and even removal
# at any time. 
# 
# For background/rationale, see:
#  * http://www.intertwingly.net/blog/2007/01/08/Xhtml5lib
#  * http://tinyurl.com/ylfj8k (and follow-ups)
# 
# References:
#  * http://googlereader.blogspot.com/2005/12/xml-errors-in-feeds.html
#  * http://wiki.whatwg.org/wiki/HtmlVsXhtml
# 
# @@TODO:
# * Selectively lowercase only XHTML, but not foreign markup
require 'html5lib/html5parser'
require 'html5lib/constants'

module HTML5lib

# liberal XML parser
class XMLParser < HTMLParser

    def initialize(options={})
        super options
        @phases[:initial] = XmlRootPhase.new(self, @tree)
    end

    def normalizeToken(token)
        if token[:type] == :StartTag or token[:type] == :EmptyTag
            # We need to remove the duplicate attributes and convert attributes
            # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}

            token[:data] = Hash[*token[:data].reverse.flatten]

            # For EmptyTags, process both a Start and an End tag
            if token[:type] == :EmptyTag
                @phase.processStartTag(token[:name], token[:data])
                token[:data] = {}
                token[:type] = :EndTag
            end

        elsif token[:type] == :EndTag
            if token[:data]
               parseError(_("End tag contains unexpected attributes."))
            end

        elsif token[:type] == :Comment
            # Rescue CDATA from the comments
            if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
                token[:type] = :Characters
                token[:data] = token[:data][7 ... -2]
            end
        end

        return token
    end
end

# liberal XMTHML parser
class XHTMLParser < XMLParser

    def initialize(options={})
        super options
        @phases[:initial] = InitialPhase.new(self, @tree)
        @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
    end

    def normalizeToken(token)
        super(token)

        # ensure that non-void XHTML elements have content so that separate
        # open and close tags are emitted
        if token[:type]  == :EndTag and \
            not VOID_ELEMENTS.include? token[:name] and \
            token[:name] == @tree.openElements[-1].name and \
            not @tree.openElements[-1].hasContent
            @tree.insertText('') unless
                @tree.openElements.any? {|e|
                    e.attributes.keys.include? 'xmlns' and
                    e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
                }
        end

        return token
    end
end

class XhmlRootPhase < RootElementPhase
    def insertHtmlElement
        element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
        @tree.openElements.push(element)
        @tree.document.appendChild(element)
        @parser.phase = @parser.phases[:beforeHead]
    end
end

class XmlRootPhase < Phase
    # Prime the Xml parser
    @start_tag_handlers = Hash.new(:startTagOther)
    @end_tag_handlers = Hash.new(:endTagOther)
    def startTagOther(name, attributes)
        @tree.openElements.push(@tree.document)
        element = @tree.createElement(name, attributes)
        @tree.openElements[-1].appendChild(element)
        @tree.openElements.push(element)
        @parser.phase = XmlElementPhase.new(@parser,@tree)
    end
    def endTagOther(name)
        super
        @tree.openElements.pop
    end
end

class XmlElementPhase < Phase
    # Generic handling for all XML elements

    @start_tag_handlers = Hash.new(:startTagOther)
    @end_tag_handlers = Hash.new(:endTagOther)

    def startTagOther(name, attributes)
        element = @tree.createElement(name, attributes)
        @tree.openElements[-1].appendChild(element)
        @tree.openElements.push(element)
    end

    def endTagOther(name)
        for node in @tree.openElements.reverse
            if node.name == name
                {} while @tree.openElements.pop != node
                break
            else
                @parser.parseError
            end
        end
    end

    def processCharacters(data)
        @tree.insertText(data)
    end
end

end
HTML5lib Sanitizer Replaced native Sanitizer with HTML5lib version. Synced with latest Maruku. 2007-05-26 03:52:27 +02:00			`# Warning: this module is experimental and subject to change and even removal`
			`# at any time.`
			`#`
			`# For background/rationale, see:`
			`# * http://www.intertwingly.net/blog/2007/01/08/Xhtml5lib`
			`# * http://tinyurl.com/ylfj8k (and follow-ups)`
			`#`
			`# References:`
			`# * http://googlereader.blogspot.com/2005/12/xml-errors-in-feeds.html`
			`# * http://wiki.whatwg.org/wiki/HtmlVsXhtml`
			`#`
			`# @@TODO:`
			`# * Selectively lowercase only XHTML, but not foreign markup`
			`require 'html5lib/html5parser'`
			`require 'html5lib/constants'`

			`module HTML5lib`

			`# liberal XML parser`
			`class XMLParser < HTMLParser`

			`def initialize(options={})`
			`super options`
			`@phases[:initial] = XmlRootPhase.new(self, @tree)`
			`end`

			`def normalizeToken(token)`
			`if token[:type] == :StartTag or token[:type] == :EmptyTag`
			`# We need to remove the duplicate attributes and convert attributes`
			`# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}`

			`token[:data] = Hash[*token[:data].reverse.flatten]`

			`# For EmptyTags, process both a Start and an End tag`
			`if token[:type] == :EmptyTag`
			`@phase.processStartTag(token[:name], token[:data])`
			`token[:data] = {}`
			`token[:type] = :EndTag`
			`end`

			`elsif token[:type] == :EndTag`
			`if token[:data]`
			`parseError(_("End tag contains unexpected attributes."))`
			`end`

			`elsif token[:type] == :Comment`
			`# Rescue CDATA from the comments`
			`if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"`
			`token[:type] = :Characters`
			`token[:data] = token[:data][7 ... -2]`
			`end`
			`end`

			`return token`
			`end`
			`end`

			`# liberal XMTHML parser`
			`class XHTMLParser < XMLParser`

			`def initialize(options={})`
			`super options`
			`@phases[:initial] = InitialPhase.new(self, @tree)`
			`@phases[:rootElement] = XhmlRootPhase.new(self, @tree)`
			`end`

			`def normalizeToken(token)`
			`super(token)`

			`# ensure that non-void XHTML elements have content so that separate`
			`# open and close tags are emitted`
			`if token[:type] == :EndTag and \`
			`not VOID_ELEMENTS.include? token[:name] and \`
			`token[:name] == @tree.openElements[-1].name and \`
			`not @tree.openElements[-1].hasContent`
			`@tree.insertText('') unless`
			`@tree.openElements.any? {\|e\|`
			`e.attributes.keys.include? 'xmlns' and`
			`e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'`
			`}`
			`end`

			`return token`
			`end`
			`end`

			`class XhmlRootPhase < RootElementPhase`
			`def insertHtmlElement`
			`element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})`
			`@tree.openElements.push(element)`
			`@tree.document.appendChild(element)`
			`@parser.phase = @parser.phases[:beforeHead]`
			`end`
			`end`

			`class XmlRootPhase < Phase`
			`# Prime the Xml parser`
			`@start_tag_handlers = Hash.new(:startTagOther)`
			`@end_tag_handlers = Hash.new(:endTagOther)`
			`def startTagOther(name, attributes)`
			`@tree.openElements.push(@tree.document)`
			`element = @tree.createElement(name, attributes)`
			`@tree.openElements[-1].appendChild(element)`
			`@tree.openElements.push(element)`
			`@parser.phase = XmlElementPhase.new(@parser,@tree)`
			`end`
			`def endTagOther(name)`
			`super`
			`@tree.openElements.pop`
			`end`
			`end`

			`class XmlElementPhase < Phase`
			`# Generic handling for all XML elements`

			`@start_tag_handlers = Hash.new(:startTagOther)`
			`@end_tag_handlers = Hash.new(:endTagOther)`

			`def startTagOther(name, attributes)`
			`element = @tree.createElement(name, attributes)`
			`@tree.openElements[-1].appendChild(element)`
			`@tree.openElements.push(element)`
			`end`

			`def endTagOther(name)`
			`for node in @tree.openElements.reverse`
			`if node.name == name`
			`{} while @tree.openElements.pop != node`
			`break`
			`else`
			`@parser.parseError`
			`end`
			`end`
			`end`

			`def processCharacters(data)`
			`@tree.insertText(data)`
			`end`
			`end`

			`end`