#!/usr/bin/env ruby require File.join(File.dirname(__FILE__), 'preamble') require 'html5/html5parser' require 'html5/liberalxmlparser' require 'html5/treewalkers' require 'html5/serializer' require 'html5/sanitizer' class SanitizeTest < Test::Unit::TestCase include HTML5 def sanitize_xhtml stream XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).join end def sanitize_html stream HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).join end def sanitize_rexml stream require 'rexml/document' doc = REXML::Document.new("
foo
foo <bad>bar</bad> baz
" htmloutput = "foo <bad>bar</bad> baz
" rexmloutput = attribute_name.include?(':') && !(attribute_name =~ /^xml(ns)?:/) ? "Ill-formed XHTML!" : output check_sanitization(input, htmloutput, output, rexmloutput) end end HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name| define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do input = "foo
foo <bad>bar</bad> baz
" rexmloutput = attribute_name.include?(':') ? "Ill-formed XHTML!" : output check_sanitization(input, output, output, rexmloutput) end end HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol| define_method "test_should_allow_#{protocol}_uris" do input = %(foo) output = "foo" check_sanitization(input, output, output, output) end end HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol| define_method "test_should_allow_uppercase_#{protocol}_uris" do input = %(foo) output = "foo" check_sanitization(input, output, output, output) end end HTMLSanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name| next unless HTMLSanitizer::ALLOWED_ELEMENTS.include?(tag_name) define_method "test_#{tag_name}_should_allow_local_href" do input = %(<#{tag_name} xlink:href="#foo"/>) output = "<#{tag_name.downcase} xlink:href='#foo'/>" xhtmloutput = "<#{tag_name} xlink:href='#foo'>#{tag_name}>" rexmloutput = "Ill-formed XHTML!" check_sanitization(input, output, xhtmloutput, rexmloutput) end define_method "test_#{tag_name}_should_allow_local_href_with_newline" do input = %(<#{tag_name} xlink:href="\n#foo"/>) output = "<#{tag_name.downcase} xlink:href='\n#foo'/>" xhtmloutput = "<#{tag_name} xlink:href='\n#foo'>#{tag_name}>" rexmloutput = "Ill-formed XHTML!" check_sanitization(input, output, xhtmloutput, rexmloutput) end define_method "test_#{tag_name}_should_forbid_nonlocal_href" do input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>) output = "<#{tag_name.downcase}/>" xhtmloutput = "<#{tag_name}>#{tag_name}>" rexmloutput = "Ill-formed XHTML!" check_sanitization(input, output, xhtmloutput, rexmloutput) end define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>) output = "<#{tag_name.downcase}/>" xhtmloutput = "<#{tag_name}>#{tag_name}>" rexmloutput = "Ill-formed XHTML!" check_sanitization(input, output, xhtmloutput, rexmloutput) end end def test_should_handle_astral_plane_characters input = "𝒵 𝔸
" output = "\360\235\222\265 \360\235\224\270
" check_sanitization(input, output, output, output) input = "