From 8ccaad85a5389c89eace76d957fb1da9ae60fd08 Mon Sep 17 00:00:00 2001
From: Jacques Distler
Date: Wed, 4 Jul 2007 17:36:59 -0500
Subject: [PATCH] Sync with latest HTML5lib and latest Maruku
---
lib/sanitize.rb | 14 +-
.../HTML5lib/lib/{html5lib.rb => html5.rb} | 22 +-
.../plugins/HTML5lib/lib/html5/constants.rb | 817 ++++++++++++++++++
vendor/plugins/HTML5lib/lib/html5/filters.rb | 1 +
.../lib/{html5lib => html5}/filters/base.rb | 2 +-
.../filters/inject_meta_charset.rb | 4 +-
.../filters/optionaltags.rb | 6 +-
.../{html5lib => html5}/filters/sanitizer.rb | 6 +-
.../{html5lib => html5}/filters/whitespace.rb | 6 +-
.../lib/{html5lib => html5}/html5parser.rb | 492 +++++------
.../html5parser/after_body_phase.rb | 4 +-
.../html5parser/after_frameset_phase.rb | 4 +-
.../html5parser/after_head_phase.rb | 4 +-
.../html5parser/before_head_phase.rb | 6 +-
.../html5parser/in_body_phase.rb | 36 +-
.../html5parser/in_caption_phase.rb | 4 +-
.../html5parser/in_cell_phase.rb | 4 +-
.../html5parser/in_column_group_phase.rb | 4 +-
.../html5parser/in_frameset_phase.rb | 4 +-
.../html5parser/in_head_phase.rb | 6 +-
.../html5parser/in_row_phase.rb | 4 +-
.../html5parser/in_select_phase.rb | 4 +-
.../html5parser/in_table_body_phase.rb | 4 +-
.../html5parser/in_table_phase.rb | 4 +-
.../html5parser/initial_phase.rb | 4 +-
.../{html5lib => html5}/html5parser/phase.rb | 2 +-
.../html5parser/root_element_phase.rb | 4 +-
.../html5parser/trailing_end_phase.rb | 4 +-
.../lib/{html5lib => html5}/inputstream.rb | 63 +-
.../{html5lib => html5}/liberalxmlparser.rb | 24 +-
.../lib/{html5lib => html5}/sanitizer.rb | 3 +-
.../plugins/HTML5lib/lib/html5/serializer.rb | 2 +
.../serializer/htmlserializer.rb | 15 +-
.../serializer/xhtmlserializer.rb | 7 +-
.../lib/{html5lib => html5}/tokenizer.rb | 103 ++-
.../lib/{html5lib => html5}/treebuilders.rb | 8 +-
.../{html5lib => html5}/treebuilders/base.rb | 4 +-
.../treebuilders/hpricot.rb | 442 +++++-----
.../{html5lib => html5}/treebuilders/rexml.rb | 4 +-
.../treebuilders/simpletree.rb | 4 +-
.../lib/{html5lib => html5}/treewalkers.rb | 10 +-
.../{html5lib => html5}/treewalkers/base.rb | 4 +-
.../treewalkers/hpricot.rb | 6 +-
.../{html5lib => html5}/treewalkers/rexml.rb | 6 +-
.../treewalkers/simpletree.rb | 8 +-
.../HTML5lib/lib/html5lib/constants.rb | 708 ---------------
.../plugins/HTML5lib/lib/html5lib/filters.rb | 1 -
.../HTML5lib/lib/html5lib/serializer.rb | 2 -
vendor/plugins/HTML5lib/parse.rb | 24 +-
.../HTML5lib/testdata/encoding/tests2.dat | 1 -
.../HTML5lib/testdata/serializer/core.test | 3 +-
.../HTML5lib/testdata/serializer/options.test | 6 +
.../HTML5lib/testdata/tokenizer/test1.test | 16 +-
.../HTML5lib/testdata/tokenizer/test2.test | 14 +-
.../testdata/tree-construction/tests1.dat | 11 +-
.../testdata/tree-construction/tests2.dat | 1 +
.../testdata/tree-construction/tests3.dat | 30 +-
.../testdata/tree-construction/tests4.dat | 28 +-
.../testdata/tree-construction/tests6.dat | 38 +
vendor/plugins/HTML5lib/tests/preamble.rb | 162 ++--
.../plugins/HTML5lib/tests/test_encoding.rb | 16 +-
vendor/plugins/HTML5lib/tests/test_lxp.rb | 75 +-
vendor/plugins/HTML5lib/tests/test_parser.rb | 25 +-
.../plugins/HTML5lib/tests/test_sanitizer.rb | 14 +-
.../plugins/HTML5lib/tests/test_serializer.rb | 14 +-
vendor/plugins/HTML5lib/tests/test_stream.rb | 4 +-
.../plugins/HTML5lib/tests/test_tokenizer.rb | 6 +-
.../HTML5lib/tests/test_treewalkers.rb | 33 +-
.../HTML5lib/tests/tokenizer_test_parser.rb | 126 +--
.../maruku/lib/maruku/input/linesource.rb | 1 +
.../maruku/lib/maruku/input/parse_block.rb | 47 +-
71 files changed, 1974 insertions(+), 1621 deletions(-)
rename vendor/plugins/HTML5lib/lib/{html5lib.rb => html5.rb} (76%)
create mode 100755 vendor/plugins/HTML5lib/lib/html5/constants.rb
create mode 100644 vendor/plugins/HTML5lib/lib/html5/filters.rb
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/filters/base.rb (89%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/filters/inject_meta_charset.rb (98%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/filters/optionaltags.rb (99%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/filters/sanitizer.rb (73%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/filters/whitespace.rb (90%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser.rb (93%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/after_body_phase.rb (96%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/after_frameset_phase.rb (94%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/after_head_phase.rb (95%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/before_head_phase.rb (88%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_body_phase.rb (94%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_caption_phase.rb (97%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_cell_phase.rb (97%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_column_group_phase.rb (96%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_frameset_phase.rb (96%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_head_phase.rb (96%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_row_phase.rb (97%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_select_phase.rb (97%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_table_body_phase.rb (97%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/in_table_phase.rb (98%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/initial_phase.rb (99%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/phase.rb (99%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/root_element_phase.rb (94%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/html5parser/trailing_end_phase.rb (94%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/inputstream.rb (90%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/liberalxmlparser.rb (87%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/sanitizer.rb (99%)
create mode 100644 vendor/plugins/HTML5lib/lib/html5/serializer.rb
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/serializer/htmlserializer.rb (93%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/serializer/xhtmlserializer.rb (72%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/tokenizer.rb (93%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treebuilders.rb (70%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treebuilders/base.rb (99%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treebuilders/hpricot.rb (95%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treebuilders/rexml.rb (98%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treebuilders/simpletree.rb (98%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treewalkers.rb (66%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treewalkers/base.rb (98%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treewalkers/hpricot.rb (89%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treewalkers/rexml.rb (89%)
rename vendor/plugins/HTML5lib/lib/{html5lib => html5}/treewalkers/simpletree.rb (86%)
delete mode 100755 vendor/plugins/HTML5lib/lib/html5lib/constants.rb
delete mode 100644 vendor/plugins/HTML5lib/lib/html5lib/filters.rb
delete mode 100644 vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
diff --git a/lib/sanitize.rb b/lib/sanitize.rb
index c36e7583..c0773fd4 100644
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@@ -25,14 +25,14 @@
module Sanitize
- require 'html5lib/html5parser'
- require 'html5lib/liberalxmlparser'
- require 'html5lib/treewalkers'
- require 'html5lib/treebuilders'
- require 'html5lib/serializer'
- require 'html5lib/sanitizer'
+ require 'html5/html5parser'
+ require 'html5/liberalxmlparser'
+ require 'html5/treewalkers'
+ require 'html5/treebuilders'
+ require 'html5/serializer'
+ require 'html5/sanitizer'
- include HTML5lib
+ include HTML5
# Sanitize a string, parsed using XHTML parsing rules.
#
diff --git a/vendor/plugins/HTML5lib/lib/html5lib.rb b/vendor/plugins/HTML5lib/lib/html5.rb
similarity index 76%
rename from vendor/plugins/HTML5lib/lib/html5lib.rb
rename to vendor/plugins/HTML5lib/lib/html5.rb
index b4aba9a9..bd2174f2 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib.rb
+++ b/vendor/plugins/HTML5lib/lib/html5.rb
@@ -1,11 +1,11 @@
-require 'html5lib/html5parser'
-
-module HTML5lib
- def self.parse(stream, options={})
- HTMLParser.parse(stream, options)
- end
-
- def self.parseFragment(stream, options={})
- HTMLParser.parse(stream, options)
- end
-end
+require 'html5/html5parser'
+
+module HTML5
+ def self.parse(stream, options={})
+ HTMLParser.parse(stream, options)
+ end
+
+ def self.parseFragment(stream, options={})
+ HTMLParser.parse(stream, options)
+ end
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5/constants.rb b/vendor/plugins/HTML5lib/lib/html5/constants.rb
new file mode 100755
index 00000000..356fb836
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@@ -0,0 +1,817 @@
+module HTML5
+
+ class EOF < Exception; end
+
+ CONTENT_MODEL_FLAGS = [
+ :PCDATA,
+ :RCDATA,
+ :CDATA,
+ :PLAINTEXT
+ ]
+
+ SCOPING_ELEMENTS = %w[
+ button
+ caption
+ html
+ marquee
+ object
+ table
+ td
+ th
+ ]
+
+ FORMATTING_ELEMENTS = %w[
+ a
+ b
+ big
+ em
+ font
+ i
+ nobr
+ s
+ small
+ strike
+ strong
+ tt
+ u
+ ]
+
+ SPECIAL_ELEMENTS = %w[
+ address
+ area
+ base
+ basefont
+ bgsound
+ blockquote
+ body
+ br
+ center
+ col
+ colgroup
+ dd
+ dir
+ div
+ dl
+ dt
+ embed
+ fieldset
+ form
+ frame
+ frameset
+ h1
+ h2
+ h3
+ h4
+ h5
+ h6
+ head
+ hr
+ iframe
+ image
+ img
+ input
+ isindex
+ li
+ link
+ listing
+ menu
+ meta
+ noembed
+ noframes
+ noscript
+ ol
+ optgroup
+ option
+ p
+ param
+ plaintext
+ pre
+ script
+ select
+ spacer
+ style
+ tbody
+ textarea
+ tfoot
+ thead
+ title
+ tr
+ ul
+ wbr
+ ]
+
+ SPACE_CHARACTERS = %W[
+ \t
+ \n
+ \x0B
+ \x0C
+ \x20
+ \r
+ ]
+
+ TABLE_INSERT_MODE_ELEMENTS = %w[
+ table
+ tbody
+ tfoot
+ thead
+ tr
+ ]
+
+ ASCII_LOWERCASE = ('a'..'z').to_a.join('')
+ ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
+ ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
+ DIGITS = '0'..'9'
+ HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
+
+ # Heading elements need to be ordered
+ HEADING_ELEMENTS = %w[
+ h1
+ h2
+ h3
+ h4
+ h5
+ h6
+ ]
+
+ # XXX What about event-source and command?
+ VOID_ELEMENTS = %w[
+ base
+ link
+ meta
+ hr
+ br
+ img
+ embed
+ param
+ area
+ col
+ input
+ ]
+
+ CDATA_ELEMENTS = %w[title textarea]
+
+ RCDATA_ELEMENTS = %w[
+ style
+ script
+ xmp
+ iframe
+ noembed
+ noframes
+ noscript
+ ]
+
+ BOOLEAN_ATTRIBUTES = {
+ :global => %w[irrelevant],
+ 'style' => %w[scoped],
+ 'img' => %w[ismap],
+ 'audio' => %w[autoplay controls],
+ 'video' => %w[autoplay controls],
+ 'script' => %w[defer async],
+ 'details' => %w[open],
+ 'datagrid' => %w[multiple disabled],
+ 'command' => %w[hidden disabled checked default],
+ 'menu' => %w[autosubmit],
+ 'fieldset' => %w[disabled readonly],
+ 'option' => %w[disabled readonly selected],
+ 'optgroup' => %w[disabled readonly],
+ 'button' => %w[disabled autofocus],
+ 'input' => %w[disabled readonly required autofocus checked ismap],
+ 'select' => %w[disabled readonly autofocus multiple],
+ 'output' => %w[disabled readonly]
+ }
+
+ # ENTITIES_WINDOWS1252 has to be _ordered_: entry i holds the Unicode code point for Windows-1252 byte 0x80 + i.
+ ENTITIES_WINDOWS1252 = [
+ 8364, # 0x80 0x20AC EURO SIGN
+ 65533, # 0x81 UNDEFINED
+ 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
+ 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
+ 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
+ 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
+ 8224, # 0x86 0x2020 DAGGER
+ 8225, # 0x87 0x2021 DOUBLE DAGGER
+ 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
+ 8240, # 0x89 0x2030 PER MILLE SIGN
+ 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
+ 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
+ 65533, # 0x8D UNDEFINED
+ 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
+ 65533, # 0x8F UNDEFINED
+ 65533, # 0x90 UNDEFINED
+ 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
+ 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
+ 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
+ 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
+ 8226, # 0x95 0x2022 BULLET
+ 8211, # 0x96 0x2013 EN DASH
+ 8212, # 0x97 0x2014 EM DASH
+ 732, # 0x98 0x02DC SMALL TILDE
+ 8482, # 0x99 0x2122 TRADE MARK SIGN
+ 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
+ 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
+ 65533, # 0x9D UNDEFINED
+ 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
+ 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
+ ]
+
+ # ENTITIES was generated from Python using the following code:
+ #
+ # import constants
+ # entities = constants.entities.items()
+ # entities.sort()
+ # list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
+ # repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
+ # for entity, value in entities]
+ # print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
+
+ ENTITIES = {
+ 'AElig' => "\xc3\x86",
+ 'AElig;' => "\xc3\x86",
+ 'AMP' => '&',
+ 'AMP;' => '&',
+ 'Aacute' => "\xc3\x81",
+ 'Aacute;' => "\xc3\x81",
+ 'Acirc' => "\xc3\x82",
+ 'Acirc;' => "\xc3\x82",
+ 'Agrave' => "\xc3\x80",
+ 'Agrave;' => "\xc3\x80",
+ 'Alpha;' => "\xce\x91",
+ 'Aring' => "\xc3\x85",
+ 'Aring;' => "\xc3\x85",
+ 'Atilde' => "\xc3\x83",
+ 'Atilde;' => "\xc3\x83",
+ 'Auml' => "\xc3\x84",
+ 'Auml;' => "\xc3\x84",
+ 'Beta;' => "\xce\x92",
+ 'COPY' => "\xc2\xa9",
+ 'COPY;' => "\xc2\xa9",
+ 'Ccedil' => "\xc3\x87",
+ 'Ccedil;' => "\xc3\x87",
+ 'Chi;' => "\xce\xa7",
+ 'Dagger;' => "\xe2\x80\xa1",
+ 'Delta;' => "\xce\x94",
+ 'ETH' => "\xc3\x90",
+ 'ETH;' => "\xc3\x90",
+ 'Eacute' => "\xc3\x89",
+ 'Eacute;' => "\xc3\x89",
+ 'Ecirc' => "\xc3\x8a",
+ 'Ecirc;' => "\xc3\x8a",
+ 'Egrave' => "\xc3\x88",
+ 'Egrave;' => "\xc3\x88",
+ 'Epsilon;' => "\xce\x95",
+ 'Eta;' => "\xce\x97",
+ 'Euml' => "\xc3\x8b",
+ 'Euml;' => "\xc3\x8b",
+ 'GT' => '>',
+ 'GT;' => '>',
+ 'Gamma;' => "\xce\x93",
+ 'Iacute' => "\xc3\x8d",
+ 'Iacute;' => "\xc3\x8d",
+ 'Icirc' => "\xc3\x8e",
+ 'Icirc;' => "\xc3\x8e",
+ 'Igrave' => "\xc3\x8c",
+ 'Igrave;' => "\xc3\x8c",
+ 'Iota;' => "\xce\x99",
+ 'Iuml' => "\xc3\x8f",
+ 'Iuml;' => "\xc3\x8f",
+ 'Kappa;' => "\xce\x9a",
+ 'LT' => '<',
+ 'LT;' => '<',
+ 'Lambda;' => "\xce\x9b",
+ 'Mu;' => "\xce\x9c",
+ 'Ntilde' => "\xc3\x91",
+ 'Ntilde;' => "\xc3\x91",
+ 'Nu;' => "\xce\x9d",
+ 'OElig;' => "\xc5\x92",
+ 'Oacute' => "\xc3\x93",
+ 'Oacute;' => "\xc3\x93",
+ 'Ocirc' => "\xc3\x94",
+ 'Ocirc;' => "\xc3\x94",
+ 'Ograve' => "\xc3\x92",
+ 'Ograve;' => "\xc3\x92",
+ 'Omega;' => "\xce\xa9",
+ 'Omicron;' => "\xce\x9f",
+ 'Oslash' => "\xc3\x98",
+ 'Oslash;' => "\xc3\x98",
+ 'Otilde' => "\xc3\x95",
+ 'Otilde;' => "\xc3\x95",
+ 'Ouml' => "\xc3\x96",
+ 'Ouml;' => "\xc3\x96",
+ 'Phi;' => "\xce\xa6",
+ 'Pi;' => "\xce\xa0",
+ 'Prime;' => "\xe2\x80\xb3",
+ 'Psi;' => "\xce\xa8",
+ 'QUOT' => '"',
+ 'QUOT;' => '"',
+ 'REG' => "\xc2\xae",
+ 'REG;' => "\xc2\xae",
+ 'Rho;' => "\xce\xa1",
+ 'Scaron;' => "\xc5\xa0",
+ 'Sigma;' => "\xce\xa3",
+ 'THORN' => "\xc3\x9e",
+ 'THORN;' => "\xc3\x9e",
+ 'TRADE;' => "\xe2\x84\xa2",
+ 'Tau;' => "\xce\xa4",
+ 'Theta;' => "\xce\x98",
+ 'Uacute' => "\xc3\x9a",
+ 'Uacute;' => "\xc3\x9a",
+ 'Ucirc' => "\xc3\x9b",
+ 'Ucirc;' => "\xc3\x9b",
+ 'Ugrave' => "\xc3\x99",
+ 'Ugrave;' => "\xc3\x99",
+ 'Upsilon;' => "\xce\xa5",
+ 'Uuml' => "\xc3\x9c",
+ 'Uuml;' => "\xc3\x9c",
+ 'Xi;' => "\xce\x9e",
+ 'Yacute' => "\xc3\x9d",
+ 'Yacute;' => "\xc3\x9d",
+ 'Yuml;' => "\xc5\xb8",
+ 'Zeta;' => "\xce\x96",
+ 'aacute' => "\xc3\xa1",
+ 'aacute;' => "\xc3\xa1",
+ 'acirc' => "\xc3\xa2",
+ 'acirc;' => "\xc3\xa2",
+ 'acute' => "\xc2\xb4",
+ 'acute;' => "\xc2\xb4",
+ 'aelig' => "\xc3\xa6",
+ 'aelig;' => "\xc3\xa6",
+ 'agrave' => "\xc3\xa0",
+ 'agrave;' => "\xc3\xa0",
+ 'alefsym;' => "\xe2\x84\xb5",
+ 'alpha;' => "\xce\xb1",
+ 'amp' => '&',
+ 'amp;' => '&',
+ 'and;' => "\xe2\x88\xa7",
+ 'ang;' => "\xe2\x88\xa0",
+ 'apos;' => "'",
+ 'aring' => "\xc3\xa5",
+ 'aring;' => "\xc3\xa5",
+ 'asymp;' => "\xe2\x89\x88",
+ 'atilde' => "\xc3\xa3",
+ 'atilde;' => "\xc3\xa3",
+ 'auml' => "\xc3\xa4",
+ 'auml;' => "\xc3\xa4",
+ 'bdquo;' => "\xe2\x80\x9e",
+ 'beta;' => "\xce\xb2",
+ 'brvbar' => "\xc2\xa6",
+ 'brvbar;' => "\xc2\xa6",
+ 'bull;' => "\xe2\x80\xa2",
+ 'cap;' => "\xe2\x88\xa9",
+ 'ccedil' => "\xc3\xa7",
+ 'ccedil;' => "\xc3\xa7",
+ 'cedil' => "\xc2\xb8",
+ 'cedil;' => "\xc2\xb8",
+ 'cent' => "\xc2\xa2",
+ 'cent;' => "\xc2\xa2",
+ 'chi;' => "\xcf\x87",
+ 'circ;' => "\xcb\x86",
+ 'clubs;' => "\xe2\x99\xa3",
+ 'cong;' => "\xe2\x89\x85",
+ 'copy' => "\xc2\xa9",
+ 'copy;' => "\xc2\xa9",
+ 'crarr;' => "\xe2\x86\xb5",
+ 'cup;' => "\xe2\x88\xaa",
+ 'curren' => "\xc2\xa4",
+ 'curren;' => "\xc2\xa4",
+ 'dArr;' => "\xe2\x87\x93",
+ 'dagger;' => "\xe2\x80\xa0",
+ 'darr;' => "\xe2\x86\x93",
+ 'deg' => "\xc2\xb0",
+ 'deg;' => "\xc2\xb0",
+ 'delta;' => "\xce\xb4",
+ 'diams;' => "\xe2\x99\xa6",
+ 'divide' => "\xc3\xb7",
+ 'divide;' => "\xc3\xb7",
+ 'eacute' => "\xc3\xa9",
+ 'eacute;' => "\xc3\xa9",
+ 'ecirc' => "\xc3\xaa",
+ 'ecirc;' => "\xc3\xaa",
+ 'egrave' => "\xc3\xa8",
+ 'egrave;' => "\xc3\xa8",
+ 'empty;' => "\xe2\x88\x85",
+ 'emsp;' => "\xe2\x80\x83",
+ 'ensp;' => "\xe2\x80\x82",
+ 'epsilon;' => "\xce\xb5",
+ 'equiv;' => "\xe2\x89\xa1",
+ 'eta;' => "\xce\xb7",
+ 'eth' => "\xc3\xb0",
+ 'eth;' => "\xc3\xb0",
+ 'euml' => "\xc3\xab",
+ 'euml;' => "\xc3\xab",
+ 'euro;' => "\xe2\x82\xac",
+ 'exist;' => "\xe2\x88\x83",
+ 'fnof;' => "\xc6\x92",
+ 'forall;' => "\xe2\x88\x80",
+ 'frac12' => "\xc2\xbd",
+ 'frac12;' => "\xc2\xbd",
+ 'frac14' => "\xc2\xbc",
+ 'frac14;' => "\xc2\xbc",
+ 'frac34' => "\xc2\xbe",
+ 'frac34;' => "\xc2\xbe",
+ 'frasl;' => "\xe2\x81\x84",
+ 'gamma;' => "\xce\xb3",
+ 'ge;' => "\xe2\x89\xa5",
+ 'gt' => '>',
+ 'gt;' => '>',
+ 'hArr;' => "\xe2\x87\x94",
+ 'harr;' => "\xe2\x86\x94",
+ 'hearts;' => "\xe2\x99\xa5",
+ 'hellip;' => "\xe2\x80\xa6",
+ 'iacute' => "\xc3\xad",
+ 'iacute;' => "\xc3\xad",
+ 'icirc' => "\xc3\xae",
+ 'icirc;' => "\xc3\xae",
+ 'iexcl' => "\xc2\xa1",
+ 'iexcl;' => "\xc2\xa1",
+ 'igrave' => "\xc3\xac",
+ 'igrave;' => "\xc3\xac",
+ 'image;' => "\xe2\x84\x91",
+ 'infin;' => "\xe2\x88\x9e",
+ 'int;' => "\xe2\x88\xab",
+ 'iota;' => "\xce\xb9",
+ 'iquest' => "\xc2\xbf",
+ 'iquest;' => "\xc2\xbf",
+ 'isin;' => "\xe2\x88\x88",
+ 'iuml' => "\xc3\xaf",
+ 'iuml;' => "\xc3\xaf",
+ 'kappa;' => "\xce\xba",
+ 'lArr;' => "\xe2\x87\x90",
+ 'lambda;' => "\xce\xbb",
+ 'lang;' => "\xe3\x80\x88",
+ 'laquo' => "\xc2\xab",
+ 'laquo;' => "\xc2\xab",
+ 'larr;' => "\xe2\x86\x90",
+ 'lceil;' => "\xe2\x8c\x88",
+ 'ldquo;' => "\xe2\x80\x9c",
+ 'le;' => "\xe2\x89\xa4",
+ 'lfloor;' => "\xe2\x8c\x8a",
+ 'lowast;' => "\xe2\x88\x97",
+ 'loz;' => "\xe2\x97\x8a",
+ 'lrm;' => "\xe2\x80\x8e",
+ 'lsaquo;' => "\xe2\x80\xb9",
+ 'lsquo;' => "\xe2\x80\x98",
+ 'lt' => '<',
+ 'lt;' => '<',
+ 'macr' => "\xc2\xaf",
+ 'macr;' => "\xc2\xaf",
+ 'mdash;' => "\xe2\x80\x94",
+ 'micro' => "\xc2\xb5",
+ 'micro;' => "\xc2\xb5",
+ 'middot' => "\xc2\xb7",
+ 'middot;' => "\xc2\xb7",
+ 'minus;' => "\xe2\x88\x92",
+ 'mu;' => "\xce\xbc",
+ 'nabla;' => "\xe2\x88\x87",
+ 'nbsp' => "\xc2\xa0",
+ 'nbsp;' => "\xc2\xa0",
+ 'ndash;' => "\xe2\x80\x93",
+ 'ne;' => "\xe2\x89\xa0",
+ 'ni;' => "\xe2\x88\x8b",
+ 'not' => "\xc2\xac",
+ 'not;' => "\xc2\xac",
+ 'notin;' => "\xe2\x88\x89",
+ 'nsub;' => "\xe2\x8a\x84",
+ 'ntilde' => "\xc3\xb1",
+ 'ntilde;' => "\xc3\xb1",
+ 'nu;' => "\xce\xbd",
+ 'oacute' => "\xc3\xb3",
+ 'oacute;' => "\xc3\xb3",
+ 'ocirc' => "\xc3\xb4",
+ 'ocirc;' => "\xc3\xb4",
+ 'oelig;' => "\xc5\x93",
+ 'ograve' => "\xc3\xb2",
+ 'ograve;' => "\xc3\xb2",
+ 'oline;' => "\xe2\x80\xbe",
+ 'omega;' => "\xcf\x89",
+ 'omicron;' => "\xce\xbf",
+ 'oplus;' => "\xe2\x8a\x95",
+ 'or;' => "\xe2\x88\xa8",
+ 'ordf' => "\xc2\xaa",
+ 'ordf;' => "\xc2\xaa",
+ 'ordm' => "\xc2\xba",
+ 'ordm;' => "\xc2\xba",
+ 'oslash' => "\xc3\xb8",
+ 'oslash;' => "\xc3\xb8",
+ 'otilde' => "\xc3\xb5",
+ 'otilde;' => "\xc3\xb5",
+ 'otimes;' => "\xe2\x8a\x97",
+ 'ouml' => "\xc3\xb6",
+ 'ouml;' => "\xc3\xb6",
+ 'para' => "\xc2\xb6",
+ 'para;' => "\xc2\xb6",
+ 'part;' => "\xe2\x88\x82",
+ 'permil;' => "\xe2\x80\xb0",
+ 'perp;' => "\xe2\x8a\xa5",
+ 'phi;' => "\xcf\x86",
+ 'pi;' => "\xcf\x80",
+ 'piv;' => "\xcf\x96",
+ 'plusmn' => "\xc2\xb1",
+ 'plusmn;' => "\xc2\xb1",
+ 'pound' => "\xc2\xa3",
+ 'pound;' => "\xc2\xa3",
+ 'prime;' => "\xe2\x80\xb2",
+ 'prod;' => "\xe2\x88\x8f",
+ 'prop;' => "\xe2\x88\x9d",
+ 'psi;' => "\xcf\x88",
+ 'quot' => '"',
+ 'quot;' => '"',
+ 'rArr;' => "\xe2\x87\x92",
+ 'radic;' => "\xe2\x88\x9a",
+ 'rang;' => "\xe3\x80\x89",
+ 'raquo' => "\xc2\xbb",
+ 'raquo;' => "\xc2\xbb",
+ 'rarr;' => "\xe2\x86\x92",
+ 'rceil;' => "\xe2\x8c\x89",
+ 'rdquo;' => "\xe2\x80\x9d",
+ 'real;' => "\xe2\x84\x9c",
+ 'reg' => "\xc2\xae",
+ 'reg;' => "\xc2\xae",
+ 'rfloor;' => "\xe2\x8c\x8b",
+ 'rho;' => "\xcf\x81",
+ 'rlm;' => "\xe2\x80\x8f",
+ 'rsaquo;' => "\xe2\x80\xba",
+ 'rsquo;' => "\xe2\x80\x99",
+ 'sbquo;' => "\xe2\x80\x9a",
+ 'scaron;' => "\xc5\xa1",
+ 'sdot;' => "\xe2\x8b\x85",
+ 'sect' => "\xc2\xa7",
+ 'sect;' => "\xc2\xa7",
+ 'shy' => "\xc2\xad",
+ 'shy;' => "\xc2\xad",
+ 'sigma;' => "\xcf\x83",
+ 'sigmaf;' => "\xcf\x82",
+ 'sim;' => "\xe2\x88\xbc",
+ 'spades;' => "\xe2\x99\xa0",
+ 'sub;' => "\xe2\x8a\x82",
+ 'sube;' => "\xe2\x8a\x86",
+ 'sum;' => "\xe2\x88\x91",
+ 'sup1' => "\xc2\xb9",
+ 'sup1;' => "\xc2\xb9",
+ 'sup2' => "\xc2\xb2",
+ 'sup2;' => "\xc2\xb2",
+ 'sup3' => "\xc2\xb3",
+ 'sup3;' => "\xc2\xb3",
+ 'sup;' => "\xe2\x8a\x83",
+ 'supe;' => "\xe2\x8a\x87",
+ 'szlig' => "\xc3\x9f",
+ 'szlig;' => "\xc3\x9f",
+ 'tau;' => "\xcf\x84",
+ 'there4;' => "\xe2\x88\xb4",
+ 'theta;' => "\xce\xb8",
+ 'thetasym;' => "\xcf\x91",
+ 'thinsp;' => "\xe2\x80\x89",
+ 'thorn' => "\xc3\xbe",
+ 'thorn;' => "\xc3\xbe",
+ 'tilde;' => "\xcb\x9c",
+ 'times' => "\xc3\x97",
+ 'times;' => "\xc3\x97",
+ 'trade;' => "\xe2\x84\xa2",
+ 'uArr;' => "\xe2\x87\x91",
+ 'uacute' => "\xc3\xba",
+ 'uacute;' => "\xc3\xba",
+ 'uarr;' => "\xe2\x86\x91",
+ 'ucirc' => "\xc3\xbb",
+ 'ucirc;' => "\xc3\xbb",
+ 'ugrave' => "\xc3\xb9",
+ 'ugrave;' => "\xc3\xb9",
+ 'uml' => "\xc2\xa8",
+ 'uml;' => "\xc2\xa8",
+ 'upsih;' => "\xcf\x92",
+ 'upsilon;' => "\xcf\x85",
+ 'uuml' => "\xc3\xbc",
+ 'uuml;' => "\xc3\xbc",
+ 'weierp;' => "\xe2\x84\x98",
+ 'xi;' => "\xce\xbe",
+ 'yacute' => "\xc3\xbd",
+ 'yacute;' => "\xc3\xbd",
+ 'yen' => "\xc2\xa5",
+ 'yen;' => "\xc2\xa5",
+ 'yuml' => "\xc3\xbf",
+ 'yuml;' => "\xc3\xbf",
+ 'zeta;' => "\xce\xb6",
+ 'zwj;' => "\xe2\x80\x8d",
+ 'zwnj;' => "\xe2\x80\x8c"
+ }
+
+ ENCODINGS = %w[
+ ansi_x3.4-1968
+ iso-ir-6
+ ansi_x3.4-1986
+ iso_646.irv:1991
+ ascii
+ iso646-us
+ us-ascii
+ us
+ ibm367
+ cp367
+ csascii
+ ks_c_5601-1987
+ korean
+ iso-2022-kr
+ csiso2022kr
+ euc-kr
+ iso-2022-jp
+ csiso2022jp
+ iso-2022-jp-2
+ iso-ir-58
+ chinese
+ csiso58gb231280
+ iso_8859-1:1987
+ iso-ir-100
+ iso_8859-1
+ iso-8859-1
+ latin1
+ l1
+ ibm819
+ cp819
+ csisolatin1
+ iso_8859-2:1987
+ iso-ir-101
+ iso_8859-2
+ iso-8859-2
+ latin2
+ l2
+ csisolatin2
+ iso_8859-3:1988
+ iso-ir-109
+ iso_8859-3
+ iso-8859-3
+ latin3
+ l3
+ csisolatin3
+ iso_8859-4:1988
+ iso-ir-110
+ iso_8859-4
+ iso-8859-4
+ latin4
+ l4
+ csisolatin4
+ iso_8859-6:1987
+ iso-ir-127
+ iso_8859-6
+ iso-8859-6
+ ecma-114
+ asmo-708
+ arabic
+ csisolatinarabic
+ iso_8859-7:1987
+ iso-ir-126
+ iso_8859-7
+ iso-8859-7
+ elot_928
+ ecma-118
+ greek
+ greek8
+ csisolatingreek
+ iso_8859-8:1988
+ iso-ir-138
+ iso_8859-8
+ iso-8859-8
+ hebrew
+ csisolatinhebrew
+ iso_8859-5:1988
+ iso-ir-144
+ iso_8859-5
+ iso-8859-5
+ cyrillic
+ csisolatincyrillic
+ iso_8859-9:1989
+ iso-ir-148
+ iso_8859-9
+ iso-8859-9
+ latin5
+ l5
+ csisolatin5
+ iso-8859-10
+ iso-ir-157
+ l6
+ iso_8859-10:1992
+ csisolatin6
+ latin6
+ hp-roman8
+ roman8
+ r8
+ ibm037
+ cp037
+ csibm037
+ ibm424
+ cp424
+ csibm424
+ ibm437
+ cp437
+ 437
+ cspc8codepage437
+ ibm500
+ cp500
+ csibm500
+ ibm775
+ cp775
+ cspc775baltic
+ ibm850
+ cp850
+ 850
+ cspc850multilingual
+ ibm852
+ cp852
+ 852
+ cspcp852
+ ibm855
+ cp855
+ 855
+ csibm855
+ ibm857
+ cp857
+ 857
+ csibm857
+ ibm860
+ cp860
+ 860
+ csibm860
+ ibm861
+ cp861
+ 861
+ cp-is
+ csibm861
+ ibm862
+ cp862
+ 862
+ cspc862latinhebrew
+ ibm863
+ cp863
+ 863
+ csibm863
+ ibm864
+ cp864
+ csibm864
+ ibm865
+ cp865
+ 865
+ csibm865
+ ibm866
+ cp866
+ 866
+ csibm866
+ ibm869
+ cp869
+ 869
+ cp-gr
+ csibm869
+ ibm1026
+ cp1026
+ csibm1026
+ koi8-r
+ cskoi8r
+ koi8-u
+ big5-hkscs
+ ptcp154
+ csptcp154
+ pt154
+ cp154
+ utf-7
+ utf-16be
+ utf-16le
+ utf-16
+ utf-8
+ iso-8859-13
+ iso-8859-14
+ iso-ir-199
+ iso_8859-14:1998
+ iso_8859-14
+ latin8
+ iso-celtic
+ l8
+ iso-8859-15
+ iso_8859-15
+ iso-8859-16
+ iso-ir-226
+ iso_8859-16:2001
+ iso_8859-16
+ latin10
+ l10
+ gbk
+ cp936
+ ms936
+ gb18030
+ shift_jis
+ ms_kanji
+ csshiftjis
+ euc-jp
+ gb2312
+ big5
+ csbig5
+ windows-1250
+ windows-1251
+ windows-1252
+ windows-1253
+ windows-1254
+ windows-1255
+ windows-1256
+ windows-1257
+ windows-1258
+ tis-620
+ hz-gb-2312
+ ]
+
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters.rb b/vendor/plugins/HTML5lib/lib/html5/filters.rb
new file mode 100644
index 00000000..74c7f0e0
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters.rb
@@ -0,0 +1 @@
+require 'html5/filters/optionaltags'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
similarity index 89%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/base.rb
index c1a5c660..0cb023d2 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
@@ -1,7 +1,7 @@
require 'delegate'
require 'enumerator'
-module HTML5lib
+module HTML5
module Filters
class Base < SimpleDelegator
include Enumerable
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
similarity index 98%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
index 00dc980d..b2cf93a5 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/inject_meta_charset.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
@@ -1,6 +1,6 @@
-require 'html5lib/filters/base'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class InjectMetaCharset < Base
def initialize(source, encoding)
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
similarity index 99%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
index aacf3b73..37d2e29b 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/optionaltags.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
@@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class OptionalTagFilter < Base
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
similarity index 73%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
index db9a12e0..8e25f594 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
@@ -1,7 +1,7 @@
-require 'html5lib/filters/base'
-require 'html5lib/sanitizer'
+require 'html5/filters/base'
+require 'html5/sanitizer'
-module HTML5lib
+module HTML5
module Filters
class HTMLSanitizeFilter < Base
include HTMLSanitizeModule
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
similarity index 90%
rename from vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
rename to vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
index 3b85fd7b..b1d17190 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters/whitespace.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
@@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/filters/base'
+require 'html5/constants'
+require 'html5/filters/base'
-module HTML5lib
+module HTML5
module Filters
class WhitespaceFilter < Base
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
similarity index 93%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser.rb
index bf48930a..b755bee1 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
@@ -1,246 +1,246 @@
-require 'html5lib/constants'
-require 'html5lib/tokenizer'
-require 'html5lib/treebuilders/rexml'
-
-Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
- require 'html5lib/html5parser/' + File.basename(path)
-end
-
-module HTML5lib
-
- # Error in parsed document
- class ParseError < Exception; end
- class AssertionError < Exception; end
-
- # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
- #
- class HTMLParser
-
- attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
-
- attr_reader :phases, :tokenizer, :tree, :errors
-
- def self.parse(stream, options = {})
- encoding = options.delete(:encoding)
- new(options).parse(stream,encoding)
- end
-
- def self.parseFragment(stream, options = {})
- container = options.delete(:container) || 'div'
- encoding = options.delete(:encoding)
- new(options).parseFragment(stream,container,encoding)
- end
-
- @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
- inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
-
- # :strict - raise an exception when a parse error is encountered
- # :tree - a treebuilder class controlling the type of tree that will be
- # returned. Built in treebuilders can be accessed through
- # HTML5lib::TreeBuilders[treeType]
- def initialize(options = {})
- @strict = false
- @errors = []
-
- @tokenizer = HTMLTokenizer
- @tree = TreeBuilders::REXML::TreeBuilder
-
- options.each { |name, value| instance_variable_set("@#{name}", value) }
-
- @tree = @tree.new
-
- @phases = @@phases.inject({}) do |phases, phase_name|
- phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
- phases[phase_name.to_sym] = HTML5lib.const_get(phase_class_name).new(self, @tree)
- phases
- end
- end
-
- def _parse(stream, innerHTML, encoding, container = 'div')
- @tree.reset
- @firstStartTag = false
- @errors = []
-
- @tokenizer = @tokenizer.class unless Class === @tokenizer
- @tokenizer = @tokenizer.new(stream, :encoding => encoding,
- :parseMeta => !innerHTML)
-
- if innerHTML
- case @innerHTML = container.downcase
- when 'title', 'textarea'
- @tokenizer.contentModelFlag = :RCDATA
- when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
- @tokenizer.contentModelFlag = :CDATA
- when 'plaintext'
- @tokenizer.contentModelFlag = :PLAINTEXT
- else
- # contentModelFlag already is PCDATA
- #@tokenizer.contentModelFlag = :PCDATA
- end
-
- @phase = @phases[:rootElement]
- @phase.insertHtmlElement
- resetInsertionMode
- else
- @innerHTML = false
- @phase = @phases[:initial]
- end
-
- # We only seem to have InBodyPhase testcases where the following is
- # relevant ... need others too
- @lastPhase = nil
-
- # XXX This is temporary for the moment so there isn't any other
- # changes needed for the parser to work with the iterable tokenizer
- @tokenizer.each do |token|
- token = normalizeToken(token)
-
- method = 'process%s' % token[:type]
-
- case token[:type]
- when :Characters, :SpaceCharacters, :Comment
- @phase.send method, token[:data]
- when :StartTag
- @phase.send method, token[:name], token[:data]
- when :EndTag
- @phase.send method, token[:name]
- when :Doctype
- @phase.send method, token[:name], token[:publicId],
- token[:systemId], token[:correct]
- else
- parseError(token[:data])
- end
- end
-
- # When the loop finishes it's EOF
- @phase.processEOF
- end
-
- # Parse a HTML document into a well-formed tree
- #
- # stream - a filelike object or string containing the HTML to be parsed
- #
- # The optional encoding parameter must be a string that indicates
- # the encoding. If specified, that encoding will be used,
- # regardless of any BOM or later declaration (such as in a meta
- # element)
- def parse(stream, encoding=nil)
- _parse(stream, false, encoding)
- return @tree.getDocument
- end
-
- # Parse a HTML fragment into a well-formed tree fragment
-
- # container - name of the element we're setting the innerHTML property
- # if set to nil, default to 'div'
- #
- # stream - a filelike object or string containing the HTML to be parsed
- #
- # The optional encoding parameter must be a string that indicates
- # the encoding. If specified, that encoding will be used,
- # regardless of any BOM or later declaration (such as in a meta
- # element)
- def parseFragment(stream, container='div', encoding=nil)
- _parse(stream, true, encoding, container)
- return @tree.getFragment
- end
-
- def parseError(data = 'XXX ERROR MESSAGE NEEDED')
- # XXX The idea is to make data mandatory.
- @errors.push([@tokenizer.stream.position, data])
- raise ParseError if @strict
- end
-
- # HTML5 specific normalizations to the token stream
- def normalizeToken(token)
-
- if token[:type] == :EmptyTag
- # When a solidus (/) is encountered within a tag name what happens
- # depends on whether the current tag name matches that of a void
- # element. If it matches a void element atheists did the wrong
- # thing and if it doesn't it's wrong for everyone.
-
- unless VOID_ELEMENTS.include?(token[:name])
- parseError(_('Solidus (/) incorrectly placed in tag.'))
- end
-
- token[:type] = :StartTag
- end
-
- if token[:type] == :StartTag
- token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
-
- # We need to remove the duplicate attributes and convert attributes
- # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
-
- unless token[:data].empty?
- data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
- token[:data] = Hash[*data.flatten]
- end
-
- elsif token[:type] == :EndTag
- parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
- token[:name] = token[:name].downcase
- end
-
- return token
- end
-
- @@new_modes = {
- 'select' => :inSelect,
- 'td' => :inCell,
- 'th' => :inCell,
- 'tr' => :inRow,
- 'tbody' => :inTableBody,
- 'thead' => :inTableBody,
- 'tfoot' => :inTableBody,
- 'caption' => :inCaption,
- 'colgroup' => :inColumnGroup,
- 'table' => :inTable,
- 'head' => :inBody,
- 'body' => :inBody,
- 'frameset' => :inFrameset
- }
-
- def resetInsertionMode
- # The name of this method is mostly historical. (It's also used in the
- # specification.)
- last = false
-
- @tree.openElements.reverse.each do |node|
- nodeName = node.name
-
- if node == @tree.openElements[0]
- last = true
- unless ['td', 'th'].include?(nodeName)
- # XXX
- # assert @innerHTML
- nodeName = @innerHTML
- end
- end
-
- # Check for conditions that should only happen in the innerHTML
- # case
- if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
- # XXX
- # assert @innerHTML
- end
-
- if @@new_modes.has_key?(nodeName)
- @phase = @phases[@@new_modes[nodeName]]
- elsif nodeName == 'html'
- @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
- elsif last
- @phase = @phases[:inBody]
- else
- next
- end
-
- break
- end
- end
-
- def _(string); string; end
- end
-
-end
+require 'html5/constants'
+require 'html5/tokenizer'
+require 'html5/treebuilders/rexml'
+
+Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
+ require 'html5/html5parser/' + File.basename(path)
+end
+
+module HTML5
+
+ # Error in parsed document
+ class ParseError < Exception; end
+ class AssertionError < Exception; end
+
+ # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
+ #
+ class HTMLParser
+
+ attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
+
+ attr_reader :phases, :tokenizer, :tree, :errors
+
+ def self.parse(stream, options = {})
+ encoding = options.delete(:encoding)
+ new(options).parse(stream,encoding)
+ end
+
+ def self.parseFragment(stream, options = {})
+ container = options.delete(:container) || 'div'
+ encoding = options.delete(:encoding)
+ new(options).parseFragment(stream,container,encoding)
+ end
+
+ @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
+ inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )
+
+ # :strict - raise an exception when a parse error is encountered
+ # :tree - a treebuilder class controlling the type of tree that will be
+ # returned. Built in treebuilders can be accessed through
+ # HTML5::TreeBuilders[treeType]
+ def initialize(options = {})
+ @strict = false
+ @errors = []
+
+ @tokenizer = HTMLTokenizer
+ @tree = TreeBuilders::REXML::TreeBuilder
+
+ options.each { |name, value| instance_variable_set("@#{name}", value) }
+
+ @tree = @tree.new
+
+ @phases = @@phases.inject({}) do |phases, phase_name|
+ phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
+ phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
+ phases
+ end
+ end
+
+ def _parse(stream, innerHTML, encoding, container = 'div')
+ @tree.reset
+ @firstStartTag = false
+ @errors = []
+
+ @tokenizer = @tokenizer.class unless Class === @tokenizer
+ @tokenizer = @tokenizer.new(stream, :encoding => encoding,
+ :parseMeta => !innerHTML)
+
+ if innerHTML
+ case @innerHTML = container.downcase
+ when 'title', 'textarea'
+ @tokenizer.contentModelFlag = :RCDATA
+ when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
+ @tokenizer.contentModelFlag = :CDATA
+ when 'plaintext'
+ @tokenizer.contentModelFlag = :PLAINTEXT
+ else
+ # contentModelFlag already is PCDATA
+ #@tokenizer.contentModelFlag = :PCDATA
+ end
+
+ @phase = @phases[:rootElement]
+ @phase.insertHtmlElement
+ resetInsertionMode
+ else
+ @innerHTML = false
+ @phase = @phases[:initial]
+ end
+
+ # We only seem to have InBodyPhase testcases where the following is
+ # relevant ... need others too
+ @lastPhase = nil
+
+ # XXX This is temporary for the moment so that no other changes
+ # are needed for the parser to work with the iterable tokenizer
+ @tokenizer.each do |token|
+ token = normalizeToken(token)
+
+ method = 'process%s' % token[:type]
+
+ case token[:type]
+ when :Characters, :SpaceCharacters, :Comment
+ @phase.send method, token[:data]
+ when :StartTag
+ @phase.send method, token[:name], token[:data]
+ when :EndTag
+ @phase.send method, token[:name]
+ when :Doctype
+ @phase.send method, token[:name], token[:publicId],
+ token[:systemId], token[:correct]
+ else
+ parseError(token[:data])
+ end
+ end
+
+ # When the loop finishes it's EOF
+ @phase.processEOF
+ end
+
+ # Parse a HTML document into a well-formed tree
+ #
+ # stream - a filelike object or string containing the HTML to be parsed
+ #
+ # The optional encoding parameter must be a string that indicates
+ # the encoding. If specified, that encoding will be used,
+ # regardless of any BOM or later declaration (such as in a meta
+ # element)
+ def parse(stream, encoding=nil)
+ _parse(stream, false, encoding)
+ return @tree.getDocument
+ end
+
+ # Parse a HTML fragment into a well-formed tree fragment
+ #
+ # container - name of the element whose innerHTML property is being set;
+ # defaults to 'div'
+ #
+ # stream - a filelike object or string containing the HTML to be parsed
+ #
+ # The optional encoding parameter must be a string that indicates
+ # the encoding. If specified, that encoding will be used,
+ # regardless of any BOM or later declaration (such as in a meta
+ # element)
+ def parseFragment(stream, container='div', encoding=nil)
+ _parse(stream, true, encoding, container)
+ return @tree.getFragment
+ end
+
+ def parseError(data = 'XXX ERROR MESSAGE NEEDED')
+ # XXX The idea is to make data mandatory.
+ @errors.push([@tokenizer.stream.position, data])
+ raise ParseError if @strict
+ end
+
+ # HTML5 specific normalizations to the token stream
+ def normalizeToken(token)
+
+ if token[:type] == :EmptyTag
+ # When a solidus (/) is encountered within a tag name what happens
+ # depends on whether the current tag name matches that of a void
+ # element. If it matches a void element atheists did the wrong
+ # thing and if it doesn't it's wrong for everyone.
+
+ unless VOID_ELEMENTS.include?(token[:name])
+ parseError(_('Solidus (/) incorrectly placed in tag.'))
+ end
+
+ token[:type] = :StartTag
+ end
+
+ if token[:type] == :StartTag
+ token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
+
+ # We need to remove the duplicate attributes and convert attributes
+ # to a hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}
+
+ unless token[:data].empty?
+ data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
+ token[:data] = Hash[*data.flatten]
+ end
+
+ elsif token[:type] == :EndTag
+ parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
+ token[:name] = token[:name].downcase
+ end
+
+ return token
+ end
+
+ @@new_modes = {
+ 'select' => :inSelect,
+ 'td' => :inCell,
+ 'th' => :inCell,
+ 'tr' => :inRow,
+ 'tbody' => :inTableBody,
+ 'thead' => :inTableBody,
+ 'tfoot' => :inTableBody,
+ 'caption' => :inCaption,
+ 'colgroup' => :inColumnGroup,
+ 'table' => :inTable,
+ 'head' => :inBody,
+ 'body' => :inBody,
+ 'frameset' => :inFrameset
+ }
+
+ def resetInsertionMode
+ # The name of this method is mostly historical. (It's also used in the
+ # specification.)
+ last = false
+
+ @tree.openElements.reverse.each do |node|
+ nodeName = node.name
+
+ if node == @tree.openElements[0]
+ last = true
+ unless ['td', 'th'].include?(nodeName)
+ # XXX
+ # assert @innerHTML
+ nodeName = @innerHTML
+ end
+ end
+
+ # Check for conditions that should only happen in the innerHTML
+ # case
+ if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
+ # XXX
+ # assert @innerHTML
+ end
+
+ if @@new_modes.has_key?(nodeName)
+ @phase = @phases[@@new_modes[nodeName]]
+ elsif nodeName == 'html'
+ @phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
+ elsif last
+ @phase = @phases[:inBody]
+ else
+ next
+ end
+
+ break
+ end
+ end
+
+ def _(string); string; end
+ end
+
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
similarity index 96%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
index 27778ef1..b68a0af2 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class AfterBodyPhase < Phase
handle_end 'html'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
similarity index 94%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
index 376c5f38..7c97bf43 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class AfterFramesetPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#after3
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
similarity index 95%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
index 37c8bf6b..082219e1 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class AfterHeadPhase < Phase
handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
similarity index 88%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
index 98a9d023..6452dd02 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
@@ -1,11 +1,11 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class BeforeHeadPhase < Phase
handle_start 'html', 'head'
- handle_end %w( html head body br ) => 'ImplyHead'
+ handle_end %w( html head body br p ) => 'ImplyHead'
def processEOF
startTagHead('head', {})
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
similarity index 94%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
index 57720292..306efb05 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InBodyPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
@@ -112,7 +112,7 @@ module HTML5lib
def startTagForm(name, attributes)
if @tree.formPointer
- @parser.parseError('Unexpected start tag (form). Ignored.')
+ @parser.parseError(_('Unexpected start tag (form). Ignored.'))
else
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@@ -129,9 +129,9 @@ module HTML5lib
if stopName.include?(node.name)
poppedNodes = (0..i).collect { @tree.openElements.pop }
if i >= 1
- @parser.parseError("Missing end tag%s (%s)" % [
+ @parser.parseError(_("Missing end tag%s (%s)" % [
(i>1 ? 's' : ''),
- poppedNodes.reverse.map {|item| item.name}.join(', ')])
+ poppedNodes.reverse.map {|item| item.name}.join(', ')]))
end
break
end
@@ -251,7 +251,7 @@ module HTML5lib
end
def startTagIsindex(name, attributes)
- @parser.parseError("Unexpected start tag isindex. Don't use it!")
+ @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
return if @tree.formPointer
processStartTag('form', {})
processStartTag('hr', {})
@@ -311,8 +311,13 @@ module HTML5lib
def endTagP(name)
@tree.generateImpliedEndTags('p') if in_scope?('p')
- @parser.parseError('Unexpected end tag (p).') unless @tree.openElements[-1].name == 'p'
- @tree.openElements.pop while in_scope?('p')
+ @parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
+ if in_scope?('p')
+ @tree.openElements.pop while in_scope?('p')
+ else
+ startTagCloseP('p', {})
+ endTagP('p')
+ end
end
def endTagBody(name)
@@ -342,7 +347,7 @@ module HTML5lib
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name
- @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+ @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
end
if in_scope?(name)
@@ -351,7 +356,14 @@ module HTML5lib
end
def endTagForm(name)
- endTagBlock(name)
+ if in_scope?(name)
+ @tree.generateImpliedEndTags
+ end
+ if @tree.openElements[-1].name != name
+ @parser.parseError(_("End tag (form) seen too early. Ignored."))
+ else
+ @tree.openElements.pop
+ end
@tree.formPointer = nil
end
@@ -361,7 +373,7 @@ module HTML5lib
@tree.generateImpliedEndTags(name)
unless @tree.openElements[-1].name == name
- @parser.parseError(("End tag (#{name}) seen too early. Expected other end tag."))
+ @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
end
end
@@ -377,7 +389,7 @@ module HTML5lib
end
unless @tree.openElements[-1].name == name
- @parser.parseError(("Unexpected end tag (#{name}). Expected other end tag."))
+ @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
end
HEADING_ELEMENTS.each do |element|
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
index ccdfcb91..bbafdcd8 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InCaptionPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
index 5b88a30b..24fdf28e 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InCellPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
similarity index 96%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
index 7729eb83..e257bb17 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InColumnGroupPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
similarity index 96%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
index d6c7400c..0a9b4b29 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InFramesetPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
similarity index 96%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
index 20b37653..d16205f1 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
@@ -1,12 +1,12 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InHeadPhase < Phase
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
handle_end 'head'
- handle_end %w( html body br ) => 'ImplyAfterHead'
+ handle_end %w( html body br p ) => 'ImplyAfterHead'
handle_end %w( title style script )
def processEOF
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
index b3ffa3f0..b8e4640a 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InRowPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
index 850b8f9f..8c54996f 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InSelectPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
similarity index 97%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
index 79448216..6e998dab 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InTableBodyPhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
similarity index 98%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
index be38c53e..9adaf2ad 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InTablePhase < Phase
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
similarity index 99%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
index aeb0afdd..392a69cd 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class InitialPhase < Phase
# This phase deals with error handling as well which is currently not
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
similarity index 99%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
index d451eb37..b4bd11e1 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
@@ -1,4 +1,4 @@
-module HTML5lib
+module HTML5
# Base class for helper objects that implement each phase of processing.
#
# Handler methods should be in the following order (they can be omitted):
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
similarity index 94%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
index 7a4b67c8..437f8812 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class RootElementPhase < Phase
def processEOF
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb b/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
similarity index 94%
rename from vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
rename to vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
index f8f8d33a..74b1dda9 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
@@ -1,6 +1,6 @@
-require 'html5lib/html5parser/phase'
+require 'html5/html5parser/phase'
-module HTML5lib
+module HTML5
class TrailingEndPhase < Phase
def processEOF
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb b/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
similarity index 90%
rename from vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
rename to vendor/plugins/HTML5lib/lib/html5/inputstream.rb
index 3abb5b67..94368d00 100755
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
@@ -1,7 +1,7 @@
require 'stringio'
-require 'html5lib/constants'
+require 'html5/constants'
-module HTML5lib
+module HTML5
# Provides a unicode stream of characters to the HTMLTokenizer.
@@ -10,7 +10,7 @@ module HTML5lib
class HTMLInputStream
- attr_accessor :queue, :char_encoding
+ attr_accessor :queue, :char_encoding, :errors
# Initialises the HTMLInputStream.
#
@@ -40,25 +40,31 @@ module HTML5lib
#Number of bytes to use when looking for a meta element with
#encoding information
@NUM_BYTES_META = 512
+ #Number of bytes to use when detecting encoding using chardet
+ @NUM_BYTES_CHARDET = 256
+ #Number of bytes to use when reading content
+ @NUM_BYTES_BUFFER = 1024
+
#Encoding to use if no other information can be found
@DEFAULT_ENCODING = 'windows-1252'
#Detect encoding iff no explicit "transport level" encoding is supplied
- if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
+ if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
@char_encoding = detect_encoding
else
@char_encoding = @encoding
end
# Read bytes from stream decoding them into Unicode
- uString = @raw_stream.read
+ @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
if @char_encoding == 'windows-1252'
@win1252 = true
elsif @char_encoding != 'utf-8'
begin
require 'iconv'
begin
- uString = Iconv.iconv('utf-8', @char_encoding, uString).first
+ @buffer << @raw_stream.read unless @raw_stream.eof?
+ @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
rescue
@win1252 = true
end
@@ -67,10 +73,8 @@ module HTML5lib
end
end
- # Convert the unicode string into a list to be used as the data stream
- @data_stream = uString
-
@queue = []
+ @errors = []
# Reset position in the list to read from
@tell = 0
@@ -109,9 +113,22 @@ module HTML5lib
begin
require 'rubygems'
require 'UniversalDetector' # gem install chardet
- buffer = @raw_stream.read
- encoding = UniversalDetector::chardet(buffer)['encoding']
- seek(buffer, 0)
+ buffers = []
+ detector = UniversalDetector::Detector.instance
+ detector.reset
+ until @raw_stream.eof?
+ buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
+ break if !buffer or buffer.empty?
+ buffers << buffer
+ detector.feed(buffer)
+ break if detector.instance_eval {@done}
+ detector.instance_eval {
+ @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
+ }
+ end
+ detector.close
+ encoding = detector.result['encoding']
+ seek(buffers*'', 0)
rescue LoadError
end
end
@@ -242,14 +259,20 @@ module HTML5lib
unless @queue.empty?
return @queue.shift
else
- c = @data_stream[@tell]
+ if @tell + 3 > @buffer.length and !@raw_stream.eof?
+ # read next block
+ @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
+ @tell = 0
+ end
+
+ c = @buffer[@tell]
@tell += 1
case c
when 0x01 .. 0x7F
if c == 0x0D
# normalize newlines
- @tell += 1 if @data_stream[@tell] == 0x0A
+ @tell += 1 if @buffer[@tell] == 0x0A
c = 0x0A
end
@@ -276,7 +299,7 @@ module HTML5lib
when 0xC0 .. 0xFF
if @win1252
"\xC3" + (c-64).chr # convert to utf-8
- elsif @data_stream[@tell-1 .. -1] =~ /^
+ elsif @buffer[@tell-1 .. @tell+3] =~ /^
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
@@ -292,6 +315,8 @@ module HTML5lib
end
when 0x00
+ @errors.push('null character found in input stream, ' +
+ 'replaced with U+FFFD')
[0xFFFD].pack('U') # null characters are invalid
else
@@ -317,6 +342,10 @@ module HTML5lib
@queue.insert(0, c) unless c == :EOF
return char_stack.join('')
end
+
+ def unget(characters)
+ @queue.unshift(*characters.to_a) unless characters == :EOF
+ end
end
# String-like object with an assosiated position and various extra methods
@@ -433,14 +462,14 @@ module HTML5lib
if attr[0] == 'charset'
tentative_encoding = attr[1]
- if HTML5lib.is_valid_encoding(tentative_encoding)
+ if HTML5.is_valid_encoding(tentative_encoding)
@encoding = tentative_encoding
return false
end
elsif attr[0] == 'content'
content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
tentative_encoding = content_parser.parse
- if HTML5lib.is_valid_encoding(tentative_encoding)
+ if HTML5.is_valid_encoding(tentative_encoding)
@encoding = tentative_encoding
return false
end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb b/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
similarity index 87%
rename from vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
rename to vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
index bbcf0eac..eae80ff7 100755
--- a/vendor/plugins/HTML5lib/lib/html5lib/liberalxmlparser.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
@@ -11,10 +11,10 @@
#
# @@TODO:
# * Selectively lowercase only XHTML, but not foreign markup
-require 'html5lib/html5parser'
-require 'html5lib/constants'
+require 'html5/html5parser'
+require 'html5/constants'
-module HTML5lib
+module HTML5
# liberal XML parser
class XMLParser < HTMLParser
@@ -25,25 +25,35 @@ module HTML5lib
end
def normalizeToken(token)
- if token[:type] == :StartTag or token[:type] == :EmptyTag
+ case token[:type]
+ when :StartTag, :EmptyTag
# We need to remove the duplicate attributes and convert attributes
- # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
+ # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}
token[:data] = Hash[*token[:data].reverse.flatten]
# For EmptyTags, process both a Start and an End tag
if token[:type] == :EmptyTag
+ save = @tokenizer.contentModelFlag
@phase.processStartTag(token[:name], token[:data])
+ @tokenizer.contentModelFlag = save
token[:data] = {}
token[:type] = :EndTag
end
- elsif token[:type] == :EndTag
+ when :Characters
+ # un-escape RCDATA_ELEMENTS (e.g. style, script)
+ if @tokenizer.contentModelFlag == :CDATA
+ token[:data] = token[:data].
+ gsub('<','<').gsub('>','>').gsub('&','&')
+ end
+
+ when :EndTag
if token[:data]
parseError(_("End tag contains unexpected attributes."))
end
- elsif token[:type] == :Comment
+ when :Comment
# Rescue CDATA from the comments
if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
token[:type] = :Characters
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
similarity index 99%
rename from vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
rename to vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
index 5af9cf51..44f20c60 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
@@ -1,6 +1,7 @@
require 'cgi'
+require 'html5/tokenizer'
-module HTML5lib
+module HTML5
# This module provides sanitization of XHTML+MathML+SVG
# and of inline style attributes.
diff --git a/vendor/plugins/HTML5lib/lib/html5/serializer.rb b/vendor/plugins/HTML5lib/lib/html5/serializer.rb
new file mode 100644
index 00000000..f7187b7b
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer.rb
@@ -0,0 +1,2 @@
+require 'html5/serializer/htmlserializer'
+require 'html5/serializer/xhtmlserializer'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb b/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
similarity index 93%
rename from vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
rename to vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
index a03b7d79..3f4eb812 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/htmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
@@ -1,6 +1,6 @@
-require 'html5lib/constants'
+require 'html5/constants'
-module HTML5lib
+module HTML5
class HTMLSerializer
@@ -21,6 +21,7 @@ module HTML5lib
@use_trailing_solidus = false
@space_before_trailing_solidus = true
@escape_lt_in_attrs = false
+ @escape_rcdata = false
@omit_optional_tags = true
@sanitize = false
@@ -43,22 +44,22 @@ module HTML5lib
@errors = []
if encoding and @inject_meta_charset
- require 'html5lib/filters/inject_meta_charset'
+ require 'html5/filters/inject_meta_charset'
treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
end
if @strip_whitespace
- require 'html5lib/filters/whitespace'
+ require 'html5/filters/whitespace'
treewalker = Filters::WhitespaceFilter.new(treewalker)
end
if @sanitize
- require 'html5lib/filters/sanitizer'
+ require 'html5/filters/sanitizer'
treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
end
if @omit_optional_tags
- require 'html5lib/filters/optionaltags'
+ require 'html5/filters/optionaltags'
treewalker = Filters::OptionalTagFilter.new(treewalker)
end
@@ -81,7 +82,7 @@ module HTML5lib
elsif [:StartTag, :EmptyTag].include? type
name = token[:name]
- if RCDATA_ELEMENTS.include?(name)
+ if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
in_cdata = true
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb b/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
similarity index 72%
rename from vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
rename to vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
index 43a63788..1e2885a6 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer/xhtmlserializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
@@ -1,6 +1,6 @@
-require 'html5lib/serializer/htmlserializer'
+require 'html5/serializer/htmlserializer'
-module HTML5lib
+module HTML5
class XHTMLSerializer < HTMLSerializer
DEFAULTS = {
@@ -8,7 +8,8 @@ module HTML5lib
:minimize_boolean_attributes => false,
:use_trailing_solidus => true,
:escape_lt_in_attrs => true,
- :omit_optional_tags => false
+ :omit_optional_tags => false,
+ :escape_rcdata => true
}
def initialize(options={})
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb b/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
similarity index 93%
rename from vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
rename to vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
index 6519944d..0d31d9de 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
@@ -1,7 +1,7 @@
-require 'html5lib/constants'
-require 'html5lib/inputstream'
+require 'html5/constants'
+require 'html5/inputstream'
-module HTML5lib
+module HTML5
# This class takes care of tokenizing HTML.
#
@@ -84,9 +84,9 @@ module HTML5lib
# Start processing. When EOF is reached @state will return false
# instead of true and the loop will terminate.
while send @state
- while not @tokenQueue.empty?
- yield @tokenQueue.shift
- end
+ yield :type => :ParseError, :data => @stream.errors.shift until
+ @stream.errors.empty?
+ yield @tokenQueue.shift until @tokenQueue.empty?
end
end
@@ -109,7 +109,7 @@ module HTML5lib
# The character we just consumed need to be put back on the stack so it
# doesn't get lost...
- @stream.queue.push(data)
+ @stream.unget(data)
end
# This function returns either U+FFFD or the character based on the
@@ -128,7 +128,6 @@ module HTML5lib
radix = 16
end
- char = [0xFFFD].pack('U')
charStack = []
# Consume all the characters that are in range while making sure we
@@ -142,17 +141,25 @@ module HTML5lib
# Convert the set of characters consumed to an int.
charAsInt = charStack.join('').to_i(radix)
- # If the integer is between 127 and 160 (so 128 and bigger and 159 and
- # smaller) we need to do the "windows trick".
- if (127...160).include? charAsInt
+ if charAsInt == 13
+ @tokenQueue.push({:type => :ParseError, :data =>
+ _("Incorrect CR newline entity. Replaced with LF.")})
+ charAsInt = 10
+ elsif (128..159).include? charAsInt
+ # If the integer is between 127 and 160 (so 128 and bigger and 159
+ # and smaller) we need to do the "windows trick".
@tokenQueue.push({:type => :ParseError, :data =>
_("Entity used with illegal number (windows-1252 reference).")})
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
end
- if charAsInt > 0 and charAsInt <= 1114111
+ if 0 < charAsInt and charAsInt <= 1114111 and not (55296 <= charAsInt and charAsInt <= 57343)
char = [charAsInt].pack('U')
+ else
+ char = [0xFFFD].pack('U')
+ @tokenQueue.push({:type => :ParseError, :data =>
+ _("Numeric entity represents an illegal codepoint.")})
end
# Discard the ; if present. Otherwise, put it back on the queue and
@@ -160,18 +167,18 @@ module HTML5lib
if c != ";"
@tokenQueue.push({:type => :ParseError, :data =>
_("Numeric entity didn't end with ';'.")})
- @stream.queue.push(c)
+ @stream.unget(c)
end
return char
end
- def consumeEntity
+ def consumeEntity(from_attribute=false)
char = nil
charStack = [@stream.char]
if SPACE_CHARACTERS.include?(charStack[0]) or
[:EOF, '<', '&'].include?(charStack[0])
- @stream.queue+= charStack
+ @stream.unget(charStack)
elsif charStack[0] == "#"
# We might have a number entity here.
charStack += [@stream.char, @stream.char]
@@ -179,22 +186,22 @@ module HTML5lib
# If we reach the end of the file put everything up to :EOF
# back in the queue
charStack = charStack[0...charStack.index(:EOF)]
- @stream.queue+= charStack
+ @stream.unget(charStack)
@tokenQueue.push({:type => :ParseError, :data =>
_("Numeric entity expected. Got end of file instead.")})
else
if charStack[1].downcase == "x" \
and HEX_DIGITS.include? charStack[2]
# Hexadecimal entity detected.
- @stream.queue.push(charStack[2])
+ @stream.unget(charStack[2])
char = consumeNumberEntity(true)
elsif DIGITS.include? charStack[1]
# Decimal entity detected.
- @stream.queue += charStack[1..-1]
+ @stream.unget(charStack[1..-1])
char = consumeNumberEntity(false)
else
# No number entity detected.
- @stream.queue += charStack
+ @stream.unget(charStack)
@tokenQueue.push({:type => :ParseError, :data =>
_("Numeric entity expected but none found.")})
end
@@ -209,6 +216,8 @@ module HTML5lib
filteredEntityList.reject! {|e| e[0].chr != charStack[0]}
entityName = nil
+ # Try to find the longest entity the string will match to take care
+ # of &noti for instance.
while charStack[-1] != :EOF
name = charStack.join('')
if filteredEntityList.any? {|e| e[0...name.length] == name}
@@ -220,6 +229,7 @@ module HTML5lib
if ENTITIES.include? name
entityName = name
+ break if entityName[-1] == ';'
end
end
@@ -228,15 +238,23 @@ module HTML5lib
# Check whether or not the last character returned can be
# discarded or needs to be put back.
- if not charStack[-1] == ";"
+ if entityName[-1] != ?;
@tokenQueue.push({:type => :ParseError, :data =>
_("Named entity didn't end with ';'.")})
- @stream.queue += charStack[entityName.length..-1]
+ end
+
+ if charStack[-1] != ";" and from_attribute and
+ (ASCII_LETTERS.include?(charStack[entityName.length]) or
+ DIGITS.include?(charStack[entityName.length]))
+ @stream.unget(charStack)
+ char = '&'
+ else
+ @stream.unget(charStack[entityName.length..-1])
end
else
@tokenQueue.push({:type => :ParseError, :data =>
_("Named entity expected. Got none.")})
- @stream.queue += charStack
+ @stream.unget(charStack)
end
end
return char
@@ -244,7 +262,7 @@ module HTML5lib
# This method replaces the need for "entityInAttributeValueState".
def processEntityInAttribute
- entity = consumeEntity
+ entity = consumeEntity(true)
if entity
@currentToken[:data][-1][1] += entity
else
@@ -274,20 +292,23 @@ module HTML5lib
@lastFourChars.shift if @lastFourChars.length > 4
end
- if data == "&" and [:PCDATA,:RCDATA].include?(@contentModelFlag)
- @state = @states[:entityData]
+ if data == "&" and !@escapeFlag and
+ [:PCDATA,:RCDATA].include?(@contentModelFlag)
+ @state = @states[:entityData]
- elsif data == "-" and [:CDATA,:RCDATA].include?(@contentModelFlag) and
- @escapeFlag == false and @lastFourChars.join('') == ""
+ elsif data == ">" and @escapeFlag and
+ [:CDATA,:RCDATA].include?(@contentModelFlag) and
+ @lastFourChars[1..-1].join('') == "-->"
@escapeFlag = false
@tokenQueue.push({:type => :Characters, :data => data})
@@ -345,14 +366,14 @@ module HTML5lib
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected tag name. Got '?' instead (HTML doesn't " +
"support processing instructions).")})
- @stream.queue.push(data)
+ @stream.unget(data)
@state = @states[:bogusComment]
else
# XXX
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected tag name. Got something else instead")})
@tokenQueue.push({:type => :Characters, :data => "<"})
- @stream.queue.push(data)
+ @stream.unget(data)
@state = @states[:data]
end
else
@@ -363,7 +384,7 @@ module HTML5lib
@state = @states[:closeTagOpen]
else
@tokenQueue.push({:type => :Characters, :data => "<"})
- @stream.queue.insert(0, data)
+ @stream.unget(data)
@state = @states[:data]
end
end
@@ -388,7 +409,7 @@ module HTML5lib
# Since this is just for checking. We put the characters back on
# the stack.
- @stream.queue += charStack
+ @stream.unget(charStack)
end
if @currentToken and
@@ -426,7 +447,7 @@ module HTML5lib
# XXX data can be _'_...
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected closing tag. Unexpected character '#{data}' found.")})
- @stream.queue.push(data)
+ @stream.unget(data)
@state = @states[:bogusComment]
end
@@ -556,7 +577,7 @@ module HTML5lib
@state = @states[:attributeValueDoubleQuoted]
elsif data == "&"
@state = @states[:attributeValueUnQuoted]
- @stream.queue.push(data);
+ @stream.unget(data);
elsif data == "'"
@state = @states[:attributeValueSingleQuoted]
elsif data == ">"
@@ -656,7 +677,7 @@ module HTML5lib
else
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected '--' or 'DOCTYPE'. Not found.")})
- @stream.queue += charStack
+ @stream.unget(charStack)
@state = @states[:bogusComment]
end
end
@@ -771,7 +792,7 @@ module HTML5lib
else
@tokenQueue.push({:type => :ParseError, :data =>
_("No space after literal string 'DOCTYPE'.")})
- @stream.queue.push(data)
+ @stream.unget(data)
@state = @states[:beforeDoctypeName]
end
return true
@@ -827,7 +848,7 @@ module HTML5lib
@state = @states[:data]
elsif data == :EOF
@currentToken[:data] = true
- @stream.queue.push(data)
+ @stream.unget(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in DOCTYPE.")})
@currentToken[:correct] = false
@@ -842,7 +863,7 @@ module HTML5lib
elsif token == "system"
@state = @states[:beforeDoctypeSystemIdentifier]
else
- @stream.queue += charStack
+ @stream.unget(charStack)
@tokenQueue.push({:type => :ParseError, :data =>
_("Expected 'public' or 'system'. Got '#{charStack.join('')}'")})
@state = @states[:bogusDoctype]
@@ -1028,7 +1049,7 @@ module HTML5lib
@state = @states[:data]
elsif data == :EOF
# XXX EMIT
- @stream.queue.push(data)
+ @stream.unget(data)
@tokenQueue.push({:type => :ParseError, :data =>
_("Unexpected end of file in bogus doctype.")})
@currentToken[:correct] = false
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
similarity index 70%
rename from vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
rename to vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
index 9fa49975..8c5bdd55 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
@@ -1,17 +1,17 @@
-module HTML5lib
+module HTML5
module TreeBuilders
class << self
def [](name)
case name.to_s.downcase
when 'simpletree' then
- require 'html5lib/treebuilders/simpletree'
+ require 'html5/treebuilders/simpletree'
SimpleTree::TreeBuilder
when 'rexml' then
- require 'html5lib/treebuilders/rexml'
+ require 'html5/treebuilders/rexml'
REXML::TreeBuilder
when 'hpricot' then
- require 'html5lib/treebuilders/hpricot'
+ require 'html5/treebuilders/hpricot'
Hpricot::TreeBuilder
else
raise "Unknown TreeBuilder #{name}"
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
similarity index 99%
rename from vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
rename to vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
index 0d1082bd..f5d689db 100755
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
@@ -1,8 +1,8 @@
-require 'html5lib/constants'
+require 'html5/constants'
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
-module HTML5lib
+module HTML5
# The scope markers are inserted when entering buttons, object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
similarity index 95%
rename from vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
rename to vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
index 20cc58b6..48c9a12d 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
@@ -1,221 +1,221 @@
-require 'html5lib/treebuilders/base'
-require 'rubygems'
-require 'hpricot'
-require 'forwardable'
-
-module HTML5lib
- module TreeBuilders
- module Hpricot
-
- class Node < Base::Node
-
- extend Forwardable
-
- def_delegators :@hpricot, :name
-
- attr_accessor :hpricot
-
- def initialize(name)
- super(name)
- @hpricot = self.class.hpricot_class.new name
- end
-
- def appendChild(node)
- if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
- childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
- else
- childNodes << node
- hpricot.children << node.hpricot
- end
- if (oldparent = node.hpricot.parent) != nil
- oldparent.children.delete_at(oldparent.children.index(node.hpricot))
- end
- node.hpricot.parent = hpricot
- node.parent = self
- end
-
- def removeChild(node)
- childNodes.delete(node)
- hpricot.children.delete_at(hpricot.children.index(node.hpricot))
- node.hpricot.parent = nil
- node.parent = nil
- end
-
- def insertText(data, before=nil)
- if before
- insertBefore(TextNode.new(data), before)
- else
- appendChild(TextNode.new(data))
- end
- end
-
- def insertBefore(node, refNode)
- index = childNodes.index(refNode)
- if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
- childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
- else
- refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
- childNodes.insert(index, node)
- end
- end
-
- def hasContent
- childNodes.any?
- end
- end
-
- class Element < Node
- def self.hpricot_class
- ::Hpricot::Elem
- end
-
- def initialize(name)
- super(name)
-
- @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
- end
-
- def name
- @hpricot.stag.name
- end
-
- def cloneNode
- attributes.inject(self.class.new(name)) do |node, (name, value)|
- node.hpricot[name] = value
- node
- end
- end
-
- # A call to Hpricot::Elem#raw_attributes is built dynamically,
- # so alterations to the returned value (a hash) will be lost.
- #
- # AttributeProxy works around this by forwarding :[]= calls
- # to the raw_attributes accessor on the element start tag.
- #
- class AttributeProxy
- def initialize(hpricot)
- @hpricot = hpricot
- end
-
- def []=(k, v)
- @hpricot.stag.send(stag_attributes_method)[k] = v
- end
-
- def stag_attributes_method
- # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
- @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
- end
-
- def method_missing(*a, &b)
- @hpricot.attributes.send(*a, &b)
- end
- end
-
- def attributes
- AttributeProxy.new(@hpricot)
- end
-
- def attributes=(attrs)
- attrs.each { |name, value| @hpricot[name] = value }
- end
-
- def printTree(indent=0)
- tree = "\n|#{' ' * indent}<#{name}>"
- indent += 2
- attributes.each do |name, value|
- next if name == 'xmlns'
- tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
- end
- childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
- end
- end
-
- class Document < Node
- def self.hpricot_class
- ::Hpricot::Doc
- end
-
- def initialize
- super(nil)
- end
-
- def printTree(indent=0)
- childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
- end
- end
-
- class DocumentType < Node
- def self.hpricot_class
- ::Hpricot::DocType
- end
-
- def initialize(name)
- begin
- super(name)
- rescue ArgumentError # needs 3...
- end
-
- @hpricot = ::Hpricot::DocType.new(name, nil, nil)
- end
-
- def printTree(indent=0)
- "\n|#{' ' * indent}"
- end
- end
-
- class DocumentFragment < Element
- def initialize
- super('')
- end
-
- def printTree(indent=0)
- childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
- end
- end
-
- class TextNode < Node
- def initialize(data)
- @hpricot = ::Hpricot::Text.new(data)
- end
-
- def printTree(indent=0)
- "\n|#{' ' * indent}\"#{hpricot.content}\""
- end
- end
-
- class CommentNode < Node
- def self.hpricot_class
- ::Hpricot::Comment
- end
-
- def printTree(indent=0)
- "\n|#{' ' * indent}"
- end
- end
-
- class TreeBuilder < Base::TreeBuilder
- def initialize
- @documentClass = Document
- @doctypeClass = DocumentType
- @elementClass = Element
- @commentClass = CommentNode
- @fragmentClass = DocumentFragment
- end
-
- def testSerializer(node)
- node.printTree
- end
-
- def getDocument
- @document.hpricot
- end
-
- def getFragment
- @document = super
- return @document.hpricot.children
- end
- end
-
- end
- end
-end
+require 'html5/treebuilders/base'
+require 'rubygems'
+require 'hpricot'
+require 'forwardable'
+
+module HTML5
+ module TreeBuilders
+ module Hpricot
+
+ class Node < Base::Node
+
+ extend Forwardable
+
+ def_delegators :@hpricot, :name
+
+ attr_accessor :hpricot
+
+ def initialize(name)
+ super(name)
+ @hpricot = self.class.hpricot_class.new name
+ end
+
+ def appendChild(node)
+ if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
+ childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
+ else
+ childNodes << node
+ hpricot.children << node.hpricot
+ end
+ if (oldparent = node.hpricot.parent) != nil
+ oldparent.children.delete_at(oldparent.children.index(node.hpricot))
+ end
+ node.hpricot.parent = hpricot
+ node.parent = self
+ end
+
+ def removeChild(node)
+ childNodes.delete(node)
+ hpricot.children.delete_at(hpricot.children.index(node.hpricot))
+ node.hpricot.parent = nil
+ node.parent = nil
+ end
+
+ def insertText(data, before=nil)
+ if before
+ insertBefore(TextNode.new(data), before)
+ else
+ appendChild(TextNode.new(data))
+ end
+ end
+
+ def insertBefore(node, refNode)
+ index = childNodes.index(refNode)
+ if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
+ childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
+ else
+ refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
+ childNodes.insert(index, node)
+ end
+ end
+
+ def hasContent
+ childNodes.any?
+ end
+ end
+
+ class Element < Node
+ def self.hpricot_class
+ ::Hpricot::Elem
+ end
+
+ def initialize(name)
+ super(name)
+
+ @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
+ end
+
+ def name
+ @hpricot.stag.name
+ end
+
+ def cloneNode
+ attributes.inject(self.class.new(name)) do |node, (name, value)|
+ node.hpricot[name] = value
+ node
+ end
+ end
+
+ # A call to Hpricot::Elem#raw_attributes is built dynamically,
+ # so alterations to the returned value (a hash) will be lost.
+ #
+ # AttributeProxy works around this by forwarding :[]= calls
+ # to the raw_attributes accessor on the element start tag.
+ #
+ class AttributeProxy
+ def initialize(hpricot)
+ @hpricot = hpricot
+ end
+
+ def []=(k, v)
+ @hpricot.stag.send(stag_attributes_method)[k] = v
+ end
+
+ def stag_attributes_method
+ # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
+ @hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
+ end
+
+ def method_missing(*a, &b)
+ @hpricot.attributes.send(*a, &b)
+ end
+ end
+
+ def attributes
+ AttributeProxy.new(@hpricot)
+ end
+
+ def attributes=(attrs)
+ attrs.each { |name, value| @hpricot[name] = value }
+ end
+
+ def printTree(indent=0)
+ tree = "\n|#{' ' * indent}<#{name}>"
+ indent += 2
+ attributes.each do |name, value|
+ next if name == 'xmlns'
+ tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
+ end
+ childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
+ end
+ end
+
+ class Document < Node
+ def self.hpricot_class
+ ::Hpricot::Doc
+ end
+
+ def initialize
+ super(nil)
+ end
+
+ def printTree(indent=0)
+ childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
+ end
+ end
+
+ class DocumentType < Node
+ def self.hpricot_class
+ ::Hpricot::DocType
+ end
+
+ def initialize(name)
+ begin
+ super(name)
+ rescue ArgumentError # needs 3...
+ end
+
+ @hpricot = ::Hpricot::DocType.new(name, nil, nil)
+ end
+
+ def printTree(indent=0)
+ "\n|#{' ' * indent}"
+ end
+ end
+
+ class DocumentFragment < Element
+ def initialize
+ super('')
+ end
+
+ def printTree(indent=0)
+ childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
+ end
+ end
+
+ class TextNode < Node
+ def initialize(data)
+ @hpricot = ::Hpricot::Text.new(data)
+ end
+
+ def printTree(indent=0)
+ "\n|#{' ' * indent}\"#{hpricot.content}\""
+ end
+ end
+
+ class CommentNode < Node
+ def self.hpricot_class
+ ::Hpricot::Comment
+ end
+
+ def printTree(indent=0)
+ "\n|#{' ' * indent}"
+ end
+ end
+
+ class TreeBuilder < Base::TreeBuilder
+ def initialize
+ @documentClass = Document
+ @doctypeClass = DocumentType
+ @elementClass = Element
+ @commentClass = CommentNode
+ @fragmentClass = DocumentFragment
+ end
+
+ def testSerializer(node)
+ node.printTree
+ end
+
+ def getDocument
+ @document.hpricot
+ end
+
+ def getFragment
+ @document = super
+ return @document.hpricot.children
+ end
+ end
+
+ end
+ end
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
similarity index 98%
rename from vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
rename to vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
index f6aad877..a8181430 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
@@ -1,8 +1,8 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'
require 'rexml/document'
require 'forwardable'
-module HTML5lib
+module HTML5
module TreeBuilders
module REXML
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb b/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
similarity index 98%
rename from vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
rename to vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
index 83034bff..827c0c0d 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treebuilders/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
@@ -1,6 +1,6 @@
-require 'html5lib/treebuilders/base'
+require 'html5/treebuilders/base'
-module HTML5lib
+module HTML5
module TreeBuilders
module SimpleTree
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
similarity index 66%
rename from vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
rename to vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
index 2074768c..82c73bb7 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
@@ -1,19 +1,19 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
-module HTML5lib
+module HTML5
module TreeWalkers
class << self
def [](name)
case name.to_s.downcase
when 'simpletree' then
- require 'html5lib/treewalkers/simpletree'
+ require 'html5/treewalkers/simpletree'
SimpleTree::TreeWalker
when 'rexml' then
- require 'html5lib/treewalkers/rexml'
+ require 'html5/treewalkers/rexml'
REXML::TreeWalker
when 'hpricot' then
- require 'html5lib/treewalkers/hpricot'
+ require 'html5/treewalkers/hpricot'
Hpricot::TreeWalker
else
raise "Unknown TreeWalker #{name}"
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
similarity index 98%
rename from vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
rename to vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
index 21d4d3f7..394f8c07 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/base.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
@@ -1,5 +1,5 @@
-require 'html5lib/constants'
-module HTML5lib
+require 'html5/constants'
+module HTML5
module TreeWalkers
module TokenConstructor
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
similarity index 89%
rename from vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
rename to vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
index c9d12263..75cde344 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/hpricot.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
@@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
require 'rexml/document'
-module HTML5lib
+module HTML5
module TreeWalkers
module Hpricot
- class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+ class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
def node_details(node)
case node
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
similarity index 89%
rename from vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
rename to vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
index c6881d97..695dc154 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/rexml.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
@@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
require 'rexml/document'
-module HTML5lib
+module HTML5
module TreeWalkers
module REXML
- class TreeWalker < HTML5lib::TreeWalkers::NonRecursiveTreeWalker
+ class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
def node_details(node)
case node
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb b/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
similarity index 86%
rename from vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
rename to vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
index 37ebf32a..3194389b 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/treewalkers/simpletree.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
@@ -1,10 +1,10 @@
-require 'html5lib/treewalkers/base'
+require 'html5/treewalkers/base'
-module HTML5lib
+module HTML5
module TreeWalkers
module SimpleTree
- class TreeWalker < HTML5lib::TreeWalkers::Base
- include HTML5lib::TreeBuilders::SimpleTree
+ class TreeWalker < HTML5::TreeWalkers::Base
+ include HTML5::TreeBuilders::SimpleTree
def walk(node)
case node
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb b/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
deleted file mode 100755
index 8144c93f..00000000
--- a/vendor/plugins/HTML5lib/lib/html5lib/constants.rb
+++ /dev/null
@@ -1,708 +0,0 @@
-module HTML5lib
-
- class EOF < Exception; end
-
- CONTENT_MODEL_FLAGS = [
- :PCDATA,
- :RCDATA,
- :CDATA,
- :PLAINTEXT
- ]
-
- SCOPING_ELEMENTS = %w[
- button
- caption
- html
- marquee
- object
- table
- td
- th
- ]
-
- FORMATTING_ELEMENTS = %w[
- a
- b
- big
- em
- font
- i
- nobr
- s
- small
- strike
- strong
- tt
- u
- ]
-
- SPECIAL_ELEMENTS = %w[
- address
- area
- base
- basefont
- bgsound
- blockquote
- body
- br
- center
- col
- colgroup
- dd
- dir
- div
- dl
- dt
- embed
- fieldset
- form
- frame
- frameset
- h1
- h2
- h3
- h4
- h5
- h6
- head
- hr
- iframe
- image
- img
- input
- isindex
- li
- link
- listing
- menu
- meta
- noembed
- noframes
- noscript
- ol
- optgroup
- option
- p
- param
- plaintext
- pre
- script
- select
- spacer
- style
- tbody
- textarea
- tfoot
- thead
- title
- tr
- ul
- wbr
- ]
-
- SPACE_CHARACTERS = %W[
- \t
- \n
- \x0B
- \x0C
- \x20
- \r
- ]
-
- TABLE_INSERT_MODE_ELEMENTS = %w[
- table
- tbody
- tfoot
- thead
- tr
- ]
-
- ASCII_LOWERCASE = ('a'..'z').to_a.join('')
- ASCII_UPPERCASE = ('A'..'Z').to_a.join('')
- ASCII_LETTERS = ASCII_LOWERCASE + ASCII_UPPERCASE
- DIGITS = '0'..'9'
- HEX_DIGITS = DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
-
- # Heading elements need to be ordered
- HEADING_ELEMENTS = %w[
- h1
- h2
- h3
- h4
- h5
- h6
- ]
-
- # XXX What about event-source and command?
- VOID_ELEMENTS = %w[
- base
- link
- meta
- hr
- br
- img
- embed
- param
- area
- col
- input
- ]
-
- CDATA_ELEMENTS = %w[title textarea]
-
- RCDATA_ELEMENTS = %w[
- style
- script
- xmp
- iframe
- noembed
- noframes
- noscript
- ]
-
- BOOLEAN_ATTRIBUTES = {
- :global => %w[irrelevant],
- 'style' => %w[scoped],
- 'img' => %w[ismap],
- 'audio' => %w[autoplay controls],
- 'video' => %w[autoplay controls],
- 'script' => %w[defer async],
- 'details' => %w[open],
- 'datagrid' => %w[multiple disabled],
- 'command' => %w[hidden disabled checked default],
- 'menu' => %w[autosubmit],
- 'fieldset' => %w[disabled readonly],
- 'option' => %w[disabled readonly selected],
- 'optgroup' => %w[disabled readonly],
- 'button' => %w[disabled autofocus],
- 'input' => %w[disabled readonly required autofocus checked ismap],
- 'select' => %w[disabled readonly autofocus multiple],
- 'output' => %w[disabled readonly]
- }
-
- # entitiesWindows1252 has to be _ordered_ and needs to have an index.
- ENTITIES_WINDOWS1252 = [
- 8364, # 0x80 0x20AC EURO SIGN
- 65533, # 0x81 UNDEFINED
- 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
- 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
- 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
- 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
- 8224, # 0x86 0x2020 DAGGER
- 8225, # 0x87 0x2021 DOUBLE DAGGER
- 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
- 8240, # 0x89 0x2030 PER MILLE SIGN
- 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
- 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
- 65533, # 0x8D UNDEFINED
- 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
- 65533, # 0x8F UNDEFINED
- 65533, # 0x90 UNDEFINED
- 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
- 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
- 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
- 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
- 8226, # 0x95 0x2022 BULLET
- 8211, # 0x96 0x2013 EN DASH
- 8212, # 0x97 0x2014 EM DASH
- 732, # 0x98 0x02DC SMALL TILDE
- 8482, # 0x99 0x2122 TRADE MARK SIGN
- 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
- 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
- 65533, # 0x9D UNDEFINED
- 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
- 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
- ]
-
- private
-
- def self.U n
- [n].pack('U')
- end
-
- public
-
- ENTITIES = {
- "AElig" => U(0xC6),
- "Aacute" => U(0xC1),
- "Acirc" => U(0xC2),
- "Agrave" => U(0xC0),
- "Alpha" => U(0x0391),
- "Aring" => U(0xC5),
- "Atilde" => U(0xC3),
- "Auml" => U(0xC4),
- "Beta" => U(0x0392),
- "Ccedil" => U(0xC7),
- "Chi" => U(0x03A7),
- "Dagger" => U(0x2021),
- "Delta" => U(0x0394),
- "ETH" => U(0xD0),
- "Eacute" => U(0xC9),
- "Ecirc" => U(0xCA),
- "Egrave" => U(0xC8),
- "Epsilon" => U(0x0395),
- "Eta" => U(0x0397),
- "Euml" => U(0xCB),
- "Gamma" => U(0x0393),
- "Iacute" => U(0xCD),
- "Icirc" => U(0xCE),
- "Igrave" => U(0xCC),
- "Iota" => U(0x0399),
- "Iuml" => U(0xCF),
- "Kappa" => U(0x039A),
- "Lambda" => U(0x039B),
- "Mu" => U(0x039C),
- "Ntilde" => U(0xD1),
- "Nu" => U(0x039D),
- "OElig" => U(0x0152),
- "Oacute" => U(0xD3),
- "Ocirc" => U(0xD4),
- "Ograve" => U(0xD2),
- "Omega" => U(0x03A9),
- "Omicron" => U(0x039F),
- "Oslash" => U(0xD8),
- "Otilde" => U(0xD5),
- "Ouml" => U(0xD6),
- "Phi" => U(0x03A6),
- "Pi" => U(0x03A0),
- "Prime" => U(0x2033),
- "Psi" => U(0x03A8),
- "Rho" => U(0x03A1),
- "Scaron" => U(0x0160),
- "Sigma" => U(0x03A3),
- "THORN" => U(0xDE),
- "Tau" => U(0x03A4),
- "Theta" => U(0x0398),
- "Uacute" => U(0xDA),
- "Ucirc" => U(0xDB),
- "Ugrave" => U(0xD9),
- "Upsilon" => U(0x03A5),
- "Uuml" => U(0xDC),
- "Xi" => U(0x039E),
- "Yacute" => U(0xDD),
- "Yuml" => U(0x0178),
- "Zeta" => U(0x0396),
- "aacute" => U(0xE1),
- "acirc" => U(0xE2),
- "acute" => U(0xB4),
- "aelig" => U(0xE6),
- "agrave" => U(0xE0),
- "alefsym" => U(0x2135),
- "alpha" => U(0x03B1),
- "amp" => U(0x26),
- "AMP" => U(0x26),
- "and" => U(0x2227),
- "ang" => U(0x2220),
- "apos" => U(0x27),
- "aring" => U(0xE5),
- "asymp" => U(0x2248),
- "atilde" => U(0xE3),
- "auml" => U(0xE4),
- "bdquo" => U(0x201E),
- "beta" => U(0x03B2),
- "brvbar" => U(0xA6),
- "bull" => U(0x2022),
- "cap" => U(0x2229),
- "ccedil" => U(0xE7),
- "cedil" => U(0xB8),
- "cent" => U(0xA2),
- "chi" => U(0x03C7),
- "circ" => U(0x02C6),
- "clubs" => U(0x2663),
- "cong" => U(0x2245),
- "copy" => U(0xA9),
- "COPY" => U(0xA9),
- "crarr" => U(0x21B5),
- "cup" => U(0x222A),
- "curren" => U(0xA4),
- "dArr" => U(0x21D3),
- "dagger" => U(0x2020),
- "darr" => U(0x2193),
- "deg" => U(0xB0),
- "delta" => U(0x03B4),
- "diams" => U(0x2666),
- "divide" => U(0xF7),
- "eacute" => U(0xE9),
- "ecirc" => U(0xEA),
- "egrave" => U(0xE8),
- "empty" => U(0x2205),
- "emsp" => U(0x2003),
- "ensp" => U(0x2002),
- "epsilon" => U(0x03B5),
- "equiv" => U(0x2261),
- "eta" => U(0x03B7),
- "eth" => U(0xF0),
- "euml" => U(0xEB),
- "euro" => U(0x20AC),
- "exist" => U(0x2203),
- "fnof" => U(0x0192),
- "forall" => U(0x2200),
- "frac12" => U(0xBD),
- "frac14" => U(0xBC),
- "frac34" => U(0xBE),
- "frasl" => U(0x2044),
- "gamma" => U(0x03B3),
- "ge" => U(0x2265),
- "gt" => U(0x3E),
- "GT" => U(0x3E),
- "hArr" => U(0x21D4),
- "harr" => U(0x2194),
- "hearts" => U(0x2665),
- "hellip" => U(0x2026),
- "iacute" => U(0xED),
- "icirc" => U(0xEE),
- "iexcl" => U(0xA1),
- "igrave" => U(0xEC),
- "image" => U(0x2111),
- "infin" => U(0x221E),
- "int" => U(0x222B),
- "iota" => U(0x03B9),
- "iquest" => U(0xBF),
- "isin" => U(0x2208),
- "iuml" => U(0xEF),
- "kappa" => U(0x03BA),
- "lArr" => U(0x21D0),
- "lambda" => U(0x03BB),
- "lang" => U(0x2329),
- "laquo" => U(0xAB),
- "larr" => U(0x2190),
- "lceil" => U(0x2308),
- "ldquo" => U(0x201C),
- "le" => U(0x2264),
- "lfloor" => U(0x230A),
- "lowast" => U(0x2217),
- "loz" => U(0x25CA),
- "lrm" => U(0x200E),
- "lsaquo" => U(0x2039),
- "lsquo" => U(0x2018),
- "lt" => U(0x3C),
- "LT" => U(0x3C),
- "macr" => U(0xAF),
- "mdash" => U(0x2014),
- "micro" => U(0xB5),
- "middot" => U(0xB7),
- "minus" => U(0x2212),
- "mu" => U(0x03BC),
- "nabla" => U(0x2207),
- "nbsp" => U(0xA0),
- "ndash" => U(0x2013),
- "ne" => U(0x2260),
- "ni" => U(0x220B),
- "not" => U(0xAC),
- "notin" => U(0x2209),
- "nsub" => U(0x2284),
- "ntilde" => U(0xF1),
- "nu" => U(0x03BD),
- "oacute" => U(0xF3),
- "ocirc" => U(0xF4),
- "oelig" => U(0x0153),
- "ograve" => U(0xF2),
- "oline" => U(0x203E),
- "omega" => U(0x03C9),
- "omicron" => U(0x03BF),
- "oplus" => U(0x2295),
- "or" => U(0x2228),
- "ordf" => U(0xAA),
- "ordm" => U(0xBA),
- "oslash" => U(0xF8),
- "otilde" => U(0xF5),
- "otimes" => U(0x2297),
- "ouml" => U(0xF6),
- "para" => U(0xB6),
- "part" => U(0x2202),
- "permil" => U(0x2030),
- "perp" => U(0x22A5),
- "phi" => U(0x03C6),
- "pi" => U(0x03C0),
- "piv" => U(0x03D6),
- "plusmn" => U(0xB1),
- "pound" => U(0xA3),
- "prime" => U(0x2032),
- "prod" => U(0x220F),
- "prop" => U(0x221D),
- "psi" => U(0x03C8),
- "quot" => U(0x22),
- "QUOT" => U(0x22),
- "rArr" => U(0x21D2),
- "radic" => U(0x221A),
- "rang" => U(0x232A),
- "raquo" => U(0xBB),
- "rarr" => U(0x2192),
- "rceil" => U(0x2309),
- "rdquo" => U(0x201D),
- "real" => U(0x211C),
- "reg" => U(0xAE),
- "REG" => U(0xAE),
- "rfloor" => U(0x230B),
- "rho" => U(0x03C1),
- "rlm" => U(0x200F),
- "rsaquo" => U(0x203A),
- "rsquo" => U(0x2019),
- "sbquo" => U(0x201A),
- "scaron" => U(0x0161),
- "sdot" => U(0x22C5),
- "sect" => U(0xA7),
- "shy" => U(0xAD),
- "sigma" => U(0x03C3),
- "sigmaf" => U(0x03C2),
- "sim" => U(0x223C),
- "spades" => U(0x2660),
- "sub" => U(0x2282),
- "sube" => U(0x2286),
- "sum" => U(0x2211),
- "sup" => U(0x2283),
- "sup1" => U(0xB9),
- "sup2" => U(0xB2),
- "sup3" => U(0xB3),
- "supe" => U(0x2287),
- "szlig" => U(0xDF),
- "tau" => U(0x03C4),
- "there4" => U(0x2234),
- "theta" => U(0x03B8),
- "thetasym" => U(0x03D1),
- "thinsp" => U(0x2009),
- "thorn" => U(0xFE),
- "tilde" => U(0x02DC),
- "times" => U(0xD7),
- "trade" => U(0x2122),
- "uArr" => U(0x21D1),
- "uacute" => U(0xFA),
- "uarr" => U(0x2191),
- "ucirc" => U(0xFB),
- "ugrave" => U(0xF9),
- "uml" => U(0xA8),
- "upsih" => U(0x03D2),
- "upsilon" => U(0x03C5),
- "uuml" => U(0xFC),
- "weierp" => U(0x2118),
- "xi" => U(0x03BE),
- "yacute" => U(0xFD),
- "yen" => U(0xA5),
- "yuml" => U(0xFF),
- "zeta" => U(0x03B6),
- "zwj" => U(0x200D),
- "zwnj" => U(0x200C)
- }
-
- ENCODINGS = %w[
- ansi_x3.4-1968
- iso-ir-6
- ansi_x3.4-1986
- iso_646.irv:1991
- ascii
- iso646-us
- us-ascii
- us
- ibm367
- cp367
- csascii
- ks_c_5601-1987
- korean
- iso-2022-kr
- csiso2022kr
- euc-kr
- iso-2022-jp
- csiso2022jp
- iso-2022-jp-2
- iso-ir-58
- chinese
- csiso58gb231280
- iso_8859-1:1987
- iso-ir-100
- iso_8859-1
- iso-8859-1
- latin1
- l1
- ibm819
- cp819
- csisolatin1
- iso_8859-2:1987
- iso-ir-101
- iso_8859-2
- iso-8859-2
- latin2
- l2
- csisolatin2
- iso_8859-3:1988
- iso-ir-109
- iso_8859-3
- iso-8859-3
- latin3
- l3
- csisolatin3
- iso_8859-4:1988
- iso-ir-110
- iso_8859-4
- iso-8859-4
- latin4
- l4
- csisolatin4
- iso_8859-6:1987
- iso-ir-127
- iso_8859-6
- iso-8859-6
- ecma-114
- asmo-708
- arabic
- csisolatinarabic
- iso_8859-7:1987
- iso-ir-126
- iso_8859-7
- iso-8859-7
- elot_928
- ecma-118
- greek
- greek8
- csisolatingreek
- iso_8859-8:1988
- iso-ir-138
- iso_8859-8
- iso-8859-8
- hebrew
- csisolatinhebrew
- iso_8859-5:1988
- iso-ir-144
- iso_8859-5
- iso-8859-5
- cyrillic
- csisolatincyrillic
- iso_8859-9:1989
- iso-ir-148
- iso_8859-9
- iso-8859-9
- latin5
- l5
- csisolatin5
- iso-8859-10
- iso-ir-157
- l6
- iso_8859-10:1992
- csisolatin6
- latin6
- hp-roman8
- roman8
- r8
- ibm037
- cp037
- csibm037
- ibm424
- cp424
- csibm424
- ibm437
- cp437
- 437
- cspc8codepage437
- ibm500
- cp500
- csibm500
- ibm775
- cp775
- cspc775baltic
- ibm850
- cp850
- 850
- cspc850multilingual
- ibm852
- cp852
- 852
- cspcp852
- ibm855
- cp855
- 855
- csibm855
- ibm857
- cp857
- 857
- csibm857
- ibm860
- cp860
- 860
- csibm860
- ibm861
- cp861
- 861
- cp-is
- csibm861
- ibm862
- cp862
- 862
- cspc862latinhebrew
- ibm863
- cp863
- 863
- csibm863
- ibm864
- cp864
- csibm864
- ibm865
- cp865
- 865
- csibm865
- ibm866
- cp866
- 866
- csibm866
- ibm869
- cp869
- 869
- cp-gr
- csibm869
- ibm1026
- cp1026
- csibm1026
- koi8-r
- cskoi8r
- koi8-u
- big5-hkscs
- ptcp154
- csptcp154
- pt154
- cp154
- utf-7
- utf-16be
- utf-16le
- utf-16
- utf-8
- iso-8859-13
- iso-8859-14
- iso-ir-199
- iso_8859-14:1998
- iso_8859-14
- latin8
- iso-celtic
- l8
- iso-8859-15
- iso_8859-15
- iso-8859-16
- iso-ir-226
- iso_8859-16:2001
- iso_8859-16
- latin10
- l10
- gbk
- cp936
- ms936
- gb18030
- shift_jis
- ms_kanji
- csshiftjis
- euc-jp
- gb2312
- big5
- csbig5
- windows-1250
- windows-1251
- windows-1252
- windows-1253
- windows-1254
- windows-1255
- windows-1256
- windows-1257
- windows-1258
- tis-620
- hz-gb-2312
- ]
-
-end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb b/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
deleted file mode 100644
index 05c3edd4..00000000
--- a/vendor/plugins/HTML5lib/lib/html5lib/filters.rb
+++ /dev/null
@@ -1 +0,0 @@
-require 'html5lib/filters/optionaltags'
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
deleted file mode 100644
index cd4c66a6..00000000
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
+++ /dev/null
@@ -1,2 +0,0 @@
-require 'html5lib/serializer/htmlserializer'
-require 'html5lib/serializer/xhtmlserializer'
diff --git a/vendor/plugins/HTML5lib/parse.rb b/vendor/plugins/HTML5lib/parse.rb
index 79233712..ba0d9071 100755
--- a/vendor/plugins/HTML5lib/parse.rb
+++ b/vendor/plugins/HTML5lib/parse.rb
@@ -26,15 +26,15 @@ def parse(opts, args)
exit(1)
end
- require 'html5lib/treebuilders'
- treebuilder = HTML5lib::TreeBuilders[opts.treebuilder]
+ require 'html5/treebuilders'
+ treebuilder = HTML5::TreeBuilders[opts.treebuilder]
if opts.output == :xml
- require 'html5lib/liberalxmlparser'
- p = HTML5lib::XHTMLParser.new(:tree=>treebuilder)
+ require 'html5/liberalxmlparser'
+ p = HTML5::XHTMLParser.new(:tree=>treebuilder)
else
- require 'html5lib/html5parser'
- p = HTML5lib::HTMLParser.new(:tree=>treebuilder)
+ require 'html5/html5parser'
+ p = HTML5::HTMLParser.new(:tree=>treebuilder)
end
if opts.parsemethod == :parse
@@ -70,10 +70,10 @@ def printOutput(parser, document, opts)
when :xml
print document
when :html
- require 'html5lib/treewalkers'
- tokens = HTML5lib::TreeWalkers[opts.treebuilder].new(document)
- require 'html5lib/serializer'
- puts HTML5lib::HTMLSerializer.serialize(tokens, opts.serializer)
+ require 'html5/treewalkers'
+ tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
+ require 'html5/serializer'
+ puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
when :hilite
print document.hilite
when :tree
@@ -188,6 +188,10 @@ opts = OptionParser.new do |opts|
options.serializer[:escape_lt_in_attrs] = lt
end
+ opts.on("--[no-]escape-rcdata", "Escape rcdata element values") do |rcdata|
+ options.serializer[:escape_rcdata] = rcdata
+ end
+
opts.separator ""
opts.separator "Other Options:"
diff --git a/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat b/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
index dd43f85c..dc74859c 100644
--- a/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/encoding/tests2.dat
@@ -33,7 +33,6 @@ EUC-jp
#encoding
EUC-jp
-
#data
diff --git a/vendor/plugins/HTML5lib/testdata/serializer/core.test b/vendor/plugins/HTML5lib/testdata/serializer/core.test
index fc981c14..d427822a 100644
--- a/vendor/plugins/HTML5lib/testdata/serializer/core.test
+++ b/vendor/plugins/HTML5lib/testdata/serializer/core.test
@@ -92,7 +92,8 @@
{"description": "rcdata",
"input": [["StartTag", "script", {}], ["Characters", "ac&d"]],
- "expected": ["
@@ -1511,6 +1515,7 @@ unexpected EOF
|
|
|
+|
#data
@@ -1807,6 +1812,7 @@ Unexpected EOF
|
|
|
+|
#data
@@ -1928,3 +1934,4 @@ Unexpected EOF
|
|
|
+|
diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
index fdf8356a..0b83d94c 100755
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests2.dat
@@ -777,3 +777,4 @@ Unexpected
end tag.
|
|
|
+|
diff --git a/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat b/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
index a66effff..b447d300 100644
--- a/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
+++ b/vendor/plugins/HTML5lib/testdata/tree-construction/tests3.dat
@@ -61,7 +61,6 @@ No DOCTYPE
#data
-
foo
#errors
#document
@@ -72,10 +71,22 @@ foo |