Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer.
Fixed the string-handling to work in both
Ruby 1.8.x and 1.9.2. There are still,
inexplicably, two functional tests that
fail. But the rest seems to work quite well.
This commit is contained in:
Jacques Distler 2009-11-30 16:28:18 -06:00
parent 79c8572053
commit a6429f8c22
142 changed files with 519 additions and 843 deletions

View file

@ -0,0 +1,773 @@
#data
<!DOCTYPE HTML>Test
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "Test"
#data
<textarea>test</div>test
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "test</div>test"
#data
<table><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td>test</tbody></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "test"
#data
<frame>test
#errors
Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
Line: 1 Col: 7 Unexpected start tag frame. Ignored.
#document
| <html>
| <head>
| <body>
| "test"
#data
<!DOCTYPE HTML><frameset>test
#errors
Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE HTML><frameset><!DOCTYPE HTML>
#errors
Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE HTML><font><p><b>test</font>
#errors
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <font>
| <p>
| <font>
| <b>
| "test"
#data
<!DOCTYPE HTML><dt><div><dd>
#errors
Line: 1 Col: 28 Missing end tag (div, dt).
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <dt>
| <div>
| <dd>
#data
<script></x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
#document
| <html>
| <head>
| <script>
| "</x"
| <body>
#data
<table><plaintext><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
Line: 1 Col: 21 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 22 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 22 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "<td>"
| <table>
#data
<plaintext></plaintext>
#errors
Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!DOCTYPE HTML><table><tr>TEST
#errors
Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "TEST"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE HTML><body t1=1><body t2=2><body t3=3 t4=4>
#errors
Line: 1 Col: 37 Unexpected start tag (body).
Line: 1 Col: 53 Unexpected start tag (body).
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| t1="1"
| t2="2"
| t3="3"
| t4="4"
#data
</b test
#errors
Line: 1 Col: 8 Unexpected end of file in attribute name.
Line: 1 Col: 8 End tag contains unexpected attributes.
Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML></b test<b &=&amp>X
#errors
Line: 1 Col: 32 Named entity didn't end with ';'.
Line: 1 Col: 33 End tag contains unexpected attributes.
Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X"
#data
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 54 Unexpected end of file in the tag name.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| type="text/x-foobar;baz"
| "X"
| <body>
#data
&
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&#
#errors
Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#"
#data
&#X
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#X"
#data
&#x
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#x"
#data
&#45
#errors
Line: 1 Col: 4 Numeric entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "-"
#data
&x-test
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&x-test"
#data
<!doctypehtml><p><li>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <li>
#data
<!doctypeHTML><p><dt>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <dt>
#data
<!doctypehtmL><p><dd>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE htmL>
| <html>
| <head>
| <body>
| <p>
| <dd>
#data
<!doctypehtml><p><form>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <form>
#data
<!DOCTYPE HTML><p><b><i><u></p> <p>X
#errors
Line: 1 Col: 31 Unexpected end tag (p). Ignored.
Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| " "
| <p>
| "X"
#data
<!DOCTYPE HTML><p></P>X
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| "X"
#data
&AMP
#errors
Line: 1 Col: 4 Named entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&AMp;
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&AMp;"
#data
<!DOCTYPE HTML><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
#errors
Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
#data
<!DOCTYPE HTML>X</body>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE HTML><!-- X
#errors
Line: 1 Col: 21 Unexpected end of file in comment.
#document
| <!DOCTYPE HTML>
| <!-- X -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><table><caption>test TEST</caption><td>test
#errors
Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <table>
| <caption>
| "test TEST"
| <tbody>
| <tr>
| <td>
| "test"
#data
<!DOCTYPE HTML><select><option><optgroup>
#errors
Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <option>
| <optgroup>
#data
<!DOCTYPE HTML><select><optgroup><option></optgroup><option><select><option>
#errors
Line: 1 Col: 68 Unexpected select start tag in the select phase implies select start tag.
Line: 1 Col: 76 Unexpected start tag option. Ignored.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <option>
#data
<!DOCTYPE HTML><select><optgroup><option><optgroup>
#errors
Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <optgroup>
#data
<!DOCTYPE HTML><font><input><input></font>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <font>
| <input>
| <input>
#data
<!DOCTYPE HTML><!-- XXX - XXX -->
#errors
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><!-- XXX - XXX
#errors
Line: 1 Col: 29 Unexpected end of file in comment (-)
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><!-- XXX - XXX - XXX -->
#errors
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX - XXX -->
| <html>
| <head>
| <body>
#data
<isindex test=x name=x>
#errors
Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <p>
| <label>
| "This is a searchable index. Insert your search keywords here: "
| <input>
| name="isindex"
| test="x"
| <hr>
#data
test
test
#errors
Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "test
test"
#data
<p><b><i><u></p>
<p>X
#errors
Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag (p). Ignored.
Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| "
"
| <p>
| "X"
#data
<!DOCTYPE HTML><body><title>test</body></title>
#errors
Line: 1 Col: 28 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <title>
| "test</body>"
| <body>
#data
<!DOCTYPE HTML><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
Line: 1 Col: 28 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <title>
| "X"
| <body>
| <meta>
| name="z"
| <link>
| rel="foo"
| <style>
| "
x { content:"</style" } "
#data
<!DOCTYPE HTML><select><optgroup></optgroup></select>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
#data
#errors
Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML> <html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><script>
</script> <title>x</title> </head>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <script>
| "
"
| " "
| <title>
| "x"
| " "
| <body>
#data
<!DOCTYPE HTML><html><body><html id=x>
#errors
Line: 1 Col: 38 html needs to be the first start tag.
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE HTML>X</body><html id="x">
#errors
Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
Line: 1 Col: 36 html needs to be the first start tag.
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
| "X"
#data
<!DOCTYPE HTML><head><html id=x>
#errors
Line: 1 Col: 32 html needs to be the first start tag.
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE HTML>X</html>X
#errors
Line: 1 Col: 24 Unexpected non-space characters. Expected end of file.
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE HTML>X</html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X "
#data
<!DOCTYPE HTML>X</html><p>X
#errors
Line: 1 Col: 26 Unexpected start tag (p). Expected end of file.
Line: 1 Col: 26 Unexpected start tag token (p) in the after body phase.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X"
| <p>
| "X"
#data
<!DOCTYPE HTML>X<p/x/y/z>
#errors
Line: 1 Col: 19 Solidus (/) incorrectly placed in tag.
Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X"
| <p>
| x=""
| y=""
| z=""
#data
<!DOCTYPE HTML><!--x--
#errors
Line: 1 Col: 22 Unexpected end of file in comment (--).
#document
| <!DOCTYPE HTML>
| <!-- x -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><table><tr><td></p></table>
#errors
Line: 1 Col: 34 Unexpected end tag (p). Ignored.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <p>