Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -161,23 +161,24 @@ module HTML5
]
BOOLEAN_ATTRIBUTES = {
:global => %w[irrelevant],
'style' => %w[scoped],
'img' => %w[ismap],
'audio' => %w[autoplay controls],
'video' => %w[autoplay controls],
'script' => %w[defer async],
'details' => %w[open],
:global => %w[irrelevant],
'style' => %w[scoped],
'img' => %w[ismap],
'audio' => %w[autoplay controls],
'video' => %w[autoplay controls],
'script' => %w[defer async],
'details' => %w[open],
'datagrid' => %w[multiple disabled],
'command' => %w[hidden disabled checked default],
'menu' => %w[autosubmit],
'command' => %w[hidden disabled checked default],
'menu' => %w[autosubmit],
'fieldset' => %w[disabled readonly],
'option' => %w[disabled readonly selected],
'option' => %w[disabled readonly selected],
'optgroup' => %w[disabled readonly],
'button' => %w[disabled autofocus],
'input' => %w[disabled readonly required autofocus checked ismap],
'select' => %w[disabled readonly autofocus multiple],
'output' => %w[disabled readonly]
'button' => %w[disabled autofocus],
'input' => %w[disabled readonly required autofocus checked ismap],
'select' => %w[disabled readonly autofocus multiple],
'output' => %w[disabled readonly]
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index.
@ -227,372 +228,372 @@ module HTML5
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
ENTITIES = {
'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86",
'AMP' => '&',
'AMP;' => '&',
'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b",
'GT' => '>',
'GT;' => '>',
'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a",
'LT' => '<',
'LT;' => '<',
'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8",
'QUOT' => '"',
'QUOT;' => '"',
'REG' => "\xc2\xae",
'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1",
'amp' => '&',
'amp;' => '&',
'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0",
'apos;' => "'",
'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab",
'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5",
'gt' => '>',
'gt;' => '>',
'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98",
'lt' => '<',
'lt;' => '<',
'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac",
'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6",
'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88",
'quot' => '"',
'quot;' => '"',
'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae",
'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad",
'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8",
'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86",
'AMP' => '&',
'AMP;' => '&',
'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b",
'GT' => '>',
'GT;' => '>',
'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a",
'LT' => '<',
'LT;' => '<',
'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8",
'QUOT' => '"',
'QUOT;' => '"',
'REG' => "\xc2\xae",
'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1",
'amp' => '&',
'amp;' => '&',
'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0",
'apos;' => "'",
'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab",
'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5",
'gt' => '>',
'gt;' => '>',
'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98",
'lt' => '<',
'lt;' => '<',
'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac",
'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6",
'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88",
'quot' => '"',
'quot;' => '"',
'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae",
'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad",
'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8",
'thetasym;' => "\xcf\x91",
'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97",
'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c"
'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97",
'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c"
}
ENCODINGS = %w[

View file

@ -21,9 +21,9 @@ module HTML5
when :EmptyTag
if token[:name].downcase == "meta"
# replace charset with actual encoding
token[:data].each_with_index do |(name,value),index|
token[:data].each_with_index do |(name, value), index|
if name == 'charset'
token[:data][index][1]=@encoding
token[:data][index][1] = @encoding
meta_found = true
end
end
@ -31,7 +31,7 @@ module HTML5
# replace charset with actual encoding
has_http_equiv_content_type = false
content_index = -1
token[:data].each_with_index do |(name,value),i|
token[:data].each_with_index do |(name, value), i|
if name.downcase == 'charset'
token[:data][i] = ['charset', @encoding]
meta_found = true
@ -43,30 +43,27 @@ module HTML5
end
end
if not meta_found
if has_http_equiv_content_type and content_index >= 0
token[:data][content_index][1] =
'text/html; charset=%s' % @encoding
if !meta_found
if has_http_equiv_content_type && content_index >= 0
token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
meta_found = true
end
end
elsif token[:name].downcase == "head" and not meta_found
elsif token[:name].downcase == "head" && !meta_found
# insert meta into empty head
yield(:type => :StartTag, :name => "head", :data => token[:data])
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]])
yield(:type => :EndTag, :name => "head")
yield :type => :StartTag, :name => "head", :data => token[:data]
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]
yield :type => :EndTag, :name => "head"
meta_found = true
next
end
when :EndTag
if token[:name].downcase == "head" and pending.any?
if token[:name].downcase == "head" && pending.any?
# insert meta into head (if necessary) and flush pending queue
yield pending.shift
yield(:type => :EmptyTag, :name => "meta",
:data => [["charset", @encoding]]) if not meta_found
yield :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] if !meta_found
yield pending.shift while pending.any?
meta_found = true
state = :post_head

View file

@ -75,8 +75,7 @@ module HTML5
if type == :StartTag
# omit the thead and tfoot elements' end tag when they are
# immediately followed by a tbody element. See is_optional_end.
if previous and previous[:type] == :EndTag and \
%w(tbody thead tfoot).include?(previous[:name])
if previous and previous[:type] == :EndTag && %w(tbody thead tfoot).include?(previous[:name])
return false
end
@ -85,7 +84,7 @@ module HTML5
return false
end
end
return false
return false
end
def is_optional_end(tagname, nexttok)

View file

@ -21,7 +21,7 @@ module HTML5
preserve -= 1 if preserve > 0
when :SpaceCharacters
next if preserve == 0
token[:data] = " " if preserve == 0 && token[:data]
when :Characters
token[:data] = token[:data].sub(SPACES,' ') if preserve == 0

View file

@ -16,7 +16,7 @@ module HTML5
#
class HTMLParser
attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable
attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table
attr_reader :phases, :tokenizer, :tree, :errors
@ -25,10 +25,10 @@ module HTML5
new(options).parse(stream,encoding)
end
def self.parseFragment(stream, options = {})
def self.parse_fragment(stream, options = {})
container = options.delete(:container) || 'div'
encoding = options.delete(:encoding)
new(options).parseFragment(stream,container,encoding)
new(options).parse_fragment(stream, container, encoding)
end
@@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
@ -44,56 +44,58 @@ module HTML5
@tokenizer = HTMLTokenizer
@tree = TreeBuilders::REXML::TreeBuilder
options.each { |name, value| instance_variable_set("@#{name}", value) }
options.each {|name, value| instance_variable_set("@#{name}", value) }
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
@tree = @tree.new
@phases = @@phases.inject({}) do |phases, phase_name|
phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
phases
phases
end
end
def _parse(stream, innerHTML, encoding, container = 'div')
def _parse(stream, inner_html, encoding, container = 'div')
@tree.reset
@firstStartTag = false
@first_start_tag = false
@errors = []
@tokenizer = @tokenizer.class unless Class === @tokenizer
@tokenizer = @tokenizer.new(stream, :encoding => encoding,
:parseMeta => !innerHTML)
:parseMeta => !inner_html, :lowercase_attr_name => @lowercase_attr_name, :lowercase_element_name => @lowercase_element_name)
if innerHTML
case @innerHTML = container.downcase
if inner_html
case @inner_html = container.downcase
when 'title', 'textarea'
@tokenizer.contentModelFlag = :RCDATA
@tokenizer.content_model_flag = :RCDATA
when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
@tokenizer.contentModelFlag = :CDATA
@tokenizer.content_model_flag = :CDATA
when 'plaintext'
@tokenizer.contentModelFlag = :PLAINTEXT
@tokenizer.content_model_flag = :PLAINTEXT
else
# contentModelFlag already is PCDATA
#@tokenizer.contentModelFlag = :PCDATA
# content_model_flag already is PCDATA
#@tokenizer.content_model_flag = :PCDATA
end
@phase = @phases[:rootElement]
@phase.insertHtmlElement
resetInsertionMode
@phase.insert_html_element
reset_insertion_mode
else
@innerHTML = false
@inner_html = false
@phase = @phases[:initial]
end
# We only seem to have InBodyPhase testcases where the following is
# relevant ... need others too
@lastPhase = nil
@last_phase = nil
# XXX This is temporary for the moment so there isn't any other
# changes needed for the parser to work with the iterable tokenizer
@tokenizer.each do |token|
token = normalizeToken(token)
token = normalize_token(token)
method = 'process%s' % token[:type]
@ -108,12 +110,12 @@ module HTML5
@phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct]
else
parseError(token[:data])
parse_error(token[:data])
end
end
# When the loop finishes it's EOF
@phase.processEOF
@phase.process_eof
end
# Parse a HTML document into a well-formed tree
@ -126,12 +128,12 @@ module HTML5
# element)
def parse(stream, encoding=nil)
_parse(stream, false, encoding)
return @tree.getDocument
@tree.get_document
end
# Parse a HTML fragment into a well-formed tree fragment
# container - name of the element we're setting the innerHTML property
# container - name of the element we're setting the inner_html property
# if set to nil, default to 'div'
#
# stream - a filelike object or string containing the HTML to be parsed
@ -140,19 +142,19 @@ module HTML5
# the encoding. If specified, that encoding will be used,
# regardless of any BOM or later declaration (such as in a meta
# element)
def parseFragment(stream, container='div', encoding=nil)
def parse_fragment(stream, container='div', encoding=nil)
_parse(stream, true, encoding, container)
return @tree.getFragment
@tree.get_fragment
end
def parseError(data = 'XXX ERROR MESSAGE NEEDED')
def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
# XXX The idea is to make data mandatory.
@errors.push([@tokenizer.stream.position, data])
raise ParseError if @strict
end
# HTML5 specific normalizations to the token stream
def normalizeToken(token)
def normalize_token(token)
if token[:type] == :EmptyTag
# When a solidus (/) is encountered within a tag name what happens
@ -161,75 +163,75 @@ module HTML5
# thing and if it doesn't it's wrong for everyone.
unless VOID_ELEMENTS.include?(token[:name])
parseError(_('Solidus (/) incorrectly placed in tag.'))
parse_error(_('Solidus (/) incorrectly placed in tag.'))
end
token[:type] = :StartTag
end
if token[:type] == :StartTag
token[:name] = token[:name].tr(ASCII_UPPERCASE,ASCII_LOWERCASE)
token[:name] = token[:name].downcase
# We need to remove the duplicate attributes and convert attributes
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
unless token[:data].empty?
data = token[:data].reverse.map { |attr, value| [attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE), value] }
data = token[:data].reverse.map {|attr, value| [attr.downcase, value] }
token[:data] = Hash[*data.flatten]
end
elsif token[:type] == :EndTag
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
token[:name] = token[:name].downcase
end
return token
token
end
@@new_modes = {
'select' => :inSelect,
'td' => :inCell,
'th' => :inCell,
'tr' => :inRow,
'tbody' => :inTableBody,
'thead' => :inTableBody,
'tfoot' => :inTableBody,
'caption' => :inCaption,
'select' => :inSelect,
'td' => :inCell,
'th' => :inCell,
'tr' => :inRow,
'tbody' => :inTableBody,
'thead' => :inTableBody,
'tfoot' => :inTableBody,
'caption' => :inCaption,
'colgroup' => :inColumnGroup,
'table' => :inTable,
'head' => :inBody,
'body' => :inBody,
'table' => :inTable,
'head' => :inBody,
'body' => :inBody,
'frameset' => :inFrameset
}
def resetInsertionMode
def reset_insertion_mode
# The name of this method is mostly historical. (It's also used in the
# specification.)
last = false
@tree.openElements.reverse.each do |node|
nodeName = node.name
@tree.open_elements.reverse.each do |node|
node_name = node.name
if node == @tree.openElements[0]
if node == @tree.open_elements.first
last = true
unless ['td', 'th'].include?(nodeName)
unless ['td', 'th'].include?(node_name)
# XXX
# assert @innerHTML
nodeName = @innerHTML
# assert @inner_html
node_name = @inner_html
end
end
# Check for conditions that should only happen in the innerHTML
# Check for conditions that should only happen in the inner_html
# case
if ['select', 'colgroup', 'head', 'frameset'].include?(nodeName)
if ['select', 'colgroup', 'head', 'frameset'].include?(node_name)
# XXX
# assert @innerHTML
# assert @inner_html
end
if @@new_modes.has_key?(nodeName)
@phase = @phases[@@new_modes[nodeName]]
elsif nodeName == 'html'
@phase = @phases[@tree.headPointer.nil?? :beforeHead : :afterHead]
if @@new_modes.has_key?(node_name)
@phase = @phases[@@new_modes[node_name]]
elsif node_name == 'html'
@phase = @phases[@tree.head_pointer.nil?? :beforeHead : :afterHead]
elsif last
@phase = @phases[:inBody]
else

View file

@ -8,36 +8,36 @@ module HTML5
def processComment(data)
# This is needed because data is to be appended to the <html> element
# here and not to whatever is currently open.
@tree.insertComment(data, @tree.openElements[0])
@tree.insert_comment(data, @tree.open_elements.first)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after body phase.'))
parse_error(_('Unexpected non-space characters in the after body phase.'))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processStartTag(name, attributes)
end
def endTagHtml(name)
if @parser.innerHTML
@parser.parseError
if @parser.inner_html
parse_error
else
# XXX: This may need to be done, not sure
# Don't set lastPhase to the current phase but to the inBody phase
# Don't set last_phase to the current phase but to the inBody phase
# instead. No need for extra parse errors if there's something after </html>.
# Try "<!doctype html>X</html>X" for instance.
@parser.lastPhase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
@parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
end
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
@parser.phase = @parser.phases[:inBody]
@parser.phase.processEndTag(name)
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'html'
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
end
def startTagNoframes(name, attributes)
@ -18,16 +18,16 @@ module HTML5
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
end
def endTagHtml(name)
@parser.lastPhase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
@parser.last_phase = @parser.phase
@parser.phase = @parser.phases[:trailingEnd]
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
end
end

View file

@ -2,47 +2,47 @@ require 'html5/html5parser/phase'
module HTML5
class AfterHeadPhase < Phase
handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'
def processEOF
anythingElse
@parser.phase.processEOF
def process_eof
anything_else
@parser.phase.process_eof
end
def processCharacters(data)
anythingElse
anything_else
@parser.phase.processCharacters(data)
end
def startTagBody(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inBody]
end
def startTagFrameset(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inFrameset]
end
def startTagFromHead(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
@parser.phase = @parser.phases[:inHead]
@parser.phase.processStartTag(name, attributes)
end
def startTagOther(name, attributes)
anythingElse
anything_else
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
anythingElse
anything_else
@parser.phase.processEndTag(name)
end
def anythingElse
@tree.insertElement('body', {})
def anything_else
@tree.insert_element('body', {})
@parser.phase = @parser.phases[:inBody]
end

View file

@ -7,9 +7,9 @@ module HTML5
handle_end %w( html head body br p ) => 'ImplyHead'
def processEOF
def process_eof
startTagHead('head', {})
@parser.phase.processEOF
@parser.phase.process_eof
end
def processCharacters(data)
@ -18,8 +18,8 @@ module HTML5
end
def startTagHead(name, attributes)
@tree.insertElement(name, attributes)
@tree.headPointer = @tree.openElements[-1]
@tree.insert_element(name, attributes)
@tree.head_pointer = @tree.open_elements[-1]
@parser.phase = @parser.phases[:inHead]
end
@ -34,7 +34,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
end
end

View file

@ -51,25 +51,40 @@ module HTML5
# for special handling of whitespace in <pre>
@processSpaceCharactersDropNewline = false
if $-w
$-w = false
alias processSpaceCharactersNonPre processSpaceCharacters
$-w = true
else
alias processSpaceCharactersNonPre processSpaceCharacters
end
end
def processSpaceCharactersDropNewline(data)
#Sometimes (start of <pre> blocks) we want to drop leading newlines
@processSpaceCharactersDropNewline = false
if (data.length > 0 and data[0] == ?\n and
%w[pre textarea].include?(@tree.openElements[-1].name) and
not @tree.openElements[-1].hasContent)
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
if $-w
$-w = false
alias processSpaceCharacters processSpaceCharactersNonPre
$-w = true
else
alias processSpaceCharacters processSpaceCharactersNonPre
end
if (data.length > 0 and data[0] == ?\n &&
%w[pre textarea].include?(@tree.open_elements.last.name) && !@tree.open_elements.last.hasContent)
data = data[1..-1]
end
@tree.insertText(data) if data.length > 0
if data.length > 0
@tree.reconstructActiveFormattingElements
@tree.insertText(data)
end
end
def processSpaceCharacters(data)
if @processSpaceCharactersDropNewline
processSpaceCharactersDropNewline(data)
else
super(data)
end
@tree.reconstructActiveFormattingElements()
@tree.insertText(data)
end
def processCharacters(data)
@ -85,20 +100,19 @@ module HTML5
end
def startTagTitle(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
@parser.phases[:inHead].processStartTag(name, attributes)
end
def startTagBody(name, attributes)
@parser.parseError(_('Unexpected start tag (body).'))
parse_error(_('Unexpected start tag (body).'))
if (@tree.openElements.length == 1 or
@tree.openElements[1].name != 'body')
assert @parser.innerHTML
if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
assert @parser.inner_html
else
attributes.each do |attr, value|
unless @tree.openElements[1].attributes.has_key?(attr)
@tree.openElements[1].attributes[attr] = value
unless @tree.open_elements[1].attributes.has_key?(attr)
@tree.open_elements[1].attributes[attr] = value
end
end
end
@ -106,17 +120,17 @@ module HTML5
def startTagCloseP(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@processSpaceCharactersDropNewline = true if name == 'pre'
end
def startTagForm(name, attributes)
if @tree.formPointer
@parser.parseError(_('Unexpected start tag (form). Ignored.'))
parse_error(_('Unexpected start tag (form). Ignored.'))
else
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.formPointer = @tree.openElements[-1]
@tree.insert_element(name, attributes)
@tree.formPointer = @tree.open_elements[-1]
end
end
@ -125,31 +139,28 @@ module HTML5
stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
stopName = stopNames[name]
@tree.openElements.reverse.each_with_index do |node, i|
@tree.open_elements.reverse.each_with_index do |node, i|
if stopName.include?(node.name)
poppedNodes = (0..i).collect { @tree.openElements.pop }
poppedNodes = (0..i).collect { @tree.open_elements.pop }
if i >= 1
@parser.parseError(_("Missing end tag%s (%s)" % [
(i>1 ? 's' : ''),
poppedNodes.reverse.map {|item| item.name}.join(', ')]))
parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
end
break
end
# Phrasing elements are all non special, non scoping, non
# formatting elements
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
not ['address', 'div'].include?(node.name))
break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) && !%w[address div].include?(node.name))
end
# Always insert an <li> element.
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagPlaintext(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :PLAINTEXT
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :PLAINTEXT
end
def startTagHeading(name, attributes)
@ -158,7 +169,7 @@ module HTML5
# Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element|
# if in_scope?(element)
# @parser.parseError(_("Unexpected start tag (#{name})."))
# parse_error(_("Unexpected start tag (#{name})."))
#
# remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name)
@ -167,14 +178,14 @@ module HTML5
# break
# end
# end
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagA(name, attributes)
if afeAElement = @tree.elementInActiveFormattingElements('a')
@parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
parse_error(_('Unexpected start tag (a) implies end tag (a).'))
endTagFormatting('a')
@tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
@tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
@tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
end
@tree.reconstructActiveFormattingElements
@ -188,77 +199,82 @@ module HTML5
def startTagNobr(name, attributes)
@tree.reconstructActiveFormattingElements
processEndTag('nobr') if in_scope?('nobr')
if in_scope?('nobr')
parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
processEndTag('nobr')
# XXX Need tests that trigger the following
@tree.reconstructActiveFormattingElements
end
addFormattingElement(name, attributes)
end
def startTagButton(name, attributes)
if in_scope?('button')
@parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
parse_error(_('Unexpected start tag (button) implied end tag (button).'))
processEndTag('button')
@parser.phase.processStartTag(name, attributes)
else
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker)
end
end
def startTagMarqueeObject(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(Marker)
end
def startTagXmp(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagTable(name, attributes)
processEndTag('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTable]
end
def startTagVoidFormatting(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagHr(name, attributes)
endTagP('p') if in_scope?('p')
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagImage(name, attributes)
# No really...
@parser.parseError(_('Unexpected start tag (image). Treated as img.'))
parse_error(_('Unexpected start tag (image). Treated as img.'))
processStartTag('img', attributes)
end
def startTagInput(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
if @tree.formPointer
# XXX Not exactly sure what to do here
# @tree.openElements[-1].form = @tree.formPointer
# @tree.open_elements[-1].form = @tree.formPointer
end
@tree.openElements.pop
@tree.open_elements.pop
end
def startTagIsindex(name, attributes)
@parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
parse_error(_("Unexpected start tag isindex. Don't use it!"))
return if @tree.formPointer
processStartTag('form', {})
processStartTag('hr', {})
processStartTag('p', {})
processStartTag('label', {})
# XXX Localization ...
processCharacters('This is a searchable index. Insert your search keywords here:')
processCharacters('This is a searchable index. Insert your search keywords here: ')
attributes['name'] = 'isindex'
attrs = attributes.to_a
processStartTag('input', attributes)
@ -270,20 +286,21 @@ module HTML5
def startTagTextarea(name, attributes)
# XXX Form element pointer checking here as well...
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :RCDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :RCDATA
@processSpaceCharactersDropNewline = true
alias processSpaceCharacters processSpaceCharactersDropNewline
end
# iframe, noembed noframes, noscript(if scripting enabled)
def startTagCdata(name, attributes)
@tree.insertElement(name, attributes)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagSelect(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inSelect]
end
@ -293,7 +310,7 @@ module HTML5
# "caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
# "tr", "noscript"
@parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
parse_error(_("Unexpected start tag (#{name}). Ignored."))
end
def startTagNew(name, attributes)
@ -306,14 +323,14 @@ module HTML5
def startTagOther(name, attributes)
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def endTagP(name)
@tree.generateImpliedEndTags('p') if in_scope?('p')
@parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
if in_scope?('p')
@tree.openElements.pop while in_scope?('p')
@tree.open_elements.pop while in_scope?('p')
else
startTagCloseP('p', {})
endTagP('p')
@ -324,20 +341,20 @@ module HTML5
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
unless @tree.openElements[1].name == 'body'
# innerHTML case
@parser.parseError
unless @tree.open_elements[1].name == 'body'
# inner_html case
parse_error
return
end
unless @tree.openElements[-1].name == 'body'
@parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
unless @tree.open_elements.last.name == 'body'
parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
end
@parser.phase = @parser.phases[:afterBody]
end
def endTagHtml(name)
endTagBody(name)
@parser.phase.processEndTag(name) unless @parser.innerHTML
@parser.phase.processEndTag(name) unless @parser.inner_html
end
def endTagBlock(name)
@ -346,8 +363,8 @@ module HTML5
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
end
if in_scope?(name)
@ -359,22 +376,20 @@ module HTML5
if in_scope?(name)
@tree.generateImpliedEndTags
end
if @tree.openElements[-1].name != name
@parser.parseError(_("End tag (form) seen too early. Ignored."))
if @tree.open_elements.last.name != name
parse_error(_("End tag (form) seen too early. Ignored."))
else
@tree.openElements.pop
@tree.open_elements.pop
end
@tree.formPointer = nil
end
def endTagListItem(name)
# AT Could merge this with the Block case
if in_scope?(name)
@tree.generateImpliedEndTags(name)
@tree.generateImpliedEndTags(name) if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
end
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
end
remove_open_elements_until(name) if in_scope?(name)
@ -388,13 +403,13 @@ module HTML5
end
end
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
end
HEADING_ELEMENTS.each do |element|
if in_scope?(element)
remove_open_elements_until { |element| HEADING_ELEMENTS.include?(element.name) }
remove_open_elements_until {|element| HEADING_ELEMENTS.include?(element.name)}
break
end
end
@ -403,30 +418,30 @@ module HTML5
# The much-feared adoption agency algorithm
def endTagFormatting(name)
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
# XXX Better parseError messages appreciated.
# XXX Better parse_error messages appreciated.
while true
# Step 1 paragraph 1
afeElement = @tree.elementInActiveFormattingElements(name)
if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
return
# Step 1 paragraph 2
elsif not @tree.openElements.include?(afeElement)
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
elsif not @tree.open_elements.include?(afeElement)
parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
@tree.activeFormattingElements.delete(afeElement)
return
end
# Step 1 paragraph 3
if afeElement != @tree.openElements[-1]
@parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
if afeElement != @tree.open_elements.last
parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
end
# Step 2
# Start of the adoption agency algorithm proper
afeIndex = @tree.openElements.index(afeElement)
afeIndex = @tree.open_elements.index(afeElement)
furthestBlock = nil
@tree.openElements[afeIndex..-1].each do |element|
@tree.open_elements[afeIndex..-1].each do |element|
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
furthestBlock = element
break
@ -435,11 +450,11 @@ module HTML5
# Step 3
if furthestBlock.nil?
element = remove_open_elements_until { |element| element == afeElement }
element = remove_open_elements_until {|element| element == afeElement }
@tree.activeFormattingElements.delete(element)
return
end
commonAncestor = @tree.openElements[afeIndex - 1]
commonAncestor = @tree.open_elements[afeIndex - 1]
# Step 5
furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent
@ -456,11 +471,11 @@ module HTML5
while true
# AT replace this with a function and recursion?
# Node is element before node in open elements
node = @tree.openElements[@tree.openElements.index(node) - 1]
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
until @tree.activeFormattingElements.include?(node)
tmpNode = node
node = @tree.openElements[@tree.openElements.index(node) - 1]
@tree.openElements.delete(tmpNode)
node = @tree.open_elements[@tree.open_elements.index(node) - 1]
@tree.open_elements.delete(tmpNode)
end
# Step 7.3
break if node == afeElement
@ -477,7 +492,7 @@ module HTML5
clone = node.cloneNode
# Replace node with clone
@tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
@tree.openElements[@tree.openElements.index(node)] = clone
@tree.open_elements[@tree.open_elements.index(node)] = clone
node = clone
end
# Step 7.6
@ -507,47 +522,47 @@ module HTML5
@tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)
# Step 13
@tree.openElements.delete(afeElement)
@tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
@tree.open_elements.delete(afeElement)
@tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
end
end
def endTagButtonMarqueeObject(name)
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
end
if in_scope?(name)
remove_open_elements_until(name)
@tree.clearActiveFormattingElements
end
end
def endTagMisplaced(name)
# This handles elements with end tags in other insertion modes.
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagBr(name)
@parser.parseError(_("Unexpected end tag (br). Treated as br element."))
parse_error(_("Unexpected end tag (br). Treated as br element."))
@tree.reconstructActiveFormattingElements
@tree.insertElement(name, {})
@tree.openElements.pop()
@tree.insert_element(name, {})
@tree.open_elements.pop()
end
def endTagNone(name)
# This handles elements with no end tag.
@parser.parseError(_("This tag (#{name}) has no end tag"))
parse_error(_("This tag (#{name}) has no end tag"))
end
def endTagCdataTextAreaXmp(name)
if @tree.openElements[-1].name == name
@tree.openElements.pop
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
@ -561,20 +576,20 @@ module HTML5
def endTagOther(name)
# XXX This logic should be moved into the treebuilder
@tree.openElements.reverse.each do |node|
@tree.open_elements.reverse.each do |node|
if node.name == name
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == name
@parser.parseError(_("Unexpected end tag (#{name})."))
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name})."))
end
remove_open_elements_until { |element| element == node }
remove_open_elements_until {|element| element == node }
break
else
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
break
end
end
@ -584,8 +599,8 @@ module HTML5
protected
def addFormattingElement(name, attributes)
@tree.insertElement(name, attributes)
@tree.activeFormattingElements.push(@tree.openElements[-1])
@tree.insert_element(name, attributes)
@tree.activeFormattingElements.push(@tree.open_elements.last)
end
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def ignoreEndTagCaption
not in_scope?('caption', true)
!in_scope?('caption', true)
end
def processCharacters(data)
@ -18,7 +18,7 @@ module HTML5
end
def startTagTableElement(name, attributes)
@parser.parseError
parse_error
#XXX Have to duplicate logic here to find out if the tag is ignored
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@ -31,15 +31,15 @@ module HTML5
def endTagCaption(name)
if ignoreEndTagCaption
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
# AT this code is quite similar to endTagTable in "InTable"
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'caption'
@parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
unless @tree.open_elements[-1].name == 'caption'
parse_error(_("Unexpected end tag (caption). Missing end tags."))
end
remove_open_elements_until('caption')
@ -50,14 +50,14 @@ module HTML5
end
def endTagTable(name)
@parser.parseError
parse_error
ignoreEndTag = ignoreEndTagCaption
@parser.phase.processEndTag('caption')
@parser.phase.processEndTag(name) unless ignoreEndTag
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagOther(name)

View file

@ -20,8 +20,8 @@ module HTML5
closeCell
@parser.phase.processStartTag(name, attributes)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
@ -32,22 +32,22 @@ module HTML5
def endTagTableCell(name)
if in_scope?(name, true)
@tree.generateImpliedEndTags(name)
if @tree.openElements[-1].name != name
@parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")
if @tree.open_elements.last.name != name
parse_error("Got table cell end tag (#{name}) while required end tags are missing.")
remove_open_elements_until(name)
else
@tree.openElements.pop
@tree.open_elements.pop
end
@tree.clearActiveFormattingElements
@parser.phase = @parser.phases[:inRow]
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagImply(name)
@ -55,8 +55,8 @@ module HTML5
closeCell
@parser.phase.processEndTag(name)
else
# sometimes innerHTML case
@parser.parseError
# sometimes inner_html case
parse_error
end
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'colgroup', 'col'
def ignoreEndTagColgroup
@tree.openElements[-1].name == 'html'
@tree.open_elements[-1].name == 'html'
end
def processCharacters(data)
@ -20,8 +20,8 @@ module HTML5
end
def startTagCol(name, attributes)
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagOther(name, attributes)
@ -32,17 +32,17 @@ module HTML5
def endTagColgroup(name)
if ignoreEndTagColgroup
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
end
end
def endTagCol(name)
@parser.parseError(_('Unexpected end tag (col). col has no end tag.'))
parse_error(_('Unexpected end tag (col). col has no end tag.'))
end
def endTagOther(name)

View file

@ -10,16 +10,16 @@ module HTML5
handle_end 'frameset', 'noframes'
def processCharacters(data)
@parser.parseError(_('Unexpected characters in the frameset phase. Characters ignored.'))
parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
end
def startTagFrameset(name, attributes)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
end
def startTagFrame(name, attributes)
@tree.insertElement(name, attributes)
@tree.openElements.pop
@tree.insert_element(name, attributes)
@tree.open_elements.pop
end
def startTagNoframes(name, attributes)
@ -27,19 +27,19 @@ module HTML5
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
end
def endTagFrameset(name)
if @tree.openElements[-1].name == 'html'
# innerHTML case
@parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
if @tree.open_elements.last.name == 'html'
# inner_html case
parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
else
@tree.openElements.pop
@tree.open_elements.pop
end
if (not @parser.innerHTML and
@tree.openElements[-1].name != 'frameset')
# If we're not in innerHTML mode and the the current node is not a
if (not @parser.inner_html and
@tree.open_elements.last.name != 'frameset')
# If we're not in inner_html mode and the the current node is not a
# "frameset" element (anymore) then switch.
@parser.phase = @parser.phases[:afterFrameset]
end
@ -50,7 +50,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
end
end

View file

@ -3,108 +3,120 @@ require 'html5/html5parser/phase'
module HTML5
class InHeadPhase < Phase
handle_start 'html', 'head', 'title', 'style', 'script', %w( base link meta )
handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
handle_start %w( base link meta )
handle_end 'head'
handle_end %w( html body br p ) => 'ImplyAfterHead'
handle_end %w( title style script )
handle_end %w( title style script noscript )
def processEOF
if ['title', 'style', 'script'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected end of file. Expected end tag (#{name})."))
@tree.openElements.pop
def process_eof
if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
@tree.open_elements.pop
end
anythingElse
@parser.phase.processEOF
anything_else
@parser.phase.process_eof
end
def processCharacters(data)
if ['title', 'style', 'script'].include?(@tree.openElements[-1].name)
if %w[title style script noscript].include?(@tree.open_elements.last.name)
@tree.insertText(data)
else
anythingElse
anything_else
@parser.phase.processCharacters(data)
end
end
def startTagHead(name, attributes)
@parser.parseError(_('Unexpected start tag head in existing head. Ignored'))
parse_error(_('Unexpected start tag head in existing head. Ignored'))
end
def startTagTitle(name, attributes)
element = @tree.createElement(name, attributes)
appendToHead(element)
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :RCDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :RCDATA
end
def startTagStyle(name, attributes)
element = @tree.createElement(name, attributes)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagNoscript(name, attributes)
# XXX Need to decide whether to implement the scripting disabled case.
element = @tree.createElement(name, attributes)
if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.open_elements.last.appendChild(element)
end
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagScript(name, attributes)
#XXX Inner HTML case may be wrong
element = @tree.createElement(name, attributes)
element._flags.push("parser-inserted")
if (@tree.headPointer != nil and
@parser.phase == @parser.phases[:inHead])
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
@tree.openElements.push(element)
@parser.tokenizer.contentModelFlag = :CDATA
@tree.open_elements.push(element)
@parser.tokenizer.content_model_flag = :CDATA
end
def startTagBaseLinkMeta(name, attributes)
element = @tree.createElement(name, attributes)
if @tree.headPointer != nil and @parser.phase == @parser.phases[:inHead]
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
appendToHead(element)
else
@tree.openElements[-1].appendChild(element)
@tree.open_elements.last.appendChild(element)
end
end
def startTagOther(name, attributes)
anythingElse
anything_else
@parser.phase.processStartTag(name, attributes)
end
def endTagHead(name)
if @tree.openElements[-1].name == 'head'
@tree.openElements.pop
if @tree.open_elements.last.name == 'head'
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (head). Ignored."))
parse_error(_("Unexpected end tag (head). Ignored."))
end
@parser.phase = @parser.phases[:afterHead]
end
def endTagImplyAfterHead(name)
anythingElse
anything_else
@parser.phase.processEndTag(name)
end
def endTagTitleStyleScript(name)
if @tree.openElements[-1].name == name
@tree.openElements.pop
def endTagTitleStyleScriptNoscript(name)
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def anythingElse
if @tree.openElements[-1].name == 'head'
def anything_else
if @tree.open_elements.last.name == 'head'
endTagHead('head')
else
@parser.phase = @parser.phases[:afterHead]
@ -114,11 +126,11 @@ module HTML5
protected
def appendToHead(element)
if @tree.headPointer.nil?
assert @parser.innerHTML
@tree.openElements[-1].appendChild(element)
if @tree.head_pointer.nil?
assert @parser.inner_html
@tree.open_elements.last.appendChild(element)
else
@tree.headPointer.appendChild(element)
@tree.head_pointer.appendChild(element)
end
end

View file

@ -15,7 +15,7 @@ module HTML5
def startTagTableCell(name, attributes)
clearStackToTableRowContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inCell]
@tree.activeFormattingElements.push(Marker)
end
@ -23,7 +23,7 @@ module HTML5
def startTagTableOther(name, attributes)
ignoreEndTag = ignoreEndTagTr
endTagTr('tr')
# XXX how are we sure it's always ignored in the innerHTML case?
# XXX how are we sure it's always ignored in the inner_html case?
@parser.phase.processStartTag(name, attributes) unless ignoreEndTag
end
@ -33,12 +33,12 @@ module HTML5
def endTagTr(name)
if ignoreEndTagTr
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
else
clearStackToTableRowContext
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTableBody]
end
end
@ -47,7 +47,7 @@ module HTML5
ignoreEndTag = ignoreEndTagTr
endTagTr('tr')
# Reprocess the current tag if the tr end tag was not ignored
# XXX how are we sure it's always ignored in the innerHTML case?
# XXX how are we sure it's always ignored in the inner_html case?
@parser.phase.processEndTag(name) unless ignoreEndTag
end
@ -56,13 +56,13 @@ module HTML5
endTagTr('tr')
@parser.phase.processEndTag(name)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
end
def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
# XXX unify this with other table helper methods
def clearStackToTableRowContext
until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
@tree.openElements.pop
until %w[tr html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
@tree.open_elements.pop
end
end

View file

@ -15,44 +15,44 @@ module HTML5
def startTagOption(name, attributes)
# We need to imply </option> if <option> is the current node.
@tree.openElements.pop if @tree.openElements[-1].name == 'option'
@tree.insertElement(name, attributes)
@tree.open_elements.pop if @tree.open_elements.last.name == 'option'
@tree.insert_element(name, attributes)
end
def startTagOptgroup(name, attributes)
@tree.openElements.pop if @tree.openElements[-1].name == 'option'
@tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
@tree.insertElement(name, attributes)
@tree.open_elements.pop if @tree.open_elements.last.name == 'option'
@tree.open_elements.pop if @tree.open_elements.last.name == 'optgroup'
@tree.insert_element(name, attributes)
end
def startTagSelect(name, attributes)
@parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
endTagSelect('select')
end
def startTagOther(name, attributes)
@parser.parseError(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
parse_error(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
end
def endTagOption(name)
if @tree.openElements[-1].name == 'option'
@tree.openElements.pop
if @tree.open_elements.last.name == 'option'
@tree.open_elements.pop
else
@parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
end
end
def endTagOptgroup(name)
# </optgroup> implicitly closes <option>
if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
@tree.openElements.pop
if @tree.open_elements.last.name == 'option' and @tree.open_elements[-2].name == 'optgroup'
@tree.open_elements.pop
end
# It also closes </optgroup>
if @tree.openElements[-1].name == 'optgroup'
@tree.openElements.pop
if @tree.open_elements.last.name == 'optgroup'
@tree.open_elements.pop
# But nothing else
else
@parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
end
end
@ -60,15 +60,15 @@ module HTML5
if in_scope?('select', true)
remove_open_elements_until('select')
@parser.resetInsertionMode
@parser.reset_insertion_mode
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagTableElements(name)
@parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))
parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
if in_scope?(name, true)
endTagSelect('select')
@ -77,7 +77,7 @@ module HTML5
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
parse_error(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
end
end

View file

@ -15,12 +15,12 @@ module HTML5
def startTagTr(name, attributes)
clearStackToTableBodyContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inRow]
end
def startTagTableCell(name, attributes)
@parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
startTagTr('tr', {})
@parser.phase.processStartTag(name, attributes)
end
@ -29,11 +29,11 @@ module HTML5
# XXX AT Any ideas on how to share this with endTagTable?
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name)
endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processStartTag(name, attributes)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
@ -44,26 +44,26 @@ module HTML5
def endTagTableRowGroup(name)
if in_scope?(name, true)
clearStackToTableBodyContext
@tree.openElements.pop
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
else
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end
end
def endTagTable(name)
if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
clearStackToTableBodyContext
endTagTableRowGroup(@tree.openElements[-1].name)
endTagTableRowGroup(@tree.open_elements.last.name)
@parser.phase.processEndTag(name)
else
# innerHTML case
@parser.parseError
# inner_html case
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
end
def endTagOther(name)
@ -73,9 +73,9 @@ module HTML5
protected
def clearStackToTableBodyContext
until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
@tree.openElements.pop
until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
@tree.open_elements.pop
end
end

View file

@ -12,24 +12,24 @@ module HTML5
handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def processCharacters(data)
@parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the character in the "in body" mode
@parser.phases[:inBody].processCharacters(data)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
def startTagCaption(name, attributes)
clearStackToTableContext
@tree.activeFormattingElements.push(Marker)
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inCaption]
end
def startTagColgroup(name, attributes)
clearStackToTableContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inColumnGroup]
end
@ -40,7 +40,7 @@ module HTML5
def startTagRowGroup(name, attributes)
clearStackToTableContext
@tree.insertElement(name, attributes)
@tree.insert_element(name, attributes)
@parser.phase = @parser.phases[:inTableBody]
end
@ -50,60 +50,60 @@ module HTML5
end
def startTagTable(name, attributes)
@parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
@parser.phase.processEndTag('table')
@parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
@parser.phase.processStartTag(name, attributes) unless @parser.inner_html
end
def startTagOther(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the start tag in the "in body" mode
@parser.phases[:inBody].processStartTag(name, attributes)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
def endTagTable(name)
if in_scope?('table', true)
@tree.generateImpliedEndTags
unless @tree.openElements[-1].name == 'table'
@parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
unless @tree.open_elements.last.name == 'table'
parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
end
remove_open_elements_until('table')
@parser.resetInsertionMode
@parser.reset_insertion_mode
else
# innerHTML case
assert @parser.innerHTML
@parser.parseError
# inner_html case
assert @parser.inner_html
parse_error
end
end
def endTagIgnore(name)
@parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
def endTagOther(name)
@parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
@tree.insertFromTable = true
@tree.insert_from_table = true
# Process the end tag in the "in body" mode
@parser.phases[:inBody].processEndTag(name)
@tree.insertFromTable = false
@tree.insert_from_table = false
end
protected
def clearStackToTableContext
# "clear the stack back to a table context"
until ['table', 'html'].include?(name = @tree.openElements[-1].name)
@parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
@tree.openElements.pop
until %w[table html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
@tree.open_elements.pop
end
# When the current node is <html> it's an innerHTML case
# When the current node is <html> it's an inner_html case
end
end

View file

@ -7,22 +7,22 @@ module HTML5
# covered in the specification. The error handling is typically known as
# "quirks mode". It is expected that a future version of HTML5 will define this.
def processEOF
@parser.parseError(_('Unexpected End of file. Expected DOCTYPE.'))
def process_eof
parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processEOF
@parser.phase.process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processDoctype(name, publicId, systemId, correct)
if name.downcase != 'html' or publicId or systemId
@parser.parseError(_('Erroneous DOCTYPE.'))
parse_error(_('Erroneous DOCTYPE.'))
end
# XXX need to update DOCTYPE tokens
@tree.insertDoctype(name)
@tree.insertDoctype(name, publicId, systemId)
publicId = publicId.to_s.upcase
@ -110,23 +110,22 @@ module HTML5
end
def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
@parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processEndTag(name)
end

View file

@ -15,9 +15,12 @@ module HTML5
#
class Phase
extend Forwardable
def_delegators :@parser, :parse_error
# The following example call:
#
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
# tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
#
# ...would return a hash equal to this:
#
@ -34,15 +37,15 @@ module HTML5
if tags.last.is_a?(Hash)
tags.pop.each do |names, handler_method_suffix|
handler_method = prefix + handler_method_suffix
Array(names).each { |name| mapping[name] = handler_method }
Array(names).each {|name| mapping[name] = handler_method }
end
end
tags.each do |names|
names = Array(names)
handler_method = prefix + names.map { |name| name.capitalize }.join
names.each { |name| mapping[name] = handler_method }
handler_method = prefix + names.map {|name| name.capitalize }.join
names.each {|name| mapping[name] = handler_method }
end
return mapping
mapping
end
def self.start_tag_handlers
@ -80,17 +83,17 @@ module HTML5
@parser, @tree = parser, tree
end
def processEOF
def process_eof
@tree.generateImpliedEndTags
if @tree.openElements.length > 2
@parser.parseError(_('Unexpected end of file. Missing closing tags.'))
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
if @tree.open_elements.length > 2
parse_error(_('Unexpected end of file. Missing closing tags.'))
elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
# This happens for framesets or something?
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
elsif @parser.innerHTML and @tree.openElements.length > 1
parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
elsif @parser.inner_html and @tree.open_elements.length > 1
# XXX This is not what the specification says. Not sure what to do here.
@parser.parseError(_('XXX innerHTML EOF'))
parse_error(_('XXX inner_html EOF'))
end
# Betting ends.
end
@ -98,11 +101,11 @@ module HTML5
def processComment(data)
# For most phases the following is correct. Where it's not it will be
# overridden.
@tree.insertComment(data, @tree.openElements[-1])
@tree.insert_comment(data, @tree.open_elements.last)
end
def processDoctype(name, publicId, systemId, correct)
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
parse_error(_('Unexpected DOCTYPE. Ignored.'))
end
def processSpaceCharacters(data)
@ -114,17 +117,17 @@ module HTML5
end
def startTagHtml(name, attributes)
if @parser.firstStartTag == false and name == 'html'
@parser.parseError(_('html needs to be the first start tag.'))
if @parser.first_start_tag == false and name == 'html'
parse_error(_('html needs to be the first start tag.'))
end
# XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke @parser.parseError.
# this token... If it's not, invoke parse_error.
attributes.each do |attr, value|
unless @tree.openElements[0].attributes.has_key?(attr)
@tree.openElements[0].attributes[attr] = value
unless @tree.open_elements.first.attributes.has_key?(attr)
@tree.open_elements.first.attributes[attr] = value
end
end
@parser.firstStartTag = false
@parser.first_start_tag = false
end
def processEndTag(name)
@ -146,11 +149,10 @@ module HTML5
def remove_open_elements_until(name=nil)
finished = false
until finished
element = @tree.openElements.pop
finished = name.nil?? yield(element) : element.name == name
element = @tree.open_elements.pop
finished = name.nil? ? yield(element) : element.name == name
end
return element
end
end
end

View file

@ -3,38 +3,37 @@ require 'html5/html5parser/phase'
module HTML5
class RootElementPhase < Phase
def processEOF
insertHtmlElement
@parser.phase.processEOF
def process_eof
insert_html_element
@parser.phase.process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processSpaceCharacters(data)
@tree.insertText(data, @tree.document)
end
def processCharacters(data)
insertHtmlElement
insert_html_element
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.firstStartTag = true if name == 'html'
insertHtmlElement
@parser.first_start_tag = true if name == 'html'
insert_html_element
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
insertHtmlElement
insert_html_element
@parser.phase.processEndTag(name)
end
def insertHtmlElement
def insert_html_element
element = @tree.createElement('html', {})
@tree.openElements.push(element)
@tree.open_elements.push(element)
@tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead]
end

View file

@ -3,34 +3,33 @@ require 'html5/html5parser/phase'
module HTML5
class TrailingEndPhase < Phase
def processEOF
def process_eof
end
def processComment(data)
@tree.insertComment(data, @tree.document)
@tree.insert_comment(data, @tree.document)
end
def processSpaceCharacters(data)
@parser.lastPhase.processSpaceCharacters(data)
@parser.last_phase.processSpaceCharacters(data)
end
def processCharacters(data)
@parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
@parser.phase = @parser.lastPhase
parse_error(_('Unexpected non-space characters. Expected end of file.'))
@parser.phase = @parser.last_phase
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
@parser.parseError(_('Unexpected start tag (#{name}). Expected end of file.'))
@parser.phase = @parser.lastPhase
parse_error(_('Unexpected start tag (#{name}). Expected end of file.'))
@parser.phase = @parser.last_phase
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
@parser.parseError(_('Unexpected end tag (#{name}). Expected end of file.'))
@parser.phase = @parser.lastPhase
parse_error(_('Unexpected end tag (#{name}). Expected end of file.'))
@parser.phase = @parser.last_phase
@parser.phase.processEndTag(name)
end
end
end

View file

@ -27,11 +27,11 @@ module HTML5
# parseMeta - Look for a <meta> element containing encoding information
def initialize(source, options = {})
@encoding = nil
@encoding = nil
@parse_meta = true
@chardet = true
@chardet = true
options.each { |name, value| instance_variable_set("@#{name}", value) }
options.each {|name, value| instance_variable_set("@#{name}", value) }
# Raw Stream
@raw_stream = open_stream(source)
@ -297,7 +297,7 @@ module HTML5
end
when 0xC0 .. 0xFF
if @win1252
if instance_variables.include?("@win1252") && @win1252
"\xC3" + (c-64).chr # convert to utf-8
elsif @buffer[@tell-1 .. @tell+3] =~ /^
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte

View file

@ -24,7 +24,7 @@ module HTML5
@phases[:initial] = XmlRootPhase.new(self, @tree)
end
def normalizeToken(token)
def normalize_token(token)
case token[:type]
when :StartTag, :EmptyTag
# We need to remove the duplicate attributes and convert attributes
@ -34,23 +34,23 @@ module HTML5
# For EmptyTags, process both a Start and an End tag
if token[:type] == :EmptyTag
save = @tokenizer.contentModelFlag
save = @tokenizer.content_model_flag
@phase.processStartTag(token[:name], token[:data])
@tokenizer.contentModelFlag = save
@tokenizer.content_model_flag = save
token[:data] = {}
token[:type] = :EndTag
end
when :Characters
# un-escape RCDATA_ELEMENTS (e.g. style, script)
if @tokenizer.contentModelFlag == :CDATA
if @tokenizer.content_model_flag == :CDATA
token[:data] = token[:data].
gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
end
when :EndTag
if token[:data]
parseError(_("End tag contains unexpected attributes."))
parse_error(_("End tag contains unexpected attributes."))
end
when :Comment
@ -74,22 +74,22 @@ module HTML5
@phases[:rootElement] = XhmlRootPhase.new(self, @tree)
end
def normalizeToken(token)
def normalize_token(token)
super(token)
# ensure that non-void XHTML elements have content so that separate
# open and close tags are emitted
if token[:type] == :EndTag
if VOID_ELEMENTS.include? token[:name]
if @tree.openElements[-1].name != token["name"]:
if @tree.open_elements[-1].name != token["name"]:
token[:type] = :EmptyTag
token["data"] ||= {}
end
else
if token[:name] == @tree.openElements[-1].name and \
not @tree.openElements[-1].hasContent
if token[:name] == @tree.open_elements[-1].name and \
not @tree.open_elements[-1].hasContent
@tree.insertText('') unless
@tree.openElements.any? {|e|
@tree.open_elements.any? {|e|
e.attributes.keys.include? 'xmlns' and
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
}
@ -102,9 +102,9 @@ module HTML5
end
class XhmlRootPhase < RootElementPhase
def insertHtmlElement
def insert_html_element
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
@tree.openElements.push(element)
@tree.open_elements.push(element)
@tree.document.appendChild(element)
@parser.phase = @parser.phases[:beforeHead]
end
@ -115,15 +115,15 @@ module HTML5
@start_tag_handlers = Hash.new(:startTagOther)
@end_tag_handlers = Hash.new(:endTagOther)
def startTagOther(name, attributes)
@tree.openElements.push(@tree.document)
@tree.open_elements.push(@tree.document)
element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element)
@tree.openElements.push(element)
@tree.open_elements[-1].appendChild(element)
@tree.open_elements.push(element)
@parser.phase = XmlElementPhase.new(@parser,@tree)
end
def endTagOther(name)
super
@tree.openElements.pop
@tree.open_elements.pop
end
end
@ -135,17 +135,17 @@ module HTML5
def startTagOther(name, attributes)
element = @tree.createElement(name, attributes)
@tree.openElements[-1].appendChild(element)
@tree.openElements.push(element)
@tree.open_elements[-1].appendChild(element)
@tree.open_elements.push(element)
end
def endTagOther(name)
for node in @tree.openElements.reverse
for node in @tree.open_elements.reverse
if node.name == name
{} while @tree.openElements.pop != node
{} while @tree.open_elements.pop != node
break
else
@parser.parseError
parse_error
end
end
end

View file

@ -13,11 +13,11 @@ module HTML5
# or, if you already have a parse tree (in this example, a REXML tree),
# at the Serializer stage:
#
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
# tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
# :sanitize => true})
module HTMLSanitizeModule
module HTMLSanitizeModule
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
button caption center cite code col colgroup dd del dfn dir div dl dt

View file

@ -13,18 +13,18 @@ module HTML5
end
def initialize(options={})
@quote_attr_values = false
@quote_char = '"'
@use_best_quote_char = true
@quote_attr_values = false
@quote_char = '"'
@use_best_quote_char = true
@minimize_boolean_attributes = true
@use_trailing_solidus = false
@use_trailing_solidus = false
@space_before_trailing_solidus = true
@escape_lt_in_attrs = false
@escape_rcdata = false
@escape_lt_in_attrs = false
@escape_rcdata = false
@omit_optional_tags = true
@sanitize = false
@sanitize = false
@strip_whitespace = false
@ -73,7 +73,7 @@ module HTML5
elsif [:Characters, :SpaceCharacters].include? type
if type == :SpaceCharacters or in_cdata
if in_cdata and token[:data].include?("</")
serializeError(_("Unexpected </ in CDATA"))
serialize_error(_("Unexpected </ in CDATA"))
end
result << token[:data]
else
@ -85,7 +85,7 @@ module HTML5
if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
in_cdata = true
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
serialize_error(_("Unexpected child element of a CDATA element"))
end
attributes = []
for k,v in attrs = token[:data].to_a.sort
@ -137,19 +137,19 @@ module HTML5
if RCDATA_ELEMENTS.include?(name)
in_cdata = false
elsif in_cdata
serializeError(_("Unexpected child element of a CDATA element"))
serialize_error(_("Unexpected child element of a CDATA element"))
end
end_tag = "</#{name}>"
result << end_tag
elsif type == :Comment
data = token[:data]
serializeError(_("Comment contains --")) if data.index("--")
serialize_error(_("Comment contains --")) if data.index("--")
comment = "<!--%s-->" % token[:data]
result << comment
else
serializeError(token[:data])
serialize_error(token[:data])
end
end
@ -163,13 +163,15 @@ module HTML5
alias :render :serialize
def serializeError(data="XXX ERROR MESSAGE NEEDED")
def serialize_error(data="XXX ERROR MESSAGE NEEDED")
# XXX The idea is to make data mandatory.
@errors.push(data)
if @strict
raise SerializeError
end
end
def _(string); string; end
end
# Error in serialized tree

View file

@ -4,12 +4,12 @@ module HTML5
class XHTMLSerializer < HTMLSerializer
DEFAULTS = {
:quote_attr_values => true,
:quote_attr_values => true,
:minimize_boolean_attributes => false,
:use_trailing_solidus => true,
:escape_lt_in_attrs => true,
:omit_optional_tags => false,
:escape_rcdata => true
:use_trailing_solidus => true,
:escape_lt_in_attrs => true,
:omit_optional_tags => false,
:escape_rcdata => true
}
def initialize(options={})

View file

@ -0,0 +1,45 @@
module HTML5
module Sniffer
# 4.7.4
def html_or_feed str
s = str[0, 512] # steps 1, 2
pos = 0
while pos < s.length
case s[pos]
when 0x09, 0x20, 0x0A, 0x0D # tab, space, LF, CR
pos += 1
when 0x3C # "<"
pos += 1
if s[pos..pos+2] == "!--" # [0x21, 0x2D, 0x2D]
pos += 3
until s[pos..pos+2] == "-->" or pos >= s.length
pos += 1
end
pos += 3
elsif s[pos] == 0x21 # "!"
pos += 1
until s[pos] == 0x3E or pos >= s.length # ">"
pos += 1
end
pos += 1
elsif s[pos] == 0x3F # "?"
until s[pos..pos+1] == "?>" or pos >= s.length # [0x3F, 0x3E]
pos += 1
end
pos += 2
elsif s[pos..pos+2] == "rss" # [0x72, 0x73, 0x73]
return "application/rss+xml"
elsif s[pos..pos+3] == "feed" # [0x66, 0x65, 0x65, 0x64]
return "application/atom+xml"
elsif s[pos..pos+6] == "rdf:RDF" # [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46]
raise NotImplementedError
end
else
break
end
end
"text/html"
end
end
end

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,7 @@ module HTML5
end
end
alias :getTreeBuilder :[]
alias :get_tree_builder :[]
end
end
end

View file

@ -24,9 +24,9 @@ module HTML5
attr_accessor :_flags
def initialize(name)
@parent = nil
@parent = nil
@childNodes = []
@_flags = []
@_flags = []
end
# Insert node as a child of the current node
@ -76,13 +76,13 @@ module HTML5
# Base treebuilder implementation
class TreeBuilder
attr_accessor :openElements
attr_accessor :open_elements
attr_accessor :activeFormattingElements
attr_accessor :document
attr_accessor :headPointer
attr_accessor :head_pointer
attr_accessor :formPointer
@ -106,25 +106,25 @@ module HTML5
end
def reset
@openElements = []
@open_elements = []
@activeFormattingElements = []
#XXX - rename these to headElement, formElement
@headPointer = nil
@head_pointer = nil
@formPointer = nil
self.insertFromTable = false
self.insert_from_table = false
@document = @documentClass.new
end
def elementInScope(target, tableVariant=false)
# Exit early when possible.
return true if @openElements[-1].name == target
return true if @open_elements[-1].name == target
# AT How about while true and simply set node to [-1] and set it to
# [-2] at the end...
@openElements.reverse.each do |element|
@open_elements.reverse.each do |element|
if element.name == target
return true
elsif element.name == 'table'
@ -149,10 +149,10 @@ module HTML5
# Step 2 and step 3: we start with the last element. So i is -1.
i = -1
entry = @activeFormattingElements[i]
return if entry == Marker or @openElements.include?(entry)
return if entry == Marker or @open_elements.include?(entry)
# Step 6
until entry == Marker or @openElements.include?(entry)
until entry == Marker or @open_elements.include?(entry)
# Step 5: let entry be one earlier in the list.
i -= 1
begin
@ -171,7 +171,7 @@ module HTML5
clone = @activeFormattingElements[i].cloneNode
# Step 9
element = insertElement(clone.name, clone.attributes)
element = insert_element(clone.name, clone.attributes)
# Step 10
@activeFormattingElements[i] = element
@ -198,12 +198,15 @@ module HTML5
return false
end
def insertDoctype(name)
@document.appendChild(@doctypeClass.new(name))
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name)
doctype.public_id = public_id
doctype.system_id = system_id
@document.appendChild(doctype)
end
def insertComment(data, parent=nil)
parent = @openElements[-1] if parent.nil?
def insert_comment(data, parent=nil)
parent = @open_elements[-1] if parent.nil?
parent.appendChild(@commentClass.new(data))
end
@ -216,28 +219,28 @@ module HTML5
# Switch the function used to insert an element from the
# normal one to the misnested table one and back again
def insertFromTable=(value)
@insertFromTable = value
@insertElement = value ? :insertElementTable : :insertElementNormal
def insert_from_table=(value)
@insert_from_table = value
@insert_element = value ? :insert_elementTable : :insert_elementNormal
end
def insertElement(name, attributes)
send(@insertElement, name, attributes)
def insert_element(name, attributes)
send(@insert_element, name, attributes)
end
def insertElementNormal(name, attributes)
def insert_elementNormal(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
@openElements[-1].appendChild(element)
@openElements.push(element)
@open_elements.last.appendChild(element)
@open_elements.push(element)
return element
end
# Create an element and insert it into the tree
def insertElementTable(name, attributes)
def insert_elementTable(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
@ -246,17 +249,17 @@ module HTML5
else
parent.insertBefore(element, insertBefore)
end
@openElements.push(element)
@open_elements.push(element)
else
return insertElementNormal(name, attributes)
return insert_elementNormal(name, attributes)
end
return element
end
def insertText(data, parent=nil)
parent = @openElements[-1] if parent.nil?
parent = @open_elements[-1] if parent.nil?
if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)))
if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
parent.insertText(data)
else
#We should be in the InTable mode. This means we want to do
@ -265,7 +268,7 @@ module HTML5
parent.insertText(data, insertBefore)
end
end
# Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node
def getTableMisnestedNodePosition
@ -275,7 +278,7 @@ module HTML5
lastTable = nil
fosterParent = nil
insertBefore = nil
@openElements.reverse.each do |element|
@open_elements.reverse.each do |element|
if element.name == "table"
lastTable = element
break
@ -288,33 +291,34 @@ module HTML5
fosterParent = lastTable.parent
insertBefore = lastTable
else
fosterParent = @openElements[@openElements.index(lastTable) - 1]
fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
end
else
fosterParent = @openElements[0]
fosterParent = @open_elements[0]
end
return fosterParent, insertBefore
end
def generateImpliedEndTags(exclude=nil)
name = @openElements[-1].name
name = @open_elements[-1].name
if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude)
@openElements.pop
# XXX td, th and tr are not actually needed
if (%w[dd dt li p td th tr].include?(name) and name != exclude)
@open_elements.pop
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
generateImpliedEndTags(exclude)
end
end
def getDocument
def get_document
@document
end
def getFragment
#assert @innerHTML
def get_fragment
#assert @inner_html
fragment = @fragmentClass.new
@openElements[0].reparentChildren(fragment)
@open_elements[0].reparentChildren(fragment)
return fragment
end

View file

@ -8,7 +8,6 @@ module HTML5
module Hpricot
class Node < Base::Node
extend Forwardable
def_delegators :@hpricot, :name
@ -22,7 +21,7 @@ module HTML5
def appendChild(node)
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
else
childNodes << node
hpricot.children << node.hpricot
@ -145,21 +144,27 @@ module HTML5
end
class DocumentType < Node
def_delegators :@hpricot, :public_id, :system_id
def self.hpricot_class
::Hpricot::DocType
end
def initialize(name)
def initialize(name, public_id, system_id)
begin
super(name)
rescue ArgumentError # needs 3...
end
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
@hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
end
def printTree(indent=0)
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
if hpricot.target and hpricot.target.any?
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
else
"\n|#{' ' * indent}<!DOCTYPE >"
end
end
end
@ -169,7 +174,7 @@ module HTML5
end
def printTree(indent=0)
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
childNodes.inject('') {|tree, child| tree + child.printTree(indent + 2) }
end
end
@ -196,21 +201,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def testSerializer(node)
node.printTree
end
def getDocument
def get_document
@document.hpricot
end
def getFragment
def get_fragment
@document = super
return @document.hpricot.children
end

View file

@ -17,11 +17,9 @@ module HTML5
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].rxobj.value =
childNodes[-1].rxobj.to_s + node.rxobj.to_s
childNodes[-1].rxobj.raw = true
if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
childNodes.last.rxobj.raw = true
else
childNodes.push node
rxobj.add node.rxobj
@ -45,10 +43,8 @@ module HTML5
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
childNodes[index-1].rxobj.value =
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.raw = true
else
childNodes.insert index, node
@ -57,7 +53,7 @@ module HTML5
end
def hasContent
return (childNodes.length > 0)
(childNodes.length > 0)
end
end
@ -77,7 +73,7 @@ module HTML5
end
def attributes= value
value.each {|name, value| rxobj.attributes[name]=value}
value.each {|name, value| rxobj.attributes[name] = value}
end
def printTree indent=0
@ -90,7 +86,7 @@ module HTML5
for child in childNodes
tree += child.printTree(indent)
end
return tree
tree
end
end
@ -120,10 +116,25 @@ module HTML5
end
class DocumentType < Node
def_delegator :@rxobj, :public, :public_id
def_delegator :@rxobj, :system, :system_id
def self.rxclass
::REXML::DocType
end
def initialize name, public_id, system_id
super(name)
if public_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
elsif system_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
else
@rxobj = ::REXML::DocType.new name
end
end
def printTree indent=0
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
end
@ -145,7 +156,7 @@ module HTML5
class TextNode < Node
def initialize data
raw=data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
@rxobj = ::REXML::Text.new(raw, true, nil, true)
end
@ -167,21 +178,26 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def getDocument
def testSerializer node
node.printTree
end
def get_document
@document.rxobj
end
def getFragment
def get_fragment
@document = super
return @document.rxobj.children
end

View file

@ -18,17 +18,17 @@ module HTML5
def initialize name
super
@name = name
@value = nil
@name = name
@value = nil
@attributes = {}
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].value += node.value
childNodes.length > 0 and childNodes.last.kind_of? TextNode
childNodes.last.value += node.value
else
childNodes.push node
childNodes << node
end
node.parent = self
end
@ -55,8 +55,7 @@ module HTML5
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].value += node.value
else
childNodes.insert index, node
@ -72,7 +71,7 @@ module HTML5
end
def hasContent
return (childNodes.length > 0)
childNodes.length > 0
end
end
@ -90,7 +89,7 @@ module HTML5
for child in childNodes
tree += child.printTree(indent)
end
return tree
tree
end
end
@ -108,13 +107,21 @@ module HTML5
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
tree
end
end
class DocumentType < Node
attr_accessor :public_id, :system_id
def to_s
"<!DOCTYPE %s>" % name
"<!DOCTYPE #{name}>"
end
def initialize name
super name
@public_id = nil
@system_id = nil
end
end
@ -157,19 +164,19 @@ module HTML5
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
node.printTree
end
def getFragment
def get_fragment
@document = super
return @document.childNodes
@document.childNodes
end
end

View file

@ -6,13 +6,13 @@ module HTML5
class << self
def [](name)
case name.to_s.downcase
when 'simpletree' then
when 'simpletree'
require 'html5/treewalkers/simpletree'
SimpleTree::TreeWalker
when 'rexml' then
when 'rexml'
require 'html5/treewalkers/rexml'
REXML::TreeWalker
when 'hpricot' then
when 'hpricot'
require 'html5/treewalkers/hpricot'
Hpricot::TreeWalker
else
@ -20,7 +20,7 @@ module HTML5
end
end
alias :getTreeWalker :[]
alias :get_tree_walker :[]
end
end
end

View file

@ -3,153 +3,151 @@ module HTML5
module TreeWalkers
module TokenConstructor
def error(msg)
return {:type => "SerializeError", :data => msg}
def error(msg)
{:type => "SerializeError", :data => msg}
end
def normalize_attrs(attrs)
attrs.to_a
end
def empty_tag(name, attrs, has_children=false)
error(_("Void element has children")) if has_children
{:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
end
def start_tag(name, attrs)
{:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
end
def end_tag(name)
{:type => :EndTag, :name => name, :data => []}
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
def normalizeAttrs(attrs)
attrs.to_a
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def emptyTag(name, attrs, hasChildren=false)
error(_("Void element has children")) if hasChildren
return({:type => :EmptyTag, :name => name, \
:data => normalizeAttrs(attrs)})
end
def comment(data)
{:type => :Comment, :data => data}
end
def startTag(name, attrs)
return {:type => :StartTag, :name => name, \
:data => normalizeAttrs(attrs)}
end
def doctype(name, public_id, system_id, correct=nil)
{:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
end
def endTag(name)
return {:type => :EndTag, :name => name, :data => []}
end
def unknown(nodeType)
error(_("Unknown node type: ") + nodeType.to_s)
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def comment(data)
return {:type => :Comment, :data => data}
end
def doctype(name)
return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
end
def unknown(nodeType)
return error(_("Unknown node type: ") + nodeType.to_s)
end
def _(str)
str
end
def _(str)
str
end
end
class Base
include TokenConstructor
def initialize(tree)
@tree = tree
@tree = tree
end
def each
raise NotImplementedError
raise NotImplementedError
end
alias walk each
end
class NonRecursiveTreeWalker < TreeWalkers::Base
def node_details(node)
raise NotImplementedError
end
def node_details(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def each
currentNode = @tree
while currentNode != nil
details = node_details(currentNode)
hasChildren = false
def each
current_node = @tree
while current_node != nil
details = node_details(current_node)
has_children = false
case details.shift
when :DOCTYPE
yield doctype(*details)
case details.shift
when :DOCTYPE
yield doctype(*details)
when :TEXT
text(*details) {|token| yield token}
when :TEXT
text(*details) {|token| yield token}
when :ELEMENT
name, attributes, hasChildren = details
if VOID_ELEMENTS.include?(name)
yield emptyTag(name, attributes.to_a, hasChildren)
hasChildren = false
else
yield startTag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
hasChildren = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
firstChild = hasChildren ? first_child(currentNode) : nil
if firstChild != nil
currentNode = firstChild
else
while currentNode != nil
details = node_details(currentNode)
if details.shift == :ELEMENT
name, attributes, hasChildren = details
yield endTag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == currentNode
currentNode = nil
else
nextSibling = next_sibling(currentNode)
if nextSibling != nil
currentNode = nextSibling
break
end
currentNode = parent(currentNode)
end
end
end
when :ELEMENT
name, attributes, has_children = details
if VOID_ELEMENTS.include?(name)
yield empty_tag(name, attributes.to_a, has_children)
has_children = false
else
yield start_tag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
has_children = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
first_child = has_children ? first_child(current_node) : nil
if first_child != nil
current_node = first_child
else
while current_node != nil
details = node_details(current_node)
if details.shift == :ELEMENT
name, attributes, has_children = details
yield end_tag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == current_node
current_node = nil
else
next_sibling = next_sibling(current_node)
if next_sibling != nil
current_node = next_sibling
break
end
current_node = parent(current_node)
end
end
end
end
end
end
end

View file

@ -13,17 +13,17 @@ module HTML5
[:DOCUMENT_FRAGMENT]
else
[:ELEMENT, node.name,
node.attributes.map {|name,value| [name,value]},
node.attributes.map {|name, value| [name, value]},
!node.empty?]
end
when ::Hpricot::Text
[:TEXT, node.to_plain_text]
[:TEXT, node.content]
when ::Hpricot::Comment
[:COMMENT, node.content]
when ::Hpricot::Doc
[:DOCUMENT]
when ::Hpricot::DocType
[:DOCTYPE, node.target]
[:DOCTYPE, node.target, node.public_id, node.system_id]
when ::Hpricot::XMLDecl
[nil]
else

View file

@ -23,7 +23,7 @@ module HTML5
when ::REXML::Comment
[:COMMENT, node.string]
when ::REXML::DocType
[:DOCTYPE, node.name]
[:DOCTYPE, node.name, node.public, node.system]
when ::REXML::XMLDecl
[nil]
else

View file

@ -12,20 +12,20 @@ module HTML5
return
when DocumentType
yield doctype(node.name)
yield doctype(node.name, node.public_id, node.system_id)
when TextNode
text(node.value) {|token| yield token}
when Element
if VOID_ELEMENTS.include?(node.name)
yield emptyTag(node.name, node.attributes, node.hasContent())
yield empty_tag(node.name, node.attributes, node.hasContent())
else
yield startTag(node.name, node.attributes)
yield start_tag(node.name, node.attributes)
for child in node.childNodes
walk(child) {|token| yield token}
end
yield endTag(node.name)
yield end_tag(node.name)
end
when CommentNode

View file

@ -0,0 +1,3 @@
module HTML5
VERSION = '0.1.0'
end