Improved parsing of <SCRIPT> tag.

Complex script code tends to contain other tags inside string literals. As we generally don't parse strings, we erroneously interpret those tags as markup. The easiest workaround is to not interpret any tags at all until the closing </SCRIPT> tag is found.
2015-05-25 16:46:23 +02:00 · 2015-05-25 16:46:23 +02:00 · 9b9b92be06
commit 9b9b92be06
parent a8837e230c
1 changed files with 11 additions and 3 deletions
--- a/apps/webbrowser/htmlparser.c
+++ b/apps/webbrowser/htmlparser.c
@ -137,7 +137,7 @@ G * (<br>, <p>, <h>), the <li> tag (but does not even try to
 #define MAJORSTATE_LINK       2
 #define MAJORSTATE_FORM       3
 #define MAJORSTATE_DISCARD    4
-
+#define MAJORSTATE_SCRIPT     5

 struct htmlparser_state {

@ -303,7 +303,7 @@ do_word(void)
      if(s.word[s.wordlen - 1] != ISO_space) {
 	add_char(ISO_space);
      }
-    } else if(s.majorstate == MAJORSTATE_DISCARD) {
+    } else if(s.majorstate >= MAJORSTATE_DISCARD) {
      s.wordlen = 0;
    } else {
      s.word[s.wordlen] = '\0';
@ -363,11 +363,17 @@ static void
 parse_tag(void)
 {
  static char *tagattrparam;
+  static unsigned char tag;
  static unsigned char size;

+  tag = find_tag(s.tag);
+  if(s.majorstate == MAJORSTATE_SCRIPT && tag != TAG_SLASHSCRIPT) {
+    return;
+  }
+
  PRINTF(("Parsing tag '%s' '%s' '%s'\n", s.tag, s.tagattr, s.tagattrparam));

-  switch(find_tag(s.tag)) {
+  switch(tag) {
  case TAG_P:
  case TAG_H1:
  case TAG_H2:
@ -389,6 +395,8 @@ parse_tag(void)
    }
    break;
  case TAG_SCRIPT:
+    switch_majorstate(MAJORSTATE_SCRIPT);
+    break;
  case TAG_STYLE:
  case TAG_SELECT:
    switch_majorstate(MAJORSTATE_DISCARD);