Skip to content

Commit

Permalink
* Strict comment syntax rules
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Wielemaker committed Jun 6, 2007
1 parent 1a7a2b6 commit 70da19b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 10 deletions.
3 changes: 2 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Jun 6, 2007

* FIXED: give error on bad entities in XML mode.
* FIXED: Strict XML comment syntax. Jacco van Ossenbruggen.
* FIXED: give error on bad entities in XML mode. Jacco van Ossenbruggen.

Feb 6, 2007

Expand Down
10 changes: 10 additions & 0 deletions Test/comment.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0"?>

<test>
<!-- comment -->
<!--- bad comment -->
<!-- bad -- comment -->
<!-- ok-comment -->
<!-- bad comment end --->
</test>

2 changes: 2 additions & 0 deletions Test/ok/comment.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[element(test, [], ['\n \n \n \n \n \n'])].
[sgml(sgml_parser(1951880), 'comment.xml', 5, 'Syntax error: Illegal comment, found "<!---"'), sgml(sgml_parser(1951880), 'comment.xml', 6, 'Syntax error: Illegal comment'), sgml(sgml_parser(1951880), 'comment.xml', 8, 'Syntax error: Illegal comment')].
29 changes: 21 additions & 8 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -4405,13 +4405,14 @@ end_document_dtd_parser_(dtd_parser *p)
rval = TRUE;
break;
case S_CMT:
case S_CMT1:
case S_CMTE0:
case S_CMTE1:
case S_DECLCMT0:
case S_DECLCMT:
case S_DECLCMTE0:
rval = gripe(ERC_SYNTAX_ERROR,
L"Unexpected end-of-file in comment", "");
L"Unexpected end-of-file in comment", L"");
break;
case S_ECDATA1:
case S_ECDATA2:
Expand All @@ -4427,24 +4428,24 @@ end_document_dtd_parser_(dtd_parser *p)
case S_ENT:
case S_ENT0:
rval = gripe(ERC_SYNTAX_ERROR,
L"Unexpected end-of-file", "");
L"Unexpected end-of-file", L"");
break;
#ifdef UTF8
case S_UTF8:
rval = gripe(ERC_SYNTAX_ERROR,
L"Unexpected end-of-file in UTF-8 sequence", "");
L"Unexpected end-of-file in UTF-8 sequence", L"");
break;
#endif
case S_MSCDATA:
case S_EMSCDATA1:
case S_EMSCDATA2:
rval = gripe(ERC_SYNTAX_ERROR,
L"Unexpected end-of-file in CDATA marked section", "");
L"Unexpected end-of-file in CDATA marked section", L"");
break;
case S_PI:
case S_PI2:
rval = gripe(ERC_SYNTAX_ERROR,
L"Unexpected end-of-file in processing instruction", "");
L"Unexpected end-of-file in processing instruction", L"");
break;
default:
rval = gripe(ERC_SYNTAX_ERROR,
Expand Down Expand Up @@ -4662,7 +4663,7 @@ putchar_dtd_parser(dtd_parser *p, int chr)
#ifdef UTF8
if ( p->state == S_UTF8 )
{ if ( (chr & 0xc0) != 0x80 ) /* TBD: recover */
gripe(ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", "");
gripe(ERC_SYNTAX_ERROR, L"Bad UTF-8 sequence", L"");
p->utf8_char <<= 6;
p->utf8_char |= (chr & ~0xc0);
if ( --p->utf8_left == 0 )
Expand Down Expand Up @@ -5042,7 +5043,7 @@ putchar_dtd_parser(dtd_parser *p, int chr)
}
case S_CMTO: /* Seen <!- */
{ if ( f[CF_CMT] == chr ) /* - */
{ p->state = S_CMT;
{ p->state = S_CMT1;
return;
} else
{ add_cdata(p, f[CF_MDO1]);
Expand All @@ -5053,6 +5054,14 @@ putchar_dtd_parser(dtd_parser *p, int chr)
return;
}
}
case S_CMT1: /* <!-- */
{ if ( f[CF_CMT] == chr ) /* <!--- */
{ if ( dtd->dialect != DL_SGML )
gripe(ERC_SYNTAX_ERROR, L"Illegal comment", L"<!---");
}
p->state = S_CMT;
break;
}
case S_CMT:
{ if ( f[CF_CMT] == chr )
p->state = S_CMTE0; /* <!--...- */
Expand All @@ -5071,7 +5080,11 @@ putchar_dtd_parser(dtd_parser *p, int chr)
(*p->on_decl)(p, (ichar*)"");
p->state = S_PCDATA;
} else
p->state = S_CMT;
{ if ( dtd->dialect != DL_SGML )
gripe(ERC_SYNTAX_ERROR, L"Illegal comment", L"");
if ( f[CF_CMT] != chr )
p->state = S_CMT;
}
break;
}
case S_GROUP: /* [...] in declaration */
Expand Down
3 changes: 2 additions & 1 deletion parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ typedef enum
S_DECLCMT, /* Seen <...-- */
S_DECLCMTE0, /* Seen <...--..- */
S_CMTO, /* Seen <!- */
S_CMT, /* Seen <!--... */
S_CMT1, /* Seen <!-- */
S_CMT, /* Seen <!--X... */
S_CMTE0, /* Seen <!--...- */
S_CMTE1, /* Seen <!--...-- */
S_GROUP, /* inside [...] */
Expand Down

0 comments on commit 70da19b

Please sign in to comment.