Skip to content

Commit

Permalink
* Improvements to <!DOCTYPE xxx [...]> handling
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Wielemaker committed Sep 17, 2002
1 parent 63d0897 commit ffd8cd4
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 12 deletions.
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
* FIXED: ignore [] inside literal values when detecting nesting, and fix
handling of [] in the <!DOCTYPE xxx [...]> declaration. Richard O'Keefe.

* MODIFIED/ADDED: qualify_attributes option for xmlns mode. Default is
now *not* to qualify attributes.

Expand Down
7 changes: 7 additions & 0 deletions Test/ment.sgml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE ent [
<!ENTITY #DEFAULT CDATA "[missing]">
<!ELEMENT ent O O (#PCDATA)>
]>
One: &one;.
Two: &two;.
Three: &three;.
2 changes: 2 additions & 0 deletions Test/ok/ment.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[element(ent, [], ['One: [missing].\nTwo: [missing].\nThree: [missing].'])].
[].
49 changes: 38 additions & 11 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -3452,7 +3452,23 @@ process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
p->buffer = new_icharbuf();

for( ; *s; s++ )
{ if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
{ if ( isee_func(dtd, s, CF_LIT) || /* skip quoted strings */
isee_func(dtd, s, CF_LITA) )
{ ichar q = *s;

putchar_dtd_parser(p, *s++); /* pass open quote */

for( ; *s && *s != q; s++ )
putchar_dtd_parser(p, *s);

if ( *s == q ) /* pass closing quote */
putchar_dtd_parser(p, *s);
continue;
}

if ( isee_func(dtd, s, CF_DSO) )
grouplevel++;
else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
break;
putchar_dtd_parser(p, *s);
}
Expand Down Expand Up @@ -4317,7 +4333,7 @@ process_utf8(dtd_parser *p, int chr)
;
mask--; /* 0x20 --> 0x1f */

p->saved_state = p->state; /* state to return to */
p->utf8_saved_state = p->state; /* state to return to */
p->state = S_UTF8;
p->utf8_char = chr & mask;
p->utf8_left = bytes;
Expand Down Expand Up @@ -4705,16 +4721,18 @@ putchar_dtd_parser(dtd_parser *p, int chr)

add_icharbuf(p->buffer, chr);

if ( f[CF_LIT] == chr )
if ( f[CF_LIT] == chr ) /* " */
{ p->state = S_STRING;
p->saved = chr;
} else if ( f[CF_LITA] == chr )
p->lit_saved_state = S_DECL;
} else if ( f[CF_LITA] == chr ) /* ' */
{ p->state = S_STRING;
p->saved = chr;
p->lit_saved_state = S_DECL;
return;
} else if ( f[CF_CMT] == chr )
} else if ( f[CF_CMT] == chr ) /* - */
{ p->state = S_DECLCMT0;
} else if ( f[CF_DSO] == chr ) /* [: marked section */
} else if ( f[CF_DSO] == chr ) /* [: marked section */
{ terminate_icharbuf(p->buffer);

process_marked_section(p);
Expand Down Expand Up @@ -4772,7 +4790,7 @@ putchar_dtd_parser(dtd_parser *p, int chr)
case S_STRING:
{ add_icharbuf(p->buffer, chr);
if ( chr == p->saved )
p->state = S_DECL;
p->state = p->lit_saved_state;
break;
}
case S_CMTO: /* Seen <!- */
Expand Down Expand Up @@ -4809,13 +4827,22 @@ putchar_dtd_parser(dtd_parser *p, int chr)
p->state = S_CMT;
break;
}
case S_GROUP:
case S_GROUP: /* [...] in declaration */
{ add_icharbuf(p->buffer, chr);
if ( f[CF_DSO] == chr )
p->grouplevel++;
else if ( f[CF_DSC] == chr )
{ p->grouplevel++;
} else if ( f[CF_DSC] == chr )
{ if ( --p->grouplevel == 0 )
p->state = S_DECL;
} else if ( f[CF_LIT] == chr ) /* " */
{ p->state = S_STRING;
p->saved = chr;
p->lit_saved_state = S_GROUP;
} else if ( f[CF_LITA] == chr ) /* ' */
{ p->state = S_STRING;
p->saved = chr;
p->lit_saved_state = S_GROUP;
return;
}
break;
}
Expand All @@ -4827,7 +4854,7 @@ putchar_dtd_parser(dtd_parser *p, int chr)
p->utf8_char |= (chr & ~0xc0);
if ( --p->utf8_left == 0 )
{ add_cdata(p, p->utf8_char); /* verbatim? */
p->state = p->saved_state;
p->state = p->utf8_saved_state;
}
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,14 @@ typedef struct _dtd_parser
int etaglen; /* length of end-tag */
int grouplevel; /* [..] level in declaration */
int saved; /* saved character */
dtdstate lit_saved_state; /* literal saved-state */
dtd_char_encoding encoding; /* CDATA output character-set */
dtd_shortref *map; /* SHORTREF map */
#ifdef UTF8
int utf8_decode; /* decode UTF-8 sequences? */
int utf8_char; /* building character */
int utf8_left; /* bytes left */
dtdstate saved_state; /* state from which we come */
dtdstate utf8_saved_state; /* state from which we come */
#endif
dtd_srcloc location; /* Current location */
dtd_srcloc startloc; /* Start of last markup */
Expand Down

0 comments on commit ffd8cd4

Please sign in to comment.