Skip to content

Commit

Permalink
* Added pre-processing to insert proper CR/LF in the stream
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Wielemaker committed May 24, 2001
1 parent 96d4fcb commit 201cd41
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Test/defent.sgml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
]>

<def>&#65
&#66 &#67
&#66 &#67@ &#68
&b
&#66
&c</def>
3 changes: 2 additions & 1 deletion Test/ok/defent.ok
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
[element(def, [], ['AB C', sdata(?), 'B', sdata(?)])].
[element(def, [], ['AB C@ D', sdata(?), 'B', sdata(?)])].
[].
6 changes: 5 additions & 1 deletion Test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@
-> load_prolog_file(OkFile, TermOk, ErrorsOk),
( compare_dom(Term, TermOk)
-> format('ok')
; format('WRONG')
; format('WRONG'),
format('~NOK:~n'),
pp(TermOk),
format('~NANSWER:~n'),
pp(Term)
),
error_terms(Errors),
( compare_errors(Errors, ErrorsOk)
Expand Down
43 changes: 33 additions & 10 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,18 +344,28 @@ entity_file(dtd *dtd, dtd_entity *e)
}


/* Process long entities by reading the file.
static long
size_file(const char *path)
{ struct stat buf;
if ( stat(path, &buf) == -1 )
return -1;
return buf.st_size;
}
*/


static const ichar *
entity_value(dtd_parser *p, dtd_entity *e, int *len)
{ const char *file;

if ( e->value )
{ if ( len )
*len = e->length;
return e->value;
if ( !e->value && (file=entity_file(p->dtd, e)) )
{ int normalise = (e->content == EC_SGML || e->content == EC_CDATA);

e->value = load_sgml_file_to_charp(file, normalise, &e->length);
}

if ( (file=entity_file(p->dtd, e)) )
e->value = load_file_to_charp(file, &e->length);

if ( len )
*len = e->length;
Expand Down Expand Up @@ -2844,14 +2854,18 @@ get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
{ if (att->definition->type == AT_CDATA)
{ int hasent = FALSE;
ichar const ero = dtd->charfunc->func[CF_ERO]; /* & */
ichar *q;

for (d = tmp; *d; d++)
{ if (HasClass(dtd, *d, CH_BLANK))
for (d = q = tmp; *d; *q++ = *d++)
{ if ( d[0] == CR && d[1] == LF )
d++;
if (HasClass(dtd, *d, CH_BLANK))
{ *d = ' '; /* map all blanks to spaces */
} else if (*d == ero)
{ hasent = TRUE; /* notice char/entity references */
}
}
*q = '\0';
if (hasent)
{ expand_entities(p, tmp, cdata, MAXSTRINGLEN);
buf = (ichar *) cdata;
Expand Down Expand Up @@ -4447,7 +4461,16 @@ putchar_dtd_parser(dtd_parser *p, int chr)
}
empty_icharbuf(p->buffer);

if ( f[CF_ERC] != chr && chr != '\n' )
if ( chr == CR )
p->state = S_ENTCR;
else if ( f[CF_ERC] != chr )
goto reprocess;

break;
}
case S_ENTCR: /* seen &entCR, eat the LF */
{ p->state = p->cdata_state;
if ( chr != LF )
goto reprocess;

break;
Expand Down
1 change: 1 addition & 0 deletions parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ typedef enum
S_PENT, /* Seen % */
S_ENT0, /* Seen & */
S_ENT, /* Seen &(#|\w) */
S_ENTCR, /* Seen &entity<CR> */
S_SHORTTAG_CDATA /* Seen <tag/ */
} dtdstate;

Expand Down
50 changes: 44 additions & 6 deletions sgml2pl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1516,26 +1516,64 @@ pl_sgml_parse(term_t parser, term_t options)
return sgml2pl_error(ERR_TYPE, "list", tail);

/* Parsing input from a stream */
/* Return from the enclosing function with an ERR_LIMIT error as soon as
   the parser's error count exceeds max_errors.  A negative max_errors
   never triggers the check.  The body is wrapped in do/while(0) so that
   `CHECKERROR;` expands to exactly one statement and cannot capture a
   following `else` or escape an unbraced `if`.
*/
#define CHECKERROR \
	do \
	{ if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
	    return sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors); \
	} while(0)

if ( in )
{ int chr;
{ int p0, p1;

if ( !recursive )
{ pd->source = in;
begin_document_dtd_parser(p);
}
while( (chr = Sgetc(in)) != EOF && content_length-- != 0 )
{ putchar_dtd_parser(p, chr);

if ( pd->errors > pd->max_errors && pd->max_errors >= 0 )
return sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors);
if ( content_length-- == 0 || (p0 = Sgetc(in)) == EOF )
goto out;
if ( content_length-- == 0 || (p1 = Sgetc(in)) == EOF )
{ putchar_dtd_parser(p, p0);
goto end;
}

for(;;) /* perform newline handling */
{ int p2;

if ( content_length-- == 0 || (p2 = Sgetc(in)) == EOF )
{ putchar_dtd_parser(p, p0);
if ( p1 != LF )
putchar_dtd_parser(p, p1);

break;
} else if ( p2 == LF )
{ if ( p1 != CR )
{ putchar_dtd_parser(p, p0);
if ( pd->stopped )
{ p2 = CR;
goto stopped;
}
p0 = p1;
p1 = CR;
}
}

putchar_dtd_parser(p, p0);
CHECKERROR;
if ( pd->stopped )
{ pd->stopped = FALSE;
{ stopped:
pd->stopped = FALSE;
reset_document_dtd_parser(p); /* ensure a clean start */
Sungetc(p2, in);
Sungetc(p1, in);
goto out;
}
p0 = p1;
p1 = p2;
}

end:
if ( !recursive )
end_document_dtd_parser(p);
CHECKERROR;

out:
reset_url_cache();
Expand Down
5 changes: 3 additions & 2 deletions sgmldefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ typedef unsigned char ichar; /* input character */
#define INPUT_CHARSET_SIZE (1<<(sizeof(ichar)*8))
#define OUTPUT_CHARSET_SIZE (1<<(sizeof(ochar)*8))
#define SYMBOLHASHSIZE 256
#define MAXSTRINGLEN 2048
#define MAXSTRINGLEN 2048
#define MAXNMLEN 256
#define MAXDECL 10240
#define MAXDECL 10240
#define MAXATTELEM 256 /* #elements in one ATTLIST */
#define MAXNAMEGROUP 256 /* #names in a (group) */
#define MAXATTRIBUTES 256 /* attributes per element */
#define MAXMAPLEN 32 /* max sequence length for SHORTREF */
#define SHORTENTITYFILE 100 /* short external entities in mem */


/*******************************
Expand Down
55 changes: 50 additions & 5 deletions util.c
Original file line number Diff line number Diff line change
Expand Up @@ -413,11 +413,21 @@ str_summary(char const *s, int len)
* FILES *
*******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Load a file into memory. This would be so easy if we didn't have to deal
with &#RE/&#RS handling, which forces us to create the proper record start
and end.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

#ifndef O_BINARY
#define O_BINARY 0
#endif

ichar *
load_file_to_charp(const char *file, int *length)
load_sgml_file_to_charp(const char *file, int normalise_rsre, int *length)
{ int fd;

if ( (fd = open(file, O_RDONLY)) >= 0 )
if ( (fd = open(file, O_RDONLY|O_BINARY)) >= 0 )
{ struct stat buf;

if ( fstat(fd, &buf) == 0 )
Expand All @@ -440,11 +450,46 @@ load_file_to_charp(const char *file, int *length)
s += n;
}

if ( length )
*length = s-r;

len = s-r;
*s = '\0'; /* ensure closing EOS */
close(fd);

/* Record-boundary normalisation.  On entry `r` is the start of the
   loaded text, `s` points at its terminating EOS and `len` is s-r.
   Every LF that is not preceded by CR is rewritten as CR LF, after
   which a final LF (if the file ended in one) is removed.
*/
if ( normalise_rsre )
{ int nl;
  int last_is_lf;

  /* Only inspect s[-1] when the buffer is non-empty.  The previous
     test `s > 0` compared the pointer against null and was therefore
     always true, reading (and later writing) r[-1] for an empty file.
  */
  last_is_lf = (s > r && s[-1] == '\n');

  /* Count the LFs that need a CR inserted in front of them.
     NOTE(review): an LF at the very first position is not counted
     (and not expanded below) because of the s>r guard -- confirm
     that this is intended.
  */
  for(s=r, nl=0; *s; s++)
  { if ( *s == '\n' && s>r && s[-1] != '\r' )
      nl++;
  }

  if ( nl > 0 )
  { char *r2 = sgml_malloc(len+nl+1);	/* one extra byte per CR + EOS */
    char *t;

    for(s=r, t=r2; *s; s++)
    { if ( *s == '\n' )
      { if ( s>r && s[-1] != '\r' )	/* same test as the counting loop */
	  *t++ = CR;
	*t++ = LF;
      } else
	*t++ = *s;
    }
    len = t-r2;
    *t = '\0';
    sgml_free(r);
    r = r2;
  }

  if ( last_is_lf )
    r[--len] = '\0';		/* delete last LF */
}

if ( length )
*length = len;

return (ichar *)r;
}
}
Expand Down
3 changes: 2 additions & 1 deletion util.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ void empty_ocharbuf(ocharbuf *buf);
const char * str_summary(const char *s, int len);
char * str2ring(const char *in);
char * ringallo(size_t);
ichar * load_file_to_charp(const char *file, int *len);
ichar * load_sgml_file_to_charp(const char *file, int normalise_rsre,
int *len);

#if defined(USE_STRING_FUNCTIONS) && !defined(UTIL_H_IMPLEMENTATION)

Expand Down

0 comments on commit 201cd41

Please sign in to comment.