Skip to content

Commit

Permalink
Merge branch 'master' of git://www.swi-prolog.org/home/pl/git/packages/sgml
Browse files Browse the repository at this point in the history

Conflicts:
	sgml.pl
  • Loading branch information
vscosta committed Nov 11, 2013
2 parents 18647f7 + 21401c4 commit 22db6b0
Show file tree
Hide file tree
Showing 18 changed files with 1,453 additions and 256 deletions.
892 changes: 892 additions & 0 deletions DTD/HTML5.dtd

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DOC=sgml
include ../Makefile.defs
LIBS=@LIBS@
CFLAGS+= -I.
# COFLAGS=-gdwarf-2 -g3

LIBOBJ= parser.o util.o charmap.o catalog.o model.o xmlns.o utf8.o \
xml_unicode.o
Expand All @@ -28,7 +29,7 @@ LIBPL= sgml.pl xsdp_types.pl iso_639.pl sgml_write.pl xpath.pl \
LIBSRCPL=$(addprefix $(srcdir)/, $(LIBPL))
TARGETS= sgml2pl.@SO@
PROGRAMS= dtd2pl$(EXEEXT) sgml$(EXEEXT)
DTDFILES= HTML4.dcl HTML4.dtd HTML4.soc \
DTDFILES= HTML4.dcl HTML4.dtd HTML4.soc HTML5.dtd \
HTMLlat1.ent HTMLspec.ent HTMLsym.ent

all: $(TARGETS) $(PROGRAMS)
Expand Down Expand Up @@ -64,8 +65,8 @@ html-install::
$(INSTALL_DATA) sgml.html $(DESTDIR)$(PKGDOC)

check::
$(PL) -f Test/test.pl -g test,halt -t 'halt(1)'
$(PL) -f Test/wrtest.pl -g test,halt -t 'halt(1)'
$(PL) -f Test/test.pl -q -g test,halt -t 'halt(1)'
$(PL) -f Test/wrtest.pl -q -g test,halt -t 'halt(1)'

uninstall::
(cd $(PLBASE)/$(SOLIB)/$(INSTALL_PLARCH) && rm -f $(TARGETS))
Expand Down
4 changes: 2 additions & 2 deletions Test/ok/badxmlent.ok
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[element(test, [a='John & Mary'], ['\n John & Mary\n'])].
[sgml(sgml_parser(1949540), 'badxmlent.xml', 3, 'Syntax error: Illegal entity, found "& Mary""'), sgml(sgml_parser(1949540), 'badxmlent.xml', 4, 'Syntax error: Illegal entity, found "& "')].
[element(test,[a='John & Mary'],['\n John & Mary\n'])].
[sgml(sgml_parser(955504),'badxmlent.xml',3,'Illegal entity, found "& Mary""'),sgml(sgml_parser(955504),'badxmlent.xml',4,'Illegal entity, found "& "')].
4 changes: 2 additions & 2 deletions Test/ok/bat.ok
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[element(bat, [], [element(x, [a=foo], []), element(x, [a=bar], []), element(x, [a='foo&bar'], []), element(x, [a='file.cgi?y=1'], []), element(x, [b=en], []), element(x, [b=en], []), element(x, [b='en en'], []), element(x, [c=[en]], []), element(x, [c=[en]], []), element(x, [c=[en, en]], []), element(x, [c=[un]], []), element(x, [c=['12']], []), element(x, [d='an-id'], []), element(x, [d='an*id'], []), element(x, [d='*id*'], []), element(x, [d='an id'], []), element(x, [e='an-id'], []), element(x, [e='un-id'], []), element(x, [f=['']], []), element(x, [f=['an-id']], []), element(x, [f=['an-id', 'an-id']], []), element(x, [g='1'], []), element(x, [g=''], []), element(x, [g='a-rather-long-name'], []), element(x, [g='a%name%with%percents'], []), element(x, [g='a name'], []), element(x, [g='a-name'], []), element(x, [h=['']], []), element(x, [h=[a]], []), element(x, [h=[name]], []), element(x, [h=[a, name]], []), element(x, [k='1'], []), element(x, [k='999999999999999999999999999999999999999999999'], []), element(x, [k=0], []), element(x, [k=0], []), element(x, [k=0], []), element(x, [n=[one, two]], []), element(x, [n=['1a', '2a']], []), element(x, [n=['1*ft', '2*in']], []), element(x, [o=no], []), element(x, [o=un], []), element(x, [p='--a--'], []), element(x, [p='--b--'], []), element(x, [p=' --a-- '], [])])].
[sgml(sgml_parser(423746), 'bat.sgml', 27, 'Syntax error: Attribute value requires quotes, found "foo&bar"'), sgml(sgml_parser(423746), 'bat.sgml', 28, 'Syntax error: Attribute value requires quotes, found "file.cgi?y=1"'), sgml(sgml_parser(423746), 'bat.sgml', 30, 'Element "x" has no attribute with value "en"'), sgml(sgml_parser(423746), 'bat.sgml', 30, 'Syntax error: Bad attribute list, found "b=en en"'), sgml(sgml_parser(423746), 'bat.sgml', 33, 'Element "x" has no attribute with value "en"'), sgml(sgml_parser(423746), 'bat.sgml', 33, 'Syntax error: Bad attribute list, found "c=en en"'), sgml(sgml_parser(423746), 'bat.sgml', 36, 'Syntax error: entity NAMES expected, found "12"'), sgml(sgml_parser(423746), 'bat.sgml', 38, 'Syntax error: Attribute value requires quotes, found "an*id"'), sgml(sgml_parser(423746), 'bat.sgml', 38, 'Syntax error: NAME expected, found "an*id"'), sgml(sgml_parser(423746), 'bat.sgml', 39, 'Syntax error: Attribute value requires quotes, found "*id*"'), sgml(sgml_parser(423746), 'bat.sgml', 39, 'Syntax error: NAME expected, found "*id*"'), sgml(sgml_parser(423746), 'bat.sgml', 43, 'Syntax error: NAMES expected, found """"'), sgml(sgml_parser(423746), 'bat.sgml', 46, 'Syntax error: NAME expected, found "1"'), sgml(sgml_parser(423746), 'bat.sgml', 47, 'Syntax error: NAME expected, found "\'\'"'), sgml(sgml_parser(423746), 'bat.sgml', 49, 'Syntax error: Attribute value requires quotes, found "a%name%with%percents"'), sgml(sgml_parser(423746), 'bat.sgml', 49, 'Syntax error: NAME expected, found "a%name%with%percents"'), sgml(sgml_parser(423746), 'bat.sgml', 52, 'Syntax error: NAMES expected, found """"'), sgml(sgml_parser(423746), 'bat.sgml', 58, 'Syntax error: NUMBER expected, found "1.2"'), sgml(sgml_parser(423746), 'bat.sgml', 59, 'Syntax error: NUMBER expected, found ""1.2""'), sgml(sgml_parser(423746), 'bat.sgml', 60, 'Syntax error: NUMBER expected, found ""-1.2""'), sgml(sgml_parser(423746), 'bat.sgml', 61, 'Syntax error: NUTOKENS 
expected, found ""one two""'), sgml(sgml_parser(423746), 'bat.sgml', 63, 'Syntax error: NUTOKENS expected, found ""1*ft 2*in""'), sgml(sgml_parser(423746), 'bat.sgml', 66, 'Element "x" has no attribute "p"')].
[element(bat,[],[element(x,[a=foo],[]),element(x,[a=bar],[]),element(x,[a='foo&bar'],[]),element(x,[a='file.cgi?y=1'],[]),element(x,[b=en],[]),element(x,[b=en],[]),element(x,[b='en en'],[]),element(x,[c=[en]],[]),element(x,[c=[en]],[]),element(x,[c=[en,en]],[]),element(x,[c=[un]],[]),element(x,[c=['12']],[]),element(x,[d='an-id'],[]),element(x,[d='an*id'],[]),element(x,[d='*id*'],[]),element(x,[d='an id'],[]),element(x,[e='an-id'],[]),element(x,[e='un-id'],[]),element(x,[f=['']],[]),element(x,[f=['an-id']],[]),element(x,[f=['an-id','an-id']],[]),element(x,[g='1'],[]),element(x,[g=''],[]),element(x,[g='a-rather-long-name'],[]),element(x,[g='a%name%with%percents'],[]),element(x,[g='a name'],[]),element(x,[g='a-name'],[]),element(x,[h=['']],[]),element(x,[h=[a]],[]),element(x,[h=[name]],[]),element(x,[h=[a,name]],[]),element(x,[k='1'],[]),element(x,[k='999999999999999999999999999999999999999999999'],[]),element(x,[k=0],[]),element(x,[k=0],[]),element(x,[k=0],[]),element(x,[n=[one,two]],[]),element(x,[n=['1a','2a']],[]),element(x,[n=['1*ft','2*in']],[]),element(x,[o=no],[]),element(x,[o=un],[]),element(x,[p='--a--'],[]),element(x,[p='--b--'],[]),element(x,[p=' --a-- '],[])])].
[sgml(sgml_parser(960612),'bat.sgml',27,'Attribute value requires quotes, found "foo&bar"'),sgml(sgml_parser(960612),'bat.sgml',28,'Attribute value requires quotes, found "file.cgi?y=1"'),sgml(sgml_parser(960612),'bat.sgml',30,'Element "x" has no attribute with value "en"'),sgml(sgml_parser(960612),'bat.sgml',30,'Bad attribute list, found "b=en en"'),sgml(sgml_parser(960612),'bat.sgml',33,'Element "x" has no attribute with value "en"'),sgml(sgml_parser(960612),'bat.sgml',33,'Bad attribute list, found "c=en en"'),sgml(sgml_parser(960612),'bat.sgml',36,'entity NAMES expected, found "12"'),sgml(sgml_parser(960612),'bat.sgml',38,'Attribute value requires quotes, found "an*id"'),sgml(sgml_parser(960612),'bat.sgml',38,'NAME expected, found "an*id"'),sgml(sgml_parser(960612),'bat.sgml',39,'Attribute value requires quotes, found "*id*"'),sgml(sgml_parser(960612),'bat.sgml',39,'NAME expected, found "*id*"'),sgml(sgml_parser(960612),'bat.sgml',43,'NAMES expected, found """"'),sgml(sgml_parser(960612),'bat.sgml',46,'NAME expected, found "1"'),sgml(sgml_parser(960612),'bat.sgml',47,'NAME expected, found "\'\'"'),sgml(sgml_parser(960612),'bat.sgml',49,'Attribute value requires quotes, found "a%name%with%percents"'),sgml(sgml_parser(960612),'bat.sgml',49,'NAME expected, found "a%name%with%percents"'),sgml(sgml_parser(960612),'bat.sgml',52,'NAMES expected, found """"'),sgml(sgml_parser(960612),'bat.sgml',58,'NUMBER expected, found "1.2"'),sgml(sgml_parser(960612),'bat.sgml',59,'NUMBER expected, found ""1.2""'),sgml(sgml_parser(960612),'bat.sgml',60,'NUMBER expected, found ""-1.2""'),sgml(sgml_parser(960612),'bat.sgml',61,'NUTOKENS expected, found ""one two""'),sgml(sgml_parser(960612),'bat.sgml',63,'NUTOKENS expected, found ""1*ft 2*in""'),sgml(sgml_parser(960612),'bat.sgml',66,'Element "x" has no attribute "p"')].
4 changes: 2 additions & 2 deletions Test/ok/cent-nul.ok
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[element(test, [], ['This content holds a byte that should be skipped'])].
[sgml(sgml_parser(482992), 'cent-nul.xml', 1, 'Syntax error: Bad character entity, found "#0"')].
[element(test,[],['This content holds a byte that should be skipped'])].
[sgml(sgml_parser(660228),'cent-nul.xml',1,'Bad character entity, found "#0"')].
4 changes: 2 additions & 2 deletions Test/ok/comment.ok
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[element(test, [], ['\n \n \n \n \n \n'])].
[sgml(sgml_parser(1951880), 'comment.xml', 5, 'Syntax error: Illegal comment, found "<!---"'), sgml(sgml_parser(1951880), 'comment.xml', 6, 'Syntax error: Illegal comment'), sgml(sgml_parser(1951880), 'comment.xml', 8, 'Syntax error: Illegal comment')].
[element(test,[],['\n \n \n \n \n \n'])].
[sgml(sgml_parser(950996),'comment.xml',5,'Illegal comment, found "<!---"'),sgml(sgml_parser(950996),'comment.xml',6,'Illegal comment'),sgml(sgml_parser(950996),'comment.xml',8,'Illegal comment')].
49 changes: 33 additions & 16 deletions Test/test.pl
Original file line number Diff line number Diff line change
@@ -1,13 +1,31 @@
/* $Id$
Part of SWI-Prolog SGML/XML parser
Author: Jan Wielemaker
E-mail: [email protected]
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
/* Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: [email protected]
WWW: http://www.swi-prolog.org
Copyright (C): 2000-2013, University of Amsterdam
VU University Amsterdam
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, if you link this library with other files,
compiled with a Free Software compiler, to produce an executable, this
library does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/

:- module(sgml_test,
Expand Down Expand Up @@ -48,15 +66,15 @@
dotest(_).

test(File) :-
format('~NTest ~w ... ', [File]),
debug(sgml(test), 'Test ~w ... ', [File]),
flush_output,
load_file(File, Term),
ground(Term), % make sure
okfile(File, OkFile),
( exists_file(OkFile)
-> load_prolog_file(OkFile, TermOk, ErrorsOk),
( compare_dom(Term, TermOk)
-> format('ok')
-> true
; assert(failed(File)),
format('WRONG'),
format('~NOK:~n'),
Expand All @@ -73,8 +91,7 @@
pretty_print(ErrorsOk),
format('~NANSWER:~n'),
pretty_print(Errors)
),
nl
)
; show_errors,
format('Loaded, no validating data~n'),
pretty_print(Term)
Expand All @@ -99,9 +116,9 @@
findall(X, failed(X), L),
length(L, Len),
( Len > 0
-> format('~n*** ~w tests failed ***~n', [Len]),
-> format('~N*** ~w tests failed ***~n', [Len]),
fail
; format('~nAll tests passed~n', [])
; format('~NAll tests passed~n', [])
).

:- dynamic
Expand Down
65 changes: 46 additions & 19 deletions Test/wrtest.pl
Original file line number Diff line number Diff line change
@@ -1,13 +1,31 @@
/* $Id$
Part of SWI-Prolog SGML/XML parser
Author: Jan Wielemaker
E-mail: [email protected]
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
/* Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: [email protected]
WWW: http://www.swi-prolog.org
Copyright (C): 2005-2013, University of Amsterdam
VU University Amsterdam
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, if you link this library with other files,
compiled with a Free Software compiler, to produce an executable, this
library does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/

:- prolog_load_context(directory, CWD),
Expand Down Expand Up @@ -56,15 +74,15 @@
ml_file(Ext),
file_base_name(File, Base),
\+ blocked(Base),
format(user_error, '~w ... ', [Base]),
debug(sgml(test), '~w ... ', [Base]),
( \+ utf8(Base)
-> format(user_error, ' (ISO Latin-1) ... ', []),
-> debug(sgml(test), ' (ISO Latin-1) ... ', []),
fixed_point(File, iso_latin_1)
; true
),
format(user_error, ' (UTF-8) ... ', []),
debug(sgml(test), ' (UTF-8) ... ', []),
fixed_point(File, utf8),
format(user_error, ' done~n', []),
debug(sgml(test), ' done~n', []),
fail
; report_failed
).
Expand Down Expand Up @@ -99,9 +117,9 @@
findall(X, failed(X, _), L),
length(L, Len),
( Len > 0
-> format('~n*** ~w tests failed ***~n', [Len]),
-> format('~N*** ~w tests failed ***~n', [Len]),
fail
; format('~nAll tests passed~n', [])
; format('~NAll read/write roundtrip tests passed~n', [])
).


Expand All @@ -121,15 +139,24 @@
fp(File, Encoding, load_html_file, html_write).

fp(File, Encoding, Load, Write) :-
put_char(user_error, r),
( debugging(sgml(test))
-> put_char(user_error, r)
; true
),
call(Load, File, Term),
tmp_file(xml, TmpFile),
open(TmpFile, write, TmpOut, [encoding(Encoding)]),
put_char(user_error, w),
( debugging(sgml(test))
-> put_char(user_error, w)
; true
),
call(Write, TmpOut, Term, []),
close(TmpOut),
% cat(TmpFile, Encoding),
put_char(user_error, r),
( debugging(sgml(test))
-> put_char(user_error, r)
; true
),
call(Load, TmpFile, Term2),
delete_file(TmpFile),
( eq(Term, Term2)
Expand Down
18 changes: 12 additions & 6 deletions dtd.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
/* $Id$
Part of SWI-Prolog
/* Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: [email protected].nl
E-mail: J.Wielemaker@vu.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2002, University of Amsterdam
Copyright (C): 1985-2013, University of Amsterdam
VU University Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
Expand Down Expand Up @@ -158,13 +157,19 @@ typedef enum

/* Dialect selector for the parser.
 *
 * NOTE: the order of the enumerators is significant.  The
 * IS_SGML_DIALECT() and IS_XML_DIALECT() macros below compare the
 * numeric values, so every SGML-family dialect must be listed before
 * DL_XML and every XML-family dialect from DL_XML onwards.
 */
typedef enum
{ DL_SGML, /* Use SGML */
DL_HTML, /* Pre-HTML5 */
DL_HTML5, /* HTML5 extensions of SGML */
DL_XML, /* Use XML */
DL_XMLNS /* Use XML + Namespaces */
} dtd_dialect;

/* True for DL_SGML, DL_HTML and DL_HTML5 (everything up to DL_HTML5) */
#define IS_SGML_DIALECT(d) ((int)(d) <= (int)DL_HTML5)
/* True for DL_HTML and DL_HTML5; note that `d` is evaluated twice */
#define IS_HTML_DIALECT(d) ((d) == DL_HTML || (d) == DL_HTML5)
/* True for DL_XML and DL_XMLNS (everything from DL_XML onwards) */
#define IS_XML_DIALECT(d) ((int)(d) >= (int)DL_XML)

typedef enum
{ OPT_SHORTTAG /* do/don't accept shorttag */
{ OPT_SHORTTAG, /* do/don't accept shorttag */
OPT_CASE_SENSITIVE_ATTRIBUTES /* attribute values case(in)sensitive */
} dtd_option;


Expand Down Expand Up @@ -421,6 +426,7 @@ typedef struct _dtd
dtd_dialect dialect; /* DL_* */
int case_sensitive; /* Tags are case-sensitive */
int ent_case_sensitive; /* Entities are case-sensitive */
int att_case_sensitive; /* Att values are case-sensitive */
ichar *doctype; /* defined document type */
dtd_symbol_table *symbols; /* symbol-table */
dtd_entity *pentities; /* defined parameter entities */
Expand Down
Loading

0 comments on commit 22db6b0

Please sign in to comment.