-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.xml
403 lines (375 loc) · 13.6 KB
/
build.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="ise2-import" default="all" xmlns:if="ant:if" xmlns:unless="ant:unless">
<description>ISE2->ISE3 conversion scripts</description>
<import file="lib/schematron/ant-macro.xml"/>
<!-- Register the Jing RELAX NG validator (shipped in lib/jing.jar) as the "jing" task. -->
<taskdef classpath="lib/jing.jar"
         classname="com.thaiopensource.relaxng.util.JingTask"
         name="jing"/>
<!--#########################################################################-->
<!--## Configuration ##-->
<!-- Identity reported to the stylesheets (the xsl macro sends it as the runBy parameter). -->
<property name="myID" value="${user.name}"/>
<!-- Collection key: selects ${metadata}/collections/${site}.xml and prefixes converted IML file names. -->
<property name="site" value="ise"/>
<!-- ISE2 metadata directory, relative to the project base directory. -->
<property name="metadata" value="ise2/metadata"/>
<!-- Absolute paths of the ISE3 reference files that the xsl macro passes to the stylesheets. -->
<property name="personography" value="${basedir}/ise3/personography.xml"/>
<property name="orgography" value="${basedir}/ise3/orgography.xml"/>
<property name="taxonomies" value="${basedir}/ise3/taxonomies.xml"/>
<!-- Base URL given to xsl/xwiki-list-urls.xsl as xwikiBaseUrl. -->
<property name="xwiki-exporter" value="http://isebeta.uvic.ca/xwiki/bin/view/IseCode/PublishedPagePipe?xpage=plain"/>

<!-- IML sources for the iml-os target: every doc_*.txt except those ending in M or Me. -->
<fileset id="src.iml.os" dir="ise2"
         includes="documents/iml/doc_*.txt"
         excludes="documents/iml/doc_*M.txt, documents/iml/doc_*Me.txt"/>
<!-- IML sources for the iml-modern target: the doc_*M.txt and doc_*Me.txt files. -->
<fileset id="src.iml.modern" dir="ise2"
         includes="documents/iml/doc_*M.txt, documents/iml/doc_*Me.txt"/>

<!-- XWiki exports (as downloaded by xwiki-src), relative to the project base directory. -->
<patternset id="src.xwiki" includes="ise2/documents/xwiki/*.xml"/>
<!-- Annotation and collation files consumed by the apparatus target. -->
<patternset id="src.apparatus"
            includes="ise2/annotations/*_annotation.xml, ise2/collations/*_collation.xml"/>
<!--#########################################################################-->
<!--## Conversion tasks ##-->
<!-- Compile the Java sources under java/ into classes/, with lib/isetools.jar on the compile classpath. -->
<target name="compile">
  <mkdir dir="classes"/>
  <javac destdir="classes" srcdir="java" includeantruntime="false">
    <classpath location="lib/isetools.jar"/>
  </javac>
</target>
<!-- Old-spelling pipeline: preprocess the src.iml.os files into work/os, then run the TEI chain with modern=false. -->
<target name="iml-os" depends="compile" description="convert old-spelling IML texts">
  <prepareiml to="work/os" filesRef="src.iml.os"/>
  <imltotei modern="false" workdir="work/os"/>
</target>
<!-- Modern-text pipeline: preprocess the src.iml.modern files into work/modern, then run the TEI chain with modern=true. -->
<target name="iml-modern" depends="compile" description="convert modern IML texts">
  <prepareiml to="work/modern" filesRef="src.iml.modern"/>
  <imltotei modern="true" workdir="work/modern"/>
</target>
<!-- Aggregate target: runs the old-spelling conversion and then the modern one. -->
<target name="iml"
        depends="iml-os, iml-modern"
        description="convert all IML texts"/>
<!--
  Download the XWiki supplementary texts for the configured site.
  1. Build work/xwiki-urls-${site}.txt from the collection file with xsl/xwiki-list-urls.xsl.
  2. If that list is non-empty, fetch every URL in it into ise2/documents/xwiki.
-->
<target name="xwiki-src" description="fetch XWiki texts from the isebeta XWiki server">
  <property name="src.urls" value="xwiki-urls-${site}.txt"/>
  <!-- discard a stale URL list when the collection file or any edition file is newer than it -->
  <dependset>
    <sources>
      <file file="${metadata}/collections/${site}.xml"/>
      <fileset dir="${metadata}/editions" includes="edition_*.xml"/>
    </sources>
    <targetfilelist dir="work" files="${src.urls}"/>
  </dependset>
  <!-- (re)generate the URL list; the merge mapper sends all output to the single list file -->
  <xsl style="xsl/xwiki-list-urls.xsl" destdir="work">
    <include name="${metadata}/collections/${site}.xml"/>
    <mergemapper to="${src.urls}"/>
    <param name="xwikiBaseUrl" expression="${xwiki-exporter}"/>
  </xsl>
  <!-- xwiki.todo is set only when the list file has a length greater than zero -->
  <condition property="xwiki.todo">
    <length file="work/${src.urls}" when="gt" length="0"/>
  </condition>
  <echo unless:true="${xwiki.todo}">No supplements found in this collection.</echo>
  <!-- ignoreerrors: a failed download is logged but does not fail the build -->
  <get
      if:true="${xwiki.todo}"
      usetimestamp="yes"
      tryGzipEncoding="yes"
      dest="ise2/documents/xwiki"
      ignoreerrors="yes"
  >
    <resourcelist>
      <file file="work/${src.urls}"/>
    </resourcelist>
    <!-- Local file name is EDITION_PAGE.xml, built from the page and edition query parameters.
         The ampersands are written as entities: a raw ampersand is not allowed in an attribute value. -->
    <mapper type="regexp"
            from="&amp;page=texts.([^&amp;]+).*&amp;edition=([^&amp;]+)"
            to="\2_\1.xml"
    />
  </get>
</target>
<!-- Transform the XWiki exports selected by src.xwiki into ISE3 supplementary texts. -->
<target name="xwiki" description="convert XWiki texts">
  <!-- force regeneration: existing supp outputs are removed when a listed stylesheet is newer -->
  <dependset>
    <targetfileset dir="ise3" includes="texts/*/supp/*.xml"/>
    <srcfilelist dir="xsl" files="xwiki-tei.xsl, global.xsl, util.xsl, metadata.xsl, ilink.xsl"/>
  </dependset>
  <xsl destdir="ise3" style="xsl/xwiki-tei.xsl">
    <patternset refid="src.xwiki"/>
    <!-- EDITION_NAME.xml is written to texts/EDITION/supp/iseNAME.xml -->
    <regexpmapper to="texts/\1/supp/ise\2"
                  from="([^/_]+)_([^/]+\.xml)$$"/>
  </xsl>
</target>
<!-- Transform the annotation and collation files selected by src.apparatus with xsl/apparatus.xsl. -->
<target name="apparatus" description="convert annotations and collations">
  <xsl destdir="ise3" style="xsl/apparatus.xsl">
    <patternset refid="src.apparatus"/>
    <!-- EDITION_NAME_annotation.xml is written to texts/EDITION/app/iseEDITION_NAME_annotations.xml
         (likewise for collation files) -->
    <regexpmapper to="texts/\1/app/ise\1_\2_\3s.xml"
                  from="([^/_]+)_(.*)_(annotation|collation).xml$$"/>
  </xsl>
</target>
<!-- Transform each metadata copy_*.xml file into a facsimile document under ise3/facsimiles. -->
<target name="facsimiles" description="convert facsimiles">
  <!-- force regeneration: existing facsimile outputs are removed when a listed stylesheet is newer -->
  <dependset>
    <targetfileset dir="ise3/facsimiles" includes="*.xml"/>
    <srcfilelist dir="xsl" files="facsimile.xsl, global.xsl, util.xsl, metadata.xsl, ilink.xsl"/>
  </dependset>
  <xsl destdir="ise3" style="xsl/facsimile.xsl">
    <include name="${metadata}/copies/copy_*.xml"/>
    <!-- the output keeps the copy_*.xml file name -->
    <regexpmapper to="facsimiles/\1" from="^.*/(copy_[^/]+\.xml)$$"/>
  </xsl>
</target>
<!-- Transform each metadata edition_*.xml file into an edition document in that text's supp directory. -->
<target name="editions" description="convert editions">
  <!-- force regeneration: existing edition outputs are removed when a listed stylesheet is newer -->
  <dependset>
    <targetfileset dir="ise3/texts" includes="*/supp/*_edition.xml"/>
    <srcfilelist dir="xsl" files="edition.xsl, global.xsl, util.xsl, metadata.xsl, ilink.xsl"/>
  </dependset>
  <xsl destdir="ise3" style="xsl/edition.xsl">
    <include name="${metadata}/editions/edition_*.xml"/>
    <!-- edition_NAME.xml is written to texts/NAME/supp/iseNAME_edition.xml -->
    <regexpmapper to="texts/\1/supp/ise\1_edition.xml"
                  from="^.*/edition_(.*)\.xml$$"/>
  </xsl>
</target>
<!--#########################################################################-->
<!--## Validation tasks ##-->
<!-- Fetch the TEI Stylesheets 7.43.0 release archive (skipped when the zip is already present)
     and unpack it into lib/tei-xsl without overwriting files that are already there. -->
<target name="get-tei-xsl">
  <get dest="lib/tei-xsl-7.43.0.zip"
       src="https://github.com/TEIC/Stylesheets/releases/download/v7.43.0/tei-xsl-7.43.0.zip"
       skipexisting="yes"
       verbose="yes"/>
  <unzip dest="lib/tei-xsl" src="lib/tei-xsl-7.43.0.zip" overwrite="false"/>
</target>
<!-- Generate the validation schemas from sch/ise.odd using the downloaded TEI stylesheets (run with Saxon). -->
<target name="compile-odd" depends="get-tei-xsl">
  <!-- step 1: sch/ise.odd to sch/ise-expanded.odd -->
  <xslt in="sch/ise.odd"
        out="sch/ise-expanded.odd"
        style="lib/tei-xsl/xml/tei/stylesheet/odds/odd2odd.xsl">
    <factory name="net.sf.saxon.TransformerFactoryImpl"/>
  </xslt>
  <!-- step 2: expanded ODD to the RELAX NG schema sch/ise.rng -->
  <xslt in="sch/ise-expanded.odd"
        out="sch/ise.rng"
        style="lib/tei-xsl/xml/tei/stylesheet/profiles/default/relaxng/to.xsl">
    <factory name="net.sf.saxon.TransformerFactoryImpl"/>
  </xslt>
  <!-- step 3: Schematron rules sch/ise.sch.
       NOTE(review): this step reads sch/ise.odd, not the expanded ODD from step 1; confirm that is intended. -->
  <xslt in="sch/ise.odd"
        out="sch/ise.sch"
        style="lib/tei-xsl/xml/tei/stylesheet/profiles/default/schematron/to.xsl">
    <factory name="net.sf.saxon.TransformerFactoryImpl"/>
  </xslt>
</target>
<!--
  Validate the files under ise3 against the schemas produced by compile-odd:
  first RELAX NG (jing, sch/ise.rng), then Schematron (sch/ise.sch).
  The taxonomies file is left out of both runs.
-->
<target name="validate" depends="compile-odd">
  <jing rngfile="sch/ise.rng">
    <fileset dir="${basedir}/ise3">
      <!-- Fileset patterns are matched relative to the fileset dir, so the absolute path held in
           the taxonomies property can never match here; name the file relative to ise3 instead.
           Keep this in step with the taxonomies property. -->
      <exclude name="taxonomies.xml"/>
    </fileset>
  </jing>
  <schematron schema="sch/ise.sch">
    <fileset dir="${basedir}/ise3">
      <!-- same relative exclude as in the jing run above -->
      <exclude name="taxonomies.xml"/>
    </fileset>
  </schematron>
</target>
<!--#########################################################################-->
<!--## Standard targets ##-->
<!-- Default target: run every conversion, then validate the result. -->
<target name="all"
        depends="iml, xwiki, apparatus, facsimiles, editions, validate"
        description="convert all available data"/>
<!-- Remove build products: compiled classes, the work directory, and the generated
     ise3/texts and ise3/facsimiles trees. -->
<target name="clean">
  <delete dir="classes"/>
  <delete dir="work"/>
  <delete dir="ise3/texts"/>
  <delete dir="ise3/facsimiles"/>
</target>
<!--#########################################################################-->
<!--## Macros ##-->
<!--
  xsl: wrapper around the xslt task that selects the Saxon transformer factory and
  passes the project-wide stylesheet parameters.
    style    stylesheet to apply
    destdir  output directory
  Anything nested inside the call (includes, mappers, extra params) is copied into the xslt task.
-->
<macrodef name="xsl">
  <attribute name="style"/>
  <attribute name="destdir"/>
  <element name="inout" implicit="yes"/>
  <sequential>
    <xslt destdir="@{destdir}" style="@{style}">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
      <!-- caller-supplied nested elements -->
      <inout/>
      <!-- parameters sent on every call -->
      <param name="site" expression="${site}"/>
      <param name="runBy" expression="${myID}"/>
      <param name="metadataPath" expression="${basedir}/${metadata}"/>
      <param name="taxonomiesPath" expression="${taxonomies}"/>
      <param name="orgographyPath" expression="${orgography}"/>
      <param name="personographyPath" expression="${personography}"/>
    </xslt>
  </sequential>
</macrodef>
<!-- IML preprocessing (before TEI conversion) -->
<!-- Regex fragment matching zero or more NAME="value" or NAME='value' attributes inside a tag;
     the whole run of attributes is one capture group. Used by the prepareiml filters.
     The value is delimited by apostrophes, so the apostrophes inside it are written as entities. -->
<property name="rex.anyAttrs"
          value='((?:\s+[^>\s]+\s*=\s*(?:"[^"]*"|&apos;[^&apos;]*&apos;))*)'/>
<!--
  prepareiml: turn a set of IML text files into XML via osx.
    filesRef  id of the fileset holding the IML sources
    to        working directory; each stage writes to its own subdirectory:
              raw (ValidateAndExpand output), escaped (escape-unicode.xsl),
              fixup (regex rewrites below), osx (osx output and per-file error logs)
  Regex patterns and replacements below write the left angle bracket as an entity
  (and the apostrophe too, inside apostrophe-delimited values), as XML attribute values require.
-->
<macrodef name="prepareiml">
  <attribute name="filesRef"/>
  <attribute name="to"/>
  <sequential>
    <local name="files"/>
    <!-- space-separated list of the source files; it is split again on spaces by the arg line below -->
    <pathconvert property="files" refid="@{filesRef}" pathsep=" "/>
    <!-- use isetools to validate and expand curly-escapes -->
    <java classname="ise2import.ValidateAndExpand" failonerror="yes" fork="yes">
      <arg line="-o @{to}/raw ${files}"/>
      <classpath>
        <pathelement path="classes"/>
        <pathelement location="lib/isetools.jar"/>
      </classpath>
    </java>
    <!-- escape unicode characters (osx doesn't handle them well) -->
    <xslt
        style="xsl/escape-unicode.xsl"
        basedir="@{to}/raw"
        includes="*.txt"
        destdir="@{to}/escaped"
        extension=".txt"
    >
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
    <!-- preprocess some tagging known to trip up osx -->
    <copy todir="@{to}/fixup">
      <fileset dir="@{to}/escaped" includes="*.txt"/>
      <flattenmapper/>
      <filterchain>
        <tokenfilter>
          <!-- remove whitespace at beginning and end of tags -->
          <replaceregex pattern="&lt;\s*" replace="&lt;" flags="g"/>
          <replaceregex pattern="\s*>" replace=">" flags="g"/>
          <!-- change several tags to milestones -->
          <replaceregex
              pattern="&lt;(PAGE|COL|MODE)${rex.anyAttrs}>"
              replace="&lt;\1\2/>"
              flags="gi"/>
          <replaceregex
              pattern="&lt;/MODE>"
              replace='&lt;MODE T="end"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/(PAGE|COL)>"
              replace=""
              flags="gi"/>
          <replaceregex
              pattern="&lt;(SUB|SUP)>"
              replace='&lt;VERTAL SWITCH="ON" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/(SUB|SUP)>"
              replace='&lt;VERTAL SWITCH="OFF" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;(C|RA|J)>"
              replace='&lt;HORZAL SWITCH="ON" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/(C|RA|J)>"
              replace='&lt;HORZAL SWITCH="OFF" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;(BLL|I|R)>"
              replace='&lt;STYLE SWITCH="ON" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/(BLL|I|R)>"
              replace='&lt;STYLE SWITCH="OFF" T="\1"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;LS>"
              replace='&lt;LS SWITCH="ON"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/LS>"
              replace='&lt;LS SWITCH="OFF"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;SC>"
              replace='&lt;SC SWITCH="ON"/>'
              flags="gi"/>
          <replaceregex
              pattern="&lt;/SC>"
              replace='&lt;SC SWITCH="OFF"/>'
              flags="gi"/>
          <replaceregex
              pattern='&lt;FONT\s+SIZE\s*=\s*("\s*\d+\s*"|&apos;\s*\d+\s*&apos;)>'
              replace='&lt;FONT SWITCH="ON" SIZE=\1/>'
              flags="gi"/>
          <replaceregex
              pattern='&lt;/FONT>'
              replace='&lt;FONT SWITCH="OFF"/>'
              flags="gi"/>
          <replaceregex
              pattern='&lt;INDENT\s+L\s*=\s*("\s*\d+\s*"|&apos;\s*\d+\s*&apos;)>'
              replace='&lt;INDENT SWITCH="ON" N=\1/>'
              flags="gi"/>
          <replaceregex
              pattern='&lt;/INDENT>'
              replace='&lt;INDENT SWITCH="OFF"/>'
              flags="gi"/>
          <!-- change self-closing tags to explicitly-closed tags
               (this also rewrites the milestones produced above) -->
          <replaceregex
              pattern="&lt;([A-Z0-9]+)(${rex.anyAttrs})\s*/>"
              replace="&lt;\1\2>&lt;/\1>"
              flags="gi"/>
        </tokenfilter>
        <!-- second pass treats the whole file as a single token -->
        <tokenfilter>
          <filetokenizer/>
          <!-- remove whitespace at end of file ($$ is Ant's escape for a literal dollar sign) -->
          <replaceregex pattern="\s*$$" replace=""/>
          <!-- insert explicit editor line breaks -->
          <replaceregex
              pattern="\s*\n+"
              replace="&lt;lb ed='this'>&lt;/lb>"
              flags="g"/>
          <!-- prepend the document type declaration -->
          <replaceregex pattern="^" replace="&lt;!DOCTYPE WORK>"/>
        </tokenfilter>
      </filterchain>
    </copy>
    <!-- convert to XML using osx -->
    <mkdir dir="@{to}/osx"/> <!-- redirector fails if dir doesn't exist already -->
    <!-- timeout is in milliseconds and applies to each osx invocation -->
    <apply executable="osx" dest="@{to}/osx" timeout="2000" skipemptyfilesets="yes" failonerror="yes">
      <arg value="-E100"/>
      <arg value="-wno-valid"/>
      <srcFile/>
      <fileset dir="@{to}/fixup" includes="*.txt"/>
      <globmapper from="*.txt" to="*.xml"/>
      <!-- standard output becomes NAME.xml; standard error goes to a matching -errors file -->
      <redirector createemptyfiles="no">
        <outputmapper type="glob" from="*.txt" to="@{to}/osx/*.xml"/>
        <errormapper type="glob" from="*" to="@{to}/osx/*-errors"/>
      </redirector>
    </apply>
  </sequential>
</macrodef>
<!--
  imltotei: four-step XSLT chain from the osx output to ISE3 texts.
    workdir  working directory that contains the osx subdirectory; the intermediate
             results go to tei-step1, tei-step2 and tei-step3 beneath it
    modern   boolean passed to steps 1, 3 and 4 as the "modern" stylesheet parameter
-->
<macrodef name="imltotei">
  <attribute name="workdir"/>
  <attribute name="modern"/>
  <sequential>
    <!-- drop the step-1 results when one of the listed stylesheets is newer -->
    <dependset>
      <targetfileset dir="@{workdir}/tei-step1"/>
      <srcfilelist dir="xsl" files="iml-tei-step1.xsl, global.xsl, util.xsl, metadata.xsl"/>
    </dependset>

    <!-- step 1: doc_A_B.xml is written as ${site}A_B.xml -->
    <xsl destdir="@{workdir}/tei-step1" style="xsl/iml-tei-step1.xsl">
      <include name="@{workdir}/osx/*.xml"/>
      <param name="modern" type="BOOLEAN" expression="@{modern}"/>
      <regexpmapper to="${site}\1_\2.xml" from="doc_(.+)_(.+)\.xml"/>
    </xsl>

    <!-- step 2: file names unchanged; no "modern" parameter is passed here -->
    <xsl destdir="@{workdir}/tei-step2" style="xsl/iml-tei-step2.xsl">
      <include name="@{workdir}/tei-step1/*.xml"/>
      <flattenmapper/>
    </xsl>

    <!-- step 3: file names unchanged -->
    <xsl destdir="@{workdir}/tei-step3" style="xsl/iml-tei-step3.xsl">
      <include name="@{workdir}/tei-step2/*.xml"/>
      <param name="modern" type="BOOLEAN" expression="@{modern}"/>
      <flattenmapper/>
    </xsl>

    <!-- step 4: ${site}A_B.xml is written to ise3/texts/A/main under the same file name -->
    <xsl destdir="ise3" style="xsl/iml-tei-step4.xsl">
      <include name="@{workdir}/tei-step3/*.xml"/>
      <param name="modern" type="BOOLEAN" expression="@{modern}"/>
      <regexpmapper to="texts/\1/main/\0" from="${site}(.+)_(.+)\.xml"/>
    </xsl>
  </sequential>
</macrodef>
</project>