3
3
# NOTE: Expects ISSN_LANG and UNIX_LANG to be defined
4
4
OUTPUT_FILE=" jabbrv-ltwa-${UNIX_LANG} .ldf" ;
5
5
6
- # Below is a rather large list of LaTeX replacements for UTF-8 Characters,
7
- # note that only characters found in the LTWA database are included below
6
+ # Below is a list of LaTeX replacements for UTF-8 combining diacritical marks and unusual symbols.
7
+ # Note that only characters found in the LTWA database are included below
8
8
# (to save processing time for my poor computer)
9
9
# \` \' \^ \~ \=
10
10
REPLACECHAR=(' \(.\)\xCC\x80' ' \(.\)\xCC\x81' ' \(.\)\xCC\x82' ' \(.\)\xCC\x83' ' \(.\)\xCC\x84' \
11
+ # \.
12
+ ' \(.\)\xCC\x87' \
11
13
# \" \v \J@C \c
12
14
' \(.\)\xCC\x88' ' \(.\)\xCC\x8C' ' \(.\)\xCC\xA1' ' \(.\)\xCC\xA7' \
13
- # \=a \"a \^A
14
- ' ā' ' ä' ' Â' \
15
- # \'e
16
- ' é' \
17
- # \`i \'i
18
- ' ì' ' í' \
19
- # \'o \oe \"o
20
- ' ó' ' œ' ' ö' \
21
- # \'u \^u \"u
22
- ' ú' ' û' ' ü' \
23
- # \v s
24
- ' š' \
15
+ # \oe
16
+ ' œ' \
25
17
' ʹ' );
26
18
REPLACEMENT=(' \\`\1' " \\\\ '\1" ' \\^\1' ' \\~\1' ' \\=\1' \
19
+ ' \\.\1' \
27
20
' \\"\1' ' \\v \1' ' \\J@C \1' ' \\c \1' \
28
- ' \\=a' ' \\"a' ' \\^A' \
29
- " \\\\ 'e" \
30
- ' \\`i' " \\\\ 'i" \
31
- " \\\\ 'o" ' \\oe ' ' \\"o' \
32
- " \\\\ 'u" ' \\^u' ' \\"u' \
33
- ' \\v s' \
21
+ ' \\oe ' \
34
22
" '" );
35
23
REPLACE_RULES=" " ;
36
24
MAXRULES=$(( ${# REPLACECHAR[@]} - 1 )) ;
37
25
for J in ` seq 0 ${MAXRULES} ` ; do
38
26
REPLACE_RULES=" ${REPLACE_RULES} ;s/${REPLACECHAR[$J]} /${REPLACEMENT[$J]} /g" ;
39
27
done
40
- # REPLACE_RULES="${REPLACE_RULES:1}";
41
- # For testing:
42
- # echo "${REPLACE_RULES}";
43
- # TEST="<td>èlement-<td>elem.<td>rus,fre,eng<td>";
44
- # TEST=`echo "${TEST}" | awk 'BEGIN { FS = "<td>" } ; { print $2 }'`;
45
- # TEST="èpidemiolog- ébénisterie";
46
- # TEST=`echo "${TEST:0:1}" | tr a-z A-Z`"${TEST:1}";
47
- # echo "${TEST}" | sed -e "${REPLACE_RULES:1}";
48
- # exit 0;
49
28
50
- HEADER=" %% Copyright 2010 Erich Hoover
51
-
29
+ REPLACE_ODD=" " ;
30
+ # Almost the entire LTWA uses "combining" diacritical marks; the few precomposed characters listed below are rewritten as base letter + combining mark:
31
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/Â/A\xCC\x82/g" ; # capital A with circumflex (Â)
32
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ā/a\xCC\x84/g" ; # lowercase a with overline (ā)
33
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ä/a\xCC\x88/g" ; # lowercase a with umlauts (ä)
34
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/è/e\xCC\x80/g" ; # lowercase e with backtick (è)
35
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/é/e\xCC\x81/g" ; # lowercase e with forward tick (é)
36
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ì/i\xCC\x80/g" ; # lowercase i with backtick (ì)
37
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/í/i\xCC\x81/g" ; # lowercase i with forward tick (í)
38
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/Ö/O\xCC\x88/g" ; # capital O with umlauts (Ö)
39
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ó/o\xCC\x81/g" ; # lowercase o with forward tick (ó)
40
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ö/o\xCC\x88/g" ; # lowercase o with umlauts (ö)
41
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ú/u\xCC\x81/g" ; # lowercase u with forward tick (ú)
42
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/û/u\xCC\x82/g" ; # lowercase u with circumflex (û)
43
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/ü/u\xCC\x88/g" ; # lowercase u with umlauts (ü)
44
+ REPLACE_ODD=" ${REPLACE_ODD} ;s/š/s\xCC\x8C/g" ; # lowercase s with caron (š)
45
+
46
+ # remove all the "Not Applicable" entries from the list
47
+ REPLACE_NA=" /.*\tn.a.\t.*/d" ;
48
+
49
+ # remove all the entries that start with a dash or a single quote
50
+ REPLACE_NONLETTER=" /^[-']/d" ;
51
+
52
+ HEADER=" %% Copyright 2010-2019 Erich E. Hoover
53
+
52
54
%%
53
55
%% =============================================
54
56
%% IMPORTANT NOTICE:
@@ -60,23 +62,32 @@ HEADER="%% Copyright 2010 Erich Hoover
60
62
%% http://www.latex-project.org/lppl.txt
61
63
%% =============================================
62
64
%% The List of Title Word Abbreviations below is automatically
63
- %% generatedfrom the ISSN LTWA database, publicly accessible from
65
+ %% generated from the ISSN LTWA database, publicly accessible from
64
66
%% their website:
65
67
%% http://www.issn.org/2-22660-LTWA.php
66
68
" ;
67
69
echo " ${HEADER} " > ${OUTPUT_FILE} ;
68
- ENTRIES=` cat lang_data.txt` ;
70
+ ENTRIES=$( cat lang_data.txt | sed -e " ${REPLACE_ODD} ;${REPLACE_NA} ;${REPLACE_NONLETTER} " ) ;
71
+ I=0;
72
+ export IFS=$' \r\n '
69
73
for ENTRY in ${ENTRIES} ; do
74
+ I=$(( I+ 1 )) ;
75
+ if [ " ${I} " -eq " 1" ]; then continue ; fi
70
76
# Remove all periods from the entry:
71
77
ENTRY=` echo " ${ENTRY} " | sed ' s/\.//g' ` ;
72
- # Pull out the applicable languages for testing:
73
- LANGS=` echo " ${ENTRY} " | awk ' BEGIN { FS = "<td>" } ; { print $4 }' ` ;
78
+ # Pull out the title, abbreviation, and applicable languages (fields are read in that order):
79
+ OLDIFS=${IFS}
80
+ export IFS=$' \t '
81
+ while [ 1 ]; do
82
+ read TITLE ABBRV LANGS;
83
+ break ;
84
+ done < <( echo " ${ENTRY} " )
85
+ export IFS=${OLDIFS}
74
86
# See if one of the languages is the one we're interested in outputting
87
+ OLDIFS=${IFS}
88
+ export IFS=' '
75
89
for ELANG in ` echo " ${LANGS} " | sed ' s/,/\ /g' ` ; do
76
90
if [ " ${ELANG} " = " ${ISSN_LANG} " ]; then
77
- # Pull out the title and abbreviation:
78
- TITLE=` echo " ${ENTRY} " | awk ' BEGIN { FS = "<td>" } ; { print $2 }' ` ;
79
- ABBRV=` echo " ${ENTRY} " | awk ' BEGIN { FS = "<td>" } ; { print $3 }' ` ;
80
91
# Capitalize the first letter of the title and the abbreviation
81
92
TITLE=` echo " ${TITLE: 0: 1} " | tr a-z A-Z` " ${TITLE: 1} " ;
82
93
ABBRV=` echo " ${ABBRV: 0: 1} " | tr a-z A-Z` " ${ABBRV: 1} " ;
@@ -96,5 +107,6 @@ for ENTRY in ${ENTRIES}; do
96
107
break ;
97
108
fi
98
109
done
110
+ export IFS=${OLDIFS}
99
111
done
100
112
0 commit comments