-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathwiki-tests
executable file
·171 lines (152 loc) · 5.13 KB
/
wiki-tests
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/bin/bash
set -e -u

######## Globals:

# config.log is expected one directory above this script; the package name
# and the AP_SRC* dependency paths are read from it.
declare -r CONFIG_LOG="$(dirname "$0")/../config.log"
if [[ ! -f "${CONFIG_LOG}" ]]; then
  echo "Can't find ${CONFIG_LOG}, did you forget to run autogen.sh?" >&2
  exit 1
fi

# Package name as recorded in config.log, e.g. apertium-sme-nob.
# Assigned separately from 'declare' so that a failed or empty extraction is
# detected instead of silently producing a wiki URL without a page name.
PACKAGE=$(grep '^PACKAGE=' "${CONFIG_LOG}" | grep -o "apertium-[^']*") || true
if [[ -z "${PACKAGE}" ]]; then
  echo "Can't find an apertium-* PACKAGE name in ${CONFIG_LOG}" >&2
  exit 1
fi
declare -r PACKAGE

# You shouldn't have to change this unless the wiki gets a new host:
declare -r BASEURL="http://wiki.apertium.org/wiki/${PACKAGE}"

# Workaround for Macs: the functions below go through ${SED}, which is set
# to gsed on Darwin and to plain sed everywhere else.
SED=sed
if [[ "$(uname -s)" = Darwin ]]; then
  SED=gsed
fi
if ! command -v "${SED}" >/dev/null; then
  echo "Can't find '${SED}' in PATH (on a Mac, install GNU sed as gsed)" >&2
  exit 1
fi
declare -r SED
######## Functions:
#######################################
# Make sure a non-empty local HTML copy of a wiki test page is available,
# optionally refreshing it from the wiki first.
# Globals:   BASEURL (read)
# Arguments: $1 - "true" to download a fresh copy first, "false" to reuse
#                 the existing file (the value is run as a command)
#            $2 - wiki sub-page name, appended to BASEURL
#            $3 - path of the local HTML file
# Outputs:   diagnostics on stderr
# Returns:   0 when $3 exists and is non-empty afterwards; otherwise the
#            whole script exits with status 1.
#######################################
fetchtests () {
  local -r update=$1
  local -r testtype=$2
  local -r html=$3
  local tmp

  if ${update}; then
    tmp=$(mktemp -t wiki-tests.html.XXXXXXXXXX)
    # Download into a scratch file first, so a failed or empty download
    # never overwrites a previously fetched copy.
    if wget -O "${tmp}" -q "${BASEURL}/${testtype}" && [[ -s "${tmp}" ]]; then
      mv -- "${tmp}" "${html}"
    else
      rm -f -- "${tmp}"
      echo "Couldn't fetch ${BASEURL}/${testtype}" >&2
    fi
  fi

  if [[ ! -s "${html}" ]]; then
    echo "${html} does not exist or is empty (use '-u' option)" >&2
    exit 1
  fi
}
#######################################
# Decode the three standard HTML entities (&lt; &gt; &amp;) on stdin.
# They show up even when the wiki text contains the plain characters,
# because the tests are read from MediaWiki's rendered HTML.
# Arguments: none (filter: stdin -> stdout)
#######################################
decodehtml() {
  # &amp; is decoded last so that an escaped entity such as "&amp;lt;"
  # becomes the literal text "&lt;" rather than "<".
  sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g'
}
#######################################
# Normalise sentences (one per line) so that reference and MT output can be
# compared as plain strings.
# Globals:   SED (read) - name of the sed binary to run
# Arguments: none (filter: stdin -> stdout)
#######################################
cleantst () {
  # Byte-order mark and zero-width space as raw UTF-8 bytes. Hex escapes in
  # $'...' quoting are used instead of printf "\uXXXX", which is not
  # supported by every printf (e.g. on Mac).
  local -r bom=$'\xEF\xBB\xBF'
  local -r zws=$'\xE2\x80\x8B'

  # In order: trim leading and trailing spaces, append a full stop when the
  # line does not already end in punctuation, squeeze runs of spaces into
  # one, then drop the invisible BOM / zero-width-space characters.
  "${SED}" \
    -e 's/^ *//' \
    -e 's/ *$//' \
    -e 's/\([^,.?!:;]\)$/\1./' \
    -e 's/  */ /g' \
    -e "s/${bom}//g" \
    -e "s/${zws}//g"
}
#######################################
# Pull the source sentences for one translation direction out of the
# rendered wiki page.
# Globals:   SED (read)
# Arguments: $1 - source language code
#            $2 - target language code
# Input:     HTML on stdin
# Outputs:   one cleaned source sentence per line on stdout
#######################################
cleansrc () {
  local -r from=$1
  local -r to=$2
  # A test item starts with either "(src)" or "(src-trg)".
  local -r marker="\\((${from})\\|(${from}-${to})\\)"

  # Keep everything from the list item up to (not including) the arrow,
  # remove HTML tags and the direction marker, then normalise and decode.
  grep -o "<li> ${marker}[^→]*" \
    | "${SED}" -e "s%</*[^>]*>%%g" -e "s% *${marker} *%%" \
    | cleantst \
    | decodehtml
}
#######################################
# Pull the reference translations for one translation direction out of the
# rendered wiki page.
# Globals:   SED (read)
# Arguments: $1 - source language code
#            $2 - target language code
# Input:     HTML on stdin
# Outputs:   one cleaned reference translation per line on stdout
#######################################
cleantrg () {
  local -r srclang=$1 trglang=$2
  # A test item starts with either "(src)" or "(src-trg)".
  local -r tdir="\\((${srclang})\\|(${srclang}-${trglang})\\)"

  # Drop everything up to and including the first arrow, strip HTML tags
  # (as cleansrc does for the source side, so inline markup or a closing
  # </li> on the same line cannot end up in the reference), and cut off any
  # trailing ":: comment".
  grep -o "<li> ${tdir}.*" \
    | "${SED}" -e 's%[^→]*→ *%%' -e 's%</*[^>]*>%%g' -e 's%::.*%%' \
    | cleantst \
    | decodehtml
}
#######################################
# Compare reference translations with MT output line by line, print a
# report per test and a final "correct / total" line.
# Arguments: $1 - mode name, printed in front of every test
#            $2 - file with source sentences, one per line
#            $3 - file with reference translations, one per line
#            $4 - file with MT output, one per line
#            $5 - "true" to hide passing tests (the value is run as a command)
#            $6 - "true" to hide failing tests (the value is run as a command)
# Outputs:   the report on stdout
#######################################
summary () {
  local -r mode=$1 srclist=$2 trglist=$3 tstlist=$4 onlyfail=$5 onlypass=$6
  local -i total=0
  local -i correct=0
  # ASCII unit separator: a single byte that does not occur in ordinary
  # text and is not IFS whitespace, so empty columns survive the 'read'.
  # (An empty separator would make paste glue the columns together and
  # every test would compare "" with "".)
  local -r sep=$'\x1f'
  local src trg tst
  local pct=

  # Output the MT vs ref translations:
  while IFS="${sep}" read -r src trg tst; do
    if [[ "${trg}" = "${tst}" ]]; then
      correct=$(( correct + 1 ))
      ${onlyfail} || printf "%s\t %s\nWORKS\t %s\n\n\n" "${mode}" "${src}" "${tst}"
    else
      ${onlypass} || printf "%s\t %s\n\t- %s\n\t+ %s\n\n\n" "${mode}" "${src}" "${trg}" "${tst}"
    fi
    total=$(( total + 1 ))
  done < <(paste -d "${sep}" "${srclist}" "${trglist}" "${tstlist}")

  # Output the sums. The percentage is optional: it is only added when the
  # 'calc' tool is installed, there is at least one test (no division by
  # zero), and the computation succeeds.
  if (( total > 0 )) && command -v calc &>/dev/null; then
    if pct=$(calc -p "round(${correct} / ${total}, 4) * 100"); then
      pct=", ${pct}%"
    else
      pct=
    fi
  fi
  echo "${correct} / ${total}${pct}"
}
#######################################
# Print the revision of the checkout in the current directory.
# Tries, in order: svn, git-svn, plain git.
# Arguments: none
# Outputs:   the revision line on stdout, or a placeholder when the current
#            directory is not a recognised checkout
#######################################
echo_revision () {
  local rev

  if rev=$(svn info 2>/dev/null); then
    # Accept both the Norwegian and the English label of 'svn info'.
    echo "${rev}" | grep -e ^Revisjon -e ^Revision
  elif git config --get svn-remote.svn.fetch &>/dev/null; then
    git svn info | grep -a -e ^Revisjon -e ^Revision
  elif rev=$(git rev-parse --short HEAD 2>/dev/null); then
    # Plain git checkout without an svn remote.
    echo "Revision: ${rev} (git)"
  else
    echo "(doesn't seem to be a repo)"
  fi
}
#######################################
# Print the revision of the current directory, followed by the revision of
# every directory listed in an AP_SRC* line of ../config.log (relative to
# this script).
# Arguments: none
# Outputs:   one line per checkout on stdout
#######################################
showrevisions () {
  local var dir

  echo_revision
  grep ^AP_SRC "$(dirname "$0")/../config.log" | while IFS='=' read -r var dir; do
    printf "%s " "${var}"
    # config.log wraps the value in single quotes; remove them.
    dir=${dir//\'/}
    # The subshell keeps the directory change local. Only ask for a revision
    # once the cd has succeeded, so that a missing directory is never
    # reported with the revision of some other checkout.
    if ! ( cd "${dir}" && echo_revision ); then
      echo "(couldn't determine revision of ${dir})"
    fi
  done
}
#######################################
# Entry point: parse the command line, extract the tests from the wiki
# page, translate the source sentences with apertium and print a report.
# Arguments: [-u] [-p|-f] {Regression,Pending} srclang-trglang
# Outputs:   the report on stdout; usage and errors on stderr
# Returns:   exits 2 on usage errors, 1 when apertium fails
#######################################
main () {
  # Parse options:
  local opt OPTIND=1
  local update=false
  local onlypass=false
  local onlyfail=false
  while getopts "upf" opt; do
    case "${opt}" in
      u) update=true ;;
      f) onlyfail=true ;;
      p) onlypass=true ;;
      \?) echo "Invalid option" >&2; exit 2 ;;
      :) echo "Option requires an argument." >&2; exit 2 ;;
    esac
  done
  shift "$((OPTIND-1))"

  # -p and -f are mutually exclusive; exactly two operands are required.
  if [[ $# -ne 2 ]] || { ${onlypass} && ${onlyfail}; }; then
    {
      echo "Usage: $0 [-u] [-p|-f] {Regression,Pending} srclang-trglang"
      echo "-u Use updated tests"
      echo "-p Show only passing tests"
      echo "-f Show only failing tests"
    } >&2
    exit 2
  fi

  local -r testtype="$1_tests"
  local -r srclang="${2%%-*}"
  local -r trglang="${2##*-}"

  # Derived options:
  local -r mode="${srclang}-${trglang}"
  local -r html="$(dirname "$0")/${testtype}.html"

  local srclist trglist tstlist rawlist
  srclist=$(mktemp -t "${mode}-src.XXXXXXXXXX")
  trglist=$(mktemp -t "${mode}-trg.XXXXXXXXXX")
  tstlist=$(mktemp -t "${mode}-tst.XXXXXXXXXX")
  rawlist=$(mktemp -t "${mode}-raw.XXXXXXXXXX")
  # The paths are expanded (shell-quoted) right now on purpose: the
  # variables are local and no longer exist when the EXIT trap fires.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q ' "${srclist}" "${trglist}" "${tstlist}" "${rawlist}")" EXIT

  showrevisions

  # User-supplied text is passed as printf arguments, never as the format.
  printf 'Running %s-tests with mode "%s" ' "$1" "${mode}"
  if ${update}; then
    printf "with updated tests "
  fi
  echo "..."
  echo

  fetchtests "${update}" "${testtype}" "${html}"
  cleansrc "${srclang}" "${trglang}" < "${html}" > "${srclist}"
  cleantrg "${srclang}" "${trglang}" < "${html}" > "${trglist}"

  # Translate; when apertium fails, show whatever it printed and give up.
  if ! apertium -d . "${mode}" < "${srclist}" > "${rawlist}"; then
    cat "${rawlist}" >&2
    exit 1
  fi
  cleantst < "${rawlist}" > "${tstlist}"

  summary "${mode}" "${srclist}" "${trglist}" "${tstlist}" "${onlyfail}" "${onlypass}"
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"