forked from jhradilek/check-links
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest-docbk.sh
executable file
·346 lines (264 loc) · 8.94 KB
/
test-docbk.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#!/bin/bash
# test-docbk.sh - list broken external links in a DocBook XML file
# Copyright (C) 2013, 2014 Jaromir Hradilek <[email protected]>
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTA-
# BILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
# Name under which the script was invoked, with any leading directory
# components stripped:
NAME="${0##*/}"

# Option flags; each one stays at 0 (disabled) unless changed later:
OPT_ALL=0 OPT_LIST=0 OPT_PARALLEL=0 OPT_XINCLUDE=0

# Strings placed around the status keywords in the output; empty values
# leave the output uncolored:
CLR_FAILED='' CLR_IGNORED='' CLR_PASSED='' CLR_RESET=''
# Prints an error message, prefixed with the script name, to standard error
# output and terminates the script with a selected exit status.
#
# The message is printed verbatim: backslash sequences that may appear in
# it (for example, in a file name supplied by the user) are not interpreted.
#
# Usage: exit_with_error ERROR_MESSAGE [EXIT_STATUS]
#
#   ERROR_MESSAGE  text to print; a generic message is used when omitted
#   EXIT_STATUS    exit status of the script; defaults to 1
function exit_with_error {
  local error_message=${1:-'An unexpected error has occurred.'}
  local exit_status=${2:-1}

  # Use printf with a fixed format instead of `echo -e' so that the message
  # is never subject to escape-sequence expansion:
  printf '%s: %s\n' "$NAME" "$error_message" >&2

  # Terminate the script with the given exit status:
  exit "$exit_status"
}
# Writes a short summary of the supported invocations and command-line
# options to standard output, one line per entry.
#
# Usage: print_usage
function print_usage {
  # Emit every line of the help text with a single printf call; the format
  # is reused for each argument:
  printf '%s\n' \
    "Usage: $NAME [-acip] FILE" \
    " $NAME [-i] -l FILE" \
    '' \
    ' -a print the status of all links' \
    ' -c enable colored output' \
    ' -i perform XInclude processing' \
    ' -l list all links without checking their status' \
    ' -p check links in parallel' \
    ' -h display this help and exit'
}
# Determines whether an external link is functional and prints the result
# to standard output.
#
# A link is classified as follows:
#
#   IGNORED  mailto: links, file:/// links, and links whose host part
#            starts with localhost or 127.0.0.1; these are never checked
#   PASSED   links with a scheme://... form for which check_link succeeds
#   FAILED   everything else
#
# FAILED links are always reported; PASSED and IGNORED links are reported
# only when OPT_ALL is non-zero. An empty link is silently skipped.
#
# The function always returns 0, regardless of the link status, so that
# callers (including xargs in parallel mode) do not mistake a suppressed
# report for an error.
#
# Usage: print_link_status LINK
function print_link_status {
  local link="$1"
  local status=0

  # Make sure the link is not empty:
  [[ -n "$link" ]] || return 0

  # Check whether the link is broken:
  if [[ "$link" =~ ^mailto: ]] || \
     [[ "$link" =~ ^file:/// ]] || \
     [[ "$link" =~ ^[a-z]+://(localhost|127\.0\.0\.1) ]]; then
    # Mark the link as ignored:
    status=2
  elif [[ "$link" =~ ^[a-z]+:// ]] && check_link "$link"; then
    # Mark the link as functional:
    status=1
  fi

  # Report the status of the link. Explicit if statements are used instead
  # of `test && echo' so that a suppressed report does not become the
  # return value of the function:
  case "$status" in
    0)
      # Report a broken link:
      printf '%s\n' "${CLR_FAILED}FAILED:${CLR_RESET} $link"
      ;;
    1)
      # Report a functional link:
      if [[ "$OPT_ALL" -ne 0 ]]; then
        printf '%s\n' "${CLR_PASSED}PASSED:${CLR_RESET} $link"
      fi
      ;;
    *)
      # Report an ignored link:
      if [[ "$OPT_ALL" -ne 0 ]]; then
        printf '%s\n' "${CLR_IGNORED}IGNORED:${CLR_RESET} $link"
      fi
      ;;
  esac

  return 0
}
# Extracts the url attribute of every ulink element found in a DocBook XML
# file and writes the resulting list to standard output, sorted, with
# duplicates and empty lines removed.
#
# When OPT_XINCLUDE is non-zero, the file is first passed through xmllint
# with XInclude processing enabled and the links are extracted from its
# output; otherwise the file is read directly. Diagnostics of both XML
# tools are discarded.
#
# Usage: print_links FILE
function print_links {
  local file="$1"
  local xpath='//ulink/@url'

  {
    if [[ "$OPT_XINCLUDE" -eq 0 ]]; then
      # Read the links straight from the selected file:
      xmlstarlet sel -t -v "$xpath" "$file" 2>/dev/null
    else
      # Resolve the inclusions first, then read the links from the result:
      xmllint --xinclude --postvalid "$file" 2>/dev/null | \
        xmlstarlet sel -t -v "$xpath" 2>/dev/null
    fi
  } | sort -u | sed '/^$/d'
}
# Determines whether an external link is functional. If the link is valid,
# returns 0, otherwise returns the non-zero exit status of curl.
#
# The link is probed with curl using the following settings:
#
#   -A        identify as a desktop web browser
#   -4        resolve host names to IPv4 addresses only
#   -I        fetch the headers only
#   -L        follow redirects
#   -f        treat HTTP error responses as a failure
#   -k        do not verify TLS certificates
#   -s        do not print progress or error messages
#
# The connection attempt is limited to 5 seconds and transient failures
# are retried up to 3 times. All output of curl is discarded.
#
# Usage: check_link LINK
function check_link {
  local link="$1"
  local user_agent='Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0'

  # Check the link:
  curl -A "$user_agent" \
       --connect-timeout 5 --retry 3 \
       -4ILfks "$link" &>/dev/null
}
# Main part of the script: parses the command-line options, validates the
# FILE argument and the required utilities, and then either lists the links
# (-l), checks them in parallel (-p), or checks them one at a time.
#
# Exit statuses used below: 22 for an invalid option or a wrong number of
# arguments, 2 for a missing file, 13 for an unreadable file, 21 when FILE
# is not a regular file, 1 for a missing dependency, and 0 otherwise.

# Process command-line options:
while getopts ':achilp' OPTION; do
  case "$OPTION" in
    a)
      # Enable listing of all links:
      OPT_ALL=1
      ;;
    c)
      # Enable colored output:
      bold=$(tput bold)
      CLR_IGNORED="$bold$(tput setaf 3)"
      CLR_FAILED="$bold$(tput setaf 1)"
      CLR_PASSED="$bold$(tput setaf 2)"
      CLR_RESET=$(tput sgr0)
      ;;
    h)
      # Print usage information to standard output:
      print_usage

      # Terminate the script:
      exit 0
      ;;
    i)
      # Enable XInclude processing:
      OPT_XINCLUDE=1
      ;;
    l)
      # Enable listing of links without checking their status:
      OPT_LIST=1
      ;;
    p)
      # Enable parallel processing:
      OPT_PARALLEL=1
      ;;
    *)
      # Report an error and terminate the script:
      exit_with_error "Invalid option -- '$OPTARG'" 22
      ;;
  esac
done

# Shift positional parameters:
shift $((OPTIND - 1))

# Verify the number of command line arguments:
[[ "$#" -eq 1 ]] || exit_with_error 'Invalid number of arguments' 22

# Get the name of the XML file:
file="$1"

# Verify that the file exists:
[[ -e "$file" ]] || exit_with_error "$file: No such file or directory" 2
[[ -r "$file" ]] || exit_with_error "$file: Permission denied" 13
[[ -f "$file" ]] || exit_with_error "$file: Not a file" 21

# Verify that all required utilities are in the system:
for dependency in curl xmllint xmlstarlet; do
  if ! type "$dependency" &>/dev/null; then
    exit_with_error "Missing dependency -- '$dependency'" 1
  fi
done

# Check which action to perform:
if [[ "$OPT_LIST" -ne 0 ]]; then
  # Locate all external links and print them to standard output:
  print_links "$file"
elif [[ "$OPT_PARALLEL" -ne 0 ]]; then
  # Export required functions and variables so that they are available in
  # the bash processes started by xargs:
  export -f print_link_status check_link
  export OPT_ALL CLR_IGNORED CLR_FAILED CLR_PASSED CLR_RESET

  # Check the status of all external links and print it to standard output.
  # The list is converted to NUL-delimited records and read with `xargs -0'
  # because plain xargs applies its own quote and backslash processing to
  # the input and fails on links that contain such characters:
  print_links "$file" | tr '\n' '\0' | \
    xargs -0 -n 1 -P 0 bash -c 'print_link_status "$@"' --
else
  # Check the status of all external links and print it to standard output:
  print_links "$file" | while read -r link; do
    print_link_status "$link"
  done
fi

# Terminate the script:
exit 0
:<<-=cut
=head1 NAME
test-docbk - list broken external links in a DocBook XML file
=head1 SYNOPSIS
B<test-docbk> [B<-acip>] I<file>
B<test-docbk> [B<-i>] B<-l> I<file>
B<test-docbk> B<-h>
=head1 DESCRIPTION
The B<test-docbk> utility reads a DocBook XML file, locates all external
links and prints a list of those that are no longer functional to standard
output. In addition, it can be used to print all external links in the
selected file without checking their status, or configured to perform
XInclude processing.
By default, the B<test-docbk> utility treats external links as follows:
=over
=item *
If the external link is functional, the utility does not produce any output
and proceeds to check the next link in the queue.
=item *
If the external link is not functional, the utility prints the keyword
B<FAILED> followed by the URL.
=back
To change this behavior, use one or more of the command-line options listed
below.
=head1 OPTIONS
=over
=item B<-a>
Prints the current status of all external links, that is, B<PASSED> for
links that are functional, B<FAILED> for links that appear to be broken,
and B<IGNORED> for links that are explicitly ignored (typically email
addresses). By default, the B<test-docbk> utility prints only broken
links.
=item B<-c>
Enables colored output.
=item B<-i>
Performs XInclude processing. By default, the B<test-docbk> utility
checks only those links that are present in the selected file. With this
option, the utility also checks links in files that are included in the
selected file by using the B<E<lt>xi:includeE<gt>> statement.
=item B<-l>
Lists all external links in the selected file without checking their status.
This option can be used in conjunction with the B<-i> option.
=item B<-p>
Checks the current status of external links in parallel. By default, the
B<test-docbk> utility checks external links one at a time.
=item B<-h>
Displays usage information and exits.
=back
=head1 EXAMPLES
=over
=item *
To list all broken links in a selected DocBook XML file, type the following
at a shell prompt:
test-docbk FILE
=item *
To list all links in a selected DocBook XML file along with their current
status (B<PASSED>, B<FAILED>, or B<IGNORED>), run the following command:
test-docbk -a FILE
=item *
To list all links in a selected DocBook XML file without checking their
status, type:
test-docbk -l FILE
=item *
To list all broken links in a selected DocBook XML file and all files that
are included in it using the B<E<lt>xi:includeE<gt>> statement, run:
test-docbk -i FILE
=back
=head1 SEE ALSO
B<curl>(1), B<xmllint>(1), B<xmlstarlet>(1)
=head1 BUGS
To report a bug or submit a patch, please, send an email to
E<lt>[email protected]E<gt>.
=head1 COPYRIGHT
Copyright (C) 2013, 2014 Jaromir Hradilek E<lt>[email protected]E<gt>
This program is free software; see the source for copying conditions. It is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
=cut