Skip to content

Commit 7b531c4

Browse files
committed
Add CLI tool for generating ToC in Markdown docs; ignore its backup files
1 parent 9cbdedb commit 7b531c4

File tree

2 files changed

+353
-1
lines changed

2 files changed

+353
-1
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@
1010
# rspec failure tracking
1111
.rspec_status
1212
/vendor/
13-
spec/examples.txt
13+
spec/examples.txt
14+
README.md.orig.*
15+
README.md.toc.*

bin/gh-md-toc

+350
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Steps:
5+
#
6+
# 1. Download corresponding html file for some README.md:
7+
# curl -s $1
8+
#
9+
# 2. Discard rows that do not contain the substring 'user-content-' (GitHub's markup):
10+
# awk '/user-content-/ { ...
11+
#
12+
# 3.1 Get last number in each row like ' ... </span></a>sitemap.js</h1'.
13+
# It's a level of the current header:
14+
# substr($0, length($0), 1)
15+
#
16+
# 3.2 Get level from 3.1 and insert corresponding number of spaces before '*':
17+
# sprintf("%*s", substr($0, length($0), 1)*3, " ")
18+
#
19+
# 4. Find the header's text and insert it inside "* [ ... ]":
20+
# substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
21+
#
22+
# 5. Find anchor and insert it inside "(...)":
23+
# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
24+
#
25+
26+
# Release number of this tool; printed by --help / --version.
gh_toc_version="0.7.0"

# User-Agent string sent with every HTTP request made by the script.
gh_user_agent="gh-md-toc v${gh_toc_version}"
29+
30+
#
# Download the document behind a URL and write its body to stdout.
#
# Globals:   gh_user_agent (read) - sent as the User-Agent header
# Arguments: $1 - URL of the rendered (HTML) README
# Outputs:   response body on stdout; diagnostics on stderr
# Exits:     1 when neither curl nor wget is available
#
gh_toc_load() {
    local gh_url=$1

    if command -v curl >/dev/null 2>&1; then
        curl --user-agent "$gh_user_agent" -s "$gh_url"
    elif command -v wget >/dev/null 2>&1; then
        wget --user-agent="$gh_user_agent" -qO- "$gh_url"
    else
        # The caller pipes stdout straight into the TOC parser, so the hint
        # has to go to stderr or it would be consumed as page content.
        echo "Please, install 'curl' or 'wget' and try again." >&2
        exit 1
    fi
}
46+
47+
#
# Convert a local Markdown file into HTML through the GitHub Markdown API.
#
# Example of the underlying API:
# -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
# <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
#
# Globals:   GH_TOC_TOKEN (read)  - optional API token; when empty, a
#                                   token.txt next to this script is tried
#            gh_user_agent (read) - sent as the User-Agent header
# Arguments: $1 - path of the Markdown file to render
# Outputs:   the rendered HTML, or exactly one sentinel line:
#            "XXNetworkErrorXX" when curl fails,
#            "XXRateLimitXX" when the reply reports an exceeded rate limit
# Returns:   1 when curl fails, 0 otherwise
#
gh_toc_md2html() {
    local gh_file_md=$1
    local url="https://api.github.com/markdown/raw"
    local token="" token_file="" output=""
    local -a headers=(-H "Content-Type:text/plain")

    if [ -n "${GH_TOC_TOKEN:-}" ]; then
        token=$GH_TOC_TOKEN
    else
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        if [ -f "$token_file" ]; then
            token="$(cat "$token_file")"
        fi
    fi

    # Only add the Authorization header when there is a token, so curl never
    # receives an empty -H argument.
    if [ -n "$token" ]; then
        headers+=(-H "Authorization: token ${token}")
    fi

    if ! output=$(curl -s \
        --user-agent "$gh_user_agent" \
        --data-binary @"$gh_file_md" \
        "${headers[@]}" \
        "$url"); then
        # Emit the sentinel alone: callers compare the whole output against
        # it, so any partial body printed after it would hide the failure.
        echo "XXNetworkErrorXX"
        return 1
    fi

    if [ "$(echo "${output}" | awk '/API rate limit exceeded/')" != "" ]; then
        echo "XXRateLimitXX"
    else
        echo "${output}"
    fi
}
85+
86+
87+
#
# Tell whether the passed string is an http(s) URL.
#
# Arguments: $1 - string to classify
# Outputs:   "yes" for strings starting with http:// or https://,
#            "no" for everything else
#
gh_is_url() {
    # Require the full scheme separator so that a local file whose name merely
    # starts with "http" (e.g. http_notes.md) is still treated as a path.
    case "$1" in
        http://* | https://*)
            echo "yes" ;;
        *)
            echo "no" ;;
    esac
}
98+
99+
#
# TOC generator
#
# Prints the table of contents of one document and, when asked, splices it
# into the local file between the <!--ts--> and <!--te--> marker lines.
#
# Arguments: $1 - URL or local path of the Markdown document
#            $2 - total number of documents handled in this run; the
#                 "Table of Contents" heading is printed only when it is "1"
#            $3 - "yes" to insert the TOC into the (local) file
#            $4 - "yes" to remove the backup and the separate TOC file after
#                 a successful insert
# Outputs:   the TOC and progress messages on stdout
# Exits:     1 on an empty source, a failed download, an API error, or
#            missing markers
#
gh_toc(){
    local gh_src=$1
    local gh_src_copy=$1
    local gh_ttl_docs=$2
    local need_replace=$3
    local no_backup=$4

    if [ "$gh_src" = "" ]; then
        echo "Please, enter URL or local path for a README.md"
        exit 1
    fi

    # Show "TOC" string only if working with one document
    if [ "$gh_ttl_docs" = "1" ]; then
        echo "Table of Contents"
        echo "================="
        echo ""
        # With a single document the links stay relative (no source prefix).
        gh_src_copy=""
    fi

    if [ "$(gh_is_url "$gh_src")" == "yes" ]; then
        gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy"
        if [ "${PIPESTATUS[0]}" != "0" ]; then
            echo "Could not load remote document."
            echo "Please check your url or network connectivity"
            exit 1
        fi
        if [ "$need_replace" = "yes" ]; then
            echo
            echo "!! '$gh_src' is not a local file"
            echo "!! Can't insert the TOC into it."
            echo
        fi
    else
        local rawhtml
        rawhtml=$(gh_toc_md2html "$gh_src")
        if [ "$rawhtml" == "XXNetworkErrorXX" ]; then
            echo "Parsing local markdown file requires access to github API"
            echo "Please make sure curl is installed and check your network connectivity"
            exit 1
        fi
        if [ "$rawhtml" == "XXRateLimitXX" ]; then
            echo "Parsing local markdown file requires access to github API"
            echo "Error: You exceeded the hourly limit. See: https://developer.github.com/v3/#rate-limiting"
            local token_file
            token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
            echo "or place GitHub auth token here: ${token_file}"
            exit 1
        fi

        local toc
        toc=$(echo "$rawhtml" | gh_toc_grab "$gh_src_copy")
        echo "$toc"

        if [ "$need_replace" = "yes" ]; then
            if grep -Fxq "<!--ts-->" "$gh_src" && grep -Fxq "<!--te-->" "$gh_src"; then
                echo "Found markers"
            else
                echo "You don't have <!--ts--> or <!--te--> in your file...exiting"
                exit 1
            fi
            local ts="<\!--ts-->"
            local te="<\!--te-->"
            local dt
            dt=$(date +'%F_%H%M%S')
            local ext=".orig.${dt}"
            local toc_path="${gh_src}.toc.${dt}"
            local toc_footer
            toc_footer="<!-- Added by: $(whoami), at: $(date) -->"
            # http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
            # clear old TOC (sed keeps the untouched file as ${gh_src}${ext})
            sed -i"${ext}" "/${ts}/,/${te}/{//!d;}" "$gh_src"
            # create toc file
            echo "${toc}" > "${toc_path}"
            echo -e "\n${toc_footer}\n" >> "$toc_path"
            # insert toc file; the Darwin branch passes sed's in-place suffix
            # as a separate (empty) argument
            if [[ "$(uname)" == "Darwin" ]]; then
                sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
            else
                sed -i "/${ts}/r ${toc_path}" "$gh_src"
            fi
            echo
            # $no_backup is empty for a plain --insert, so it must be quoted
            # or the test degenerates into a "[ = yes ]" syntax error.
            if [ "$no_backup" = "yes" ]; then
                rm -- "${toc_path}" "${gh_src}${ext}"
            fi
            echo "!! TOC was added into: '$gh_src'"
            if [ -z "$no_backup" ]; then
                echo "!! Origin version of the file: '${gh_src}${ext}'"
                echo "!! TOC added into a separate file: '${toc_path}'"
            fi
            echo
        fi
    fi
}
192+
193+
#
# Grabber of the TOC from rendered html
#
# Reads GitHub-rendered HTML on stdin and prints one Markdown list item per
# heading, indented three spaces per heading level.
#
# Arguments: $1 - a source url of the document, prepended to every anchor.
#                 It is needed when the TOC is generated for multiple
#                 documents; pass "" for links relative to the document.
# Outputs:   the TOC lines on stdout
#
gh_toc_grab() {
    local common_awk_script awkscript
    local -a grepcmd echoargs

    # Shared tail of the awk program: rewrites the anchor character by
    # character ("+" becomes a space, "%" becomes a backslash-x prefix) and
    # prints the finished list item.
    common_awk_script='
        modified_href = ""
        split(href, chars, "")
        for (i=1;i <= length(href); i++) {
            c = chars[i]
            res = ""
            if (c == "+") {
                res = " "
            } else {
                if (c == "%") {
                    res = "\\\\x"
                } else {
                    res = c ""
                }
            }
            modified_href = modified_href res
        }
        print sprintf("%*s", level*3, " ") "* [" text "](" gh_url modified_href ")"
    '
    # OS/390 gets pcregrep, a plain echo and a slightly different href regex.
    if [ "$(uname -s)" == "OS/390" ]; then
        grepcmd=(pcregrep -o)
        echoargs=()
        awkscript='{
            level = substr($0, length($0), 1)
            text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
            href = substr($0, match($0, "href=\"([^\"]+)?\"")+6, RLENGTH-7)
            '"$common_awk_script"'
        }'
    else
        grepcmd=(grep -Eo)
        echoargs=(-e)
        awkscript='{
            level = substr($0, length($0), 1)
            text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
            href = substr($0, match($0, "href=\"[^\"]+?\"")+6, RLENGTH-7)
            '"$common_awk_script"'
        }'
    fi

    # if closed <h[1-6]> is on the new line, then move it on the prev line
    # for example:
    #   was:    The command <code>foo1</code>
    #           </h1>
    #   became: The command <code>foo1</code></h1>
    sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

    # find strings that corresponds to template
    "${grepcmd[@]}" '<a.*id="user-content-[^"]*".*</h[1-6]' |

    # remove code tags
    sed -e 's/<code>//g' -e 's/<\/code>//g' |

    # remove g-emoji
    sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

    # now all rows are like:
    #   <a id="user-content-..." href="..."><span ...></span></a> ... </h1
    # format result line
    #   * $0 - whole string
    #   * last element of each row: "</hN" where N in (1,2,3,...)
    echo "${echoargs[@]}" "$(awk -v "gh_url=$1" "$awkscript")"
}
263+
264+
# perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
265+
266+
#
# Returns filename only from full path or url
#
# Arguments: $1 - path or URL
# Outputs:   everything after the last "/" (the whole string if there is none)
#
gh_toc_get_filename() {
    # printf instead of echo: a basename such as "-n" or "-e" would otherwise
    # be taken as an echo option and nothing would be printed.
    printf '%s\n' "${1##*/}"
}
272+
273+
#
# Options handlers
#
# Parses the command line and dispatches: help, version report, TOC from
# STDIN ("-"), or TOC for each listed document (optionally inserted in place).
#
# Globals:   gh_toc_version (read), TMPDIR (read)
# Arguments: the script's command-line arguments
# Outputs:   help/version text or the generated TOC(s) on stdout
#
gh_toc_app() {
    local need_replace="no"
    # Declared here so the flag never leaks into (or is inherited from) the
    # caller's scope; empty means "keep the backup files".
    local no_backup=""

    if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then
        local app_name
        app_name=$(basename "$0")
        echo "GitHub TOC generator ($app_name): $gh_toc_version"
        echo ""
        echo "Usage:"
        echo " $app_name [--insert] src [src] Create TOC for a README file (url or local path)"
        echo " $app_name [--no-backup] src [src] Create TOC without backup, requires <!--ts--> / <!--te--> placeholders"
        echo " $app_name - Create TOC for markdown from STDIN"
        echo " $app_name --help Show help"
        echo " $app_name --version Show version"
        return
    fi

    if [ "$1" = '--version' ]; then
        echo "$gh_toc_version"
        echo
        echo "os: $(lsb_release -d | cut -f 2)"
        echo "kernel: $(cat /proc/version)"
        echo "shell: $($SHELL --version)"
        echo
        local tool
        for tool in curl wget grep awk sed; do
            printf "%-5s: " "$tool"
            echo "$("$tool" --version | head -n 1)"
        done
        return
    fi

    if [ "$1" = "-" ]; then
        local tmp_dir=${TMPDIR:-/tmp}
        if [ ! -d "$tmp_dir" ]; then
            mkdir -p "$tmp_dir"
        fi
        local gh_tmp_md
        if [ "$(uname -s)" == "OS/390" ]; then
            local timestamp
            timestamp=$(date +%m%d%Y%H%M%S)
            gh_tmp_md="$tmp_dir/tmp.$timestamp"
        else
            gh_tmp_md=$(mktemp "$tmp_dir/tmp.XXXXXX")
        fi
        # Copy STDIN byte-for-byte. A plain `read` loop would strip leading
        # whitespace and backslashes (changing the Markdown, e.g. indented
        # code blocks) and drop a final line that lacks a newline.
        cat > "$gh_tmp_md"
        gh_toc_md2html "$gh_tmp_md" | gh_toc_grab ""
        local rc=$?
        rm -f -- "$gh_tmp_md"
        return "$rc"
    fi

    if [ "$1" = '--insert' ]; then
        need_replace="yes"
        shift
    fi

    if [ "$1" = '--no-backup' ]; then
        need_replace="yes"
        no_backup="yes"
        shift
    fi

    local md
    for md in "$@"; do
        echo ""
        gh_toc "$md" "$#" "$need_replace" "$no_backup"
    done

    echo ""
    echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)"
}
345+
346+
#
347+
# Entry point
348+
#
349+
# Hand the script's command-line arguments to the option handler unchanged.
gh_toc_app "$@"
350+

0 commit comments

Comments
 (0)