forked from clivedavies-cpi/confluence-wiki-sync
wiki_sync.py
240 lines (195 loc) · 8.96 KB
#!/usr/bin/env python3
"""
This tool looks for modified doc files, converts them to Jira wiki markup and
uploads them to Confluence.
"""
import logging
import os
import re
import subprocess
import sys
from typing import Dict, List

import atlassian
import pypandoc

# The format of a link in Jira wiki markup is [link name|link]
# We only need a capture group for the link itself
# TODO handle links like [link], which happen when the link name is the same as
# the link itself
JIRA_LINK_PATTERN = re.compile(r'\[[^|\n]+\|([^|\n]+)\]')
JIRA_MACRO_PATTERN = re.compile(r'\${[a-zA-Z_-]*}')
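
# Illustrative examples of what the two patterns above match (hypothetical
# strings, not taken from this repo):
#   JIRA_LINK_PATTERN.search('[User guide|docs/guide.md]').group(1)
#       -> 'docs/guide.md'
#   JIRA_MACRO_PATTERN.findall('run ${BUILD_DIR}/make.sh')
#       -> ['${BUILD_DIR}']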


def get_files_to_sync(changed_files: str) -> List[str]:
    return [f for f in changed_files.split() if should_sync_file(f)]


def should_sync_file(file_name: str) -> bool:
    # TODO Consider getting a list of extensions from action.yml
    if not (file_name.endswith('.md') or file_name.endswith('.rst')):
        return False
    ignored_folders = os.environ['INPUT_IGNORED-FOLDERS'].split(' ')
    for ignored_folder in ignored_folders:
        if not ignored_folder:
            continue  # Ignore extra spaces
        if not ignored_folder.endswith('/'):
            ignored_folder = ignored_folder + '/'
        if (len(os.path.commonprefix([ignored_folder, file_name]))
                == len(ignored_folder)):
            logging.info('Skipping file %s because folder %s is ignored',
                         file_name, ignored_folder)
            return False
    return True
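
# Example for should_sync_file (hypothetical values): with
# INPUT_IGNORED-FOLDERS='drafts archive/', 'drafts/notes.md' is skipped because
# its folder is ignored, 'src/readme.txt' is skipped because of its extension,
# and 'README.md' is synced.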


def sync_files(files: List[str]) -> bool:
    had_errors = False
    wiki_client = atlassian.Confluence(
        os.environ['INPUT_WIKI-BASE-URL'],
        username=os.environ['INPUT_USER'],
        password=os.environ['INPUT_TOKEN'],
        cloud=True)
    root_page_id = wiki_client.get_page_id(
        os.environ['INPUT_SPACE-NAME'],
        os.environ['INPUT_ROOT-PAGE-TITLE'])
    logging.debug('The base root ID is %s', root_page_id)
    github_repo = os.environ['GITHUB_REPOSITORY']  # e.g. 'octocat/Hello-World'
    # TODO consider getting the name of the default branch and using that
    # instead of HEAD
    # Could be an optional parameter in action.yml
    url_root_for_file = f'https://github.com/{github_repo}/blob/HEAD/'
    repo_name = github_repo.split('/')[1]
    repo_root = get_repository_root()
    for file_path in files:
        read_only_warning = (
            '{info:title=Imported content|icon=true}'
            f'This content has been imported from the {repo_name} repository.'
            '\nYou can find (and modify) the original at'
            f' {url_root_for_file + file_path}.{{info}}\n'
            '{warning:title=Do not update this page directly|icon=true}'
            'Your modifications would be lost the next time the source file'
            ' is updated.{warning}\n')
        absolute_file_path = os.path.join(repo_root, file_path)
        if not os.path.exists(absolute_file_path):
            # TODO delete corresponding wiki page
            logging.warning(
                'File %s not found. Deleting a wiki page is not currently'
                ' supported, so you will have to delete it manually',
                absolute_file_path)
            continue
        try:
            formatted_content = get_formatted_file_content(
                wiki_client, repo_root, file_path, url_root_for_file,
                repo_name)
            content = read_only_warning + formatted_content
        except Exception:
            logging.exception('Error converting file %s:', absolute_file_path)
            had_errors = True
            continue
        try:
            create_or_update_pages_for_file(wiki_client, root_page_id,
                                            repo_name, file_path, content)
        except Exception:
            logging.exception('Error uploading file %s:', absolute_file_path)
            had_errors = True
            continue
    return had_errors
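
# For example (values assumed for illustration): with
# GITHUB_REPOSITORY='octocat/Hello-World', the banner built above for the file
# 'docs/guide.md' links back to
# https://github.com/octocat/Hello-World/blob/HEAD/docs/guide.md.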


def get_formatted_file_content(wiki_client: atlassian.Confluence,
                               repo_root: str, file_path: str, gh_root: str,
                               repo_name: str) -> str:
    """
    Takes a repo-relative file path and returns the file's contents formatted
    as Jira wiki markup.

    Updates relative links to point to a Confluence page if one exists, or to
    a GitHub page otherwise.
    """
    # keys are relative links; values are what they should be replaced with
    links_to_replace: Dict[str, str] = {}
    # keys are macros in XHTML; values are what they should be replaced with
    macros_to_replace: Dict[str, str] = {}
    absolute_file_path = os.path.join(repo_root, file_path)
    formatted_file_contents = pypandoc.convert_file(absolute_file_path, 'jira')
    for link in re.findall(JIRA_LINK_PATTERN, formatted_file_contents):
        # Most links are HTTP - don't waste time with them
        if link.startswith('http'):
            continue
        target_path = os.path.join(os.path.split(absolute_file_path)[0], link)
        target_path = os.path.normpath(target_path)
        if not os.path.exists(target_path):  # Not actually a relative link
            continue
        target_from_root = os.path.relpath(target_path, start=repo_root)
        wiki_page_info = wiki_client.get_page_by_title(
            os.environ['INPUT_SPACE-NAME'],
            f'{repo_name}/{target_from_root}')
        if wiki_page_info:
            # The link is to a file that has a Confluence page
            # Let's link to the page directly
            target_page_url = (os.environ['INPUT_WIKI-BASE-URL']
                               + '/wiki' + wiki_page_info['_links']['webui'])
            links_to_replace[link] = target_page_url
        else:
            # No existing Confluence page - link to GitHub
            links_to_replace[link] = gh_root + target_from_root
    # Replace relative links
    for relative_link, new_link in links_to_replace.items():
        formatted_file_contents = formatted_file_contents.replace(
            f'|{relative_link}]', f'|{new_link}]')
    # Find macros and escape the curly braces
    for macro in re.findall(JIRA_MACRO_PATTERN, formatted_file_contents):
        macros_to_replace[macro] = macro.replace(
            '{', r'\{').replace('}', r'\}')
    for macro, escaped_macro in macros_to_replace.items():
        formatted_file_contents = formatted_file_contents.replace(
            macro, escaped_macro)
    return formatted_file_contents
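
# Illustrative link rewrite (hypothetical paths and repo names): a link
# '[Setup|setup.md]' inside docs/guide.md resolves to the repo path
# 'docs/setup.md'; it is rewritten to the webui URL of the Confluence page
# titled '<repo>/docs/setup.md' if that page already exists, and to
# https://github.com/<owner>/<repo>/blob/HEAD/docs/setup.md otherwise.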


def get_repository_root() -> str:
    repo_root = ''
    with subprocess.Popen(['git', 'rev-parse', '--show-toplevel'],
                          stdout=subprocess.PIPE) as proc:
        repo_root = proc.communicate()[0].rstrip().decode('utf-8')
    return repo_root
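
# Note: this simply returns whatever `git rev-parse --show-toplevel` reports,
# e.g. typically '/github/workspace' when run inside a container-based GitHub
# Action (an assumed location; any checkout path works the same way).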


def create_or_update_pages_for_file(wiki_client: atlassian.Confluence,
                                    root_page_id: int, repo_name: str,
                                    file_name: str, content: str) -> None:
    # The git docs live in a tree under the root page, with the same
    # tree structure as in the git repo.
    # We need to navigate the tree to find where the page lives,
    # creating intermediate pages if they don't exist.
    space_name = os.environ['INPUT_SPACE-NAME']
    current_root_id = root_page_id
    file_path, _ = os.path.split(file_name)
    if file_path:
        page_title = repo_name
        for current_folder in file_path.split(os.sep):
            page_title += f'/{current_folder}'
            sub_page_id = wiki_client.get_page_id(space_name, page_title)
            if sub_page_id:
                logging.debug('Page %s exists with id %s',
                              page_title, sub_page_id)
                current_root_id = sub_page_id
            else:  # Page doesn't exist
                logging.info(
                    'Creating intermediate page %s under root %s',
                    page_title, current_root_id)
                response = wiki_client.create_page(
                    space=space_name,
                    title=page_title,
                    body='{children:sort=title|excerpt=none|all=true}',
                    parent_id=current_root_id,
                    representation='wiki')
                current_root_id = response['id']
            logging.debug('Current root ID is %s', current_root_id)
    title = f'{repo_name}/{file_name}'
    logging.info(
        'Creating or updating page %s under root %s',
        title, current_root_id)
    # TODO Consider making the page read-only
    wiki_client.update_or_create(
        parent_id=current_root_id,
        title=title,
        body=content,
        representation='wiki')
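
# Example tree (hypothetical repo 'Hello-World', file 'docs/guides/setup.md'):
# the page 'Hello-World/docs/guides/setup.md' ends up under the intermediate
# pages 'Hello-World/docs' and 'Hello-World/docs/guides', each created beneath
# the configured root page if it does not already exist.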


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    try:
        files_to_sync = get_files_to_sync(os.environ['INPUT_MODIFIED-FILES'])
        logging.info('Files to be synced: %s', files_to_sync)
        had_sync_errors = sync_files(files_to_sync)
        sys.exit(1 if had_sync_errors else 0)
    except Exception:
        logging.exception('Unhandled exception')
        sys.exit(1)
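
# Local-run note (a sketch of assumptions, not part of the action itself): all
# configuration comes from environment variables normally set by the GitHub
# Actions runtime - INPUT_MODIFIED-FILES (space-separated paths),
# INPUT_WIKI-BASE-URL, INPUT_USER, INPUT_TOKEN, INPUT_SPACE-NAME,
# INPUT_ROOT-PAGE-TITLE, INPUT_IGNORED-FOLDERS and GITHUB_REPOSITORY.
# Because the hyphenated names cannot be exported from a plain shell, a local
# test would need them injected with something like the `env` command before
# invoking `python3 wiki_sync.py` from inside the repository checkout.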