-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_jira_events.py
executable file
·335 lines (316 loc) · 15.8 KB
/
get_jira_events.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#!/usr/bin/env python3
import argparse
import datetime
import difflib
import logging
from re import search
from typing import List, Set, Tuple
import jira
from activity_merger.config.config import (
DAY_BORDER,
EVENTS_COMPARE_TOLERANCE_TIMEDELTA,
JIRA_BUCKET_ID,
JIRA_ISSUES_MAX,
JIRA_LOGIN_API_TOKEN,
JIRA_LOGIN_EMAIL,
JIRA_PROJECTS,
JIRA_SCRAPER_NAME,
JIRA_URL,
LOG,
)
from activity_merger.domain.input_entities import Event
from activity_merger.helpers.event_helpers import event_to_str, upload_events
from activity_merger.helpers.helpers import datetime_to_time_str, ensure_datetime, setup_logging, valid_date
# Format of 'created' timestamps in Jira changelog histories (ISO-like with timezone offset).
JIRA_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f%z"
# Issue fields whose edit effort scales with the amount of text changed; for these a
# character-level diff is calculated instead of the fixed "one keystroke" cost.
JIRA_TICKET_FIELDS_VARIABLE_UPDATE_COMPLEXITY = {"description", "summary", "labels", "Component"}
def _get_jira_issues(
    server_url: str, email: str, api_token: str, projects: List[str], search_datetime: datetime.datetime
) -> List[jira.Issue]:
    """Query Jira for issues in the given projects updated after the given moment and touched by the current user.

    :param server_url: Base URL of the Jira server.
    :param email: Login email for basic auth.
    :param api_token: API token paired with the email.
    :param projects: Jira project keys to restrict the search to.
    :param search_datetime: Lower bound for the issue 'updated' field.
    :return: Matching issues with changelog expanded, at most JIRA_ISSUES_MAX of them.
    """
    assert server_url, "Jira server URL is not specified."
    assert email, "Jira login email is not specified."
    assert api_token, "Jira login API token is not specified."
    assert projects, "Jira projects to consider are not specified."
    assert search_datetime, "Date to search is not specified."
    # The 'Accept' header is a workaround for https://github.com/pycontribs/jira/issues/1775.
    client = jira.JIRA(
        server=server_url,
        basic_auth=(email, api_token),
        options={"headers": {"Accept": "application/json;q=1.0, */*;q=0.9"}},
    )
    LOG.info("Searching %s issues updated after %s and touched by %s.", projects, search_datetime, email)
    touched_by_user = (
        "(reporter was currentUser()"
        " OR commentedBy = currentUser()"
        " OR assignee was currentUser()"
        " OR status changed BY currentUser()"
        ")"
    )
    updated_after = search_datetime.strftime("%Y-%m-%d %H:%M")
    jql = f"project IN ({','.join(projects)}) AND updated >= '{updated_after}' AND " + touched_by_user
    return client.search_issues(jql, expand="changelog", maxResults=JIRA_ISSUES_MAX)
def _jira_story_item_field_to_string(field) -> str:
return str(field) if field else ""
def _calculate_diff(old_value: str, new_value: str) -> Tuple[str, int]:
change_desc = []
symbols_count = 0
# Treat inputs as arrays of characters to get precise change.
for opcode in difflib.SequenceMatcher(None, old_value, new_value).get_opcodes():
tag, i1, i2, j1, j2 = opcode
if tag in {"replace", "delete"}:
change_desc.append("-" + old_value[i1:i2])
symbols_count += i2 - i1
if tag in {"replace", "insert"}:
change_desc.append("+" + new_value[j1:j2])
symbols_count += j2 - j1
change_desc = "\n".join(change_desc)
return change_desc, symbols_count
def _parse_events_from_story_without_duration(
    story: jira.resources.PropertyHolder, jira_id: str, summary: str, created: datetime.datetime
) -> Tuple[List[Event], Set[str]]:
    """Convert one Jira changelog "history" entry into flat events, one per changed field.

    Resulting events have no duration yet; the 'symbols_count' value in event data
    estimates how much typing the change required.

    :param story: Jira changelog history entry to parse.
    :param jira_id: Key of the issue the story belongs to.
    :param summary: Issue title to put into event data.
    :param created: Timestamp the story happened at.
    :return: Tuple of (parsed events, set of field names handled with default behavior).
    """
    events: List[Event] = []
    unsupported_fields: Set[str] = set()  # For debugging custom Jira projects/servers.
    for item in story.items:
        field = item.field
        # Defaults for fields without special handling: one keystroke, raw 'toString' as description.
        symbols_count = 1
        change_desc = item.toString
        if field in JIRA_TICKET_FIELDS_VARIABLE_UPDATE_COMPLEXITY:
            # Effort scales with edited text - diff old/new values character by character.
            change_desc, symbols_count = _calculate_diff(
                _jira_story_item_field_to_string(item.fromString),
                _jira_story_item_field_to_string(item.toString),
            )
        elif field == "Link":
            # 'from' is a reserved keyword in Python so the attribute is reachable only via __dict__.
            old_value = _jira_story_item_field_to_string(item.__dict__["from"])
            new_value = _jira_story_item_field_to_string(item.to)
            # First calculate difference in symbols length.
            symbols_count = len(new_value) - len(old_value)
            # When a link is removed toString/fromString may be None.
            # Also to/from don't carry the type of the link (relates to, blocks, caused by, etc.).
            change_desc = (
                f"{field} changed from '{'' if item.fromString is None else item.fromString}' to "
                f"'{'' if item.toString is None else item.toString}'."
            )
            # Link may be removed by a single "press cross icon" so treat deletion as one keystroke.
            if symbols_count < 0:
                symbols_count = 1
        elif field == "RemoteIssueLink":  # Hard to find out exact link type.
            symbols_count = 1  # Listed actions may require keystrokes but are fast actions anyway.
        elif field in {"assignee", "status", "duedate", "priority", "Fix Version", "resolution"}:
            old_value = _jira_story_item_field_to_string(item.fromString)
            new_value = _jira_story_item_field_to_string(item.toString)
            change_desc = f"{field} changed from '{old_value}' to '{new_value}'."
            symbols_count = 1  # Listed actions may require keystrokes but are fast actions anyway.
        else:
            unsupported_fields.add(field)
        # Make a separate event for each change in the issue - flat events are easier to handle.
        event_data = {
            "jira_id": jira_id,
            "title": summary,
            "field": field,
            "symbols_count": str(symbols_count),
            "change_desc": change_desc,
        }
        events.append(Event(JIRA_BUCKET_ID, created, None, event_data))
    return events, unsupported_fields
def _format_jira_event_for_log(event: Event) -> str:
    """Render a short single-line description of a Jira event for log output."""
    data = event.data
    summary = f"{data['symbols_count']} changes in '{data['field']}'"
    # For fixed-complexity fields append the textual change description as well.
    if data["field"] not in JIRA_TICKET_FIELDS_VARIABLE_UPDATE_COMPLEXITY:
        summary = summary + ": " + data["change_desc"]
    return f"{{{datetime_to_time_str(event.timestamp)} {data['jira_id']} {summary}}}"
def get_one_day_events_from_jira(
    issues: List[jira.Issue], author_email: str, start_datetime: datetime.datetime
) -> List[Event]:
    """
    Filters all issues by one day and specified author. Collects events from them.
    :param issues: Jira issues to inspect in chronological order.
    :param author_email: Account email to filter activities by.
    :param start_datetime: Date and time to start day to filter from.
    :return: List of events based on Jira issues activities performed by specific account.
    """
    # First generate as many events as possible and without right duration.
    events: List[Event] = []
    unsupported_fields = set()
    end_datetime = start_datetime + datetime.timedelta(days=1)
    for issue in issues:
        jira_id = issue.key
        title = issue.fields.summary if hasattr(issue.fields, "summary") else "<cannot parse>"
        # Don't use 'raw' value(s) because Jira may rename fields.
        # Note that Jira history is provided in reversed order. They will be ordered by date later on.
        for story in issue.changelog.histories:
            # Skip changes by other people or by not signed actors.
            if not hasattr(story.author, "emailAddress") or story.author.emailAddress != author_email:
                continue
            created = datetime.datetime.strptime(story.created, JIRA_DATETIME_FORMAT)
            # Skip changes made outside of specified interval.
            # BUGFIX: was 'start_datetime > created > end_datetime' which is unsatisfiable
            # (a datetime cannot be both before the interval start and after its end), so
            # out-of-interval changes were never skipped.
            if not (start_datetime <= created < end_datetime):
                continue
            story_events, story_unsupported_fields = _parse_events_from_story_without_duration(
                story, jira_id, title, created
            )
            events.extend(story_events)
            unsupported_fields.update(story_unsupported_fields)
    unsupported_desc = (
        " All fields are supported."
        if not unsupported_fields
        else " During parsing handled with 'default' behavior following unknown fields from Jira issues: "
        + str(unsupported_fields)
    )
    LOG.info("Parsed %d events from %d issues.%s", len(events), len(issues), unsupported_desc)
    if not events:
        return []
    # Here events are created on a "per issue" basis, don't have durations yet and may intersect. Also their
    # 'timestamp' field contains the "end of event" datetime, and events shorter than the tolerance would be
    # skipped downstream.
    # Need to sort, set 'duration' and maybe tune 'timestamp' to make one consecutive line of "not too short" events.
    events = sorted(events, key=lambda x: x.timestamp)
    if LOG.level <= logging.DEBUG:
        LOG.debug(
            "Having following events without durations yet:\n %s",
            "\n ".join(_format_jira_event_for_log(x) for x in events),
        )
    LOG.debug("Calculating duration and adjusting events:")
    result_events = []
    # Adjusting only the single "previous" event when the new one is too short is not enough for Jira,
    # because one Jira action may trigger a few changes with one human action. For example:
    # - Moving a ticket to "Done" triggers Fix Version, resolution, etc. updates in the same ticket.
    # - Making a "relates to" link in one ticket to another makes mirror changes in the other ticket.
    # If the 'data' of one event contained changes for a few tickets it would be hard to analyse and merge later.
    # Therefore buffer any number of events pointing to the same date and next "propagate" them back in time,
    # cutting duration from the "previous" "long" event.
    pending_events = []
    # Assumption: as a start for the first event use start of the day.
    event_start: datetime.datetime = ensure_datetime(events[0].timestamp.date())
    first_pending_start: datetime.datetime = event_start
    for event in events + [None]:  # Add extra iteration at the end to handle last pending event(s).
        duration = event.timestamp - event_start if event else datetime.timedelta(milliseconds=0)
        # Check we have pending events and current one doesn't need to be postponed. Or it is the last iteration.
        if (pending_events and duration > EVENTS_COMPARE_TOLERANCE_TIMEDELTA) or event is None:
            # Free buffer using first pending event duration to accommodate all next ones.
            reversed_buffer = []
            # Iterate pending events in reversed order to get the first pending event as a donor.
            oldest_in_pending = pending_events[0]
            latest_saved_timestamp = None
            for pending in reversed(pending_events):
                end = pending.timestamp  # Or earlier.
                if pending == oldest_in_pending:
                    # Add all remaining duration to the first/oldest event.
                    start = first_pending_start
                    if end - start < EVENTS_COMPARE_TOLERANCE_TIMEDELTA:
                        # Case when last event in a day is too short. Extend its end to be later.
                        end = start + EVENTS_COMPARE_TOLERANCE_TIMEDELTA
                else:
                    # All events except the oldest one in "pending" are short - extend them to the minimum.
                    if latest_saved_timestamp:
                        end = latest_saved_timestamp
                    start = end - EVENTS_COMPARE_TOLERANCE_TIMEDELTA
                # Assure that there were no miscalculations above and build full ActivityWatch event.
                assert (
                    end - start >= EVENTS_COMPARE_TOLERANCE_TIMEDELTA
                ), f"Can't distribute Jira events duration using as a donor {_format_jira_event_for_log(pending)}."
                reversed_buffer.append(Event(JIRA_BUCKET_ID, start, end - start, pending.data))
                latest_saved_timestamp = start  # Use start because we are going back in time.
            result_events.extend(reversed(reversed_buffer))  # Don't forget to un-reverse.
            first_pending_start = end
            event_start = end
            pending_events = [event]  # Current event is a donor for the next "pending" events chunk.
        else:
            # In both opposite cases need to postpone event creation - it may become a donor for following ones.
            pending_events.append(event)
            event_start = event.timestamp
    return result_events
def main():
    """Parse CLI arguments, scrape Jira events for the chosen day and upload them into ActivityWatch."""
    parser = argparse.ArgumentParser(
        description="Calls JIRA API to get issues updated by given account on given date,"
        " parses all found events in it and loads them into ActivityWatch."
        " To see more logs use `export LOGLEVEL=DEBUG` (or `set ...` on Windows)."
    )
    parser.add_argument(
        "search_date",
        nargs="?",
        type=valid_date,
        default=datetime.datetime.now().astimezone(),
        help="Date to look for Jira events in format 'YYYY-mm-dd'. By default is today."
        f" Note that day border is {DAY_BORDER}."
        " If don't set here then date is calculated as today-'back days'.",
    )
    parser.add_argument(
        "-b",
        "--back-days",
        type=int,
        help="Overwrites 'date' if specified. Sets how many days back search events on."
        " I.e. '1' value means 'search for yesterday'.",
    )
    parser.add_argument(
        "-p",
        "--projects",
        type=str,
        default=JIRA_PROJECTS,
        help="Comma-separated list of Jira project ID's to scrape events from."
        " Note that Jira API allows to get some limited number of issues at once"
        " and it is a limit for scraping.",
    )
    parser.add_argument("-e", "--email", type=str, default=JIRA_LOGIN_EMAIL, help="Email address to login into Jira.")
    parser.add_argument(
        "-a",
        "--api-token",
        type=str,
        default=JIRA_LOGIN_API_TOKEN,
        help="Jira API token to login. For details see https://support.atlassian.com/atlassian-account"
        "/docs/manage-api-tokens-for-your-atlassian-account/.",
    )
    parser.add_argument(
        "-s",
        "--server",
        type=str,
        default=JIRA_URL,
        # BUGFIX: help text was copy-pasted from an Outlook scraper and described the wrong service.
        help="URL to the Jira server. May look like 'https://company.jira.net'.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        dest="is_replace_bucket",
        action="store_true",
        help=f"Flag to delete ActivityWatch '{JIRA_BUCKET_ID}' bucket first."
        " Removes all previous events in it, for all time.",
    )
    parser.add_argument(
        "--dry-run",
        dest="is_dry_run",
        action="store_true",
        help="Flag to just log events but don't upload into ActivityWatch.",
    )
    args = parser.parse_args()
    search_date = args.search_date
    # BUGFIX: compare with None instead of truthiness - an explicit '--back-days 0' ("today")
    # was silently ignored and the '>= 0' check below could never trigger for it.
    if args.back_days is not None:
        assert args.back_days >= 0, f"'back_days' value ({args.back_days}) should be positive or 0."
        search_date = datetime.datetime.today().astimezone() - datetime.timedelta(days=args.back_days)
    # Day starts at midnight shifted forward by DAY_BORDER.
    search_datetime = ensure_datetime(search_date).replace(hour=0, minute=0, second=0, microsecond=0) + DAY_BORDER
    projects = [str(x).strip() for x in args.projects.split(",")]  # Clean up input from extra spaces.
    # Get "touched" Jira issues list.
    issues = _get_jira_issues(args.server, args.email, args.api_token, projects, search_datetime)
    LOG.info("Received %d issues from Jira [%s] projects.", len(issues), args.projects)
    events = get_one_day_events_from_jira(issues, args.email, search_datetime)
    LOG.info("Ready to upload %d events:\n %s", len(events), "\n ".join(event_to_str(x) for x in events))
    if not events:
        LOG.warning(
            "Can't find Jira activity on %s for %s account in [%s] projects.",
            search_datetime,
            args.email,
            args.projects,
        )
    # Load events into ActivityWatch unless running dry.
    if not args.is_dry_run:
        LOG.info(
            upload_events(
                events, JIRA_SCRAPER_NAME, JIRA_BUCKET_ID, args.is_replace_bucket, aw_client_name="jira.issue.activity"
            )
        )
if __name__ == "__main__":
    # NOTE(review): rebinds this module's LOG (imported from config above) to the result of
    # setup_logging(); library code referencing config.LOG directly is unaffected - confirm
    # setup_logging() configures the same underlying logger.
    LOG = setup_logging()
    main()