analyze_durations.py

#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import argparse
import ast
import json
import os
import re
import statistics as stat
import sys
from enum import Enum, auto
from pprint import pprint
DESC = """Provides statistics relevant to performance analysis on the given duration
measurements such as the mean, median, and max values. Sample output:
{'max': 2415.0,
'mean': 2187.92,
'median': 2141.0,
'min': 2101.0,
'replicate_count': 50,
'replicates': [2116.0, 2212.0, 2145.0, ..., 2391.0, 2195.0]}
See the `path` argument for supported file formats.
"""

LOGCAT_MATCH_STR = 'average '
LOGCAT_EXPECTED_FORMAT = '2020-05-04 15:15:50.340 10845-10845/? E/lol: average 37'


def parse_args():
parser = argparse.ArgumentParser(description=DESC, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("path", nargs="+",
help="""path to a file with duration measurements. The following formats are supported:
- durations separated by newlines
- perfherder-data-json output from mozperftest VIEW
- logcat where some lines have a logged value of 'average <duration>'
- the output of this script""")
parser.add_argument("-o", "--output-safe", help="""writes the output to the given path, in addition to printing.
This operation is safe (non-destructive): if the path already exists, the script will abort.
This is useful to avoid accidentally deleting results.""")
parser.add_argument("--graph", action="store_true",
help=("displays a graph of the replicates, in addition to printing the output. Requires "
"matplotlib (from the venv requirements)"))
parser.add_argument(
"--print-github-table-header", action="store_true",
help=("prints the input-agnostic header for --print-github-table-row args and exits. "
"the path is still required to ease the implementation requirements")
)
parser.add_argument(
"--print-github-table-row", action="store_true", help="prints the result formatted as a GitHub table row"
)
return parser.parse_args()


def validate_args(args):
if args.output_safe and len(args.path) > 1:
print("--output_safe cannot be combined with multiple path arguments.", file=sys.stderr)
sys.exit(1)
if args.print_github_table_row and len(args.path) > 1:
print("--print-github-table-row cannot be combined with multiple path arguments.", file=sys.stderr)
sys.exit(1)
if args.graph and len(args.path) > 1:
print("--graph cannot be combined with multiple path arguments.", file=sys.stderr)
sys.exit(1)


def detect_filetype(path):
with open(path) as f:
contents = f.read()
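    # Guess the format from how the file begins: perfherder JSON is a single object
    # starting with '{"suites":', this script's own output is a pprint'd dict, and
    # logcat lines start with a 'YYYY-MM-DD' timestamp.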
if contents.startswith('{"suites":'):
return InputFileType.PERFHERDER_JSON
elif contents.startswith("{'"):
return InputFileType.SCRIPT_OUTPUT
elif re.match(r'^\d+-\d+', contents):
return InputFileType.LOGCAT
else:
return InputFileType.NEWLINES


def read_from_file_separated_by_newlines(path):
with open(path) as f:
contents = f.read()
return [float(r) for r in contents.split('\n') if r] # trailing if is used to remove empty lines.


def read_from_perfherder_json(path):
with open(path) as f:
contents = json.load(f)
# Hard-coded to paths for perftest VIEW.
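    # Expected shape (simplified, inferred from the subscripts below):
    #   {"suites": [{"subtests": [{"replicates": [2116.0, 2212.0, ...]}]}]}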
return [float(e) for e in contents['suites'][0]['subtests'][0]['replicates']]


def read_from_output(path):
with open(path) as f:
contents = ast.literal_eval(f.read())
return contents['replicates']


def read_from_logcat_file(path):
measurements = []
with open(path) as f:
for line in f:
# The message will be after the last colon because we demand a certain formatting.
message_text = line[line.rfind(': ') + 2:] # +2 to move past ': '.
if message_text and message_text.startswith(LOGCAT_MATCH_STR):
measurements.append(float(message_text[len(LOGCAT_MATCH_STR):]))
if len(measurements) == 0:
print('WARN: no lines matched. expected format like:\n {}'.format(LOGCAT_EXPECTED_FORMAT))
return measurements


def to_stats(measurements_arr):
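    # Note: stat.stdev raises StatisticsError when there are fewer than two replicates.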
return {
'max': max(measurements_arr),
'mean': stat.mean(measurements_arr),
'median': stat.median(measurements_arr),
'min': min(measurements_arr),
'replicate_count': len(measurements_arr),
'replicates': measurements_arr,
'stdev': stat.stdev(measurements_arr),
}


def print_github_table_header():
print('|Iteration desc|mean|median|max|')
print('|-|-|-|-|')


def to_github_table_row(stats):
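    # 'todo-iteration-name' is a placeholder, presumably meant to be edited by hand
    # into a description of the iteration being measured.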
return '|todo-iteration-name|{}|{}|{}|'.format(stats['mean'], stats['median'], stats['max'])


def save_output(stats, path):
if os.path.exists(path):
raise Exception(('path specified by --output-safe/-o already '
'exists: aborting to prevent accidental overwrites. Use stream '
'redirection operators for intentional overwriting.'))
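    # Mode 'x' creates the file exclusively, failing if it already exists: a second
    # guard against accidental overwrites in addition to the check above.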
with open(path, 'x') as f:
print_stats(stats, f)
print('Saved output to path: {}'.format(path))
print('Also printing to stdout...\n')


def print_stats(stats, stream=None):
if not stream:
stream = sys.stdout
pprint(stats, compact=True, stream=stream)


def maybe_print_header(num_path_to_print, path):
# We don't want to print the filename header if we're only printing one value.
    if num_path_to_print < 2:
return
# For simplicity, we add a newline here rather than adding it after the stats are printed
# even though it puts an unnecessary newline before the first entry.
print("\n==> {} <==".format(path)) # Same format as `tail`


def graph(stats):
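    # matplotlib is imported lazily so the rest of the script works without it
    # installed; it is only needed when --graph is passed.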
from matplotlib import pyplot as plt
replicates = stats['replicates']
replicate_number = range(len(replicates))
plt.xlabel('Iteration number')
plt.ylabel('Duration')
plt.scatter(replicate_number, replicates)
plt.show()


class InputFileType(Enum):
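    """A supported input file format; see detect_filetype for how each is identified."""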
NEWLINES = auto()
PERFHERDER_JSON = auto()
SCRIPT_OUTPUT = auto()
LOGCAT = auto()

    def read_from(self, path):
if self is InputFileType.NEWLINES:
return read_from_file_separated_by_newlines(path)
elif self is InputFileType.PERFHERDER_JSON:
return read_from_perfherder_json(path)
elif self is InputFileType.SCRIPT_OUTPUT:
return read_from_output(path)
elif self is InputFileType.LOGCAT:
return read_from_logcat_file(path)
raise RuntimeError('Unknown input type: {}'.format(self))


def main():
args = parse_args()
validate_args(args)
if args.print_github_table_header:
print_github_table_header()
        sys.exit(0)
for path in args.path:
filetype = detect_filetype(path)
measurement_arr = filetype.read_from(path)
stats = to_stats(measurement_arr)
# Called before printing so if we abort, it's clearer to the user there was an error.
if args.output_safe:
save_output(stats, args.output_safe)
if args.print_github_table_row:
print(to_github_table_row(stats))
else:
maybe_print_header(len(args.path), path)
print_stats(stats)
if args.graph:
graph(stats)


if __name__ == '__main__':
main()
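# Example invocations (the input file names are hypothetical):
#   ./analyze_durations.py durations.txt
#   ./analyze_durations.py --graph perfherder-data.json
#   ./analyze_durations.py --print-github-table-header durations.txt
#   ./analyze_durations.py --print-github-table-row durations.txt
#   ./analyze_durations.py -o results.txt logcat.txt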