-
Notifications
You must be signed in to change notification settings - Fork 1
/
benchmark.py
95 lines (79 loc) · 2.34 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import logging
import os
import sys
import argparse
from time import time
import re
# Module-level logger; its handlers and level are configured further down
# by logging.basicConfig.
logger = logging.getLogger(__name__)

# Command-line interface of the benchmark script.
parser = argparse.ArgumentParser(description="""
Test benchmarks for ingestion
""")

# Label for this run: used in the output file name and in every CSV row.
parser.add_argument(
    '--name', type=str, default="4xlarge", required=False,
    help='Name to use for the test',
)

# Dot-env file to export as DOTENV when that variable is not already set.
parser.add_argument(
    '--env', type=str, default='.env', required=False,
    help='The dot env file to use',
)

# Switches LOG_LEVEL to DEBUG for this run.
parser.add_argument(
    '--debug', action="store_true",
    help='Output at DEBUG level',
)

args = parser.parse_args()
# Export the chosen dot-env file, but only when DOTENV is not already present
# in the environment (an existing value wins over --env).
# NOTE(review): presumably ingest.settings reads DOTENV / LOG_LEVEL when it is
# imported, which would explain why that import follows this block -- confirm.
if 'DOTENV' not in os.environ and args.env is not None:
    os.environ['DOTENV'] = args.env

# --debug overrides whatever LOG_LEVEL was set before.
if args.debug:
    os.environ['LOG_LEVEL'] = 'DEBUG'
from ingest.settings import settings
from fake import config, get_locations, as_realtime
from ingest.fetch import load_realtime
# Configure the root logger using the level from the project settings.
# force=True makes basicConfig remove any handlers already attached to the
# root logger before installing its own.
_LOG_FORMAT = '[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s'
logging.basicConfig(
    level=settings.LOG_LEVEL.upper(),
    format=_LOG_FORMAT,
    force=True,
)
# Results file for this run, named after --name. It is opened in "w" mode, so
# an existing file with the same name is overwritten. The handle stays open
# while the benchmark runs and is closed at the end of the script.
f = open(f"benchmark_ingest_output_{args.name}.csv", "w")

# Header row; one data row per ingested file is written later.
# write() is the right call for a single string: writelines() expects an
# iterable of lines and would walk this string one character at a time.
f.write("name,key,locations,inserted_nodes,updated_nodes,total_meas,inserted_meas,ingest_time,process_time,log_time,copy_time,load_process_time\n")
## make a set of files
# Number of files generated for each location count.
n = 10
# Location counts to benchmark; n files are generated for every entry.
locations = [50, 250, 1000]
# One dict per generated file: the value returned by as_realtime under "key"
# and the number of locations in that file under "locations".
keys = []

for r in locations:
    for i in range(n):
        # Every generated file gets its own source name: <count>-<1-based index>.
        config(source=f"benchmark-test-{r}-{i+1}", gz=True)
        generated = get_locations(n=r)
        key = as_realtime(
            generated["locations"],
            generated["latitude"],
            generated["longitude"],
        )
        keys.append({"key": key, "locations": len(generated["locations"])})
## ingest each of the generated files
# Extracts "name: value," pairs from the notice text returned by load_realtime.
notice_fields = re.compile(r'([a-z-]+): (.+?),')

try:
    for i, k in enumerate(keys):
        key = k["key"]
        # Kept under its own name so the module-level `locations` list is not
        # overwritten with an int.
        n_locations = k["locations"]
        logger.info(
            "Ingesting %s of %s: %s with %s locations",
            i + 1, len(keys), key, n_locations,
        )

        start_time = time()
        copy_time, load_process_time, log_time, notice = load_realtime([
            (-1, key, None)
        ])
        # Stop the clock before parsing the notice so ingest_time (in ms)
        # covers only the load_realtime call.
        ingest_time = round((time() - start_time) * 1000)

        m = notice_fields.findall(notice)
        # The statistics are picked by position in the list of matches, so
        # they depend on the order of the fields in the notice text.
        # NOTE(review): the notice layout is not visible here -- confirm these
        # indices against what load_realtime actually reports. A notice with
        # fewer than 18 fields raises IndexError on the first line below.
        process_time = round(float(m[17][1]))
        total_meas = int(m[0][1])
        inserted_meas = int(m[9][1])
        updated_nodes = int(m[8][1])
        inserted_nodes = int(m[11][1])

        # One CSV row per ingested file, in the same column order as the header.
        f.write(f"'{args.name}','{key}',{n_locations},{inserted_nodes},{updated_nodes},{total_meas},{inserted_meas},{ingest_time},{process_time},{log_time},{copy_time},{load_process_time}\n")
        logger.info(
            "loaded realtime records, timer: %0.4f, process: %0.4f",
            ingest_time, process_time
        )
finally:
    # Close (and flush) the results file even when an ingest fails part-way,
    # so the rows written so far are not lost.
    f.close()