benchmark.py
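"""Benchmark suite for a WAF-fronted API, driven by Apache Bench (`ab`).

Runs a YAML-defined set of clean, attack, and false-positive test cases,
parses ab's text report into metrics, prints a color-coded per-test and
overall summary, and appends each run to benchmark.json.

Prerequisites: `ab` available on the PATH, the PyYAML and colorama
packages installed, and a target application listening on
http://localhost:8080 behind the WAF under test.
"""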
import json
import datetime
import os
import subprocess
import re
import yaml
from colorama import Fore, Back, Style, init
from collections import Counter

init(autoreset=True)

benchmark_filename = "benchmark.json"
benchmark_data = []

# Load existing data if file exists
if os.path.exists(benchmark_filename):
    with open(benchmark_filename, "r") as f:
        try:
            benchmark_data = json.load(f)
        except json.JSONDecodeError:
            benchmark_data = []  # Handle empty or corrupted JSON file


def colored_print(text, color=Fore.WHITE, style=Style.NORMAL):
    print(style + color + text + Style.RESET_ALL)

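# run_benchmark builds an `ab` command line from a single test entry, runs it,
# parses the text report, and returns a dict of the form
# {"metrics": dict or None, "outcome": "PASS"/"WARN"/"FAIL",
#  "response_code_counts": Counter}.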
def run_benchmark(test_config):
    colored_print(f"\n{Back.BLUE}{Fore.WHITE} Running Test: {test_config['name']} {Style.RESET_ALL} - {test_config['description']}")
    outcome = "FAIL"
    metrics = {}
    response_code_counts = Counter()  # Initialize counter - not really used now, but kept for potential future use

    command_list = ["ab"]
    command_list.extend(test_config['ab_options'])
    if 'method' in test_config and test_config['method'] == 'POST':
        body_file = test_config.get('body_file')
        if body_file:
            command_list.extend(["-p", body_file])
        if 'content_type' in test_config:
            command_list.extend(["-T", test_config['content_type']])
        command_list.append(test_config['url'])
    else:
        command_list.append(test_config['url'])

    colored_print(f"{Fore.YELLOW}Executing command: {' '.join(command_list)}{Style.RESET_ALL}")

    try:
        result = subprocess.run(command_list, capture_output=True, text=True, check=True, shell=False)
        output = result.stdout
        colored_print(f"{Fore.GREEN}ab execution successful.{Style.RESET_ALL}")
    except subprocess.CalledProcessError as e:
        output = e.stdout + "\n" + e.stderr  # Capture output even on error
        colored_print(f"{Fore.RED}Error running benchmark (subprocess.CalledProcessError):{Style.RESET_ALL}")
        colored_print(f"{Fore.RED}Return code: {e.returncode}{Style.RESET_ALL}")
        colored_print(f"{Fore.RED}Stderr:\n{e.stderr}{Style.RESET_ALL}")
        # No early return here - process metrics even if ab failed
    except FileNotFoundError:
        colored_print(f"{Fore.RED}Error: 'ab' command not found. Is Apache Benchmark installed and in your PATH?{Style.RESET_ALL}")
        return {"metrics": None, "outcome": "FAIL", "response_code_counts": response_code_counts}
    except Exception as e:
        colored_print(f"{Fore.RED}An unexpected error occurred: {e}{Style.RESET_ALL}")
        return {"metrics": None, "outcome": "FAIL", "response_code_counts": response_code_counts}
    # Parse metrics out of ab's plain-text report
    rps_match = re.search(r"Requests per second:\s+([\d.]+)", output)
    time_per_request_mean_match = re.search(r"Time per request:\s+([\d.]+) \[ms\] \(mean\)", output)
    time_per_request_sd_match = re.search(r"Time per request:\s+([\d.]+) \[ms\] \(sd\)", output)  # may never match on stock ab output
    time_per_request_median_match = re.search(r"50%\s+([\d.]+)", output)
    connect_time_match = re.search(r"Connect:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
    processing_time_match = re.search(r"Processing:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
    waiting_time_match = re.search(r"Waiting:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
    total_time_match = re.search(r"Total:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
    transfer_rate_match = re.search(r"Transfer rate:\s+([\d.]+) \[Kbytes/sec\]", output)
    failed_requests_match = re.search(r"Failed requests:\s+(\d+)", output)
    completed_requests_match = re.search(r"Complete requests:\s+(\d+)", output)  # ab labels this line "Complete requests"
    non_2xx_responses_match = re.search(r"Non-2xx responses:\s+(\d+)", output)

    metrics = {
        "requests_per_second": float(rps_match.group(1)) if rps_match else None,
        "time_per_request_mean_ms": float(time_per_request_mean_match.group(1)) if time_per_request_mean_match else None,
        "time_per_request_sd_ms": float(time_per_request_sd_match.group(1)) if time_per_request_sd_match else None,
        "time_per_request_median_ms": float(time_per_request_median_match.group(1)) if time_per_request_median_match else None,
        "connect_time_avg_ms": float(connect_time_match.group(2)) if connect_time_match else None,
        "processing_time_avg_ms": float(processing_time_match.group(2)) if processing_time_match else None,
        "waiting_time_avg_ms": float(waiting_time_match.group(2)) if waiting_time_match else None,
        "total_time_avg_ms": float(total_time_match.group(2)) if total_time_match else None,
        "transfer_rate_kb_sec": float(transfer_rate_match.group(1)) if transfer_rate_match else None,
        "failed_requests": int(failed_requests_match.group(1)) if failed_requests_match else None,
        "completed_requests": int(completed_requests_match.group(1)) if completed_requests_match else None,
        "non_2xx_responses": int(non_2xx_responses_match.group(1)) if non_2xx_responses_match else 0,
        "raw_output": output
    }
    expected_response_code = test_config.get('expected_response_code', 200)
    actual_non_2xx_responses = metrics["non_2xx_responses"]
    actual_completed_requests = metrics["completed_requests"]

    if expected_response_code == 200:
        if actual_non_2xx_responses > 0:
            colored_print(f"{Fore.YELLOW}Warning: Expected 200 OK, but found {actual_non_2xx_responses} non-2xx responses.", style=Style.BRIGHT)
            outcome = "WARN"
        else:
            colored_print(f"{Fore.GREEN}Response Code Verification: {Fore.GREEN}200 OK {Fore.WHITE}as expected.", style=Style.BRIGHT)
            outcome = "PASS"
    elif expected_response_code == 403:
        if actual_non_2xx_responses > 0:  # Simplified 403 check - rely only on non_2xx count
            colored_print(f"{Fore.GREEN}Response Code Verification: {Fore.GREEN}Blocked (non-2xx responses found) {Fore.WHITE}as expected.", style=Style.BRIGHT)
            outcome = "PASS"
        else:
            colored_print(f"{Fore.RED}Error: Expected 403 Forbidden, but got {Fore.RED}200 OK or other success {Fore.WHITE}(no non-2xx responses). WAF rule might not be triggering.", style=Style.BRIGHT)
            outcome = "FAIL"
    else:
        outcome = "WARN"

    return {"metrics": metrics, "outcome": outcome, "response_code_counts": response_code_counts}  # Counts are returned, though not really used now
test_suite_config_yaml = """
tests:
- name: Baseline_Clean_GET_200
category: Baseline
description: Simple clean GET request, minimal WAF rules active.
url: "http://localhost:8080/api/hello"
ab_options: ["-n", "5000", "-c", "10"]
expected_response_code: 200
- name: Clean_Rules_GET_200
category: Clean Traffic with Rules
description: Clean GET request, with moderate WAF rules active.
url: "http://localhost:8080/api/hello"
ab_options: ["-n", "5000", "-c", "10"]
expected_response_code: 200
- name: Attack_SQLi_GET_403
category: Attack Traffic
description: GET request with SQL Injection payload, expect 403.
url: "http://localhost:8080/api/search?q=';+OR+1=1-- -"
ab_options: ["-n", "1000", "-c", "5"]
expected_response_code: 403
- name: Attack_XSS_GET_403
category: Attack Traffic
description: GET request with XSS payload, expect 403.
url: "http://localhost:8080/api/search?q=<script>alert(1)</script>"
ab_options: ["-n", "1000", "-c", "5"]
expected_response_code: 403
- name: Attack_CmdInj_GET_403
category: Attack Traffic
description: GET request with Command Injection, expect 403.
url: "http://localhost:8080/api/exec?cmd=;+whoami"
ab_options: ["-n", "1000", "-c", "5"]
expected_response_code: 403
- name: Concurrency_Clean_GET_200_High
category: Concurrency Impact
description: Clean GET, high concurrency, 200 OK.
url: "http://localhost:8080/api/hello"
ab_options: ["-n", "5000", "-c", "50"]
expected_response_code: 200
- name: Concurrency_Attack_SQLi_403_High
category: Concurrency Impact
description: Attack (SQLi) GET, high concurrency, 403 Forbidden.
url: "http://localhost:8080/api/search?q=';+OR+1=1-- -"
ab_options: ["-n", "1000", "-c", "20"]
expected_response_code: 403
- name: Baseline_KeepAlive_200
category: Baseline
description: Clean GET with Keep-Alive, 200 OK.
url: "http://localhost:8080/api/hello"
ab_options: ["-n", "5000", "-c", "10", "-k"]
expected_response_code: 200
- name: Clean_POST_SmallBody_200
category: Baseline
description: Clean POST request, small body, minimal WAF rules.
url: "http://localhost:8080/api/data"
ab_options: ["-n", "1000", "-c", "10"]
method: POST
body_file: "small_body.txt"
content_type: 'application/json'
expected_response_code: 200
- name: Clean_Rules_POST_LargeBody_200
category: Clean Traffic with Rules
description: Clean POST, large body, moderate WAF rules.
url: "http://localhost:8080/api/upload"
ab_options: ["-n", "500", "-c", "5"]
method: POST
body_file: "large_body.txt"
content_type: 'application/octet-stream'
expected_response_code: 200
# --- Extended Tests ---
- name: Attack_PathTraversal_403
category: Attack Traffic
description: GET request with Path Traversal, expect 403.
url: "http://localhost:8080/api/files?file=../../../../etc/passwd"
ab_options: ["-n", "1000", "-c", "5"]
expected_response_code: 403
- name: Baseline_Clean_HEAD_200
category: Baseline
description: Clean HEAD request, minimal WAF rules active.
url: "http://localhost:8080/api/hello"
ab_options: ["-n", "5000", "-c", "10", "-i"] # -i for HEAD method
expected_response_code: 200
- name: Concurrency_Clean_POST_200_High
category: Concurrency Impact
description: Clean POST, high concurrency, 200 OK.
url: "http://localhost:8080/api/data"
ab_options: ["-n", "5000", "-c", "50"]
method: POST
body_file: "small_body.txt"
content_type: 'application/json'
expected_response_code: 200
- name: FalsePositive_URL_Keywords_200
category: False Positive
description: Legitimate URL with SQL keywords, expect 200 OK (no false positive).
url: "http://localhost:8080/api/report?filter=SELECT+name+FROM+users"
ab_options: ["-n", "1000", "-c", "10"]
expected_response_code: 200
- name: Attack_LFI_GET_403
category: Attack Traffic
description: Local File Inclusion (LFI) attack via GET, expect 403.
url: "http://localhost:8080/api/include?file=/etc/passwd" # Simple LFI attempt
ab_options: ["-n", "1000", "-c", "5"]
expected_response_code: 403
- name: FalsePositive_Path_200
category: False Positive
description: Legitimate URL with path-like keywords, expect 200 OK (no false positive).
url: "http://localhost:8080/api/browse/documents/user_manuals" # URL with "path" like structure
ab_options: ["-n", "1000", "-c", "10"]
expected_response_code: 200
"""
test_suite_config = yaml.safe_load(test_suite_config_yaml)

with open("small_body.txt", "w") as f:
    f.write('{"key": "value"}')

with open("large_body.txt", "wb") as f:
    f.write(b"A" * 1024 * 1024)

with open("sqli_payload.txt", "w") as f:
    f.write("username=test&password=';+OR+1=1-- -")

with open("xxe_payload.xml", "w") as f:
    f.write("""<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE foo [
<!ELEMENT foo ANY >
<!ENTITY xxe SYSTEM "file:///etc/passwd" >
]>
<foo>&xxe;</foo>""")
colored_print(f"{Back.GREEN}{Fore.BLACK} --- Benchmark Suite Started --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
test_results = {}
all_metrics = []
overall_expected_responses = 0
overall_unexpected_responses = 0
for test_config in test_suite_config['tests']:
result_data = run_benchmark(test_config)
test_results[test_config['name']] = result_data
if result_data and result_data['metrics']:
metrics = result_data['metrics']
response_code_counts = result_data['response_code_counts']
all_metrics.append(metrics)
colored_print(f"\n{Fore.CYAN}Results for {test_config['name']}:{Style.RESET_ALL}")
colored_print(f" {Fore.BLUE}Requests per second:{Style.RESET_ALL} {metrics['requests_per_second']:.2f}")
colored_print(f" {Fore.BLUE}Mean Time per request:{Style.RESET_ALL} {metrics['time_per_request_mean_ms']:.2f} ms")
if metrics.get('time_per_request_sd_ms') is not None:
colored_print(f" {Fore.BLUE}SD Time per request:{Style.RESET_ALL} {metrics['time_per_request_sd_ms']:.2f} ms")
if metrics.get('time_per_request_median_ms') is not None:
colored_print(f" {Fore.BLUE}Median Time per request:{Style.RESET_ALL} {metrics['time_per_request_median_ms']:.2f} ms")
if metrics.get('connect_time_avg_ms') is not None:
colored_print(f" {Fore.BLUE}Avg Connect Time:{Style.RESET_ALL} {metrics['connect_time_avg_ms']:.2f} ms")
if metrics.get('processing_time_avg_ms') is not None:
colored_print(f" {Fore.BLUE}Avg Processing Time:{Style.RESET_ALL} {metrics['processing_time_avg_ms']:.2f} ms")
if metrics.get('waiting_time_avg_ms') is not None:
colored_print(f" {Fore.BLUE}Avg Waiting Time:{Style.RESET_ALL} {metrics['waiting_time_avg_ms']:.2f} ms")
if metrics.get('total_time_avg_ms') is not None:
colored_print(f" {Fore.BLUE}Avg Total Time:{Style.RESET_ALL} {metrics['total_time_avg_ms']:.2f} ms")
colored_print(f" {Fore.BLUE}Transfer rate:{Style.RESET_ALL} {metrics['transfer_rate_kb_sec']:.2f} KB/sec")
colored_print(f" {Fore.BLUE}Failed requests:{Style.RESET_ALL} {metrics['failed_requests']}")
colored_print(f" {Fore.BLUE}Non-2xx responses:{Style.RESET_ALL} {metrics['non_2xx_responses']}")
# colored_print(f" {Fore.BLUE}Response Code Counts:{Style.RESET_ALL} {dict(response_code_counts)}") # No longer printing empty response code counts
expected_response_code = test_config['expected_response_code']
if response_code_counts.get(expected_response_code): # Still keep this for potential future use if we find a way to parse codes
overall_expected_responses += response_code_counts[expected_response_code]
for code, count in response_code_counts.items():
if code != expected_response_code:
overall_unexpected_responses += count
outcome_color = Fore.GREEN if result_data['outcome'] == "PASS" else Fore.YELLOW if result_data['outcome'] == "WARN" else Fore.RED
colored_print(f"\n{Fore.MAGENTA}Test Outcome:{Style.RESET_ALL} {test_config['name']} - {test_config['description']} - {outcome_color}{Style.BRIGHT}{result_data['outcome']}{Style.RESET_ALL}")
else:
colored_print(f"{Fore.RED}Test {test_config['name']} failed to run.", style=Style.BRIGHT)
colored_print(f"\n{Fore.MAGENTA}Test Outcome:{Style.RESET_ALL} {test_config['name']} - {test_config['description']} - {Fore.RED}{Style.BRIGHT}FAIL{Style.RESET_ALL}")
colored_print(f"\n{Back.GREEN}{Fore.BLACK} --- Benchmark Suite Completed --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
pass_count = 0
warn_count = 0
fail_count = 0
for test_name, result_data in test_results.items():
if result_data and result_data['outcome'] == "PASS":
pass_count += 1
elif result_data and result_data['outcome'] == "WARN":
warn_count += 1
else:
fail_count += 1
colored_print(f"{Back.CYAN}{Fore.BLACK} --- Overall Benchmark Summary --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
colored_print(f"{Fore.GREEN}Tests Passed:{Style.RESET_ALL} {pass_count}")
colored_print(f"{Fore.YELLOW}Tests Warned:{Style.RESET_ALL} {warn_count}")
colored_print(f"{Fore.RED}Tests Failed:{Style.RESET_ALL} {fail_count}")
colored_print(f"{Fore.BLUE}Total Tests Run:{Style.RESET_ALL} {len(test_suite_config['tests'])}")
if all_metrics:
    avg_rps = sum(m.get('requests_per_second', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_time_per_request = sum(m.get('time_per_request_mean_ms', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_transfer_rate = sum(m.get('transfer_rate_kb_sec', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_connect_time = sum(m.get('connect_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_processing_time = sum(m.get('processing_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_waiting_time = sum(m.get('waiting_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics)
    avg_total_time = sum(m.get('total_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics)

    colored_print(f"\n{Back.CYAN}{Fore.BLACK} --- Average Metrics Across All Tests --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
    colored_print(f" {Fore.BLUE}Average Requests per second:{Style.RESET_ALL} {avg_rps:.2f}")
    colored_print(f" {Fore.BLUE}Average Mean Time per request:{Style.RESET_ALL} {avg_time_per_request:.2f} ms")
    colored_print(f" {Fore.BLUE}Average Transfer rate:{Style.RESET_ALL} {avg_transfer_rate:.2f} KB/sec")
    colored_print(f" {Fore.BLUE}Average Connect Time:{Style.RESET_ALL} {avg_connect_time:.2f} ms")
    colored_print(f" {Fore.BLUE}Average Processing Time:{Style.RESET_ALL} {avg_processing_time:.2f} ms")
    colored_print(f" {Fore.BLUE}Average Waiting Time:{Style.RESET_ALL} {avg_waiting_time:.2f} ms")
    colored_print(f" {Fore.BLUE}Average Total Time:{Style.RESET_ALL} {avg_total_time:.2f} ms")
else:
    colored_print(f"\n{Fore.YELLOW}No successful tests to calculate averages.{Style.RESET_ALL}")

total_requests = sum(m.get('completed_requests', 0) or 0 for m in all_metrics)  # "or 0" guards None here too
if total_requests > 0:
    expected_response_percentage = (overall_expected_responses / total_requests) * 100
    unexpected_response_percentage = (overall_unexpected_responses / total_requests) * 100
    colored_print(f"\n{Back.CYAN}{Fore.BLACK} --- Overall Response Summary --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
    colored_print(f" {Fore.GREEN}Expected Response Code Count:{Style.RESET_ALL} {overall_expected_responses} ({expected_response_percentage:.2f}%)")
    colored_print(f" {Fore.RED}Unexpected Response Code Count:{Style.RESET_ALL} {overall_unexpected_responses} ({unexpected_response_percentage:.2f}%)")
print("\nBenchmark Suite Execution Finished.")
# --- Save benchmark data to benchmark.json ---
benchmark_data_to_save = []
# Prepare current run data
current_run_data = {
"timestamp": datetime.datetime.now().isoformat(),
"config": test_suite_config,
"results": test_results,
"summary": {
"pass_count": pass_count,
"warn_count": warn_count,
"fail_count": fail_count,
"avg_rps": avg_rps if all_metrics else None,
"avg_time_per_request": avg_time_per_request if all_metrics else None,
"avg_transfer_rate": avg_transfer_rate if all_metrics else None,
"avg_connect_time": avg_connect_time if all_metrics else None,
"avg_processing_time": avg_processing_time if all_metrics else None,
"avg_waiting_time": avg_waiting_time if all_metrics else None,
"avg_total_time": avg_total_time if all_metrics else None,
"overall_expected_responses": overall_expected_responses,
"overall_unexpected_responses": overall_unexpected_responses,
"total_requests": total_requests
}
}
benchmark_data.append(current_run_data)
# Save all benchmark data to json file
with open(benchmark_filename, "w") as f:
json.dump(benchmark_data, f, indent=4)
colored_print(f"\n{Fore.GREEN}Benchmark data saved to {benchmark_filename}{Style.RESET_ALL}")