Skip to content

Commit

Permalink
update script and add test input and output
Browse files Browse the repository at this point in the history
  • Loading branch information
mehmetyusufoglu committed Jul 19, 2024
1 parent 07ac9ff commit 43f9381
Show file tree
Hide file tree
Showing 3 changed files with 1,919 additions and 35 deletions.
149 changes: 114 additions & 35 deletions script/benchmark/benchmarkXml2Json.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,141 @@
import xml.etree.ElementTree as ET
#!/usr/bin/env python3

import os
import sys
import json
import hashlib
import xml.etree.ElementTree as ET
import html
import sys
import os
import re

# Function to parse the content of a <Warning> node into a dictionary
# This is needed because Catch2 INFO macro is not printed if the test case passes, only WARNING is printed
def parse_warning_node(warning_text):
    """Parse the text of a ``<Warning>`` node into a dictionary.

    Each line of the form ``name: value`` becomes one entry. The line is
    split on the first colon only, so values may themselves contain colons.
    Names and values are stripped of surrounding whitespace and values are
    kept as strings. Lines without a colon are ignored, and a repeated name
    keeps the value from its last occurrence.

    Args:
        warning_text: Raw text content of the node, or ``None`` when the
            element has no text.

    Returns:
        dict mapping field names to their string values; empty when the
        text is ``None`` or empty.
    """
    # An element without text yields None; treat it like an empty warning
    # instead of failing on ``None.strip()``.
    if not warning_text:
        return {}

    warning_dict = {}
    for line in warning_text.strip().split('\n'):
        if ':' not in line:
            continue
        field_name, value = line.split(':', 1)
        warning_dict[field_name.strip()] = value.strip()
    return warning_dict

def convert_to_number(value):
    """Turn a numeric-looking string into an ``int`` or ``float``.

    Args:
        value: String to inspect.

    Returns:
        ``int(value)`` when the string is an optionally negative run of
        digits, ``float(value)`` when it is an optionally negative decimal
        with at least one digit after the point, otherwise ``value``
        unchanged.
    """
    # Tried in order: the integer form first, then the decimal form.
    numeric_forms = (
        (r'^-?\d+$', int),
        (r'^-?\d*\.\d+$', float),
    )
    for pattern, caster in numeric_forms:
        if re.match(pattern, value):
            return caster(value)
    # Neither form matched: hand the string back untouched.
    return value

# Function to recursively convert XML elements into a dictionary
def xml_to_dict(element):
node_dict = {}
# Directly add attributes without '@'
# Add XML attributes directly to the dictionary
for key, value in element.attrib.items():
node_dict[key] = convert_to_number(value)
node_dict[key] = html.unescape(value)

if element.text and element.text.strip():
node_dict[element.tag] = convert_to_number(html.unescape(element.text.strip()))
# Add text content of the element if it exists
text = element.text.strip() if element.text else ''
if text:
node_dict['text'] = html.unescape(text)

special_tags = {'mean', 'standardDeviation', 'outliers'}

# Process each child element recursively
for child in list(element):
child_dict = xml_to_dict(child)
if child.tag == 'Warning':
warning_dict = parse_warning_node(child.text)
node_dict['MetaBenchmarkTestData'] = warning_dict
elif child.tag in special_tags:
node_dict[child.tag] = child_dict[child.tag]
if child.tag not in node_dict:
node_dict[child.tag] = {}
node_dict[child.tag].update(child_dict)
else:
if child.tag not in node_dict:
node_dict[child.tag] = []
node_dict[child.tag].append(child_dict)

return {element.tag: node_dict}
# Convert single-item lists to single objects
for key, value in node_dict.items():
if isinstance(value, list) and len(value) == 1:
node_dict[key] = value[0]

def consolidate_dict(d):
    """Collapse single-item lists into their sole element, in place.

    Walks ``d`` recursively. Wherever a dictionary value is a list holding
    exactly one item, the list is replaced by that item. Lists with several
    items are kept, but their elements are still visited so that nested
    single-item lists are collapsed regardless of how many siblings an
    element has.

    Args:
        d: Structure to normalise. Dicts and lists are traversed; any other
            value is left untouched.

    Returns:
        None. ``d`` is modified in place.
    """
    if isinstance(d, dict):
        # Only values of existing keys are reassigned, so the dict never
        # changes size while it is being iterated.
        for key, value in d.items():
            if isinstance(value, list) and len(value) == 1:
                value = d[key] = value[0]
            consolidate_dict(value)
    elif isinstance(d, list):
        # Multi-item lists stay lists, but their members may still contain
        # single-item lists further down.
        for item in d:
            consolidate_dict(item)
return node_dict

# Function to convert the XML string into a JSON-compatible dictionary
def xml_to_json(xml_string):
root = ET.fromstring(xml_string)
xml_dict = xml_to_dict(root)
consolidate_dict(xml_dict)
return json.dumps(xml_dict, indent=4)
root_dict = xml_to_dict(root)

# Extract and structure TestCase elements
test_cases = root_dict.pop('TestCase', [])

if not isinstance(test_cases, list):
test_cases = [test_cases]

# Create the final dictionary with the root tag and TestCase entries
final_dict = {
root.tag: root_dict,
'TestCase': test_cases
}

return final_dict

# Function to transform the JSON data into the desired format
def transform(json_data):
    """Flatten a parsed Catch2 test run into one record per benchmark result.

    Args:
        json_data: Mapping with a 'Catch2TestRun' entry holding the run-level
            attributes and an optional 'TestCase' entry holding either one
            test-case dict or a list of them. Each test case may carry a
            'BenchmarkResults' entry (one dict or a list of dicts).

    Returns:
        A list of dicts, one per benchmark result. Each record has the keys
        'id', 'meta', 'env' and 'TestCase' followed by the benchmark
        result's own keys. 'id' is the MD5 hex digest of the record
        serialised with sorted keys while 'id' is still the empty string.
        Returns an empty list, after reporting the problem on stderr, when
        'Catch2TestRun' is missing.
    """
    # Local import: only this function needs it.
    import copy

    if 'Catch2TestRun' not in json_data:
        print("Error: 'Catch2TestRun' not found in input JSON", file=sys.stderr)
        return []

    run_info = json_data['Catch2TestRun']

    # Placeholder metadata; nothing in the input provides these values.
    meta = {
        'creator': "unknown",
        'datetime': "unknown",
        'hostname': "unknown",
        'moreinterestingmetadata': "unknown",
    }

    # Run-level information shared by every benchmark of this run.
    env = {
        'type': 'Catch2TestRun',
        'kind': run_info.get('name', 'unknown'),
        'rng-seed': run_info.get('rng-seed', 'unknown'),
        'catch2-version': run_info.get('catch2-version', 'unknown'),
        'OverallResults': run_info.get('OverallResults', {}),
        'OverallResultsCases': run_info.get('OverallResultsCases', {}),
    }

    # A single test case may arrive as a bare dict rather than a list.
    test_cases = json_data.get('TestCase', [])
    if not isinstance(test_cases, list):
        test_cases = [test_cases]

    records = []
    for tc in test_cases:
        # Likewise, a single benchmark result may arrive as a bare dict.
        benchmark_results = tc.get('BenchmarkResults', [])
        if not isinstance(benchmark_results, list):
            benchmark_results = [benchmark_results]

        case_info = {
            'name': tc.get('name', 'unknown'),
            'tags': tc.get('tags', 'unknown'),
            'filename': tc.get('filename', 'unknown'),
            'line': tc.get('line', 'unknown'),
            'OverallResult': tc.get('OverallResult', {}),
            'MetaBenchmarkTestData': tc.get('MetaBenchmarkTestData', {}),
        }

        for result in benchmark_results:
            record = {
                'id': "",
                'meta': meta,
                'env': env,
                'TestCase': case_info,
            }
            # Benchmark result details go after the fixed keys.
            record.update(result)

            # Deep-copy so that records never share mutable sub-objects with
            # each other or with the input; editing one record must not
            # silently change the others.
            record = copy.deepcopy(record)

            # The MD5 of the record (with 'id' still empty) serves as the
            # unique id of the dataset.
            serialized = json.dumps(record, sort_keys=True).encode('utf-8')
            record['id'] = hashlib.md5(serialized).hexdigest()

            records.append(record)

    return records

if __name__ == "__main__":
if len(sys.argv) != 3:
Expand Down Expand Up @@ -87,11 +163,14 @@ def xml_to_json(xml_string):
xml_content = xml_file.read()

# Convert XML to JSON
json_output = xml_to_json(xml_content)
json_data = xml_to_json(xml_content)

# Transform JSON to desired format
transformed_data = transform(json_data)

# Save the JSON output to the output file
with open(output_file, 'w') as json_file:
json_file.write(json_output)
# Save the transformed JSON output to the output file
with open(output_file, 'w', encoding='utf-8') as json_file:
json.dump(transformed_data, json_file, indent=4)

print(f"XML content has been converted to JSON and saved as '{output_file}'")
print(f"XML content has been converted to JSON and transformed, saved as '{output_file}'")

Loading

0 comments on commit 43f9381

Please sign in to comment.