update script and add test input and output

alpaka-group · Jul 19, 2024 · 43f9381 · 43f9381
1 parent 07ac9ff
commit 43f9381
Show file tree

Hide file tree

Showing 3 changed files with 1,919 additions and 35 deletions.
diff --git a/script/benchmark/benchmarkXml2Json.py b/script/benchmark/benchmarkXml2Json.py
@@ -1,65 +1,141 @@
-import xml.etree.ElementTree as ET
+#!/usr/bin/env python3
+
+import os
+import sys
 import json
+import hashlib
+import xml.etree.ElementTree as ET
 import html
-import sys
-import os
-import re
 
+# Function to parse the content of a <Warning> node into a dictionary
+# This is needed because the output of the Catch2 INFO macro is not printed
+# if the test case passes; only WARNING output is printed
+#
+# warning_text: text of the <Warning> node, read as one "field: value" pair
+#               per line (None or empty for a node without text)
+# Returns a dict mapping each stripped field name to its stripped value; the
+# value is kept as a string. Lines without ':' are ignored, and only the
+# first ':' of a line separates name and value, so values may contain colons.
 def parse_warning_node(warning_text):
     warning_dict = {}
+    # ElementTree reports the text of an empty <Warning/> element as None,
+    # which has no strip(); treat it as a warning without any fields
+    if not warning_text:
+        return warning_dict
     for line in warning_text.strip().split('\n'):
         if ':' in line:
             field_name, value = line.split(':', 1)
-            warning_dict[field_name.strip()] = convert_to_number(value.strip())
+            warning_dict[field_name.strip()] = value.strip()
     return warning_dict
 
-def convert_to_number(value):
-    # Check if the value is an integer
-    if re.match(r'^-?\d+$', value):
-        return int(value)
-    # Check if the value is a real number
-    elif re.match(r'^-?\d*\.\d+$', value):
-        return float(value)
-    # Return the value as a string if it's neither an integer nor a real number
-    return value
-
+# Function to recursively convert XML elements into a dictionary
+#
+# The returned dictionary is filled in this order:
+#   - every XML attribute of the element, under the attribute's own name,
+#   - the stripped element text under the key 'text' (only if non-empty),
+#   - one entry per distinct child tag.
+# <Warning> children are stored under 'MetaBenchmarkTestData' as parsed by
+# parse_warning_node(); 'mean', 'standardDeviation' and 'outliers' children
+# are merged into one dictionary per tag; all other children are collected
+# in a list per tag, and a list that ends up with exactly one entry is
+# replaced by that entry.
+# NOTE(review): attributes, 'text' and child tags share one key space, so an
+# attribute named like a child tag would make the append/update below fail -
+# confirm this cannot occur in the Catch2 reports fed to this script.
 def xml_to_dict(element):
     node_dict = {}
-    # Directly add attributes without '@'
+    # Add XML attributes directly to the dictionary
     for key, value in element.attrib.items():
-        node_dict[key] = convert_to_number(value)
+        node_dict[key] = html.unescape(value)
 
-    if element.text and element.text.strip():
-        node_dict[element.tag] = convert_to_number(html.unescape(element.text.strip()))
+    # Add text content of the element if it exists
+    text = element.text.strip() if element.text else ''
+    if text:
+        node_dict['text'] = html.unescape(text)
 
     special_tags = {'mean', 'standardDeviation', 'outliers'}
 
+    # Process each child element recursively
     for child in list(element):
         child_dict = xml_to_dict(child)
         if child.tag == 'Warning':
+            # The recursive result is not used for <Warning>; its raw text is
+            # parsed instead. A later <Warning> sibling replaces an earlier one.
             warning_dict = parse_warning_node(child.text)
             node_dict['MetaBenchmarkTestData'] = warning_dict
         elif child.tag in special_tags:
-            node_dict[child.tag] = child_dict[child.tag]
+            if child.tag not in node_dict:
+                node_dict[child.tag] = {}
+            node_dict[child.tag].update(child_dict)
         else:
             if child.tag not in node_dict:
                 node_dict[child.tag] = []
             node_dict[child.tag].append(child_dict)
 
-    return {element.tag: node_dict}
+    # Convert single-item lists to single objects
+    # (only values of existing keys are reassigned, so the dictionary does
+    # not change size while it is being iterated)
+    for key, value in node_dict.items():
+        if isinstance(value, list) and len(value) == 1:
+            node_dict[key] = value[0]
 
-def consolidate_dict(d):
-    if isinstance(d, dict):
-        for key in d:
-            if isinstance(d[key], list) and len(d[key]) == 1:
-                d[key] = d[key][0]
-            consolidate_dict(d[key])
+    return node_dict
 
+# Function to convert the XML string into a JSON-compatible dictionary
+#
+# Returns a dictionary with two entries: the root tag mapped to the root
+# element's dictionary (with its TestCase children removed), and 'TestCase'
+# mapped to the list of test case dictionaries (empty if there are none).
+# Despite the name, the function returns a dictionary, not a JSON string.
 def xml_to_json(xml_string):
     root = ET.fromstring(xml_string)
-    xml_dict = xml_to_dict(root)
-    consolidate_dict(xml_dict)
-    return json.dumps(xml_dict, indent=4)
+    root_dict = xml_to_dict(root)
+
+    # Extract and structure TestCase elements
+    test_cases = root_dict.pop('TestCase', [])
+
+    # xml_to_dict() collapses a single TestCase into a plain dictionary;
+    # wrap it so that 'TestCase' is always a list
+    if not isinstance(test_cases, list):
+        test_cases = [test_cases]
+
+    # Create the final dictionary with the root tag and TestCase entries
+    final_dict = {
+        root.tag: root_dict,
+        'TestCase': test_cases
+    }
+
+    return final_dict
+
+# Function to transform the JSON data into the desired format
+#
+# json_data: dictionary as returned by xml_to_json(), i.e. the run-level data
+#            under 'Catch2TestRun' and the test cases under 'TestCase'
+# Returns a list with one flat record per benchmark result. Each record holds
+# placeholder metadata ('meta'), run-level information ('env'), a summary of
+# the owning test case ('TestCase'), all fields of the benchmark result
+# itself, and an 'id' that is the MD5 hex digest of the record's JSON form.
+# Returns an empty list if 'Catch2TestRun' is missing.
+def transform(json_data):
+    if 'Catch2TestRun' not in json_data:
+        # Diagnostics go to stderr so they cannot mix with regular output
+        print("Error: 'Catch2TestRun' not found in input JSON", file=sys.stderr)
+        return []
+
+    # Extract general metadata
+    run_info = json_data['Catch2TestRun']
+
+    # Create metadata and environment subobjects
+    # (all metadata fields are placeholders for now)
+    meta = {
+        'creator': "unknown",
+        'datetime': "unknown",
+        'hostname': "unknown",
+        'moreinterestingmetadata': "unknown"
+    }
+
+    env = {
+        'type': 'Catch2TestRun',
+        'kind': run_info.get('name', 'unknown'),
+        'rng-seed': run_info.get('rng-seed', 'unknown'),
+        'catch2-version': run_info.get('catch2-version', 'unknown'),
+        'OverallResults': run_info.get('OverallResults', {}),
+        'OverallResultsCases': run_info.get('OverallResultsCases', {})
+    }
+
+    # Process each TestCase if it exists
+    test_cases = json_data.get('TestCase', [])
+    if not isinstance(test_cases, list):
+        test_cases = [test_cases]
+
+    output_object = []
+    for tc in test_cases:
+        # Ensure BenchmarkResults is a list
+        benchmark_results = tc.get('BenchmarkResults', [])
+        if not isinstance(benchmark_results, list):
+            benchmark_results = [benchmark_results]
+
+        for result in benchmark_results:
+            out = {
+                'id': "",
+                # Top-level copies: adding or replacing a key in one record
+                # must not show up in the others (nested values stay shared)
+                'meta': dict(meta),
+                'env': dict(env),
+                'TestCase': {
+                    'name': tc.get('name', 'unknown'),
+                    'tags': tc.get('tags', 'unknown'),
+                    'filename': tc.get('filename', 'unknown'),
+                    'line': tc.get('line', 'unknown'),
+                    'OverallResult': tc.get('OverallResult', {}),
+                    'MetaBenchmarkTestData': tc.get('MetaBenchmarkTestData', {})
+                },
+            }
+
+            # Add benchmark result details
+            out.update(result)
+
+            # Insert md5sum as unique id of the dataset; the digest is
+            # computed before the id itself is stored in the record, and
+            # sort_keys makes it independent of key order
+            digest = hashlib.md5(json.dumps(out, sort_keys=True).encode('utf-8')).hexdigest()
+            out['id'] = digest
+
+            output_object.append(out)
+
+    return output_object
 
 if __name__ == "__main__":
     if len(sys.argv) != 3:
@@ -87,11 +163,14 @@ def xml_to_json(xml_string):
         xml_content = xml_file.read()
 
     # Convert XML to JSON
-    json_output = xml_to_json(xml_content)
+    json_data = xml_to_json(xml_content)
+
+    # Transform JSON to desired format
+    transformed_data = transform(json_data)
 
-    # Save the JSON output to the output file
-    with open(output_file, 'w') as json_file:
-        json_file.write(json_output)
+    # Save the transformed JSON output to the output file
+    with open(output_file, 'w', encoding='utf-8') as json_file:
+        json.dump(transformed_data, json_file, indent=4)
 
-    print(f"XML content has been converted to JSON and saved as '{output_file}'")
+    print(f"XML content has been converted to JSON and transformed, saved as '{output_file}'")