chore: refactor, change log to output

PKU-ASAL · Jun 18, 2024 · ce4c3d4 · ce4c3d4
1 parent e464dfe
commit ce4c3d4
Show file tree

Hide file tree

Showing 14 changed files with 91 additions and 45 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -27,7 +27,9 @@ jobs:
           # Set this option if you want the action to check for the latest available version that satisfies the version spec.
           # check-latest: # optional
       - name: Install requirements
-        run: pip install -r requirements.txt
+        run: |
+          pip install -r requirements.txt
+          sudo apt update && sudo apt install graphviz
       - name: Cache wabt
         id: cache-wabt
         uses: actions/cache@v4

diff --git a/.gitignore b/.gitignore
@@ -54,6 +54,6 @@ node_modules/
 *.elf
 
 # test files
-test/
-!test/**/*.py
-!test/**/*.wasm
+test/*
+!test/*.py
+!test/*.wasm
diff --git a/README.md b/README.md
@@ -20,6 +20,8 @@ pip install --upgrade pip wheel
 pip install --force-reinstall leb128==1.0.4
 ```
 
+If you need to visualize the graph (`--visualize`), make sure `graphviz` is installed on your system; for example, run `sudo apt install graphviz`.
+
 To analyze files written in other programming languages, you must generate the corresponding WASM file in your local environment. This section provides brief instructions on how to compile C/C++ SGX programs into WASM.
 
 
@@ -106,7 +108,7 @@ If we want to execute a program which does not requrie any extra arguments and i
 python launcher.py -f PATH_TO_WASM_BINARY -s
 ```
 
-The corresponding logging and results of feasible paths will be generated in `./log` folder.
+The corresponding logs and results of feasible paths will be generated in the `output` folder.
 
 If compilicated arguments are required. For example, a `base64` program whose `main` is like:
 
@@ -192,7 +194,7 @@ Other available programs include `sgxwallet`, `SGXCryptoFile`, `verifiable-elect
 
 ### Output Report
 
-The vulnerability reports will be generated in the directory `log/`. The format of a vulnerability report is as follows:
+The vulnerability reports will be generated in the directory `output/result/PROGRAM_NAME_TIMESTAMP`. The format of a vulnerability report is as follows:
 
 ```shell
 {

diff --git a/eunomia/arch/wasm/emulator.py b/eunomia/arch/wasm/emulator.py
@@ -44,7 +44,7 @@ class bcolors:
     UNDERLINE = '\033[4m'
 # config the logger
 logging_config = {
-    'filename': f'./log/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
+    'filename': f'./output/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
     'filemode': 'w+',
     'format': '%(asctime)s | %(levelname)s | %(message)s',
 }
@@ -569,10 +569,10 @@ def calculate_coverage(self, instr, func_name):
             output_string.append(
                 "------------------------------------------\n")
 
-            with open(f'./log/coverage-function/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log', 'w') as fp:
+            with open(f'./output/coverage-function/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log', 'w') as fp:
                 fp.writelines(output_string)
 
-            with open(f'./log/coverage-instruction/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log', 'a') as fp:
+            with open(f'./output/coverage-instruction/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log', 'a') as fp:
                 fp.write(
                     f'{current_timestamp}\t\t{current_visited_instrs:<6}/{self.total_instructions:<6} ({current_visited_instrs/self.total_instructions*100:.3f}%)\n')
 

diff --git a/eunomia/arch/wasm/pathgraph.py b/eunomia/arch/wasm/pathgraph.py
@@ -30,7 +30,7 @@ class bcolors:
 
 # config the logger
 logging_config = {
-    'filename': f'./log/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
+    'filename': f'./output/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
     'filemode': 'w+',
     'format': '%(asctime)s | %(levelname)s | %(message)s',
 }

diff --git a/eunomia/arch/wasm/utils.py b/eunomia/arch/wasm/utils.py
@@ -144,7 +144,7 @@ def str_to_little_endian_int(string):
 
 def write_result(state, exit=False):
     """
-    Write result in ./log/result folder in json format
+    Write result in ./output/result folder in json format
     """
     # if it is existed, and the stderr has no output
     # it means that it is raised by ProcFailTermination
@@ -153,7 +153,7 @@ def write_result(state, exit=False):
     if exit and not state.file_sys[2]['content']:
         return
 
-    file_name = f"./log/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}.json"
+    file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}.json"
     makedirs(path.dirname(file_name), exist_ok=True)
     state_result = {}
     with open(file_name, 'w') as fp:
@@ -235,14 +235,14 @@ def write_result(state, exit=False):
 
 def write_vulnerabilities(state, vtype):
     """
-    Write result in ./log/result folder in json format
+    Write result in ./output/result folder in json format
     """
     global vulnerability_list
     if (state.current_func_name, state.instr.offset) in vulnerability_list:
         return
     else:
         vulnerability_list.append((state.current_func_name, state.instr.offset))
-    file_name = f"./log/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/bug_state_{datetime.timestamp(datetime.now()):.3f}.json"
+    file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/bug_state_{datetime.timestamp(datetime.now()):.3f}.json"
     makedirs(path.dirname(file_name), exist_ok=True)
     state_result = {}
     with open(file_name, 'w') as fp:

diff --git a/main.py b/main.py
@@ -2,24 +2,32 @@
 # -*- coding: utf-8 -*-
 
 import argparse
-import json
-import sys
 from datetime import datetime
+import json
 from os import makedirs, path
-
+import resource
 import sh
-
-from seewasm.arch.wasm.configuration import Configuration
+import sys
 
 def do_symgx(args):
+    from eunomia.arch.wasm.configuration import Configuration
+    Configuration.set_start_time(datetime.now().strftime("%Y%m%d%H%M%S"))
     from SymGX import SymGX
+
+    Configuration.set_file(args.file.name)
+
     # ecall_list must be specified
     if not args.ecall_list:
-        print("Error: --symgx requires --ecall-list")
+        print("Error: --symgx requires --ecall-list", file=sys.stderr)
         exit(1)
     SymGX(args)
 
 def do_normal(args):
+    from seewasm.arch.wasm.configuration import Configuration
+    Configuration.set_start_time(datetime.now().strftime("%Y%m%d%H%M%S"))
+
+    print(args)
+
     module_bytecode = args.file.read()
     # create the corresponding wat file
     wat_file_path = args.file.name.replace('.wasm', '.wat')
@@ -31,6 +39,7 @@ def do_normal(args):
     # conduct symbolic execution
     if args.symbolic:
         Configuration.set_verbose_flag(args.verbose)
+        Configuration.set_file(args.file.name)
         Configuration.set_entry(args.entry)
         Configuration.set_visualize(args.visualize)
         Configuration.set_source_type(args.source_type)
@@ -39,7 +48,7 @@ def do_normal(args):
         Configuration.set_incremental_solving(args.incremental)
         Configuration.set_elem_index_to_func(wat_file_path)
 
-        command_file_name = f"./log/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/command.json"
+        command_file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/command.json"
         makedirs(path.dirname(command_file_name), exist_ok=True)
         with open(command_file_name, 'w') as fp:
             json.dump({"Command": " ".join(sys.argv)}, fp, indent=4)
@@ -54,17 +63,17 @@ def do_normal(args):
         # import necessary part
         from seewasm.arch.wasm.emulator import WasmSSAEmulatorEngine
         from seewasm.arch.wasm.graph import Graph
-        from seewasm.arch.wasm.visualizator import visualize
 
         wasmVM = WasmSSAEmulatorEngine(module_bytecode)
         # run the emulator for SSA
         Graph.wasmVM = wasmVM
         Graph.initialize()
         # draw the ICFG on basic block level, and exit
         if Configuration.get_visualize():
+            from seewasm.arch.wasm.visualizator import visualize
             # draw here
-            visualize(Graph)
-
+            graph_path = path.join("output", "visualized_graph", f"{Configuration.get_file_name()}_{Configuration.get_start_time()}.gv")
+            visualize(Graph, graph_path)
             print(f"The visualization of ICFG is done.")
             return
 
@@ -128,6 +137,9 @@ def parse():
     analyze.add_argument(
         '--max-time', action='store', type=int,
         help='maximum time in seconds')
+    analyze.add_argument(
+        '--max-memory', action='store', type=int,
+        help='maximum memory in MB')
 
     symgx = parser.add_argument_group('Symgx')
     symgx.add_argument('--symgx', action='store_true', help='enable the branch of symgx', default=False)
@@ -139,11 +151,14 @@ def parse():
 
 def main():
     args = parse()
+
+    if args.max_memory:
+        resource.setrlimit(resource.RLIMIT_AS, (args.max_memory * 1024 * 1024, args.max_memory * 1024 * 1024))
+        print(f"Memory limit set to {args.max_memory} MB", flush=True)
+
     job_start_time = datetime.now()
     current_time_start = job_start_time.strftime("%Y-%m-%d %H:%M:%S")
     print(f"Start to analyze: {current_time_start}", flush=True)
-    Configuration.set_file(args.file.name)
-    Configuration.set_start_time(job_start_time.strftime("%Y%m%d%H%M%S"))
     print(f"Running...", flush=True)
 
     if args.symgx:

diff --git a/output/log/.placeholder b/output/log/.placeholder
diff --git a/output/result/.placeholder b/output/result/.placeholder
diff --git a/requirements.txt b/requirements.txt
@@ -8,5 +8,4 @@ pytest-parallel==0.1.1
 pytest-cov==4.0.0
 sh==1.14.2
 z3-solver==4.10.2.0
-wasm==1.2
 wllvm==1.3.1
diff --git a/seewasm/arch/wasm/emulator.py b/seewasm/arch/wasm/emulator.py
@@ -34,7 +34,7 @@
 
 # config the logger
 logging_config = {
-    'filename': f'./log/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
+    'filename': f'./output/log/{Configuration.get_file_name()}_{Configuration.get_start_time()}.log',
     'filemode': 'w+',
     'format': '%(asctime)s | %(levelname)s | %(message)s',
 }
@@ -46,6 +46,12 @@
     logging_config['level'] = logging.WARNING
 logging.basicConfig(**logging_config)
 
+# add console logger (warning level)
+console = logging.StreamHandler()
+console.setLevel(logging.WARNING)
+formatter = logging.Formatter('%(levelname)s: %(message)s')
+console.setFormatter(formatter)
+logging.getLogger('').addHandler(console)
 
 # =======================================
 # #         WASM Emulator               #
@@ -345,7 +351,7 @@ def emulate_basic_block(self, states, instructions, lvar=None):
                 logging.debug("got 'return' instruction, now return")
                 break
             if instruction.name == "unreachable":
-                logging.debug("got 'unreachable' instruction, now terminate")
+                logging.warn("got 'unreachable' instruction, now terminate")
                 raise ProcFailTermination(ASSERT_FAIL)
             next_states = []
             for state in states:  # TODO: embarassing parallel

diff --git a/seewasm/arch/wasm/utils.py b/seewasm/arch/wasm/utils.py
@@ -144,7 +144,7 @@ def str_to_little_endian_int(string):
 
 def write_result(state, exit=False):
     """
-    Write result in ./log/result folder in json format
+    Write result in ./output/result folder in json format
     """
     # if it is existed, and the stderr has no output
     # it means that it is raised by ProcFailTermination
@@ -157,7 +157,7 @@ def write_result(state, exit=False):
     if unsat == state.solver.check():
         return
 
-    file_name = f"./log/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}_{random():.5f}.json"
+    file_name = f"./output/result/{Configuration.get_file_name()}_{Configuration.get_start_time()}/state_{datetime.timestamp(datetime.now()):.3f}_{random():.5f}.json"
     makedirs(path.dirname(file_name), exist_ok=True)
     state_result = {}
     with open(file_name, 'w') as fp:

diff --git a/seewasm/arch/wasm/visualizator.py b/seewasm/arch/wasm/visualizator.py
@@ -35,5 +35,5 @@ def visualize(Graph, filename="wasm_ICFG.gv"):
             c.node(node_to)
             c.edge(node_from, node_to)
 
-    print("Rendering...")
-    g.render(filename, view=True)
+    print("Rendering disabled on the server.")
+    g.render(filename, view=False)
diff --git a/test/test.py b/test/test.py
@@ -1,9 +1,14 @@
+import json
 import glob
 import os
 import pytest
+import resource
 import subprocess
 import sys
 
+# Set a memory limit of 4GB
+resource.setrlimit(resource.RLIMIT_AS, (4 * 1024 * 1024 * 1024, -1))
+
 @pytest.mark.parametrize('wasm_name', [
     'sgx-dnet',
     'sgxwallet',
@@ -22,13 +27,26 @@
 ])
 
 def test_sgx_wasm_can_be_analyzed(wasm_name):
-    cmd = ['/usr/bin/env', 'bash', 'run.sh', wasm_name, '--max-time']
-    cmd.append("5")
+    cmd = ['/usr/bin/env', 'bash', 'run.sh', wasm_name, '--max-time', '5']
     subprocess.run(cmd, timeout=60, check=True)
 
 def test_sgx_wasm_can_be_fully_analyzed():
     cmd = ['/usr/bin/env', 'bash', 'run.sh', 'SGXCryptoFile']
     subprocess.run(cmd, timeout=30, check=True)
+    result_dir = glob.glob('./output/result/sgxcrypto_*')
+    # sort and use last one
+    result_dir.sort()
+    result_dir = result_dir[-1]
+    state_path = glob.glob(f'{result_dir}/bug_state*.json')
+    assert len(state_path) == 2, 'should have two bug states'
+
+def test_ecall_list_must_be_specified():
+    cmd = [sys.executable, 'main.py', '-f', 'benchmarks/sgxcrypto.wasm', '--symgx']
+    proc = subprocess.run(cmd, timeout=5, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    # return code should be 1
+    assert proc.returncode == 1, 'return code should be 1'
+    # "--symgx requires --ecall-list" msg should be in stderr
+    assert '--symgx requires --ecall-list' in proc.stderr.decode('utf-8'), 'should have --symgx requires --ecall-list in stderr'
 
 
 @pytest.mark.parametrize('wasm_path, entry', [
@@ -50,29 +68,33 @@ def test_return_simulation():
     cmd = [sys.executable, 'main.py', '-f', wasm_path, '-s', '-v', 'info', '--source_type', 'rust']
     subprocess.run(cmd, timeout=30, check=True)
 
-    result_dir = glob.glob('./log/result/test_return_*')
+    result_dir = glob.glob('./output/result/test_return_*')
     assert len(result_dir) == 1, 'more than one matching results, do you have multiple `test_return*` cases?'
     result_dir = result_dir[0]
     state_path = glob.glob(f'{result_dir}/state*.json')
     assert len(state_path) == 1, 'should have only one state output `Exit 0`'
 
-    proc = subprocess.run(['jq', '.Solution.proc_exit', state_path[0]], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
-    out = proc.stdout.decode('utf-8').strip()
-    expect = '"\\u0000"'
-    assert out == expect, f'expect {expect}, got {out}'
+    with open(state_path[0], 'r') as f:
+        state = json.load(f)
+    assert state['Solution']['proc_exit'] == "\u0000", f'exit code should be 0, got {state["Solution"]["proc_exit"]}'
 
 def test_unreachable_simulation():
     wasm_path = './test/test_unreachable.wasm'
     cmd = [sys.executable, 'main.py', '-f', wasm_path, '-s', '-v', 'info', '--source_type', 'rust']
     subprocess.run(cmd, timeout=30, check=True)
 
-    result_dir = glob.glob('./log/result/test_unreachable_*')
+    result_dir = glob.glob('./output/result/test_unreachable_*')
     assert len(result_dir) == 1, 'more than one matching results, do you have multiple `test_unreachable*` cases?'
     result_dir = result_dir[0]
     state_path = glob.glob(f'{result_dir}/state*.json')
     assert len(state_path) == 1, 'should have only one state output `null`'
+    with open(state_path[0], 'r') as f:
+        state = json.load(f)
+    assert state['Solution'] == {}, f'should have no solution, got {state["Solution"]}'
 
-    proc = subprocess.run(['jq', '.Solution.proc_exit', state_path[0]], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
-    out = proc.stdout.decode('utf-8').strip()
-    expect = 'null'
-    assert out == expect, f'expect {expect}, got {out}'
+def test_visualize_graph():
+    wasm_path = './test/hello_world.wasm'
+    cmd = [sys.executable, 'main.py', '-f', wasm_path, '-s', '-v', 'info', '--visualize']
+    subprocess.run(cmd, timeout=30, check=True)
+    result_dir = glob.glob('./output/visualized_graph/hello_world*.pdf')
+    assert len(result_dir) == 1, 'more than one matching results, do you have multiple `hello_world*` cases?'