Skip to content

Commit b6d7a9c

Browse files
Reapply "Add GreenBench implementation (#1912)" (#1931) (#1961)
This reverts commit dc03c08.
1 parent 4a47f4b commit b6d7a9c

11 files changed

+160
-6
lines changed

common/experiment_utils.py

+16
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ def get_custom_seed_corpora_filestore_path():
9797
'custom_seed_corpora')
9898

9999

100+
def get_oss_fuzz_corpora_unarchived_path():
    """Returns the path, under the experiment filestore, of the directory
    used for unarchived OSS-Fuzz corpora."""
    filestore_path = get_experiment_filestore_path()
    return posixpath.join(filestore_path, 'oss_fuzz_unarchived')
104+
105+
106+
def get_random_corpora_filestore_path():
    """Returns the path, under the experiment filestore, of the directory
    holding the randomly selected seed corpora."""
    filestore_path = get_experiment_filestore_path()
    return posixpath.join(filestore_path, 'random_corpora')
109+
110+
100111
def get_dispatcher_instance_name(experiment: str) -> str:
    """Returns the name of the dispatcher instance for |experiment|, which is
    the experiment name prefixed with 'd-'."""
    prefix = 'd-'
    return f'{prefix}{experiment}'
@@ -138,6 +149,11 @@ def is_local_experiment():
138149
return bool(environment.get('LOCAL_EXPERIMENT'))
139150

140151

152+
def is_micro_experiment():
    """Returns True if the MICRO_EXPERIMENT environment value is truthy,
    i.e. if this is a micro experiment."""
    micro_experiment_flag = environment.get('MICRO_EXPERIMENT')
    return bool(micro_experiment_flag)
155+
156+
141157
def get_trial_dir(fuzzer, benchmark, trial_id):
142158
"""Returns the unique directory for |fuzzer|, |benchmark|, and
143159
|trial_id|."""

common/random_corpus_fuzzing_utils.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Utility functions for micro-experiment run."""
15+
16+
import random
17+
import os
18+
import tempfile
19+
import multiprocessing
20+
import zipfile
21+
from typing import List
22+
23+
from common import experiment_utils
24+
from common import filesystem
25+
from common import logs
26+
27+
MAX_SOURCE_CORPUS_FILES = 1
28+
CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024
29+
30+
31+
def initialize_random_corpus_fuzzing(benchmarks: List[str], num_trials: int):
    """Prepare the random seed corpora of every benchmark for a micro
    experiment.

    Each benchmark in |benchmarks| is handled by
    prepare_benchmark_random_corpus in a multiprocessing pool, with
    |num_trials| passed through unchanged.
    """
    # One (benchmark, num_trials) argument tuple per benchmark.
    per_benchmark_args = [(benchmark, num_trials) for benchmark in benchmarks]
    with multiprocessing.Pool() as worker_pool:
        worker_pool.starmap(prepare_benchmark_random_corpus,
                            per_benchmark_args)
    logs.info('Done preparing corpus for micro experiment')
38+
39+
40+
# pylint: disable=too-many-locals
def prepare_benchmark_random_corpus(benchmark: str, num_trials: int):
    """Prepare the random seed corpora of |benchmark| for a micro experiment.

    The archive '<benchmark>.zip' found under the OSS-Fuzz corpora filestore
    path is unpacked into the benchmark's "unarchived" directory, skipping
    directory entries and files larger than CORPUS_ELEMENT_BYTES_LIMIT. Then,
    for every trial group number in range(num_trials), up to
    MAX_SOURCE_CORPUS_FILES randomly chosen files are copied into
    '<random corpora path>/<benchmark>/trial-group-<N>'.

    Args:
        benchmark: Name of the benchmark; also the stem of the corpus archive.
        num_trials: Number of trial groups to create corpora for.

    Returns:
        Always an empty list.
    """
    # Temporary location to park corpus files before they get picked randomly.
    benchmark_unarchived_corpora = os.path.join(
        experiment_utils.get_oss_fuzz_corpora_unarchived_path(), benchmark)
    filesystem.create_directory(benchmark_unarchived_corpora)

    # Unzip the OSS-Fuzz corpus.
    oss_fuzz_corpus_archive_path = os.path.join(
        experiment_utils.get_oss_fuzz_corpora_filestore_path(),
        f'{benchmark}.zip')
    with zipfile.ZipFile(oss_fuzz_corpus_archive_path) as zip_file:
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                # Skip corpus elements that are too large.
                continue
            # ZipFile.extract() treats its second argument as the directory
            # to extract into, so every member ends up inside its own
            # zero-padded, numbered directory.
            output_dir = os.path.join(benchmark_unarchived_corpora,
                                      f'{idx:016d}')
            zip_file.extract(seed_corpus_file, output_dir)
            idx += 1

    # Path used to store and feed the seed corpus for the benchmark runner;
    # each trial group will have the same seed input(s).
    benchmark_random_corpora = os.path.join(
        experiment_utils.get_random_corpora_filestore_path(), benchmark)
    filesystem.create_directory(benchmark_random_corpora)

    # Sorted so that the population handed to random.sample() does not depend
    # on directory traversal order.
    all_corpus_files = sorted(
        os.path.join(root, filename)
        for root, _, files in os.walk(benchmark_unarchived_corpora)
        for filename in files)

    # random.sample() raises ValueError when asked for more elements than the
    # population holds (e.g. an empty archive, or one whose files all exceed
    # the size limit), so never request more files than are available.
    sample_size = min(MAX_SOURCE_CORPUS_FILES, len(all_corpus_files))
    if sample_size < MAX_SOURCE_CORPUS_FILES:
        logs.warning(
            'Only %d corpus file(s) available for benchmark %s, '
            'expected at least %d.', len(all_corpus_files), benchmark,
            MAX_SOURCE_CORPUS_FILES)

    with tempfile.TemporaryDirectory() as tmp_dir:
        src_dir = os.path.join(tmp_dir, 'source')
        # All trials in the same group will start with the same
        # set of randomly selected seed files.
        for trial_group_num in range(num_trials):
            custom_corpus_trial_dir = os.path.join(
                benchmark_random_corpora, f'trial-group-{trial_group_num}')
            # Start every group from an empty staging directory.
            filesystem.recreate_directory(src_dir)

            for file in random.sample(all_corpus_files, sample_size):
                filesystem.copy(file, src_dir)

            # Copy only the src directory.
            filesystem.copytree(src_dir, custom_corpus_trial_dir)

    return []

database/models.py

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class Trial(Base):
5454
# Columns used for preemptible experiments.
5555
preemptible = Column(Boolean, default=False, nullable=False)
5656
preempted = Column(Boolean, default=False, nullable=False)
57+
trial_group_num = Column(Integer, nullable=True)
5758

5859
# Every trial has snapshots which is basically the saved state of that trial
5960
# at a given time. The snapshots field here and the trial field on Snapshot,

experiment/dispatcher.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import time
2525
from typing import List
2626

27+
from common import random_corpus_fuzzing_utils
2728
from common import experiment_path as exp_path
2829
from common import experiment_utils
2930
from common import logs
@@ -89,7 +90,7 @@ def _initialize_trials_in_db(trials: List[models.Trial]):
8990
db_utils.bulk_save(trials)
9091

9192

92-
class Experiment:
93+
class Experiment: # pylint: disable=too-many-instance-attributes
9394
"""Class representing an experiment."""
9495

9596
def __init__(self, experiment_config_filepath: str):
@@ -101,6 +102,7 @@ def __init__(self, experiment_config_filepath: str):
101102
self.experiment_name = self.config['experiment']
102103
self.git_hash = self.config['git_hash']
103104
self.preemptible = self.config.get('preemptible_runners')
105+
self.micro_experiment = self.config.get('micro_experiment')
104106

105107

106108
def build_images_for_trials(fuzzers: List[str], benchmarks: List[str],
@@ -123,7 +125,8 @@ def build_images_for_trials(fuzzers: List[str], benchmarks: List[str],
123125
models.Trial(fuzzer=fuzzer,
124126
experiment=experiment_name,
125127
benchmark=benchmark,
126-
preemptible=preemptible) for _ in range(num_trials)
128+
preemptible=preemptible,
129+
trial_group_num=trial) for trial in range(num_trials)
127130
]
128131
trials.extend(fuzzer_benchmark_trials)
129132
return trials
@@ -150,6 +153,10 @@ def dispatcher_main():
150153
experiment.preemptible)
151154
_initialize_trials_in_db(trials)
152155

156+
if experiment.micro_experiment:
157+
random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing(
158+
experiment.benchmarks, experiment.num_trials)
159+
153160
create_work_subdirs(['experiment-folders', 'measurement-folders'])
154161

155162
# Start measurer and scheduler in separate threads/processes.

experiment/resources/runner-startup-script-template.sh

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ docker run \
4242
-e BENCHMARK={{benchmark}} \
4343
-e EXPERIMENT={{experiment}} \
4444
-e TRIAL_ID={{trial_id}} \
45+
-e TRIAL_GROUP_NUM={{trial_group_num}} \
46+
-e MICRO_EXPERIMENT={{micro_experiment}} \
4547
-e MAX_TOTAL_TIME={{max_total_time}} \
4648
-e SNAPSHOT_PERIOD={{snapshot_period}} \
4749
-e NO_SEEDS={{no_seeds}} \

experiment/run_experiment.py

+3
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ def _set_default_config_values(config: Dict[str, Union[int, str, bool]],
7474
config['snapshot_period'] = config.get(
7575
'snapshot_period', experiment_utils.DEFAULT_SNAPSHOT_SECONDS)
7676
config['private'] = config.get('private', False)
77+
config['micro_experiment'] = config.get('micro_experiment', False)
7778

7879

7980
def _validate_config_parameters(
@@ -187,6 +188,8 @@ def read_and_validate_experiment_config(config_filename: str) -> Dict:
187188
Requirement(False, int, False, ''),
188189
'runner_memory':
189190
Requirement(False, str, False, ''),
191+
'micro_experiment':
192+
Requirement(False, bool, False, ''),
190193
}
191194

192195
all_params_valid = _validate_config_parameters(config, config_requirements)

experiment/runner.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,18 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
101101
return seed_corpus_path if os.path.exists(seed_corpus_path) else None
102102

103103

104+
def _unpack_random_corpus(corpus_directory):
    """Replace |corpus_directory| with the random corpus of this trial's
    group.

    The source is '<random corpora path>/<BENCHMARK>/trial-group-<N>', where
    BENCHMARK and N (TRIAL_GROUP_NUM, defaulting to 0) are read from the
    environment.
    """
    # Remove whatever is currently in the corpus directory before copying.
    shutil.rmtree(corpus_directory)

    benchmark_name = environment.get('BENCHMARK')
    group_num = environment.get('TRIAL_GROUP_NUM', 0)
    corpora_root = experiment_utils.get_random_corpora_filestore_path()
    group_subdir = f'trial-group-{int(group_num)}'
    source_corpus_dir = posixpath.join(corpora_root, benchmark_name,
                                       group_subdir)
    filestore_utils.cp(source_corpus_dir, corpus_directory, recursive=True)
114+
115+
104116
def _copy_custom_seed_corpus(corpus_directory):
105117
"""Copy custom seed corpus provided by user"""
106118
shutil.rmtree(corpus_directory)
@@ -257,7 +269,9 @@ def set_up_corpus_directories(self):
257269
FUZZ_TARGET_DIR, fuzz_target_name)
258270
input_corpus = environment.get('SEED_CORPUS_DIR')
259271
os.makedirs(input_corpus, exist_ok=True)
260-
if not environment.get('CUSTOM_SEED_CORPUS_DIR'):
272+
if environment.get('MICRO_EXPERIMENT'):
273+
_unpack_random_corpus(input_corpus)
274+
elif not environment.get('CUSTOM_SEED_CORPUS_DIR'):
261275
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
262276
else:
263277
_copy_custom_seed_corpus(input_corpus)

experiment/scheduler.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@ def start_trials(trials, experiment_config: dict, pool, core_allocation=None):
689689
return started_trials
690690

691691

692-
class TrialProxy:
692+
class TrialProxy: # pylint: disable=too-many-instance-attributes
693693
"""A proxy object for a model.Trial. TrialProxy's allow these fields to be
694694
set and retrieved without making any database calls."""
695695

@@ -701,6 +701,7 @@ def __init__(self, trial):
701701
self.time_ended = trial.time_ended
702702
self.preemptible = trial.preemptible
703703
self.cpuset = None
704+
self.trial_group_num = trial.trial_group_num
704705

705706

706707
def _initialize_logs(experiment):
@@ -729,7 +730,7 @@ def _start_trial(trial: TrialProxy, experiment_config: dict, cpuset=None):
729730
logger.info('Start trial %d.', trial.id)
730731
started = create_trial_instance(trial.fuzzer, trial.benchmark, trial.id,
731732
experiment_config, trial.preemptible,
732-
cpuset)
733+
cpuset, trial.trial_group_num)
733734
if started:
734735
trial.time_started = datetime_now()
735736
trial.cpuset = cpuset
@@ -743,6 +744,7 @@ def render_startup_script_template( # pylint: disable=too-many-arguments
743744
fuzzer: str,
744745
benchmark: str,
745746
trial_id: int,
747+
trial_group_num: int,
746748
experiment_config: dict,
747749
cpuset=None):
748750
"""Render the startup script using the template and the parameters
@@ -760,6 +762,8 @@ def render_startup_script_template( # pylint: disable=too-many-arguments
760762
'experiment': experiment,
761763
'fuzzer': fuzzer,
762764
'trial_id': trial_id,
765+
'trial_group_num': trial_group_num,
766+
'micro_experiment': experiment_config['micro_experiment'],
763767
'max_total_time': experiment_config['max_total_time'],
764768
'snapshot_period': experiment_config['snapshot_period'],
765769
'experiment_filestore': experiment_config['experiment_filestore'],
@@ -790,13 +794,15 @@ def create_trial_instance( # pylint: disable=too-many-arguments
790794
trial_id: int,
791795
experiment_config: dict,
792796
preemptible: bool,
793-
cpuset=None) -> bool:
797+
cpuset=None,
798+
trial_group_num: int = 0) -> bool:
794799
"""Create or start a trial instance for a specific
795800
trial_id,fuzzer,benchmark."""
796801
instance_name = experiment_utils.get_trial_instance_name(
797802
experiment_config['experiment'], trial_id)
798803
startup_script = render_startup_script_template(instance_name, fuzzer,
799804
benchmark, trial_id,
805+
trial_group_num,
800806
experiment_config, cpuset)
801807
startup_script_path = f'/tmp/{instance_name}-start-docker.sh'
802808
with open(startup_script_path, 'w', encoding='utf-8') as file_handle:

experiment/test_data/experiment-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,4 @@ measurers_cpus: null
4040
runner_num_cpu_cores: 1
4141
runner_machine_type: 'n1-standard-1'
4242
private: false
43+
micro_experiment: false

experiment/test_data/local-experiment-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ report_filestore: /tmp/web-reports
2222
local_experiment: true
2323
benchmarks: "benchmark-1,benchmark-2"
2424
git_hash: "git-hash"
25+
micro_experiment: false

experiment/test_scheduler.py

+2
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
114114
-e BENCHMARK={benchmark} \\
115115
-e EXPERIMENT=test-experiment \\
116116
-e TRIAL_ID=9 \\
117+
-e TRIAL_GROUP_NUM=0 \\
118+
-e MICRO_EXPERIMENT=False \\
117119
-e MAX_TOTAL_TIME=86400 \\
118120
-e SNAPSHOT_PERIOD=900 \\
119121
-e NO_SEEDS=False \\

0 commit comments

Comments
 (0)