Skip to content

Commit f9a8df9

Browse files
add build caching option (#514)
First touch on #499 Depends on: google/oss-fuzz#12284 The way this works is by saving a cached version of `build_fuzzers` post running of `compile` and then modifying the Dockerfiles of a project to use this cached build image + an adjusted build script. For example, for brotli the Dockerfile is originally: ```sh FROM gcr.io/oss-fuzz-base/base-builder RUN apt-get update && apt-get install -y cmake libtool make RUN git clone --depth 1 https://github.com/google/brotli.git WORKDIR brotli COPY build.sh $SRC/ COPY 01.c /src/brotli/c/fuzz/decode_fuzzer.c ``` A Dockerfile is then created which relies on the cached version, and it looks like: ```sh FROM cached_image_brotli # RUN apt-get update && apt-get install -y cmake libtool make # # RUN git clone --depth 1 https://github.com/google/brotli.git # WORKDIR brotli # COPY build.sh $SRC/ # COPY 01.c /src/brotli/c/fuzz/decode_fuzzer.c # COPY adjusted_build.sh $SRC/build.sh ``` `adjusted_build.sh` is then the script that only builds fuzzers. This means we can also use `build_fuzzers`/`compile` workflows as we know them. More specifically, this PR: - Makes it possible to build Docker images of fuzzer build containers. Does this by running `build_fuzzers`, saving the docker container and then committing the docker container to an image. This image will have a project's build set up post running of `compile`. This is then used when building fuzzers by OFG. - Supports only ASAN mode for now. Should be easy to extend to coverage too. - Currently builds images first and then uses them locally. We could extend, probably on another step of this, to use containers pushed by OSS-Fuzz itself. - Only does the caching if a "cache-build-script" exists (added a few for some projects) which contains the build instructions post-build process. 
It should be easy to extend such that we can rely on some DB of auto-generated build scripts as well (ref: google/oss-fuzz#11937) but I think it's nice to have both the option of us creating the scripts ourselves + an auto-generated DB. --------- Signed-off-by: David Korczynski <[email protected]>
1 parent abb5a5f commit f9a8df9

File tree

11 files changed

+285
-1
lines changed

11 files changed

+285
-1
lines changed

experiment/builder_runner.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242

4343
# The directory in the oss-fuzz image
4444
JCC_DIR = '/usr/local/bin'
45-
4645
RUN_TIMEOUT: int = 30
4746
CLOUD_EXP_MAX_ATTEMPT = 5
4847

@@ -534,7 +533,24 @@ def build_target_local(self,
534533
log_path: str,
535534
sanitizer: str = 'address') -> bool:
536535
"""Builds a target with OSS-Fuzz."""
536+
537537
logger.info('Building %s with %s', generated_project, sanitizer)
538+
539+
if oss_fuzz_checkout.ENABLE_CACHING and oss_fuzz_checkout.is_image_cached(
540+
self.benchmark.project, sanitizer):
541+
logger.info('We should use cached instance.')
542+
# Rewrite for caching.
543+
oss_fuzz_checkout.rewrite_project_to_cached_project(
544+
self.benchmark.project, generated_project, sanitizer)
545+
546+
# Prepare build
547+
oss_fuzz_checkout.prepare_build(self.benchmark.project, sanitizer,
548+
generated_project)
549+
550+
else:
551+
logger.info('The project does not have any cache')
552+
553+
# Build the image
538554
command = [
539555
'docker', 'build', '-t', f'gcr.io/oss-fuzz/{generated_project}',
540556
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects',
@@ -639,13 +655,15 @@ def get_coverage_local(
639655
sample_id = os.path.splitext(benchmark_target_name)[0]
640656
log_path = os.path.join(self.work_dirs.build_logs,
641657
f'{sample_id}-coverage.log')
658+
logger.info('Building project for coverage')
642659
built_coverage = self.build_target_local(generated_project,
643660
log_path,
644661
sanitizer='coverage')
645662
if not built_coverage:
646663
logger.info('Failed to make coverage build for %s', generated_project)
647664
return None, None
648665

666+
logger.info('Extracting coverage')
649667
corpus_dir = self.work_dirs.corpus(benchmark_target_name)
650668
command = [
651669
'python3',

experiment/oss_fuzz_checkout.py

+173
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424

2525
import yaml
2626

27+
from experiment import benchmark as benchmarklib
28+
2729
logger = logging.getLogger(__name__)
2830

2931
BUILD_DIR: str = 'build'
3032
GLOBAL_TEMP_DIR: str = ''
33+
# Opt-in switch for cached builds, read once at import time from the
# OFG_USE_CACHING environment variable. The value is parsed with int(), so it
# must be an integer string: '0' (the default) disables caching and any
# non-zero integer enables it.
ENABLE_CACHING = bool(int(os.getenv('OFG_USE_CACHING', '0')))
3134
# Assume OSS-Fuzz is at repo root dir by default.
3235
# This will change if temp_dir is used.
3336
OSS_FUZZ_DIR: str = os.path.join(
@@ -174,3 +177,173 @@ def get_project_repository(project: str) -> str:
174177
with open(project_yaml_path, 'r') as benchmark_file:
175178
data = yaml.safe_load(benchmark_file)
176179
return data.get('main_repo', '')
180+
181+
182+
def _get_project_cache_name(project: str) -> str:
183+
"""Gets name of cached container for a project."""
184+
return f'gcr.io.oss-fuzz.{project}_cache'
185+
186+
187+
def _get_project_cache_image_name(project: str, sanitizer: str) -> str:
188+
"""Gets name of cached Docker image for a project and a respective
189+
sanitizer."""
190+
return f'gcr.io/oss-fuzz/{project}_{sanitizer}_cache'
191+
192+
193+
def _has_cache_build_script(project: str) -> bool:
194+
"""Checks if a project has cached fuzzer build script."""
195+
cached_build_script = os.path.join('fuzzer_build_script', project)
196+
return os.path.isfile(cached_build_script)
197+
198+
199+
def _prepare_image_cache(project: str) -> bool:
  """Prepares cached images of fuzzer build containers.

  For each of the 'address' and 'coverage' sanitizers this runs OSS-Fuzz's
  `infra/helper.py build_fuzzers` with OSS_FUZZ_SAVE_CONTAINERS_NAME set in
  the environment, commits the container of that name to a sanitizer-specific
  image and finally removes the container so the name is free again.

  Args:
    project: Name of the OSS-Fuzz project to cache.

  Returns:
    False if the project has no cached build script, or if building or
    committing failed for any sanitizer; True otherwise. A failure to remove
    the intermediate container is only logged.
  """
  # Only create a cached image if we have a post-build build script.
  if not _has_cache_build_script(project):
    logger.info('No cached script for %s', project)
    return False
  logger.info('%s has a cached build script', project)

  cached_container_name = _get_project_cache_name(project)
  # NOTE(review): presumably helper.py keeps the build container under this
  # name instead of discarding it -- confirm against the OSS-Fuzz helper.
  adjusted_env = os.environ | {
      'OSS_FUZZ_SAVE_CONTAINERS_NAME': cached_container_name
  }

  logger.info('Creating a cached images')
  for sanitizer in ['address', 'coverage']:
    # Create cached image by building using OSS-Fuzz with set variable.
    build_command = [
        'python3', 'infra/helper.py', 'build_fuzzers', project, '--sanitizer',
        sanitizer
    ]
    try:
      sp.run(build_command, cwd=OSS_FUZZ_DIR, env=adjusted_env, check=True)
    except sp.CalledProcessError:
      logger.info('Failed to build fuzzer for %s.', project)
      return False

    # Commit the container to an image.
    cached_image_name = _get_project_cache_image_name(project, sanitizer)
    commit_command = [
        'docker', 'commit', cached_container_name, cached_image_name
    ]
    try:
      sp.run(commit_command, check=True)
    except sp.CalledProcessError:
      logger.info('Could not commit container %s to image %s.',
                  cached_container_name, cached_image_name)
      return False
    logger.info('Created cached image %s', cached_image_name)

    # Delete the container we created; the same container name is reused for
    # the next sanitizer.
    remove_command = ['docker', 'container', 'rm', cached_container_name]
    try:
      sp.run(remove_command, check=True)
    except sp.CalledProcessError:
      logger.info('Could not remove container %s.', cached_container_name)
  return True
243+
244+
245+
def prepare_cached_images(
    experiment_targets: list[benchmarklib.Benchmark]) -> None:
  """Builds cached Docker images once per distinct project in the targets."""
  # Several benchmarks may share a project; de-duplicate before building.
  unique_projects = {target.project for target in experiment_targets}

  logger.info('Preparing cache for %d projects', len(unique_projects))

  for project_name in unique_projects:
    _prepare_image_cache(project_name)
256+
257+
258+
def is_image_cached(project_name: str, sanitizer: str) -> bool:
  """Reports whether the cached post-build Docker image of a project exists.

  Runs `docker inspect --type=image` on the cached image name with all output
  discarded and treats a zero exit status as "image present".
  """
  inspect_command = [
      'docker', 'inspect', '--type=image',
      _get_project_cache_image_name(project_name, sanitizer)
  ]
  completed = sp.run(inspect_command,
                     check=False,
                     stdin=sp.DEVNULL,
                     stdout=sp.DEVNULL,
                     stderr=sp.STDOUT)
  return completed.returncode == 0
273+
274+
275+
def rewrite_project_to_cached_project(project_name: str, generated_project: str,
                                      sanitizer: str) -> None:
  """Rewrites Dockerfile of a project to enable cached build scripts.

  Writes `Dockerfile_<sanitizer>_cached` into the generated project's folder.
  That file starts FROM the cached image of |project_name|, keeps only the
  last COPY of the original Dockerfile plus an appended COPY installing
  `adjusted_build.sh` as `$SRC/build.sh`, and comments out every other line.
  The pristine Dockerfile is preserved once as `Dockerfile_original`, and the
  project's script from `fuzzer_build_script/` is copied in as
  `adjusted_build.sh`. Does nothing if the cached Dockerfile already exists.

  Args:
    project_name: OSS-Fuzz project whose cached image and build script to use.
    generated_project: Folder name under `<OSS_FUZZ_DIR>/projects` to rewrite.
    sanitizer: Sanitizer the cached image was built with.
  """
  cached_image_name = _get_project_cache_image_name(project_name, sanitizer)

  generated_project_folder = os.path.join(OSS_FUZZ_DIR, 'projects',
                                          generated_project)

  cached_dockerfile = os.path.join(generated_project_folder,
                                   f'Dockerfile_{sanitizer}_cached')
  if os.path.isfile(cached_dockerfile):
    logger.info('Already converted')
    return

  # Check if there is an original Dockerfile, because we should use that in
  # case, as otherwise the "Dockerfile" may be a copy of another sanitizer.
  original_dockerfile = os.path.join(generated_project_folder,
                                     'Dockerfile_original')
  if not os.path.isfile(original_dockerfile):
    shutil.copy(os.path.join(generated_project_folder, 'Dockerfile'),
                original_dockerfile)

  with open(original_dockerfile, 'r') as f:
    docker_content = f.read()

  docker_content += '\n' + 'COPY adjusted_build.sh $SRC/build.sh\n'
  docker_lines = docker_content.split('\n')

  # Keep the first FROM and the last two COPYs (the fuzzer source and the
  # adjusted build script appended above); comment out everything else.
  from_line = next(
      (idx for idx, line in enumerate(docker_lines) if line.startswith('FROM')),
      -1)
  copy_lines = [
      idx for idx, line in enumerate(docker_lines) if line.startswith('COPY')
  ]
  lines_to_keep = {from_line, *copy_lines[-2:]}

  rewritten_lines = []
  for idx, line in enumerate(docker_lines):
    if idx == from_line:
      # Replace the whole FROM line rather than a substring of it, so that
      # language-specific or tagged base images (e.g. base-builder-python)
      # do not leave a stray suffix on the cached image name.
      rewritten_lines.append(f'FROM {cached_image_name}\n')
    elif idx in lines_to_keep:
      rewritten_lines.append(f'{line}\n')
    else:
      rewritten_lines.append(f'# {line}\n')

  with open(cached_dockerfile, 'w') as f:
    f.write(''.join(rewritten_lines))

  # Copy over adjusted build script.
  shutil.copy(os.path.join('fuzzer_build_script', project_name),
              os.path.join(generated_project_folder, 'adjusted_build.sh'))
331+
332+
333+
def prepare_build(project_name: str, sanitizer: str,
                  generated_project: str) -> None:
  """Prepares the correct Dockerfile to be used for cached builds.

  Overwrites the generated project's `Dockerfile` with either the
  sanitizer-specific cached Dockerfile (when a cached image exists) or the
  preserved original. Does nothing when caching is disabled.

  Args:
    project_name: OSS-Fuzz project whose cached image is looked up.
    sanitizer: Sanitizer of the upcoming build.
    generated_project: Folder name under `<OSS_FUZZ_DIR>/projects`.

  Raises:
    FileNotFoundError: If a cached image exists but the matching
      `Dockerfile_<sanitizer>_cached` has not been generated.
  """
  if not ENABLE_CACHING:
    return

  generated_project_folder = os.path.join(OSS_FUZZ_DIR, 'projects',
                                          generated_project)
  dockerfile_to_use = os.path.join(generated_project_folder, 'Dockerfile')

  if is_image_cached(project_name, sanitizer):
    logger.info('Using cached dockerfile')
    cached_dockerfile = os.path.join(generated_project_folder,
                                     f'Dockerfile_{sanitizer}_cached')
    shutil.copy(cached_dockerfile, dockerfile_to_use)
    return

  original_dockerfile = os.path.join(generated_project_folder,
                                     'Dockerfile_original')
  if not os.path.isfile(original_dockerfile):
    # The backup is created the first time the project is rewritten for
    # caching; without it the Dockerfile has never been replaced, so it is
    # already the original and there is nothing to restore.
    logger.info('No original dockerfile backup, keeping current dockerfile')
    return

  logger.info('Using original dockerfile')
  shutil.copy(original_dockerfile, dockerfile_to_use)

fuzzer_build_script/bluez

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Post-build script for bluez: compiles the fuzz harnesses found in $SRC and
# links them against objects and static libraries that already exist in the
# source tree.
INCLUDES="-I. -I./src -I./lib -I./gobex -I/usr/local/include/glib-2.0/ -I/src/glib/_build/glib/"
STATIC_LIBS="./src/.libs/libshared-glib.a ./lib/.libs/libbluetooth-internal.a -l:libical.a -l:libicalss.a -l:libicalvcal.a -l:libdbus-1.a /src/glib/_build/glib/libglib-2.0.a"

# Compile every harness to <name>.o in the current directory.
for harness in fuzz_xml fuzz_sdp fuzz_textfile fuzz_gobex fuzz_hci; do
  $CC $CFLAGS $INCLUDES $SRC/${harness}.c -c
done

# Link each fuzzer; argument order is kept because static link order matters.
$CXX $CXXFLAGS $LIB_FUZZING_ENGINE ./src/bluetoothd-sdp-xml.o fuzz_xml.o \
  -o $OUT/fuzz_xml $STATIC_LIBS -ldl -lpthread

$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzz_sdp.o \
  -o $OUT/fuzz_sdp $STATIC_LIBS -ldl -lpthread

$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzz_textfile.o \
  -o $OUT/fuzz_textfile $STATIC_LIBS -ldl -lpthread src/textfile.o

$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzz_gobex.o ./gobex/gobex*.o \
  -o $OUT/fuzz_gobex $STATIC_LIBS -ldl -lpthread

$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzz_hci.o ./gobex/gobex*.o \
  -o $OUT/fuzz_hci $STATIC_LIBS -ldl -lpthread
27+

fuzzer_build_script/brotli

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Post-build script for brotli: rebuilds only the decode fuzzer against the
# static libraries already present in the source tree.

# Compile the harness to ./decode_fuzzer.o.
$CC $CFLAGS -c -std=c99 -I. -I./c/include \
  c/fuzz/decode_fuzzer.c

# Link the fuzzer binary.
$CXX $CXXFLAGS ./decode_fuzzer.o \
  -o $OUT/decode_fuzzer $LIB_FUZZING_ENGINE \
  ./libbrotlidec.a ./libbrotlicommon.a

# Ship the seed corpus; drop the executable bit because we will try to run
# the archive otherwise.
cp java/org/brotli/integration/fuzz_data.zip $OUT/decode_fuzzer_seed_corpus.zip
chmod a-x $OUT/decode_fuzzer_seed_corpus.zip

fuzzer_build_script/htslib

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Post-build script for htslib: refresh the library and the harness object,
# then link the fuzzer.
make -j$(nproc) libhts.a test/fuzz/hts_open_fuzzer.o

# build fuzzers
$CXX $CXXFLAGS -o "$OUT/hts_open_fuzzer" \
  test/fuzz/hts_open_fuzzer.o $LIB_FUZZING_ENGINE libhts.a \
  -lz -lbz2 -llzma -lcurl -lcrypto -lpthread

fuzzer_build_script/libraw

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Post-build script for libraw: the same harness source is built four times
# under different output names, each linked against the prebuilt static
# library.
for fuzzer_name in libraw_fuzzer libraw_cr2_fuzzer libraw_nef_fuzzer libraw_raf_fuzzer; do
  $CXX $CXXFLAGS -std=c++11 -Ilibraw \
    $SRC/libraw_fuzzer.cc -o $OUT/${fuzzer_name} \
    $LIB_FUZZING_ENGINE -lz lib/.libs/libraw.a
done

fuzzer_build_script/libsndfile

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Post-build script for libsndfile: delegate the fuzzer build to the
# project's own script, then write a libFuzzer options file per fuzzer.
./ossfuzz/ossfuzz.sh

# To make CIFuzz fast, see here for details: https://github.com/libsndfile/libsndfile/pull/796
for fuzzer in sndfile_alt_fuzzer sndfile_fuzzer; do
  {
    echo "[libfuzzer]"
    echo "close_fd_mask = 3"
  } > ${OUT}/${fuzzer}.options
done

fuzzer_build_script/mosh

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Post-build script for mosh: build the fuzzers in src/fuzz, then copy each
# one to $OUT together with a zipped seed corpus when one exists.
cd src/fuzz

# Use the machine's core count for the job limit. The previous "-j$n" relied
# on a variable this script never sets, which expands to a bare "-j"
# (unlimited parallel jobs).
make -j"$(nproc)" CFLAGS+="$CFLAGS" CXXFLAGS+="$CXXFLAGS"

for fuzzer in *_fuzzer; do
  cp "$fuzzer" "$OUT"

  # A fuzzer named foo_fuzzer takes its seeds from the directory foo_corpus.
  corpus=${fuzzer%_fuzzer}_corpus
  if [ -d "$corpus" ]; then
    zip -j "$OUT/${fuzzer}_seed_corpus.zip" "$corpus"/*
  fi
done

fuzzer_build_script/quickjs

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash -eu

# Post-build script for quickjs: compiles each harness in fuzz/ and links it
# with objects and archives that already exist in the source tree.

# build_fuzz_target NAME [OBJECT_OR_ARCHIVE...]
# Compiles fuzz/NAME.c to NAME.o and links it into $OUT/NAME together with
# the extra objects/archives and the fuzzing engine.
build_fuzz_target () {
  local target="$1"
  shift
  # $CFLAGS, $CXXFLAGS and $LIB_FUZZING_ENGINE stay unquoted on purpose: they
  # hold several flags that must be word-split.
  $CC $CFLAGS -I. -c "fuzz/$target.c" -o "$target.o"
  # "$@" is quoted so each extra argument is passed through exactly as given.
  $CXX $CXXFLAGS "$target.o" -o "$OUT/$target" "$@" $LIB_FUZZING_ENGINE
}

build_fuzz_target fuzz_eval .obj/fuzz_common.o libquickjs.fuzz.a
build_fuzz_target fuzz_compile .obj/fuzz_common.o libquickjs.fuzz.a
build_fuzz_target fuzz_regexp .obj/libregexp.fuzz.o .obj/cutils.fuzz.o .obj/libunicode.fuzz.o
13+

fuzzer_build_script/tmux

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Post-build script for tmux: "make check" builds the fuzzers, after which
# the binaries, their .options files and their dictionaries are copied to
# $OUT.
make -j"$(nproc)" check

for pattern in '*-fuzzer' '*-fuzzer.options' '*-fuzzer.dict'; do
  find "${SRC}/tmux/fuzz/" -name "${pattern}" -exec cp -v '{}' "${OUT}"/ \;
done

run_all_experiments.py

+3
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,9 @@ def main():
368368
experiment_targets = prepare_experiment_targets(args)
369369
experiment_results = []
370370

371+
if oss_fuzz_checkout.ENABLE_CACHING:
372+
oss_fuzz_checkout.prepare_cached_images(experiment_targets)
373+
371374
logger.info('Running %s experiment(s) in parallels of %s.',
372375
len(experiment_targets), str(NUM_EXP))
373376

0 commit comments

Comments
 (0)