Crash Analyzer Agent #814
Changes from all commits
agent/crash_analyzer.py

@@ -14,8 +14,207 @@
"""An LLM agent to analyze and provide insight of a fuzz target's runtime crash. | ||
Use it as a usual module locally, or as script in cloud builds. | ||
""" | ||
import argparse | ||
import os | ||
import subprocess as sp | ||
from typing import Optional | ||
|
||
import logger | ||
from agent.base_agent import BaseAgent | ||
from experiment import evaluator as evaluator_lib | ||
from experiment import oss_fuzz_checkout | ||
from experiment.workdir import WorkDirs | ||
from llm_toolkit import prompt_builder | ||
from llm_toolkit.models import LLM | ||
from llm_toolkit.prompts import Prompt | ||
from results import AnalysisResult, CrashResult, Result, RunResult | ||
from tool.base_tool import BaseTool | ||
from tool.container_tool import ProjectContainerTool | ||
from tool.lldb_tool import LLDBTool | ||
|
||
MAX_ROUND = 100 | ||
|
||
|
||
class CrashAnalyzer(BaseAgent): | ||
pass | ||
"""The Agent to analyze a runtime crash and provide insight to fuzz target.""" | ||
|
||
def __init__(self, | ||
trial: int, | ||
llm: LLM, | ||
args: argparse.Namespace, | ||
tools: Optional[list[BaseTool]] = None, | ||
name: str = '', | ||
artifact_path: str = '') -> None: | ||
super().__init__(trial, llm, args, tools, name) | ||
self.artifact_path = artifact_path | ||
|
||
def _initial_prompt(self, results: list[Result]) -> Prompt: | ||
"""Constructs initial prompt of the agent.""" | ||
last_result = results[-1] | ||
|
||
if isinstance(last_result, RunResult): | ||
crash_analyzer_prompt_builder = \ | ||
prompt_builder.CrashAnalyzerTemplateBuilder( | ||
model=self.llm, | ||
benchmark=last_result.benchmark) | ||
prompt = crash_analyzer_prompt_builder.build_crash_analyzer_prompt( | ||
last_result.benchmark, last_result.fuzz_target_source, | ||
last_result.run_error, last_result.crash_func) | ||
return prompt | ||
|
||
logger.error("Expected a RunResult object in results list", | ||
trial=self.trial) | ||
return prompt_builder.CrashAnalyzerTemplateBuilder(self.llm).build([]) | ||
|
||
def _format_lldb_execution_result( | ||
self, | ||
lldb_command: str, | ||
process: sp.CompletedProcess, | ||
previous_prompt: Optional[Prompt] = None) -> str: | ||
"""Formats a prompt based on lldb execution result.""" | ||
if previous_prompt: | ||
previous_prompt_text = previous_prompt.get() | ||
else: | ||
previous_prompt_text = '' | ||
stdout = self.llm.truncate_prompt(process.stdout, | ||
previous_prompt_text).strip() | ||
stderr = self.llm.truncate_prompt(process.stderr, | ||
stdout + previous_prompt_text).strip() | ||
return (f'<lldb command>\n{lldb_command.strip()}\n</lldb command>\n' | ||
f'<lldb output>\n{stdout}\n</lldb output>\n' | ||
f'<stderr>\n{stderr}\n</stderr>\n') | ||
|
||
def _container_handle_lldb_command(self, response: str, tool: LLDBTool, | ||
prompt: Prompt) -> Prompt: | ||
"""Handles the command from LLM with lldb tool.""" | ||
prompt_text = '' | ||
for command in self._parse_tags(response, 'lldb'): | ||
process = tool.execute_in_screen(command) | ||
prompt_text += self._format_lldb_execution_result( | ||
command, process, previous_prompt=prompt) + '\n' | ||
prompt.append(prompt_text) | ||
return prompt | ||
|
||
def _container_handle_conclusion(self, cur_round: int, response: str, | ||
crash_result: CrashResult) -> None: | ||
"""Parses LLM conclusion, analysis and suggestion.""" | ||
logger.info('----- ROUND %02d Received conclusion -----', | ||
cur_round, | ||
trial=self.trial) | ||
|
||
conclusion = self._parse_tag(response, 'conclusion') | ||
if conclusion == 'Crash is caused by bug in fuzz driver.': | ||
crash_result.true_bug = False | ||
elif conclusion == 'Crash is caused by bug in project.': | ||
crash_result.true_bug = True | ||
else: | ||
logger.error('***** Failed to match conclusion in %02d rounds *****', | ||
cur_round, | ||
trial=self.trial) | ||
|
||
crash_result.insight = self._parse_tag(response, 'analysis and suggestion') | ||
if not crash_result.insight: | ||
logger.error('Round %02d No analysis and suggestion in conclusion: %s', | ||
cur_round, | ||
response, | ||
trial=self.trial) | ||
|
||
def _container_tool_reaction(self, cur_round: int, response: str, | ||
crash_result: CrashResult) -> Optional[Prompt]: | ||
"""Validates LLM conclusion or executes its command.""" | ||
if self._parse_tag(response, 'conclusion'): | ||
return self._container_handle_conclusion(cur_round, response, | ||
crash_result) | ||
prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm, | ||
None).build([]) | ||
if self._parse_tag(response, 'lldb'): | ||
return self._container_handle_lldb_command(response, self.analyze_tool, | ||
prompt) | ||
if self._parse_tag(response, 'bash'): | ||
return self._container_handle_bash_command(response, self.check_tool, | ||
prompt) | ||
return None | ||
|
||
def execute(self, result_history: list[Result]) -> AnalysisResult: | ||
"""Executes the agent based on previous run result.""" | ||
WorkDirs(self.args.work_dirs.base, keep=True) | ||
last_result = result_history[-1] | ||
benchmark = last_result.benchmark | ||
logger.info('Executing Crash Analyzer', trial=self.trial) | ||
assert isinstance(last_result, RunResult) | ||
|
||
if not os.path.exists(last_result.artifact_path): | ||
logger.error('Artifact path %s does not exist', | ||
last_result.artifact_path, | ||
trial=self.trial) | ||
Review comment:
Good news: …
Bad news:

Already have image (with digest): gcr.io/cloud-builders/docker
2025-05-28 09:29:13 [Trial ID: 00] INFO [logger.info]: Checkign if we should use local FI
2025-05-28 09:29:13 [Trial ID: 00] INFO [logger.info]: This does not require a local FI.
2025-05-28 09:29:13 [Trial ID: 07] INFO [logger.info]: Executing Crash Analyzer
2025-05-28 09:29:13 [Trial ID: 07] ERROR [logger.error]: Artifact path /experiment/results/output-ada-url-ada_can_parse_with_base/artifacts/07.fuzz_target-F0-07/crash-5c013da7b11b7ccb2c437239fbcdbf4c53b20655 does not exist
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/workspace/ofg/agent/base_agent.py", line 280, in <module>
    BaseAgent.cloud_main()
  File "/workspace/ofg/agent/base_agent.py", line 266, in cloud_main
    result = agent.execute(result_history)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/ofg/agent/crash_analyzer.py", line 172, in execute
    evaluator_lib.Evaluator.create_ossfuzz_project_with_lldb(
  File "/workspace/ofg/experiment/evaluator.py", line 321, in create_ossfuzz_project_with_lldb
    shutil.copyfile(
  File "/usr/lib/python3.11/shutil.py", line 256, in copyfile
    with open(src, 'rb') as fsrc:
         ^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/experiment/results/output-ada-url-ada_can_parse_with_base/artifacts/07.fuzz_target-F0-07/crash-5c013da7b11b7ccb2c437239fbcdbf4c53b20655'

Would the artifact be at …? IIRC, we discussed that it's better to place the artifact at …

Thinking more about this: …
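To make the path question above concrete: with the cloud-build changes later in this PR, the crash artifact is staged on the build host under /workspace/host/... and the new -v /workspace/host/experiment:/experiment mount re-exposes it inside the experiment container at its original path. A rough sketch of that mapping, using the artifact path from the error log above (illustrative only, not code from this PR):

import os

# Artifact path as seen inside the experiment container (from the log above).
artifact_path = ('/experiment/results/output-ada-url-ada_can_parse_with_base/'
                 'artifacts/07.fuzz_target-F0-07/'
                 'crash-5c013da7b11b7ccb2c437239fbcdbf4c53b20655')

# The gsutil build step added below stages the uploaded artifact at this host
# path; it mirrors the f'/workspace/host/{artifact_path}' expression in the
# cloud build config.
host_path = f'/workspace/host/{artifact_path}'
host_dir = os.path.dirname(host_path)  # target of the `mkdir -p` build step

# The `-v /workspace/host/experiment:/experiment` docker-run mount then makes
# the staged file visible inside the container at `artifact_path` again.
print(host_dir)
print(host_path)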
    # TODO(dongge): Move these to oss_fuzz_checkout.
    generated_target_name = os.path.basename(benchmark.target_path)
    sample_id = os.path.splitext(generated_target_name)[0]
    generated_oss_fuzz_project = (
        f'{benchmark.id}-{sample_id}-lldb-{self.trial:02d}')
    generated_oss_fuzz_project = oss_fuzz_checkout.rectify_docker_tag(
        generated_oss_fuzz_project)

    # TODO(dongge): Write to OSS-Fuzz project dir files directly.
    fuzz_target_path = os.path.join(last_result.work_dirs.fuzz_targets,
                                    f'{self.trial:02d}.fuzz_target')
    with open(fuzz_target_path, 'w') as ft_file:
      ft_file.write(last_result.fuzz_target_source)
    if last_result.build_script_source:
      build_script_path = os.path.join(last_result.work_dirs.fuzz_targets,
                                       f'{self.trial:02d}.build_script')
      with open(build_script_path, 'w') as ft_file:
        ft_file.write(last_result.build_script_source)
    else:
      build_script_path = ''

    evaluator_lib.Evaluator.create_ossfuzz_project_with_lldb(
        benchmark, generated_oss_fuzz_project, fuzz_target_path, last_result,
        build_script_path, last_result.artifact_path)

    self.analyze_tool = LLDBTool(benchmark,
                                 result=last_result,
                                 name='lldb',
                                 project_name=generated_oss_fuzz_project)
    self.analyze_tool.execute('compile > /dev/null')
    # Launch LLDB and load fuzz target binary.
    self.analyze_tool.execute(f'screen -dmS lldb_session -L '
                              f'-Logfile /tmp/lldb_log.txt '
                              f'lldb /out/{last_result.benchmark.target_name}')
    self.check_tool = ProjectContainerTool(
        benchmark, name='check', project_name=generated_oss_fuzz_project)
    self.check_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
    prompt = self._initial_prompt(result_history)
    prompt.add_problem(self.analyze_tool.tutorial())
    prompt.add_problem(self.check_tool.tutorial())
    crash_result = CrashResult(benchmark=benchmark,
                               trial=last_result.trial,
                               work_dirs=last_result.work_dirs,
                               author=self,
                               chat_history={self.name: ''})
    cur_round = 1
    try:
      client = self.llm.get_chat_client(model=self.llm.get_model())
      while prompt and cur_round < MAX_ROUND:
        response = self.chat_llm(cur_round=cur_round,
                                 client=client,
                                 prompt=prompt,
                                 trial=self.trial)
        prompt = self._container_tool_reaction(cur_round, response,
                                               crash_result)
        cur_round += 1
        self._sleep_random_duration(trial=self.trial)
    finally:
      # Cleanup: stop the container.
      logger.debug('Stopping the crash analyze container %s',
                   self.analyze_tool.container_id,
                   trial=self.trial)
      self.analyze_tool.terminate()

    analysis_result = AnalysisResult(
        author=self,
        run_result=last_result,
        crash_result=crash_result,
        chat_history={self.name: crash_result.to_dict()})
    return analysis_result
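The module docstring above says the agent can be used "as a usual module locally, or as script in cloud builds". A minimal local-usage sketch, assuming the surrounding pipeline already provides an LLM instance, parsed args with work_dirs, and a result_history whose last element is a RunResult; the my_llm, my_args, and result_history names below are placeholders, not anything defined in this PR:

from agent.crash_analyzer import CrashAnalyzer

# `my_llm`, `my_args`, and `result_history` are assumed to come from the
# surrounding OSS-Fuzz-Gen pipeline; they are placeholders here.
analyzer = CrashAnalyzer(trial=7,
                         llm=my_llm,
                         args=my_args,
                         artifact_path=result_history[-1].artifact_path)

# Runs the LLDB/bash analysis loop and returns an AnalysisResult whose
# CrashResult carries `true_bug` and `insight`.
analysis_result = analyzer.execute(result_history)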
Second changed file (the cloud build helper):

@@ -32,7 +32,7 @@
import utils
from agent.base_agent import BaseAgent
from results import Result, RunResult

OF_REPO = 'https://github.com/google/oss-fuzz.git'
OFG_ROOT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
@@ -82,11 +82,42 @@ def __init__(self, args: argparse.Namespace) -> None:
  def _upload_files(self, archive_name: str, target_dir: str,
                    files_to_upload: list[str]) -> str:
    """Archive and upload files to GCS."""
    valid_files = []
    for f in files_to_upload:
      file_path = os.path.join(target_dir, f)
      if os.path.exists(file_path):
        valid_files.append(f)
      else:
        logging.error("File does not exist: %s", file_path)

    valid_files.sort()

    with tempfile.TemporaryDirectory() as tmpdirname:
      archive_path = os.path.join(tmpdirname, archive_name)
      tar_command = ['tar', '-czf', archive_path] + valid_files
      logging.error("Archive path: %s (exists: %s)", archive_path,
                    os.path.exists(archive_path))
      logging.error("Tar command: %s", ' '.join(tar_command))

      try:
        result = subprocess.run(tar_command,
                                cwd=target_dir,
                                check=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                text=True)
        logging.error("subprocess stdout:\n%s", result.stdout)
        logging.error("subprocess stderr:\n%s", result.stderr)
      except subprocess.CalledProcessError as e:
        logging.error("Tar command failed with return code %d", e.returncode)
        logging.error("stdout:\n%s", e.stdout)
        logging.error("stderr:\n%s", e.stderr)
        raise

      if os.path.exists(archive_path):
        logging.info("Successfully created archive: %s", archive_path)
      else:
        logging.error("Failed to create archive: %s", archive_path)
      return self._upload_to_gcs(archive_path)

  def _upload_to_gcs(self, local_file_path: str) -> str:
@@ -149,7 +180,8 @@ def _upload_fi_oss_fuzz_data(self) -> str:
                              files_to_upload)

  def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
                           results_dill_url: str, artifact_url: str,
                           artifact_path: str, oss_fuzz_data_url: str,
                           data_dir_url: str, new_result_filename: str) -> str:
    """Requests Cloud Build to execute the operation."""
@@ -167,7 +199,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,

    cloud_build_config = {
        'steps': [
            # Step 1: Download the dill and artifact files from GCS bucket.
            {
                'name': 'bash',
                'dir': '/workspace',
@@ -183,6 +215,23 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
                'dir': '/workspace',
                'args': ['cp', results_dill_url, 'dills/result_history.pkl']
            },
            {
                'name': 'gcr.io/cloud-builders/gsutil',
                'entrypoint': 'bash',
                'args': [
                    '-c',
                    f'mkdir -p /workspace/host/{os.path.dirname(artifact_path)}'
                ],
                'allowFailure': True,
            },
            {
                'name': 'gcr.io/cloud-builders/gsutil',
                'dir': '/workspace',
                'args': [
                    'cp', artifact_url, f'/workspace/host/{artifact_path}'
                ],
                'allowFailure': True,
            },
Review comment:
For example, before this step, we have something like:

            {
                'name': 'gcr.io/cloud-builders/gsutil',
                'entrypoint': 'bash',
                'args': [
                    '-c', f'mkdir -p {os.path.dirname(artifact_path)}'
                ]
            },
            # Step 2: Prepare OFG and OF repos.
            {
                'name':
@@ -256,6 +305,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
                    '-v',
                    '/workspace:/workspace',
                    '-v',
                    '/workspace/host/experiment:/experiment',
                    '-v',
                    '/var/run/docker.sock:/var/run/docker.sock',
                    '-e',
                    'VERTEX_AI_LOCATIONS=' +
@@ -275,7 +326,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
                    '/workspace/dills/new_result.pkl'
                ],
            },
            # Step 6: Upload the result to GCS bucket
Review comment: Thanks!
            {
                'name': 'bash',
                'dir': '/workspace',
@@ -388,12 +439,23 @@ def run(self, agent: BaseAgent, result_history: list[Result],
    ofg_url = self._prepare_and_upload_archive(result_history)
    agent_url = self._upload_to_gcs(agent_dill)
    results_url = self._upload_to_gcs(results_dill)
    artifact_url = ''
    artifact_path = ''
    if isinstance(result_history[-1], RunResult):
      artifact_path = result_history[-1].artifact_path
      if artifact_path:
        logging.info('Found artifact_path: %s in RunResult.', artifact_path)
        artifact_url = self._upload_to_gcs(artifact_path)
        logging.info('Uploaded artifact to %s', artifact_url)
      else:
        logging.error('No artifact_path found in RunResult.')
    oss_fuzz_data_url = self._upload_oss_fuzz_data()
    data_dir_url = self._upload_fi_oss_fuzz_data()

    # Step 3: Request Cloud Build.
    new_result_filename = f'{uuid.uuid4().hex}.pkl'
    build_id = self._request_cloud_build(ofg_url, agent_url, results_url,
                                         artifact_url, artifact_path,
                                         oss_fuzz_data_url, data_dir_url,
                                         new_result_filename)
@@ -416,7 +478,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],

    cloud_build_log += self._get_build_log(build_id)

    # Step 5: Deserialize dilld file.
Review comment: 👍🏼
    result = utils.deserialize_from_dill(new_result_dill)
    if not result:
      cloud_build_log += f'Failed to deserialize from dill {new_result_dill}.\n'
Review comment:
Could this be the same as _container_tool_reaction() in Prototyper? If so, we can relocate that function into base_agent so that you don't have to repeat it here. Again, future users/editors will appreciate this because they can read/modify the code in one place.

Reply:
This is different from _container_tool_reaction() in Prototyper because the prompt is designed differently.
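For readers comparing the two agents, here is a sketch of the tagged replies this agent's _container_tool_reaction() is written against; the tag names and conclusion strings come from the diff above, while the command and analysis text are invented for illustration:

# Tool-use reply: each <lldb>/<bash> block is executed and its output is fed
# back to the LLM in the next round.
example_tool_response = '''
<lldb>
bt
frame select 2
</lldb>
<bash>
grep -rn "ada_can_parse" /src | head
</bash>
'''

# Concluding reply: the <conclusion> text must match one of the two strings
# checked in _container_handle_conclusion(); the insight text is free-form.
example_conclusion_response = '''
<conclusion>
Crash is caused by bug in project.
</conclusion>
<analysis and suggestion>
(Illustrative analysis text goes here.)
</analysis and suggestion>
'''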