diff --git a/commit0/__main__.py b/commit0/__main__.py index ca875e6..967421b 100644 --- a/commit0/__main__.py +++ b/commit0/__main__.py @@ -1,18 +1,47 @@ import commit0.harness.run_pytest_ids import commit0.harness.build import commit0.harness.setup +import copy import sys +import hydra +from hydra.core.config_store import ConfigStore +from commit0.configs.config_class import Commit0Config def main() -> None: command = sys.argv[1] + # type check config values + cs = ConfigStore.instance() + cs.store(name="base", node=Commit0Config) + # have hydra to ignore all command-line arguments + sys_argv = copy.deepcopy(sys.argv) + sys.argv = [sys.argv[0]] + hydra.initialize(version_base=None, config_path="configs") + config = hydra.compose(config_name="base") + # after hydra gets all configs, put command-line arguments back + sys.argv = sys_argv if command == "clone": - commit0.harness.setup.main() + commit0.harness.setup.main( + config.dataset_name, config.dataset_split, config.base_dir + ) elif command == "build": - commit0.harness.build.main() + commit0.harness.build.main( + config.dataset_name, config.dataset_split, config.num_workers + ) elif command == "test": - commit0.harness.run_pytest_ids.main() + repo = sys.argv[2] + test_ids = sys.argv[3] + commit0.harness.run_pytest_ids.main( + config.dataset_name, + config.dataset_split, + config.base_dir, + repo, + config.branch, + test_ids, + config.backend, + config.timeout, + ) if __name__ == "__main__": diff --git a/commit0/configs/base.yaml b/commit0/configs/base.yaml new file mode 100644 index 0000000..ac56315 --- /dev/null +++ b/commit0/configs/base.yaml @@ -0,0 +1,18 @@ +defaults: + - _self_ + +# shared in all steps +dataset_name: wentingzhao/commit0_docstring +dataset_split: test + +# clone related +base_dir: repos/ + +# build related +build: all +num_workers: 8 + +# test related +backend: local +branch: ai +timeout: 1_800 diff --git a/commit0/configs/config_class.py b/commit0/configs/config_class.py new file mode 100644 index 0000000..1779878 --- /dev/null +++ b/commit0/configs/config_class.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass + + +@dataclass +class Commit0Config: + # shared in all steps + dataset_name: str + dataset_split: str + + # clone related + base_dir: str + + # build related + # which repo to build, all or one repo + build: str + num_workers: int + + # test related + backend: str + # which branch to work on + branch: str + # timeout for running pytest + timeout: int diff --git a/commit0/harness/build.py b/commit0/harness/build.py index 66654a7..0c0e1bf 100644 --- a/commit0/harness/build.py +++ b/commit0/harness/build.py @@ -4,11 +4,9 @@ from datasets import load_dataset from typing import Iterator -from omegaconf import DictConfig from commit0.harness.docker_build import build_repo_images from commit0.harness.spec import make_spec from commit0.harness.constants import RepoInstance -import hydra logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" @@ -16,16 +14,15 @@ logger = logging.getLogger(__name__) -@hydra.main(version_base=None, config_path="configs", config_name="base") -def main(config: DictConfig) -> None: - dataset: Iterator[RepoInstance] = load_dataset(hf_name, split="test") # type: ignore +def main(dataset_name: str, dataset_split: str, num_workers: int) -> None: + dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore specs = [] for example in dataset: spec = make_spec(example) specs.append(spec) client = docker.from_env() - build_repo_images(client, specs) + build_repo_images(client, specs, num_workers) logger.info("Done building docker images") diff --git a/commit0/harness/constants.py b/commit0/harness/constants.py index c43ae42..af51c71 100644 --- a/commit0/harness/constants.py +++ b/commit0/harness/constants.py @@ -1,6 +1,6 @@ from enum import Enum from pathlib import Path -from typing import TypedDict +from typing import Dict, TypedDict class RepoInstance(TypedDict): @@ -8,7 +8,7 @@ class RepoInstance(TypedDict): base_commit: str reference_commit: str setup: dict - test: str + test: Dict[str, str] # Constants - Evaluation Log Directories diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py index 0896cf3..dea9055 100644 --- a/commit0/harness/docker_build.py +++ b/commit0/harness/docker_build.py @@ -119,24 +119,14 @@ def build_image( ) # Log the build process continuously - buildlog = "" for chunk in response: if "stream" in chunk: # Remove ANSI escape sequences from the log chunk_stream = ansi_escape.sub("", chunk["stream"]) logger.info(chunk_stream.strip()) - buildlog += chunk_stream - elif "errorDetail" in chunk: - # Decode error message, raise BuildError - logger.error( - f"Error: {ansi_escape.sub('', chunk['errorDetail']['message'])}" - ) - raise docker.errors.BuildError( - chunk["errorDetail"]["message"], buildlog - ) logger.info("Image built successfully!") - except docker.errors.BuildError as e: - logger.error(f"docker.errors.BuildError during {image_name}: {e}") + except docker.errors.APIError as e: + logger.error(f"docker.errors.APIError during {image_name}: {e}") raise BuildImageError(image_name, str(e), logger) from e except Exception as e: logger.error(f"Error building image {image_name}: {e}") diff --git a/commit0/harness/docker_utils.py b/commit0/harness/docker_utils.py index 092bf8c..716b5e0 100644 --- a/commit0/harness/docker_utils.py +++ b/commit0/harness/docker_utils.py @@ -11,8 +11,8 @@ from pathlib import Path from io import BytesIO from typing import Optional, List, Union -import docker.errors +import docker.errors from docker.models.containers import Container HEREDOC_DELIMITER = "EOF_1399519320" # different from dataset HEREDOC_DELIMITERs! @@ -330,7 +330,6 @@ def log_error(x: str) -> None: def log_info(x: str) -> None: print(x) - raise_error = True elif logger == "quiet": # if logger is "quiet", don't print anything def log_info(x: str) -> None: @@ -386,20 +385,20 @@ def exec_run_with_timeout( # Local variables to store the result of executing the command exec_result = "" exec_id = None - exception = None timed_out = False # Wrapper function to run the command def run_command() -> None: - nonlocal exec_result, exec_id, exception + nonlocal exec_result, exec_id try: - assert container.client is not None, "Client did not load" - exec_id = container.client.api.exec_create(container.id, cmd)["Id"] - exec_stream = container.client.api.exec_start(exec_id, stream=True) + exec_id = container.client.api.exec_create(container=container.id, cmd=cmd)[ # pyright: ignore + "Id" + ] + exec_stream = container.client.api.exec_start(exec_id=exec_id, stream=True) # pyright: ignore for chunk in exec_stream: exec_result += chunk.decode("utf-8", errors="replace") - except Exception as e: - exception = e + except docker.errors.APIError as e: + raise Exception(f"Container {container.id} cannot execute {cmd}.\n{str(e)}") # Start the command in a separate thread thread = threading.Thread(target=run_command) @@ -407,13 +406,10 @@ def run_command() -> None: thread.start() thread.join(timeout) - if exception: - raise exception - # If the thread is still alive, the command timed out if thread.is_alive(): if exec_id is not None: - exec_pid = container.client.api.exec_inspect(exec_id)["Pid"] + exec_pid = container.client.api.exec_inspect(exec_id=exec_id)["Pid"] # pyright: ignore container.exec_run(f"kill -TERM {exec_pid}", detach=True) timed_out = True end_time = time.time() diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py index ad21578..c51b986 100644 --- a/commit0/harness/run_pytest_ids.py +++ b/commit0/harness/run_pytest_ids.py @@ -6,10 +6,8 @@ from pathlib import Path import logging -from omegaconf import DictConfig, OmegaConf -import hydra - -from commit0.harness.constants import RUN_PYTEST_LOG_DIR +from typing import Iterator +from commit0.harness.constants import RUN_PYTEST_LOG_DIR, RepoInstance from commit0.harness.docker_build import ( close_logger, setup_logger, @@ -196,39 +194,46 @@ def run_modal( ) -@hydra.main(version_base=None, config_path="configs", config_name="base") -def main(config: DictConfig) -> None: - OmegaConf.to_yaml(config) - dataset = load_dataset(config.dataset_name, split="test") +def main( + dataset_name: str, + dataset_split: str, + base_dir: str, + repo: str, + branch: str, + test_ids: str, + backend: str, + timeout: int, +) -> None: + dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore spec = None for example in dataset: - if example["repo"].endswith(config.repo): + if example["repo"].endswith(repo): spec = make_spec(example) break assert spec is not None, "No spec available" - hashed_test_ids = get_hash_string(config.test_ids) + hashed_test_ids = get_hash_string(test_ids) # set up logging - log_dir = RUN_PYTEST_LOG_DIR / config.repo / hashed_test_ids + log_dir = RUN_PYTEST_LOG_DIR / repo / hashed_test_ids log_dir.mkdir(parents=True, exist_ok=True) log_file = log_dir / "run_pytest.log" - logger = setup_logger(config.repo, log_file) + logger = setup_logger(repo, log_file) # make eval file eval_script = spec.eval_script.format( - local_repo=f"{config.base_dir}/{config.repo}", - branch_name=config.branch, - test_ids=config.test_ids, - ip=get_ip(config.backend), + local_repo=f"{base_dir}/{repo}", + branch_name=branch, + test_ids=test_ids, + ip=get_ip(backend), user=get_user(), ) eval_file = Path(log_dir / "eval.sh") eval_file.write_text(eval_script) - if ExecutionBackend(config.backend) == ExecutionBackend.LOCAL: - run_docker(spec, logger, eval_file, config.timeout, log_dir) - elif ExecutionBackend(config.backend) == ExecutionBackend.MODAL: - run_modal(spec, logger, eval_file, config.timeout, log_dir) + if ExecutionBackend(backend) == ExecutionBackend.LOCAL: + run_docker(spec, logger, eval_file, timeout, log_dir) + elif ExecutionBackend(backend) == ExecutionBackend.MODAL: + run_modal(spec, logger, eval_file, timeout, log_dir) __all__ = [] diff --git a/commit0/harness/setup.py b/commit0/harness/setup.py index eaa2ff3..659fa0b 100644 --- a/commit0/harness/setup.py +++ b/commit0/harness/setup.py @@ -2,9 +2,7 @@ import os import docker -import hydra from datasets import load_dataset -from omegaconf import DictConfig from typing import Iterator from commit0.harness.utils import clone_repo @@ -19,9 +17,8 @@ logger = logging.getLogger(__name__) -@hydra.main(version_base=None, config_path="configs", config_name="base") -def main(config: DictConfig) -> None: - dataset: Iterator[RepoInstance] = load_dataset(hf_name, split="test") # type: ignore +def main(dataset_name: str, dataset_split: str, base_dir: str) -> None: + dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore out = dict() specs = [] for example in dataset: