Skip to content

Commit a17d06b

Browse files
authored
Merge pull request #16 from commit-0/checks
GitHub actions for build pipeline
2 parents 8c39bfa + 2c079c3 commit a17d06b

File tree

10 files changed

+168
-35
lines changed

10 files changed

+168
-35
lines changed

.github/workflows/system.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: system
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches: [main]
7+
8+
jobs:
9+
system:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v3
13+
- name: Install uv
14+
uses: astral-sh/setup-uv@v2
15+
- name: Set up Python
16+
run: uv venv --python 3.12
17+
- name: Set up Docker
18+
uses: docker/setup-buildx-action@v3
19+
- name: Install the project
20+
run: uv sync
21+
- name: Clone
22+
run: uv run commit0 clone simpy
23+
- name: Setup
24+
run: uv run commit0 build simpy
25+
- name: Test
26+
run: uv run commit0 test-reference simpy tests/test_event.py::test_succeed

commit0/__main__.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,58 @@
33
import commit0.harness.setup
44
import copy
55
import sys
6+
import os
67
import hydra
78
from hydra.core.config_store import ConfigStore
89
from commit0.configs.config_class import Commit0Config
10+
from commit0.harness.constants import COMMANDS, SPLIT
911

1012

1113
def main() -> None:
1214
command = sys.argv[1]
15+
if command not in COMMANDS:
16+
raise ValueError(
17+
f"command must be from {', '.join(COMMANDS)}, but you provided {command}"
18+
)
1319
# type check config values
1420
cs = ConfigStore.instance()
15-
cs.store(name="base", node=Commit0Config)
21+
cs.store(name="user", node=Commit0Config)
1622
# have hydra ignore all command-line arguments
1723
sys_argv = copy.deepcopy(sys.argv)
1824
sys.argv = [sys.argv[0]]
1925
hydra.initialize(version_base=None, config_path="configs")
20-
config = hydra.compose(config_name="base")
26+
config = hydra.compose(config_name="user")
2127
# after hydra gets all configs, put command-line arguments back
2228
sys.argv = sys_argv
29+
# repo_split: split from command line has a higher priority than split in hydra
30+
if command in ["clone", "build"]:
31+
if len(sys.argv) == 3:
32+
if sys.argv[2] not in SPLIT:
33+
raise ValueError(
34+
f"repo split must be from {', '.join(SPLIT.keys())}, but you provided {sys.argv[2]}"
35+
)
36+
config.repo_split = sys.argv[2]
37+
config.base_dir = os.path.abspath(config.base_dir)
2338

2439
if command == "clone":
2540
commit0.harness.setup.main(
26-
config.dataset_name, config.dataset_split, config.base_dir
41+
config.dataset_name,
42+
config.dataset_split,
43+
config.repo_split,
44+
config.base_dir,
2745
)
2846
elif command == "build":
2947
commit0.harness.build.main(
30-
config.dataset_name, config.dataset_split, config.num_workers
48+
config.dataset_name,
49+
config.dataset_split,
50+
config.repo_split,
51+
config.num_workers,
3152
)
32-
elif command == "test":
53+
elif command == "test" or command == "test-reference":
3354
repo = sys.argv[2]
3455
test_ids = sys.argv[3]
56+
if command == "test-reference":
57+
config.branch = "reference"
3558
commit0.harness.run_pytest_ids.main(
3659
config.dataset_name,
3760
config.dataset_split,

commit0/configs/base.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ dataset_split: test
77

88
# clone related
99
base_dir: repos/
10+
repo_split: all
1011

1112
# build related
12-
build: all
1313
num_workers: 8
1414

1515
# test related

commit0/configs/config_class.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ class Commit0Config:
99

1010
# clone related
1111
base_dir: str
12+
repo_split: str
1213

1314
# build related
1415
# which repo to build, all or one repo
15-
build: str
1616
num_workers: int
1717

1818
# test related

commit0/configs/user.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
defaults:
2+
- base
3+
- _self_

commit0/harness/build.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,28 @@
66

77
from commit0.harness.docker_build import build_repo_images
88
from commit0.harness.spec import make_spec
9-
from commit0.harness.constants import RepoInstance
9+
from commit0.harness.constants import RepoInstance, SPLIT
1010

1111
logging.basicConfig(
1212
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
1313
)
1414
logger = logging.getLogger(__name__)
1515

1616

17-
def main(dataset_name: str, dataset_split: str, num_workers: int) -> None:
17+
def main(
18+
dataset_name: str, dataset_split: str, repo_split: str, num_workers: int
19+
) -> None:
1820
dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore
1921
specs = []
2022
for example in dataset:
23+
repo_name = example["repo"].split("/")[-1]
24+
if repo_split != "all" and repo_name not in SPLIT[repo_split]:
25+
continue
2126
spec = make_spec(example)
2227
specs.append(spec)
2328

2429
client = docker.from_env()
2530
build_repo_images(client, specs, num_workers)
26-
logger.info("Done building docker images")
2731

2832

2933
__all__ = []

commit0/harness/constants.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,88 @@ class RepoInstance(TypedDict):
2525
# Evaluation backends
2626
EVAL_BACKENDS = ["local", "modal"]
2727

28+
# available commands
29+
COMMANDS = ["clone", "build", "test", "test-reference"]
30+
# repo splits
31+
SPLIT_MINITORCH = ["minitorch"]
32+
SPLIT_SIMPY = ["simpy"]
33+
SPLIT_LITE = [
34+
"tinydb",
35+
"simpy",
36+
"deprecated",
37+
"wcwidth",
38+
"voluptuous",
39+
"cachetools",
40+
"imapclient",
41+
"marshmallow",
42+
"jinja",
43+
"cookiecutter",
44+
]
45+
SPLIT_ALL = [
46+
"statsmodels",
47+
"python-progressbar",
48+
"xarray",
49+
"imbalanced-learn",
50+
"web3.py",
51+
"scrapy",
52+
"seaborn",
53+
"pypdf",
54+
"pexpect",
55+
"pytest",
56+
"pylint",
57+
"joblib",
58+
"dulwich",
59+
"virtualenv",
60+
"minitorch",
61+
"networkx",
62+
"requests",
63+
"sphinx",
64+
"jedi",
65+
"moviepy",
66+
"loguru",
67+
"paramiko",
68+
"geopandas",
69+
"bitstring",
70+
"fastapi",
71+
"chardet",
72+
"tornado",
73+
"python-prompt-toolkit",
74+
"attrs",
75+
"PyBoy",
76+
"pydantic",
77+
"filesystem_spec",
78+
"tlslite-ng",
79+
"graphene",
80+
"mimesis",
81+
"babel",
82+
"dnspython",
83+
"portalocker", "cookiecutter",
84+
"pyjwt",
85+
"python-rsa",
86+
"more-itertools",
87+
"simpy",
88+
"click",
89+
"fabric",
90+
"jinja",
91+
"flask",
92+
"sqlparse",
93+
"marshmallow",
94+
"imapclient",
95+
"tinydb",
96+
"cachetools",
97+
"voluptuous",
98+
"parsel",
99+
"wcwidth",
100+
"deprecated",
101+
]
102+
103+
SPLIT = {
104+
"all": SPLIT_ALL,
105+
"minitorch": SPLIT_MINITORCH,
106+
"simpy": SPLIT_SIMPY,
107+
"lite": SPLIT_LITE,
108+
}
109+
28110

29111
class ResolvedStatus(Enum):
30112
NO = "RESOLVED_NO"

commit0/harness/run_pytest_ids.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88

99
from typing import Iterator
10+
from git import Repo
1011
from commit0.harness.constants import RUN_PYTEST_LOG_DIR, RepoInstance
1112
from commit0.harness.docker_build import (
1213
close_logger,
@@ -206,11 +207,13 @@ def main(
206207
) -> None:
207208
dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore
208209
spec = None
210+
example = None
209211
for example in dataset:
210212
if example["repo"].endswith(repo):
211213
spec = make_spec(example)
212214
break
213215
assert spec is not None, "No spec available"
216+
assert example is not None, "No example available"
214217

215218
hashed_test_ids = get_hash_string(test_ids)
216219
# set up logging
@@ -219,10 +222,17 @@ def main(
219222
log_file = log_dir / "run_pytest.log"
220223
logger = setup_logger(repo, log_file)
221224

225+
if branch == "reference":
226+
commit_id = example["reference_commit"]
227+
else:
228+
local_repo = Repo(f"{base_dir}/{repo}")
229+
local_branch = local_repo.branches[branch]
230+
commit_id = local_branch.commit.hexsha
231+
222232
# make eval file
223233
eval_script = spec.eval_script.format(
224234
local_repo=f"{base_dir}/{repo}",
225-
branch_name=branch,
235+
commit_id=commit_id,
226236
test_ids=test_ids,
227237
ip=get_ip(backend),
228238
user=get_user(),

commit0/harness/setup.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
import logging
22
import os
33

4-
import docker
54
from datasets import load_dataset
65

76
from typing import Iterator
87
from commit0.harness.utils import clone_repo
9-
from commit0.harness.constants import REPO_IMAGE_BUILD_DIR, RepoInstance
10-
from commit0.harness.docker_build import build_repo_images
11-
from commit0.harness.spec import make_spec
8+
from commit0.harness.constants import RepoInstance, SPLIT
129

1310

1411
logging.basicConfig(
@@ -17,28 +14,15 @@
1714
logger = logging.getLogger(__name__)
1815

1916

20-
def main(dataset_name: str, dataset_split: str, base_dir: str) -> None:
17+
def main(dataset_name: str, dataset_split: str, repo_split: str, base_dir: str) -> None:
2118
dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore
22-
out = dict()
23-
specs = []
2419
for example in dataset:
25-
spec = make_spec(example)
26-
specs.append(spec)
2720
repo_name = example["repo"].split("/")[-1]
28-
out[repo_name] = example
29-
out[repo_name]["local_path"] = os.path.abspath(
30-
os.path.join(base_dir, repo_name)
31-
)
21+
if repo_split != "all" and repo_name not in SPLIT[repo_split]:
22+
continue
3223
clone_url = f"https://github.com/{example['repo']}.git"
33-
clone_repo(
34-
clone_url, out[repo_name]["local_path"], example["base_commit"], logger
35-
)
36-
37-
logger.info("Start building docker images")
38-
logger.info(f"Please check {REPO_IMAGE_BUILD_DIR} for build details")
39-
client = docker.from_env()
40-
build_repo_images(client, specs)
41-
logger.info("Done building docker images")
24+
clone_dir = os.path.abspath(os.path.join(base_dir, repo_name))
25+
clone_repo(clone_url, clone_dir, example["base_commit"], logger)
4226

4327

4428
__all__ = []

commit0/harness/spec.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,12 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s
154154
"source .venv/bin/activate",
155155
f"git remote add {origin_name} ssh://{{user}}@{{ip}}:{{local_repo}}",
156156
f"git fetch {origin_name}",
157-
f"git checkout -b {{branch_name}} {origin_name}/{{branch_name}}",
157+
"git checkout {commit_id}",
158158
"git status",
159159
f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json {{test_ids}}",
160160
f"git checkout {instance['base_commit']}",
161-
f"git remote remove {origin_name}" "git status",
161+
f"git remote remove {origin_name}",
162+
"git status",
162163
]
163164
return eval_script_list
164165

0 commit comments

Comments
 (0)