-
Notifications
You must be signed in to change notification settings - Fork 5
/
postfilter_lszz.py
58 lines (40 loc) · 1.68 KB
/
postfilter_lszz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import json
import logging as log
import os
import sys
from szz.l_szz import LSZZ
from typing import List
# Timestamped log lines that also name the emitting function.
log.basicConfig(
    format='%(asctime)s :: %(funcName)s - %(levelname)s :: %(message)s',
    level=log.INFO,
)

# sys.argv[0] is the script itself, so both positional arguments are present
# only when argv holds at least three entries.
if len(sys.argv) <= 2:
    print("Usage: {} <results_folder> <repos_folder>".format(sys.argv[0]))
    sys.exit(1)

# RESULTS_FOLDER: directory scanned by main() for input *.json files; the
#   filtered outputs are written back into the same directory.
# REPOS_FOLDER: passed to LSZZ as its third argument (presumably the local
#   clone location -- confirm against the LSZZ constructor).
RESULTS_FOLDER, REPOS_FOLDER = sys.argv[1:3]

# Extension given to the output files; files already carrying it are skipped.
SUFFIX = ".lszz.json"
def select_largest_commit(repo_name: str, bic: List[str]) -> List[str]:
    """Narrow a list of candidate commits down to at most one.

    The repository is opened through ``LSZZ``, every hash in *bic* is
    resolved to a commit object, and the choice itself is delegated to
    ``LSZZ.select_largest_commit``.

    Args:
        repo_name: Repository identifier interpolated into the GitHub clone
            URL (presumably in ``owner/name`` form -- confirm with the
            producer of the result files).
        bic: Hashes of the candidate commits.

    Returns:
        A one-element list with the hex SHA of the selected commit, or an
        empty list when ``LSZZ.select_largest_commit`` returns a falsy value.
    """
    # using test:test as git login to skip private repos during clone
    repo_url = f'https://test:test@github.com/{repo_name}.git'
    szz = LSZZ(repo_name, repo_url, REPOS_FOLDER)

    # Resolve the hashes to commit objects; kept as a set, matching what is
    # handed to LSZZ.select_largest_commit.
    candidates = {szz.repository.commit(c) for c in bic}
    largest = szz.select_largest_commit(candidates)
    return [largest.hexsha] if largest else []
def main():
    """Post-filter every result file found in ``RESULTS_FOLDER``.

    Each ``*.json`` file that is not itself a ``*.lszz.json`` output is loaded
    as a list of bug-fix records. For every record the
    ``inducing_commit_hash`` entry is replaced by the result of
    ``select_largest_commit``; the updated records are then dumped to a
    sibling file whose trailing ``.json`` is swapped for ``SUFFIX``.
    """
    json_ext = ".json"
    for f in os.listdir(RESULTS_FOLDER):
        # Skip non-JSON entries and outputs produced by an earlier run.
        if not f.endswith(json_ext) or f.endswith(SUFFIX):
            continue
        log.info(f)

        # JSON text is UTF-8 by specification; do not rely on the locale.
        with open(os.path.join(RESULTS_FOLDER, f), "r", encoding="utf-8") as infile:
            bugfix_commits = json.load(infile)

        bugfix_commits_new = list()
        for bfc in bugfix_commits:
            repo_name = bfc["repo_name"]
            log.info("Processing {} {}".format(repo_name, bfc["fix_commit_hash"]))
            bfc["inducing_commit_hash"] = select_largest_commit(repo_name, bfc["inducing_commit_hash"])
            bugfix_commits_new.append(bfc)

        # Swap only the trailing extension: str.replace would also rewrite
        # any ".json" that happens to occur earlier in the file name.
        out_name = f[:-len(json_ext)] + SUFFIX
        with open(os.path.join(RESULTS_FOLDER, out_name), "w", encoding="utf-8") as outfile:
            json.dump(bugfix_commits_new, outfile)
# Script entry point: run the post-filter only when executed directly, then
# log a completion marker.
if __name__ == "__main__":
    main()
    log.info("Done!")