From ec346106c399256529db6ac9b534c0e8a48019ab Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Thu, 12 Sep 2024 12:17:36 -0400 Subject: [PATCH 1/2] new PR class in github_wrapper --- .gitignore | 3 + Makefile | 7 ++- .../wrappers/general/github_wrapper.py | 59 ++++++++++++++++--- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index a8f9868..bd8763b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +.idea/ +*.sqlite + *.pem db/ proddb/ diff --git a/Makefile b/Makefile index 667f7af..c22b7f9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ RUN = poetry run CURATE = $(RUN) curategpt -DB_PATH = stagedb +#DB_PATH = stagedb +DB_PATH = db ONTS = cl uberon obi go envo hp mp mondo po to oba agro fbbt nbo chebi vo peco maxo TRACKERS = cl uberon obi envo hp mondo go @@ -118,4 +119,8 @@ load-github-maxo: list: $(CURATE) collections list -p $(DB_PATH) +load-github-mixs: + $(CURATE) -v view index -p $(DB_PATH) -c gh_mixs -m openai: --view github --init-with "{repo: GenomicsStandardsConsortium/mixs}" +load-github-nmdc-schema-issues-prs: + $(CURATE) -v view index -p $(DB_PATH) -c gh_nmdc -m openai: --view github --init-with "{repo: microbiomedata/nmdc-schema}" diff --git a/src/curate_gpt/wrappers/general/github_wrapper.py b/src/curate_gpt/wrappers/general/github_wrapper.py index 12c4a6f..25aa0c7 100644 --- a/src/curate_gpt/wrappers/general/github_wrapper.py +++ b/src/curate_gpt/wrappers/general/github_wrapper.py @@ -22,6 +22,20 @@ class Comment(BaseModel): body: str = None +class PullRequest(BaseModel): + model_config = ConfigDict(protected_namespaces=()) + id: str + number: int = None + title: str = None + user: str = None + labels: List[str] = None + state: str = None + assignees: List[str] = None + created_at: str = None + body: str = None + comments: List[Comment] = None + + class Issue(BaseModel): model_config = ConfigDict(protected_namespaces=()) id: str @@ -38,6 +52,18 @@ class Issue(BaseModel): comments: List[Comment] = None +def pr_comments(self, pr_number: str) -> Iterator[Dict]: + session = self.session + url = f"https://api.github.com/repos/{self.owner}/{self.repo}/pulls/{pr_number}/comments" + params = {"per_page": 100} + + while url: + response = session.get(url, headers=self.headers, params=params) + response.raise_for_status() + yield from response.json() + url = response.links.get("next", {}).get("url") + + def get_token(token: str = None) -> Optional[str]: if token: return token @@ -101,7 +127,7 @@ def repo_description(self) -> str: return self._repo_description def external_search( - self, text: str, expand: bool = True, limit=None, token: str = None, **kwargs + self, text: str, expand: bool = True, limit=None, token: str = None, **kwargs ) -> List[Dict]: token = get_token(token) if limit is None: @@ -146,11 +172,11 @@ def external_search( return all_issues def objects( - self, - collection: str = None, - object_ids: Optional[Iterable[str]] = None, - token: str = None, - **kwargs, + self, + collection: str = None, + object_ids: Optional[Iterable[str]] = None, + token: str = None, + **kwargs, ) -> Iterator[Dict]: session = self.session token = get_token(token) @@ -161,7 +187,7 @@ def objects( sleep(5) logger.debug(f"Header: {headers}") params = { - "state": "all", # To fetch both open and closed issues + "state": "all", # To fetch both open and closed issues and PRs "per_page": 100, # Fetch 100 results per page (max allowed) } @@ -171,10 +197,14 @@ def objects( issues = response.json() for issue in issues: issue_number = issue.get("number") - issue["comments"] = list(self.issue_comments(issue_number)) + # Fetch both issue comments and PR comments + if "pull_request" in issue: + issue["comments"] = list(self.pr_comments(issue_number)) + else: + issue["comments"] = list(self.issue_comments(issue_number)) issue_obj = self.transform_issue(issue) yield issue_obj.dict() - # Check if there are more pages to process + # Check if there are more pages to process url = response.links.get("next", {}).get("url") if not response.from_cache: sleep(0.2) @@ -219,3 +249,14 @@ def transform_issue(self, obj: Dict) -> Issue: ], ) return issue + + def pr_comments(self, pr_number: str) -> Iterator[Dict]: + session = self.session + url = f"https://api.github.com/repos/{self.owner}/{self.repo}/pulls/{pr_number}/comments" + params = {"per_page": 100} + + while url: + response = session.get(url, headers=self.headers, params=params) + response.raise_for_status() + yield from response.json() + url = response.links.get("next", {}).get("url") From f8dfb20a0a0b6ac23c2d97fcd96083502e7440ff Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Thu, 12 Sep 2024 12:18:55 -0400 Subject: [PATCH 2/2] revert DB_PATH --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c22b7f9..c5fa393 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,7 @@ RUN = poetry run CURATE = $(RUN) curategpt -#DB_PATH = stagedb -DB_PATH = db +DB_PATH = stagedb ONTS = cl uberon obi go envo hp mp mondo po to oba agro fbbt nbo chebi vo peco maxo TRACKERS = cl uberon obi envo hp mondo go