Commit

Merge pull request #86 from monarch-initiative/67-include-prs-and-their-comments-in-gh-issue-view

new `PullRequest` class in `src/curate_gpt/wrappers/general/github_wrapper.py`
cmungall authored Sep 12, 2024
2 parents af6fa60 + f8dfb20 commit d549679
Showing 3 changed files with 57 additions and 9 deletions.
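
For orientation, a minimal sketch of the new model's shape, using only the fields visible in the `github_wrapper.py` diff below; the import path is derived from the changed file's location, and the sample values are purely illustrative:

```python
from curate_gpt.wrappers.general.github_wrapper import PullRequest

# Illustrative values only; the field names mirror the class added in this PR.
pr = PullRequest(
    id="PR_0001",                      # placeholder node id
    number=86,
    title="Include PRs and their comments in the gh issue view",
    user="octocat",
    labels=["enhancement"],
    state="closed",
    assignees=[],
    created_at="2024-09-12T00:00:00Z",
    body="Adds PR bodies and review comments to the github view.",
    comments=None,                     # would hold a List[Comment] when populated
)
print(pr.dict())
```

The field list parallels the existing `Issue` model shown further down in the diff.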
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
.idea/
*.sqlite

*.pem
db/
proddb/
4 changes: 4 additions & 0 deletions Makefile
@@ -118,4 +118,8 @@ load-github-maxo:
list:
	$(CURATE) collections list -p $(DB_PATH)

load-github-mixs:
	$(CURATE) -v view index -p $(DB_PATH) -c gh_mixs -m openai: --view github --init-with "{repo: GenomicsStandardsConsortium/mixs}"

load-github-nmdc-schema-issues-prs:
	$(CURATE) -v view index -p $(DB_PATH) -c gh_nmdc -m openai: --view github --init-with "{repo: microbiomedata/nmdc-schema}"
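
The `--init-with` argument in these targets is a YAML flow mapping, presumably parsed by the CLI (e.g. with `yaml.safe_load`, which the syntax suggests, though the parsing call is not part of this diff) and passed through to the github view as keyword arguments:

```python
import yaml  # PyYAML

# The flow-mapping string from the load-github-nmdc-schema-issues-prs target above.
init = yaml.safe_load("{repo: microbiomedata/nmdc-schema}")
print(init)  # -> {'repo': 'microbiomedata/nmdc-schema'}
```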
59 changes: 50 additions & 9 deletions src/curate_gpt/wrappers/general/github_wrapper.py
@@ -22,6 +36,20 @@ class Comment(BaseModel):
    body: str = None


class PullRequest(BaseModel):
    model_config = ConfigDict(protected_namespaces=())
    id: str
    number: int = None
    title: str = None
    user: str = None
    labels: List[str] = None
    state: str = None
    assignees: List[str] = None
    created_at: str = None
    body: str = None
    comments: List[Comment] = None


class Issue(BaseModel):
    model_config = ConfigDict(protected_namespaces=())
    id: str
@@ -38,6 +52,18 @@ class Issue(BaseModel):
    comments: List[Comment] = None


def pr_comments(self, pr_number: str) -> Iterator[Dict]:
    session = self.session
    url = f"https://api.github.com/repos/{self.owner}/{self.repo}/pulls/{pr_number}/comments"
    params = {"per_page": 100}

    while url:
        response = session.get(url, headers=self.headers, params=params)
        response.raise_for_status()
        yield from response.json()
        url = response.links.get("next", {}).get("url")


def get_token(token: str = None) -> Optional[str]:
    if token:
        return token
@@ -101,7 +127,7 @@ def repo_description(self) -> str:
        return self._repo_description

    def external_search(
        self, text: str, expand: bool = True, limit=None, token: str = None, **kwargs
        self, text: str, expand: bool = True, limit=None, token: str = None, **kwargs
    ) -> List[Dict]:
        token = get_token(token)
        if limit is None:
@@ -146,11 +172,11 @@ def external_search(
        return all_issues

    def objects(
        self,
        collection: str = None,
        object_ids: Optional[Iterable[str]] = None,
        token: str = None,
        **kwargs,
        self,
        collection: str = None,
        object_ids: Optional[Iterable[str]] = None,
        token: str = None,
        **kwargs,
    ) -> Iterator[Dict]:
        session = self.session
        token = get_token(token)
@@ -161,7 +187,7 @@
            sleep(5)
        logger.debug(f"Header: {headers}")
        params = {
            "state": "all",  # To fetch both open and closed issues
            "state": "all",  # To fetch both open and closed issues and PRs
            "per_page": 100,  # Fetch 100 results per page (max allowed)
        }

@@ -171,10 +197,14 @@
            issues = response.json()
            for issue in issues:
                issue_number = issue.get("number")
                issue["comments"] = list(self.issue_comments(issue_number))
                # Fetch both issue comments and PR comments
                if "pull_request" in issue:
                    issue["comments"] = list(self.pr_comments(issue_number))
                else:
                    issue["comments"] = list(self.issue_comments(issue_number))
                issue_obj = self.transform_issue(issue)
                yield issue_obj.dict()
            # Check if there are more pages to process
            # Check if there are more pages to process
            url = response.links.get("next", {}).get("url")
            if not response.from_cache:
                sleep(0.2)
@@ -219,3 +249,14 @@ def transform_issue(self, obj: Dict) -> Issue:
            ],
        )
        return issue

    def pr_comments(self, pr_number: str) -> Iterator[Dict]:
        session = self.session
        url = f"https://api.github.com/repos/{self.owner}/{self.repo}/pulls/{pr_number}/comments"
        params = {"per_page": 100}

        while url:
            response = session.get(url, headers=self.headers, params=params)
            response.raise_for_status()
            yield from response.json()
            url = response.links.get("next", {}).get("url")
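
Outside the wrapper, the pagination pattern used by the new `pr_comments` helper looks like this; a minimal standalone sketch with a plain `requests` call standing in for the wrapper's cached session, and `fetch_pr_comments` plus the `GITHUB_TOKEN` lookup as illustrative choices rather than part of the changed code:

```python
import os
from typing import Dict, Iterator

import requests


def fetch_pr_comments(owner: str, repo: str, pr_number: int) -> Iterator[Dict]:
    """Yield review comments for one PR, following GitHub's Link-header pagination."""
    headers = {}
    token = os.environ.get("GITHUB_TOKEN")  # optional; unauthenticated calls are rate-limited
    if token:
        headers["Authorization"] = f"token {token}"
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/comments"
    params = {"per_page": 100}
    while url:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        yield from response.json()
        # requests exposes the parsed Link header as response.links;
        # when there is no "next" page, url becomes None and the loop ends.
        url = response.links.get("next", {}).get("url")


# e.g. comments = list(fetch_pr_comments("monarch-initiative", "curate-gpt", 86))
```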
