-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Functional to OOP refactor for fetch_issues method #105
base: main
Are you sure you want to change the base?
Changes from 8 commits
be0fb2d
fe6884d
9ee4747
6b12419
549699b
3278cd5
4d2f753
ef4da20
db159cc
4d6e3f6
1cbbf69
06fa34e
01fed3d
3dc679d
fe72a60
cd9d8b6
345805b
f290eb8
187cfa5
90fae80
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
# flake8: noqa | ||
from .comments import fetch_comments | ||
from .commit import fetch_commits | ||
from .issues import fetch_issues | ||
from .issues import GithubIssueExtraction | ||
from .pull_requests import fetch_pull_requests | ||
|
||
|
||
class GithubExtraction(GithubIssueExtraction):
    """
    single entry point for the github extraction methods

    the class defines nothing of its own; all of its behaviour
    is inherited from `GithubIssueExtraction`
    """
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2,96 +2,131 @@ | |||||
|
||||||
import neo4j | ||||||
from github.neo4j_storage.neo4j_connection import Neo4jConnection | ||||||
from hivemind_etl_helpers.src.db.github.schema import GitHubIssue | ||||||
|
||||||
|
||||||
def fetch_raw_issues(
    repository_id: list[int],
    from_date: datetime | None = None,
) -> list[neo4j._data.Record]:
    """
    read the raw issue records of the given repositories from neo4j

    Parameters
    ------------
    repository_id : list[int]
        ids of the repositories whose issues should be read
    from_date : datetime | None
        when given, only the issues whose `updated_at` is on or after
        this date are returned
        default is `None`, meaning no date filtering is applied

    Returns
    --------
    raw_records : list[neo4j._data.Record]
        one neo4j record per issue, ordered by the issue creation date
    """
    driver = Neo4jConnection().connect_neo4j()

    match_clause = """MATCH (i:Issue)<-[:CREATED]-(user:GitHubUser)
        WHERE
        i.repository_id IN $repoIds
    """
    # the date condition is optional and extends the WHERE clause above
    date_clause = (
        "AND datetime(i.updated_at) >= datetime($from_date)"
        if from_date is not None
        else ""
    )
    return_clause = """
        MATCH (repo:Repository {id: i.repository_id})
        RETURN
            user.login as author_name,
            i.id as id,
            i.title as title,
            i.body as text,
            i.state as state,
            i.state_reason as state_reason,
            i.created_at as created_at,
            i.updated_at as updated_at,
            i.closed_at as closed_at,
            i.latestSavedAt as latest_saved_at,
            i.html_url as url,
            i.repository_id as repository_id,
            repo.full_name as repository_name
        ORDER BY datetime(created_at)
    """
    query = match_clause + date_clause + return_clause

    def _run_in_transaction(tx, repoIds, from_date):
        # materialize the result while the transaction is still open
        return list(tx.run(query, repoIds=repoIds, from_date=from_date))

    with driver.session() as session:
        return session.execute_read(
            _run_in_transaction,
            repoIds=repository_id,
            from_date=from_date,
        )
|
||||||
|
||||||
def fetch_issues(
    repository_id: list[int],
    from_date: datetime | None = None,
) -> list[GitHubIssue]:
    """
    read the issues of the given repositories and wrap them as `GitHubIssue`

    Parameters
    ------------
    repository_id : list[int]
        ids of the repositories whose issues should be read
    from_date : datetime | None
        forwarded to `fetch_raw_issues` to limit the issues by date
        default is `None`, meaning no date filtering is applied

    Returns
    --------
    github_issues : list[GitHubIssue]
        one `GitHubIssue` per record returned by `fetch_raw_issues`,
        in the same order
    """
    return [
        GitHubIssue.from_dict(raw_issue)
        for raw_issue in fetch_raw_issues(repository_id, from_date)
    ]
from hivemind_etl_helpers.src.db.github.schema import GitHubIssue, GitHubIssueID | ||||||
|
||||||
|
||||||
class GithubIssueExtraction:
    """
    extract GitHub issues from the data dump stored in neo4j
    """

    def __init__(self):
        pass

    def __fetch_raw_issues(
        self,
        repository_id: list[int],
        from_date: datetime | None = None,
    ) -> list[neo4j._data.Record]:
        """
        fetch raw issues from data dump in neo4j

        Parameters
        ------------
        repository_id : list[int]
            a list of repository id to fetch their issues
        from_date : datetime | None
            when given, only the issues whose `updated_at` is on or after
            this date are fetched
            default is `None`, meaning to apply no filtering on data

        Returns
        --------
        raw_records : list[neo4j._data.Record]
            list of neo4j records as the extracted issues,
            ordered by their creation date
        """
        neo4j_connection = Neo4jConnection()
        neo4j_driver = neo4j_connection.connect_neo4j()
        query = """MATCH (i:Issue)<-[:CREATED]-(user:GitHubUser)
            WHERE
            i.repository_id IN $repoIds
        """
        # the date condition is optional and extends the WHERE clause above
        if from_date is not None:
            query += "AND datetime(i.updated_at) >= datetime($from_date)"

        query += """
            MATCH (repo:Repository {id: i.repository_id})
            RETURN
                user.login as author_name,
                i.id as id,
                i.title as title,
                i.body as text,
                i.state as state,
                i.state_reason as state_reason,
                i.created_at as created_at,
                i.updated_at as updated_at,
                i.closed_at as closed_at,
                i.latestSavedAt as latest_saved_at,
                i.html_url as url,
                i.repository_id as repository_id,
                repo.full_name as repository_name
            ORDER BY datetime(created_at)
        """

        def _exec_query(tx, repoIds, from_date):
            result = tx.run(query, repoIds=repoIds, from_date=from_date)
            # materialize the result while the transaction is still open
            return list(result)

        with neo4j_driver.session() as session:
            raw_records = session.execute_read(
                _exec_query,
                repoIds=repository_id,
                from_date=from_date,
            )

        return raw_records

    def fetch_issues(
        self,
        repository_id: list[int],
        from_date: datetime | None = None,
    ) -> list[GitHubIssue]:
        """
        fetch issues from data dump in neo4j

        Parameters
        ------------
        repository_id : list[int]
            a list of repository id to fetch their issues
        from_date : datetime | None
            when given, only the issues whose `updated_at` is on or after
            this date are fetched
            default is `None`, meaning to apply no filtering on data

        Returns
        --------
        github_issues : list[GitHubIssue]
            the extracted issues, one `GitHubIssue` per neo4j record
        """
        records = self.__fetch_raw_issues(repository_id, from_date)
        return [GitHubIssue.from_dict(record) for record in records]

    def fetch_issue_ids(
        self,
        repository_id: list[int],
        from_date: datetime | None = None,
    ) -> list[GitHubIssueID]:
        """
        fetch issue ids from data dump in neo4j

        Parameters
        ------------
        repository_id : list[int]
            a list of repository id to fetch their issue ids
        from_date : datetime | None
            when given, only the issues whose `updated_at` is on or after
            this date are fetched
            default is `None`, meaning to apply no filtering on data

        Returns
        --------
        github_issue_ids : list[GitHubIssueID]
            the extracted issue ids, one `GitHubIssueID` per neo4j record
        """
        # runs the same query as `fetch_issues`; only the wrapping type differs
        records = self.__fetch_raw_issues(repository_id, from_date)

        # building the list in one expression removes the misspelled
        # accumulator name (`github_issues_ids`) that raised a NameError
        github_issue_ids: list[GitHubIssueID] = [
            GitHubIssueID.from_dict(record) for record in records
        ]
        return github_issue_ids
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# flake8: noqa | ||
from .comment import GitHubComment | ||
from .commit import GitHubCommit | ||
from .issue import GitHubIssue | ||
from .issue import GitHubIssue, GitHubIssueID | ||
from .pull_request import GitHubPullRequest |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please remove line 5 and import `GithubIssueExtraction` directly from its file in test cases.