diff --git a/dags/hivemind_etl_helpers/src/db/github/load/prepare_deletion.py b/dags/hivemind_etl_helpers/src/db/github/load/prepare_deletion.py index ff719bf6..96581293 100644 --- a/dags/hivemind_etl_helpers/src/db/github/load/prepare_deletion.py +++ b/dags/hivemind_etl_helpers/src/db/github/load/prepare_deletion.py @@ -69,14 +69,16 @@ def _delete_issue_and_comment_docs( documents = issue_documents + comment_documents docs_to_save, doc_file_ids_to_delete = self._check_documents( - documents, identifier="id", date_field="updated_at", identifier_type="::text", + documents, + identifier="id", + date_field="updated_at", + identifier_type="::text", ) return docs_to_save, doc_file_ids_to_delete def _delete_pr_document( self, pr_docs: list[Document] ) -> tuple[list[Document], list[str]]: - docs_merged, docs_merged_pr_ids_to_delete = self._check_documents( pr_docs, identifier="id", @@ -97,14 +99,14 @@ def _delete_pr_document( documents_to_save = self._get_unique_docs(docs_merged, docs_closed, "id") return documents_to_save, list(doc_file_ids_to_delete) - + def _check_documents( - self, - documents: list[Document], - identifier: str, - date_field: str, - identifier_type: str, - ) -> tuple[list[Document], list[str]]: + self, + documents: list[Document], + identifier: str, + date_field: str, + identifier_type: str, + ) -> tuple[list[Document], list[str]]: """ a wrapper class for checking previous documents """ @@ -131,7 +133,7 @@ def _create_deletion_query( deletion_query = f""" DELETE FROM data_github - WHERE (metadata_->>'id')::text IN {deletion_ids}; + WHERE (metadata_->>'id')::integer IN {deletion_ids}; """ return deletion_query diff --git a/dags/hivemind_etl_helpers/src/db/github/transform/issues.py b/dags/hivemind_etl_helpers/src/db/github/transform/issues.py index 5d07f9eb..091cc270 100644 --- a/dags/hivemind_etl_helpers/src/db/github/transform/issues.py +++ b/dags/hivemind_etl_helpers/src/db/github/transform/issues.py @@ -59,7 +59,7 @@ def transform_comment_of_issue(data: GitHubIssue) -> Document: document : llama_index.Document the comment document within the github issue """ - # since there's no way we could have the + # since there's no way we could have the # first comment's id, we're creating one manually # note: no ids before had 9 in front of them # so this id would be unique