Skip to content

Commit

Permalink
Merge pull request #308 from TogetherCrew/feat/302-telegram-raw-vectorize
Browse files Browse the repository at this point in the history

feat: telegram vectorize, added field schema option for getting the latest document!
  • Loading branch information
amindadgar authored Oct 22, 2024
2 parents fbb843a + 95c5a05 commit 6ab0726
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions dags/hivemind_etl_helpers/ingestion_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,11 @@ def _create_payload_index(

return operation_result

def get_latest_document_date(self, field_name: str) -> datetime | None:
def get_latest_document_date(
self,
field_name: str,
field_schema: qdrant_types.PayloadSchemaType = models.PayloadSchemaType.FLOAT,
) -> datetime | None:
"""
get the latest date for the most recent available document
Expand All @@ -129,6 +133,10 @@ def get_latest_document_date(self, field_name: str) -> datetime | None:
------------
field_name : str
the datetime field name in qdrant points' payload
field_schema : qdrant_client.conversions.common_types.PayloadSchemaType
the payload schema type of the date field
by default it is assumed to be FLOAT (a float timestamp),
but it can also be DATETIME
Returns
---------
Expand All @@ -140,7 +148,7 @@ def get_latest_document_date(self, field_name: str) -> datetime | None:
try:
result = self._create_payload_index(
field_name=field_name,
field_schema=models.PayloadSchemaType.DATETIME,
field_schema=field_schema,
)
if result.status.name == "COMPLETED":
latest_document = self.qdrant_client.scroll(
Expand Down

0 comments on commit 6ab0726

Please sign in to comment.