From 0005225d25a6b3a0d75da99a225eea165bdf704a Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Thu, 23 May 2024 16:28:22 +0330
Subject: [PATCH 1/3] feat: Add logs to ingestion pipeline!

---
 dags/hivemind_etl_helpers/ingestion_pipeline.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dags/hivemind_etl_helpers/ingestion_pipeline.py b/dags/hivemind_etl_helpers/ingestion_pipeline.py
index a49ffbcd..7e6cd046 100644
--- a/dags/hivemind_etl_helpers/ingestion_pipeline.py
+++ b/dags/hivemind_etl_helpers/ingestion_pipeline.py
@@ -39,6 +39,7 @@ def __init__(self, community_id: str, collection_name: str, testing: bool = Fals
 
     def run_pipeline(self, docs: list[Document]):
         # qdrant is just collection based and doesn't have any database
+        logging.info(f"{len(docs)} docuemnts was extracted and now loading into QDrant DB!")
         qdrant_collection_name = f"{self.collection_name}_{self.platform_name}"
         vector_access = QDrantVectorAccess(collection_name=qdrant_collection_name)
         vector_store = vector_access.setup_qdrant_vector_store()
@@ -61,6 +62,7 @@ def run_pipeline(self, docs: list[Document]):
             ),
             docstore_strategy=DocstoreStrategy.UPSERTS,
         )
+        logging.info("Pipeline created, now inserting documents into pipeline!")
         try:
             nodes = pipeline.run(documents=docs, show_progress=True)
             return nodes

From 6414fdd9afdf71c8cbbd14f1966f5476df337c3a Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Thu, 23 May 2024 16:33:24 +0330
Subject: [PATCH 2/3] feat: printing qdrant creds! HAVE TO BE REMOVED!

---
 dags/hivemind_etl_helpers/debugs.py | 8 ++++++++
 dags/hivemind_mediawiki_etl.py      | 3 +++
 2 files changed, 11 insertions(+)
 create mode 100644 dags/hivemind_etl_helpers/debugs.py

diff --git a/dags/hivemind_etl_helpers/debugs.py b/dags/hivemind_etl_helpers/debugs.py
new file mode 100644
index 00000000..1b91d59f
--- /dev/null
+++ b/dags/hivemind_etl_helpers/debugs.py
@@ -0,0 +1,8 @@
+import logging
+
+from tc_hivemind_backend.db.credentials import load_qdrant_credentials
+
+
+def pring_qdrant_creds():
+    qdrant_creds = load_qdrant_credentials()
+    logging.info(f"qdrant_creds: {qdrant_creds}")
\ No newline at end of file
diff --git a/dags/hivemind_mediawiki_etl.py b/dags/hivemind_mediawiki_etl.py
index 339ec73e..2dec6019 100644
--- a/dags/hivemind_mediawiki_etl.py
+++ b/dags/hivemind_mediawiki_etl.py
@@ -5,6 +5,7 @@
 
 from airflow import DAG
 from airflow.decorators import task
+from hivemind_etl_helpers.debugs import pring_qdrant_creds
 from hivemind_etl_helpers.mediawiki_etl import process_mediawiki_etl
 from hivemind_etl_helpers.src.utils.modules import ModulesMediaWiki
 
@@ -19,6 +20,8 @@ def get_mediawiki_communities() -> list[dict[str, str | list[str]]]:
         """
         Getting all communities having mediawiki from database
         """
+        # TODO: REMOVE!!!
+        pring_qdrant_creds()
         communities = ModulesMediaWiki().get_learning_platforms()
         return communities
 

From 9a285e596d853decbd224254811fd93e04983430 Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Thu, 23 May 2024 16:43:37 +0330
Subject: [PATCH 3/3] fix: lint issues!

---
 dags/hivemind_etl_helpers/debugs.py             | 2 +-
 dags/hivemind_etl_helpers/ingestion_pipeline.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/dags/hivemind_etl_helpers/debugs.py b/dags/hivemind_etl_helpers/debugs.py
index 1b91d59f..799bd95c 100644
--- a/dags/hivemind_etl_helpers/debugs.py
+++ b/dags/hivemind_etl_helpers/debugs.py
@@ -5,4 +5,4 @@
 
 def pring_qdrant_creds():
     qdrant_creds = load_qdrant_credentials()
-    logging.info(f"qdrant_creds: {qdrant_creds}")
\ No newline at end of file
+    logging.info(f"qdrant_creds: {qdrant_creds}")
diff --git a/dags/hivemind_etl_helpers/ingestion_pipeline.py b/dags/hivemind_etl_helpers/ingestion_pipeline.py
index 7e6cd046..2d8080b7 100644
--- a/dags/hivemind_etl_helpers/ingestion_pipeline.py
+++ b/dags/hivemind_etl_helpers/ingestion_pipeline.py
@@ -39,7 +39,9 @@ def __init__(self, community_id: str, collection_name: str, testing: bool = Fals
 
     def run_pipeline(self, docs: list[Document]):
         # qdrant is just collection based and doesn't have any database
-        logging.info(f"{len(docs)} docuemnts was extracted and now loading into QDrant DB!")
+        logging.info(
+            f"{len(docs)} docuemnts was extracted and now loading into QDrant DB!"
+        )
         qdrant_collection_name = f"{self.collection_name}_{self.platform_name}"
         vector_access = QDrantVectorAccess(collection_name=qdrant_collection_name)
         vector_store = vector_access.setup_qdrant_vector_store()