Joel-Joseph-George committed Feb 13, 2024
1 parent 19970c1 commit a0687dc
Showing 4 changed files with 64 additions and 79 deletions.
File renamed without changes.
11 changes: 5 additions & 6 deletions Dataupload/readme.MD
@@ -1,16 +1,16 @@
# DataUpload
# Data Upload

This Python script streamlines data uploads to a database. Instead of uploading individual files by hand, it automates the entire process in a single, user-friendly script: one command is all it takes to start the data flow.

The script first extracts data from a specified CSV file, then transforms that raw data into the format the database expects, and finally transmits the prepared data to a designated endpoint, completing the whole upload in one automated pass.
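
As a rough illustration, the end-to-end flow resembles the minimal sketch below. The base URL, endpoint, and function names here are hypothetical placeholders; the scripts in this directory are the actual reference.

```python
import csv
import requests

API_BASE = "http://localhost:8000"  # hypothetical base URL; point this at your deployment

def read_rows(csv_file_path):
    """Extract rows from a CSV file as dictionaries keyed by the header row."""
    with open(csv_file_path, "r", encoding="utf-8") as file:
        yield from csv.DictReader(file)

def upload_rows(rows, resource_url):
    """Transmit each row to the API as JSON, stopping on the first failed request."""
    for row in rows:
        response = requests.post(f"{API_BASE}{resource_url}", json=row, timeout=30)
        response.raise_for_status()

# Example with hypothetical paths:
# upload_rows(read_rows("files/example.csv"), "/resources/example")
```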

### Contents:

Within this directory, you will discover two Python scripts: `fetching.py` and `upload.py.`
Within this directory, you will discover two Python scripts: `extract.py` and `upload.py`.

#### Automated Data Extraction:

Handling large SQL dump files, particularly those of 2GB size, can be a daunting and time-consuming task when manually extracting tables. To streamline this process, the `fetching.py` script has been developed. This script systematically traverses your SQL dump, capturing data and organizing it into CSV files. Each CSV file is named after its corresponding table, ensuring convenient access for future use.
Manually extracting tables from large SQL dump files, particularly those around 2 GB, can be daunting and time-consuming. The `extract.py` script streamlines this process: it traverses your SQL dump, captures the data, and organizes it into CSV files, each named after its corresponding table for convenient access later.
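
A minimal sketch of that idea is shown below, assuming one single-row `INSERT` statement per line; it is illustrative only, and `extract.py` remains the authoritative implementation.

```python
import csv
import re

def dump_to_csvs(dump_path):
    """Scan a SQL dump line by line and append each INSERT's values to a CSV named after its table."""
    insert_pattern = re.compile(r"INSERT INTO [`\"]?(\w+)[`\"]?.*?VALUES\s*\((.*)\);", re.IGNORECASE)
    with open(dump_path, "r", encoding="utf-8", errors="replace") as dump:
        for line in dump:
            match = insert_pattern.search(line)
            if not match:
                continue
            table, values = match.groups()
            # Naive split: real dumps need quoting, escaping, and multi-row INSERTs handled properly.
            row = [value.strip(" '\"") for value in values.split(",")]
            with open(f"{table}.csv", "a", newline="", encoding="utf-8") as out:
                csv.writer(out).writerow(row)
```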


#### Data Upload Utility:
@@ -22,7 +22,6 @@ Complementing the data extraction process, `upload.py` facilitates seamless data
The upload script writes data to a specified schema within the database. If that schema does not exist, the script creates it automatically, keeping the data organized and accessible.
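
A hedged sketch of that behaviour, assuming a PostgreSQL database reached through SQLAlchemy (which `requirements.txt` installs), could look like this; the connection URL below is a placeholder, and `upload.py` contains the real logic.

```python
from sqlalchemy import create_engine, text

def ensure_schema(db_url, schema_name):
    """Create the target schema if it does not already exist, then dispose of the engine."""
    engine = create_engine(db_url)
    try:
        with engine.begin() as connection:
            connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
    finally:
        engine.dispose()

# Placeholder URL; in upload.py the schema name comes from the VACHAN_POSTGRES_SCHEMA environment variable.
# ensure_schema("postgresql+psycopg2://user:password@localhost:5432/vachan", "vachan_cms_rest_12")
```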




## Implementation Details

@@ -40,8 +39,8 @@ The Python script responsible for data upload incorporates functionality to uplo

### Execution Command:

To run the fetching script you can use this command
```python3 fetching.py```
To run the extract script, use the following command:
```python3 extract.py```

### Execution Command:

11 changes: 2 additions & 9 deletions Dataupload/requirements.txt
@@ -1,10 +1,3 @@
certifi==2021.10.8
charset-normalizer==2.0.12
idna==3.3
psycopg2
requests==2.27.1
urllib3==1.26.8
fastapi[all]==0.95.0
SQLAlchemy==2.0.9
jsonpickle==2.2.0
pytz==2023.3
requests==2.27.1
psycopg2
121 changes: 57 additions & 64 deletions Dataupload/upload.py
@@ -22,7 +22,7 @@ def create_database_schema():
postgres_schema = os.environ.get("VACHAN_POSTGRES_SCHEMA", "vachan_cms_rest_12")

#If you want to use a new schema, you can use the line below to specify its name.
# postgres_schema = os.environ.get("VACHAN_POSTGRES_SCHEMA", "DataUpload")
# postgres_schema = os.environ.get("VACHAN_POSTGRES_SCHEMA", "<schema_name>")


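# URL-encode the password so special characters are safe inside the database connection URL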
encoded_password = urllib.parse.quote(postgres_password, safe='')
@@ -49,7 +49,6 @@ def create_database_schema():


#creating token
#Token is disabled for now since the CMS is not integrated with auth. When it is, the token needs to be enabled in the request headers
# LOGIN_URL = '/v2/user/login'
# SUPER_USER = os.environ.get("VACHAN_SUPER_USERNAME")
# SUPER_PASSWORD = os.environ.get("VACHAN_SUPER_PASSWORD")
@@ -476,59 +475,59 @@ def add_bible(csv_file_path):
return usfm_list

#==========================================================================================================================
# def add_parascriptual(csv_file_path): #Only use if you want to add new parascriptual data using folder "parascriptuals"
# data_list = []

# try:
# with open(csv_file_path, 'r', encoding='utf-8') as file:
# # Create a CSV reader
# reader = csv.DictReader(file)

# # Assuming the first row is the header
# for row in reader:
# try:
# # Extracting required fields
# reference_data = json.loads(row['reference'])
# reference = {
# "book": reference_data['book'],
# "chapter": reference_data.get('chapter', 0),
# "verseNumber": reference_data.get('verseNumber', 0),
# "bookEnd": reference_data.get('bookEnd', ''),
# "chapterEnd": reference_data.get('chapterEnd', 0),
# "verseEnd": reference_data.get('verseEnd', 0)
# }
# except KeyError:
# print(f"Error: 'reference' column does not contain required keys in row: {row}")
# continue
# except json.JSONDecodeError:
# print(f"Error: 'reference' column contains invalid JSON format in row: {row}")
# continue

# # Constructing data dictionary
# data = {
# "category": row.get('category', ''),
# "title": row.get('title', ''),
# "description": row.get('description', ''),
# "content": row.get('content', ''),
# "reference": reference,
# "link": row.get('link', ''),
# "metaData": json.loads(row.get('metadata', '{}')),
# "active": row.get('active', '') == 't'
# }

# data_list.append(data)

# except FileNotFoundError:
# print(f"Error: File '{csv_file_path}' not found.")
# except Exception as e:
# print(f"An error occurred while processing {csv_file_path}: {str(e)}")

# return data_list

# data = add_parascriptual('files4/ml_TBP_1_parascriptural.csv')
# resource_name = 'ml_TBP_1_parascriptural'
# parascript_url = f"/resources/parascripturals/{resource_name}"
# upload_data(data, parascript_url)
def add_parascriptual(csv_file_path): #Only use if you want to add new parascriptural data.
data_list = []

try:
with open(csv_file_path, 'r', encoding='utf-8') as file:
# Create a CSV reader
reader = csv.DictReader(file)

# Assuming the first row is the header
for row in reader:
try:
# Extracting required fields
reference_data = json.loads(row['reference'])
reference = {
"book": reference_data['book'],
"chapter": reference_data.get('chapter', 0),
"verseNumber": reference_data.get('verseNumber', 0),
"bookEnd": reference_data.get('bookEnd', ''),
"chapterEnd": reference_data.get('chapterEnd', 0),
"verseEnd": reference_data.get('verseEnd', 0)
}
except KeyError:
print(f"Error: 'reference' column does not contain required keys in row: {row}")
continue
except json.JSONDecodeError:
print(f"Error: 'reference' column contains invalid JSON format in row: {row}")
continue

# Constructing data dictionary
data = {
"category": row.get('category', ''),
"title": row.get('title', ''),
"description": row.get('description', ''),
"content": row.get('content', ''),
"reference": reference,
"link": row.get('link', ''),
"metaData": json.loads(row.get('metadata', '{}')),
"active": row.get('active', '') == 't'
}

data_list.append(data)

except FileNotFoundError:
print(f"Error: File '{csv_file_path}' not found.")
except Exception as e:
print(f"An error occurred while processing {csv_file_path}: {str(e)}")

return data_list

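#Upload the parascriptural data parsed from the CSV above to its resource endpoint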
data = add_parascriptual('files4/ml_TBP_1_parascriptural.csv')
resource_name = 'ml_TBP_1_parascriptural'
parascript_url = f"/resources/parascripturals/{resource_name}"
upload_data(data, parascript_url)

#==========================================================================================================================

@@ -554,7 +553,7 @@ def add_bible(csv_file_path):
# Add USFM data to biblebooks

def upload_bible_data():
folder_path = 'bible'
folder_path = 'bible' #folder path to the respective data files
for filename in os.listdir(folder_path):
if filename.endswith('.csv'):
csv_file_path = os.path.join(folder_path, filename)
@@ -571,20 +570,13 @@ def upload_bible_data():
except Exception as e:
print(f"Failed to upload data for {resource_name}: {str(e)}")

#By this method you can validate file by file
# try:
# # Upload all entries for a single CSV file
# upload_data(data, bible_url)
# print(f"Success: All data for {resource_name} uploaded successfully.")
# except Exception as e:
# print(f"Failed to upload data for {resource_name}: {str(e)}")

#Call the function to upload data
upload_bible_data()

#4th
#Add vocabularies

#file paths to the respective data files
file_paths = [
'vocabularies/en_EBD_1_vocabulary.csv',
'vocabularies/hi_IRVD_1_vocabulary.csv',
@@ -623,6 +615,7 @@ def upload_bible_data():
#8th
#Add commentaries

#file paths to the respective data files
file_paths = [
'files/commentarydata_mr_BBC_1_commentary.txt',
'files/commentarydata_en_BBC_1_commentary.txt',
