diff --git a/Dataupload/upload.py b/Dataupload/upload.py index a5945d1c..8e1e4f10 100644 --- a/Dataupload/upload.py +++ b/Dataupload/upload.py @@ -12,6 +12,12 @@ BASE_URL = r"http://127.0.0.1:8000/v2/cms/rest" # BASE_URL = r"https://api.vachanengine.org/v2/cms/rest" # BASE_URL = r"https://stagingapi.vachanengine.org/v2/cms/rest" +TOKEN = "" + +headers = {"contentType": "application/json", + "accept": "application/json", + 'Authorization': "Bearer"+" "+ TOKEN + } def create_database_schema(): postgres_host = os.environ.get("VACHAN_POSTGRES_HOST", "localhost") @@ -85,50 +91,25 @@ def create_database_schema(): # 'Authorization': "Bearer"+" "+ TOKEN # } -SOURCEDATA = [] -# upload single data -def upload_v2_data(input_data,unit_url): - ''' upload data t=in v2 format''' - # response = requests.post(BASE_URL+unit_url, headers=headers, json=input_data) - response = requests.post(BASE_URL+unit_url, json=input_data) - - if not response.status_code == 201: - # print("resp==------------------------->",input_data) - print("resp==>",response) - print("resp==>",response.json()) + + + + +def upload_data(input_data,unit_url): + '''Upload data through POST API call''' + print("Inside data upload") + response = requests.post(BASE_URL+unit_url, json=input_data,headers=headers) + if response.status_code == 201: + print("<<<==============Resource uploaded successfully!==============>>>") + else: + print(f"Failed to create resource. Status code: {response.status_code}") + print("Response details:", response.json()) print("---------------------------------------------------------------------") - print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") - -def add_resources_v2(): - '''add resources in v2 format''' - # with open('./files/v1-v2_sources.csv','r',encoding="UTF-8") as file: - with open('./files/commentarydata_en_BBC_1_commentary.txt','r',encoding="UTF-8") as file: - csvreader = csv.reader(file) - header = next(csvreader) - permission_list = [] - for table_row in csvreader: - # print("table_row[15]:",type(table_row[15]), table_row[15]) - access_perm = json.loads(table_row[15]) - # access_perm = table_row[15] - permission_list = [x for x in access_perm] - source_inp = { - "resourceType": table_row[6], - "language": table_row[9], - "version": table_row[12], - "versionTag": table_row[13], - "label": ["latest","published"], - "year": table_row[3], - "license": table_row[4].upper(), - "accessPermissions": permission_list, - "metaData": json.loads(table_row[14]) - # "metaData": table_row[14] - } - upload_v2_data(source_inp,'/resources') - print("Sourcename--->",table_row[2]) - SOURCEDATA.append(source_inp) + +#1: add_licenses def add_licenses(): '''Add licenses''' with open('files/licenses.csv', 'r', encoding="UTF-8") as file: @@ -144,6 +125,10 @@ def add_licenses(): print("la", license_inp) upload_data(license_inp,'/resources/licenses') + + + +#2: add_versions def add_versions(): '''Add versions''' with open('files/versions.csv', 'r', encoding="UTF-8") as file: @@ -158,18 +143,18 @@ def add_versions(): } upload_data(version_inp, '/resources/versions') + + + +#3:add_resources def add_resources(): '''Add resources''' with open('files/resources.csv','r',encoding="UTF-8") as file: - csvreader = csv.reader(file) next(csvreader) - for table_row in csvreader: if not table_row: continue - - print("!!tablerow:",table_row) source_inp = { "resourceType": table_row[0], "language": table_row[1], @@ -182,13 +167,101 @@ def add_resources(): } upload_data(source_inp,'/resources') + + + def book_id_code_mapping(book_id): '''Map bible book id to book code''' with open('./files/v1bookcodemap.json','r',encoding="UTF-8") as file: v1_book_code_json = json.load(file) - # print(v1_book_code_json) return(v1_book_code_json[book_id]) + + + +#4: upload_commentary +def upload_commentary(file_path): + """upload v2 commentaries + add license, version, source, content""" + parsed_data = [] + with open(file_path, 'r', encoding="UTF-8") as file: + for line in file: + fields = line.strip().split('\t') + if len(fields) == 7: + parsed_data.append({ + + 'reference':{'book':book_id_code_mapping(fields[6]), + "chapter": fields[1], + "verseNumber": fields[2], + "bookEnd":book_id_code_mapping(fields[6]), + "chapterEnd":fields[1], + "verseEnd": fields[3]}, + + 'commentary': fields[4], + 'active': fields[5] == 't', + "sectionType":["commentary-text"] + }) + # Extract resource name from the file path + file_name = file_path.split("/")[-1].split(".")[0] + resource_name = "_".join(file_name.split("_")[1:]).strip() + resource_name_pattern = "^[a-zA-Z]+(-[a-zA-Z0-9]+)*_[A-Z]+_[\w.]+_[a-z]+$" + if not re.match(resource_name_pattern, resource_name): + print(f"Invalid resource_name: {resource_name}. Does not match the expected pattern.") + return None, None + return parsed_data, resource_name + + + + +#5: add_vocabularies +def add_vocabularies(file_path): + # List to store parsed data + parsed_data = [] + # Extract resource name from the file path + file_name = os.path.splitext(os.path.basename(file_path))[0] + resource_name = file_name + # Open the CSV file for reading + with open(file_path, 'r', encoding='UTF-8') as file: + # Create a CSV reader + csv_reader = csv.reader(file) + # Iterate over each row in the CSV file + for row in csv_reader: + # Skip rows with metadata + if 'COPY public.table_11 (word_id, word, details, active) FROM stdin;' in row: + continue + # Check if the row has enough elements + if len(row) != 4: + print(f"Skipping row: {row}. Expected 4 elements, got {len(row)} elements.") + continue + # Extract relevant columns from the row + _, word, details, active = row + try: + # Convert details JSON string to a dictionary + details_dict = json.loads(details) + # Extract "Type" and "definition" from "details" + type_value = details_dict.get('Type', '') + definition_value = details_dict.get('definition', '') + # Create the data entry + entry = { + 'word': word, + 'details': { + 'type': type_value, + 'definition': definition_value, + }, + 'active': active.lower() == 'true' + } + # Append the entry to the list + parsed_data.append(entry) + except json.JSONDecodeError as e: + print(f"Error decoding JSON in row: {row}") + print(f"Details column value: {details}") + print(f"Error details: {e}") + return parsed_data, resource_name + + + + +#6: add_infographic_data-parascriptural def add_infographic_data(): '''Get infographic data from text file''' parsed_data = [] @@ -206,65 +279,24 @@ def add_infographic_data(): 'link': fields[2], 'active': fields[3] == 't' }) - print("parsed data:",parsed_data) - return parsed_data - - -def add_audiobible_data(): - '''Get audio bible data from text file''' - parsed_data = [] - with open('files/audio_bible.txt', 'r',encoding="UTF-8") as file: - for line in file: - # print("line:",line) - fields = line.strip().split('\t') - if len(fields) == 6: - parsed_data.append({ - 'audioId': int(fields[0]), - 'name' : fields[1], - 'reference':{'book':book_id_code_mapping(fields[5]), - "chapter": 0, - "verseNumber": 0}, - 'link': fields[2], - 'audioFormat': "mp3", - 'active': fields[3] == 't' - }) - for item in parsed_data: - print(item['reference']) - print("parsed data:",parsed_data) return parsed_data -def upload_data(input_data,unit_url): - '''Upload data through POST API call''' - print("Inside data upload") - # response = requests.post(BASE_URL+unit_url, headers=headers, json=input_data) - response = requests.post(BASE_URL+unit_url, json=input_data) - # print("response:", response.json()) - - if response.status_code == 201: - print("<<<==============Resource uploaded successfully!==============>>>") - else: - print(f"Failed to create resource. Status code: {response.status_code}") - print("Response details:", response.json()) - print("---------------------------------------------------------------------") +#7: add_bible_video-parascriptural def upload_v2_project_videos(): """upload v2 bible project videos - add license, version, source, content""" - + add license, version, source, content""" TBP_lang_based_contents = {} - #generate language based content with open('files/TBP-content-v2.csv','r',encoding="UTF-8") as file: csvreader = csv.reader(file) header = next(csvreader) - for table_row in csvreader: references = [] books = None chapter = None - table_row[2] = table_row[2].lower() table_row[2] = table_row[2].strip() if table_row[2] in ("ot","nt","","tanak/ot"): @@ -274,8 +306,7 @@ def upload_v2_project_videos(): elif re.findall(r"^(\w+)(,\s*\w+)*$",table_row[2]): books = table_row[2].split(',') else: - books = [table_row[2]] - + books = [table_row[2]] table_row[3] = table_row[3].strip() if table_row[3] == "": chapter=['0'] @@ -305,12 +336,6 @@ def upload_v2_project_videos(): "chapter": int(chapter[0]) if chapter else 0, "verseEnd": 0 } - - # temp_ref = { - # "book": buk, - # "chapter": 0, - # } - # references.append(temp_ref) metadata = {"series": table_row[10]} print("----------REF:",temp_ref) video_inp = { @@ -322,264 +347,205 @@ def upload_v2_project_videos(): "metaData": metadata, "active": True } - if table_row[1] in TBP_lang_based_contents.keys(): TBP_lang_based_contents[table_row[1]].append(video_inp) else: TBP_lang_based_contents[table_row[1]] = [video_inp] - #upload content - # exlcude_test_lang = ["hi"] - # print("COntent:",TBP_lang_based_contents[0]) for content in TBP_lang_based_contents: # if not content in inlcude_test_lang: - print("******************************************") - + print("******************************************") resource_name = content+"_TBP_1_parascriptural" print("resourcename----------------->",resource_name) resource_url = f"/resources/parascripturals/{resource_name}" upload_data(TBP_lang_based_contents[content],resource_url) - # print(">>",TBP_lang_based_contents[content]) -def upload_commentary(file_path): - """upload v2 commentaries - add license, version, source, content""" - - parsed_data = [] - # with open('files/commentarydata_mr_BBC_1_commentary.txt', 'r',encoding="UTF-8") as file: + +#8:upload_audiobible +def upload_audiobible(file_path): + """Upload audiobible data.""" + parsed_data = [] with open(file_path, 'r', encoding="UTF-8") as file: for line in file: - # print("line:",line) fields = line.strip().split('\t') - if len(fields) == 7: + if len(fields) == 6: parsed_data.append({ - - 'reference':{'book':book_id_code_mapping(fields[6]), - "chapter": fields[1], - "verseNumber": fields[2], - "bookEnd":book_id_code_mapping(fields[6]), - "chapterEnd":fields[1], - "verseEnd": fields[3]}, - - 'commentary': fields[4], - 'active': fields[5] == 't', - "sectionType":["commentary-text"] - }) - + 'audioId': int(fields[0]), + 'name' : fields[1], + 'reference': { + 'book': book_id_code_mapping(fields[5]), + "chapter": 0, + "verseNumber": 0 + }, + 'link': fields[2], + 'audioFormat': "mp3", + "active": True + }) # Extract resource name from the file path file_name = file_path.split("/")[-1].split(".")[0] resource_name = "_".join(file_name.split("_")[1:]).strip() - resource_name_pattern = "^[a-zA-Z]+(-[a-zA-Z0-9]+)*_[A-Z]+_[\w.]+_[a-z]+$" if not re.match(resource_name_pattern, resource_name): print(f"Invalid resource_name: {resource_name}. Does not match the expected pattern.") return None, None - return parsed_data, resource_name -def add_vocabularies(file_path): - # List to store parsed data - parsed_data = [] - - # Extract resource name from the file path - file_name = os.path.splitext(os.path.basename(file_path))[0] - resource_name = file_name - - # Open the CSV file for reading - with open(file_path, 'r', encoding='UTF-8') as file: - # Create a CSV reader - csv_reader = csv.reader(file) - - # Iterate over each row in the CSV file - for row in csv_reader: - # Skip rows with metadata - if 'COPY public.table_11 (word_id, word, details, active) FROM stdin;' in row: - continue - - # Print the content of each row for debugging - # print(f"Row: {row}") - - # Check if the row has enough elements - if len(row) != 4: - print(f"Skipping row: {row}. Expected 4 elements, got {len(row)} elements.") - continue - - # Extract relevant columns from the row - _, word, details, active = row - try: - # Convert details JSON string to a dictionary - details_dict = json.loads(details) - # Extract "Type" and "definition" from "details" - type_value = details_dict.get('Type', '') - definition_value = details_dict.get('definition', '') - # Create the data entry - entry = { - 'word': word, - 'details': { - 'type': type_value, - 'definition': definition_value, - }, - 'active': active.lower() == 'true' - } +# #9: add_signbible_data +def add_signbible_data(csv_file_path): + '''Parse sign video data from a CSV file''' + parsed_data = [] + try: + with open(csv_file_path, 'r', encoding="UTF-8") as file: + csv_reader = csv.reader(file, delimiter=';') # Adjust delimiter here + # Skip the header row + next(csv_reader) + for line_number, line in enumerate(csv_reader, start=2): # Start from line 2 (header is line 1) + try: + # Check if the line has the correct number of columns + if len(line) != 13: # Adjust this number based on your CSV file structure + raise ValueError(f"Invalid number of columns in line {line_number}") + # Parse each column from the line + signvideo_id = int(line[0].strip()) + title = line[1].strip() + description = line[2].strip() + # Parse reference data + try: + reference_data = json.loads(line[3].strip()) + reference = { + "book": reference_data['book'], + "chapter": reference_data.get('chapter', 0), + "verseNumber": reference_data.get('verseNumber', 0), + "bookEnd": reference_data.get('bookEnd', ''), + "chapterEnd": reference_data.get('chapterEnd', 0), + "verseEnd": reference_data.get('verseEnd', 0) + } + except KeyError: + raise ValueError(f"Reference column does not contain required keys in line {line_number}") + except json.JSONDecodeError: + raise ValueError(f"Reference column contains invalid JSON format in line {line_number}") + ref_start = line[4].strip() + ref_end = line[5].strip() + link = line[6].strip() + metadata = json.loads(line[7].strip()) + active = line[8].strip().lower() == 'true' + created_user = line[9].strip() + last_updated_user = line[10].strip() + created_at = line[11].strip() + last_updated_at = line[12].strip() + # Append the parsed data to the list + parsed_data.append({ + 'signvideo_id': signvideo_id, + 'title': title, + 'description': description, + 'reference': reference, + 'ref_start': ref_start, + 'ref_end': ref_end, + 'link': link, + 'metadata': metadata, + 'active': active, + 'created_user': created_user, + 'last_updated_user': last_updated_user, + 'created_at': created_at, + 'last_updated_at': last_updated_at + }) + except ValueError as ve: + print(f"Error processing line {line_number}: {ve}") + except FileNotFoundError: + print(f"Error: CSV file '{csv_file_path}' not found.") + return parsed_data - # Append the entry to the list - parsed_data.append(entry) - except json.JSONDecodeError as e: - print(f"Error decoding JSON in row: {row}") - print(f"Details column value: {details}") - print(f"Error details: {e}") - return parsed_data, resource_name +#10: add_bible def add_bible(csv_file_path): usfm_list = [] - # Set a higher field size limit csv.field_size_limit(100000000) # Adjust as needed - try: with open(csv_file_path, 'r', encoding='utf-8') as file: # Create a CSV reader reader = csv.reader(file) - # Assuming the first row is the header header = next(reader) print("Header:", header) # Print the header - for values in reader: # Extract usfm_text from the second column usfm_text = values[1] - # Format usfm_text as in your example and append to the list formatted_usfm = f'{{"USFM": "{usfm_text}"}}' usfm_list.append(eval(formatted_usfm)) - except FileNotFoundError: print(f"Error: File '{csv_file_path}' not found.") except Exception as e: print(f"An error occurred while processing {csv_file_path}: {str(e)}") - print("data,", usfm_list) - return usfm_list -#========================================================================================================================== -def add_parascriptual(csv_file_path): #Only use if you want to add new parascriptual. - data_list = [] - try: - with open(csv_file_path, 'r', encoding='utf-8') as file: - # Create a CSV reader - reader = csv.DictReader(file) - - # Assuming the first row is the header - for row in reader: - try: - # Extracting required fields - reference_data = json.loads(row['reference']) - reference = { - "book": reference_data['book'], - "chapter": reference_data.get('chapter', 0), - "verseNumber": reference_data.get('verseNumber', 0), - "bookEnd": reference_data.get('bookEnd', ''), - "chapterEnd": reference_data.get('chapterEnd', 0), - "verseEnd": reference_data.get('verseEnd', 0) - } - except KeyError: - print(f"Error: 'reference' column does not contain required keys in row: {row}") - continue - except json.JSONDecodeError: - print(f"Error: 'reference' column contains invalid JSON format in row: {row}") - continue - - # Constructing data dictionary - data = { - "category": row.get('category', ''), - "title": row.get('title', ''), - "description": row.get('description', ''), - "content": row.get('content', ''), - "reference": reference, - "link": row.get('link', ''), - "metaData": json.loads(row.get('metadata', '{}')), - "active": row.get('active', '') == 't' - } - data_list.append(data) - except FileNotFoundError: - print(f"Error: File '{csv_file_path}' not found.") - except Exception as e: - print(f"An error occurred while processing {csv_file_path}: {str(e)}") - - return data_list +try: + #check whether the schema is there or not.If not , it will create one as mentioned + # create_database_schema() -data = add_parascriptual('files4/ml_TBP_1_parascriptural.csv') -resource_name = 'ml_TBP_1_parascriptural' -parascript_url = f"/resources/parascripturals/{resource_name}" -upload_data(data, parascript_url) + # 1st + # Add licenses + add_licenses() -#========================================================================================================================== -# add_resources_v2() -try: - #check whether the schema is there or not.If not , it will create one as mentioned - create_database_schema() + + # 2nd + # Add versions + add_versions() - # 1st - # Add licenses and versions - add_licenses() - add_versions() - #2nd - # Add parascriptuals, audiobibles, and commentaries into resources + #3rd + # Add resources add_resources() - #3rd - # Add USFM data to biblebooks - def upload_bible_data(): - folder_path = 'bible' #folder path to the respective data files - for filename in os.listdir(folder_path): - if filename.endswith('.csv'): - csv_file_path = os.path.join(folder_path, filename) - resource_name = os.path.splitext(filename)[0] - data = add_bible(csv_file_path) - bible_url = f"/resources/bibles/{resource_name}/books" - - #By this method you can validate every data from each files - for entry in data: - try: - # Add each entry individually and handle errors - upload_data([entry], bible_url) - print(f"Success: Data for {resource_name} uploaded successfully.") - except Exception as e: - print(f"Failed to upload data for {resource_name}: {str(e)}") - #Call the function to upload data - upload_bible_data() #4th + # Add commentaries + # file paths to the respective data files + file_paths = [ + 'files/commentarydata_mr_BBC_1_commentary.txt', + 'files/commentarydata_en_BBC_1_commentary.txt', + 'files/commentarydata_en_SBC_2_commentary.txt', + 'files/commentarydata_en_MHCC_1_commentary .txt', + 'files/commentarydata_hi_HINDIIRVN_1_commentary.txt', + 'files/commentarydata_gu_BBC_1_commentary .txt' + ] + for file_path in file_paths: + data, resource_name = upload_commentary(file_path) + commentary_url = f"/resources/commentaries/{resource_name}" + upload_data(data, commentary_url) + + + + + #5th #Add vocabularies - #file paths to the respective data files + # file paths to the respective data files file_paths = [ 'vocabularies/en_EBD_1_vocabulary.csv', 'vocabularies/hi_IRVD_1_vocabulary.csv', 'vocabularies/ins_IRV_1_vocabulary.csv', ] - for file_path in file_paths: data, resource_name = add_vocabularies(file_path) service_url = f"/resources/vocabularies/{resource_name}" @@ -587,48 +553,103 @@ def upload_bible_data(): print("Data Uploaded successfully!") - #5th - # Add infographic data to parascripturals + + + + #6th + # Add infographic data to parascripturals data = add_infographic_data() resource_name = 'hi_HI_1_parascriptural' parascript_url = f"/resources/parascripturals/{resource_name}" upload_data(data, parascript_url) - #6th - # Add Bible video data to parascripturals - upload_v2_project_videos() #7th - # Add audio bible + # Add Bible video data to parascripturals + upload_v2_project_videos() - data = add_audiobible_data() - resource_name = 'dgo_DSV_1_audiobible' - audiobible_url = f"/resources/bible/audios/{resource_name}" - upload_data(data, audiobible_url) - #8th - #Add commentaries - #file paths to the respective data files + + #8th + # Add audio bible file_paths = [ - 'files/commentarydata_mr_BBC_1_commentary.txt', - 'files/commentarydata_en_BBC_1_commentary.txt', - 'files/commentarydata_en_SBC_2_commentary.txt', - 'files/commentarydata_en_MHCC_1_commentary .txt', - 'files/commentarydata_hi_HINDIIRVN_1_commentary.txt', - 'files/commentarydata_gu_BBC_1_commentary .txt' - ] - + 'files/audiobible_bgc_HB_1_audiobible.txt', + 'files/audiobible_pa_IRV_5_audiobible.txt', + 'files/audiobible_hi_IRV_5_audiobible.txt', + 'files/audiobible_kfs_BSV_1_audiobible.txt', + 'files/audiobible_dgo_DSV_1_audiobible.txt', + 'files/audiobible_ory_IRV_5_audiobible.txt' + ] for file_path in file_paths: - data, resource_name = upload_commentary(file_path) - print("resourcename", resource_name) - commentary_url = f"/resources/commentaries/{resource_name}" - upload_data(data, commentary_url) + data, resource_name = upload_audiobible(file_path) + audiobible_url = f"/resources/bible/audios/{resource_name}" + upload_data(data, audiobible_url) + + + + + #9th + #signbible + csv_file_path = 'files/ISL.csv' + data = add_signbible_data(csv_file_path) + if data: + resource_name = 'ins_ISL_1_signbiblevideo' + signbible_url = f"/resources/bible/videos/{resource_name}" + upload_data(data, signbible_url) + + - print("Data Uploaded success uploadedully!") + #10th + # #To add all bible together + + # def upload_bible_data(): + # folder_path = 'bible' #folder path to the respective data files + # for filename in os.listdir(folder_path): + # if filename.endswith('.csv'): + # csv_file_path = os.path.join(folder_path, filename) + # resource_name = os.path.splitext(filename)[0] + # data = add_bible(csv_file_path) + # bible_url = f"/resources/bibles/{resource_name}/books" + + # #By this method you can validate every data from each files + # for entry in data: + # try: + # # Add each entry individually and handle errors + # upload_data([entry], bible_url) + # print(f"Success: Data for {resource_name} uploaded successfully.") + # except Exception as e: + # print(f"Failed to upload data for {resource_name}: {str(e)}") + + + + #To add bibles seperately + def upload_bible_data(): + folder_path = 'bible' #folder path to the respective data files + for filename in os.listdir(folder_path): + if filename.endswith('ur_IRV_5_bible.csv'): + csv_file_path = os.path.join(folder_path, filename) + resource_name = os.path.splitext(filename)[0] + data = add_bible(csv_file_path) + #resource names available are as_IRV_5_bible,bgc_HB_1_bible,bn_IRV_5.bible,dgo_DSV_1_bible,en_ESV_1_bible, + # gu_IRV_5_bible,hi_IRV_5_bible,kfs_BSV_1_bible,kn_IRV_5_bible,ml_IRV_5_bible,mr_IRV_5_bible, + # nag_ISV_1_bible,ne_ULB_1_bible,pa_IRV_5_bible,ta_IRV_5_bible,te_IRV_5_bible,ur_IRV_5_bible + bible_url = f"/resources/bibles/{resource_name}/books" + #By this method you can validate every data from each files + for entry in data: + try: + # Add each entry individually and handle errors + upload_data([entry], bible_url) + print(f"Success: Data for {resource_name} uploaded successfully.") + except Exception as e: + print(f"Failed to upload data for {resource_name}: {str(e)}") + + #Call the function to upload data + # upload_bible_data() + print("Data Uploaded success uploadedully!") except Exception as e: - print(f"An error occurred: {str(e)}") + print(f"An error occurred: {str(e)}") \ No newline at end of file