From 282ba2aa69fc27acf682d9d3ca02fbf530969355 Mon Sep 17 00:00:00 2001
From: Abhishek-Jain-1925 <jainabhishek1925@gmail.com>
Date: Tue, 12 Nov 2024 17:57:48 +0530
Subject: [PATCH] [feat]: Add flow to maintain records in Sheet

---
 service/drive.py | 151 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 108 insertions(+), 43 deletions(-)

diff --git a/service/drive.py b/service/drive.py
index dbc2cf7..4cf5aed 100644
--- a/service/drive.py
+++ b/service/drive.py
@@ -10,8 +10,15 @@
 from audio_service import translate_with_whisper
 from summarizer import summarize_using_openai
 from logger import logger
+import ssl
+import httplib2
+from google_auth_httplib2 import AuthorizedHttp
+import time
+import logging
 
 load_dotenv()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 # If modifying these scopes, delete the file token.json.
 SCOPES = [
@@ -55,11 +62,11 @@ def get_folder_id(service, folder_name):
         )
         items = results.get("files", [])
         if not items:
-            print(f"No folder found with the name '{folder_name}'.")
+            logger.info(f"No folder found with the name '{folder_name}'.")
             return None
         return items[0]['id']
     except HttpError as error:
-        print(f"An error occurred: {error}")
+        logger.warning(f"An error occurred: {error}")
         return None
 
 def get_files_in_folder(service, folder_id):
@@ -73,7 +80,7 @@ def get_files_in_folder(service, folder_id):
         items = results.get("files", [])
         return items
     except HttpError as error:
-        print(f"An error occurred: {error}")
+        logger.warning(f"An error occurred: {error}")
         return []
     
 def make_file_public(service, file_id):
@@ -87,18 +94,24 @@ def make_file_public(service, file_id):
         service.files().get(fileId=file_id, fields='webViewLink').execute()
         return 'https://www.googleapis.com/drive/v3/files/'+ file_id + '?alt=media&key=' + os.getenv("GCP_API_KEY")
     except HttpError as error:
-        print(f"An error occurred: {error}")
+        logger.warning(f"An error occurred while making file Public: {error}")
         return None
 
-def revoke_public_access(service, file_id):
-    """Revoke public access to a file."""
-    try:
-        permissions = service.permissions().list(fileId=file_id).execute()
-        for permission in permissions.get('permissions', []):
-            if permission.get('type') == 'anyone':
-                service.permissions().delete(fileId=file_id, permissionId=permission['id']).execute()
-    except HttpError as error:
-        print(f"An error occurred: {error}")
+def revoke_public_access_with_retry(service, file_id, max_retries=3):
+    """Delete every 'anyone'/'reader' permission on a file, retrying with exponential backoff."""
+    for attempt in range(max_retries):
+        try:
+            listing = service.permissions().list(fileId=file_id).execute()
+            for perm in listing.get('permissions', []):
+                if perm['role'] == 'reader' and perm['type'] == 'anyone':
+                    service.permissions().delete(fileId=file_id, permissionId=perm['id']).execute()
+            return
+        except (HttpError, ssl.SSLEOFError) as e:
+            logger.warning(f"Attempt {attempt + 1} failed for revoke public access: {e}")
+            if attempt == max_retries - 1:
+                logger.warning("Max retries reached. Could not revoke public access.")
+                return
+            time.sleep(2 ** attempt)
 
 
 def filter_files_by_date(files):
@@ -108,47 +121,100 @@ def filter_files_by_date(files):
     for file in files:
         created_time = datetime.fromisoformat(file['createdTime'][:-1]).date()
         modified_time = datetime.fromisoformat(file['modifiedTime'][:-1]).date()
-        if created_time == today and modified_time == today:
+        if created_time == today:
             filtered_files.append(file)
     return filtered_files
 
+ssl_context = ssl.create_default_context()  # NOTE(review): main() in this patch builds its own identical context - confirm this module-level one is used
+ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")  # NOTE(review): presumably relaxes the OpenSSL security level to avoid handshake failures - confirm it is needed
+
+def append_to_sheet(sheets_service, sheet_id, data, retries=3):
+    """Append rows to Sheet1, retrying on SSL EOF errors and re-raising the final failure."""
+    request_kwargs = dict(
+        spreadsheetId=sheet_id,
+        range="Sheet1!A:C",
+        valueInputOption="RAW",
+        insertDataOption="INSERT_ROWS",
+        body={'values': data},
+    )
+    for attempt in range(retries):
+        try:
+            sheets_service.spreadsheets().values().append(**request_kwargs).execute()
+            return
+        except ssl.SSLEOFError:
+            if attempt >= retries - 1:
+                raise
+            # Exponential backoff before the next attempt.
+            time.sleep(2 ** attempt)
+
+def get_transcription_and_summary(file, drive_service, existing_file_ids, new_rows, results):
+    """Transcribe and summarize one new audio/video Drive file.
+
+    Appends to `results` and `new_rows` in place; recorded or non-media files are skipped unshared.
+    """
+    file_id = file['id']
+    mime_type = file.get('mimeType', '')
+    # Filter before sharing: a skipped file must never be left publicly readable.
+    if not file_id or file_id in existing_file_ids or not mime_type.startswith(('audio/', 'video/')):
+        return
+    try:
+        public_link = make_file_public(drive_service, file_id)
+        if not public_link:  # sharing failed; the finally below still removes any partial permission
+            return
+        translation = translate_with_whisper(public_link)
+        logger.info(f"Translation for file '{file['name']}' completed.")
+        logger.info("Translation: %s", translation)
+        summary = summarize_using_openai(translation)
+        logger.info(f"Summary for file '{file['name']}' completed.")
+        logger.info("Summary: %s", summary)
+        created_time = datetime.fromisoformat(file['createdTime'].replace("Z", "+00:00")).date()
+        results.append({'file id': file_id, 'transcription': translation, 'summary': summary})
+        new_rows.append([file_id, file['name'], translation, summary, created_time.isoformat()])
+    finally:
+        # Withdraw the temporary public permission even if a step above raised.
+        revoke_public_access_with_retry(drive_service, file_id)
+
 
 def main():
-    """Main function to return the names and ids of all files in the 'recordings' folder."""
+    ssl_context = ssl.create_default_context()
+    ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")
+
+    http = httplib2.Http()
+    http.ssl_context = ssl_context
     creds = get_credentials()
-    service = build("drive", "v3", credentials=creds)
+    authed_http = AuthorizedHttp(creds, http=http)
+
+    drive_service = build("drive", "v3", http=authed_http)
+    sheets_service = build("sheets", "v4", http=authed_http)
+ 
+    sheet_id = "1fDoslff2Asbrys5xvFtHgiBWW02555Or_vOTurTC1yk"
     
     folder_name = "Interview Recordings"
-    folder_id = get_folder_id(service, folder_name)
+    folder_id = get_folder_id(drive_service, folder_name)
     
+    try:
+        result = sheets_service.spreadsheets().values().get(
+            spreadsheetId=sheet_id, range="Sheet1!A:A"
+        ).execute()
+        values = result.get('values', [])
+        existing_file_ids = {row[0] for row in values if row}
+    except HttpError as error:
+        logger.warning(f"An error occurred while retrieving sheet data: {error}")
+        return
+
     results = []
-    
+
     if folder_id:
-        files = get_files_in_folder(service, folder_id)
-        files = filter_files_by_date(files)
-        
+        files = get_files_in_folder(drive_service, folder_id)
+        files = filter_files_by_date(files)        
+        new_rows = []
+
         for file in files:
-            public_link = make_file_public(service, file['id'])
-            if public_link:
-                print(f"Public link for file '{file['name']}': {public_link}")
-                
-                translation = translate_with_whisper(public_link)
-                logger.info(f"Translation for file '{file['name']}' completed.")
-                print("Translation:",translation)
-                summary = summarize_using_openai(translation)
-                
-                logger.info(f"Summary for file '{file['name']}' completed.")
-                print("Summary:",summary)
-                                
-                revoke_public_access(service, file['id'])
-                print(f"Public access revoked for file '{file['name']}'")
-                
-                results.append({
-                    'file_url': public_link,
-                    'translation': translation,
-                    'summary': summary
-                })
-            
+            get_transcription_and_summary(file, drive_service, existing_file_ids, new_rows, results)
+
+        if new_rows:
+            append_to_sheet(sheets_service, sheet_id, new_rows)
+        
         return results
     
     else:
@@ -157,4 +223,3 @@ def main():
 
 if __name__ == "__main__":
     files = main()
-    print(files)
\ No newline at end of file