From c608222e8c67ac8de56d997a2a4fe06aaff68a0d Mon Sep 17 00:00:00 2001
From: Amged Wageh <amged.wageh@kaspersky.com>
Date: Fri, 18 Oct 2024 16:54:33 +0300
Subject: [PATCH] Add URL ID search

---
 README.md                           |  3 +-
 drivefs_sleuth/executor.py          | 48 +++++++++++++++++++++++++----
 drivefs_sleuth/setup.py             | 12 ++++++++
 drivefs_sleuth/synced_files_tree.py |  8 ++++-
 setup.py                            |  2 +-
 5 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 9cf2d8d..f13e6aa 100644
--- a/README.md
+++ b/README.md
@@ -123,13 +123,14 @@ DriveFS Sleuth supports various search functionalities to meet your investigativ
 * **Regular Expressions:** Use the `--regex` parameter to employ regular expressions for searching. Multiple expressions can be specified separated by spaces.
 * **Simple Text Search:** Perform a simple text search using the `[-q|--query-by-name]` optional parameter. Input single or multiple texts separated by spaces. The tool searches for files or folders with names containing the provided text. Toggle the `--exact` parameter for an exact name search.
 * **MD5 Search:** Use the `--md5` parameter to search by the MD5 hash of the files. Multiple MD5s can be specified separated by spaces.
+* **URL ID:** Use the `--url-id` parameter to search by the URL ID of the item. Multiple URL IDs can be specified separated by spaces. _URL ID is the ID of the item when it's being accessed by HTTP requests._
 
 ### Customization Options
 Tailor the tool's behavior with additional parameters:
 * **Listing Control:** Use the `--dont-list-sub-items` parameter to suppress listing sub-items and only display matching folders.
 * **Complex Criteria:** Enable a more complex combination of search criteria by providing a CSV file through the `--search-csv parameter`. The CSV file includes case-sensitive headers: `TARGET`, `TYPE`, `CONTAINS`, and `LIST_SUB_ITEMS`.
     * **TARGET:** Holds the searching regex or simple text.
-    * **TYPE:** Classifies the search type as either `FILENAME`, `REGEX`, or `MD5`.
+    * **TYPE:** Classifies the search type as one of `FILENAME`, `REGEX`, `MD5`, or `URLID` (case insensitive).
     * **CONTAINS:** Use `FALSE` for an exact search or `TRUE` to search for any filename containing the specified target.
     * `LIST_SUB_ITEMS:` Enable or disable the listing of sub-items for matching folders, indicated by `TRUE` or `FALSE`, respectively.
 
diff --git a/drivefs_sleuth/executor.py b/drivefs_sleuth/executor.py
index a8e2064..4c02634 100644
--- a/drivefs_sleuth/executor.py
+++ b/drivefs_sleuth/executor.py
@@ -95,6 +95,15 @@ def execute():
         help='Searches for files by the MD5 hash. Multiple hashes can be passed separated by spaces.'
     )
 
+    searching_group.add_argument(
+        '--url-id',
+        type=str,
+        nargs='+',
+        default=[],
+        dest='url_id',
+        help='Searches for files by the URL ID. Multiple hashes can be passed separated by spaces.'
+    )
+
     searching_group.add_argument(
         '--search-csv',
         type=str,
@@ -201,8 +210,21 @@ def execute():
                             "TYPE": "md5",
                             "TARGET": [criteria['TARGET']]
                         })
+                    elif criteria['TYPE'].lower() == 'urlid':
+                        if (criteria['LIST_SUB_ITEMS'] or 'true').lower() == 'false':
+                            searching_criteria.append({
+                                "TYPE": "urlid",
+                                "TARGET": [criteria['TARGET']],
+                                "LIST_SUB_ITEMS": False
+                            })
+                        else:
+                            searching_criteria.append({
+                                "TYPE": "urlid",
+                                "TARGET": [criteria['TARGET']],
+                                "LIST_SUB_ITEMS": True
+                            })
                     elif criteria['TYPE'].lower() == 'regex':
-                        if criteria['LIST_SUB_ITEMS'].lower() == 'false':
+                        if (criteria['LIST_SUB_ITEMS'] or 'true').lower() == 'false':
                             searching_criteria.append({
                                 "TYPE": "regex",
                                 "TARGET": [criteria['TARGET']],
@@ -215,8 +237,8 @@ def execute():
                                 "LIST_SUB_ITEMS": True
                             })
                     else:
-                        if criteria['CONTAINS'].lower() == 'false':
-                            if criteria['LIST_SUB_ITEMS'].lower() == 'false':
+                        if (criteria['CONTAINS'] or 'true').lower() == 'false':
+                            if (criteria['LIST_SUB_ITEMS'] or 'true').lower() == 'false':
                                 searching_criteria.append({
                                     "TYPE": "filename",
                                     "TARGET": [criteria['TARGET']],
@@ -231,7 +253,7 @@ def execute():
                                     "LIST_SUB_ITEMS": True
                                 })
                         else:
-                            if criteria['LIST_SUB_ITEMS'].lower() == 'false':
+                            if (criteria['LIST_SUB_ITEMS'] or 'true').lower() == 'false':
                                 searching_criteria.append({
                                     "TYPE": "filename",
                                     "TARGET": [criteria['TARGET']],
@@ -250,8 +272,8 @@ def execute():
                 'Searching CSV file should be formated as follows:\n'
                 '\t- The Head should be TYPE,TARGET,CONTAINS,LIST_SUB_ITEMS (case sensitive), '
                 'where the values should be as follows:\n'
-                '\t- TYPE: [md5|filename|regex] (case insensitive)\n'
-                '\t- TARGET: the value to be searched. (case insensitive for md5 and filename only)\n'
+                '\t- TYPE: [md5|filename|regex|urlid] (case insensitive)\n'
+                '\t- TARGET: the value to be searched. (case sensitive for regex only)\n'
                 '\t- CONTAINS: [True|False] (case insensitive)\n'
                 '\t- LIST_SUB_ITEMS: [True|False] (case insensitive)')
             arg_parser.exit()
@@ -308,6 +330,20 @@ def execute():
             "TARGET": args.md5
         })
 
+    if args.url_id:
+        if args.list_sub_items:
+            searching_criteria.append({
+                "TYPE": "urlid",
+                "TARGET": args.url_id,
+                "LIST_SUB_ITEMS": True
+            })
+        else:
+            searching_criteria.append({
+                "TYPE": "urlid",
+                "TARGET": args.url_id,
+                "LIST_SUB_ITEMS": False
+            })
+
     print(f'{__get_status_emoji("🔍", "[SEARCHING]")} Searching... [IN PROGRESS]')
 
     if searching_criteria:
diff --git a/drivefs_sleuth/setup.py b/drivefs_sleuth/setup.py
index 407030a..02ade99 100644
--- a/drivefs_sleuth/setup.py
+++ b/drivefs_sleuth/setup.py
@@ -135,6 +135,8 @@ def _construct_synced_files_trees(self):
                                                        parent_info[9], get_item_properties(self.__profile_path,
                                                                                            parent_id), parent_info[3],
                                                        parent_info[10])
+                        if parent_info[9] == 1:
+                            self.__synced_files_tree.add_recovered_deleted_item(current_parent_dir)
                         orphan_dirs[parent_id] = current_parent_dir
 
             for child_id in childs_ids:
@@ -154,6 +156,8 @@ def _construct_synced_files_trees(self):
                                       f'{current_parent_dir.tree_path}\\{child_info[3]}', content_cache_path,
                                       thumbnail_path, child_info[10])
                     current_parent_dir.add_item(child_file)
+                    if child_info[9] == 1:
+                        self.__synced_files_tree.add_recovered_deleted_item(child_file)
                     if content_cache_path:
                         self.__synced_files_tree.add_recoverable_item_from_cache(child_file)
                     if thumbnail_path:
@@ -192,6 +196,8 @@ def _construct_synced_files_trees(self):
                                                            f'{current_parent_dir.tree_path}\\{target_info[3]}',
                                                            target_info[10])
                                         added_dirs[target_stable_id] = target
+                                        if target_info[9] == 1:
+                                            self.__synced_files_tree.add_recovered_deleted_item(target)
                                 else:
                                     target = DummyItem(target_stable_id)
                                     self.__synced_files_tree.add_deleted_item(target)
@@ -217,6 +223,8 @@ def _construct_synced_files_trees(self):
 
                     added_dirs[child_id] = child
                     current_parent_dir.add_item(child)
+                    if child_info[9] == 1:
+                        self.__synced_files_tree.add_recovered_deleted_item(child)
 
         # TODO: check if I can add a link in the shared with me
         for shared_with_me_item_info in get_shared_with_me_without_link(self.__profile_path):
@@ -238,6 +246,8 @@ def _construct_synced_files_trees(self):
                     self.__synced_files_tree.add_recoverable_item_from_cache(shared_with_me_file)
                 if thumbnail_path:
                     self.__synced_files_tree.add_thumbnail_item(shared_with_me_file)
+                if shared_with_me_item_info[9] == 1:
+                    self.__synced_files_tree.add_recovered_deleted_item(shared_with_me_file)
             else:
                 shared_with_me_item = orphan_dirs.get(shared_with_me_item_info[1], None)
                 if shared_with_me_item:
@@ -250,6 +260,8 @@ def _construct_synced_files_trees(self):
                                                     shared_with_me_item_info[9], shared_with_me_item_properties,
                                                     f'{current_parent_dir.tree_path}\\{shared_with_me_item_info[3]}',
                                                     shared_with_me_item_info[10])
+                    if shared_with_me_item_info[9] == 1:
+                        self.__synced_files_tree.add_recovered_deleted_item(shared_with_me_item)
                 self.__synced_files_tree.add_shared_with_me_item(shared_with_me_item)
 
         for orphan_id, orphan_dir in orphan_dirs.items():
diff --git a/drivefs_sleuth/synced_files_tree.py b/drivefs_sleuth/synced_files_tree.py
index 4b15ce7..e81ab19 100644
--- a/drivefs_sleuth/synced_files_tree.py
+++ b/drivefs_sleuth/synced_files_tree.py
@@ -109,7 +109,7 @@ def get_sub_items(self):
 
 class DummyItem(Item):
     def __init__(self, stable_id):
-        super().__init__(stable_id, '', 'DELETED_ITEM', '', '', '', '', '', '', '', '', '')
+        super().__init__(stable_id, '', 'DELETED_ITEM', '', '', '', '', '', '', '', 'DELETED_ITEM', '')
 
     def get_sub_items(self):
         return []
@@ -261,6 +261,12 @@ def __search(current_item):
                     if condition[1]:
                         add_sub_items(current_item)
 
+            for condition in [(target.lower(), c['LIST_SUB_ITEMS']) for c in conditions if c['TYPE'] == 'urlid' for target in c['TARGET']]:
+                if condition[0] == current_item.url_id.lower():
+                    items.append(current_item)
+                    if condition[1]:
+                        add_sub_items(current_item)
+
             for condition in [(target.lower(), c['LIST_SUB_ITEMS'], c['CONTAINS']) for c in conditions if c['TYPE'] == 'filename' for target in c['TARGET']]:
                 if condition[2]:
                     if condition[0] in current_item.local_title.lower():
diff --git a/setup.py b/setup.py
index 414ac60..208d770 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 
 setup(
     name='drivefs_sleuth',
-    version='1.1.0',
+    version='1.2.0',
     description='The ultimate Google Drive File Stream Investigator!',
     long_description=open('README.md', encoding='utf-8').read(),
     long_description_content_type='text/markdown',