diff --git a/CHANGELOG.md b/CHANGELOG.md index e69af84dce..539ac63a8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.15.2-dev5 +## 0.15.2-dev6 ### Enhancements diff --git a/requirements/deps/constraints.txt b/requirements/deps/constraints.txt index 93828753ac..d2fd98a2aa 100644 --- a/requirements/deps/constraints.txt +++ b/requirements/deps/constraints.txt @@ -15,9 +15,6 @@ certifi>=2023.7.22 pyparsing<3.1.0 scipy<1.11.4 IPython<8.13 -# NOTE(alan) Pinned to avoid error that occurs with 2.4.3: -# AttributeError: 'ResourcePath' object has no attribute 'collection' -Office365-REST-Python-Client<2.4.3 # NOTE(trevor) `unstructured-inference` is set in extra-pdf-image.in to allow # unstructured-inference to be upgraded when unstructured library is upgraded # https://github.com/Unstructured-IO/unstructured/issues/1458 diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index a11667dd73..e0fd93a29f 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -148,7 +148,7 @@ pdfminer-six==20231228 # via # -r ./extra-pdf-image.in # pdfplumber -pdfplumber==0.11.2 +pdfplumber==0.11.3 # via layoutparser pikepdf==9.1.0 # via -r ./extra-pdf-image.in diff --git a/requirements/ingest/onedrive.txt b/requirements/ingest/onedrive.txt index 0fd9b13df1..b0358255f1 100644 --- a/requirements/ingest/onedrive.txt +++ b/requirements/ingest/onedrive.txt @@ -33,10 +33,8 @@ msal==1.30.0 # via # -r ./ingest/onedrive.in # office365-rest-python-client -office365-rest-python-client==2.4.2 - # via - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/onedrive.in +office365-rest-python-client==2.5.11 + # via -r ./ingest/onedrive.in pycparser==2.22 # via cffi pyjwt[crypto]==2.9.0 @@ -52,6 +50,10 @@ soupsieve==2.5 # via # -c ./ingest/../base.txt # beautifulsoup4 +typing-extensions==4.12.2 + # via + # -c ./ingest/../base.txt + # office365-rest-python-client urllib3==1.26.19 # via # -c ./ingest/../base.txt diff --git a/requirements/ingest/outlook.txt b/requirements/ingest/outlook.txt index dace922e85..473e026252 100644 --- a/requirements/ingest/outlook.txt +++ b/requirements/ingest/outlook.txt @@ -27,10 +27,8 @@ msal==1.30.0 # via # -r ./ingest/outlook.in # office365-rest-python-client -office365-rest-python-client==2.4.2 - # via - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/outlook.in +office365-rest-python-client==2.5.11 + # via -r ./ingest/outlook.in pycparser==2.22 # via cffi pyjwt[crypto]==2.9.0 @@ -42,6 +40,10 @@ requests==2.32.3 # -c ./ingest/../base.txt # msal # office365-rest-python-client +typing-extensions==4.12.2 + # via + # -c ./ingest/../base.txt + # office365-rest-python-client urllib3==1.26.19 # via # -c ./ingest/../base.txt diff --git a/requirements/ingest/sharepoint.txt b/requirements/ingest/sharepoint.txt index 8680b35cbe..3c84d0de9a 100644 --- a/requirements/ingest/sharepoint.txt +++ b/requirements/ingest/sharepoint.txt @@ -27,10 +27,8 @@ msal==1.30.0 # via # -r ./ingest/sharepoint.in # office365-rest-python-client -office365-rest-python-client==2.4.2 - # via - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/sharepoint.in +office365-rest-python-client==2.5.11 + # via -r ./ingest/sharepoint.in pycparser==2.22 # via cffi pyjwt[crypto]==2.9.0 @@ -42,6 +40,10 @@ requests==2.32.3 # -c ./ingest/../base.txt # msal # office365-rest-python-client +typing-extensions==4.12.2 + # via + # -c ./ingest/../base.txt + # office365-rest-python-client urllib3==1.26.19 # via # -c ./ingest/../base.txt diff --git a/unstructured/__version__.py b/unstructured/__version__.py index d2ae41a9f7..b17a9bb28c 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.15.2-dev5" # pragma: no cover +__version__ = "0.15.2-dev6" # pragma: no cover diff --git a/unstructured/ingest/v2/logger.py b/unstructured/ingest/v2/logger.py index 34c5c1df3d..c565449b16 100644 --- a/unstructured/ingest/v2/logger.py +++ b/unstructured/ingest/v2/logger.py @@ -22,8 +22,8 @@ def default_is_data_sensitive(k: str, v: Any) -> bool: def hide_sensitive_fields( - data: dict, is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive -) -> dict: + data: dict[str, Any], is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive +) -> dict[str, Any]: """ Will recursively look through every k, v pair in this dict and any nested ones and run is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if @@ -62,8 +62,7 @@ def redact_jsons(s: str) -> str: if "{" not in chars: return s i = 0 - jsons = [] - i = 0 + jsons: list[str] = [] while i < len(chars): char = chars[i] if char == "{": diff --git a/unstructured/ingest/v2/processes/connectors/onedrive.py b/unstructured/ingest/v2/processes/connectors/onedrive.py index c873ecfddf..3ab711b6bd 100644 --- a/unstructured/ingest/v2/processes/connectors/onedrive.py +++ b/unstructured/ingest/v2/processes/connectors/onedrive.py @@ -89,10 +89,10 @@ class OnedriveIndexer(Indexer): def list_objects(self, folder, recursive) -> list["DriveItem"]: drive_items = folder.children.get().execute_query() - files = [d for d in drive_items if d.is_file] + files = [d for d in drive_items if d.file is not None] if not recursive: return files - folders = [d for d in drive_items if d.is_folder] + folders = [d for d in drive_items if d.is_folder is not None] for f in folders: files.extend(self.list_objects(f, recursive)) return files @@ -123,12 +123,12 @@ def drive_item_to_file_data(self, drive_item: "DriveItem") -> FileData: server_path = file_path + "/" + filename rel_path = server_path.replace(self.index_config.path, "").lstrip("/") date_modified_dt = ( - parser.parse(drive_item.last_modified_datetime) + parser.parse(str(drive_item.last_modified_datetime)) if drive_item.last_modified_datetime else None ) date_created_at = ( - parser.parse(drive_item.created_datetime) if drive_item.created_datetime else None + parser.parse(str(drive_item.created_datetime)) if drive_item.created_datetime else None ) return FileData( identifier=drive_item.id,