Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: remove Office constraint pin #3495

Closed
Pull request proposing to merge 5 commits (author and source/target branch names not shown).
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 0.15.2-dev5
## 0.15.2-dev6

### Enhancements

Expand Down
3 changes: 0 additions & 3 deletions requirements/deps/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ certifi>=2023.7.22
pyparsing<3.1.0
scipy<1.11.4
IPython<8.13
# NOTE(alan) Pinned to avoid error that occurs with 2.4.3:
# AttributeError: 'ResourcePath' object has no attribute 'collection'
Office365-REST-Python-Client<2.4.3
# NOTE(trevor) `unstructured-inference` is set in extra-pdf-image.in to allow
# unstructured-inference to be upgraded when unstructured library is upgraded
# https://github.com/Unstructured-IO/unstructured/issues/1458
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-pdf-image.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ pdfminer-six==20231228
# via
# -r ./extra-pdf-image.in
# pdfplumber
pdfplumber==0.11.2
pdfplumber==0.11.3
# via layoutparser
pikepdf==9.1.0
# via -r ./extra-pdf-image.in
Expand Down
10 changes: 6 additions & 4 deletions requirements/ingest/onedrive.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,8 @@ msal==1.30.0
# via
# -r ./ingest/onedrive.in
# office365-rest-python-client
office365-rest-python-client==2.4.2
# via
# -c ./ingest/../deps/constraints.txt
# -r ./ingest/onedrive.in
office365-rest-python-client==2.5.11
# via -r ./ingest/onedrive.in
pycparser==2.22
# via cffi
pyjwt[crypto]==2.9.0
Expand All @@ -52,6 +50,10 @@ soupsieve==2.5
# via
# -c ./ingest/../base.txt
# beautifulsoup4
typing-extensions==4.12.2
# via
# -c ./ingest/../base.txt
# office365-rest-python-client
urllib3==1.26.19
# via
# -c ./ingest/../base.txt
Expand Down
10 changes: 6 additions & 4 deletions requirements/ingest/outlook.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ msal==1.30.0
# via
# -r ./ingest/outlook.in
# office365-rest-python-client
office365-rest-python-client==2.4.2
# via
# -c ./ingest/../deps/constraints.txt
# -r ./ingest/outlook.in
office365-rest-python-client==2.5.11
# via -r ./ingest/outlook.in
pycparser==2.22
# via cffi
pyjwt[crypto]==2.9.0
Expand All @@ -42,6 +40,10 @@ requests==2.32.3
# -c ./ingest/../base.txt
# msal
# office365-rest-python-client
typing-extensions==4.12.2
# via
# -c ./ingest/../base.txt
# office365-rest-python-client
urllib3==1.26.19
# via
# -c ./ingest/../base.txt
Expand Down
10 changes: 6 additions & 4 deletions requirements/ingest/sharepoint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ msal==1.30.0
# via
# -r ./ingest/sharepoint.in
# office365-rest-python-client
office365-rest-python-client==2.4.2
# via
# -c ./ingest/../deps/constraints.txt
# -r ./ingest/sharepoint.in
office365-rest-python-client==2.5.11
# via -r ./ingest/sharepoint.in
pycparser==2.22
# via cffi
pyjwt[crypto]==2.9.0
Expand All @@ -42,6 +40,10 @@ requests==2.32.3
# -c ./ingest/../base.txt
# msal
# office365-rest-python-client
typing-extensions==4.12.2
# via
# -c ./ingest/../base.txt
# office365-rest-python-client
urllib3==1.26.19
# via
# -c ./ingest/../base.txt
Expand Down
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.15.2-dev5" # pragma: no cover
__version__ = "0.15.2-dev6" # pragma: no cover
7 changes: 3 additions & 4 deletions unstructured/ingest/v2/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def default_is_data_sensitive(k: str, v: Any) -> bool:


def hide_sensitive_fields(
data: dict, is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive
) -> dict:
data: dict[str, Any], is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive
) -> dict[str, Any]:
"""
Will recursively look through every k, v pair in this dict and any nested ones and run
is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if
Expand Down Expand Up @@ -62,8 +62,7 @@ def redact_jsons(s: str) -> str:
if "{" not in chars:
return s
i = 0
jsons = []
i = 0
jsons: list[str] = []
while i < len(chars):
char = chars[i]
if char == "{":
Expand Down
8 changes: 4 additions & 4 deletions unstructured/ingest/v2/processes/connectors/onedrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ class OnedriveIndexer(Indexer):

def list_objects(self, folder, recursive) -> list["DriveItem"]:
drive_items = folder.children.get().execute_query()
files = [d for d in drive_items if d.is_file]
files = [d for d in drive_items if d.file is not None]
if not recursive:
return files
folders = [d for d in drive_items if d.is_folder]
folders = [d for d in drive_items if d.is_folder is not None]
Coniferish marked this conversation as resolved.
Show resolved Hide resolved
for f in folders:
files.extend(self.list_objects(f, recursive))
return files
Expand Down Expand Up @@ -123,12 +123,12 @@ def drive_item_to_file_data(self, drive_item: "DriveItem") -> FileData:
server_path = file_path + "/" + filename
rel_path = server_path.replace(self.index_config.path, "").lstrip("/")
date_modified_dt = (
parser.parse(drive_item.last_modified_datetime)
parser.parse(str(drive_item.last_modified_datetime))
Coniferish marked this conversation as resolved.
Show resolved Hide resolved
if drive_item.last_modified_datetime
else None
)
date_created_at = (
parser.parse(drive_item.created_datetime) if drive_item.created_datetime else None
parser.parse(str(drive_item.created_datetime)) if drive_item.created_datetime else None
)
return FileData(
identifier=drive_item.id,
Expand Down
Loading