From ee254c35a6542f8432dd75d574fded901dd8b90b Mon Sep 17 00:00:00 2001 From: Roman Isecke Date: Tue, 16 Jul 2024 13:52:45 -0400 Subject: [PATCH] Save file id for all fsspec connectors if present --- unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py index 3a1a76ca9b..2adfa99b03 100644 --- a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py +++ b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py @@ -187,6 +187,9 @@ def get_metadata(self, path: str) -> DataSourceMetadata: "protocol": self.index_config.protocol, "remote_file_path": self.index_config.remote_url, } + file_stat = self.fs.stat(path=path) + if file_id := file_stat.get("id"): + record_locator["file_id"] = file_id if metadata: record_locator["metadata"] = metadata return DataSourceMetadata(