Skip to content

Commit

Permalink
update ingest test
Browse files: browse the repository at this point in the history
  • Loading branch information
rbiseck3 committed Jun 18, 2024
1 parent 68e7296 commit a9d424d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 2 additions & 0 deletions test_unstructured_ingest/src/google-drive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \
--verbose \
--drive-id 1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr \
--service-account-key "$GCP_INGEST_SERVICE_KEY_FILE" \
--recursive \
--extensions "pdf,docx" \
--work-dir "$WORK_DIR"

set +e
Expand Down
7 changes: 6 additions & 1 deletion unstructured/ingest/v2/processes/connectors/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,12 @@ def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
for content in contents:
if source_identifiers := content.file_data.source_identifiers:
identifiers = source_identifiers
new_path = self.upload_config.output_path / identifiers.relative_path
rel_path = (
identifiers.relative_path[1:]
if identifiers.relative_path.startswith("/")
else identifiers.relative_path
)
new_path = self.upload_config.output_path / Path(rel_path)
final_path = str(new_path).replace(
identifiers.filename, f"{identifiers.filename}.json"
)
Expand Down

0 comments on commit a9d424d

Please sign in to comment.