Skip to content

Commit

Permalink
Support uploading and unpacking zip files
Browse files Browse the repository at this point in the history
The archive API upload path now indicates the target folder rather than the full file path; the filename continues to be passed separately as a query param.
  • Loading branch information
marksparkza committed Nov 5, 2024
1 parent f6a08c2 commit f6d1467
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 111 deletions.
25 changes: 18 additions & 7 deletions odp/api/lib/archive/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import namedtuple
from os import PathLike

from fastapi import HTTPException, UploadFile
Expand All @@ -8,6 +9,10 @@
from odp.db import Session
from odp.db.models import Archive

# Record describing one file written to an archive: its relative path,
# its size, and its SHA-256 checksum.
FileInfo = namedtuple('FileInfo', ['relpath', 'size', 'sha256'])


class ArchiveAdapter:
"""Abstract base class for an archive implementation adapter.
Expand All @@ -29,14 +34,20 @@ async def get_zip(self, *paths: str | PathLike) -> FileResponse:
files at `paths` to the client."""
raise NotImplementedError

async def put(self, path: str | PathLike, file: UploadFile, sha256: str) -> None:
    """Store the contents of the incoming `file` at `path` and
    verify the stored file against the given checksum.

    The base implementation always raises NotImplementedError; a
    concrete adapter is expected to override it.
    """
    raise NotImplementedError
async def put(
    self,
    folder: str,
    filename: str,
    file: UploadFile,
    sha256: str,
    unpack: bool,
) -> list[FileInfo]:
    """Add or unpack `file` into `folder` relative to the
    archive's upload directory.

    :param folder: target folder, relative to the upload directory
    :param filename: name of the uploaded file
    :param file: the incoming upload
    :param sha256: SHA-256 checksum supplied for the upload
    :param unpack: if true, unpack `file` into `folder` instead of
        storing it as a single file
    :return: a list of FileInfo tuple(relpath, size, sha256)
        for each written file
    :raise NotImplementedError: always, in this base implementation;
        concrete adapters are expected to override it
    """
    raise NotImplementedError


async def put_zip(self, path: str | PathLike, file: UploadFile) -> None:
    """Unpack the contents of the incoming `file` into the
    directory at `path`.

    The base implementation always raises NotImplementedError.
    """
    raise NotImplementedError


Expand Down
59 changes: 39 additions & 20 deletions odp/api/lib/archive/nextcloud.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
from os import PathLike
from typing import Any
from urllib.parse import urljoin

import requests
from fastapi import HTTPException, UploadFile
from fastapi.responses import FileResponse, RedirectResponse

from odp.api.lib.archive import ArchiveAdapter
from odp.api.lib.archive import ArchiveAdapter, FileInfo


class NextcloudArchiveAdapter(ArchiveAdapter):
"""Adapter for a Nextcloud archive."""

async def get(self, path: str | PathLike) -> FileResponse | RedirectResponse:
    """Send the contents of the file at `path` to the client,
    or return a redirect to the relevant Nextcloud folder.

    NOTE(review): only the signature and docstring are visible in
    this view; no statements follow the docstring here, so confirm
    whether the implementation is still pending.
    """

async def get_zip(self, *paths: str | PathLike) -> FileResponse:
    """Send a zip file of the directories and files at `paths`
    to the client.

    NOTE(review): only the signature and docstring are visible in
    this view; no statements follow the docstring here, so confirm
    whether the implementation is still pending.
    """

async def put(self, path: str | PathLike, file: UploadFile, sha256: str) -> None:
"""Upload the incoming `file` to the ODP file storage service
on the Nextcloud server, which in turn writes and verifies the
file at `path` relative to the Nextcloud upload directory."""
async def put(
    self,
    folder: str,
    filename: str,
    file: UploadFile,
    sha256: str,
    unpack: bool,
) -> list[FileInfo]:
    """Forward the incoming `file` to the ODP file storage service.

    The upload is rewound to its start and sent with a PUT request to
    `folder`, resolved against `self.upload_url`. `filename` and
    `sha256` are passed as query parameters, along with `unpack=1`
    when `unpack` is true.

    Return one FileInfo per entry of the service's JSON response.
    NOTE(review): the response is presumably a mapping of
    path -> [size, sha256] — confirm against the storage service.
    """
    # Rewind so that the whole upload is sent from its first byte.
    await file.seek(0)

    query = {'filename': filename, 'sha256': sha256}
    if unpack:
        query['unpack'] = 1

    response_data = self._send_request(
        'PUT',
        urljoin(self.upload_url, folder),
        files={'file': file.file},
        params=query,
    )

    written = []
    for relpath, details in response_data.items():
        written.append(FileInfo(relpath, details[0], details[1]))
    return written

@staticmethod
def _send_request(method, url, files, params) -> Any:
"""Send a request to the ODP file storage service and return
its JSON response."""
try:
r = requests.post(
urljoin(self.upload_url, path),
files={'file': file.file},
params={'sha256': sha256},
r = requests.request(
method,
url,
files=files,
params=params,
)
r.raise_for_status()
return r.json()

except requests.RequestException as e:
if e.response is not None:
status_code = e.response.status_code
error_detail = e.response.text
try:
error_detail = e.response.json()['message']
except (TypeError, ValueError, KeyError):
error_detail = e.response.text
else:
status_code = 503
error_detail = str(e)
Expand Down
153 changes: 69 additions & 84 deletions odp/api/routers/archive.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import mimetypes
import pathlib
from datetime import datetime, timezone

from fastapi import APIRouter, Depends, File, HTTPException, Path, Query, UploadFile
from sqlalchemy import func, select
from sqlalchemy.exc import IntegrityError
from starlette.status import HTTP_404_NOT_FOUND, HTTP_405_METHOD_NOT_ALLOWED, HTTP_422_UNPROCESSABLE_ENTITY
from werkzeug.utils import secure_filename

from odp.api.lib.archive import ArchiveAdapter, get_archive_adapter
from odp.api.lib.auth import ArchiveAuthorize, Authorize, Authorized
Expand Down Expand Up @@ -118,18 +120,17 @@ async def list_resources(


@router.put(
'/{archive_id}/{provider_id}/{package_id}/{path:path}',
'/{archive_id}/{provider_id}/{package_id}/{folder:path}',
dependencies=[Depends(ArchiveAuthorize())],
)
async def upload_file(
archive_id: str,
provider_id: str,
package_id: str,
path: str = Path(..., title='Resource path relative to the package root'),
folder: str = Path(..., title='Path to containing folder relative to package root'),
unpack: bool = Query(False, title='Unpack zip file into folder'),
file: UploadFile = File(..., title='File upload'),
unzip: bool = Query(False, title='Unzip uploaded file'),
filename: str = Query(..., title='File name'),
mimetype: str = Query(..., title='Content type'),
sha256: str = Query(..., title='SHA-256 checksum'),
title: str = Query(None, title='Resource title'),
description: str = Query(None, title='Resource description'),
Expand All @@ -138,7 +139,12 @@ async def upload_file(
package_auth: Authorized = Depends(Authorize(ODPScope.PACKAGE_WRITE)),
) -> None:
"""
Upload a file to an archive and add it to a package.
Upload a file to an archive and add/unpack it into a package folder.
By default, a single resource is created and associated with the archive
and the package. If unpack is true and the file is a supported zip format,
its contents are unpacked into the folder and, for each unpacked file, a
resource is created and similarly associated.
"""
if not (archive := Session.get(Archive, archive_id)):
raise HTTPException(
Expand All @@ -157,97 +163,76 @@ async def upload_file(
)
package_auth.enforce_constraint([package.provider_id])

if not path:
if '..' in folder:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, 'path cannot be blank'
)

if '..' in path:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, "'..' not allowed in path"
)

if pathlib.Path(path).is_absolute():
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, 'path must be relative'
)

if unzip:
...

else:
await _add_resource(
archive,
provider,
package,
path,
file,
filename,
mimetype,
sha256,
title,
description,
archive_adapter,
HTTP_422_UNPROCESSABLE_ENTITY, "'..' not allowed in folder"
)


async def _add_resource(
archive: Archive,
provider: Provider,
package: Package,
path: str,
file: UploadFile,
filename: str,
mimetype: str,
sha256: str,
title: str | None,
description: str | None,
archive_adapter: ArchiveAdapter,
):
resource = Resource(
title=title,
description=description,
filename=filename,
mimetype=mimetype,
size=file.size,
hash=sha256,
hash_algorithm=HashAlgorithm.sha256,
timestamp=(timestamp := datetime.now(timezone.utc)),
provider_id=provider.id,
)
resource.save()

try:
archive_resource = ArchiveResource(
archive_id=archive.id,
resource_id=resource.id,
path=(archive_path := f'{provider.key}/{package.key}/{path}'),
timestamp=timestamp,
)
archive_resource.save()
except IntegrityError:
if pathlib.Path(folder).is_absolute():
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, f"Path '{archive_path}' already exists in archive"
HTTP_422_UNPROCESSABLE_ENTITY, 'folder must be relative'
)

try:
package_resource = PackageResource(
package_id=package.id,
resource_id=resource.id,
path=path,
timestamp=timestamp,
)
package_resource.save()
except IntegrityError:
if not (filename := secure_filename(filename)):
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, f"Path '{path}' already exists in package"
HTTP_422_UNPROCESSABLE_ENTITY, 'invalid filename'
)

archive_folder = f'{provider.key}/{package.key}/{folder}'
try:
await archive_adapter.put(
archive_path, file, sha256
file_info_list = await archive_adapter.put(
archive_folder, filename, file, sha256, unpack
)
except NotImplementedError:
raise HTTPException(
HTTP_405_METHOD_NOT_ALLOWED, f'Operation not supported for {archive.id}'
)

for file_info in file_info_list:
res_mimetype, encoding = mimetypes.guess_type(file_info.relpath, strict=False)
package_path = file_info.relpath.removeprefix(f'{provider.key}/{package.key}/')
archive_path = file_info.relpath
res_filename = pathlib.Path(file_info.relpath).name
res_size = file_info.size
res_hash = file_info.sha256
res_title = title
res_description = description

resource = Resource(
title=res_title,
description=res_description,
filename=res_filename,
mimetype=res_mimetype,
size=res_size,
hash=res_hash,
hash_algorithm=HashAlgorithm.sha256,
timestamp=(timestamp := datetime.now(timezone.utc)),
provider_id=provider.id,
)
resource.save()

try:
archive_resource = ArchiveResource(
archive_id=archive.id,
resource_id=resource.id,
path=archive_path,
timestamp=timestamp,
)
archive_resource.save()
except IntegrityError:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, f"Path '{archive_path}' already exists in archive"
)

try:
package_resource = PackageResource(
package_id=package.id,
resource_id=resource.id,
path=package_path,
timestamp=timestamp,
)
package_resource.save()
except IntegrityError:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY, f"Path '{package_path}' already exists in package"
)

0 comments on commit f6d1467

Please sign in to comment.