Skip to content

Commit

Permalink
Merge pull request #30 from simonsobs/JBorrow/add-search-endpoints
Browse files Browse the repository at this point in the history
Add Search Endpoints
  • Loading branch information
JBorrow authored Jan 24, 2024
2 parents b67bcdb + 99e71b7 commit 99e551b
Show file tree
Hide file tree
Showing 10 changed files with 349 additions and 2,053 deletions.
93 changes: 93 additions & 0 deletions hera_librarian/models/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
Pydantic models for the search endpoint.
"""

from datetime import datetime
from pathlib import Path
from typing import Optional

from pydantic import BaseModel, Field, RootModel

from hera_librarian.deletion import DeletionPolicy


class FileSearchRequest(BaseModel):
    """
    Represents a file search request.

    All of the filter fields (name, create_time_window, uploader, source)
    default to None. max_results defaults to 64.
    """

    name: Optional[str] = None
    "The name of the file to search for."
    # min_length and max_length are both 2, so a supplied window must
    # contain exactly two datetimes (start, end).
    create_time_window: Optional[tuple[datetime, ...]] = Field(
        default=None, min_length=2, max_length=2
    )
    "The time window to search for files in. This is a tuple of two datetimes, the first being the start and the second being the end. Note that the datetimes should be in UTC."
    uploader: Optional[str] = None
    "The uploader to search for."
    source: Optional[str] = None
    "The source to search for."
    # NOTE(review): no bounds are declared on this field here, so zero or
    # negative values pass model validation as written.
    max_results: int = 64
    "The maximum number of results to return."


class InstanceSearchResponse(BaseModel):
    """
    Represents an instance in the file search response.

    Used as the element type of the ``instances`` list on
    FileSearchResponse.
    """

    path: Path
    "The path of the instance."
    deletion_policy: DeletionPolicy
    "The deletion policy of the instance."
    created_time: datetime
    "The time the instance was created."
    available: bool
    "Whether or not the instance is available."


class RemoteInstanceSearchResponse(BaseModel):
    """
    Represents a remote instance in the file search response.

    Used as the element type of the ``remote_instances`` list on
    FileSearchResponse.
    """

    librarian_name: str
    "The name of the librarian that this instance lives on."
    copy_time: datetime
    "The time at which this instance was copied to the remote librarian."


class FileSearchResponse(BaseModel):
    """
    Represents a file search response.

    Describes a single file: its metadata plus the lists of instances and
    remote instances associated with it. All fields are required.
    """

    name: str
    "The name of the file."
    create_time: datetime
    "The time the file was created."
    size: int
    "The size of the file in bytes."
    checksum: str
    "The checksum of the file."
    uploader: str
    "The uploader of the file."
    source: str
    "The source of the file."
    instances: list[InstanceSearchResponse]
    "The instances of the file."
    remote_instances: list[RemoteInstanceSearchResponse]
    "The remote instances of the file."


# Root model whose value is a list of FileSearchResponse objects; this is
# the type used when a search returns several files at once.
FileSearchResponses = RootModel[list[FileSearchResponse]]


class FileSearchFailedResponse(BaseModel):
    """
    Represents a file search failure response.

    Carries a human-readable explanation of the failure together with a
    suggestion for how to resolve it. Both fields are required.
    """

    reason: str
    "The reason why the search failed."
    suggested_remedy: str
    "A suggested remedy for the failure."
12 changes: 0 additions & 12 deletions hera_librarian/models/stores.py

This file was deleted.

3 changes: 2 additions & 1 deletion librarian_server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ def main() -> FastAPI:

log.debug("Adding API router.")

from .api import upload_router, ping_router, clone_router
from .api import upload_router, ping_router, clone_router, search_router

app.include_router(upload_router)
app.include_router(ping_router)
app.include_router(clone_router)
app.include_router(search_router)

return app
3 changes: 2 additions & 1 deletion librarian_server/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@

from .upload import router as upload_router
from .ping import router as ping_router
from .clone import router as clone_router
from .clone import router as clone_router
from .search import router as search_router
103 changes: 103 additions & 0 deletions librarian_server/api/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
Contains endpoints for searching the files uploaded to the librarian.
"""

from fastapi import APIRouter, Depends, Response, status
from sqlalchemy import select
from sqlalchemy.orm import Session

from hera_librarian.models.search import (FileSearchFailedResponse,
FileSearchRequest,
FileSearchResponse,
FileSearchResponses,
InstanceSearchResponse,
RemoteInstanceSearchResponse)

from ..database import yield_session
from ..logger import log
from ..orm.file import File
from ..orm.instance import Instance, RemoteInstance
from ..settings import server_settings

# Router for the search endpoints; every route registered on it is served
# under the /api/v2/search prefix.
router = APIRouter(prefix="/api/v2/search")


@router.post("/file", response_model=FileSearchResponses | FileSearchFailedResponse)
def file(
    request: FileSearchRequest,
    response: Response,
    session: Session = Depends(yield_session),
):
    """
    Searches for files in the librarian.

    Each field that is set (not None) on the request narrows the search:
    name, uploader and source are matched exactly, and create_time_window
    selects files whose create_time lies between the two datetimes
    (inclusive on both ends). Results are ordered by create_time and
    capped at the smaller of request.max_results and
    server_settings.max_search_results (never less than zero).

    Possible response codes:

    200 - OK. Search completed successfully.
    404 - No file found to match search criteria.
    """

    log.debug(f"Received file search request: {request}")

    # Start to build our query.
    query = select(File)

    if request.name is not None:
        query = query.where(File.name == request.name)

    if request.create_time_window is not None:
        query = query.where(File.create_time >= request.create_time_window[0])
        query = query.where(File.create_time <= request.create_time_window[1])

    if request.uploader is not None:
        query = query.where(File.uploader == request.uploader)

    if request.source is not None:
        query = query.where(File.source == request.source)

    # Select objects are immutable: order_by() and limit() return a *new*
    # statement, so the result must be re-assigned or the clause is lost.
    query = query.order_by(File.create_time)

    # Clamp the client-requested size to the server-wide ceiling, and guard
    # against negative values, which are not a valid LIMIT.
    result_limit = max(
        min(request.max_results, server_settings.max_search_results), 0
    )
    query = query.limit(result_limit)

    # Execute the query.
    results = session.execute(query).scalars().all()

    if not results:
        log.debug("No files found. Returning error.")
        response.status_code = status.HTTP_404_NOT_FOUND
        return FileSearchFailedResponse(
            reason="No files found.",
            suggested_remedy="Check that you are searching for the correct file.",
        )

    # Build the response: one entry per matched file, each carrying its
    # local and remote instances.
    respond_files = [
        FileSearchResponse(
            name=result.name,
            create_time=result.create_time,
            size=result.size,
            checksum=result.checksum,
            uploader=result.uploader,
            source=result.source,
            instances=[
                InstanceSearchResponse(
                    path=instance.path,
                    deletion_policy=instance.deletion_policy,
                    created_time=instance.created_time,
                    available=instance.available,
                )
                for instance in result.instances
            ],
            remote_instances=[
                RemoteInstanceSearchResponse(
                    librarian_name=remote_instance.librarian_name,
                    copy_time=remote_instance.copy_time,
                )
                for remote_instance in result.remote_instances
            ],
        )
        for result in results
    ]

    return FileSearchResponses(respond_files)
Loading

0 comments on commit 99e551b

Please sign in to comment.