Skip to content

Commit

Permalink
Merge pull request #14 from FAIR-CA-indicators/load-omex-archive
Browse files Browse the repository at this point in the history
Allow for automated parsing of uploaded omex archive
  • Loading branch information
Fancien authored Sep 26, 2023
2 parents 61758d7 + df9ffba commit d801e81
Show file tree
Hide file tree
Showing 51 changed files with 757,649 additions and 117 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ jobs:
black --check --verbose -- .
- name: Test with pytest
run: |
cp app/dependencies/settings.py.template app/dependencies/settings.py
pytest
env:
REDIS_URL: localhost
REDIS_PORT: 6379
FAIR_COMBINE_ENV: test
CELERY_SECRET_KEY: ${{ secrets.CELERY_SECRET_KEY }}
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
/redis_data/.testfile
/redis_data/dump.rdb
/app/test_main.http
/.coverage
/.coverage
/app/dependencies/settings.py
/Dockerfile
/.env
24 changes: 24 additions & 0 deletions Dockerfile.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# syntax=docker/dockerfile:1

FROM python:3.9 as main
LABEL authors="francois.ancien"


USER root
WORKDIR /faircombine
COPY requirements.txt /faircombine/requirements.txt
RUN python3 -m pip install --no-cache-dir --upgrade -r /faircombine/requirements.txt

COPY ./app /faircombine/app
COPY ./app/dependencies/settings.py.template /faircombine/app/dependencies/settings.py
# Fixed typo: destination was "/faicombine/session_files", which placed the
# session files in a misspelled directory outside the working tree.
COPY ./session_files /faircombine/session_files

ENV REDIS_URL="faircombine-redis"
ENV REDIS_PORT=6379

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--reload"]

# Second stage: same image, but runs the celery worker instead of uvicorn.
FROM main as celery
ENV BACKEND_URL="faircombine-backend"
ENV BACKEND_PORT=80
CMD ["celery", "-A", "app.celery.celery_app", "worker", "-l", "INFO"]
19 changes: 16 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,23 @@ Requirements: python3.9, [redis](https://redis.io/)
```bash
python -m pip install -r requirements.txt
```
2. In another terminal, run Redis
2. Copy the file `app/dependencies/settings.py.template` to `app/dependencies/settings.py`
and modify the value of `CELERY_SECRET_KEY` inside (or set a value in your environment).

3. In another terminal, run Redis
```bash
redis-stack-server
```
3. Run the local server:
```bash
uvicorn app.main:app --reload
```
4. (Optional) If you want the automated assessments to work, you need a celery worker running.
The option `-l INFO` can be added at the end of the line to increase the log level to INFO
```bash
celery -A app.celery.celery_app worker
```


The documentation is available at `http://localhost:8000/docs` (in SwaggerUI format) and at `http://localhost:8000/redoc` (in ReDoc format)

Expand All @@ -24,15 +33,19 @@ Main page (`http://localhost:8000`) redirects towards the documentation in ReDoc
## Docker installation
Requirements: Docker needs to be installed

1. Run docker-compose
1. Copy the `Dockerfile.template` file and save it as `Dockerfile`
2. Add the environment variable `CELERY_SECRET_KEY` just below the declaration of `REDIS_PORT` and give it a value.
3. Run docker-compose
```bash
docker-compose up -d
docker-compose up --build
```

Endpoints are accessible at `http://localhost:8000`.

If you have redis-cli or RedisInsight installed, the redis endpoint can be accessed at `http://localhost:6379`

This docker container also includes an image for the celery worker.

# Testing

First install test requirements.
Expand Down
5 changes: 3 additions & 2 deletions Dockerfile → app/celery/DockerFile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1

FROM python:3.9
FROM python:3.9 as main
LABEL authors="francois.ancien"


Expand All @@ -9,10 +9,11 @@ WORKDIR /faircombine
COPY requirements.txt /faircombine/requirements.txt
RUN python3 -m pip install --no-cache-dir --upgrade -r /faircombine/requirements.txt

COPY ./app /faircombine/app
COPY ../app /faircombine/app

ENV REDIS_URL="faircombine-redis"
ENV REDIS_PORT=6379

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--reload"]

FROM main as celery
Empty file added app/celery/__init__.py
Empty file.
7 changes: 7 additions & 0 deletions app/celery/automated_tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .f1_model_persistent_identifier_task import f1_model_persistent_identifier
from .f4_model_metadata_harvestable_task import f4_model_metadata_harvestable

# __all__ must contain the *names* of the public objects as strings;
# listing the function objects themselves breaks `from ... import *`.
__all__ = [
    "f1_model_persistent_identifier",
    "f4_model_metadata_harvestable",
]
91 changes: 91 additions & 0 deletions app/celery/automated_tasks/f1_model_persistent_identifier_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import requests

from typing import Optional

from app.celery.celery_app import app
from app.dependencies.settings import get_settings
from urllib.parse import urlparse
from ... import models


def check_alt_ids(metadata: dict) -> bool:
    """Return True if any alternative identifier in *metadata* is persistent.

    An identifier counts as persistent when its URL host is one of the
    recognised persistent-identifier services (DOI, PURL, identifiers.org,
    w3id). Expects ``metadata["alt_ids"]`` to be an iterable of URL strings.
    """
    persistent_hosts = frozenset(
        {
            "doi.org",
            "purl.org",
            "purl.oclc.org",
            "purl.net",
            "purl.com",
            "identifiers.org",
            "w3id.org",
        }
    )
    return any(
        urlparse(alt_id).netloc in persistent_hosts
        for alt_id in metadata["alt_ids"]
    )


@app.task
def f1_model_persistent_identifier(
    task_dict: dict, data: dict, test: bool = False
) -> Optional["models.TaskStatus"]:
    """Celery task assessing indicator F1 (model has a persistent identifier).

    Inspects ``data["main_model_metadata"]`` for alternative identifiers
    hosted on a recognised persistent-identifier service and reports the
    outcome to the backend through a PATCH on the task endpoint.

    :param task_dict: dict representation of the task; must provide
        ``"session_id"`` and ``"id"``
    :param data: (meta)data to evaluate
    :param test: when True, skip the HTTP update and return the status directly
    :return: the computed ``models.TaskStatus`` when *test* is True, else None
    """
    config = get_settings()
    print("Execution successfully called")
    session_id = task_dict["session_id"]
    task_id = task_dict["id"]

    try:
        metadata = data["main_model_metadata"]
        if metadata and check_alt_ids(metadata):
            print("Found persistent identifiers in model metadata")
            result = "success"
        else:
            print("No persistent identifier was found for model")
            result = "failed"
    except Exception as e:
        # Best effort: any unexpected failure is reported as an "error" status.
        print(f"An error occurred while assessing task: {str(e)}")
        result = "error"

    print(config.celery_key)
    status = models.TaskStatusIn(
        status=models.TaskStatus(result), force_update=config.celery_key
    )
    print(f"Task status computed: {result}")

    if test:
        return models.TaskStatus(result)

    # NOTE(review): the update goes through the HTTP API on purpose —
    # calling routers.update_task directly fails because the worker has no
    # access to fair_indicators, and writing straight to redis would not
    # trigger the update of child tasks.
    url = f"http://{config.backend_url}:{config.backend_port}/session/{session_id}/tasks/{task_id}"
    print(f"Patching {url}")
    requests.patch(
        url,
        json=status.dict(),
    )
71 changes: 71 additions & 0 deletions app/celery/automated_tasks/f4_model_metadata_harvestable_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests

from typing import Optional

from app.celery.celery_app import app
from app.dependencies.settings import get_settings
from ... import models


def dict_non_empty(metadata: dict) -> bool:
    """Return True if *metadata* has at least one truthy value.

    ``any()`` already applies truth-testing to each element, so the
    intermediate ``[bool(x) for x in ...]`` list of the original was
    redundant work; iterating the values directly is equivalent and lazy.
    """
    return any(metadata.values())


@app.task
def f4_model_metadata_harvestable(
    task_dict: dict, data: dict, test: bool = False
) -> Optional["models.TaskStatus"]:
    """Celery task assessing indicator F4 (model metadata is harvestable).

    Succeeds when ``data["main_model_metadata"]`` contains at least one
    non-empty value, then reports the outcome to the backend through a
    PATCH on the task endpoint.

    :param task_dict: dict representation of the task; must provide
        ``"session_id"`` and ``"id"``
    :param data: (meta)data to evaluate
    :param test: when True, skip the HTTP update and return the status directly
    :return: the computed ``models.TaskStatus`` when *test* is True, else None
    """
    config = get_settings()
    print("Execution successfully called")
    session_id = task_dict["session_id"]
    task_id = task_dict["id"]

    if dict_non_empty(data["main_model_metadata"]):
        print(f"Found metadata: {data['main_model_metadata']}")
        result = "success"
    else:
        print("No metadata found")
        result = "failed"

    status = models.TaskStatusIn(
        status=models.TaskStatus(result), force_update=config.celery_key
    )
    print(f"Task status computed: {result}")

    if test:
        return models.TaskStatus(result)

    # NOTE(review): the update goes through the HTTP API on purpose —
    # calling routers.update_task directly fails because the worker has no
    # access to fair_indicators, and writing straight to redis would not
    # trigger the update of child tasks.
    url = f"http://{config.backend_url}:{config.backend_port}/session/{session_id}/tasks/{task_id}"
    print(f"Patching {url}")
    requests.patch(
        url,
        json=status.dict(),
    )
7 changes: 7 additions & 0 deletions app/celery/celery_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from celery import Celery
from app.dependencies.settings import get_settings

# Celery application for the automated FAIR assessments. The broker URL
# comes from app settings (CELERY_BROKER_URL env var, falling back to the
# configured redis instance) — the previous FIXME about loading it from
# settings was stale, as the line below already does exactly that.
config = get_settings()
app = Celery("fair-combine", broker=config.celery_broker)
# Register every task defined under app.celery.automated_tasks.
app.autodiscover_tasks(["app.celery.automated_tasks"])
33 changes: 33 additions & 0 deletions app/decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import inspect
from typing import Type

from fastapi import Form
from pydantic import BaseModel
from pydantic.fields import ModelField


def as_form(cls: Type[BaseModel]):
    """Class decorator adding an ``as_form`` constructor to a pydantic model.

    Builds a callable whose signature mirrors the model's fields as FastAPI
    ``Form(...)`` parameters (required fields become required form fields,
    others keep their defaults), so the model can be used directly as a
    form-data dependency. The decorated class is returned unchanged apart
    from the new ``as_form`` attribute.
    """
    form_parameters = [
        inspect.Parameter(
            field.alias,
            inspect.Parameter.POSITIONAL_ONLY,
            default=Form(...) if field.required else Form(field.default),
            annotation=field.outer_type_,
        )
        for field in cls.__fields__.values()
    ]

    def as_form_func(**params):
        return cls(**params)

    # Replace the generic **params signature with the per-field one so that
    # FastAPI introspects individual form fields.
    as_form_func.__signature__ = inspect.signature(as_form_func).replace(
        parameters=form_parameters
    )
    cls.as_form = as_form_func
    return cls
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
import os

from functools import lru_cache
from pydantic import BaseSettings
from typing import List


class Config(BaseSettings):
app_name: str = "FAIR Combine API"
backend_url = os.environ.get("BACKEND_URL", "localhost")
backend_port = os.environ.get("BACKEND_PORT", 8000)
redis_url = os.environ.get("REDIS_URL", "localhost")
redis_port = os.environ.get("REDIS_PORT", 6379)
redis_db_number: int = 0
indicators_path = "app/metrics/metrics.csv"
celery_broker = os.environ.get(
"CELERY_BROKER_URL", f"redis://{redis_url}:{redis_port}/{redis_db_number}"
)
celery_key = os.environ.get("CELERY_SECRET_KEY") # KEEP HIDDEN
if not celery_key:
raise ValueError("A secret key is necessary for celery to interact with the application")

allowed_origins: List[str] = []
# List of indicators that applied to archive (if no archive, their statuses will be set to 'failed')
Expand Down Expand Up @@ -98,6 +109,11 @@ class Config(BaseSettings):
"CA-RDA-R1.1-03MA": {"condition": "or", "indicators": ["CA-RDA-R1.1-01MA"]},
}

automated_assessments: dict[str, str] = {
"CA-RDA-F1-01Model": "f1_model_persistent_identifier",
"CA-RDA-F4-01MM": "f4_model_metadata_harvestable",
}


class DevConfig(Config):
allowed_origins: List[str] = ["*"]
Expand Down
Loading

0 comments on commit d801e81

Please sign in to comment.