Skip to content

Commit

Permalink
reorganize
Browse files Browse the repository at this point in the history
  • Loading branch information
leej3 committed Jul 29, 2024
1 parent 50b4681 commit 73f5834
Show file tree
Hide file tree
Showing 28 changed files with 727 additions and 93 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ build/
venv/
.vscode/settings.json
.DS_Store
osm_output
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.4.9"
rev: v0.5.0
hooks:
# Run the linter.
- id: ruff
args: ["--fix"]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
Expand Down
23 changes: 10 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
# OpenSciMetrics

OpenSciMetrics (OSM) applies NLP and LLM-based metrics and indicators related to transparency, data sharing, rigor, and open science on biomedical publications.

# How to setup and run the application
- After cloning the repo, navigate into the project's root directory by running `cd osm`
- Run `python -m venv venv` to create a Virtual Environment
- Depending on your system, run the appropriate command to Activate the Virtual Environment
Windows: `venv\Scripts\activate`<br>
macOS and Linux: `source venv/bin/activate`
# Running the app

N.B. pdf parsing does not work on Apple silicon...

- Next, run `pip install -e .` to install the package with its dependencies.
- Finally, run `osm pdf-xml "path_to_file_name.pdf" file_id`
- With docker-compose and python >3.11 installed, run the following from the project's root directory:

# How to run tests of the application
Run `tox`
# How to run the unit tests
- Navigate to the project's root directory and run `pytest`
```
pip install .
osm -f path/to/pdf-or-xml -u uuid
```

# Using pre-commit for commit checks
## Using pre-commit for commit checks

Pre-commit will run all of its hooks on every commit you make. To install
pre-commit and its hooks, run the following commands:
Expand Down
32 changes: 32 additions & 0 deletions compose.override.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Local-development overrides: build images from this checkout, bind-mount the
# service sources, and add a MongoDB container for the web API.
services:
  rtransparent:
    build:
      context: .
      dockerfile: docker_images/rtransparent/Dockerfile
    volumes:
      # Bind-mount the service directory over /app inside the container.
      - ./docker_images/rtransparent:/app

  osm_web_api:
    environment:
      # Point the API at the `db` service defined below.
      - MONGODB_URI=mongodb://db:27017/test
      # - MONGODB_URI=mongodb://mongoadmin:secret@db:27017/osm
    build:
      context: .
      dockerfile: ./docker_images/web_api/Dockerfile
    ports:
      - "80:80"
    volumes:
      - ./docker_images/web_api:/app/app
    working_dir: /app/app
    command:
      - "fastapi"
      - "dev"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "80"
    depends_on:
      - db

  db:
    image: mongo:4.4.6
    ports:
      - "27017:27017"
    environment:
      - MONGO_INITDB_DATABASE=test
      # - MONGO_INITDB_ROOT_USERNAME=mongoadmin
      # - MONGO_INITDB_ROOT_PASSWORD=secret
14 changes: 14 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Base service definitions: the ScienceBeam parser and the rtransparent
# metrics service, each published on its own host port.
services:
  sciencebeam:
    image: elifesciences/sciencebeam-parser
    ports:
      - "8070:8070"
  rtransparent:
    image: nimh/rtransparent
    ports:
      - "8071:8071"
    healthcheck:
      # --fail makes curl exit non-zero on HTTP error statuses (>= 400);
      # without it any HTTP response, including a 500, counts as healthy.
      test: ["CMD", "curl", "--fail", "--include", "--request", "GET", "http://localhost:8071/health"]
      interval: 1s
      timeout: 3s
      retries: 3
4 changes: 4 additions & 0 deletions docker_images/_entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Container entrypoint: activate the `osm` conda environment, then run the
# command that was passed as arguments.

# Load conda's shell functions so that `conda activate` is available.
source /opt/conda/etc/profile.d/conda.sh
conda activate osm
# Replace this shell with the requested command, preserving argument quoting.
exec "$@"
35 changes: 35 additions & 0 deletions docker_images/rtransparent/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
FROM condaforge/mambaforge:24.3.0-0
# Run every RUN instruction in a bash login shell, so the profile scripts that
# receive the conda activation line below are sourced.
SHELL ["/bin/bash", "--login", "-c"]
# Set working directory
WORKDIR /app

# Install debugging tools (curl is also what the compose healthcheck invokes)
RUN apt-get update && apt-get install -y \
    git \
    curl \
    iputils-ping \
    net-tools \
    && rm -rf /var/lib/apt/lists/*

COPY docker_images/rtransparent/environment.yaml /app

# Create the environment
RUN conda env create -f environment.yaml

# Ensure the conda environment is activated
RUN echo "source /opt/conda/etc/profile.d/conda.sh && conda activate osm" | tee -a ~/.bashrc /etc/profile /etc/profile.d/conda.sh /etc/skel/.bashrc /etc/skel/.profile > /dev/null

# Install the R packages that are not taken from conda-forge.
# CRAN is fetched over HTTPS so packages cannot be tampered with in transit.
RUN R -e '\
    install.packages("roadoi", repos = "https://cloud.r-project.org"); \
    devtools::install_github("quest-bih/oddpub"); \
    devtools::install_github("cran/crminer"); \
    devtools::install_github("serghiou/metareadr"); \
    devtools::install_github("serghiou/rtransparent", build_vignettes = FALSE)'

# Copy the FastAPI application module
COPY docker_images/rtransparent/app.py /app

# Make entrypoint etc. convenient for users
COPY docker_images/_entrypoint.sh /usr/local/bin/_entrypoint.sh
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["fastapi", "dev", "--host", "0.0.0.0", "--port", "8071"]
80 changes: 80 additions & 0 deletions docker_images/rtransparent/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import logging
import tempfile
from pathlib import Path

import psutil
import rpy2.robjects as ro
from fastapi import FastAPI, HTTPException, Request, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

ro.r(f'Sys.setenv(VROOM_CONNECTION_SIZE = "{2**20}")')

logger = logging.getLogger(__name__)
app = FastAPI()


class HealthCheck(BaseModel):
    """Response model to validate and return when performing a health check."""

    # Health indicator sent back to the caller; defaults to "OK".
    status: str = "OK"


@app.get(
    "/health",
    tags=["healthcheck"],
    summary="Perform a Health Check",
    response_description="Return HTTP Status Code 200 (OK)",
    status_code=status.HTTP_200_OK,
    response_model=HealthCheck,
)
def get_health() -> HealthCheck:
    """
    ## Perform a Health Check
    Endpoint to perform a health check on. This endpoint can primarily be used by
    Docker to ensure robust container orchestration and management is in place.
    Other services which rely on proper functioning of the API service will not
    deploy if this endpoint returns any HTTP status code other than 200 (OK).
    Returns:
        HealthCheck: Returns a JSON response with the health status
    """
    return HealthCheck(status="OK")


def rtransparent_metric_extraction(
    xml_content: bytes, workers: int = psutil.cpu_count()
):
    """Run rtransparent's ``rt_all`` on an XML document and return the result.

    The XML bytes are written to a temporary ``.xml`` file because ``rt_all``
    is called with a file path. The temporary file is always removed, even
    when the R call or the conversion fails.

    Args:
        xml_content: Raw bytes of the XML document to analyse.
        workers: Number of workers passed to ``future::plan(multisession)``.
            The default is evaluated once, when the module is imported.

    Returns:
        The ``rt_all`` result converted through the pandas2ri converter
        (presumably a pandas DataFrame; the caller uses ``.to_json``).
    """
    rtransparent = importr("rtransparent")
    future = importr("future")
    future.plan(future.multisession, workers=workers)

    # delete=False: the file must outlive this `with` block so that R can open
    # it by path; it is removed explicitly in the `finally` below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xml") as temp_xml_file:
        temp_xml_file.write(xml_content)
        temp_xml_file_path = temp_xml_file.name

    try:
        with (ro.default_converter + pandas2ri.converter).context():
            df = ro.conversion.get_conversion().rpy2py(
                rtransparent.rt_all(temp_xml_file_path)
            )
    finally:
        # Clean up the temporary file whether or not the extraction succeeded.
        Path(temp_xml_file_path).unlink()

    return df


# from osm.schemas import Invocation
@app.post("/extract-metrics")
async def extract_metrics(request: Request):
    # Reads the raw request body as XML, runs the rtransparent extraction on
    # it, and returns the resulting records. Any failure is logged with its
    # traceback and reported to the client as HTTP 500, with the error text in
    # `detail`.
    try:
        xml_content = await request.body()
        metrics_df = rtransparent_metric_extraction(xml_content)
        logger.info(metrics_df)
        metrics_json = metrics_df.to_json(orient="records")
        # NOTE(review): `metrics_json` is already a JSON string, so JSONResponse
        # encodes it a second time and the body is a JSON string literal.
        # Left unchanged because callers may decode it twice -- confirm.
        return JSONResponse(content=metrics_json, status_code=200)
    except Exception as e:
        # Keep the traceback in the server log; the client only sees str(e).
        logger.exception("rtransparent metric extraction failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
47 changes: 47 additions & 0 deletions docker_images/rtransparent/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Conda environment for the rtransparent service image.
name: osm
channels:
  - conda-forge
  - nodefaults
dependencies:
  # Python side of the service
  - fastapi
  - lxml
  - pandas
  - pip
  - psutil
  - python
  - requests
  - rpy2
  - uvicorn
  # Dependencies for rtransparent
  - r-crul
  - r-devtools
  - r-dplyr
  - r-furrr
  - r-future
  - r-globals
  - r-hoardr
  - r-httpcode
  - r-lazyeval
  - r-lubridate
  - r-magrittr
  - r-pbapply
  - r-pdftools
  - r-plyr
  - r-purrr
  - r-qpdf
  - r-readr
  # - r-rentrez
  - r-rlang
  - r-stringr
  - r-tibble
  - r-tidyr
  - r-tidyselect
  - r-timechange
  - r-tokenizers
  - r-triebeard
  - r-urltools
  - r-utf8
  # - r-XML
  - r-xml2
  # Packages installed with pip inside the environment
  - pip:
      - metapub
15 changes: 15 additions & 0 deletions docker_images/web_api/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
FROM tiangolo/uvicorn-gunicorn:python3.11

WORKDIR /app

# Install the API's Python requirements first so this layer is cached
# independently of changes to the osm package sources.
COPY ./docker_images/web_api/requirements.txt /app/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/app/requirements.txt

# Consider installing from pypi
RUN mkdir -p /opt/osm
COPY pyproject.toml /opt/osm
COPY osm /opt/osm/osm
# NOTE(review): .git is presumably copied for VCS-based version detection
# during the build -- confirm; it adds the whole repository history to the image.
COPY .git /opt/osm/.git
# --no-cache-dir matches the requirements install above and keeps pip's
# download cache out of the image layer.
RUN pip install --no-cache-dir /opt/osm

COPY ./docker_images/web_api/main.py /app/app/main.py
File renamed without changes.
20 changes: 20 additions & 0 deletions docker_images/web_api/compose.override.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Local-development overrides for the web API: bind-mount this directory,
# run the FastAPI dev server, and add a MongoDB container.
services:
  osm_web_api:
    environment:
      # Point the API at the `db` service defined below.
      - MONGODB_URI=mongodb://db:27017/test
      # - MONGODB_URI=mongodb://mongoadmin:secret@db:27017/osm
    volumes:
      - ./:/app/app
    working_dir: /app/app
    command:
      - "fastapi"
      - "dev"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "80"
    depends_on:
      - db

  db:
    image: mongo:4.4.6
    ports:
      - "27017:27017"
    environment:
      - MONGO_INITDB_DATABASE=test
      # - MONGO_INITDB_ROOT_USERNAME=mongoadmin
      # - MONGO_INITDB_ROOT_PASSWORD=secret
10 changes: 10 additions & 0 deletions docker_images/web_api/compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Builds and runs the web API image from the repository root.
services:
  osm_web_api:
    image: osm_web_api
    environment:
      # In list form everything after the first `=` is the value, so the whole
      # entry is quoted rather than just the value; inner double quotes would
      # become part of the URI and make it unparsable.
      # NOTE(review): `<password>` is a placeholder; the real URI should
      # presumably come from the deployment environment -- confirm.
      - 'MONGODB_URI=mongodb+srv://johnlee:<password>@cluster0.6xo8ws7.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0'
    build:
      context: ../..
      dockerfile: ./docker_images/web_api/Dockerfile
    ports:
      - "80:80"
55 changes: 55 additions & 0 deletions docker_images/web_api/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
Sets up a web API for uploading osm metrics to a centralized database
Usage along the lines of:
curl -X PUT "http://localhost:80/upload/" \
-H "Content-Type: application/json" \
-d '{
"osm_version": "1.0",
"timestamp": "2024-07-24T12:00:00Z",
"user_comment": "example comment",
"work": {
"user_defined_id": "123",
"pmid": "pmid_example",
"file": "example_file_content_base64_encoded",
"content_hash": "example_hash",
"timestamp": "2024-07-24T12:00:00Z"
}
}'
"""

import os

import motor.motor_asyncio
from fastapi import FastAPI
from odmantic import AIOEngine

from osm.schemas import Invocation

app = FastAPI()

# Async MongoDB client; the connection string is read from the MONGODB_URI
# environment variable when it is set.
# NOTE(review): the fallback URI contains a literal "<password>" placeholder,
# so it presumably cannot authenticate as-is -- confirm whether MONGODB_URI
# should be mandatory instead.
client = motor.motor_asyncio.AsyncIOMotorClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://johnlee:<password>@cluster0.6xo8ws7.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
    )
)
# ODMantic engine bound to the "test" database on that client.
engine = AIOEngine(client=client, database="test")


# PUT /upload/: persist the submitted Invocation through the ODMantic engine
# and return the same object as the response body.
@app.put("/upload/", response_model=Invocation)
async def upload_invocation(invocation: Invocation):
    await engine.save(invocation)
    return invocation


if __name__ == "__main__":
    # Direct execution (`python main.py`): serve the app on port 8000.
    import uvicorn

    # uvicorn.run creates and manages its own event loop. The previous code
    # passed a loop object to Config(loop=...), which expects the name of a
    # loop implementation, and relied on the deprecated implicit loop returned
    # by asyncio.get_event_loop().
    uvicorn.run(app, host="0.0.0.0", port=8000)
2 changes: 2 additions & 0 deletions docker_images/web_api/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# The compose overrides start the service with `fastapi dev`, which needs the
# FastAPI CLI. Since FastAPI 0.112.0 that ships only with the "standard" extra.
fastapi[standard]
odmantic
Loading

0 comments on commit 73f5834

Please sign in to comment.