Skip to content

Commit

Permalink
Tidy up
Browse files Browse the repository at this point in the history
  • Loading branch information
leej3 committed Jul 10, 2024
1 parent 03fede2 commit 1f06cef
Show file tree
Hide file tree
Showing 28 changed files with 517 additions and 141 deletions.
4 changes: 4 additions & 0 deletions .docker/_activate_current_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Run the supplied command line inside the "osm" conda environment.

# Load conda's shell integration so that `conda activate` is defined.
. /opt/conda/etc/profile.d/conda.sh
conda activate osm

# Hand the process over to the requested command, arguments untouched.
exec "$@"
4 changes: 4 additions & 0 deletions .docker/_apptainer_shell.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Activate the conda environment named by ENV_NAME (conda installed under
# MAMBA_ROOT_PREFIX) and then run the supplied command line.
#
# NOTE(review): the previous `exec "\$@"` executed a literal `$@` when this
# file is run as a standalone script. If the file is instead pasted through an
# unquoted heredoc, the escape was intentional -- confirm how it is consumed.
source "${MAMBA_ROOT_PREFIX}/etc/profile.d/conda.sh"
conda activate "${ENV_NAME}"
exec "$@"
4 changes: 4 additions & 0 deletions .docker/_dockerfile_initialize_user_accounts.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Create the group and the password-less user described by MAMBA_USER,
# MAMBA_USER_ID and MAMBA_USER_GID.
#
# -e: stop at the first failing command.
# -u: treat an unset variable as an error instead of silently dropping the
#     argument and shifting the remaining ones.
set -eu
addgroup --gid "${MAMBA_USER_GID}" "${MAMBA_USER}"
adduser --uid "${MAMBA_USER_ID}" --gid "${MAMBA_USER_GID}" --disabled-password --gecos "" "${MAMBA_USER}"
4 changes: 4 additions & 0 deletions .docker/_dockerfile_setup_root_prefix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Create MAMBA_ROOT_PREFIX and hand ownership of it to MAMBA_USER.
#
# -u guards the recursive chown: with an unset variable the unquoted original
# would have been invoked with a different argument list.
set -eu
mkdir -p "${MAMBA_ROOT_PREFIX}"
chown -R "${MAMBA_USER}:${MAMBA_USER}" "${MAMBA_ROOT_PREFIX}"
4 changes: 4 additions & 0 deletions .docker/_dockerfile_shell.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Initialise conda for this shell, activate the "osm" environment and run the
# supplied command line.

# Quote the command substitution so the generated hook script reaches `eval`
# verbatim instead of being word-split and glob-expanded first.
eval "$(/opt/conda/bin/conda shell.bash hook)"
conda activate osm
exec "$@"
4 changes: 4 additions & 0 deletions .docker/_entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Container entrypoint: activate the "osm" conda environment, then replace
# this shell with the command passed as arguments.

. /opt/conda/etc/profile.d/conda.sh
conda activate osm

# The Dockerfile ENTRYPOINT passes "--" as the first argument; `exec` treats it
# as the end of its own options, so the command that follows is what runs.
exec "$@"
72 changes: 29 additions & 43 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,33 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: pre-commit/[email protected]
tox-tests:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: [3.9]

steps:

- name: Checkout repository
uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Cache pip
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[dev]
- name: Start ScienceBeam Docker container
run: |
docker run -d --rm -p 8070 elifesciences/sciencebeam-parser
- name: Run tests
run: |
tox
- name: Test packaging
run: |
tox -e .package
pytest-suite:
  # Builds the compose stack and runs the test suite inside the app container.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Set up docker caching
      uses: actions/cache@v4
      with:
        path: /tmp/.buildx-cache
        key: ${{ runner.os }}-docker-${{ hashFiles('Dockerfile', '**/compose*yaml', '.docker/*', 'pyproject.toml', 'environment.yaml') }}
        restore-keys: |
          ${{ runner.os }}-docker-
    - name: Build the stack
      # Use the Compose v2 plugin (`docker compose`); the standalone v1
      # `docker-compose` binary is end-of-life.
      run: |
        docker buildx build --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache,mode=max --build-arg BUILDKIT_INLINE_CACHE=1 .
        docker compose up -d
      env:
        DOCKER_BUILDKIT: 1
    - name: Wait for services to be ready
      run: sleep 10 # Adjust time as needed for your services
    - name: Run pytest
      # Pass the whole command as ONE string to `bash -lc`. With
      # `--entrypoint '/bin/bash -lc' app pytest tests` bash received
      # `pytest` as the command and `tests` as $0, so the path was dropped.
      run: docker compose run --entrypoint /bin/bash app -lc 'pytest tests'
    - name: Tear down the stack
      if: always()
      run: docker compose down
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.4.9"
rev: v0.5.0
hooks:
# Run the linter.
- id: ruff
args: ["--fix"]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
Expand Down
42 changes: 42 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
FROM condaforge/mambaforge:24.3.0-0
# Login shell so that the profile scripts (and the conda activation appended
# to them below) are read by every subsequent RUN instruction.
SHELL ["/bin/bash", "--login", "-c"]
# Set working directory
WORKDIR /app

# Install debugging tools
RUN apt-get update && apt-get install -y \
    git \
    curl \
    iputils-ping \
    net-tools \
    && rm -rf /var/lib/apt/lists/*

# Copy project files for installation
ENV ENV_NAME=osm
COPY environment.yaml /app

# Create the environment
RUN conda env create -f environment.yaml

# Ensure the conda environment is activated
RUN echo "source /opt/conda/etc/profile.d/conda.sh && conda activate osm" | tee -a ~/.bashrc /etc/profile /etc/profile.d/conda.sh /etc/skel/.bashrc /etc/skel/.profile > /dev/null

# R packages that are installed from CRAN/GitHub rather than conda-forge.
# Fetch from CRAN over HTTPS so the download cannot be tampered with in transit.
RUN R -e '\
    install.packages("roadoi", repos = "https://cran.us.r-project.org"); \
    devtools::install_github("quest-bih/oddpub"); \
    devtools::install_github("cran/crminer"); \
    devtools::install_github("serghiou/metareadr"); \
    devtools::install_github("serghiou/rtransparent")'

# Copy the project files and install the package
COPY pyproject.toml /app
COPY osm /app/osm
COPY .git /app/.git

# Install the package in editable mode
RUN pip install -e .

# Make entrypoint etc. convenient for users
COPY .docker/_entrypoint.sh /usr/local/bin/_entrypoint.sh
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh", "--", "osm"]
CMD ["--help"]
68 changes: 56 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,66 @@
# OpenSciMetrics

OpenSciMetrics (OSM) applies NLP and LLM-based metrics and indicators related to transparency, data sharing, rigor, and open science on biomedical publications.

# How to setup and run the application
- After cloning the repo, navigate into the project's root directory by running `cd osm`
- Run `python -m venv venv` to create a Virtual Environment
- Depending on your system, run the appropriate command to Activate the Virtual Environment
Windows: `venv\Scripts\activate`<br>
macOS and Linux: `source venv/bin/activate`
# Running the app

- From the project's root directory run:

```
docker-compose -f compose.yaml run \
--rm \
-v $PWD:/mnt \
app \
rtransparent \
/mnt/docs/examples/pdf_inputs/test_sample.pdf \
output.xml
```


# Development

You can use docker-compose for development with the local source code mounted into the app container (this uses compose.override.yaml):

```
docker-compose up
```

If you are changing dependencies or the docker files you will want to rebuild the images:

```
docker-compose up --build
```

Troubleshooting issues with the osm application can be done with:

```
docker-compose run --entrypoint bash app
```

You can also set up this package for local development (on Linux x86 architecture only) by cloning the repo, navigating into the project's root directory, and following the steps in the [Dockerfile](./Dockerfile) to:
- create a conda environment
- install R dependencies only available from github/CRAN
- install the package in editable mode

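Open a terminal tab, run the image with `docker run --rm -p 8070:8070 elifesciences/sciencebeam-parser`, and keep it running (or use docker-compose as described above). Publishing the port as `8070:8070` makes the server reachable at `localhost:8070`; a bare `-p 8070` would map it to a random host port.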

**Note:** The ScienceBeam image is not supported on Apple silicon chips with emulation (using --platform=linux/amd64). The conda environment also doesn't solve at the moment due to various missing packages on this architecture.


## To test that the sciencebeam server is working:

This will work if you have exposed the port 8070 from the sciencebeam container (done for development in the docker override file):
```
curl --fail --show-error --form "file=@docs/examples/pdf_inputs/test_sample.pdf;filename=test_sample.pdf" --silent "http://localhost:8070/api/pdfalto"
```

You can alter the above command to work from other containers if you use app_network with the container and target the host "sciencebeam".

- Next, run `pip install -e .` to install the package with its dependencies.
- Finally, run `osm pdf-xml "path_to_file_name.pdf" file_id`
## How to run the tests

# How to run tests of the application
Run `tox`
# How to run the unit tests
- Navigate to the project's root directory and run `pytest`

# Using pre-commit for commit checks
## Using pre-commit for commit checks

Pre-commit will run all of its hooks on every commit you make. To install
pre-commit and its hooks, run the following commands:
Expand Down
9 changes: 9 additions & 0 deletions compose.override.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Development-only settings layered on top of compose.yaml.
services:
sciencebeam:
ports:
# Forward the port 8070 on the host to the port 8070 on the container for
# convenience during development
- "8070:8070"
app:
volumes:
# Mount the local source tree at /app inside the app container.
- .:/app
18 changes: 18 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Two-service stack: the ScienceBeam parser and the osm application.
services:
sciencebeam:
# No tag is given, so the image's default tag is used.
image: elifesciences/sciencebeam-parser
networks:
- app_network

app:
# Built from the Dockerfile in the repository root.
build:
context: .
dockerfile: Dockerfile
depends_on:
- sciencebeam
networks:
- app_network

networks:
# Bridge network that both services are attached to.
app_network:
driver: bridge
54 changes: 54 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: osm
channels:
- conda-forge
- nodefaults
dependencies:
- python=3.11
- click
- python-dotenv
- ipython
- lxml
- pandas
- pip
- pkginfo
- pre-commit
- pytest
- pytest-cov
- pytest-mock
- requests
- rich
- rpy2
- ruff=0.5.0
# Dependencies for rtransparent
- r-crul
- r-devtools
- r-dplyr
- r-furrr
- r-future
- r-globals
- r-hoardr
- r-httpcode
- r-lazyeval
- r-lubridate
- r-magrittr
- r-pbapply
- r-pdftools
- r-plyr
- r-purrr
- r-qpdf
- r-readr
- r-rentrez
- r-rlang
- r-stringr
- r-tibble
- r-tidyr
- r-tidyselect
- r-timechange
- r-tokenizers
- r-triebeard
- r-urltools
- r-utf8
- r-XML
- r-xml2
- pip:
- metapub
28 changes: 26 additions & 2 deletions osm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
from . import _version
import os

__version__ = _version.version

def get_version():
    """Return the version string of the ``osm`` package.

    The version is read from the ``_version`` module that sits next to this
    file. When that module cannot be imported, ``generate_version_file()`` is
    called to create it and the import is retried once.

    Returns:
        The value of ``_version.version``.

    Raises:
        ImportError: If ``_version`` still cannot be imported after the
            regeneration attempt.
    """
    try:
        from . import _version
    except ImportError:
        import importlib

        generate_version_file()
        # The import system caches directory listings, so a module file
        # created after start-up may stay invisible until they are dropped.
        importlib.invalidate_caches()
        from . import _version

    return _version.version


def generate_version_file():
    """Write ``_version.py`` next to this module from installed metadata.

    The version of the installed ``osm`` distribution is looked up and written
    as a single ``version = '<value>'`` assignment.

    Raises:
        importlib.metadata.PackageNotFoundError: If no ``osm`` distribution is
            installed in the current environment.
        OSError: If the package directory is not writable.
    """
    # importlib.metadata is the standard-library replacement for the
    # deprecated pkg_resources API and does not require setuptools at runtime.
    from importlib.metadata import version as distribution_version

    version = distribution_version("osm")
    version_file_content = f"version = '{version}'\n"

    version_file_path = os.path.join(os.path.dirname(__file__), "_version.py")
    # Explicit encoding keeps the generated file independent of the locale.
    with open(version_file_path, "w", encoding="utf-8") as version_file:
        version_file.write(version_file_content)


# Resolved once at import time; when _version.py is missing, get_version()
# regenerates it, so importing the package may write that file.
__version__ = get_version()
17 changes: 17 additions & 0 deletions osm/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import click

from osm.converters import convert_pdf


# Root command group; subcommands register themselves with @osm.command().
@click.group()
def osm():
    """Main command for OSM"""


# dir_okay=False: the input is a single document, so a directory path is
# rejected by click with a usage error instead of being handed on.
# NOTE(review): assumes convert_pdf never accepts a directory -- confirm.
@osm.command()
@click.argument("file_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_file", type=str)
def rtransparent(file_path, output_file):
    """Processes a biomedical publication. Writes out processed document and associated metrics."""
    # The docstring above doubles as the command's --help text.
    convert_pdf(file_path, output_file)
18 changes: 0 additions & 18 deletions osm/cli/main.py

This file was deleted.

Loading

0 comments on commit 1f06cef

Please sign in to comment.