From d98faab26c77a250d23e77c11456a3ebfde87883 Mon Sep 17 00:00:00 2001 From: leej3 Date: Wed, 28 Aug 2024 16:47:41 +0100 Subject: [PATCH] more integration complete --- compose.development.override.yaml | 8 ++++ compose.yaml | 5 +++ external_components/llm_extraction/Dockerfile | 15 +++++--- .../llm_extraction/environment.yaml | 38 ++----------------- osm/cli.py | 8 +++- 5 files changed, 33 insertions(+), 41 deletions(-) diff --git a/compose.development.override.yaml b/compose.development.override.yaml index 933fd03c..3eff03dd 100644 --- a/compose.development.override.yaml +++ b/compose.development.override.yaml @@ -6,6 +6,14 @@ services: dockerfile: ./external_components/rtransparent/Dockerfile volumes: - ./external_components/rtransparent:/app + llm_extraction: + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY:-NOKEY} + build: + context: . + dockerfile: ./external_components/llm_extraction/Dockerfile + volumes: + - ./external_components/llm_extraction:/app web_api: container_name: web_api diff --git a/compose.yaml b/compose.yaml index 037890bf..b4d384e2 100644 --- a/compose.yaml +++ b/compose.yaml @@ -9,3 +9,8 @@ services: image: nimhdsst/rtransparent:staging ports: - "8071:8071" + llm_extraction: + container_name: llm_extraction + image: nimhdsst/llm_extraction:staging + ports: + - "8072:8072" diff --git a/external_components/llm_extraction/Dockerfile b/external_components/llm_extraction/Dockerfile index 0b1b1d16..e456e10f 100644 --- a/external_components/llm_extraction/Dockerfile +++ b/external_components/llm_extraction/Dockerfile @@ -3,7 +3,7 @@ SHELL ["/bin/bash", "--login", "-c"] # Set working directory WORKDIR /app -COPY external_components/rtransparent/environment.yaml /app +COPY external_components/llm_extraction/environment.yaml /app # Create the environment RUN conda env create -f environment.yaml @@ -11,14 +11,17 @@ RUN conda env create -f environment.yaml # Ensure the conda environment is activated RUN echo "source /opt/conda/etc/profile.d/conda.sh && conda activate osm" | tee -a ~/.bashrc /etc/profile /etc/profile.d/conda.sh /etc/skel/.bashrc /etc/skel/.profile > /dev/null -RUN R -e '\ -devtools::install_github("quest-bih/oddpub",ref="c5b091c7e82ed6177192dc380a515b3dc6304863"); \ -devtools::install_github("serghiou/rtransparent", build_vignettes = F)' +RUN mkdir -p /opt/osm +COPY pyproject.toml /opt/osm +COPY osm /opt/osm/osm +ARG PSEUDO_VERSION=0.0.1 # strongly recommended to update based on git describe +RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OSM=${PSEUDO_VERSION} pip install -e /opt/osm +RUN --mount=source=.git,target=/opt/osm/.git,type=bind pip install -e /opt/osm # # Copy the project files and install the package -COPY external_components/rtransparent/app.py /app +COPY external_components/llm_extraction/app.py /app # Make entrypoint etc. convenient for users COPY external_components/_entrypoint.sh /usr/local/bin/_entrypoint.sh ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"] -CMD ["fastapi", "dev", "--host", "0.0.0.0", "--port", "8071"] +CMD ["fastapi", "dev", "--host", "0.0.0.0", "--port", "8072"] diff --git a/external_components/llm_extraction/environment.yaml b/external_components/llm_extraction/environment.yaml index f18e1134..60d75a77 100644 --- a/external_components/llm_extraction/environment.yaml +++ b/external_components/llm_extraction/environment.yaml @@ -7,41 +7,11 @@ dependencies: - lxml - pandas - pip - - psutil - python - requests - - rpy2 - uvicorn - # Dependencies for rtransparent - - r-crul - - r-devtools - - r-dplyr - - r-furrr - - r-future - - r-globals - - r-hoardr - - r-httpcode - - r-lazyeval - - r-lubridate - - r-magrittr - - r-pbapply - - r-pdftools - - r-plyr - - r-purrr - - r-qpdf - - r-readr - # - r-rentrez - - r-rlang - - r-stringr - - r-tibble - - r-tidyr - - r-tidyselect - - r-timechange - - r-tokenizers - - r-triebeard - - r-urltools - - r-utf8 - # - r-XML - - r-xml2 + - llama-index + - llama-index-llms-openai - pip: - - metapub + - odmantic + - llama-index-program-openai diff --git a/osm/cli.py b/osm/cli.py index 2917d4a4..bc193d06 100644 --- a/osm/cli.py +++ b/osm/cli.py @@ -2,7 +2,7 @@ from osm._utils import DEFAULT_OUTPUT_DIR, _existing_file, _setup, compose_down from osm.pipeline.core import Pipeline, Savers -from osm.pipeline.extractors import RTransparentExtractor +from osm.pipeline.extractors import LLMExtractor, RTransparentExtractor from osm.pipeline.parsers import NoopParser, PMCParser, ScienceBeamParser from osm.pipeline.savers import FileSaver, JSONSaver, OSMSaver @@ -13,6 +13,7 @@ } EXTRACTORS = { "rtransparent": RTransparentExtractor, + "llm_extractor": LLMExtractor, } @@ -51,6 +52,11 @@ def parse_args(): nargs="+", help="Select the tool for extracting the output metrics. Default is 'rtransparent'.", ) + parser.add_argument( + "--llm_model", + default="gpt-4o-2024-08-06", + help="Specify the model to use for LLM extraction.", + ) parser.add_argument( "--comment", required=False,