add prototype #31

Merged · 1 commit · Aug 15, 2024
66 changes: 66 additions & 0 deletions .github/workflows/deploy.yml
@@ -0,0 +1,66 @@
name: Deploy to AWS EC2 with Terraform

on:
  workflow_dispatch:
    inputs:
      environment:
        description: 'Deployment environment'
        required: true
        default: staging
        type: choice
        options:
          - staging
  push:
    tags:
      - 'v*.*.*' # Matches version tags like v1.0.0

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          install: true

      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Build and push Docker image
        run: |
          DOCKER_BUILDKIT=1 docker build -t osm_web_api:${{ github.event.inputs.environment || 'production' }} -f ./docker_images/web_api/Dockerfile .
          docker tag osm_web_api:${{ github.event.inputs.environment || 'production' }} ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/osm_web_api:${{ github.event.inputs.environment || 'production' }}
          docker push ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/osm_web_api:${{ github.event.inputs.environment || 'production' }}

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v2

      - name: Terraform Init
        run: terraform init -backend-config="path/to/${{ github.event.inputs.environment || 'production' }}/backend-config"

      - name: Terraform Plan
        run: terraform plan -var-file="terraform/${{ github.event.inputs.environment || 'production' }}.tfvars"

      - name: Terraform Apply
        if: success()
        run: terraform apply -var-file="terraform/${{ github.event.inputs.environment || 'production' }}.tfvars" -auto-approve

      - name: Notify Success
        if: success()
        run: echo "Deployment to ${{ github.event.inputs.environment || 'production' }} environment was successful."

      - name: Notify Failure
        if: failure()
        run: echo "Deployment to ${{ github.event.inputs.environment || 'production' }} environment failed."
4 changes: 4 additions & 0 deletions .gitignore
@@ -10,10 +10,14 @@ __pycache__/
*.pyd
*.env
*.egg-info/
*.tfstate
dist/
build/
.tox/
venv/
.vscode/settings.json
.DS_Store
osm_output
.terraform
.terraform.lock.hcl
.public_dns
4 changes: 1 addition & 3 deletions compose.override.yaml
@@ -9,7 +9,6 @@ services:
  osm_web_api:
    environment:
      - MONGODB_URI=mongodb://db:27017/test
      # - MONGODB_URI=mongodb://mongoadmin:secret@db:27017/osm
    build:
      context: .
      dockerfile: ./docker_images/web_api/Dockerfile
@@ -23,10 +22,9 @@ services:
      - db

  db:
    # use old version of mongo to avoid Apple Instruction set error
    image: mongo:4.4.6
    ports:
      - 27017:27017
    environment:
      - MONGO_INITDB_DATABASE=test
      # - MONGO_INITDB_ROOT_USERNAME=mongoadmin
      # - MONGO_INITDB_ROOT_PASSWORD=secret
15 changes: 0 additions & 15 deletions docker_images/web_api/Dockerfile

This file was deleted.

10 changes: 0 additions & 10 deletions docker_images/web_api/compose.yaml

This file was deleted.

2 changes: 0 additions & 2 deletions docker_images/web_api/requirements.txt

This file was deleted.

File renamed without changes.
55 changes: 53 additions & 2 deletions osm/_utils.py
@@ -1,14 +1,19 @@
import argparse
import base64
import hashlib
import logging
import os
import shlex
import subprocess
import time
from pathlib import Path

import requests

from osm._version import __version__

DEFAULT_OUTPUT_DIR = "./osm_output"
logger = logging.getLogger(__name__)


def _get_metrics_dir(output_dir: Path = DEFAULT_OUTPUT_DIR) -> Path:
@@ -34,7 +39,7 @@ def get_compute_context_id():
    return hash(f"{os.environ.get('HOSTNAME')}_{os.environ.get('USERNAME')}")


def _upload_data(args, file_in, xml, extracted):
def _upload_data(args, file_in, xml, metrics, components):
"""
TODO: add in derivatives and components
"""
@@ -53,7 +58,8 @@ def _upload_data(args, file_in, xml, extracted):
            "compute_context_id": get_compute_context_id(),
            "email": args.email,
        },
        "metrics": extracted,
        "metrics": metrics,
        "components": components,
    }
    # Send a PUT request to the OSM API
    response = requests.put(f"{osm_api}/upload", json=payload)
@@ -63,3 +69,48 @@ def _upload_data(args, file_in, xml, extracted):
        print("Invocation data uploaded successfully")
    else:
        print(f"Failed to upload invocation data: \n {response.text}")


def wait_for_containers():
    while True:
        try:
            response = requests.get("http://localhost:8071/health")
            if response.status_code == 200:
                break
        except requests.exceptions.RequestException:
            pass

        time.sleep(1)


def compose_up():
cmd = shlex.split("docker-compose up -d --build")
subprocess.run(
cmd,
check=True,
)


def compose_down():
cmd = shlex.split("docker-compose down")
subprocess.run(
cmd,
check=True,
)


def _setup(args):
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    xml_path = _get_text_dir() / f"{args.uid}.xml"
    if args.filepath.name.endswith(".pdf"):
        if xml_path.exists():
            raise FileExistsError(xml_path)
    metrics_path = _get_metrics_dir() / f"{args.uid}.json"
    if metrics_path.exists():
        raise FileExistsError(metrics_path)
    if not args.user_managed_compose:
        compose_up()
    logger.info("Waiting for containers to be ready...")
    wait_for_containers()
    return xml_path, metrics_path
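
The helpers above are meant to bracket a pipeline run: bring the compose stack up, block until the web API's health endpoint responds, do the work, and always tear the stack down. A minimal usage sketch, assuming only the functions defined in this diff and the port-8071 health endpoint they poll; `run_pipeline` is a placeholder callable, not part of the PR:

```python
# Minimal sketch of how the compose helpers in osm/_utils.py fit together.
# `run_pipeline` is a placeholder for whatever work needs the containers.
from osm._utils import compose_down, compose_up, wait_for_containers


def with_managed_containers(run_pipeline):
    compose_up()  # docker-compose up -d --build
    try:
        wait_for_containers()  # poll http://localhost:8071/health until it returns 200
        run_pipeline()
    finally:
        compose_down()  # docker-compose down
```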
129 changes: 52 additions & 77 deletions osm/cli.py
@@ -1,32 +1,25 @@
import argparse
import json
import logging
import shlex
import subprocess
import time
from pathlib import Path

import requests
from osm._utils import DEFAULT_OUTPUT_DIR, _existing_file, _setup, compose_down
from osm.pipeline.core import Pipeline
from osm.pipeline.extractors import RTransparentExtractor
from osm.pipeline.parsers import ScienceBeamParser
from osm.pipeline.savers import FileSaver, JSONSaver, OSMSaver, Savers

from osm._utils import (
    DEFAULT_OUTPUT_DIR,
    _existing_file,
    _get_metrics_dir,
    _get_text_dir,
    _upload_data,
)
from osm.components.rtransparent import _extract
from osm.components.sciencebeam import _convert

logger = logging.getLogger(__name__)
PARSERS = {
    "sciencebeam": ScienceBeamParser,
}
EXTRACTORS = {
    "rtransparent": RTransparentExtractor,
}


def parse_args():
    parser = argparse.ArgumentParser(description=("""Manage the execution of osm."""))

    parser.add_argument(
        "-f",
        "--file",
        "--filepath",
        type=_existing_file,
        required=True,
        help="Specify the path to the pdf/xml for processing.",
@@ -42,6 +35,20 @@ def parse_args():
        default=DEFAULT_OUTPUT_DIR,
        help="Directory to store output.",
    )
    parser.add_argument(
        "--parser",
        choices=PARSERS.keys(),
        default=["sciencebeam"],
        nargs="+",
        help="Select the tool for parsing the input document. Default is 'sciencebeam'.",
    )
    parser.add_argument(
        "--metrics-type",
        choices=EXTRACTORS.keys(),
        default=["rtransparent"],
        nargs="+",
        help="Select the tool for extracting the output metrics. Default is 'rtransparent'.",
    )
    parser.add_argument(
        "--comment",
        required=False,
@@ -61,71 +68,39 @@ def parse_args():
    return parser.parse_args()


def wait_for_containers():
    while True:
        try:
            response = requests.get("http://localhost:8071/health")
            if response.status_code == 200:
                break
        except requests.exceptions.RequestException:
            pass

        time.sleep(1)


def compose_up():
cmd = shlex.split("docker-compose up -d --build")
subprocess.run(
cmd,
check=True,
)


def compose_down():
cmd = shlex.split("docker-compose down")
subprocess.run(
cmd,
check=True,
)


def _setup(args):
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    xml_out = _get_text_dir() / f"{args.uid}.xml"
    if args.file.name.endswith(".pdf"):
        if xml_out.exists():
            raise FileExistsError(xml_out)
    metrics_out = _get_metrics_dir() / f"{args.uid}.json"
    if metrics_out.exists():
        raise FileExistsError(metrics_out)
    if not args.user_managed_compose:
        compose_up()
    logger.info("Waiting for containers to be ready...")
    wait_for_containers()
    return xml_out, metrics_out


def main():
    args = parse_args()
    try:
        xml_out, metrics_out = _setup(args)
        file_in = args.file.read_bytes()

        if args.file.name.endswith(".pdf"):
            xml = _convert(file_in)
            xml_out.write_bytes(xml)
        else:
            xml = file_in
        extracted = _extract(xml)
        metrics_out.write_text(json.dumps(extracted))
        _upload_data(args, file_in, xml, extracted)

        xml_path, metrics_path = _setup(args)
        pipeline = Pipeline(
            filepath=args.filepath,
            xml_path=xml_path,
            metrics_path=metrics_path,
            parsers=[PARSERS[p] for p in args.parser],
            extractors=[EXTRACTORS[m] for m in args.metrics_type],
            savers=Savers(
                file_saver=FileSaver(), json_saver=JSONSaver(), osm_saver=OSMSaver()
            ),
        )
        pipeline.run()
    finally:
        if not args.user_managed_compose:
            compose_down()
        pass


if __name__ == "__main__":
    main()

# def main():
# args = parse_args()
# try:
# pipeline = _setup(args)
# pipeline.parse()
# pipeline.extract()
# pipeline.save()
# xml_path, metrics_path, parser, extractor = _setup(args)
# xml = parser.parse()
# xml_path.write_bytes(xml)
# metrics = _extract(xml)
# metrics_path.write_text(json.dumps(metrics))
# _upload_data(args, file_in, xml, metrics,components)
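
The new `main()` delegates to `osm.pipeline`, which is not shown in this excerpt. For orientation only, here is a hedged sketch of the interface that the call in `main()` appears to assume; apart from the imported `Pipeline`, `Savers`, parser, and extractor names, the structure and method names below are illustrative assumptions, not the PR's actual code:

```python
# Hedged sketch only: osm.pipeline.core, .parsers, .extractors, and .savers are not
# part of this excerpt, so the interface below is inferred from main()'s call site.
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence


@dataclass
class Savers:
    file_saver: object
    json_saver: object
    osm_saver: object


@dataclass
class Pipeline:
    filepath: Path
    xml_path: Path
    metrics_path: Path
    parsers: Sequence[type]     # e.g. [ScienceBeamParser]
    extractors: Sequence[type]  # e.g. [RTransparentExtractor]
    savers: Savers

    def run(self) -> None:
        data = Path(self.filepath).read_bytes()
        for parser_cls in self.parsers:
            # Assumed: a parser turns the raw input (PDF or XML bytes) into XML bytes.
            xml = parser_cls().parse(data)
            for extractor_cls in self.extractors:
                # Assumed: an extractor turns XML bytes into a metrics dict, which the
                # savers then persist to xml_path/metrics_path and upload to the OSM API.
                metrics = extractor_cls().extract(xml)
```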