Skip to content

Commit

Permalink
apply ruff
Browse files (browse the repository at this point in the history)
  • Loading branch information
leej3 committed Aug 27, 2024
1 parent 7260074 commit 5bb9155
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
1 change: 0 additions & 1 deletion osm/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import time
import types
from pathlib import Path
from time import sleep

import pandas as pd
import requests
Expand Down
8 changes: 5 additions & 3 deletions osm/pipeline/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, version: str = "0.0.1"):
self._orm_model = None

@abstractmethod
def _run(self, data: bytes|dict, **kwargs) -> Any:
def _run(self, data: bytes | dict, **kwargs) -> Any:
"""Abstract method that subclasses must implement."""
pass

Expand Down Expand Up @@ -99,9 +99,11 @@ def __init__(
self.xml_path = xml_path
self.metrics_path = metrics_path

def run(self,user_managed_compose:bool=False):
def run(self, user_managed_compose: bool = False):
for parser in self.parsers:
parsed_data = parser.run(self.file_data,user_managed_compose=user_managed_compose)
parsed_data = parser.run(
self.file_data, user_managed_compose=user_managed_compose
)
if isinstance(parsed_data, bytes):
self.savers.save_file(parsed_data, self.xml_path)
for extractor in self.extractors:
Expand Down
16 changes: 10 additions & 6 deletions osm/pipeline/parsers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import requests
import io
import time

import requests

from osm.schemas.custom_fields import LongBytes

from .core import Component
import io

SCIENCEBEAM_URL = "http://localhost:8070/api/convert"

Expand All @@ -28,17 +30,19 @@ class PMCParser(NoopParser):


class ScienceBeamParser(Component):
def _run(self, data: bytes,user_managed_compose=False) -> str:
def _run(self, data: bytes, user_managed_compose=False) -> str:
self.sample = LongBytes(data)
headers = {"Accept": "application/tei+xml", "Content-Type": "application/pdf"}
files = {'file': ('input.pdf', io.BytesIO(data), 'application/pdf')}
files = {"file": ("input.pdf", io.BytesIO(data), "application/pdf")}
for attempt in range(5):
try:
if not user_managed_compose:
time.sleep(10)
response = requests.post(SCIENCEBEAM_URL, files=files, headers=headers)
except requests.exceptions.RequestException as e:
print(f"Attempt {attempt + 1} for parsing the file failed. This can happen while the container is starting up. Retrying in 5 seconds.")
except requests.exceptions.RequestException:
print(
f"Attempt {attempt + 1} for parsing the file failed. This can happen while the container is starting up. Retrying in 5 seconds."
)
continue
if response.status_code == 200:
return response.content
Expand Down

0 comments on commit 5bb9155

Please sign in to comment.