Skip to content

Commit

Permalink
switch stray components to bytes
Browse files (browse the repository at this point in the history)
  • Loading branch information
leej3 committed Aug 27, 2024
1 parent 6510854 commit e7b3d33
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
6 changes: 3 additions & 3 deletions osm/pipeline/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, version: str = "0.0.1"):
self._orm_model = None

@abstractmethod
def _run(self, data: bytes, **kwargs) -> Any:
def _run(self, data: bytes|dict, **kwargs) -> Any:
"""Abstract method that subclasses must implement."""
pass

Expand Down Expand Up @@ -64,7 +64,7 @@ def __iter__(self):
yield self.json_saver
yield self.osm_saver

def save_file(self, data: str, path: Path):
def save_file(self, data: bytes, path: Path):
self.file_saver.run(data, path=path)

def save_json(self, data: dict, path: Path):
Expand Down Expand Up @@ -102,7 +102,7 @@ def __init__(
def run(self):
for parser in self.parsers:
parsed_data = parser.run(self.file_data)
if isinstance(parsed_data, str):
if isinstance(parsed_data, bytes):
self.savers.save_file(parsed_data, self.xml_path)
for extractor in self.extractors:
extracted_metrics = extractor.run(parsed_data, parser=parser.name)
Expand Down
7 changes: 5 additions & 2 deletions osm/pipeline/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from osm.schemas.custom_fields import LongBytes

from .core import Component
import io

SCIENCEBEAM_URL = "http://localhost:8070/api/convert"

Expand Down Expand Up @@ -30,8 +31,10 @@ class ScienceBeamParser(Component):
def _run(self, data: bytes) -> str:
self.sample = LongBytes(data)
headers = {"Accept": "application/tei+xml", "Content-Type": "application/pdf"}
response = requests.post(SCIENCEBEAM_URL, data=data, headers=headers)
files = {'file': ('input.pdf', io.BytesIO(data), 'application/pdf')}

response = requests.post(SCIENCEBEAM_URL, files=files, headers=headers)
if response.status_code == 200:
return response.text
return response.content
else:
response.raise_for_status()
4 changes: 2 additions & 2 deletions osm/pipeline/savers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ def format_error_message() -> str:
class FileSaver(Component):
"""Basic saver that writes data to a file."""

def _run(self, data: str, path: Path):
def _run(self, data: bytes, path: Path):
"""Write data to a file.
Args:
data (str): Some data.
path (Path): A file path.
"""
path.write_text(data)
path.write_bytes(data)
logger.info(f"Data saved to {path}")


Expand Down

0 comments on commit e7b3d33

Please sign in to comment.