Skip to content

Commit

Permalink
improve progress reporting
Browse files (browse the repository at this point in the history)
  • Loading branch information
leej3 committed Aug 26, 2024
1 parent 250491a commit 86b272f
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 8 deletions.
8 changes: 6 additions & 2 deletions osm/pipeline/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ def __init__(self, version: str = "0.0.1"):
self._orm_model = None

@abstractmethod
def run(self, data: Any, **kwargs) -> Any:
def _run(self, data: Any, **kwargs) -> Any:
"""Abstract method that subclasses must implement."""
pass

# Public entry point for a component. Prints a progress line containing
# self.name and self.version, then delegates the actual work to _run(),
# the abstract hook that subclasses implement.
# `data`, any extra positional arguments, and any keyword arguments are
# forwarded to _run() unchanged; whatever _run() returns is returned as-is.
def run(self, data: Any, *args, **kwargs) -> Any:
print(f"{self.name} (version {self.version}) is running.")
return self._run(data, *args, **kwargs)

def _get_orm_fields(self) -> dict[str, Any]:
fields = {}
for fieldname in self.orm_model_class.model_fields.keys():
Expand Down Expand Up @@ -100,7 +105,6 @@ def run(self):
if isinstance(parsed_data, str):
self.savers.save_file(parsed_data, self.xml_path)
for extractor in self.extractors:
# extracted_metrics = extractor.run(parsed_data,parser=parser.name)
extracted_metrics = extractor.run(parsed_data)
self.savers.save_osm(
file_in=self.file_data,
Expand Down
2 changes: 1 addition & 1 deletion osm/pipeline/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class RTransparentExtractor(Component):
def run(self, data: str, parser: str = None) -> dict:
def _run(self, data: str, parser: str = None) -> dict:
headers = {"Content-Type": "application/octet-stream"}
response = requests.post(
"http://localhost:8071/extract-metrics",
Expand Down
4 changes: 2 additions & 2 deletions osm/pipeline/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
class NoopParser(Component):
"""Used if the input is xml and so needs no parsing."""

def run(self, data: bytes) -> str:
def _run(self, data: bytes) -> str:
self.sample = LongBytes(data)
return data.decode("utf-8")


class ScienceBeamParser(Component):
def run(self, data: bytes) -> str:
def _run(self, data: bytes) -> str:
self.sample = LongBytes(data)
headers = {"Accept": "application/tei+xml", "Content-Type": "application/pdf"}
response = requests.post(SCIENCEBEAM_URL, data=data, headers=headers)
Expand Down
6 changes: 3 additions & 3 deletions osm/pipeline/savers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class FileSaver(Component):
"""Basic saver that writes data to a file."""

def run(self, data: str, path: Path):
def _run(self, data: str, path: Path):
"""Write data to a file.
Args:
Expand All @@ -35,7 +35,7 @@ def run(self, data: str, path: Path):
class JSONSaver(Component):
"""Saver that writes JSON data to a file."""

def run(self, data: dict, path: Path):
def _run(self, data: dict, path: Path):
"""Write output metrics to a JSON file for the user.
Args:
Expand Down Expand Up @@ -66,7 +66,7 @@ def __init__(self, comment, email, user_defined_id, filename):
self.user_defined_id = user_defined_id
self.filename = filename

def run(self, file_in: bytes, metrics: dict, components: list[schemas.Component]):
def _run(self, file_in: bytes, metrics: dict, components: list[schemas.Component]):
"""Save the extracted metrics to the OSM API.
Args:
Expand Down

0 comments on commit 86b272f

Please sign in to comment.