Skip to content

Commit

Permalink
another try
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-parker committed Dec 9, 2024
1 parent 82c268a commit a2d061f
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions preprocessing/dummy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import random
import time
from dataclasses import dataclass, field
-from typing import List, Optional

import requests

Expand Down Expand Up @@ -88,7 +87,7 @@ class Sequence:
)


-def fetch_unprocessed_sequences(etag: str | None, n: int) -> tuple[str | None, List[Sequence]]:
+def fetch_unprocessed_sequences(etag: str | None, n: int) -> tuple[str | None, list[Sequence]]:
url = backendHost + "/extract-unprocessed-data"
params = {"numberOfSequenceEntries": n, "pipelineVersion": pipeline_version}
headers = {
Expand All @@ -112,7 +111,7 @@ def fetch_unprocessed_sequences(etag: str | None, n: int) -> tuple[str | None, L
)


-def parse_ndjson(ndjson_data: str) -> List[Sequence]:
+def parse_ndjson(ndjson_data: str) -> list[Sequence]:
json_strings = ndjson_data.split("\n")
entries = []
for json_str in json_strings:
Expand All @@ -124,7 +123,7 @@ def parse_ndjson(ndjson_data: str) -> List[Sequence]:
return entries


-def process(unprocessed: List[Sequence]) -> List[Sequence]:
+def process(unprocessed: list[Sequence]) -> list[Sequence]:
with open("mock-sequences.json", "r") as f:
mock_sequences = json.load(f)
possible_lineages = ["A.1", "A.1.1", "A.2"]
Expand All @@ -139,8 +138,6 @@ def process(unprocessed: List[Sequence]) -> List[Sequence]:
sequence.version,
{"metadata": metadata, **mock_sequences},
)
-updated_sequence.warnings = []
-updated_sequence.errors = []

disable_randomly = randomWarnError and random.choice([True, True, False])
if addErrors and not disable_randomly:
Expand Down Expand Up @@ -193,7 +190,7 @@ def process(unprocessed: List[Sequence]) -> List[Sequence]:
return processed


-def submit_processed_sequences(processed: List[Sequence]):
+def submit_processed_sequences(processed: list[Sequence]):
json_strings = [json.dumps(dataclasses.asdict(sequence)) for sequence in processed]
ndjson_string = "\n".join(json_strings)
logging.info(ndjson_string)
Expand Down

0 comments on commit a2d061f

Please sign in to comment.