Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
corneliusroemer committed Dec 2, 2024
1 parent 686fd39 commit 04ec4c7
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ In the default configuration the pipeline performs:

The pipeline also formats metadata fields:

- **process date**: Takes a date string and returns a date field in the "%Y-%m-%d" format.
- **parse timestamp**: Takes a timestamp e.g. 2022-11-01T00:00:00Z and returns that field in the "%Y-%m-%d" format.
- **parse timestamp**: Takes an ISO 8601 timestamp, e.g. `2022-11-01T00:00:00Z`, and returns that field in the `%Y-%m-%d` format.

The code is available on [GitHub](https://github.com/loculus-project/loculus/tree/main/preprocessing/nextclade) under the [AGPL-3.0 license](https://github.com/loculus-project/loculus/blob/main/LICENSE).
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import logging
import re
from dataclasses import dataclass
from datetime import date, datetime, tzinfo
from sqlite3 import Date
from datetime import datetime

import dateutil.parser as dateutil
import pytz
Expand Down Expand Up @@ -207,12 +206,17 @@ def parse_date_into_range(
output_field: str,
args: FunctionArgs = None, # args is essential - even if Pylance says it's not used
) -> ProcessingResult:
"""Parse date string into a range, return based on FunctionArgs
Three output options
"""Parse date string formatted as one of YYYY | YYYY-MM | YYYY-MM-DD into a range
Return value is determined by FunctionArgs:
fieldType: "dateRangeString" | "dateRangeLower" | "dateRangeUpper"
Default fieldType is "dateRangeString"
"""
if args is None:
args = {"fieldType": "dateRangeString"}

logger.debug(f"input_data: {input_data}")
date_str = input_data["date"]

input_date_str = input_data["date"]

release_date_str = input_data.get("releaseDate", "") or ""
try:
Expand All @@ -223,7 +227,7 @@ def parse_date_into_range(
now = datetime.now(tz=pytz.utc)
max_upper_limit = min(now, release_date) if release_date else now

if not date_str:
if not input_date_str:
return ProcessingResult(
datum=max_upper_limit.strftime("%Y-%m-%d")
if args["fieldType"] == "dateRangeUpper"
Expand All @@ -249,19 +253,19 @@ class DateRange:

for format, message in formats_to_messages.items():
try:
parsed_date = datetime.strptime(date_str, format).replace(tzinfo=pytz.utc)
parsed_date = datetime.strptime(input_date_str, format).replace(tzinfo=pytz.utc)
except ValueError:
continue
match format:
case "%Y-%m-%d":
datum = DateRange(
date_range_string=parsed_date.strftime("%Y-%m-%d"),
date_range_string=parsed_date.strftime(format),
date_range_lower=parsed_date,
date_range_upper=parsed_date,
)
case "%Y-%m":
datum = DateRange(
date_range_string=parsed_date.strftime("%Y-%m"),
date_range_string=parsed_date.strftime(format),
date_range_lower=parsed_date.replace(day=1),
date_range_upper=(
parsed_date.replace(
Expand All @@ -271,7 +275,7 @@ class DateRange:
)
case "%Y":
datum = DateRange(
date_range_string=parsed_date.strftime("%Y"),
date_range_string=parsed_date.strftime(format),
date_range_lower=parsed_date.replace(month=1, day=1),
date_range_upper=parsed_date.replace(month=12, day=31),
)
Expand All @@ -280,7 +284,7 @@ class DateRange:

if datum.date_range_upper > max_upper_limit:
logger.debug(
"Upper limit was tightened due to release date or current date. "
"Tightening upper limit due to release date or current date. "
f"Original upper limit: {datum.date_range_upper},"
f"new upper limit: {max_upper_limit}"
)
Expand All @@ -292,7 +296,7 @@ class DateRange:
source=[
AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
],
message=f"Metadata field {output_field}:'{date_str}' - " + message,
message=f"Metadata field {output_field}:'{input_date_str}' - " + message,
)
)

Expand All @@ -305,7 +309,7 @@ class DateRange:
source=[
AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
],
message=f"Metadata field {output_field}:'{date_str}' is in the future.",
message=f"Metadata field {output_field}:'{input_date_str}' is in the future.",
)
)

Expand All @@ -317,7 +321,8 @@ class DateRange:
AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
],
message=(
f"Metadata field {output_field}:'{date_str}'" "is after release date."
f"Metadata field {output_field}:'{input_date_str}'"
"is after release date."
),
)
)
Expand Down Expand Up @@ -346,7 +351,7 @@ class DateRange:
source=[
AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
],
message=f"Metadata field {output_field}: Date format is not recognized.",
message=f"Metadata field {output_field}: Date {input_date_str} could not be parsed.",
)
],
)
Expand Down

0 comments on commit 04ec4c7

Please sign in to comment.