From e1f404ff36ded027ec2ee93b0bb026de74d2549c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lina?= Date: Thu, 28 Nov 2024 10:59:33 +0100 Subject: [PATCH] Fix timestamp parsing to always include milliseconds (#2683) * fix datetime parsing when ms are zero * small refactoring --- src/huggingface_hub/utils/_datetime.py | 23 ++++++++++++++--------- tests/test_utils_datetime.py | 6 ++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/huggingface_hub/utils/_datetime.py b/src/huggingface_hub/utils/_datetime.py index e544884b87..1a7f44285d 100644 --- a/src/huggingface_hub/utils/_datetime.py +++ b/src/huggingface_hub/utils/_datetime.py @@ -46,15 +46,20 @@ def parse_datetime(date_string: str) -> datetime: If `date_string` cannot be parsed. """ try: - # Datetime ending with a Z means "UTC". We parse the date and then explicitly - # set the timezone to UTC. - # See https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC) - # Taken from https://stackoverflow.com/a/3168394. - if len(date_string) == 30: - # Means timezoned-timestamp with nanoseconds precision. We need to truncate the last 3 digits. - date_string = date_string[:-4] + "Z" - dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ") - return dt.replace(tzinfo=timezone.utc) # Set explicit timezone + # Normalize the string to always have 6 digits of fractional seconds + if date_string.endswith("Z"): + # Case 1: No decimal point (e.g., "2024-11-16T00:27:02Z") + if "." 
not in date_string: + # No fractional seconds - insert .000000 + date_string = date_string[:-1] + ".000000Z" + # Case 2: Has decimal point (e.g., "2022-08-19T07:19:38.123456789Z") + else: + # Get the fractional and base parts + base, fraction = date_string[:-1].split(".") + # fraction[:6] takes first 6 digits and :0<6 pads with zeros if less than 6 digits + date_string = f"{base}.{fraction[:6]:0<6}Z" + + return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc) except ValueError as e: raise ValueError( f"Cannot parse '{date_string}' as a datetime. Date string is expected to" diff --git a/tests/test_utils_datetime.py b/tests/test_utils_datetime.py index 3cb6c04877..091fbc1f79 100644 --- a/tests/test_utils_datetime.py +++ b/tests/test_utils_datetime.py @@ -20,6 +20,12 @@ def test_parse_datetime(self): datetime(2022, 8, 19, 7, 19, 38, 123456, tzinfo=timezone.utc), ) + # Test without fractional seconds (should be parsed as 0 microseconds) + self.assertEqual( + parse_datetime("2024-11-16T00:27:02Z"), + datetime(2024, 11, 16, 0, 27, 2, 0, tzinfo=timezone.utc), + ) + with pytest.raises(ValueError, match=r".*Cannot parse '2022-08-19T07:19:38' as a datetime.*"): parse_datetime("2022-08-19T07:19:38")