Skip to content

Commit

Permalink
Fix timestamp parsing to always include milliseconds (#2683)
Browse files Browse the repository at this point in the history
* fix datetime parsing when ms are zero

* small refactoring
  • Loading branch information
hanouticelina authored Nov 28, 2024
1 parent d0a948f commit e1f404f
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
23 changes: 14 additions & 9 deletions src/huggingface_hub/utils/_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,20 @@ def parse_datetime(date_string: str) -> datetime:
If `date_string` cannot be parsed.
"""
try:
# Datetime ending with a Z means "UTC". We parse the date and then explicitly
# set the timezone to UTC.
# See https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC)
# Taken from https://stackoverflow.com/a/3168394.
if len(date_string) == 30:
# Means timezoned-timestamp with nanoseconds precision. We need to truncate the last 3 digits.
date_string = date_string[:-4] + "Z"
dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
return dt.replace(tzinfo=timezone.utc) # Set explicit timezone
# Normalize the string to always have 6 digits of fractional seconds
if date_string.endswith("Z"):
# Case 1: No decimal point (e.g., "2024-11-16T00:27:02Z")
if "." not in date_string:
# No fractional seconds - insert .000000
date_string = date_string[:-1] + ".000000Z"
# Case 2: Has decimal point (e.g., "2022-08-19T07:19:38.123456789Z")
else:
# Get the fractional and base parts
base, fraction = date_string[:-1].split(".")
# fraction[:6] takes first 6 digits and :0<6 pads with zeros if less than 6 digits
date_string = f"{base}.{fraction[:6]:0<6}Z"

return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
except ValueError as e:
raise ValueError(
f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
Expand Down
6 changes: 6 additions & 0 deletions tests/test_utils_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ def test_parse_datetime(self):
datetime(2022, 8, 19, 7, 19, 38, 123456, tzinfo=timezone.utc),
)

# Test without fractional seconds (parser should pad with .000000)
self.assertEqual(
parse_datetime("2024-11-16T00:27:02Z"),
datetime(2024, 11, 16, 0, 27, 2, 0, tzinfo=timezone.utc),
)

with pytest.raises(ValueError, match=r".*Cannot parse '2022-08-19T07:19:38' as a datetime.*"):
parse_datetime("2022-08-19T07:19:38")

Expand Down

0 comments on commit e1f404f

Please sign in to comment.