Skip to content

Commit

Permalink
Added support for native date encoding (#203)
Browse files Browse the repository at this point in the history
Co-authored-by: Alex Grönholm <[email protected]>
  • Loading branch information
bschoenmaeckers and agronholm authored Jan 4, 2024
1 parent d0101c5 commit 3e93caa
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 36 deletions.
13 changes: 12 additions & 1 deletion cbor2/_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import struct
import sys
from collections.abc import Callable, Mapping, Sequence
from datetime import datetime, timedelta, timezone
from datetime import date, datetime, timedelta, timezone
from io import BytesIO
from typing import TYPE_CHECKING, Any, TypeVar, cast, overload

Expand Down Expand Up @@ -448,6 +448,15 @@ def decode_special(self, subtype: int) -> Any:
#
# Semantic decoders (major tag 6)
#
def decode_epoch_date(self) -> date:
    """
    Decode semantic tag 100: a calendar date stored as a day offset.

    The tagged item is read with ``self._decode()`` and interpreted as a
    number of days relative to 1970-01-01. The resulting date is handed to
    ``self.set_shareable()`` and that call's result is returned.
    """
    # Semantic tag 100
    day_offset = self._decode()
    # 719163 == date(1970, 1, 1).toordinal()
    ordinal = day_offset + 719163
    decoded = date.fromordinal(ordinal)
    return self.set_shareable(decoded)

def decode_date_string(self) -> date:
    """
    Decode semantic tag 1004: a calendar date stored as an ISO format string.

    The tagged item is read with ``self._decode()`` and parsed with
    ``date.fromisoformat()``. The resulting date is handed to
    ``self.set_shareable()`` and that call's result is returned.
    """
    # Semantic tag 1004
    iso_text = self._decode()
    parsed = date.fromisoformat(iso_text)
    return self.set_shareable(parsed)

def decode_datetime_string(self) -> datetime:
# Semantic tag 0
Expand Down Expand Up @@ -699,10 +708,12 @@ def decode_float64(self) -> float:
35: CBORDecoder.decode_regexp,
36: CBORDecoder.decode_mime,
37: CBORDecoder.decode_uuid,
100: CBORDecoder.decode_epoch_date,
256: CBORDecoder.decode_stringref_namespace,
258: CBORDecoder.decode_set,
260: CBORDecoder.decode_ipaddress,
261: CBORDecoder.decode_ipnetwork,
1004: CBORDecoder.decode_date_string,
55799: CBORDecoder.decode_self_describe_cbor,
}

Expand Down
17 changes: 13 additions & 4 deletions cbor2/_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class CBOREncoder:

__slots__ = (
"datetime_as_timestamp",
"date_as_datetime",
"_timezone",
"_default",
"value_sharing",
Expand Down Expand Up @@ -171,6 +172,7 @@ def __init__(
"""
self.fp = fp
self.datetime_as_timestamp = datetime_as_timestamp
self.date_as_datetime = date_as_datetime
self.timezone = timezone
self.value_sharing = value_sharing
self.string_referencing = string_referencing
Expand All @@ -184,8 +186,6 @@ def __init__(
self._encoders = default_encoders.copy()
if canonical:
self._encoders.update(canonical_encoders)
if date_as_datetime:
self._encoders[date] = CBOREncoder.encode_date

def _find_encoder(self, obj_type: type) -> Callable[[CBOREncoder, Any], None] | None:
for type_or_tuple, enc in list(self._encoders.items()):
Expand Down Expand Up @@ -524,8 +524,16 @@ def encode_datetime(self, value: datetime) -> None:
self.encode_semantic(CBORTag(0, datestring))

def encode_date(self, value: date) -> None:
    """
    Encode a date, choosing the representation from the encoder options.

    - ``date_as_datetime`` set: the date is combined with midnight and the
      encoder's ``_timezone``, then passed to ``encode_datetime()``
    - otherwise, ``datetime_as_timestamp`` set: tag 100 wrapping the number
      of days relative to 1970-01-01
    - otherwise: tag 1004 wrapping the ISO format date string

    Exactly one item is emitted per call; there is no unconditional
    datetime conversion ahead of the option checks.
    """
    # Semantic tag 100 or 1004
    if self.date_as_datetime:
        value = datetime.combine(value, time()).replace(tzinfo=self._timezone)
        self.encode_datetime(value)
    elif self.datetime_as_timestamp:
        # 719163 == date(1970, 1, 1).toordinal()
        days_since_epoch = value.toordinal() - 719163
        self.encode_semantic(CBORTag(100, days_since_epoch))
    else:
        datestring = value.isoformat()
        self.encode_semantic(CBORTag(1004, datestring))

def encode_decimal(self, value: Decimal) -> None:
# Semantic tag 4
Expand Down Expand Up @@ -655,6 +663,7 @@ def encode_undefined(self, value: UndefinedType) -> None:
FrozenDict: CBOREncoder.encode_map,
type(undefined): CBOREncoder.encode_undefined,
datetime: CBOREncoder.encode_datetime,
date: CBOREncoder.encode_date,
re.Pattern: CBOREncoder.encode_regexp,
("fractions", "Fraction"): CBOREncoder.encode_rational,
("email.message", "Message"): CBOREncoder.encode_mime,
Expand Down
1 change: 1 addition & 0 deletions docs/versionhistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This library adheres to `Semantic Versioning <http://semver.org/>`_.
- Fixed a segmentation fault when decoding invalid unicode data
- Fixed infinite recursion when trying to hash a CBOR tag whose value points to the tag
itself
- Added support for native date encoding (bschoenmaeckers)

**5.5.1** (2023-11-02)

Expand Down
92 changes: 89 additions & 3 deletions source/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ static PyObject * decode_bytestring(CBORDecoderObject *, uint8_t);
static PyObject * decode_string(CBORDecoderObject *, uint8_t);
static PyObject * CBORDecoder_decode_datetime_string(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_epoch_datetime(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_epoch_date(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_date_string(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_fraction(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_bigfloat(CBORDecoderObject *);
static PyObject * CBORDecoder_decode_rational(CBORDecoderObject *);
Expand Down Expand Up @@ -974,10 +976,12 @@ decode_semantic(CBORDecoderObject *self, uint8_t subtype)
case 35: ret = CBORDecoder_decode_regexp(self); break;
case 36: ret = CBORDecoder_decode_mime(self); break;
case 37: ret = CBORDecoder_decode_uuid(self); break;
case 100: ret = CBORDecoder_decode_epoch_date(self); break;
case 256: ret = CBORDecoder_decode_stringref_ns(self); break;
case 258: ret = CBORDecoder_decode_set(self); break;
case 260: ret = CBORDecoder_decode_ipaddress(self); break;
case 261: ret = CBORDecoder_decode_ipnetwork(self); break;
case 1004: ret = CBORDecoder_decode_date_string(self); break;
case 55799: ret = CBORDecoder_decode_self_describe_cbor(self);
break;

Expand Down Expand Up @@ -1009,7 +1013,7 @@ decode_semantic(CBORDecoderObject *self, uint8_t subtype)


static PyObject *
parse_datestr(CBORDecoderObject *self, PyObject *str)
parse_datetimestr(CBORDecoderObject *self, PyObject *str)
{
const char* buf;
char *p;
Expand Down Expand Up @@ -1082,6 +1086,31 @@ parse_datestr(CBORDecoderObject *self, PyObject *str)
return ret;
}

// Parse the "YYYY-MM-DD" prefix of str into a new date object.
//
// Returns a new reference, or NULL with an exception set. A string that is
// shorter than 10 bytes or lacks the two '-' separators raises
// CBORDecodeValueError. self is not used.
static PyObject *
parse_datestr(CBORDecoderObject *self, PyObject *str)
{
    const char* buf;
    Py_ssize_t size;
    unsigned long int Y, m, d;

    buf = PyUnicode_AsUTF8AndSize(str, &size);
    // The conversion must be checked before size or buf[...] are touched:
    // on failure buf is NULL and size cannot be relied upon. The exception
    // set by the failed call is left in place for the caller.
    if (!buf)
        return NULL;

    if (size < 10 || buf[4] != '-' || buf[7] != '-') {
        PyErr_Format(
            _CBOR2_CBORDecodeValueError, "invalid date string %R", str);
        return NULL;
    }

    // strtoul stops at the first non-digit, i.e. at the '-' separators or
    // the end of the day field
    Y = strtoul(buf, NULL, 10);
    m = strtoul(buf + 5, NULL, 10);
    d = strtoul(buf + 8, NULL, 10);
    return PyDate_FromDate(Y, m, d);
}


// CBORDecoder.decode_datetime_string(self)
static PyObject *
Expand All @@ -1090,6 +1119,63 @@ CBORDecoder_decode_datetime_string(CBORDecoderObject *self)
// semantic type 0
PyObject *match, *str, *ret = NULL;

if (!_CBOR2_datetimestr_re && _CBOR2_init_re_compile() == -1)
return NULL;
str = decode(self, DECODE_NORMAL);
if (str) {
if (PyUnicode_Check(str)) {
match = PyObject_CallMethodObjArgs(
_CBOR2_datetimestr_re, _CBOR2_str_match, str, NULL);
if (match) {
if (match != Py_None)
ret = parse_datetimestr(self, str);
else
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"invalid datetime string: %R", str);
Py_DECREF(match);
}
} else
PyErr_Format(
_CBOR2_CBORDecodeValueError, "invalid datetime value: %R", str);
Py_DECREF(str);
}
set_shareable(self, ret);
return ret;
}

// CBORDecoder.decode_epoch_date(self)
//
// Decodes the item following tag 100 as a day count, multiplies it by the
// number of seconds in a day and builds a date from that timestamp.
// Returns a new reference, or NULL with an exception set. A non-numeric
// item raises CBORDecodeValueError.
static PyObject *
CBORDecoder_decode_epoch_date(CBORDecoderObject *self)
{
    // semantic type 100
    PyObject *num, *day_secs, *seconds, *tuple, *ret = NULL;

    num = decode(self, DECODE_NORMAL);
    if (num) {
        if (PyNumber_Check(num)) {
            // Every intermediate object is owned here and must be released:
            // PyTuple_Pack takes its own reference to the item it is given,
            // it does not steal ours
            day_secs = PyLong_FromLong(24 * 60 * 60);
            if (day_secs) {
                seconds = PyNumber_Multiply(num, day_secs);
                Py_DECREF(day_secs);
                if (seconds) {
                    tuple = PyTuple_Pack(1, seconds);
                    Py_DECREF(seconds);
                    if (tuple) {
                        ret = PyDate_FromTimestamp(tuple);
                        Py_DECREF(tuple);
                    }
                }
            }
        } else {
            PyErr_Format(
                _CBOR2_CBORDecodeValueError, "invalid timestamp value %R", num);
        }
        Py_DECREF(num);
    }
    set_shareable(self, ret);
    return ret;
}

// CBORDecoder.decode_date_string(self)
static PyObject *
CBORDecoder_decode_date_string(CBORDecoderObject *self)
{
// semantic type 1004
PyObject *match, *str, *ret = NULL;

if (!_CBOR2_datestr_re && _CBOR2_init_re_compile() == -1)
return NULL;
str = decode(self, DECODE_NORMAL);
Expand All @@ -1103,12 +1189,12 @@ CBORDecoder_decode_datetime_string(CBORDecoderObject *self)
else
PyErr_Format(
_CBOR2_CBORDecodeValueError,
"invalid datetime string: %R", str);
"invalid date string: %R", str);
Py_DECREF(match);
}
} else
PyErr_Format(
_CBOR2_CBORDecodeValueError, "invalid datetime value: %R", str);
_CBOR2_CBORDecodeValueError, "invalid date value: %R", str);
Py_DECREF(str);
}
set_shareable(self, ret);
Expand Down
44 changes: 27 additions & 17 deletions source/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
self->string_references = Py_None;
self->enc_style = 0;
self->timestamp_format = false;
self->date_as_datetime = false;
self->value_sharing = false;
self->shared_handler = NULL;
self->string_referencing = false;
Expand Down Expand Up @@ -139,6 +140,8 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
// Predicate values are returned as ints, but need to be stored as bool or ubyte
if (timestamp_format == 1)
self->timestamp_format = true;
if (date_as_datetime == 1)
self->date_as_datetime = true;
if (value_sharing == 1)
self->value_sharing = true;
if (enc_style == 1)
Expand Down Expand Up @@ -180,15 +183,6 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
_CBOR2_str_update, _CBOR2_canonical_encoders, NULL))
return -1;
}
if (date_as_datetime == 1) {
PyObject *encode_date = PyObject_GetAttr((PyObject *) &CBOREncoderType, _CBOR2_str_encode_date);
if (!encode_date)
return -1;
PyObject *datetime_class = (PyObject*)PyDateTimeAPI->DateType;
if (PyObject_SetItem(self->encoders, datetime_class, encode_date) == -1)
return -1;
Py_DECREF(encode_date);
}

return 0;
}
Expand Down Expand Up @@ -1043,22 +1037,36 @@ CBOREncoder_encode_datetime(CBOREncoderObject *self, PyObject *value)
static PyObject *
CBOREncoder_encode_date(CBOREncoderObject *self, PyObject *value)
{
    // semantic type 100 or 1004
    //
    // Encodes a date in one of three forms, picked from the encoder options:
    //   date_as_datetime  -> midnight datetime in self->tz, encoded by
    //                        CBOREncoder_encode_datetime
    //   timestamp_format  -> tag 100 + (value.toordinal() - 719163)
    //   otherwise         -> tag 1004 + value.isoformat()
    // Returns whatever the delegated encoder returns, or NULL with an
    // exception set if any step fails.
    PyObject *tmp, *epoch, *days, *ret = NULL;

    if (self->date_as_datetime) {
        tmp = PyDateTimeAPI->DateTime_FromDateAndTime(
            PyDateTime_GET_YEAR(value),
            PyDateTime_GET_MONTH(value),
            PyDateTime_GET_DAY(value),
            0, 0, 0, 0, self->tz,
            PyDateTimeAPI->DateTimeType);
        if (tmp)
            ret = CBOREncoder_encode_datetime(self, tmp);
    } else if (self->timestamp_format) {
        tmp = PyObject_CallMethodObjArgs(
            value, _CBOR2_str_toordinal, NULL);
        // 0xD8 0x64 is the header for tag 100
        if (tmp && fp_write(self, "\xD8\x64", 2) == 0) {
            // Both temporaries are owned here; CBOREncoder_encode_int only
            // borrows its argument, so release them once it returns
            epoch = PyLong_FromLong(719163);
            if (epoch) {
                days = PyNumber_Subtract(tmp, epoch);
                Py_DECREF(epoch);
                if (days) {
                    ret = CBOREncoder_encode_int(self, days);
                    Py_DECREF(days);
                }
            }
        }
    } else {
        tmp = PyObject_CallMethodObjArgs(
            value, _CBOR2_str_isoformat, NULL);
        // 0xD9 0x03 0xEC is the header for tag 1004
        if (tmp && fp_write(self, "\xD9\x03\xEC", 3) == 0)
            ret = CBOREncoder_encode_string(self, tmp);
    }
    Py_XDECREF(tmp);
    return ret;
}
}


Expand Down Expand Up @@ -1995,6 +2003,8 @@ encode(CBOREncoderObject *self, PyObject *value)
return CBOREncoder_encode_map(self, value);
else if (PyDateTime_CheckExact(value))
return CBOREncoder_encode_datetime(self, value);
else if (PyDate_CheckExact(value))
return CBOREncoder_encode_date(self, value);
else if (PyAnySet_CheckExact(value))
return CBOREncoder_encode_set(self, value);
// fall-thru
Expand Down
1 change: 1 addition & 0 deletions source/encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ typedef struct {
PyObject *shared_handler;
uint8_t enc_style; // 0=regular, 1=canonical, 2=custom
bool timestamp_format;
bool date_as_datetime;
bool value_sharing;
bool string_referencing;
bool string_namespacing;
Expand Down
Loading

0 comments on commit 3e93caa

Please sign in to comment.