Skip to content

Commit

Permalink
numpy.int16 and numpy.uint16 support
Browse files Browse the repository at this point in the history
  • Loading branch information
NazarKostetskiy authored and ijl committed Aug 27, 2022
1 parent 263e912 commit 16b1323
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 3 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -783,8 +783,8 @@ JSONEncodeError: Integer exceeds 53-bit range
### numpy

orjson natively serializes `numpy.ndarray` and individual `numpy.float64`,
`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.int8`, `numpy.uint64`,
`numpy.uint32`, `numpy.uint8`, `numpy.uintp`, or `numpy.intp`, and
`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.int16`, `numpy.int8`, `numpy.uint64`,
`numpy.uint32`, `numpy.uint16`, `numpy.uint8`, `numpy.uintp`, or `numpy.intp`, and
`numpy.datetime64` instances.

orjson is faster than all compared libraries at serializing
Expand Down
10 changes: 9 additions & 1 deletion script/pynumpy
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,18 @@ elif kind == "bool":
array = numpy.random.choice((True, False), size=(100000, 200))
elif kind == "int8":
array = numpy.random.randint(((2**7) - 1), size=(100000, 100), dtype=numpy.int8)
elif kind == "int16":
array = numpy.random.randint(((2**15) - 1), size=(100000, 100), dtype=numpy.int16)
elif kind == "int32":
array = numpy.random.randint(((2**31) - 1), size=(100000, 100), dtype=numpy.int32)
elif kind == "uint8":
array = numpy.random.randint(((2**8) - 1), size=(100000, 100), dtype=numpy.uint8)
elif kind == "uint16":
array = numpy.random.randint(
((2**16) - 1), size=(100000, 100), dtype=numpy.uint16
)
else:
print("usage: pynumpy (bool|int32|float64|int8|uint8)")
print("usage: pynumpy (bool|int16|int32|float64|int8|uint8|uint16)")
sys.exit(1)
proc = psutil.Process()

Expand Down
127 changes: 127 additions & 0 deletions src/serialize/numpy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,13 @@ pub fn is_numpy_scalar(ob_type: *mut PyTypeObject) -> bool {
ob_type == scalar_types.float64
|| ob_type == scalar_types.float32
|| ob_type == scalar_types.int64
|| ob_type == scalar_types.int16
|| ob_type == scalar_types.int32
|| ob_type == scalar_types.int8
|| ob_type == scalar_types.uint64
|| ob_type == scalar_types.uint32
|| ob_type == scalar_types.uint8
|| ob_type == scalar_types.uint16
|| ob_type == scalar_types.bool_
|| ob_type == scalar_types.datetime64
}
Expand Down Expand Up @@ -134,9 +136,11 @@ pub enum ItemType {
F32,
F64,
I8,
I16,
I32,
I64,
U8,
U16,
U32,
U64,
}
Expand All @@ -152,9 +156,11 @@ impl ItemType {
(102, 4) => Some(ItemType::F32),
(102, 8) => Some(ItemType::F64),
(105, 1) => Some(ItemType::I8),
(105, 2) => Some(ItemType::I16),
(105, 4) => Some(ItemType::I32),
(105, 8) => Some(ItemType::I64),
(117, 1) => Some(ItemType::U8),
(117, 2) => Some(ItemType::U16),
(117, 4) => Some(ItemType::U32),
(117, 8) => Some(ItemType::U64),
_ => None,
Expand Down Expand Up @@ -323,6 +329,10 @@ impl Serialize for NumpyArray {
NumpyU32Array::new(slice!(self.data() as *const u32, self.num_items()))
.serialize(serializer)
}
ItemType::U16 => {
NumpyU16Array::new(slice!(self.data() as *const u16, self.num_items()))
.serialize(serializer)
}
ItemType::U8 => {
NumpyU8Array::new(slice!(self.data() as *const u8, self.num_items()))
.serialize(serializer)
Expand All @@ -335,6 +345,10 @@ impl Serialize for NumpyArray {
NumpyI32Array::new(slice!(self.data() as *const i32, self.num_items()))
.serialize(serializer)
}
ItemType::I16 => {
NumpyI16Array::new(slice!(self.data() as *const i16, self.num_items()))
.serialize(serializer)
}
ItemType::I8 => {
NumpyI8Array::new(slice!(self.data() as *const i8, self.num_items()))
.serialize(serializer)
Expand Down Expand Up @@ -506,6 +520,44 @@ impl Serialize for DataTypeU32 {
}
}

/// Borrowed view over a run of `u16` items that is serialized as one sequence.
#[repr(transparent)]
struct NumpyU16Array<'a> {
    /// Items to serialize, in order; borrowed, never copied.
    data: &'a [u16],
}

impl<'a> NumpyU16Array<'a> {
    /// Wraps `data` for serialization without copying it.
    fn new(data: &'a [u16]) -> NumpyU16Array<'a> {
        NumpyU16Array { data }
    }
}

impl<'a> Serialize for NumpyU16Array<'a> {
    /// Serializes every `u16` item as one element of a single sequence.
    ///
    /// # Errors
    ///
    /// Returns the serializer's error if opening the sequence or writing any
    /// element fails. Errors are propagated to the caller instead of
    /// panicking, since this function already returns a `Result`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // No length hint is passed; the sequence is closed by `end()` below.
        let mut seq = serializer.serialize_seq(None)?;
        for &item in self.data.iter() {
            seq.serialize_element(&DataTypeU16 { obj: item })?;
        }
        seq.end()
    }
}

/// Single `u16` value wrapped so it can be handed to a serializer.
#[repr(transparent)]
pub struct DataTypeU16 {
    /// The wrapped value.
    obj: u16,
}

impl Serialize for DataTypeU16 {
    /// Writes the wrapped value through the serializer's `serialize_u16`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let value: u16 = self.obj;
        serializer.serialize_u16(value)
    }
}

#[repr(transparent)]
struct NumpyI64Array<'a> {
data: &'a [i64],
Expand Down Expand Up @@ -582,6 +634,45 @@ impl Serialize for DataTypeI32 {
}
}

/// Borrowed view over a run of `i16` items that is serialized as one sequence.
#[repr(transparent)]
struct NumpyI16Array<'a> {
    /// Items to serialize, in order; borrowed, never copied.
    data: &'a [i16],
}

impl<'a> NumpyI16Array<'a> {
    /// Wraps `data` for serialization without copying it.
    fn new(data: &'a [i16]) -> NumpyI16Array<'a> {
        NumpyI16Array { data }
    }
}

impl<'a> Serialize for NumpyI16Array<'a> {
    /// Serializes every `i16` item as one element of a single sequence.
    ///
    /// # Errors
    ///
    /// Returns the serializer's error if opening the sequence or writing any
    /// element fails. Errors are propagated to the caller instead of
    /// panicking, since this function already returns a `Result`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // No length hint is passed; the sequence is closed by `end()` below.
        let mut seq = serializer.serialize_seq(None)?;
        for &item in self.data.iter() {
            seq.serialize_element(&DataTypeI16 { obj: item })?;
        }
        seq.end()
    }
}

/// Single `i16` value wrapped so it can be handed to a serializer.
#[repr(transparent)]
pub struct DataTypeI16 {
    /// The wrapped value.
    obj: i16,
}

impl Serialize for DataTypeI16 {
    /// Writes the wrapped value through the serializer's `serialize_i16`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let value: i16 = self.obj;
        serializer.serialize_i16(value)
    }
}


#[repr(transparent)]
struct NumpyI8Array<'a> {
data: &'a [i8],
Expand Down Expand Up @@ -725,12 +816,16 @@ impl Serialize for NumpyScalar {
(*(self.ptr as *mut NumpyInt64)).serialize(serializer)
} else if ob_type == scalar_types.int32 {
(*(self.ptr as *mut NumpyInt32)).serialize(serializer)
} else if ob_type == scalar_types.int16 {
(*(self.ptr as *mut NumpyInt16)).serialize(serializer)
} else if ob_type == scalar_types.int8 {
(*(self.ptr as *mut NumpyInt8)).serialize(serializer)
} else if ob_type == scalar_types.uint64 {
(*(self.ptr as *mut NumpyUint64)).serialize(serializer)
} else if ob_type == scalar_types.uint32 {
(*(self.ptr as *mut NumpyUint32)).serialize(serializer)
} else if ob_type == scalar_types.uint16 {
(*(self.ptr as *mut NumpyUint16)).serialize(serializer)
} else if ob_type == scalar_types.uint8 {
(*(self.ptr as *mut NumpyUint8)).serialize(serializer)
} else if ob_type == scalar_types.bool_ {
Expand Down Expand Up @@ -765,6 +860,22 @@ impl Serialize for NumpyInt8 {
}
}

/// `#[repr(C)]` view through which a `numpy.int16` scalar object is read when
/// it is serialized.
///
/// NOTE(review): the field order (reference count, type pointer, value)
/// presumably mirrors numpy's C layout for this scalar — confirm against the
/// numpy headers before changing it.
#[repr(C)]
pub struct NumpyInt16 {
    pub ob_refcnt: Py_ssize_t,
    pub ob_type: *mut PyTypeObject,
    /// The scalar's 16-bit signed payload.
    pub value: i16,
}

// No lifetime parameter: nothing in this impl borrows, so a declared lifetime
// would be unused.
impl Serialize for NumpyInt16 {
    /// Writes `value` through the serializer's `serialize_i16`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_i16(self.value)
    }
}

#[repr(C)]
pub struct NumpyInt32 {
ob_refcnt: Py_ssize_t,
Expand Down Expand Up @@ -813,6 +924,22 @@ impl Serialize for NumpyUint8 {
}
}

/// `#[repr(C)]` view through which a `numpy.uint16` scalar object is read when
/// it is serialized.
///
/// NOTE(review): the field order (reference count, type pointer, value)
/// presumably mirrors numpy's C layout for this scalar — confirm against the
/// numpy headers before changing it.
#[repr(C)]
pub struct NumpyUint16 {
    pub ob_refcnt: Py_ssize_t,
    pub ob_type: *mut PyTypeObject,
    /// The scalar's 16-bit unsigned payload.
    pub value: u16,
}

// No lifetime parameter: nothing in this impl borrows, so a declared lifetime
// would be unused.
impl Serialize for NumpyUint16 {
    /// Writes `value` through the serializer's `serialize_u16`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_u16(self.value)
    }
}

#[repr(C)]
pub struct NumpyUint32 {
ob_refcnt: Py_ssize_t,
Expand Down
4 changes: 4 additions & 0 deletions src/typeref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ pub struct NumpyTypes {
pub float32: *mut PyTypeObject,
pub int64: *mut PyTypeObject,
pub int32: *mut PyTypeObject,
pub int16: *mut PyTypeObject,
pub int8: *mut PyTypeObject,
pub uint64: *mut PyTypeObject,
pub uint32: *mut PyTypeObject,
pub uint16: *mut PyTypeObject,
pub uint8: *mut PyTypeObject,
pub bool_: *mut PyTypeObject,
pub datetime64: *mut PyTypeObject,
Expand Down Expand Up @@ -216,8 +218,10 @@ unsafe fn load_numpy_types() -> Option<NumpyTypes> {
float32: look_up_numpy_type(numpy, "float32\0"),
float64: look_up_numpy_type(numpy, "float64\0"),
int8: look_up_numpy_type(numpy, "int8\0"),
int16: look_up_numpy_type(numpy, "int16\0"),
int32: look_up_numpy_type(numpy, "int32\0"),
int64: look_up_numpy_type(numpy, "int64\0"),
uint16: look_up_numpy_type(numpy, "uint16\0"),
uint32: look_up_numpy_type(numpy, "uint32\0"),
uint64: look_up_numpy_type(numpy, "uint64\0"),
uint8: look_up_numpy_type(numpy, "uint8\0"),
Expand Down
36 changes: 36 additions & 0 deletions test/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,24 @@ def test_numpy_array_d1_i32(self):
== b"[-2147483647,2147483647]"
)

def test_numpy_array_d1_i16(self):
    """A 1-D int16 array holding both extremes serializes to a JSON array."""
    array = numpy.array([-32768, 32767], numpy.int16)
    encoded = orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY)
    assert encoded == b"[-32768,32767]"

def test_numpy_array_d1_u16(self):
    """A 1-D uint16 array holding both extremes serializes to a JSON array."""
    array = numpy.array([0, 65535], numpy.uint16)
    encoded = orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY)
    assert encoded == b"[0,65535]"

def test_numpy_array_d1_u32(self):
assert (
orjson.dumps(
Expand Down Expand Up @@ -522,6 +540,17 @@ def test_numpy_scalar_int8(self):
== b"-128"
)

def test_numpy_scalar_int16(self):
    """numpy.int16 scalars serialize as bare integers at zero and both extremes."""
    # Same three checks, in the same order, as a table of (input, expected).
    cases = ((0, b"0"), (32767, b"32767"), (-32768, b"-32768"))
    for raw, expected in cases:
        assert (
            orjson.dumps(numpy.int16(raw), option=orjson.OPT_SERIALIZE_NUMPY)
            == expected
        )

def test_numpy_scalar_int32(self):
assert orjson.dumps(numpy.int32(1), option=orjson.OPT_SERIALIZE_NUMPY) == b"1"
assert (
Expand Down Expand Up @@ -553,6 +582,13 @@ def test_numpy_scalar_uint8(self):
orjson.dumps(numpy.uint8(255), option=orjson.OPT_SERIALIZE_NUMPY) == b"255"
)

def test_numpy_scalar_uint16(self):
    """numpy.uint16 scalars serialize as bare integers at both extremes."""
    # Same two checks, in the same order, as a table of (input, expected).
    cases = ((0, b"0"), (65535, b"65535"))
    for raw, expected in cases:
        assert (
            orjson.dumps(numpy.uint16(raw), option=orjson.OPT_SERIALIZE_NUMPY)
            == expected
        )

def test_numpy_scalar_uint32(self):
assert orjson.dumps(numpy.uint32(0), option=orjson.OPT_SERIALIZE_NUMPY) == b"0"
assert (
Expand Down

0 comments on commit 16b1323

Please sign in to comment.