numpy.int16 and numpy.uint16 support

ijl · Aug 27, 2022 · 16b1323 · 16b1323
1 parent 263e912
commit 16b1323
Show file tree

Hide file tree

Showing 5 changed files with 178 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -783,8 +783,8 @@ JSONEncodeError: Integer exceeds 53-bit range
 ### numpy
 
 orjson natively serializes `numpy.ndarray` and individual `numpy.float64`,
-`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.int8`, `numpy.uint64`,
-`numpy.uint32`, `numpy.uint8`, `numpy.uintp`, or `numpy.intp`, and
+`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.int16`, `numpy.int8`, `numpy.uint64`,
+`numpy.uint32`, `numpy.uint16`, `numpy.uint8`, `numpy.uintp`, or `numpy.intp`, and
 `numpy.datetime64` instances.
 
 orjson is faster than all compared libraries at serializing

diff --git a/script/pynumpy b/script/pynumpy
@@ -32,10 +32,18 @@ elif kind == "bool":
     array = numpy.random.choice((True, False), size=(100000, 200))
 elif kind == "int8":
     array = numpy.random.randint(((2**7) - 1), size=(100000, 100), dtype=numpy.int8)
+elif kind == "int16":
+    array = numpy.random.randint(((2**15) - 1), size=(100000, 100), dtype=numpy.int16)
+elif kind == "int32":
+    array = numpy.random.randint(((2**31) - 1), size=(100000, 100), dtype=numpy.int32)
 elif kind == "uint8":
     array = numpy.random.randint(((2**8) - 1), size=(100000, 100), dtype=numpy.uint8)
+elif kind == "uint16":
+    array = numpy.random.randint(
+        ((2**16) - 1), size=(100000, 100), dtype=numpy.uint16
+    )
 else:
-    print("usage: pynumpy (bool|int32|float64|int8|uint8)")
+    print("usage: pynumpy (bool|int16|int32|float64|int8|uint8|uint16)")
     sys.exit(1)
 proc = psutil.Process()
 

diff --git a/src/serialize/numpy.rs b/src/serialize/numpy.rs
@@ -84,11 +84,13 @@ pub fn is_numpy_scalar(ob_type: *mut PyTypeObject) -> bool {
         ob_type == scalar_types.float64
             || ob_type == scalar_types.float32
             || ob_type == scalar_types.int64
+            || ob_type == scalar_types.int16
             || ob_type == scalar_types.int32
             || ob_type == scalar_types.int8
             || ob_type == scalar_types.uint64
             || ob_type == scalar_types.uint32
             || ob_type == scalar_types.uint8
+            || ob_type == scalar_types.uint16
             || ob_type == scalar_types.bool_
             || ob_type == scalar_types.datetime64
     }
@@ -134,9 +136,11 @@ pub enum ItemType {
     F32,
     F64,
     I8,
+    I16,
     I32,
     I64,
     U8,
+    U16,
     U32,
     U64,
 }
@@ -152,9 +156,11 @@ impl ItemType {
             (102, 4) => Some(ItemType::F32),
             (102, 8) => Some(ItemType::F64),
             (105, 1) => Some(ItemType::I8),
+            (105, 2) => Some(ItemType::I16),
             (105, 4) => Some(ItemType::I32),
             (105, 8) => Some(ItemType::I64),
             (117, 1) => Some(ItemType::U8),
+            (117, 2) => Some(ItemType::U16),
             (117, 4) => Some(ItemType::U32),
             (117, 8) => Some(ItemType::U64),
             _ => None,
@@ -323,6 +329,10 @@ impl Serialize for NumpyArray {
                     NumpyU32Array::new(slice!(self.data() as *const u32, self.num_items()))
                         .serialize(serializer)
                 }
+                ItemType::U16 => {
+                    NumpyU16Array::new(slice!(self.data() as *const u16, self.num_items()))
+                        .serialize(serializer)
+                }
                 ItemType::U8 => {
                     NumpyU8Array::new(slice!(self.data() as *const u8, self.num_items()))
                         .serialize(serializer)
@@ -335,6 +345,10 @@ impl Serialize for NumpyArray {
                     NumpyI32Array::new(slice!(self.data() as *const i32, self.num_items()))
                         .serialize(serializer)
                 }
+                ItemType::I16 => {
+                    NumpyI16Array::new(slice!(self.data() as *const i16, self.num_items()))
+                        .serialize(serializer)
+                }
                 ItemType::I8 => {
                     NumpyI8Array::new(slice!(self.data() as *const i8, self.num_items()))
                         .serialize(serializer)
@@ -506,6 +520,44 @@ impl Serialize for DataTypeU32 {
     }
 }
 
+#[repr(transparent)] // one-field wrapper; layout matches the `&[u16]` it holds
+struct NumpyU16Array<'a> {
+    data: &'a [u16], // borrowed elements that are written out as one sequence
+}
+
+impl<'a> NumpyU16Array<'a> {
+    fn new(data: &'a [u16]) -> Self { // wraps the slice without copying it
+        Self { data }
+    }
+}
+
+impl<'a> Serialize for NumpyU16Array<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // emits the slice as a sequence, one element per u16
+    where
+        S: Serializer,
+    {
+        let mut seq = serializer.serialize_seq(None).unwrap(); // no length hint is passed; an Err from the serializer panics here
+        for &each in self.data.iter() {
+            seq.serialize_element(&DataTypeU16 { obj: each }).unwrap(); // DataTypeU16 writes the value via serialize_u16; an Err panics
+        }
+        seq.end()
+    }
+}
+
+#[repr(transparent)] // one-field wrapper; layout matches the inner u16
+pub struct DataTypeU16 {
+    obj: u16, // the single array element this wrapper serializes
+}
+
+impl Serialize for DataTypeU16 {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // writes the wrapped value with serialize_u16
+    where
+        S: Serializer,
+    {
+        serializer.serialize_u16(self.obj)
+    }
+}
+
 #[repr(transparent)]
 struct NumpyI64Array<'a> {
     data: &'a [i64],
@@ -582,6 +634,45 @@ impl Serialize for DataTypeI32 {
     }
 }
 
+#[repr(transparent)] // one-field wrapper; layout matches the `&[i16]` it holds
+struct NumpyI16Array<'a> {
+    data: &'a [i16], // borrowed elements that are written out as one sequence
+}
+
+impl<'a> NumpyI16Array<'a> {
+    fn new(data: &'a [i16]) -> Self { // wraps the slice without copying it
+        Self { data }
+    }
+}
+
+impl<'a> Serialize for NumpyI16Array<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // emits the slice as a sequence, one element per i16
+    where
+        S: Serializer,
+    {
+        let mut seq = serializer.serialize_seq(None).unwrap(); // no length hint is passed; an Err from the serializer panics here
+        for &each in self.data.iter() {
+            seq.serialize_element(&DataTypeI16 { obj: each }).unwrap(); // DataTypeI16 writes the value via serialize_i16; an Err panics
+        }
+        seq.end()
+    }
+}
+
+#[repr(transparent)] // one-field wrapper; layout matches the inner i16
+pub struct DataTypeI16 {
+    obj: i16, // the single array element this wrapper serializes
+}
+
+impl Serialize for DataTypeI16 {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // writes the wrapped value with serialize_i16
+    where
+        S: Serializer,
+    {
+        serializer.serialize_i16(self.obj)
+    }
+}
+
+
 #[repr(transparent)]
 struct NumpyI8Array<'a> {
     data: &'a [i8],
@@ -725,12 +816,16 @@ impl Serialize for NumpyScalar {
                 (*(self.ptr as *mut NumpyInt64)).serialize(serializer)
             } else if ob_type == scalar_types.int32 {
                 (*(self.ptr as *mut NumpyInt32)).serialize(serializer)
+            } else if ob_type == scalar_types.int16 {
+                (*(self.ptr as *mut NumpyInt16)).serialize(serializer)
             } else if ob_type == scalar_types.int8 {
                 (*(self.ptr as *mut NumpyInt8)).serialize(serializer)
             } else if ob_type == scalar_types.uint64 {
                 (*(self.ptr as *mut NumpyUint64)).serialize(serializer)
             } else if ob_type == scalar_types.uint32 {
                 (*(self.ptr as *mut NumpyUint32)).serialize(serializer)
+            } else if ob_type == scalar_types.uint16 {
+                (*(self.ptr as *mut NumpyUint16)).serialize(serializer)
             } else if ob_type == scalar_types.uint8 {
                 (*(self.ptr as *mut NumpyUint8)).serialize(serializer)
             } else if ob_type == scalar_types.bool_ {
@@ -765,6 +860,22 @@ impl Serialize for NumpyInt8 {
     }
 }
 
+#[repr(C)] // C field layout: NumpyScalar reinterprets a numpy.int16 object pointer as this struct
+pub struct NumpyInt16 {
+    pub ob_refcnt: Py_ssize_t, // NOTE(review): presumably mirrors the CPython object header's refcount — confirm
+    pub ob_type: *mut PyTypeObject, // NOTE(review): presumably the object header's type pointer — confirm
+    pub value: i16, // the scalar payload that gets serialized
+}
+
+impl Serialize for NumpyInt16 { // no lifetime parameter: the impl never uses one
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // writes `value` with serialize_i16
+    where
+        S: Serializer,
+    {
+        serializer.serialize_i16(self.value)
+    }
+}
+
 #[repr(C)]
 pub struct NumpyInt32 {
     ob_refcnt: Py_ssize_t,
@@ -813,6 +924,22 @@ impl Serialize for NumpyUint8 {
     }
 }
 
+#[repr(C)] // C field layout: NumpyScalar reinterprets a numpy.uint16 object pointer as this struct
+pub struct NumpyUint16 {
+    pub ob_refcnt: Py_ssize_t, // NOTE(review): presumably mirrors the CPython object header's refcount — confirm
+    pub ob_type: *mut PyTypeObject, // NOTE(review): presumably the object header's type pointer — confirm
+    pub value: u16, // the scalar payload that gets serialized
+}
+
+impl Serialize for NumpyUint16 { // no lifetime parameter: the impl never uses one
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> // writes `value` with serialize_u16
+    where
+        S: Serializer,
+    {
+        serializer.serialize_u16(self.value)
+    }
+}
+
 #[repr(C)]
 pub struct NumpyUint32 {
     ob_refcnt: Py_ssize_t,

diff --git a/src/typeref.rs b/src/typeref.rs
@@ -13,9 +13,11 @@ pub struct NumpyTypes {
     pub float32: *mut PyTypeObject,
     pub int64: *mut PyTypeObject,
     pub int32: *mut PyTypeObject,
+    pub int16: *mut PyTypeObject,
     pub int8: *mut PyTypeObject,
     pub uint64: *mut PyTypeObject,
     pub uint32: *mut PyTypeObject,
+    pub uint16: *mut PyTypeObject,
     pub uint8: *mut PyTypeObject,
     pub bool_: *mut PyTypeObject,
     pub datetime64: *mut PyTypeObject,
@@ -216,8 +218,10 @@ unsafe fn load_numpy_types() -> Option<NumpyTypes> {
         float32: look_up_numpy_type(numpy, "float32\0"),
         float64: look_up_numpy_type(numpy, "float64\0"),
         int8: look_up_numpy_type(numpy, "int8\0"),
+        int16: look_up_numpy_type(numpy, "int16\0"),
         int32: look_up_numpy_type(numpy, "int32\0"),
         int64: look_up_numpy_type(numpy, "int64\0"),
+        uint16: look_up_numpy_type(numpy, "uint16\0"),
         uint32: look_up_numpy_type(numpy, "uint32\0"),
         uint64: look_up_numpy_type(numpy, "uint64\0"),
         uint8: look_up_numpy_type(numpy, "uint8\0"),

diff --git a/test/test_numpy.py b/test/test_numpy.py
@@ -80,6 +80,24 @@ def test_numpy_array_d1_i32(self):
             == b"[-2147483647,2147483647]"
         )
 
+    def test_numpy_array_d1_i16(self):  # 1-D int16 array holding -32768 and 32767
+        assert (
+            orjson.dumps(
+                numpy.array([-32768, 32767], numpy.int16),
+                option=orjson.OPT_SERIALIZE_NUMPY,
+            )
+            == b"[-32768,32767]"  # expected output bytes for the two elements
+        )
+
+    def test_numpy_array_d1_u16(self):  # 1-D uint16 array holding 0 and 65535
+        assert (
+            orjson.dumps(
+                numpy.array([0, 65535], numpy.uint16),
+                option=orjson.OPT_SERIALIZE_NUMPY,
+            )
+            == b"[0,65535]"  # expected output bytes for the two elements
+        )
+
     def test_numpy_array_d1_u32(self):
         assert (
             orjson.dumps(
@@ -522,6 +540,17 @@ def test_numpy_scalar_int8(self):
             == b"-128"
         )
 
+    def test_numpy_scalar_int16(self):  # numpy.int16 scalars: zero, 32767 and -32768
+        assert orjson.dumps(numpy.int16(0), option=orjson.OPT_SERIALIZE_NUMPY) == b"0"
+        assert (
+            orjson.dumps(numpy.int16(32767), option=orjson.OPT_SERIALIZE_NUMPY)
+            == b"32767"  # largest int16 value
+        )
+        assert (
+            orjson.dumps(numpy.int16(-32768), option=orjson.OPT_SERIALIZE_NUMPY)
+            == b"-32768"  # smallest int16 value
+        )
+
     def test_numpy_scalar_int32(self):
         assert orjson.dumps(numpy.int32(1), option=orjson.OPT_SERIALIZE_NUMPY) == b"1"
         assert (
@@ -553,6 +582,13 @@ def test_numpy_scalar_uint8(self):
             orjson.dumps(numpy.uint8(255), option=orjson.OPT_SERIALIZE_NUMPY) == b"255"
         )
 
+    def test_numpy_scalar_uint16(self):  # numpy.uint16 scalars: zero and 65535
+        assert orjson.dumps(numpy.uint16(0), option=orjson.OPT_SERIALIZE_NUMPY) == b"0"
+        assert (
+            orjson.dumps(numpy.uint16(65535), option=orjson.OPT_SERIALIZE_NUMPY)
+            == b"65535"  # largest uint16 value
+        )
+
     def test_numpy_scalar_uint32(self):
         assert orjson.dumps(numpy.uint32(0), option=orjson.OPT_SERIALIZE_NUMPY) == b"0"
         assert (