Skip to content

Commit

Permalink
PYTHON-2926 Updated signature of Binary.from_vector to take a BinaryVector (mongodb#1963)
Browse files Browse the repository at this point in the history
  • Loading branch information
caseyclements authored Oct 23, 2024
1 parent 5f7afea commit 26a61c8
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
25 changes: 18 additions & 7 deletions bson/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import struct
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, Sequence, Tuple, Type, Union
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union
from uuid import UUID

"""Tools for representing BSON binary data.
Expand Down Expand Up @@ -400,24 +400,35 @@ def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUI
@classmethod
def from_vector(
cls: Type[Binary],
vector: list[int, float],
dtype: BinaryVectorDtype,
padding: int = 0,
vector: Union[BinaryVector, list[int, float]],
dtype: Optional[BinaryVectorDtype] = None,
padding: Optional[int] = None,
) -> Binary:
"""**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype from a list of Numbers.
"""**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype.
To interpret the representation of the numbers, a data type must be included.
See :class:`~bson.binary.BinaryVectorDtype` for available types and descriptions.
The dtype and padding are prepended to the binary data's value.
:param vector: List of values
:param vector: Either a List of values, or a :class:`~bson.binary.BinaryVector` dataclass.
:param dtype: Data type of the values
:param padding: For fractional bytes, number of bits to ignore at end of vector.
:return: Binary packed data identified by dtype and padding.
.. versionadded:: 4.10
"""
if isinstance(vector, BinaryVector):
if dtype or padding:
raise ValueError(
"The first argument, vector, has type BinaryVector. "
"dtype or padding cannot be separately defined, but were."
)
dtype = vector.dtype
padding = vector.padding
vector = vector.data # type: ignore

padding = 0 if padding is None else padding
if dtype == BinaryVectorDtype.INT8: # pack ints in [-128, 127] as signed int8
format_str = "b"
if padding:
Expand All @@ -432,7 +443,7 @@ def from_vector(
raise NotImplementedError("%s not yet supported" % dtype)

metadata = struct.pack("<sB", dtype.value, padding)
data = struct.pack(f"<{len(vector)}{format_str}", *vector)
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
return cls(metadata + data, subtype=VECTOR_SUBTYPE)

def as_vector(self) -> BinaryVector:
Expand Down
20 changes: 19 additions & 1 deletion test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@
is_valid,
json_util,
)
from bson.binary import USER_DEFINED_SUBTYPE, Binary, BinaryVectorDtype, UuidRepresentation
from bson.binary import (
USER_DEFINED_SUBTYPE,
Binary,
BinaryVector,
BinaryVectorDtype,
UuidRepresentation,
)
from bson.code import Code
from bson.codec_options import CodecOptions, DatetimeConversion
from bson.datetime_ms import _DATETIME_ERROR_SUGGESTION
Expand Down Expand Up @@ -785,6 +791,18 @@ def test_vector(self):
else:
self.fail("Failed to raise an exception.")

# Test form of Binary.from_vector(BinaryVector)

assert padded_vec == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding)
)
assert binary_vector == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.INT8)
)
assert float_binary == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.FLOAT32)
)

def test_unicode_regex(self):
"""Tests we do not get a segfault for C extension on unicode RegExs.
This had been happening.
Expand Down

0 comments on commit 26a61c8

Please sign in to comment.