Skip to content

Commit 1e67c5c

Browse files
PYTHON-5289 Validate ignored bits are 0 on write for bson.BinaryVector (#2397)
1 parent ca3cbc3 commit 1e67c5c

File tree

3 files changed

+35
-10
lines changed

3 files changed

+35
-10
lines changed

bson/binary.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import annotations
1515

1616
import struct
17+
import warnings
1718
from enum import Enum
1819
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload
1920
from uuid import UUID
@@ -255,6 +256,9 @@ def __eq__(self, other: Any) -> bool:
255256
self.dtype == other.dtype and self.padding == other.padding and self.data == other.data
256257
)
257258

259+
def __len__(self) -> int:
260+
return len(self.data)
261+
258262

259263
class Binary(bytes):
260264
"""Representation of BSON binary data.
@@ -439,6 +443,9 @@ def from_vector(
439443
:param padding: For fractional bytes, number of bits to ignore at end of vector.
440444
:return: Binary packed data identified by dtype and padding.
441445
446+
.. versionchanged:: 4.14
447+
When padding is non-zero, ignored bits should be zero. Encoding raises a ValueError if they are not; decoding emits a DeprecationWarning.
448+
442449
.. versionadded:: 4.10
443450
"""
444451
if isinstance(vector, BinaryVector):
@@ -471,6 +478,10 @@ def from_vector(
471478

472479
metadata = struct.pack("<sB", dtype.value, padding)
473480
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
481+
if padding and len(vector) and not (data[-1] & ((1 << padding) - 1)) == 0:
482+
raise ValueError(
483+
"Vector has a padding P, but bits in the final byte lower than P are non-zero. They must be zero."
484+
)
474485
return cls(metadata + data, subtype=VECTOR_SUBTYPE)
475486

476487
def as_vector(self) -> BinaryVector:
@@ -522,6 +533,12 @@ def as_vector(self) -> BinaryVector:
522533
dtype_format = "B"
523534
format_string = f"<{n_values}{dtype_format}"
524535
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
536+
if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
537+
warnings.warn(
538+
"Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
539+
DeprecationWarning,
540+
stacklevel=2,
541+
)
525542
return BinaryVector(unpacked_uint8s, dtype, padding)
526543

527544
else:

doc/changelog.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ PyMongo 4.13 brings a number of changes including:
5858
or the `migration guide <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/migration/>`_ for more information.
5959
- Fixed a bug where :class:`pymongo.write_concern.WriteConcern` repr was not eval-able
6060
when using ``w="majority"``.
61+
- When padding is set, ignored bits in a BSON BinaryVector of PACKED_BIT dtype should be set to zero.
62+
This is now enforced when encoding, which is a breaking change.
63+
It is not yet enforced when decoding, so reading from the database will not fail; however, a warning will be triggered.
64+
From PyMongo 5.0, this rule will be enforced for both encoding and decoding.
6165

6266
Issues Resolved
6367
...............

test/test_bson.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ def test_vector(self):
739739
"""Tests of subtype 9"""
740740
# We start with valid cases, across the 3 dtypes implemented.
741741
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
742-
list_vector = [127, 7]
742+
list_vector = [127, 8]
743743
# As INT8, vector has length 2
744744
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
745745
vector = binary_vector.as_vector()
@@ -764,18 +764,18 @@ def test_vector(self):
764764
uncompressed = ""
765765
for val in list_vector:
766766
uncompressed += format(val, "08b")
767-
assert uncompressed[:-padding] == "0111111100000"
767+
assert uncompressed[:-padding] == "0111111100001"
768768

769769
# It is worthwhile explicitly showing the values encoded to BSON
770770
padded_doc = {"padded_vec": padded_vec}
771771
assert (
772772
encode(padded_doc)
773-
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
773+
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
774774
)
775775
# and dumped to json
776776
assert (
777777
json_util.dumps(padded_doc)
778-
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
778+
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
779779
)
780780

781781
# FLOAT32 is also implemented
@@ -784,15 +784,19 @@ def test_vector(self):
784784

785785
# Now some invalid cases
786786
for x in [-1, 257]:
787-
try:
787+
with self.assertRaises(struct.error):
788788
Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT)
789-
except Exception as exc:
790-
self.assertIsInstance(exc, struct.error)
791-
else:
792-
self.fail("Failed to raise an exception.")
793789

794-
# Test form of Binary.from_vector(BinaryVector)
790+
# Test that all ignored bits must be passed as zeros
791+
with self.assertRaises(ValueError):
792+
Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7)
795793

794+
with self.assertWarns(DeprecationWarning):
795+
meta = struct.pack("<sB", BinaryVectorDtype.PACKED_BIT.value, 7)
796+
data = struct.pack("1B", 255)
797+
Binary(meta + data, subtype=9).as_vector()
798+
799+
# Test form of Binary.from_vector(BinaryVector)
796800
assert padded_vec == Binary.from_vector(
797801
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding)
798802
)

0 commit comments

Comments
 (0)