Skip to content

Commit ff70b00

Browse files
NoahStapp and sleepyStick
authored and committed
PYTHON-4915 - Add guidance on adding _id fields to documents to CRUD spec, reorder client.bulk_write generated _id fields (#1976)
1 parent 84db915 commit ff70b00

File tree

4 files changed

+134
-1
lines changed

4 files changed

+134
-1
lines changed

pymongo/message.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import datetime
2525
import random
2626
import struct
27+
from collections import ChainMap
2728
from io import BytesIO as _BytesIO
2829
from typing import (
2930
TYPE_CHECKING,
@@ -1115,8 +1116,18 @@ def _check_doc_size_limits(
11151116
# key and the index of its namespace within ns_info as its value.
11161117
op_doc[op_type] = ns_info[namespace] # type: ignore[index]
11171118

1119+
# Since the data document itself is nested within the insert document
1120+
# it won't be automatically re-ordered by the BSON conversion.
1121+
# We use ChainMap here to make the _id field the first field instead.
1122+
doc_to_encode = op_doc
1123+
if real_op_type == "insert":
1124+
doc = op_doc["document"]
1125+
if not isinstance(doc, RawBSONDocument):
1126+
doc_to_encode = op_doc.copy() # type: ignore[attr-defined] # Shallow copy
1127+
doc_to_encode["document"] = ChainMap(doc, {"_id": doc["_id"]}) # type: ignore[index]
1128+
11181129
# Encode current operation doc and, if newly added, namespace doc.
1119-
op_doc_encoded = _dict_to_bson(op_doc, False, opts)
1130+
op_doc_encoded = _dict_to_bson(doc_to_encode, False, opts)
11201131
op_length = len(op_doc_encoded)
11211132
if ns_doc:
11221133
ns_doc_encoded = _dict_to_bson(ns_doc, False, opts)

test/asynchronous/test_client_bulk_write.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@
1818
import os
1919
import sys
2020

21+
from bson import encode
22+
from bson.raw_bson import RawBSONDocument
23+
2124
sys.path[0:0] = [""]
2225

2326
from test.asynchronous import (
@@ -82,6 +85,17 @@ async def test_formats_write_error_correctly(self):
8285
self.assertEqual(write_error["idx"], 1)
8386
self.assertEqual(write_error["op"], {"insert": 0, "document": {"_id": 1}})
8487

88+
@async_client_context.require_version_min(8, 0, 0, -24)
89+
@async_client_context.require_no_serverless
90+
async def test_raw_bson_not_inflated(self):
91+
doc = RawBSONDocument(encode({"a": "b" * 100}))
92+
models = [
93+
InsertOne(namespace="db.coll", document=doc),
94+
]
95+
await self.client.bulk_write(models=models)
96+
97+
self.assertIsNone(doc._RawBSONDocument__inflated_doc)
98+
8599

86100
# https://github.com/mongodb/specifications/tree/master/source/crud/tests
87101
# Note: tests 1 and 2 are in test_read_write_concern_spec.py

test/mockupdb/test_id_ordering.py

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2024-present MongoDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from test import PyMongoTestCase
18+
19+
import pytest
20+
21+
from pymongo import InsertOne
22+
23+
try:
24+
from mockupdb import MockupDB, OpMsg, go, going
25+
26+
_HAVE_MOCKUPDB = True
27+
except ImportError:
28+
_HAVE_MOCKUPDB = False
29+
30+
31+
from bson.objectid import ObjectId
32+
33+
pytestmark = pytest.mark.mockupdb
34+
35+
36+
# https://github.com/mongodb/specifications/blob/master/source/crud/tests/README.md#16-generated-document-identifiers-are-the-first-field-in-their-document
37+
class TestIdOrdering(PyMongoTestCase):
38+
def test_16_generated_document_ids_are_first_field(self):
39+
server = MockupDB()
40+
server.autoresponds(
41+
"hello",
42+
isWritablePrimary=True,
43+
msg="isdbgrid",
44+
minWireVersion=0,
45+
maxWireVersion=25,
46+
helloOk=True,
47+
serviceId=ObjectId(),
48+
)
49+
server.run()
50+
self.addCleanup(server.stop)
51+
52+
# We also verify that the original document contains an _id field after each insert
53+
document = {"x": 1}
54+
55+
client = self.simple_client(server.uri, loadBalanced=True)
56+
collection = client.db.coll
57+
with going(collection.insert_one, document):
58+
request = server.receives()
59+
self.assertEqual("_id", next(iter(request["documents"][0])))
60+
request.reply({"ok": 1})
61+
self.assertIn("_id", document)
62+
63+
document = {"x1": 1}
64+
65+
with going(collection.bulk_write, [InsertOne(document)]):
66+
request = server.receives()
67+
self.assertEqual("_id", next(iter(request["documents"][0])))
68+
request.reply({"ok": 1})
69+
self.assertIn("_id", document)
70+
71+
document = {"x2": 1}
72+
with going(client.bulk_write, [InsertOne(namespace="db.coll", document=document)]):
73+
request = server.receives()
74+
self.assertEqual("_id", next(iter(request["ops"][0]["document"])))
75+
request.reply({"ok": 1})
76+
self.assertIn("_id", document)
77+
78+
# Re-ordering user-supplied _id fields is not required by the spec, but PyMongo does it for performance reasons
79+
with going(collection.insert_one, {"x": 1, "_id": 111}):
80+
request = server.receives()
81+
self.assertEqual("_id", next(iter(request["documents"][0])))
82+
request.reply({"ok": 1})
83+
84+
with going(collection.bulk_write, [InsertOne({"x1": 1, "_id": 1111})]):
85+
request = server.receives()
86+
self.assertEqual("_id", next(iter(request["documents"][0])))
87+
request.reply({"ok": 1})
88+
89+
with going(
90+
client.bulk_write, [InsertOne(namespace="db.coll", document={"x2": 1, "_id": 11111})]
91+
):
92+
request = server.receives()
93+
self.assertEqual("_id", next(iter(request["ops"][0]["document"])))
94+
request.reply({"ok": 1})

test/test_client_bulk_write.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@
1818
import os
1919
import sys
2020

21+
from bson import encode
22+
from bson.raw_bson import RawBSONDocument
23+
2124
sys.path[0:0] = [""]
2225

2326
from test import (
@@ -82,6 +85,17 @@ def test_formats_write_error_correctly(self):
8285
self.assertEqual(write_error["idx"], 1)
8386
self.assertEqual(write_error["op"], {"insert": 0, "document": {"_id": 1}})
8487

88+
@client_context.require_version_min(8, 0, 0, -24)
89+
@client_context.require_no_serverless
90+
def test_raw_bson_not_inflated(self):
91+
doc = RawBSONDocument(encode({"a": "b" * 100}))
92+
models = [
93+
InsertOne(namespace="db.coll", document=doc),
94+
]
95+
self.client.bulk_write(models=models)
96+
97+
self.assertIsNone(doc._RawBSONDocument__inflated_doc)
98+
8599

86100
# https://github.com/mongodb/specifications/tree/master/source/crud/tests
87101
# Note: tests 1 and 2 are in test_read_write_concern_spec.py

0 commit comments

Comments
 (0)