[IMP] orm: add optional parallelism to iter_browse.create()

cawo-odoo · cawo-odoo · commit be0b3033d26e · 2025-10-01T12:35:03.000Z
As with the support added to `__attr__` in the parent commit, this can only be
used by callers when it is known that the database modifications will be
distinct, causing neither concurrency issues nor side effects on the results.

`create` returns an `iter_browse` object for the caller to browse the created
records. To support vast numbers of created records with the multiprocessing
strategy, we process the values in a generator and initialize the returned
`iter_browse` object with it. As this requires the caller of `create` to always
consume/iterate the result (otherwise the records will not be created), it is
not applied to the other strategies, as that would break the existing API.
diff --git a/src/util/orm.py b/src/util/orm.py
@@ -367,7 +367,12 @@ def _mp_iter_browse_cb(ids_or_values, params):
         getattr(
             me.env[params["model_name"]].with_context(params["context"]).browse(ids_or_values), params["attr_name"]
         )(*params["args"], **params["kwargs"])
+    if params["mode"] == "create":
+        new_ids = me.env[params["model_name"]].with_context(params["context"]).create(ids_or_values).ids
     me.env.cr.commit()
+    if params["mode"] == "create":
+        return new_ids
+    return None
 
 
 class iter_browse(object):
@@ -437,14 +442,14 @@ def __init__(self, model, *args, **kw):
         self._ids = args[-1]
         self._size = kw.pop("size", None)
         self._chunk_size = kw.pop("chunk_size", 200)  # keyword-only argument
+        self._task_size = self._chunk_size
         self._logger = kw.pop("logger", _logger)
         self._strategy = kw.pop("strategy", "flush")
         assert self._strategy in {"flush", "commit", "multiprocessing"}
         if self._strategy == "multiprocessing":
             if not ProcessPoolExecutor:
                 raise ValueError("multiprocessing strategy can not be used in scripts run by python2")
             if UPG_PARALLEL_ITER_BROWSE:
-                self._task_size = self._chunk_size
                 self._chunk_size = min(get_max_workers() * 10 * self._task_size, 1000000)
             else:
                 self._strategy = "commit"  # downgrade
@@ -593,30 +598,58 @@ def create(self, values, **kw):
         if self._size:
             raise ValueError("`create` can only called on empty `browse_record` objects.")
 
-        ids = []
+        if self._strategy == "multiprocessing" and not multi:
+            raise ValueError("The multiprocessing strategy only supports the multi version of `create`")
+
         size = len(values)
         it = chunks(values, self._chunk_size, fmt=list)
         if self._logger:
             sz = (size + self._chunk_size - 1) // self._chunk_size
             qualifier = "env[%r].create([:%d])" % (self._model._name, self._chunk_size)
             it = log_progress(it, self._logger, qualifier=qualifier, size=sz)
 
-        self._patch = no_selection_cache_validation()
-        for sub_values in it:
+        def mp_create():
+            params = {
+                "dbname": self._model.env.cr.dbname,
+                "model_name": self._model._name,
+                # convert to dict for pickle. Will still break if any value in the context is not pickleable
+                "context": dict(self._model.env.context),
+                "mode": "create",
+            }
+            self._model.env.cr.commit()
             self._patch.start()
+            extrakwargs = {"mp_context": multiprocessing.get_context("fork")} if sys.version_info >= (3, 7) else {}
+            with ProcessPoolExecutor(max_workers=get_max_workers(), **extrakwargs) as executor:
+                for sub_values in it:
+                    for task_result in executor.map(
+                        _mp_iter_browse_cb, chunks(sub_values, self._task_size, fmt=tuple), repeat(params)
+                    ):
+                        self._model.env.cr.commit()  # make task_result visible on main cursor before yielding ids
+                        for new_id in task_result:
+                            yield new_id
+            next(self._end(), None)
 
-            if multi:
-                ids += self._model.create(sub_values).ids
-            elif not self._cr_uid:
-                ids += [self._model.create(sub_value).id for sub_value in sub_values]
-            else:
-                # old API, `create` directly return the id
-                ids += [self._model.create(*(self._cr_uid + (sub_value,))) for sub_value in sub_values]
+        self._patch = no_selection_cache_validation()
+        if self._strategy == "multiprocessing":
+            ids = mp_create()
+        else:
+            ids = []
+            for sub_values in it:
+                self._patch.start()
+
+                if multi:
+                    ids += self._model.create(sub_values).ids
+                elif not self._cr_uid:
+                    ids += [self._model.create(sub_value).id for sub_value in sub_values]
+                else:
+                    # old API, `create` directly return the id
+                    ids += [self._model.create(*(self._cr_uid + (sub_value,))) for sub_value in sub_values]
+
+                next(self._end(), None)
 
-            next(self._end(), None)
         args = self._cr_uid + (ids,)
         return iter_browse(
-            self._model, *args, chunk_size=self._chunk_size, logger=self._logger, strategy=self._strategy
+            self._model, *args, size=size, chunk_size=self._task_size, logger=self._logger, strategy=self._strategy
         )