Add stream vs. stack benchmarks to README

brandonwillard · brandonwillard · commit ea52306ae727 · 2021-01-09T16:34:37.000-06:00
diff --git a/README.md b/README.md
@@ -120,7 +120,87 @@ See the full example in the [examples directory](https://github.com/pythological
 
 ## Performance and Reliability
 
-`unification`'s current design allows for unification and reification of nested structures that break the Python stack recursion limit.  This scalability incurs an overhead cost compared to simple stack-based recursive unification/reificiation.
+`unification`'s current design allows for unification and reification of nested structures that would otherwise break the Python stack recursion limit.  It uses a generator-based design to "stream" the unifications and reifications.
+
+Below are some stack vs. stream benchmarks that demonstrate how well the stream-based approach scales against the stack-based approach in terms of unifying and reifying deeply nested lists containing integers.  These benchmarks were generated from the tests in `tests/test_benchmarks.py` using CPython 3.7.3.
+
+<details><summary>Stack vs. stream benchmarks</summary>
+<p>
+
+```text
+-------------------------------------------------------------------------------- benchmark 'reify_chain size=10': 2 tests -------------------------------------------------------------------------------
+Name (time in us)                   Min                 Max               Mean            StdDev             Median               IQR                Outliers  OPS (Kops/s)            Rounds  Iterations
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_reify_chain_stack[10]      41.0790 (1.0)      545.1940 (3.20)     52.9087 (1.07)     9.7964 (1.04)     50.8650 (1.08)     6.4301 (8.37)      11815;10849       18.9005 (0.93)     260164           1
+test_reify_chain_stream[10]     42.4410 (1.03)     170.5540 (1.0)      49.3080 (1.0)      9.3993 (1.0)      47.2400 (1.0)      0.7680 (1.0)      14962;102731       20.2807 (1.0)      278113           1
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------ benchmark 'reify_chain size=1000': 1 tests -----------------------------------------
+Name (time in ms)                          Min      Max     Mean  StdDev  Median     IQR  Outliers      OPS  Rounds  Iterations
+-------------------------------------------------------------------------------------------------------------------------------
+test_reify_chain_stream_large[1000]     7.7722  28.2579  10.0723  2.5087  9.4899  0.3106    70;155  99.2820    1528           1
+-------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------------------------------------- benchmark 'reify_chain size=300': 2 tests --------------------------------------------------------------------------
+Name (time in ms)                   Min                Max              Mean            StdDev            Median               IQR            Outliers       OPS            Rounds  Iterations
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_reify_chain_stack[300]      1.5183 (1.0)      22.1821 (1.19)     1.9826 (1.0)      1.5511 (1.16)     1.7410 (1.0)      0.0801 (1.0)       144;684  504.3878 (1.0)        7201           1
+test_reify_chain_stream[300]     1.7059 (1.12)     18.6020 (1.0)      2.1237 (1.07)     1.3389 (1.0)      1.9260 (1.11)     0.1020 (1.27)      118;585  470.8745 (0.93)       6416           1
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------- benchmark 'reify_chain size=35': 2 tests --------------------------------------------------------------------------------
+Name (time in us)                    Min                 Max                Mean             StdDev              Median                IQR             Outliers  OPS (Kops/s)            Rounds  Iterations
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_reify_chain_stream[35]     129.2780 (1.0)      868.1510 (1.02)     190.0433 (1.11)     36.2784 (1.41)     179.5690 (1.08)     21.5360 (2.30)     1535;1455        5.2620 (0.90)      26072           1
+test_reify_chain_stack[35]      150.7850 (1.17)     853.7920 (1.0)      170.5166 (1.0)      25.7944 (1.0)      165.8500 (1.0)       9.3530 (1.0)      3724;5480        5.8645 (1.0)       81286           1
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------- benchmark 'reify_chain size=5000': 1 tests ------------------------------------------
+Name (time in ms)                           Min      Max     Mean  StdDev   Median     IQR  Outliers      OPS  Rounds  Iterations
+---------------------------------------------------------------------------------------------------------------------------------
+test_reify_chain_stream_large[5000]     46.9073  86.9737  52.9724  6.6919  49.6787  3.9609     68;68  18.8778     292           1
+---------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------- benchmark 'unify_chain size=10': 2 tests -------------------------------------------------------------------------------
+Name (time in us)                   Min                 Max                Mean             StdDev              Median               IQR            Outliers  OPS (Kops/s)            Rounds  Iterations
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_unify_chain_stream[10]     77.6280 (1.0)      307.9130 (1.0)       86.7625 (1.0)      17.5355 (1.20)      82.7525 (1.0)      1.7290 (1.0)      809;1736       11.5257 (1.0)       15524           1
+test_unify_chain_stack[10]      92.9890 (1.20)     309.8770 (1.01)     104.2017 (1.20)     14.6694 (1.0)      101.0160 (1.22)     4.2368 (2.45)    3657;6651        9.5968 (0.83)      73379           1
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------- benchmark 'unify_chain size=1000': 1 tests ------------------------------------------
+Name (time in ms)                           Min      Max     Mean  StdDev   Median     IQR  Outliers      OPS  Rounds  Iterations
+---------------------------------------------------------------------------------------------------------------------------------
+test_unify_chain_stream_large[1000]     27.3518  65.5924  31.1374  4.2563  29.5148  3.5286     38;35  32.1158     496           1
+---------------------------------------------------------------------------------------------------------------------------------
+
+------------------------------------------------------------------------- benchmark 'unify_chain size=300': 2 tests --------------------------------------------------------------------------
+Name (time in ms)                   Min                Max              Mean            StdDev            Median               IQR            Outliers       OPS            Rounds  Iterations
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_unify_chain_stream[300]     3.6957 (1.0)      13.1876 (1.0)      4.4439 (1.0)      1.0719 (1.42)     4.2080 (1.0)      0.2410 (1.67)        51;95  225.0298 (1.0)        1114           1
+test_unify_chain_stack[300]      4.2952 (1.16)     13.4294 (1.02)     4.7732 (1.07)     0.7555 (1.0)      4.6623 (1.11)     0.1446 (1.0)        36;136  209.5024 (0.93)       2911           1
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------- benchmark 'unify_chain size=35': 2 tests ---------------------------------------------------------------------------------
+Name (time in us)                    Min                   Max                Mean             StdDev              Median                IQR            Outliers  OPS (Kops/s)            Rounds  Iterations
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test_unify_chain_stream[35]     285.6880 (1.0)        934.9690 (1.0)      324.5402 (1.0)      40.8338 (1.0)      319.8520 (1.0)      20.4375 (1.0)      962;1159        3.0813 (1.0)       24331           1
+test_unify_chain_stack[35]      345.2770 (1.21)     1,088.3650 (1.16)     407.9067 (1.26)     52.2263 (1.28)     396.6640 (1.24)     20.6560 (1.01)    2054;3027        2.4515 (0.80)      37594           1
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+--------------------------------------------- benchmark 'unify_chain size=5000': 1 tests ---------------------------------------------
+Name (time in ms)                            Min       Max      Mean   StdDev    Median      IQR  Outliers     OPS  Rounds  Iterations
+--------------------------------------------------------------------------------------------------------------------------------------
+test_unify_chain_stream_large[5000]     555.2733  754.9897  605.4949  50.6124  591.1251  61.4030       2;2  1.6515      26           1
+--------------------------------------------------------------------------------------------------------------------------------------
+
+Legend:
+  Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
+  OPS: Operations Per Second, computed as 1 / Mean
+```
+
+</p>
+</details>
 
 ## About
 
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
@@ -1,3 +1,6 @@
+import platform
+import sys
+
 import pytest
 
 from tests.utils import gen_long_chain
@@ -47,8 +50,8 @@ def reify_stack(u, s):
 @pytest.mark.parametrize("size", nesting_sizes)
 def test_unify_chain_stream(size, benchmark):
     a_lv = var()
-    form = gen_long_chain(a_lv, size)
-    term = gen_long_chain("a", size)
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
 
     res = benchmark(unify, form, term, {})
     assert res[a_lv] == "a"
@@ -58,8 +61,8 @@ def test_unify_chain_stream(size, benchmark):
 @pytest.mark.parametrize("size", nesting_sizes)
 def test_unify_chain_stack(size, benchmark):
     a_lv = var()
-    form = gen_long_chain(a_lv, size)
-    term = gen_long_chain("a", size)
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
 
     res = benchmark(unify_stack, form, term, {})
     assert res[a_lv] == "a"
@@ -69,19 +72,54 @@ def test_unify_chain_stack(size, benchmark):
 @pytest.mark.parametrize("size", nesting_sizes)
 def test_reify_chain_stream(size, benchmark):
     a_lv = var()
-    form = gen_long_chain(a_lv, size)
-    term = gen_long_chain("a", size)
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
 
-    res = benchmark(reify, form, {a_lv: "a"})
+    lvars.update({a_lv: "a"})
+    res = benchmark(reify, form, lvars)  # stream-based `reify`; `reify_stack` is covered by the stack test
     assert res == term
 
 
 @pytest.mark.benchmark(group="reify_chain")
 @pytest.mark.parametrize("size", nesting_sizes)
 def test_reify_chain_stack(size, benchmark):
     a_lv = var()
-    form = gen_long_chain(a_lv, size)
-    term = gen_long_chain("a", size)
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
 
-    res = benchmark(reify_stack, form, {a_lv: "a"})
+    lvars.update({a_lv: "a"})
+    res = benchmark(reify_stack, form, lvars)
     assert res == term
+
+
+@pytest.mark.benchmark(group="unify_chain")
+@pytest.mark.parametrize("size", [1000, 5000])
+def test_unify_chain_stream_large(size, benchmark):
+    a_lv = var()
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
+
+    res = benchmark(unify, form, term, {})
+    assert res[a_lv] == "a"
+
+
+@pytest.mark.skipif(
+    platform.python_implementation() == "PyPy",
+    reason="PyPy's sys.getrecursionlimit changes",
+)
+@pytest.mark.benchmark(group="reify_chain")
+@pytest.mark.parametrize("size", [sys.getrecursionlimit(), sys.getrecursionlimit() * 5])
+def test_reify_chain_stream_large(size, benchmark):
+    a_lv = var()
+    form, lvars = gen_long_chain(a_lv, size, use_lvars=True)
+    term, _ = gen_long_chain("a", size)
+
+    lvars.update({a_lv: "a"})
+
+    res = benchmark(reify, form, lvars)
+
+    if size < sys.getrecursionlimit():
+        assert res == term
+    else:
+        with pytest.raises(RecursionError):
+            assert res == term
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -218,18 +218,18 @@ def test_reify_recursion_limit():
 
     a_lv = var()
 
-    b = gen_long_chain(a_lv, 10)
+    b, _ = gen_long_chain(a_lv, 10)
     res = reify(b, {a_lv: "a"})
-    assert res == gen_long_chain("a", 10)
+    assert res == gen_long_chain("a", 10)[0]
 
     r_limit = sys.getrecursionlimit()
 
     try:
         sys.setrecursionlimit(100)
 
-        b = gen_long_chain(a_lv, 200)
+        b, _ = gen_long_chain(a_lv, 200)
         res = reify(b, {a_lv: "a"})
-        exp_res = gen_long_chain("a", 200)
+        exp_res, _ = gen_long_chain("a", 200)
 
         if platform.python_implementation().lower() != "pypy":
             # CPython has stack limit issues when comparing nested lists, but
@@ -248,8 +248,8 @@ def test_reify_recursion_limit():
 def test_unify_recursion_limit():
     a_lv = var()
 
-    b = gen_long_chain("a")
-    b_var = gen_long_chain(a_lv)
+    b, _ = gen_long_chain("a")
+    b_var, _ = gen_long_chain(a_lv)
 
     s = unify(b, b_var, {})
 
diff --git a/tests/utils.py b/tests/utils.py
@@ -1,10 +1,36 @@
 import sys
 
+from unification.variable import var
 
-def gen_long_chain(last_elem=None, N=None):
+
+def gen_long_chain(last_elem=None, N=None, use_lvars=False):
+    """Generate a list nested ``N - 1`` levels deep with the last element set to `last_elem`.
+
+    Parameters
+    ----------
+    last_elem: object
+        The element to be placed in the inner-most nested list.
+    N: int
+        One more than the number of nested lists; defaults to ``sys.getrecursionlimit()``.
+    use_lvars: bool
+        Whether or not to add `var`s to the first elements of each nested list
+        or simply integers.  If ``True``, each `var` is passed the nesting
+        level integer (i.e. ``var(i)``).
+
+    Returns
+    -------
+    list, dict
+        The generated nested list and a ``dict`` containing the generated
+        `var`s and their nesting level integers, if any.
+
+    """
     b_struct = None
     if N is None:
         N = sys.getrecursionlimit()
+    lvars = {}
     for i in range(N - 1, 0, -1):
-        b_struct = [i, last_elem if i == N - 1 else b_struct]
-    return b_struct
+        i_el = var(i) if use_lvars else i
+        if use_lvars:
+            lvars[i_el] = i
+        b_struct = [i_el, last_elem if i == N - 1 else b_struct]
+    return b_struct, lvars