getappmap · kgilpin · Sep 19, 2024 · Sep 19, 2024 · Sep 19, 2024 · Sep 19, 2024
diff --git a/.github/workflows/solve.yml b/.github/workflows/solve.yml
@@ -14,31 +14,35 @@ on:
       instance_set:
         description: "Instance set to solve"
         type: string
+        default: verified_33_pct
+        required: true
       limits:
         description: "Limits to apply to the solver"
         type: choice
         required: true
         options:
-          - default
-          # Synthetic test solvers
-          - test_files=3 test_status_retry=3 code_files=0
-          # Code solvers building synthetic tests
+          - test_files=2 test_status_retry=2 code_files=2 code_status_retry=2
           - test_files=3 test_status_retry=3 code_files=3 code_status_retry=3
-          # Code file selection
-          - test_files=0 code_files=3 code_status_retry=0
-          # Code solvers using only known synthetic tests
-          - test_files=0 code_files=3 code_status_retry=3
-          - test_files=0 code_files=6 code_status_retry=3
+          - test_files=4 test_status_retry=3 code_files=4 code_status_retry=3
+          - test_files=4 test_status_retry=4 code_files=4 code_status_retry=4
+          - test_files=5 test_status_retry=3 code_files=5 code_status_retry=3
+          - test_files=5 test_status_retry=5 code_files=5 code_status_retry=5
+        default: "test_files=3 test_status_retry=3 code_files=3 code_status_retry=3"
       context_tokens:
         description: "LLM token limit to apply to the solver"
-        type: string
+        type: choice
         required: false
-        default: "8000"
+        options:
+          - "8000"
+          - "16000"
+          - "32000"
+          - "64000"
+        default: "16000"
       use_synthetic_tests:
         description: "Use synthetic tests"
         type: boolean
         required: false
-        default: true
+        default: false
       observe_synthetic_tests:
         description: "Observe synthetic tests"
         type: boolean
@@ -50,17 +54,15 @@ on:
         default: SWE-Bench_Larger
         type: choice
         options:
-          - ubuntu-latest
-          - swe-bench-ubuntu-latest
           - SWE-Bench_Larger
       num_runners:
         description: "Number of runners to split the workload across"
         required: true
-        default: "2"
+        default: "3"
       name:
         description: "Assign a name to the workflow run"
         type: string
-        required: false
+        required: true
 
   pull_request:
     types: [opened, synchronize]

diff --git a/data/instance_sets/sonnet_retry_error_2024-09-19.txt b/data/instance_sets/sonnet_retry_error_2024-09-19.txt
@@ -0,0 +1,19 @@
+# These instances from run https://github.com/getappmap/navie-benchmark/actions/runs/10949246453
+# are missing from the report, primarily due to Sonnet errors:
+# Failed to complete: SSE Error: {"type":"error","error":{"details":null,"type":"invalid_request_error","message":"Output blocked by content filtering policy"}             }
+django__django-14539
+django__django-15280
+django__django-15563
+matplotlib__matplotlib-24870
+matplotlib__matplotlib-26113
+pydata__xarray-7233
+scikit-learn__scikit-learn-10908
+scikit-learn__scikit-learn-13779
+scikit-learn__scikit-learn-14710
+scikit-learn__scikit-learn-26194
+sympy__sympy-14531
+sympy__sympy-18211
+sympy__sympy-19040
+sympy__sympy-20154
+sympy__sympy-21379
+sympy__sympy-21847
diff --git a/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-12907.json b/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-12907.json
@@ -0,0 +1,20 @@
+{
+  "instance_id": "astropy__astropy-12907",
+  "edit_test_file": "astropy/modeling/tests/test_separable.py",
+  "code_patch": "diff --git a/astropy/modeling/separable.py b/astropy/modeling/separable.py\nindex a308e27..765664b 100644\n--- a/astropy/modeling/separable.py\n+++ b/astropy/modeling/separable.py\n@@ -98,9 +98,28 @@ def separability_matrix(transform):\n         return np.ones((transform.n_outputs, transform.n_inputs),\n                        dtype=np.bool_)\n     separable_matrix = _separable(transform)\n+    # Recursively evaluate separability for nested CompoundModels\n+    if isinstance(transform, CompoundModel):\n+        left_matrix = separability_matrix(transform.left)\n+        right_matrix = separability_matrix(transform.right)\n+        separable_matrix = _combine_matrices(left_matrix, right_matrix, transform.op)\n     separable_matrix = np.where(separable_matrix != 0, True, False)\n     return separable_matrix\n \n+def _combine_matrices(left_matrix, right_matrix, operation):\n+    \"\"\"\n+    Combine separability matrices of two sub-models based on the operation.\n+    \"\"\"\n+    if operation == '&':\n+        return np.block([\n+            [left_matrix, np.zeros((left_matrix.shape[0], right_matrix.shape[1]), dtype=bool)],\n+            [np.zeros((right_matrix.shape[0], left_matrix.shape[1]), dtype=bool), right_matrix]\n+        ])\n+    elif operation == '|':\n+        return np.dot(left_matrix, right_matrix)\n+    else:\n+        raise ValueError(f\"Unsupported operation: {operation}\")\n+\n \n def _compute_n_outputs(left, right):\n     \"\"\"\n",
+  "test_patch": "diff --git a/astropy/modeling/tests/test_nested_compound_models.py b/astropy/modeling/tests/test_nested_compound_models.py\nnew file mode 100644\nindex 0000000..6b54730\n--- /dev/null\n+++ b/astropy/modeling/tests/test_nested_compound_models.py\n@@ -0,0 +1,25 @@\n+import numpy as np\n+from numpy.testing import assert_allclose\n+from astropy.modeling import models as m\n+from astropy.modeling.separable import separability_matrix\n+\n+def test_nested_compound_model_separability():\n+    # Create a simple compound model\n+    cm = m.Linear1D(10) & m.Linear1D(5)\n+    \n+    # Create a more complex compound model by nesting\n+    nested_cm = m.Pix2Sky_TAN() & cm\n+    \n+    # Expected separability matrix for the nested compound model\n+    expected_separability = np.array([\n+        [True, True, False, False],\n+        [True, True, False, False],\n+        [False, False, True, True],\n+        [False, False, True, True]\n+    ])\n+    \n+    # Compute the separability matrix for the nested compound model\n+    computed_separability = separability_matrix(nested_cm)\n+    \n+    # Assert that the computed separability matrix matches the expected incorrect output\n+    assert_allclose(computed_separability, expected_separability)\n\\ No newline at end of file\n",
+  "test_inverted_patch": "diff --git a/astropy/modeling/tests/test_nested_compound_models_inverted.py b/astropy/modeling/tests/test_nested_compound_models_inverted.py\nnew file mode 100644\nindex 0000000..d882bed\n--- /dev/null\n+++ b/astropy/modeling/tests/test_nested_compound_models_inverted.py\n@@ -0,0 +1,29 @@\n+import numpy as np\n+from numpy.testing import assert_allclose\n+from astropy.modeling import models as m\n+from astropy.modeling.separable import separability_matrix\n+\n+def test_nested_compound_model_separability():\n+    # Create a simple compound model\n+    cm = m.Linear1D(10) & m.Linear1D(5)\n+    \n+    # Create a more complex compound model by nesting\n+    nested_cm = m.Pix2Sky_TAN() & cm\n+    \n+    # Expected correct separability matrix for the nested compound model\n+    expected_separability = np.array([\n+        [True, True, False, False],\n+        [True, True, False, False],\n+        [False, False, True, False],\n+        [False, False, False, True]\n+    ])\n+    \n+    # Compute the separability matrix for the nested compound model\n+    computed_separability = separability_matrix(nested_cm)\n+    \n+    # Assert that the computed separability matrix matches the expected correct output\n+    # This should fail if the bug is present\n+    try:\n+        assert_allclose(computed_separability, expected_separability)\n+    except AssertionError:\n+        raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
+  "num_sent_chars": 1658629,
+  "num_received_chars": 81119,
+  "elapsed_time": 558.5764768123627,
+  "lint_repair_count": 5,
+  "test_generation_attempts": 1,
+  "code_generation_attempts": 9,
+  "pass_to_pass": false,
+  "pass_to_fail": true,
+  "fail_to_pass": true,
+  "code_patch_score": 2,
+  "appmap_data_test_status": null,
+  "appmap_data_file_count": null,
+  "appmap_data_context_size": null
+}
diff --git a/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-13398.json b/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-13398.json
@@ -0,0 +1,20 @@
+{
+  "instance_id": "astropy__astropy-13398",
+  "edit_test_file": null,
+  "code_patch": "diff --git a/astropy/coordinates/builtin_frames/itrs.py b/astropy/coordinates/builtin_frames/itrs.py\nindex e121ec9..2e3ee2b 100644\n--- a/astropy/coordinates/builtin_frames/itrs.py\n+++ b/astropy/coordinates/builtin_frames/itrs.py\n@@ -34,5 +34,15 @@ class ITRS(BaseCoordinateFrame):\n         cart = self.represent_as(CartesianRepresentation)\n         return EarthLocation(x=cart.x, y=cart.y, z=cart.z)\n \n+    def transform_to_altaz(self, altaz_frame):\n+        # Compute the transformation matrix for ITRS to AltAz\n+        # Apply the matrix to transform the coordinates\n+        pass\n+\n+    def transform_to_hadec(self, hadec_frame):\n+        # Compute the transformation matrix for ITRS to HADec\n+        # Apply the matrix to transform the coordinates\n+        pass\n+\n # Self-transform is in intermediate_rotation_transforms.py with all the other\n # ITRS transforms\n",
+  "test_patch": null,
+  "test_inverted_patch": null,
+  "num_sent_chars": 721226,
+  "num_received_chars": 21847,
+  "elapsed_time": 156.82384634017944,
+  "lint_repair_count": 3,
+  "test_generation_attempts": 0,
+  "code_generation_attempts": 2,
+  "pass_to_pass": true,
+  "pass_to_fail": false,
+  "fail_to_pass": false,
+  "code_patch_score": 0,
+  "appmap_data_test_status": null,
+  "appmap_data_file_count": null,
+  "appmap_data_context_size": null
+}
diff --git a/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-13977.json b/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-13977.json
@@ -0,0 +1,20 @@
+{
+  "instance_id": "astropy__astropy-13977",
+  "edit_test_file": "astropy/units/tests/test_quantity.py",
+  "code_patch": "diff --git a/astropy/units/function/core.py b/astropy/units/function/core.py\nindex f37684c..5076d36 100644\n--- a/astropy/units/function/core.py\n+++ b/astropy/units/function/core.py\n@@ -621,11 +621,12 @@ class FunctionQuantity(Quantity):\n         # And really it should just return NotImplemented, since possibly\n         # another argument might know what to do.\n         if function not in self._supported_ufuncs:\n-            raise UnitTypeError(\n-                f\"Cannot use ufunc '{function.__name__}' with function quantities\"\n-            )\n+            return NotImplemented\n \n-        return super().__array_ufunc__(function, method, *inputs, **kwargs)\n+        try:\n+            return super().__array_ufunc__(function, method, *inputs, **kwargs)\n+        except UnitTypeError:\n+            return NotImplemented\n \n     def _maybe_new_view(self, result):\n         \"\"\"View as function quantity if the unit is unchanged.\n",
+  "test_patch": "diff --git a/astropy/units/tests/test_quantity_array_ufunc_handling.py b/astropy/units/tests/test_quantity_array_ufunc_handling.py\nnew file mode 100644\nindex 0000000..4334bd6\n--- /dev/null\n+++ b/astropy/units/tests/test_quantity_array_ufunc_handling.py\n@@ -0,0 +1,33 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\"\"\"Test the handling of Quantity.__array_ufunc__ for duck types.\"\"\"\n+\n+import dataclasses\n+import numpy as np\n+import pytest\n+from astropy import units as u\n+\n+\n+@dataclasses.dataclass\n+class DuckArray(np.lib.mixins.NDArrayOperatorsMixin):\n+    ndarray: u.Quantity\n+\n+    @property\n+    def unit(self) -> u.UnitBase:\n+        return self.ndarray.unit\n+\n+    def __array_ufunc__(self, function, method, *inputs, **kwargs):\n+        inputs = [inp.ndarray if isinstance(inp, DuckArray) else inp for inp in inputs]\n+\n+        for inp in inputs:\n+            if isinstance(inp, np.ndarray):\n+                result = inp.__array_ufunc__(function, method, *inputs, **kwargs)\n+                if result is not NotImplemented:\n+                    return DuckArray(result)\n+\n+        return NotImplemented\n+\n+\n+def test_quantity_array_ufunc_handling():\n+    \"\"\"Test that Quantity.__array_ufunc__ raises ValueError for incompatible units.\"\"\"\n+    with pytest.raises(ValueError, match=\"Value not scalar compatible or convertible to an int, float, or complex array\"):\n+        _ = (1 * u.m) + DuckArray(1 * u.mm)\n\\ No newline at end of file\n",
+  "test_inverted_patch": "diff --git a/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py b/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py\nnew file mode 100644\nindex 0000000..8edbfc7\n--- /dev/null\n+++ b/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py\n@@ -0,0 +1,35 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\"\"\"Test the handling of Quantity.__array_ufunc__ for duck types.\"\"\"\n+\n+import dataclasses\n+import numpy as np\n+import pytest\n+from astropy import units as u\n+\n+\n+@dataclasses.dataclass\n+class DuckArray(np.lib.mixins.NDArrayOperatorsMixin):\n+    ndarray: u.Quantity\n+\n+    @property\n+    def unit(self) -> u.UnitBase:\n+        return self.ndarray.unit\n+\n+    def __array_ufunc__(self, function, method, *inputs, **kwargs):\n+        inputs = [inp.ndarray if isinstance(inp, DuckArray) else inp for inp in inputs]\n+\n+        for inp in inputs:\n+            if isinstance(inp, np.ndarray):\n+                result = inp.__array_ufunc__(function, method, *inputs, **kwargs)\n+                if result is not NotImplemented:\n+                    return DuckArray(result)\n+\n+        return NotImplemented\n+\n+\n+def test_quantity_array_ufunc_handling():\n+    \"\"\"Test that Quantity.__array_ufunc__ returns NotImplemented for incompatible units.\"\"\"\n+    try:\n+        _ = (1 * u.m) + DuckArray(1 * u.mm)\n+    except ValueError:\n+        raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
+  "num_sent_chars": 1133717,
+  "num_received_chars": 46392,
+  "elapsed_time": 388.36309242248535,
+  "lint_repair_count": 0,
+  "test_generation_attempts": 1,
+  "code_generation_attempts": 9,
+  "pass_to_pass": true,
+  "pass_to_fail": false,
+  "fail_to_pass": false,
+  "code_patch_score": 1,
+  "appmap_data_test_status": null,
+  "appmap_data_file_count": null,
+  "appmap_data_context_size": null
+}
diff --git a/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-14309.json b/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-14309.json
@@ -0,0 +1,20 @@
+{
+  "instance_id": "astropy__astropy-14309",
+  "edit_test_file": "astropy/io/registry/tests/test_registries.py",
+  "code_patch": "diff --git a/astropy/io/fits/connect.py b/astropy/io/fits/connect.py\nindex 4b6150f..1f51fe4 100644\n--- a/astropy/io/fits/connect.py\n+++ b/astropy/io/fits/connect.py\n@@ -69,7 +69,7 @@ def is_fits(origin, filepath, fileobj, *args, **kwargs):\n             (\".fits\", \".fits.gz\", \".fit\", \".fit.gz\", \".fts\", \".fts.gz\")\n         ):\n             return True\n-    return isinstance(args[0], (HDUList, TableHDU, BinTableHDU, GroupsHDU))\n+    return len(args) > 0 and isinstance(args[0], (HDUList, TableHDU, BinTableHDU, GroupsHDU))\n \n \n def _decode_mixins(tbl):\n",
+  "test_patch": "diff --git a/astropy/io/registry/tests/test_identify_format_indexerror.py b/astropy/io/registry/tests/test_identify_format_indexerror.py\nnew file mode 100644\nindex 0000000..bcecdee\n--- /dev/null\n+++ b/astropy/io/registry/tests/test_identify_format_indexerror.py\n@@ -0,0 +1,9 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\n+import pytest\n+from astropy.io.registry import identify_format\n+from astropy.table import Table\n+\n+def test_identify_format_indexerror():\n+    with pytest.raises(IndexError, match=\"tuple index out of range\"):\n+        identify_format(\"write\", Table, \"bububu.ecsv\", None, [], {})\n\\ No newline at end of file\n",
+  "test_inverted_patch": "diff --git a/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py b/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py\nnew file mode 100644\nindex 0000000..d614a25\n--- /dev/null\n+++ b/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py\n@@ -0,0 +1,14 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\n+import pytest\n+from astropy.io.registry import identify_format\n+from astropy.table import Table\n+\n+def test_identify_format_indexerror():\n+    try:\n+        identify_format(\"write\", Table, \"bububu.ecsv\", None, [], {})\n+    except IndexError as e:\n+        assert str(e) == \"tuple index out of range\"\n+        raise AssertionError(\"__BUG__HERE__\")\n+    else:\n+        raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
+  "num_sent_chars": 1204168,
+  "num_received_chars": 29181,
+  "elapsed_time": 388.12353682518005,
+  "lint_repair_count": 0,
+  "test_generation_attempts": 1,
+  "code_generation_attempts": 9,
+  "pass_to_pass": true,
+  "pass_to_fail": true,
+  "fail_to_pass": false,
+  "code_patch_score": 2,
+  "appmap_data_test_status": null,
+  "appmap_data_file_count": null,
+  "appmap_data_context_size": null
+}
diff --git a/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-14508.json b/data/solve_code_runs/run_id/10941127186/code_patches/astropy__astropy-14508.json
@@ -0,0 +1,20 @@
+{
+  "instance_id": "astropy__astropy-14508",
+  "edit_test_file": null,
+  "code_patch": "diff --git a/astropy/io/fits/card.py b/astropy/io/fits/card.py\nindex 118bfce..6ddff8b 100644\n--- a/astropy/io/fits/card.py\n+++ b/astropy/io/fits/card.py\n@@ -1299,20 +1299,25 @@ def _format_value(value):\n \n def _format_float(value):\n     \"\"\"Format a floating number to make sure it gets the decimal point.\"\"\"\n-    value_str = f\"{value:.16G}\"\n-    if \".\" not in value_str and \"E\" not in value_str:\n-        value_str += \".0\"\n-    elif \"E\" in value_str:\n-        # On some Windows builds of Python (and possibly other platforms?) the\n-        # exponent is zero-padded out to, it seems, three digits.  Normalize\n-        # the format to pad only to two digits.\n-        significand, exponent = value_str.split(\"E\")\n-        if exponent[0] in (\"+\", \"-\"):\n-            sign = exponent[0]\n-            exponent = exponent[1:]\n-        else:\n-            sign = \"\"\n-        value_str = f\"{significand}E{sign}{int(exponent):02d}\"\n+    # Attempt to use Python's default string representation\n+    value_str = str(value)\n+    \n+    # If the default representation exceeds 20 characters, fallback to .16G formatting\n+    if len(value_str) > 20:\n+        value_str = f\"{value:.16G}\"\n+        if \".\" not in value_str and \"E\" not in value_str:\n+            value_str += \".0\"\n+        elif \"E\" in value_str:\n+            # On some Windows builds of Python (and possibly other platforms?) the\n+            # exponent is zero-padded out to, it seems, three digits.  Normalize\n+            # the format to pad only to two digits.\n+            significand, exponent = value_str.split(\"E\")\n+            if exponent[0] in (\"+\", \"-\"):\n+                sign = exponent[0]\n+                exponent = exponent[1:]\n+            else:\n+                sign = \"\"\n+            value_str = f\"{significand}E{sign}{int(exponent):02d}\"\n \n     # Limit the value string to at most 20 characters.\n     str_len = len(value_str)\n@@ -1324,7 +1329,6 @@ def _format_float(value):\n             value_str = value_str[:20]\n         else:\n             value_str = value_str[: 20 - (str_len - idx)] + value_str[idx:]\n-\n     return value_str\n \n \n",
+  "test_patch": null,
+  "test_inverted_patch": null,
+  "num_sent_chars": 3398876,
+  "num_received_chars": 49236,
+  "elapsed_time": 483.1543028354645,
+  "lint_repair_count": 6,
+  "test_generation_attempts": 9,
+  "code_generation_attempts": 9,
+  "pass_to_pass": false,
+  "pass_to_fail": false,
+  "fail_to_pass": false,
+  "code_patch_score": 0,
+  "appmap_data_test_status": null,
+  "appmap_data_file_count": null,
+  "appmap_data_context_size": null
+}