Skip to content
Draft
  •  
  •  
  •  
34 changes: 18 additions & 16 deletions .github/workflows/solve.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,35 @@ on:
instance_set:
description: "Instance set to solve"
type: string
default: verified_33_pct
required: true
limits:
description: "Limits to apply to the solver"
type: choice
required: true
options:
- default
# Synthetic test solvers
- test_files=3 test_status_retry=3 code_files=0
# Code solvers building synthetic tests
- test_files=2 test_status_retry=2 code_files=2 code_status_retry=2
- test_files=3 test_status_retry=3 code_files=3 code_status_retry=3
# Code file selection
- test_files=0 code_files=3 code_status_retry=0
# Code solvers using only known synthetic tests
- test_files=0 code_files=3 code_status_retry=3
- test_files=0 code_files=6 code_status_retry=3
- test_files=4 test_status_retry=3 code_files=4 code_status_retry=3
- test_files=4 test_status_retry=4 code_files=4 code_status_retry=4
- test_files=5 test_status_retry=3 code_files=5 code_status_retry=3
- test_files=5 test_status_retry=5 code_files=5 code_status_retry=5
default: "test_files=3 test_status_retry=3 code_files=3 code_status_retry=3"
context_tokens:
description: "LLM token limit to apply to the solver"
type: string
type: choice
required: false
default: "8000"
options:
- "8000"
- "16000"
- "32000"
- "64000"
default: "16000"
use_synthetic_tests:
description: "Use synthetic tests"
type: boolean
required: false
default: true
default: false
observe_synthetic_tests:
description: "Observe synthetic tests"
type: boolean
Expand All @@ -50,17 +54,15 @@ on:
default: SWE-Bench_Larger
type: choice
options:
- ubuntu-latest
- swe-bench-ubuntu-latest
- SWE-Bench_Larger
num_runners:
description: "Number of runners to split the workload across"
required: true
default: "2"
default: "3"
name:
description: "Assign a name to the workflow run"
type: string
required: false
required: true

pull_request:
types: [opened, synchronize]
Expand Down
19 changes: 19 additions & 0 deletions data/instance_sets/sonnet_retry_error_2024-09-19.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# These instances from run https://github.com/getappmap/navie-benchmark/actions/runs/10949246453
# are missing from the report, primarily due to Sonnet errors:
# Failed to complete: SSE Error: {"type":"error","error":{"details":null,"type":"invalid_request_error","message":"Output blocked by content filtering policy"} }
django__django-14539
django__django-15280
django__django-15563
matplotlib__matplotlib-24870
matplotlib__matplotlib-26113
pydata__xarray-7233
scikit-learn__scikit-learn-10908
scikit-learn__scikit-learn-13779
scikit-learn__scikit-learn-14710
scikit-learn__scikit-learn-26194
sympy__sympy-14531
sympy__sympy-18211
sympy__sympy-19040
sympy__sympy-20154
sympy__sympy-21379
sympy__sympy-21847
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"instance_id": "astropy__astropy-12907",
"edit_test_file": "astropy/modeling/tests/test_separable.py",
"code_patch": "diff --git a/astropy/modeling/separable.py b/astropy/modeling/separable.py\nindex a308e27..765664b 100644\n--- a/astropy/modeling/separable.py\n+++ b/astropy/modeling/separable.py\n@@ -98,9 +98,28 @@ def separability_matrix(transform):\n return np.ones((transform.n_outputs, transform.n_inputs),\n dtype=np.bool_)\n separable_matrix = _separable(transform)\n+ # Recursively evaluate separability for nested CompoundModels\n+ if isinstance(transform, CompoundModel):\n+ left_matrix = separability_matrix(transform.left)\n+ right_matrix = separability_matrix(transform.right)\n+ separable_matrix = _combine_matrices(left_matrix, right_matrix, transform.op)\n separable_matrix = np.where(separable_matrix != 0, True, False)\n return separable_matrix\n \n+def _combine_matrices(left_matrix, right_matrix, operation):\n+ \"\"\"\n+ Combine separability matrices of two sub-models based on the operation.\n+ \"\"\"\n+ if operation == '&':\n+ return np.block([\n+ [left_matrix, np.zeros((left_matrix.shape[0], right_matrix.shape[1]), dtype=bool)],\n+ [np.zeros((right_matrix.shape[0], left_matrix.shape[1]), dtype=bool), right_matrix]\n+ ])\n+ elif operation == '|':\n+ return np.dot(left_matrix, right_matrix)\n+ else:\n+ raise ValueError(f\"Unsupported operation: {operation}\")\n+\n \n def _compute_n_outputs(left, right):\n \"\"\"\n",
"test_patch": "diff --git a/astropy/modeling/tests/test_nested_compound_models.py b/astropy/modeling/tests/test_nested_compound_models.py\nnew file mode 100644\nindex 0000000..6b54730\n--- /dev/null\n+++ b/astropy/modeling/tests/test_nested_compound_models.py\n@@ -0,0 +1,25 @@\n+import numpy as np\n+from numpy.testing import assert_allclose\n+from astropy.modeling import models as m\n+from astropy.modeling.separable import separability_matrix\n+\n+def test_nested_compound_model_separability():\n+ # Create a simple compound model\n+ cm = m.Linear1D(10) & m.Linear1D(5)\n+ \n+ # Create a more complex compound model by nesting\n+ nested_cm = m.Pix2Sky_TAN() & cm\n+ \n+ # Expected separability matrix for the nested compound model\n+ expected_separability = np.array([\n+ [True, True, False, False],\n+ [True, True, False, False],\n+ [False, False, True, True],\n+ [False, False, True, True]\n+ ])\n+ \n+ # Compute the separability matrix for the nested compound model\n+ computed_separability = separability_matrix(nested_cm)\n+ \n+ # Assert that the computed separability matrix matches the expected incorrect output\n+ assert_allclose(computed_separability, expected_separability)\n\\ No newline at end of file\n",
"test_inverted_patch": "diff --git a/astropy/modeling/tests/test_nested_compound_models_inverted.py b/astropy/modeling/tests/test_nested_compound_models_inverted.py\nnew file mode 100644\nindex 0000000..d882bed\n--- /dev/null\n+++ b/astropy/modeling/tests/test_nested_compound_models_inverted.py\n@@ -0,0 +1,29 @@\n+import numpy as np\n+from numpy.testing import assert_allclose\n+from astropy.modeling import models as m\n+from astropy.modeling.separable import separability_matrix\n+\n+def test_nested_compound_model_separability():\n+ # Create a simple compound model\n+ cm = m.Linear1D(10) & m.Linear1D(5)\n+ \n+ # Create a more complex compound model by nesting\n+ nested_cm = m.Pix2Sky_TAN() & cm\n+ \n+ # Expected correct separability matrix for the nested compound model\n+ expected_separability = np.array([\n+ [True, True, False, False],\n+ [True, True, False, False],\n+ [False, False, True, False],\n+ [False, False, False, True]\n+ ])\n+ \n+ # Compute the separability matrix for the nested compound model\n+ computed_separability = separability_matrix(nested_cm)\n+ \n+ # Assert that the computed separability matrix matches the expected correct output\n+ # This should fail if the bug is present\n+ try:\n+ assert_allclose(computed_separability, expected_separability)\n+ except AssertionError:\n+ raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
"num_sent_chars": 1658629,
"num_received_chars": 81119,
"elapsed_time": 558.5764768123627,
"lint_repair_count": 5,
"test_generation_attempts": 1,
"code_generation_attempts": 9,
"pass_to_pass": false,
"pass_to_fail": true,
"fail_to_pass": true,
"code_patch_score": 2,
"appmap_data_test_status": null,
"appmap_data_file_count": null,
"appmap_data_context_size": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"instance_id": "astropy__astropy-13398",
"edit_test_file": null,
"code_patch": "diff --git a/astropy/coordinates/builtin_frames/itrs.py b/astropy/coordinates/builtin_frames/itrs.py\nindex e121ec9..2e3ee2b 100644\n--- a/astropy/coordinates/builtin_frames/itrs.py\n+++ b/astropy/coordinates/builtin_frames/itrs.py\n@@ -34,5 +34,15 @@ class ITRS(BaseCoordinateFrame):\n cart = self.represent_as(CartesianRepresentation)\n return EarthLocation(x=cart.x, y=cart.y, z=cart.z)\n \n+ def transform_to_altaz(self, altaz_frame):\n+ # Compute the transformation matrix for ITRS to AltAz\n+ # Apply the matrix to transform the coordinates\n+ pass\n+\n+ def transform_to_hadec(self, hadec_frame):\n+ # Compute the transformation matrix for ITRS to HADec\n+ # Apply the matrix to transform the coordinates\n+ pass\n+\n # Self-transform is in intermediate_rotation_transforms.py with all the other\n # ITRS transforms\n",
"test_patch": null,
"test_inverted_patch": null,
"num_sent_chars": 721226,
"num_received_chars": 21847,
"elapsed_time": 156.82384634017944,
"lint_repair_count": 3,
"test_generation_attempts": 0,
"code_generation_attempts": 2,
"pass_to_pass": true,
"pass_to_fail": false,
"fail_to_pass": false,
"code_patch_score": 0,
"appmap_data_test_status": null,
"appmap_data_file_count": null,
"appmap_data_context_size": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"instance_id": "astropy__astropy-13977",
"edit_test_file": "astropy/units/tests/test_quantity.py",
"code_patch": "diff --git a/astropy/units/function/core.py b/astropy/units/function/core.py\nindex f37684c..5076d36 100644\n--- a/astropy/units/function/core.py\n+++ b/astropy/units/function/core.py\n@@ -621,11 +621,12 @@ class FunctionQuantity(Quantity):\n # And really it should just return NotImplemented, since possibly\n # another argument might know what to do.\n if function not in self._supported_ufuncs:\n- raise UnitTypeError(\n- f\"Cannot use ufunc '{function.__name__}' with function quantities\"\n- )\n+ return NotImplemented\n \n- return super().__array_ufunc__(function, method, *inputs, **kwargs)\n+ try:\n+ return super().__array_ufunc__(function, method, *inputs, **kwargs)\n+ except UnitTypeError:\n+ return NotImplemented\n \n def _maybe_new_view(self, result):\n \"\"\"View as function quantity if the unit is unchanged.\n",
"test_patch": "diff --git a/astropy/units/tests/test_quantity_array_ufunc_handling.py b/astropy/units/tests/test_quantity_array_ufunc_handling.py\nnew file mode 100644\nindex 0000000..4334bd6\n--- /dev/null\n+++ b/astropy/units/tests/test_quantity_array_ufunc_handling.py\n@@ -0,0 +1,33 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\"\"\"Test the handling of Quantity.__array_ufunc__ for duck types.\"\"\"\n+\n+import dataclasses\n+import numpy as np\n+import pytest\n+from astropy import units as u\n+\n+\n+@dataclasses.dataclass\n+class DuckArray(np.lib.mixins.NDArrayOperatorsMixin):\n+ ndarray: u.Quantity\n+\n+ @property\n+ def unit(self) -> u.UnitBase:\n+ return self.ndarray.unit\n+\n+ def __array_ufunc__(self, function, method, *inputs, **kwargs):\n+ inputs = [inp.ndarray if isinstance(inp, DuckArray) else inp for inp in inputs]\n+\n+ for inp in inputs:\n+ if isinstance(inp, np.ndarray):\n+ result = inp.__array_ufunc__(function, method, *inputs, **kwargs)\n+ if result is not NotImplemented:\n+ return DuckArray(result)\n+\n+ return NotImplemented\n+\n+\n+def test_quantity_array_ufunc_handling():\n+ \"\"\"Test that Quantity.__array_ufunc__ raises ValueError for incompatible units.\"\"\"\n+ with pytest.raises(ValueError, match=\"Value not scalar compatible or convertible to an int, float, or complex array\"):\n+ _ = (1 * u.m) + DuckArray(1 * u.mm)\n\\ No newline at end of file\n",
"test_inverted_patch": "diff --git a/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py b/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py\nnew file mode 100644\nindex 0000000..8edbfc7\n--- /dev/null\n+++ b/astropy/units/tests/test_quantity_array_ufunc_handling_inverted.py\n@@ -0,0 +1,35 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\"\"\"Test the handling of Quantity.__array_ufunc__ for duck types.\"\"\"\n+\n+import dataclasses\n+import numpy as np\n+import pytest\n+from astropy import units as u\n+\n+\n+@dataclasses.dataclass\n+class DuckArray(np.lib.mixins.NDArrayOperatorsMixin):\n+ ndarray: u.Quantity\n+\n+ @property\n+ def unit(self) -> u.UnitBase:\n+ return self.ndarray.unit\n+\n+ def __array_ufunc__(self, function, method, *inputs, **kwargs):\n+ inputs = [inp.ndarray if isinstance(inp, DuckArray) else inp for inp in inputs]\n+\n+ for inp in inputs:\n+ if isinstance(inp, np.ndarray):\n+ result = inp.__array_ufunc__(function, method, *inputs, **kwargs)\n+ if result is not NotImplemented:\n+ return DuckArray(result)\n+\n+ return NotImplemented\n+\n+\n+def test_quantity_array_ufunc_handling():\n+ \"\"\"Test that Quantity.__array_ufunc__ returns NotImplemented for incompatible units.\"\"\"\n+ try:\n+ _ = (1 * u.m) + DuckArray(1 * u.mm)\n+ except ValueError:\n+ raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
"num_sent_chars": 1133717,
"num_received_chars": 46392,
"elapsed_time": 388.36309242248535,
"lint_repair_count": 0,
"test_generation_attempts": 1,
"code_generation_attempts": 9,
"pass_to_pass": true,
"pass_to_fail": false,
"fail_to_pass": false,
"code_patch_score": 1,
"appmap_data_test_status": null,
"appmap_data_file_count": null,
"appmap_data_context_size": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"instance_id": "astropy__astropy-14309",
"edit_test_file": "astropy/io/registry/tests/test_registries.py",
"code_patch": "diff --git a/astropy/io/fits/connect.py b/astropy/io/fits/connect.py\nindex 4b6150f..1f51fe4 100644\n--- a/astropy/io/fits/connect.py\n+++ b/astropy/io/fits/connect.py\n@@ -69,7 +69,7 @@ def is_fits(origin, filepath, fileobj, *args, **kwargs):\n (\".fits\", \".fits.gz\", \".fit\", \".fit.gz\", \".fts\", \".fts.gz\")\n ):\n return True\n- return isinstance(args[0], (HDUList, TableHDU, BinTableHDU, GroupsHDU))\n+ return len(args) > 0 and isinstance(args[0], (HDUList, TableHDU, BinTableHDU, GroupsHDU))\n \n \n def _decode_mixins(tbl):\n",
"test_patch": "diff --git a/astropy/io/registry/tests/test_identify_format_indexerror.py b/astropy/io/registry/tests/test_identify_format_indexerror.py\nnew file mode 100644\nindex 0000000..bcecdee\n--- /dev/null\n+++ b/astropy/io/registry/tests/test_identify_format_indexerror.py\n@@ -0,0 +1,9 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\n+import pytest\n+from astropy.io.registry import identify_format\n+from astropy.table import Table\n+\n+def test_identify_format_indexerror():\n+ with pytest.raises(IndexError, match=\"tuple index out of range\"):\n+ identify_format(\"write\", Table, \"bububu.ecsv\", None, [], {})\n\\ No newline at end of file\n",
"test_inverted_patch": "diff --git a/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py b/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py\nnew file mode 100644\nindex 0000000..d614a25\n--- /dev/null\n+++ b/astropy/io/registry/tests/test_identify_format_indexerror_inverted.py\n@@ -0,0 +1,14 @@\n+# Licensed under a 3-clause BSD style license - see LICENSE.rst\n+\n+import pytest\n+from astropy.io.registry import identify_format\n+from astropy.table import Table\n+\n+def test_identify_format_indexerror():\n+ try:\n+ identify_format(\"write\", Table, \"bububu.ecsv\", None, [], {})\n+ except IndexError as e:\n+ assert str(e) == \"tuple index out of range\"\n+ raise AssertionError(\"__BUG__HERE__\")\n+ else:\n+ raise AssertionError(\"__BUG__HERE__\")\n\\ No newline at end of file\n",
"num_sent_chars": 1204168,
"num_received_chars": 29181,
"elapsed_time": 388.12353682518005,
"lint_repair_count": 0,
"test_generation_attempts": 1,
"code_generation_attempts": 9,
"pass_to_pass": true,
"pass_to_fail": true,
"fail_to_pass": false,
"code_patch_score": 2,
"appmap_data_test_status": null,
"appmap_data_file_count": null,
"appmap_data_context_size": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"instance_id": "astropy__astropy-14508",
"edit_test_file": null,
"code_patch": "diff --git a/astropy/io/fits/card.py b/astropy/io/fits/card.py\nindex 118bfce..6ddff8b 100644\n--- a/astropy/io/fits/card.py\n+++ b/astropy/io/fits/card.py\n@@ -1299,20 +1299,25 @@ def _format_value(value):\n \n def _format_float(value):\n \"\"\"Format a floating number to make sure it gets the decimal point.\"\"\"\n- value_str = f\"{value:.16G}\"\n- if \".\" not in value_str and \"E\" not in value_str:\n- value_str += \".0\"\n- elif \"E\" in value_str:\n- # On some Windows builds of Python (and possibly other platforms?) the\n- # exponent is zero-padded out to, it seems, three digits. Normalize\n- # the format to pad only to two digits.\n- significand, exponent = value_str.split(\"E\")\n- if exponent[0] in (\"+\", \"-\"):\n- sign = exponent[0]\n- exponent = exponent[1:]\n- else:\n- sign = \"\"\n- value_str = f\"{significand}E{sign}{int(exponent):02d}\"\n+ # Attempt to use Python's default string representation\n+ value_str = str(value)\n+ \n+ # If the default representation exceeds 20 characters, fallback to .16G formatting\n+ if len(value_str) > 20:\n+ value_str = f\"{value:.16G}\"\n+ if \".\" not in value_str and \"E\" not in value_str:\n+ value_str += \".0\"\n+ elif \"E\" in value_str:\n+ # On some Windows builds of Python (and possibly other platforms?) the\n+ # exponent is zero-padded out to, it seems, three digits. Normalize\n+ # the format to pad only to two digits.\n+ significand, exponent = value_str.split(\"E\")\n+ if exponent[0] in (\"+\", \"-\"):\n+ sign = exponent[0]\n+ exponent = exponent[1:]\n+ else:\n+ sign = \"\"\n+ value_str = f\"{significand}E{sign}{int(exponent):02d}\"\n \n # Limit the value string to at most 20 characters.\n str_len = len(value_str)\n@@ -1324,7 +1329,6 @@ def _format_float(value):\n value_str = value_str[:20]\n else:\n value_str = value_str[: 20 - (str_len - idx)] + value_str[idx:]\n-\n return value_str\n \n \n",
"test_patch": null,
"test_inverted_patch": null,
"num_sent_chars": 3398876,
"num_received_chars": 49236,
"elapsed_time": 483.1543028354645,
"lint_repair_count": 6,
"test_generation_attempts": 9,
"code_generation_attempts": 9,
"pass_to_pass": false,
"pass_to_fail": false,
"fail_to_pass": false,
"code_patch_score": 0,
"appmap_data_test_status": null,
"appmap_data_file_count": null,
"appmap_data_context_size": null
}
Loading
Loading