|  | 
| 84 | 84 | repo_retry = float(os.environ.get("REPO_RETRY_DELAY", 30)) | 
| 85 | 85 | 
 | 
| 86 | 86 | 
 | 
| 87 |  | -@connect.retry(2, sqlalchemy.exc.OperationalError, wait=repo_retry) | 
|  | 87 | +# TODO: revisit which cases should be retried after DM-50934 | 
|  | 88 | +# TODO: catch ButlerConnectionError once it's available | 
|  | 89 | +SQL_EXCEPTIONS = (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InterfaceError) | 
|  | 90 | +DATASTORE_EXCEPTIONS = SQL_EXCEPTIONS + (botocore.exceptions.ClientError, ) | 
|  | 91 | + | 
|  | 92 | + | 
|  | 93 | +@connect.retry(2, SQL_EXCEPTIONS, wait=repo_retry) | 
| 88 | 94 | def get_central_butler(central_repo: str, instrument_class: str): | 
| 89 | 95 |     """Provide a Butler that can access the given repository and read and write | 
| 90 | 96 |     data for the given instrument. | 
| @@ -360,7 +366,7 @@ def _init_visit_definer(self): | 
| 360 | 366 |         define_visits_config.groupExposures = "one-to-one" | 
| 361 | 367 |         self.define_visits = lsst.obs.base.DefineVisitsTask(config=define_visits_config, butler=self.butler) | 
| 362 | 368 | 
 | 
| 363 |  | -    @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) | 
|  | 369 | +    @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) | 
| 364 | 370 |     def _init_governor_datasets(self, timestamp, skymap): | 
| 365 | 371 |         """Load and store the camera and skymap for later use. | 
| 366 | 372 | 
 | 
| @@ -537,7 +543,7 @@ def prep_butler(self) -> None: | 
| 537 | 543 |                         detector=self.visit.detector, | 
| 538 | 544 |                         group=self.visit.groupId) | 
| 539 | 545 | 
 | 
| 540 |  | -    @connect.retry(2, sqlalchemy.exc.OperationalError, wait=repo_retry) | 
|  | 546 | +    @connect.retry(2, SQL_EXCEPTIONS, wait=repo_retry) | 
| 541 | 547 |     def _find_data_to_preload(self, region): | 
| 542 | 548 |         """Identify the datasets to export from the central repo. | 
| 543 | 549 | 
 | 
| @@ -912,7 +918,7 @@ def _find_init_outputs(self): | 
| 912 | 918 |             _log.debug("Found %d new init-output datasets from %s.", n_datasets, run) | 
| 913 | 919 |         return datasets | 
| 914 | 920 | 
 | 
| 915 |  | -    @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) | 
|  | 921 | +    @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) | 
| 916 | 922 |     def _transfer_data(self, datasets, calibs): | 
| 917 | 923 |         """Transfer datasets and all associated collections from the central | 
| 918 | 924 |         repo to the local repo. | 
| @@ -1602,7 +1608,7 @@ def _get_safe_dataset_types(butler): | 
| 1602 | 1608 |         return [dstype.name for dstype in butler.registry.queryDatasetTypes(...) | 
| 1603 | 1609 |                 if "detector" in dstype.dimensions] | 
| 1604 | 1610 | 
 | 
| 1605 |  | -    @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) | 
|  | 1611 | +    @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) | 
| 1606 | 1612 |     def _export_subset(self, exposure_ids: set[int], | 
| 1607 | 1613 |                        dataset_types: typing.Any, in_collections: typing.Any) -> None: | 
| 1608 | 1614 |         """Copy datasets associated with a processing run back to the | 
|  | 
0 commit comments