diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 25ec3a77727..e321e1a112c 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -66,6 +66,7 @@ DatasetCard, DatasetCardData, HfApi, + constants, ) from huggingface_hub.hf_api import RepoFile from multiprocess import Pool @@ -5607,14 +5608,19 @@ def push_to_hub( api = HfApi(endpoint=config.HF_ENDPOINT, token=token) - repo_url = api.create_repo( + if not api.repo_exists( repo_id, token=token, - repo_type="dataset", - private=private, - exist_ok=True, - ) - repo_id = repo_url.repo_id + repo_type=constants.REPO_TYPE_DATASET, + ): + repo_url = api.create_repo( + repo_id, + token=token, + repo_type=constants.REPO_TYPE_DATASET, + private=private, + exist_ok=True, + ) + repo_id = repo_url.repo_id if revision is not None and not revision.startswith("refs/pr/"): # We do not call create_branch for a PR reference: 400 Bad Request diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 3d586583259..b31c4b74e0d 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -20,6 +20,7 @@ DatasetCard, DatasetCardData, HfApi, + constants, ) from huggingface_hub.hf_api import RepoFile @@ -1720,14 +1721,19 @@ def push_to_hub( api = HfApi(endpoint=config.HF_ENDPOINT, token=token) - repo_url = api.create_repo( + if not api.repo_exists( repo_id, token=token, - repo_type="dataset", - private=private, - exist_ok=True, - ) - repo_id = repo_url.repo_id + repo_type=constants.REPO_TYPE_DATASET, + ): + repo_url = api.create_repo( + repo_id, + token=token, + repo_type=constants.REPO_TYPE_DATASET, + private=private, + exist_ok=True, + ) + repo_id = repo_url.repo_id if revision is not None and not revision.startswith("refs/pr/"): # We do not call create_branch for a PR reference: 400 Bad Request