From fb662c10811f2036a885aa5b97e7469c106e3344 Mon Sep 17 00:00:00 2001 From: Arjun Dinesh Jagdale <142811259+ArjunJagdale@users.noreply.github.com> Date: Sat, 28 Jun 2025 14:48:37 +0530 Subject: [PATCH] fix(load): strip deprecated use_auth_token from config_kwargs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #7504 This PR resolves a compatibility error when loading datasets via `load_dataset()` using outdated arguments like `use_auth_token`. 🔧 **What was happening:** Users passing `use_auth_token` in `load_dataset(..., use_auth_token=...)` encountered a `ValueError`: `BuilderConfig ParquetConfig(...) doesn't have a 'use_auth_token' key.` 🔍 **Why:** `use_auth_token` has been deprecated and removed from config definitions (replaced by `token`), but the `load_dataset()` function still forwarded it via `**config_kwargs` to BuilderConfigs, leading to unrecognized key errors. ✅ **Fix:** We now intercept `use_auth_token` inside `load_dataset`, strip it from `config_kwargs`, and log a deprecation warning instead: ```python if "use_auth_token" in config_kwargs: logger.warning("The 'use_auth_token' argument is deprecated. Please use 'token' instead.") config_kwargs.pop("use_auth_token") ``` This ensures legacy compatibility while guiding users to switch to the `token` argument. Let me know if you'd prefer a deprecation error instead of a warning. Thanks! 
--- src/datasets/load.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index bc2b0e679b6..94dd4d781e8 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1387,7 +1387,12 @@ def load_dataset( verification_mode = VerificationMode( (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS ) - + + # `use_auth_token` has been deprecated and removed from config definitions (replaced by `token`) + if "use_auth_token" in config_kwargs: + logger.warning("The 'use_auth_token' argument is deprecated. Please use 'token' instead.") + config_kwargs.pop("use_auth_token") + # Create a dataset builder builder_instance = load_dataset_builder( path=path,