From 6cb31cfbae2ac266f2786fe584441de6afec12ba Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 13:07:36 -0700 Subject: [PATCH 1/6] new --save-dataset-cache arg --- tests/test_eval_cli.py | 4 ++++ verifiers/scripts/eval.py | 46 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tests/test_eval_cli.py b/tests/test_eval_cli.py index ccb87d4c2..91a096c48 100644 --- a/tests/test_eval_cli.py +++ b/tests/test_eval_cli.py @@ -67,6 +67,8 @@ def __init__(self, api_key=None, base_url=None): }, verbose=False, save_dataset=False, + save_dataset_cache=False, + cache_dir="~/.cache/verifiers/", save_to_hf_hub=False, hf_hub_dataset_name="", ) @@ -114,6 +116,8 @@ def __init__(self, api_key=None, base_url=None): }, verbose=False, save_dataset=False, + save_dataset_cache=False, + cache_dir="~/.cache/verifiers/", save_to_hf_hub=False, hf_hub_dataset_name="", ) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index 7c92db11f..c32873aa1 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -35,6 +35,8 @@ def eval_environment( sampling_args: dict | None, verbose: bool, save_dataset: bool, + save_dataset_cache: bool, + cache_dir: bool, save_to_hf_hub: bool, hf_hub_dataset_name: str, ): @@ -145,7 +147,7 @@ def eval_environment( out = f"r{i + 1}: {trials}" print(out) - if save_dataset or save_to_hf_hub: + if save_dataset or save_dataset_cache or save_to_hf_hub: ids = [i // rollouts_per_example for i in range(n * rollouts_per_example)] rewards = results.reward tasks = results.task @@ -198,6 +200,40 @@ def eval_environment( json.dump(metadata, f) logger.info(f"Saved dataset to {results_path}") + if save_dataset_cache: + if cache_dir: + cache_base = Path(os.environ.get( + "VF_CACHE_DIR", + Path.home() / ".cache" / "verifiers" + )) + cache_path = cache_base / "evals" / env_model_str / uuid_str + cache_path.mkdir(parents=True, exist_ok=True) + + dataset.to_json(cache_path / "results.jsonl") + with open(cache_path / "metadata.json", "w") as f: + json.dump(metadata, f) + + index_file = cache_base / "evals" / "index.json" + index_data = {} + if index_file.exists(): + with open(index_file, "r") as f: + index_data = json.load(f) + + run_key = f"{env_model_str}/{uuid_str}" + index_data[run_key] = { + "env": env, + "model": model, + "timestamp": datetime.now().isoformat(), + "path": str(cache_path), + "avg_reward": metadata.get("avg_reward"), + "num_examples": n, + "rollouts_per_example": rollouts_per_example + } + + with open(index_file, "w") as f: + json.dump(index_data, f, indent=2) + + if save_to_hf_hub: if hf_hub_dataset_name == "": dataset_name = ( @@ -307,6 +343,13 @@ def main(): action="store_true", help="Save dataset to disk", ) + parser.add_argument( + "--save-dataset-cache", + "-sc", + default=False, + action="store_true", + help="Save dataset to disk .cache", + ) parser.add_argument( "--save-to-hf-hub", "-H", @@ -339,6 +382,7 @@ def main(): sampling_args=args.sampling_args, verbose=args.verbose, save_dataset=args.save_dataset, + save_dataset_cache=args.save_dataset_cache, save_to_hf_hub=args.save_to_hf_hub, hf_hub_dataset_name=args.hf_hub_dataset_name, ) From 5e3aba46a525e3ffff786d1f1c311c66ce9478a3 Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 13:42:25 -0700 Subject: [PATCH 2/6] add imports --- verifiers/scripts/eval.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index c32873aa1..d39d52fd1 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -5,6 +5,7 @@ import logging import time import uuid +import os from datetime import datetime from pathlib import Path @@ -202,10 +203,9 @@ def eval_environment( logger.info(f"Saved dataset to {results_path}") if save_dataset_cache: if cache_dir: - cache_base = Path(os.environ.get( - "VF_CACHE_DIR", - Path.home() / ".cache" / "verifiers" - )) + cache_base = Path( + os.environ.get("VF_CACHE_DIR", Path.home() / ".cache" / "verifiers") + ) cache_path = cache_base / "evals" / env_model_str / uuid_str cache_path.mkdir(parents=True, exist_ok=True) @@ -227,13 +227,12 @@ def eval_environment( "path": str(cache_path), "avg_reward": metadata.get("avg_reward"), "num_examples": n, - "rollouts_per_example": rollouts_per_example + "rollouts_per_example": rollouts_per_example, } with open(index_file, "w") as f: json.dump(index_data, f, indent=2) - if save_to_hf_hub: if hf_hub_dataset_name == "": dataset_name = ( From 55911cbc24ce3e2b0c0b3713b5858824eadac94f Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 14:01:12 -0700 Subject: [PATCH 3/6] add eval_environment() cache_dir pos arg --- verifiers/scripts/eval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index d39d52fd1..d515f97ee 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -382,6 +382,7 @@ def main(): verbose=args.verbose, save_dataset=args.save_dataset, save_dataset_cache=args.save_dataset_cache, + cache_dir=args.cache_dir, save_to_hf_hub=args.save_to_hf_hub, hf_hub_dataset_name=args.hf_hub_dataset_name, ) From 4651155d57087330a1592531c824e168819eb02c Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 14:03:24 -0700 Subject: [PATCH 4/6] cache dir str not bool --- verifiers/scripts/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index d515f97ee..9a36ef95c 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -37,7 +37,7 @@ def eval_environment( verbose: bool, save_dataset: bool, save_dataset_cache: bool, - cache_dir: bool, + cache_dir: str, save_to_hf_hub: bool, hf_hub_dataset_name: str, ): From 94050f3423c3306823e9484b0dac828604964eb9 Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 14:05:47 -0700 Subject: [PATCH 5/6] cache dir str not bool --- verifiers/scripts/eval.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index 9a36ef95c..586198c0e 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -349,6 +349,13 @@ def main(): action="store_true", help="Save dataset to disk .cache", ) + parser.add_argument( + "--cache-dir", + "-C", + type=str, + default="", + help="Custom cache directory" + ) parser.add_argument( "--save-to-hf-hub", "-H", From 58a7228f22f52ad6909a75e3e83c7b245a9c65c0 Mon Sep 17 00:00:00 2001 From: Christian Reetz Date: Sat, 20 Sep 2025 15:56:37 -0700 Subject: [PATCH 6/6] cache dir str not bool --- verifiers/scripts/eval.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/verifiers/scripts/eval.py b/verifiers/scripts/eval.py index 586198c0e..31628ea47 100644 --- a/verifiers/scripts/eval.py +++ b/verifiers/scripts/eval.py @@ -203,8 +203,13 @@ def eval_environment( logger.info(f"Saved dataset to {results_path}") if save_dataset_cache: if cache_dir: + cache_base = Path(cache_dir) + else: cache_base = Path( - os.environ.get("VF_CACHE_DIR", Path.home() / ".cache" / "verifiers") + os.environ.get( + "VF_CACHE_DIR", + Path.home() / ".cache" / "verifiers" + ) ) cache_path = cache_base / "evals" / env_model_str / uuid_str cache_path.mkdir(parents=True, exist_ok=True)