Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions dataset/build_arc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,8 @@ def _map_grid(grid: np.ndarray):


def load_puzzles_arcagi(results: dict, dataset_path: str, config: DataProcessConfig):
train_examples_dest = ("train", "all")
test_examples_map = {
"evaluation": [(1.0, ("test", "all"))],
"_default": [(1.0, ("train", "all"))]
}
# Changed: Split puzzles completely, not examples within puzzles
test_puzzle_fraction = 0.2 # Reserve 20% of puzzles for testing

total_puzzles = 0
for subdir in os.scandir(dataset_path):
Expand All @@ -164,19 +161,29 @@ def load_puzzles_arcagi(results: dict, dataset_path: str, config: DataProcessCon
# Shuffle puzzles
np.random.shuffle(puzzles)

# Assign by fraction
for idx, (default_name, puzzle) in enumerate(puzzles):
fraction = idx / len(puzzles)
test_examples_dest = None
for f, dest in test_examples_map.get(subdir.name, test_examples_map["_default"]):
if fraction < f:
test_examples_dest = dest
break

assert test_examples_dest is not None
# Split puzzles at puzzle level to avoid data leakage
if subdir.name == "evaluation":
# For evaluation set, reserve some puzzles completely for testing
num_test_puzzles = int(len(puzzles) * test_puzzle_fraction)
train_puzzles = puzzles[num_test_puzzles:]
test_puzzles = puzzles[:num_test_puzzles]

# Process train puzzles - both train and test examples go to training
for default_name, puzzle in train_puzzles:
convert_single_arc_puzzle(results, default_name, puzzle, config.num_aug,
{"train": ("train", "all"), "test": ("train", "all")})

# Process test puzzles - both train and test examples go to testing
for default_name, puzzle in test_puzzles:
convert_single_arc_puzzle(results, default_name, puzzle, 0, # No augmentation for test
{"train": ("test", "all"), "test": ("test", "all")})
else:
# For other directories, all puzzles go to training
for default_name, puzzle in puzzles:
convert_single_arc_puzzle(results, default_name, puzzle, config.num_aug,
{"train": ("train", "all"), "test": ("train", "all")})

convert_single_arc_puzzle(results, default_name, puzzle, config.num_aug, {"train": train_examples_dest, "test": test_examples_dest})
total_puzzles += 1
total_puzzles += len(puzzles)

print (f"[{dataset_path}] total puzzles: {total_puzzles}")

Expand Down
24 changes: 24 additions & 0 deletions dataset/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,27 @@ def dihedral_transform(arr: np.ndarray, tid: int) -> np.ndarray:

def inverse_dihedral_transform(arr: np.ndarray, tid: int) -> np.ndarray:
    """Undo ``dihedral_transform``: apply the inverse of transform *tid* to *arr*.

    Looks up the inverse transform id in ``DIHEDRAL_INVERSE`` and delegates to
    ``dihedral_transform``, so ``inverse_dihedral_transform(dihedral_transform(a, t), t)``
    recovers ``a``.
    """
    return dihedral_transform(arr, DIHEDRAL_INVERSE[tid])


def split_puzzles_by_id(puzzle_ids: list[str], test_fraction: float = 0.2, seed: int = 42) -> tuple[set[str], set[str]]:
    """Split puzzle IDs into disjoint train and test sets to avoid data leakage.

    Uses a private ``random.Random(seed)`` instance instead of seeding the
    module-level generator: the resulting shuffle is byte-for-byte identical
    (same Mersenne Twister, same seed), but the global random state — which
    other dataset-building code may depend on — is left untouched.

    Args:
        puzzle_ids: Sequence of puzzle identifiers (not mutated).
        test_fraction: Fraction of puzzles to reserve for testing; the test
            set gets ``int(len(puzzle_ids) * test_fraction)`` puzzles (floor).
        seed: Random seed for reproducible splits.

    Returns:
        Tuple of ``(train_puzzle_ids, test_puzzle_ids)`` — disjoint sets whose
        union covers all unique IDs in ``puzzle_ids``.
    """
    import random

    # Dedicated RNG: equivalent shuffle to random.seed(seed) + random.shuffle,
    # without clobbering the shared module-level generator as a side effect.
    rng = random.Random(seed)

    shuffled_ids = list(puzzle_ids)  # copy so the caller's list is not reordered
    rng.shuffle(shuffled_ids)

    num_test = int(len(shuffled_ids) * test_fraction)
    test_ids = set(shuffled_ids[:num_test])
    train_ids = set(shuffled_ids[num_test:])

    return train_ids, test_ids