
Commit f416d17

Address PR feedback: simplify dependencies and configuration
- Remove torch and accelerate from installation (dependencies of TRL)
- Remove pad token check (handled automatically)
- Restore num_generations to default value (8)
- Remove remove_unused_columns parameter (false by default)
- Remove processing_class parameter (loaded automatically)
1 parent 4575405 commit f416d17

File tree

1 file changed: +7 −136 lines changed

notebooks/en/trl_grpo_reasoning_advanced_reward.ipynb

Lines changed: 7 additions & 136 deletions
@@ -17,10 +17,7 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": [
- "# Install required packages\n",
- "!pip install transformers datasets trl accelerate bitsandbytes peft torch"
-]
+"source": "# Install required packages\n!pip install transformers datasets trl bitsandbytes peft"
 },
 {
 "cell_type": "markdown",
@@ -62,69 +59,14 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": [
- "import torch\n",
- "import re\n",
- "from transformers import (\n",
- "    AutoModelForCausalLM,\n",
- "    AutoTokenizer,\n",
- "    BitsAndBytesConfig,\n",
- ")\n",
- "from peft import LoraConfig, get_peft_model, TaskType\n",
- "from datasets import load_dataset\n",
- "from trl import GRPOConfig, GRPOTrainer\n",
- "import logging\n",
- "\n",
- "# Set up logging\n",
- "logging.basicConfig(level=logging.INFO)\n",
- "logger = logging.getLogger(__name__)"
-]
+"source": "import torch\nimport re\nfrom transformers import (\n    AutoModelForCausalLM,\n    AutoTokenizer,\n    BitsAndBytesConfig,\n)\nfrom peft import LoraConfig, get_peft_model, TaskType\nfrom datasets import load_dataset\nfrom trl import GRPOConfig, GRPOTrainer\nimport logging\n\n# Set up logging\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)"
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": [
- "# Model configuration\n",
- "model_name = \"Qwen/Qwen2.5-3B-Instruct\"  # You can change this to any model you prefer\n",
- "# Alternative models:\n",
- "# model_name = \"microsoft/DialoGPT-small\"\n",
- "# model_name = \"gpt2\"\n",
- "# model_name = \"google/gemma-2b\"\n",
- "\n",
- "max_seq_length = 2048\n",
- "\n",
- "# Quantization config for memory efficiency\n",
- "bnb_config = BitsAndBytesConfig(\n",
- "    load_in_4bit=True,\n",
- "    bnb_4bit_quant_type=\"nf4\",\n",
- "    bnb_4bit_compute_dtype=torch.float16,\n",
- "    bnb_4bit_use_double_quant=True,\n",
- ")\n",
- "\n",
- "# Load model and tokenizer with correct device mapping\n",
- "# Since CUDA_VISIBLE_DEVICES=\"1\" is set, GPU 1 becomes device 0 from PyTorch's perspective\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- "    model_name,\n",
- "    quantization_config=bnb_config,\n",
- "    device_map={\"\": 0},  # Use device 0 (which is actually GPU 1 due to CUDA_VISIBLE_DEVICES)\n",
- "    trust_remote_code=True\n",
- ")\n",
- "\n",
- "tokenizer = AutoTokenizer.from_pretrained(\n",
- "    model_name,\n",
- "    trust_remote_code=True\n",
- ")\n",
- "\n",
- "# Add pad token if it doesn't exist\n",
- "if tokenizer.pad_token is None:\n",
- "    tokenizer.pad_token = tokenizer.eos_token\n",
- "\n",
- "print(f\"Model loaded: {model_name}\")\n",
- "print(f\"Model device: {model.device}\")\n",
- "print(f\"Tokenizer vocab size: {len(tokenizer)}\")"
-]
+"source": "# Model configuration\nmodel_name = \"Qwen/Qwen2.5-3B-Instruct\"  # You can change this to any model you prefer\n# Alternative models:\n# model_name = \"microsoft/DialoGPT-small\"\n# model_name = \"gpt2\"\n# model_name = \"google/gemma-2b\"\n\nmax_seq_length = 2048\n\n# Quantization config for memory efficiency\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True,\n    bnb_4bit_quant_type=\"nf4\",\n    bnb_4bit_compute_dtype=torch.float16,\n    bnb_4bit_use_double_quant=True,\n)\n\n# Load model and tokenizer with correct device mapping\n# Since CUDA_VISIBLE_DEVICES=\"1\" is set, GPU 1 becomes device 0 from PyTorch's perspective\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    quantization_config=bnb_config,\n    device_map={\"\": 0},  # Use device 0 (which is actually GPU 1 due to CUDA_VISIBLE_DEVICES)\n    trust_remote_code=True\n)\n\ntokenizer = AutoTokenizer.from_pretrained(\n    model_name,\n    trust_remote_code=True\n)\n\nprint(f\"Model loaded: {model_name}\")\nprint(f\"Model device: {model.device}\")\nprint(f\"Tokenizer vocab size: {len(tokenizer)}\")"
 },
 {
 "cell_type": "markdown",
@@ -341,51 +283,7 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": [
- "# GRPO Training configuration with enhanced logging\n",
- "training_args = GRPOConfig(\n",
- "    learning_rate=5e-6,\n",
- "    adam_beta1=0.9,\n",
- "    adam_beta2=0.99,\n",
- "    weight_decay=0.1,\n",
- "    warmup_ratio=0.1,\n",
- "    lr_scheduler_type=\"cosine\",\n",
- "    optim=\"adamw_torch_fused\",\n",
- "    logging_steps=1,  # Log every step\n",
- "    per_device_train_batch_size=2,  # Start small to avoid memory issues\n",
- "    gradient_accumulation_steps=8,  # Increase to maintain effective batch size\n",
- "    num_generations=4,  # Reduce to save memory\n",
- "    max_prompt_length=1024,  # Reduce if needed\n",
- "    max_completion_length=1024,  # Reduce if needed\n",
- "    max_steps=10,  # Reduce for testing\n",
- "    save_steps=10,\n",
- "    eval_steps=1,  # Enable evaluation logging\n",
- "    max_grad_norm=0.1,\n",
- "    report_to=\"none\",  # Disable reporting to external services\n",
- "    output_dir=\"./trl_grpo_outputs\",\n",
- "    logging_dir=\"./logs\",  # Directory for logs\n",
- "    remove_unused_columns=False,\n",
- "    dataloader_drop_last=True,\n",
- "    # Enhanced logging options\n",
- "    log_level=\"info\",\n",
- "    logging_first_step=True,\n",
- "    logging_nan_inf_filter=True,\n",
- "    metric_for_best_model=\"reward\",\n",
- "    greater_is_better=True,\n",
- "    # Keep default progress bar enabled\n",
- "    disable_tqdm=False,\n",
- ")\n",
- "\n",
- "print(\"Training configuration with enhanced default progress bar:\")\n",
- "print(f\"Batch size: {training_args.per_device_train_batch_size}\")\n",
- "print(f\"Gradient accumulation: {training_args.gradient_accumulation_steps}\")\n",
- "print(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\n",
- "print(f\"Max steps: {training_args.max_steps}\")\n",
- "print(f\"Learning rate: {training_args.learning_rate}\")\n",
- "print(f\"Logging every: {training_args.logging_steps} steps\")\n",
- "print(f\"Evaluation every: {training_args.eval_steps} steps\")\n",
- "print(f\"Default tqdm enabled: {not training_args.disable_tqdm}\")"
-]
+"source": "# GRPO Training configuration with enhanced logging\ntraining_args = GRPOConfig(\n    learning_rate=5e-6,\n    adam_beta1=0.9,\n    adam_beta2=0.99,\n    weight_decay=0.1,\n    warmup_ratio=0.1,\n    lr_scheduler_type=\"cosine\",\n    optim=\"adamw_torch_fused\",\n    logging_steps=1,  # Log every step\n    per_device_train_batch_size=2,  # Start small to avoid memory issues\n    gradient_accumulation_steps=8,  # Increase to maintain effective batch size\n    max_prompt_length=1024,  # Reduce if needed\n    max_completion_length=1024,  # Reduce if needed\n    max_steps=10,  # Reduce for testing\n    save_steps=10,\n    eval_steps=1,  # Enable evaluation logging\n    max_grad_norm=0.1,\n    report_to=\"none\",  # Disable reporting to external services\n    output_dir=\"./trl_grpo_outputs\",\n    logging_dir=\"./logs\",  # Directory for logs\n    dataloader_drop_last=True,\n    # Enhanced logging options\n    log_level=\"info\",\n    logging_first_step=True,\n    logging_nan_inf_filter=True,\n    metric_for_best_model=\"reward\",\n    greater_is_better=True,\n    # Keep default progress bar enabled\n    disable_tqdm=False,\n)\n\nprint(\"Training configuration with enhanced default progress bar:\")\nprint(f\"Batch size: {training_args.per_device_train_batch_size}\")\nprint(f\"Gradient accumulation: {training_args.gradient_accumulation_steps}\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nprint(f\"Max steps: {training_args.max_steps}\")\nprint(f\"Learning rate: {training_args.learning_rate}\")\nprint(f\"Logging every: {training_args.logging_steps} steps\")\nprint(f\"Evaluation every: {training_args.eval_steps} steps\")\nprint(f\"Default tqdm enabled: {not training_args.disable_tqdm}\")"
 },
 {
 "cell_type": "markdown",
@@ -540,37 +438,10 @@
 },
 {
 "cell_type": "code",
-"execution_count": 36,
+"execution_count": null,
 "metadata": {},
-"outputs": [
- {
-  "name": "stderr",
-  "output_type": "stream",
-  "text": [
-   "max_steps is given, it will override any value given in num_train_epochs\n",
-   "Using auto half precision backend\n",
-   "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
-  ]
- }
-],
-"source": [
- "# Initialize GRPO trainer with HuggingFace-style interactive table callback\n",
- "hf_table_callback = HuggingFaceStyleTableCallback()\n",
- "\n",
- "trainer = GRPOTrainer(\n",
- "    model=model,\n",
- "    processing_class=tokenizer,\n",
- "    reward_funcs=[\n",
- "        match_format_exactly,\n",
- "        match_format_approximately,\n",
- "        check_answer_correctness,\n",
- "        check_numbers_extraction,\n",
- "    ],\n",
- "    args=training_args,\n",
- "    train_dataset=dataset,\n",
- "    callbacks=[hf_table_callback],  # Add HuggingFace-style table callback\n",
- ")"
-]
+"outputs": [],
+"source": "# Initialize GRPO trainer with HuggingFace-style interactive table callback\nhf_table_callback = HuggingFaceStyleTableCallback()\n\ntrainer = GRPOTrainer(\n    model=model,\n    reward_funcs=[\n        match_format_exactly,\n        match_format_approximately,\n        check_answer_correctness,\n        check_numbers_extraction,\n    ],\n    args=training_args,\n    train_dataset=dataset,\n    callbacks=[hf_table_callback],  # Add HuggingFace-style table callback\n)"
 },
 {
 "cell_type": "code",
