17 | 17 | "execution_count": null,
18 | 18 | "metadata": {},
19 | 19 | "outputs": [],
20 |  | - "source": [
21 |  | - "# Install required packages\n",
22 |  | - "!pip install transformers datasets trl accelerate bitsandbytes peft torch"
23 |  | - ]
| 20 | + "source": "# Install required packages\n!pip install transformers datasets trl bitsandbytes peft" |
24 | 21 | },
25 | 22 | {
26 | 23 | "cell_type": "markdown",
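
The revised install line drops `torch` and `accelerate`, so this change assumes both are already available in the runtime (TRL's trainers rely on `accelerate` under the hood). A minimal, hypothetical sanity check, not part of the notebook, could be:

```python
# Hypothetical helper (not in the notebook): verify that the packages the
# trimmed pip line assumes are present before training starts.
import importlib.metadata as md

required = ["torch", "accelerate", "transformers", "datasets", "trl", "bitsandbytes", "peft"]
for pkg in required:
    try:
        print(f"{pkg}: {md.version(pkg)}")
    except md.PackageNotFoundError:
        print(f"{pkg}: MISSING - install it before running the notebook")
```
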
62 | 59 | "execution_count": null,
63 | 60 | "metadata": {},
64 | 61 | "outputs": [],
65 |  | - "source": [
66 |  | - "import torch\n",
67 |  | - "import re\n",
68 |  | - "from transformers import (\n",
69 |  | - " AutoModelForCausalLM, \n",
70 |  | - " AutoTokenizer, \n",
71 |  | - " BitsAndBytesConfig,\n",
72 |  | - ")\n",
73 |  | - "from peft import LoraConfig, get_peft_model, TaskType\n",
74 |  | - "from datasets import load_dataset\n",
75 |  | - "from trl import GRPOConfig, GRPOTrainer\n",
76 |  | - "import logging\n",
77 |  | - "\n",
78 |  | - "# Set up logging\n",
79 |  | - "logging.basicConfig(level=logging.INFO)\n",
80 |  | - "logger = logging.getLogger(__name__)"
81 |  | - ]
| 62 | + "source": "import torch\nimport re\nfrom transformers import (\n AutoModelForCausalLM, \n AutoTokenizer, \n BitsAndBytesConfig,\n)\nfrom peft import LoraConfig, get_peft_model, TaskType\nfrom datasets import load_dataset\nfrom trl import GRPOConfig, GRPOTrainer\nimport logging\n\n# Set up logging\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)" |
82 | 63 | },
83 | 64 | {
84 | 65 | "cell_type": "code",
85 | 66 | "execution_count": null,
86 | 67 | "metadata": {},
87 | 68 | "outputs": [],
88 |  | - "source": [
89 |  | - "# Model configuration\n",
90 |  | - "model_name = \"Qwen/Qwen2.5-3B-Instruct\" # You can change this to any model you prefer\n",
91 |  | - "# Alternative models:\n",
92 |  | - "# model_name = \"microsoft/DialoGPT-small\"\n",
93 |  | - "# model_name = \"gpt2\"\n",
94 |  | - "# model_name = \"google/gemma-2b\"\n",
95 |  | - "\n",
96 |  | - "max_seq_length = 2048\n",
97 |  | - "\n",
98 |  | - "# Quantization config for memory efficiency\n",
99 |  | - "bnb_config = BitsAndBytesConfig(\n",
100 |  | - " load_in_4bit=True,\n",
101 |  | - " bnb_4bit_quant_type=\"nf4\",\n",
102 |  | - " bnb_4bit_compute_dtype=torch.float16,\n",
103 |  | - " bnb_4bit_use_double_quant=True,\n",
104 |  | - ")\n",
105 |  | - "\n",
106 |  | - "# Load model and tokenizer with correct device mapping\n",
107 |  | - "# Since CUDA_VISIBLE_DEVICES=\"1\" is set, GPU 1 becomes device 0 from PyTorch's perspective\n",
108 |  | - "model = AutoModelForCausalLM.from_pretrained(\n",
109 |  | - " model_name,\n",
110 |  | - " quantization_config=bnb_config,\n",
111 |  | - " device_map={\"\": 0}, # Use device 0 (which is actually GPU 1 due to CUDA_VISIBLE_DEVICES)\n",
112 |  | - " trust_remote_code=True\n",
113 |  | - ")\n",
114 |  | - "\n",
115 |  | - "tokenizer = AutoTokenizer.from_pretrained(\n",
116 |  | - " model_name,\n",
117 |  | - " trust_remote_code=True\n",
118 |  | - ")\n",
119 |  | - "\n",
120 |  | - "# Add pad token if it doesn't exist\n",
121 |  | - "if tokenizer.pad_token is None:\n",
122 |  | - " tokenizer.pad_token = tokenizer.eos_token\n",
123 |  | - "\n",
124 |  | - "print(f\"Model loaded: {model_name}\")\n",
125 |  | - "print(f\"Model device: {model.device}\")\n",
126 |  | - "print(f\"Tokenizer vocab size: {len(tokenizer)}\")"
127 |  | - ]
| 69 | + "source": "# Model configuration\nmodel_name = \"Qwen/Qwen2.5-3B-Instruct\" # You can change this to any model you prefer\n# Alternative models:\n# model_name = \"microsoft/DialoGPT-small\"\n# model_name = \"gpt2\"\n# model_name = \"google/gemma-2b\"\n\nmax_seq_length = 2048\n\n# Quantization config for memory efficiency\nbnb_config = BitsAndBytesConfig(\n load_in_4bit=True,\n bnb_4bit_quant_type=\"nf4\",\n bnb_4bit_compute_dtype=torch.float16,\n bnb_4bit_use_double_quant=True,\n)\n\n# Load model and tokenizer with correct device mapping\n# Since CUDA_VISIBLE_DEVICES=\"1\" is set, GPU 1 becomes device 0 from PyTorch's perspective\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n quantization_config=bnb_config,\n device_map={\"\": 0}, # Use device 0 (which is actually GPU 1 due to CUDA_VISIBLE_DEVICES)\n trust_remote_code=True\n)\n\ntokenizer = AutoTokenizer.from_pretrained(\n model_name,\n trust_remote_code=True\n)\n\nprint(f\"Model loaded: {model_name}\")\nprint(f\"Model device: {model.device}\")\nprint(f\"Tokenizer vocab size: {len(tokenizer)}\")" |
128 | 70 | },
129 | 71 | {
130 | 72 | "cell_type": "markdown",
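
Both versions of the import cell pull in `LoraConfig`, `get_peft_model`, and `TaskType`, but the cell that actually attaches the adapter sits outside this hunk. As a rough sketch only, the usual PEFT pattern for a 4-bit base model looks like the following; the rank, alpha, dropout, and target modules are placeholder assumptions, not values taken from this notebook:

```python
# Sketch only: typical LoRA attachment for a 4-bit quantized base model.
# Every hyperparameter value below is a placeholder, not the notebook's.
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)  # common prep step for k-bit bases
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # should report only adapter weights as trainable
```
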
341 | 283 | "execution_count": null,
342 | 284 | "metadata": {},
343 | 285 | "outputs": [],
344 |  | - "source": [
345 |  | - "# GRPO Training configuration with enhanced logging\n",
346 |  | - "training_args = GRPOConfig(\n",
347 |  | - " learning_rate=5e-6,\n",
348 |  | - " adam_beta1=0.9,\n",
349 |  | - " adam_beta2=0.99,\n",
350 |  | - " weight_decay=0.1,\n",
351 |  | - " warmup_ratio=0.1,\n",
352 |  | - " lr_scheduler_type=\"cosine\",\n",
353 |  | - " optim=\"adamw_torch_fused\",\n",
354 |  | - " logging_steps=1, # Log every step\n",
355 |  | - " per_device_train_batch_size=2, # Start small to avoid memory issues\n",
356 |  | - " gradient_accumulation_steps=8, # Increase to maintain effective batch size\n",
357 |  | - " num_generations=4, # Reduce to save memory\n",
358 |  | - " max_prompt_length=1024, # Reduce if needed\n",
359 |  | - " max_completion_length=1024, # Reduce if needed\n",
360 |  | - " max_steps=10, # Reduce for testing\n",
361 |  | - " save_steps=10,\n",
362 |  | - " eval_steps=1, # Enable evaluation logging\n",
363 |  | - " max_grad_norm=0.1,\n",
364 |  | - " report_to=\"none\", # Disable reporting to external services\n",
365 |  | - " output_dir=\"./trl_grpo_outputs\",\n",
366 |  | - " logging_dir=\"./logs\", # Directory for logs\n",
367 |  | - " remove_unused_columns=False,\n",
368 |  | - " dataloader_drop_last=True,\n",
369 |  | - " # Enhanced logging options\n",
370 |  | - " log_level=\"info\",\n",
371 |  | - " logging_first_step=True,\n",
372 |  | - " logging_nan_inf_filter=True,\n",
373 |  | - " metric_for_best_model=\"reward\",\n",
374 |  | - " greater_is_better=True,\n",
375 |  | - " # Keep default progress bar enabled\n",
376 |  | - " disable_tqdm=False,\n",
377 |  | - ")\n",
378 |  | - "\n",
379 |  | - "print(\"Training configuration with enhanced default progress bar:\")\n",
380 |  | - "print(f\"Batch size: {training_args.per_device_train_batch_size}\")\n",
381 |  | - "print(f\"Gradient accumulation: {training_args.gradient_accumulation_steps}\")\n",
382 |  | - "print(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\n",
383 |  | - "print(f\"Max steps: {training_args.max_steps}\")\n",
384 |  | - "print(f\"Learning rate: {training_args.learning_rate}\")\n",
385 |  | - "print(f\"Logging every: {training_args.logging_steps} steps\")\n",
386 |  | - "print(f\"Evaluation every: {training_args.eval_steps} steps\")\n",
387 |  | - "print(f\"Default tqdm enabled: {not training_args.disable_tqdm}\")"
388 |  | - ]
| 286 | + "source": "# GRPO Training configuration with enhanced logging\ntraining_args = GRPOConfig(\n learning_rate=5e-6,\n adam_beta1=0.9,\n adam_beta2=0.99,\n weight_decay=0.1,\n warmup_ratio=0.1,\n lr_scheduler_type=\"cosine\",\n optim=\"adamw_torch_fused\",\n logging_steps=1, # Log every step\n per_device_train_batch_size=2, # Start small to avoid memory issues\n gradient_accumulation_steps=8, # Increase to maintain effective batch size\n max_prompt_length=1024, # Reduce if needed\n max_completion_length=1024, # Reduce if needed\n max_steps=10, # Reduce for testing\n save_steps=10,\n eval_steps=1, # Enable evaluation logging\n max_grad_norm=0.1,\n report_to=\"none\", # Disable reporting to external services\n output_dir=\"./trl_grpo_outputs\",\n logging_dir=\"./logs\", # Directory for logs\n dataloader_drop_last=True,\n # Enhanced logging options\n log_level=\"info\",\n logging_first_step=True,\n logging_nan_inf_filter=True,\n metric_for_best_model=\"reward\",\n greater_is_better=True,\n # Keep default progress bar enabled\n disable_tqdm=False,\n)\n\nprint(\"Training configuration with enhanced default progress bar:\")\nprint(f\"Batch size: {training_args.per_device_train_batch_size}\")\nprint(f\"Gradient accumulation: {training_args.gradient_accumulation_steps}\")\nprint(f\"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}\")\nprint(f\"Max steps: {training_args.max_steps}\")\nprint(f\"Learning rate: {training_args.learning_rate}\")\nprint(f\"Logging every: {training_args.logging_steps} steps\")\nprint(f\"Evaluation every: {training_args.eval_steps} steps\")\nprint(f\"Default tqdm enabled: {not training_args.disable_tqdm}\")" |
389 | 287 | },
390 | 288 | {
391 | 289 | "cell_type": "markdown",
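
The trainer in the next hunk wires in four reward functions (`match_format_exactly`, `match_format_approximately`, `check_answer_correctness`, `check_numbers_extraction`) that are defined in cells outside this diff. For orientation, a hypothetical reward function with the callable shape TRL's `GRPOTrainer` expects, one float score per completion, might look like:

```python
# Hypothetical example only; the notebook's real reward functions are not shown here.
# GRPOTrainer calls each reward function with the batch of completions (plus other
# dataset columns via **kwargs) and expects a list of floats, one per completion.
import re

def reward_contains_number(completions, **kwargs):
    # Handle both chat-style completions (list of message dicts) and plain strings.
    texts = [c[0]["content"] if isinstance(c, list) else c for c in completions]
    return [1.0 if re.search(r"\d", t) else 0.0 for t in texts]
```
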
540 | 438 | },
541 | 439 | {
542 | 440 | "cell_type": "code",
543 |  | - "execution_count": 36,
| 441 | + "execution_count": null,
544 | 442 | "metadata": {},
545 |  | - "outputs": [
546 |  | - {
547 |  | - "name": "stderr",
548 |  | - "output_type": "stream",
549 |  | - "text": [
550 |  | - "max_steps is given, it will override any value given in num_train_epochs\n",
551 |  | - "Using auto half precision backend\n",
552 |  | - "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
553 |  | - ]
554 |  | - }
555 |  | - ],
556 |  | - "source": [
557 |  | - "# Initialize GRPO trainer with HuggingFace-style interactive table callback\n",
558 |  | - "hf_table_callback = HuggingFaceStyleTableCallback()\n",
559 |  | - "\n",
560 |  | - "trainer = GRPOTrainer(\n",
561 |  | - " model=model,\n",
562 |  | - " processing_class=tokenizer,\n",
563 |  | - " reward_funcs=[\n",
564 |  | - " match_format_exactly,\n",
565 |  | - " match_format_approximately,\n",
566 |  | - " check_answer_correctness,\n",
567 |  | - " check_numbers_extraction,\n",
568 |  | - " ],\n",
569 |  | - " args=training_args,\n",
570 |  | - " train_dataset=dataset,\n",
571 |  | - " callbacks=[hf_table_callback], # Add HuggingFace-style table callback\n",
572 |  | - ")"
573 |  | - ]
| 443 | + "outputs": [],
| 444 | + "source": "# Initialize GRPO trainer with HuggingFace-style interactive table callback\nhf_table_callback = HuggingFaceStyleTableCallback()\n\ntrainer = GRPOTrainer(\n model=model,\n reward_funcs=[\n match_format_exactly,\n match_format_approximately,\n check_answer_correctness,\n check_numbers_extraction,\n ],\n args=training_args,\n train_dataset=dataset,\n callbacks=[hf_table_callback], # Add HuggingFace-style table callback\n)" |
574 | 445 | },
575 | 446 | {
576 | 447 | "cell_type": "code",
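
The cells that actually start the run fall outside this hunk; assuming the rest of the notebook follows the standard TRL flow, the launch-and-save step would be roughly:

```python
# Sketch under that assumption: kick off GRPO training and persist the adapter.
trainer.train()
trainer.save_model(training_args.output_dir)         # writes the PEFT adapter weights
tokenizer.save_pretrained(training_args.output_dir)  # keep the tokenizer alongside them
```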