From 89e5c05d32e438e03f655a5ddc9a0c278589812f Mon Sep 17 00:00:00 2001
From: liuyifan123123 <2038433131@qq.com>
Date: Sun, 24 Nov 2024 23:38:20 +0800
Subject: [PATCH] peft_adalora_seq2seq

---
 .../peft_adalora_seq2seq.ipynb                | 259 ++++++++++++------
 1 file changed, 171 insertions(+), 88 deletions(-)

diff --git a/llm/peft/adalora/train_adalora_seq2seq/peft_adalora_seq2seq.ipynb b/llm/peft/adalora/train_adalora_seq2seq/peft_adalora_seq2seq.ipynb
index 8ec5c3545..3e3670651 100644
--- a/llm/peft/adalora/train_adalora_seq2seq/peft_adalora_seq2seq.ipynb
+++ b/llm/peft/adalora/train_adalora_seq2seq/peft_adalora_seq2seq.ipynb
@@ -26,7 +26,7 @@
     " from .autonotebook import tqdm as notebook_tqdm\n",
     "Building prefix dict from the default dictionary ...\n",
     "Loading model from cache /tmp/jieba.cache\n",
-    "Loading model cost 1.294 seconds.\n",
+    "Loading model cost 1.297 seconds.\n",
     "Prefix dict has been built successfully.\n"
    ]
   }
  ]
 },
@@ -37,17 +37,18 @@
     "os.environ[\"KMP_DUPLICATE_LIB_OK\"]=\"TRUE\"\n",
     "import mindspore\n",
     "from tqdm import tqdm\n",
-    "from mindnlp.transformers import BartTokenizer, AutoModelForSeq2SeqLM\n",
+    "from mindnlp.transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
     "import numpy as np\n",
     "\n",
     "from mindnlp.peft import AdaLoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model\n",
     "from mindspore.dataset import GeneratorDataset\n",
-    "\n",
+    "from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
+    "from mindnlp.core import value_and_grad\n",
     "if \"RANK_TABLE_FILE\" in os.environ:\n",
-    "    del os.environ[\"RANL_TABLE_FILE\"]\n",
+    "    del os.environ[\"RANK_TABLE_FILE\"]\n",
     "\n",
     "\n",
-    "# os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
+    "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
     "model_name_or_path = \"facebook/bart-base\"\n",
     "checkpoint_name = \"financial_sentiment_analysis_lora_v1.pt\"\n",
     "text_column = \"sentence\"\n",
@@ -133,53 +134,82 @@
    "cell_type": "code",
    "execution_count": 5,
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['negative', 'neutral', 'positive']"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the dataset\n",
+    "from mindnlp.transformers import AutoTokenizer\n",
+    "mindspore.dataset.config.set_seed(123)\n",
+    "dataset = load_dataset(\"financial_phrasebank\", \"sentences_allagree\")\n",
+    "classes = dataset.source.ds.features[\"label\"].names\n",
+    "classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-     "[WARNING] ME(13021:281473221177520,MainProcess):2024-10-17-20:25:53.512.70 [mindspore/dataset/engine/datasets.py:1217] Dataset is shuffled before split.\n"
+     "[WARNING] ME(94915:281473809993904,MainProcess):2024-11-24-22:45:54.630.857 [mindspore/dataset/engine/datasets.py:1217] Dataset is shuffled before split.\n"
      ]
     }
    ],
    "source": [
-    "# Load the dataset\n",
-    "from mindnlp.transformers import AutoTokenizer\n",
-    "\n",
-    "dataset = load_dataset(\"financial_phrasebank\", \"sentences_allagree\")\n",
     "train_dataset, validation_dataset = dataset.shuffle(64).split([0.9, 0.1])"
    ]
   },
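+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because BART is a seq2seq model, it predicts the sentiment as generated *text* rather than as a class index, so the integer labels are mapped to their string names from `classes` before tokenization. A minimal sketch of the mapping (illustration only, not part of the measured run):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch only: classes == ['negative', 'neutral', 'positive'],\n",
+    "# so an integer label of 1 maps to the text label 'neutral'.\n",
+    "print(classes[1])"
+   ]
+  },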
  {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 7,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "def add_text_label(sentence, label):\n",
+   "    return sentence, label, classes[label.item()]\n",
+   "\n",
+   "train_dataset = train_dataset.map(add_text_label, ['sentence', 'label'], ['sentence', 'label', 'text_label'])\n",
+   "validation_dataset = validation_dataset.map(add_text_label, ['sentence', 'label'], ['sentence', 'label', 'text_label'])"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-     "['negative', 'neutral', 'positive']"
+     "{'sentence': Tensor(shape=[], dtype=String, value= 'The gross area of the Innova 2 project will be about 10,000 sq m ( 107,600 sq ft ) .'),\n",
+     " 'label': Tensor(shape=[], dtype=Int64, value= 1),\n",
+     " 'text_label': Tensor(shape=[], dtype=String, value= 'neutral')}"
      ]
     },
-    "execution_count": 6,
+    "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-   "# Add text labels\n",
-   "classes = dataset.source.ds.features[\"label\"].names\n",
-   "def add_text_label(sentence, label):\n",
-   "    return sentence, label, classes[label.item()]\n",
-   "\n",
-   "train_dataset = train_dataset.map(add_text_label, ['sentence', 'label'], ['sentence', 'label', 'text_label'])\n",
-   "validation_dataset = validation_dataset.map(add_text_label, ['sentence', 'label'], ['sentence', 'label', 'text_label'])\n",
-   "classes"
+   "next(train_dataset.create_dict_iterator())"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 7,
+  "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -198,7 +228,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 8,
+  "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -206,6 +236,7 @@
    "from mindnlp.dataset import BaseMapFunction\n",
    "from threading import Lock\n",
    "lock = Lock()\n",
+   "\n",
    "class MapFunc(BaseMapFunction):\n",
    "    def __call__(self, sentence, label, text_label):\n",
    "        lock.acquire()\n",
@@ -214,56 +245,113 @@
    "        lock.release()\n",
    "        labels = labels['input_ids']\n",
    "        labels = np.where(np.equal(labels, tokenizer.pad_token_id), -100, labels)\n",
-   "        return model_inputs['input_ids'], model_inputs['attention_mask'], labels"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 9,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "\n",
-   "def get_dataset(dataset, tokenizer, batch_size=None, shuffle=True):\n",
+   "        return model_inputs['input_ids'], model_inputs['attention_mask'], labels\n",
+   "\n",
+   "\n",
+   "def get_dataset(dataset, tokenizer, shuffle=True):\n",
    "    input_colums=['sentence', 'label', 'text_label']\n",
    "    output_columns=['input_ids', 'attention_mask', 'labels']\n",
    "    dataset = dataset.map(MapFunc(input_colums, output_columns),\n",
    "                          input_colums, output_columns)\n",
    "    if shuffle:\n",
    "        dataset = dataset.shuffle(64)\n",
-   "    if batch_size:\n",
-   "        dataset = dataset.batch(batch_size)\n",
+   "    dataset = dataset.batch(batch_size)\n",
    "    return dataset\n",
    "\n",
-   "train_dataset = get_dataset(train_dataset, tokenizer,batch_size=batch_size)\n",
-   "eval_dataset = get_dataset(validation_dataset, tokenizer, batch_size=batch_size,shuffle=False)"
+   "train_dataset = get_dataset(train_dataset, tokenizer)\n",
+   "eval_dataset = get_dataset(validation_dataset, tokenizer, shuffle=False)"
   ]
  },
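+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "In `MapFunc`, label positions holding the pad token are replaced with `-100`, the ignore index of the cross-entropy loss, so padding never contributes to the training loss. A minimal sketch of that masking step, assuming BART's pad token id of 1 (illustration only):"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# Sketch only: pad positions become -100 and are skipped by the loss.\n",
+   "demo = np.array([0, 12516, 2, 1, 1])  # [bos, 'neutral', eos, pad, pad]\n",
+   "np.where(np.equal(demo, 1), -100, demo)  # -> [0, 12516, 2, -100, -100]\n"
+  ]
+ },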
 {
  "cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
  "metadata": {},
+ "outputs": [
+  {
+   "data": {
+    "text/plain": [
+     "{'input_ids': Tensor(shape=[8, 128], dtype=Int64, value=\n",
+     " [[    0,   133,  4200 ...     1,     1,     1],\n",
+     "  [    0, 20839,    42 ...     1,     1,     1],\n",
+     "  [    0,   133,  2771 ...     1,     1,     1],\n",
+     "  ...\n",
+     "  [    0,   487, 17202 ...     1,     1,     1],\n",
+     "  [    0, 37591,  1633 ...     1,     1,     1],\n",
+     "  [    0,   133,  4939 ...     1,     1,     1]]),\n",
+     " 'attention_mask': Tensor(shape=[8, 128], dtype=Int64, value=\n",
+     " [[1, 1, 1 ... 0, 0, 0],\n",
+     "  [1, 1, 1 ... 0, 0, 0],\n",
+     "  [1, 1, 1 ... 0, 0, 0],\n",
+     "  ...\n",
+     "  [1, 1, 1 ... 0, 0, 0],\n",
+     "  [1, 1, 1 ... 0, 0, 0],\n",
+     "  [1, 1, 1 ... 0, 0, 0]]),\n",
+     " 'labels': Tensor(shape=[8, 3], dtype=Int64, value=\n",
+     " [[    0, 12516,     2],\n",
+     "  [    0, 12516,     2],\n",
+     "  [    0, 12516,     2],\n",
+     "  ...\n",
+     "  [    0, 12516,     2],\n",
+     "  [    0, 22173,     2],\n",
+     "  [    0, 12516,     2]])}"
+    ]
+   },
+   "execution_count": 11,
+   "metadata": {},
+   "output_type": "execute_result"
+  }
+ ],
+ "source": [
+  "next(train_dataset.create_dict_iterator())"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
  "outputs": [],
  "source": [
-  "from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
   "from mindnlp.core import optim\n",
+  "\n",
   "# Setting up optimizer and learning rate scheduler\n",
-  "optimizer = optim.AdamW(model.trainable_params(), lr=1e-3)\n",
+  "optimizer = optim.AdamW(model.trainable_params(), lr=lr)\n",
   "lr_scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=(len(train_dataset) * num_epochs))"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 11,
+ "execution_count": 13,
  "metadata": {},
  "outputs": [],
  "source": [
-  "model.base_model.peft_config[\"default\"].total_step = len(train_dataset) * num_epochs"
+  "# model.base_model.peft_config[\"default\"].total_step = len(train_dataset) * num_epochs"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "# Forward function to compute the loss\n",
+  "def forward_fn(**batch):\n",
+  "    outputs = model(**batch)\n",
+  "    loss = outputs.loss\n",
+  "    return loss\n",
+  "\n",
+  "# Gradient function to compute gradients for optimization\n",
+  "grad_fn = value_and_grad(forward_fn, model.trainable_params(), has_aux=False, attach_grads=True)"
+ ]
+},
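+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "With `attach_grads=True`, `value_and_grad` attaches each computed gradient to its parameter, so a training step is simply: clear the old gradients, call `grad_fn`, then apply `optimizer.step()`. A minimal sketch of one step under that assumption (the loop below inlines the same three calls):"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "# Sketch only: one optimization step with gradients attached to the parameters.\n",
+  "def train_step(batch):\n",
+  "    optimizer.zero_grad()    # clear gradients held by the parameters\n",
+  "    loss = grad_fn(**batch)  # forward + backward; grads land on the params\n",
+  "    optimizer.step()         # apply the AdamW update\n",
+  "    return loss\n"
+ ]
+},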
+{
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
 "outputs": [
  {
@@ -284,8 +372,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [03:15<00:00, 1.31it/s]\n",
-    " 3%|▎ | 1/29 [00:01<00:39, 1.41s/it]"
+    "100%|██████████| 255/255 [02:48<00:00, 1.51it/s]\n",
+    " 7%|▋ | 2/29 [00:01<00:17, 1.52it/s]"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
@@ -299,119 +387,119 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 29/29 [00:07<00:00, 3.84it/s]\n"
+    "100%|██████████| 29/29 [00:06<00:00, 4.76it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=0: train_ppl=Tensor(shape=[], dtype=Float32, value= 3.03945) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 1.11168) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.1222) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.115295)\n"
+    "epoch=0: train_ppl=Tensor(shape=[], dtype=Float32, value= 3.05763) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 1.11764) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.15011) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.139858)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:44<00:00, 1.55it/s]\n",
-    "100%|██████████| 29/29 [00:05<00:00, 5.02it/s]\n"
+    "100%|██████████| 255/255 [02:50<00:00, 1.49it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 3.82it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=1: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.14435) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.134838) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.04685) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0457841)\n"
+    "epoch=1: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.14799) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.138016) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.05501) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0535532)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:52<00:00, 1.48it/s]\n",
-    "100%|██████████| 29/29 [00:05<00:00, 4.88it/s]\n"
+    "100%|██████████| 255/255 [03:17<00:00, 1.29it/s]\n",
+    "100%|██████████| 29/29 [00:06<00:00, 4.17it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=2: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.08269) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0794451) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.02456) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0242597)\n"
+    "epoch=2: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.1036) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0985806) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.04416) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0432154)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:50<00:00, 1.50it/s]\n",
-    "100%|██████████| 29/29 [00:05<00:00, 5.15it/s]\n"
+    "100%|██████████| 255/255 [02:58<00:00, 1.43it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 4.00it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=3: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.0693) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0670007) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.01534) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0152213)\n"
+    "epoch=3: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.18729) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.171672) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.22589) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.203668)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:50<00:00, 1.50it/s]\n",
-    "100%|██████████| 29/29 [00:07<00:00, 4.05it/s]\n"
+    "100%|██████████| 255/255 [03:10<00:00, 1.34it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 3.75it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=4: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.05028) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0490523) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.00823) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.00819443)\n"
+    "epoch=4: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.20757) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.188607) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.12162) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.114774)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:59<00:00, 1.42it/s]\n",
-    "100%|██████████| 29/29 [00:06<00:00, 4.69it/s]\n"
+    "100%|██████████| 255/255 [03:17<00:00, 1.29it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 3.83it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=5: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.05887) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0572002) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.01062) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0105656)\n"
+    "epoch=5: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.12998) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.122197) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.09629) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.091934)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:46<00:00, 1.53it/s]\n",
-    "100%|██████████| 29/29 [00:05<00:00, 5.04it/s]\n"
+    "100%|██████████| 255/255 [03:09<00:00, 1.35it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 3.98it/s]\n"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=6: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.033) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0324655) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.01462) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0145114)\n"
+    "epoch=6: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.09778) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.093289) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.08209) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0788912)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 255/255 [02:47<00:00, 1.52it/s]\n",
-    "100%|██████████| 29/29 [00:05<00:00, 5.27it/s]"
+    "100%|██████████| 255/255 [03:18<00:00, 1.29it/s]\n",
+    "100%|██████████| 29/29 [00:07<00:00, 3.93it/s]"
    ]
   },
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "epoch=7: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.02977) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0293341) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.00767) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.00763729)\n"
+    "epoch=7: train_ppl=Tensor(shape=[], dtype=Float32, value= 1.09743) train_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0929676) eval_ppl=Tensor(shape=[], dtype=Float32, value= 1.08905) eval_epoch_loss=Tensor(shape=[], dtype=Float32, value= 0.0853012)\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
     "\n"
    ]
@@ -423,16 +511,6 @@
   }
  ],
 "source": [
-  "from mindnlp.core import value_and_grad\n",
-  "# Forward function to compute the loss\n",
-  "def forward_fn(**batch):\n",
-  "    outputs = model(**batch)\n",
-  "    loss = outputs.loss\n",
-  "    return loss\n",
-  "\n",
-  "# Gradient function to compute gradients for optimization\n",
-  "grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)\n",
-  "\n",
  "from mindspore import ops\n",
  "global_step = 0\n",
  "for epoch in range(num_epochs):\n",
  "    model.set_train()\n",
  "    total_loss = 0\n",
  "    train_total_size = train_dataset.get_dataset_size()\n",
  "    # Iterate over each entry in the training dataset\n",
-  "    for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)): \n",
+  "    for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
  "        optimizer.zero_grad()\n",
  "        loss = grad_fn(**batch)\n",
  "        optimizer.step()\n",
-  "        total_loss += loss.float()  # Accumulate loss for monitoring\n",
-  "        lr_scheduler.step()  # Update learning rate based on scheduler\n",
-  "        # model.base_model.update_and_allocate(global_step,grads)\n",
+  "        total_loss += loss.float()\n",
+  "        lr_scheduler.step()\n",
+  "        # model.base_model.update_and_allocate(global_step)\n",
+  "\n",
  "    global_step += 1\n",
  "    model.set_train(False)\n",
  "    eval_loss = 0\n",
@@ -470,16 +549,16 @@
 },
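+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "The `train_ppl` and `eval_ppl` values printed above are just the exponential of the mean epoch loss (`ops.exp(epoch_loss)` in the loop). A quick numerical check, as a sketch only, using epoch 0's reported train loss:"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "# Sketch only: perplexity is exp(mean loss).\n",
+  "np.exp(1.11764)  # ~3.0576, matching epoch 0's train_ppl above\n"
+ ]
+},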
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "accuracy=42.92035398230089 % on the evaluation dataset\n", - "eval_preds[:10]=['neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral']\n", - "ground_truth[:10]=['neutral', 'negative', 'positive', 'neutral', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'positive']\n" + "accuracy=97.34513274336283 % on the evaluation dataset\n", + "eval_preds[:10]=['neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive']\n", + "ground_truth[:10]=['neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive']\n" ] } ], @@ -510,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -521,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -542,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -559,6 +638,10 @@ ], "source": [ "# Retrieve an entry from the validation dataset.\n", + "# example = next(validation_dataset.create_dict_iterator(output_numpy=True)) # Get an example entry from the validation dataset\n", + "# print(example['sentence'])\n", + "# print(example['text_label'])\n", + "\n", "# Alternatively, create your own text\n", "example = {'sentence': 'Nvidia Tops $3 Trillion in Market Value, Leapfrogging Apple.'}\n", "\n",