From d69c84adf939c351a775e487aeac6bbfd81ab6df Mon Sep 17 00:00:00 2001 From: Alemax067 <2657236382@qq.com> Date: Sat, 28 Dec 2024 02:23:35 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E3=80=90=E5=BC=80=E6=BA=90=E5=AE=9E?= =?UTF-8?q?=E4=B9=A0=E3=80=91blip=E6=A8=A1=E5=9E=8B=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- llm/finetune/blip/README.md | 49 ++ llm/finetune/blip/blip_finetune.ipynb | 982 ++++++++++++++++++++++++ llm/finetune/blip/results_visible.ipynb | 94 +++ 3 files changed, 1125 insertions(+) create mode 100644 llm/finetune/blip/README.md create mode 100644 llm/finetune/blip/blip_finetune.ipynb create mode 100644 llm/finetune/blip/results_visible.ipynb diff --git a/llm/finetune/blip/README.md b/llm/finetune/blip/README.md new file mode 100644 index 000000000..b96261c15 --- /dev/null +++ b/llm/finetune/blip/README.md @@ -0,0 +1,49 @@ +# FineTune BLIP +- reference [repo](https://github.com/eeshashetty/captionary-api) + +## Requirements +- python 3.9 +- mindspore 2.3.1 +- mindnlp 0.4.1 + +## args for training the model +- args.device_target : Ascend +- args.device_id +- args.model_name_or_path : 'Salesforce/blip-image-captioning-base' or the path to the model +- args.dataset_name_or_path : 'eeshclusive/captionary-dataset' or the path to the data directory +- args.batch_size : batch size +- args.max_eps : maximum number of epochs +- args.save_path : path to save the model, if not provided the model will not be saved, such as './outputs/' + +## Results +### my results on mindspore +20 epochs: +- train loss: 0.0132 +- val loss: 0.0126 + +requirements: +- Ascend 910B +- Python 3.9 +- MindSpore 2.3.1 +- MindNLP 0.4.1 + +### my results on pytorch +10 epochs: +- train loss: 0.0135 +- val loss: 0.0125 + +requirements: +- GPU 1080ti +- CUDA 11.1.1 +- Python 3.9 +- Pytorch 1.10.2 +- Transformers 4.45.2 + +### Original results from the repo +20 epochs: +- train loss: 1.3579 +- val loss: 1.3584 + +### 其他 +- 
训练loss可视化见results_visible.ipynb文件 +- 原仓库的损失不知为何特别高,复现时训练参数保持一致,但pytorch开启了混合精度,而mindnlp暂不支持,所以pytorch训练收敛得更快一些 \ No newline at end of file diff --git a/llm/finetune/blip/blip_finetune.ipynb b/llm/finetune/blip/blip_finetune.ipynb new file mode 100644 index 000000000..54f58edcd --- /dev/null +++ b/llm/finetune/blip/blip_finetune.ipynb @@ -0,0 +1,982 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ddaf1c40", + "metadata": {}, + "source": [ + "## requirements\n", + "### mindspore==2.3.1\n", + "### mindnlp==0.4.1" + ] + }, + { + "cell_type": "markdown", + "id": "5c8be3d0", + "metadata": {}, + "source": [ + "导入所需库" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bb9816da", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "Building prefix dict from the default dictionary ...\n", + "Dumping model to file cache /tmp/jieba.cache\n", + "Loading model cost 1.383 seconds.\n", + "Prefix dict has been built successfully.\n" + ] + } + ], + "source": [ + "import time\n", + "from tqdm import tqdm\n", + "\n", + "import mindspore\n", + "import mindspore.numpy as np\n", + "from mindspore.dataset import GeneratorDataset\n", + "from mindspore import save_checkpoint\n", + "\n", + "from mindnlp.transformers import AutoProcessor, BlipForConditionalGeneration\n", + "from mindnlp.core.optim import Adam\n", + "from mindnlp.core import value_and_grad\n", + "\n", + "from datasets import load_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "6c29932c", + "metadata": {}, + "source": [ + "数据集加载" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54a601b4", + "metadata": {}, + "outputs": [], + "source": [ + "class 
ImageCaptioningDataset():\n", + " def __init__(self, dataset, processor):\n", + " self.dataset = dataset\n", + " self.processor = processor\n", + "\n", + " def __len__(self):\n", + " return len(self.dataset)\n", + "\n", + " def __getitem__(self, idx):\n", + " if not isinstance(idx, int):\n", + " idx = int(idx)\n", + " item = self.dataset[idx]\n", + " encoding = self.processor(images=item['image'], text=item['text'], padding=\"max_length\")\n", + " return np.asarray(encoding[\"pixel_values\"]), np.asarray(encoding[\"input_ids\"]), np.asarray(encoding[\"attention_mask\"])\n", + "\n", + "def get_loader(dataset, processor, batch_size, shuffle=True, num_workers=1, drop_remainder=True):\n", + " dataset = ImageCaptioningDataset(dataset, processor)\n", + " return GeneratorDataset(source=dataset, \n", + " column_names=[\"pixel_values\", \"input_ids\", \"attention_mask\"],\n", + " shuffle=shuffle,\n", + " num_parallel_workers=num_workers\n", + " ).batch(batch_size=batch_size, \n", + " drop_remainder=drop_remainder)" + ] + }, + { + "cell_type": "markdown", + "id": "9a4c4ee5", + "metadata": {}, + "source": [ + "自定义Trainer类" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "38a964f9", + "metadata": {}, + "outputs": [], + "source": [ + "class Trainer:\n", + " def __init__(self, net, optimizer, args,\n", + " train_dataset, eval_dataset=None\n", + " ):\n", + " self.net = net\n", + " self.opt = optimizer\n", + " self.args = args\n", + " self.train_dataset = train_dataset\n", + " self.weights = self.net.trainable_params()\n", + " self.value_and_grad = value_and_grad(fn=self.forward_fn, params_or_argnums=self.weights)\n", + " self.run_eval = eval_dataset is not None\n", + " if self.run_eval:\n", + " self.eval_dataset = eval_dataset\n", + " \n", + " def forward_fn(self, input_ids, pixel_values, attention_mask):\n", + " outputs = self.net(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, labels=input_ids)\n", + " loss = outputs.loss\n", + 
" return loss\n", + "\n", + " def train_single(self, input_ids, pixel_values, attention_mask):\n", + " self.opt.zero_grad()\n", + " loss = self.value_and_grad(input_ids, pixel_values, attention_mask)\n", + " self.opt.step()\n", + " return loss\n", + "\n", + " def train(self, epochs):\n", + " best_val_loss = float('inf')\n", + " for epoch in range(0, epochs):\n", + " print(\"\\nEpoch {}/{}\".format(epoch+1, epochs))\n", + " self.net.set_train(True)\n", + " tloss = 0\n", + " step = 0\n", + " for batch in tqdm(self.train_dataset.create_dict_iterator()):\n", + " input_ids = batch[\"input_ids\"]\n", + " pixel_values = batch[\"pixel_values\"].squeeze(1)\n", + " attention_mask = batch[\"attention_mask\"]\n", + " \n", + " loss = self.train_single(input_ids, pixel_values, attention_mask)\n", + " \n", + " tloss = tloss + loss.asnumpy()\n", + " step = step + 1\n", + " \n", + " tloss /= step\n", + " print(\"\\tTrain Loss {:.04f}\".format(tloss))\n", + " \n", + " if self.run_eval:\n", + " self.net.set_train(False)\n", + " val_loss = self.val()\n", + " print(\"Epoch {} complete! 
Validation Loss : {}\".format(epoch + 1, val_loss))\n", + " if val_loss < best_val_loss:\n", + " print(\"Best validation Loss improved from {} to {}\".format(best_val_loss, val_loss))\n", + " best_val_loss = val_loss\n", + " if self.args.save_path is not None:\n", + " print(\"saving model...\")\n", + " save_checkpoint(self.net, self.args.save_path + 'best_model.ckpt')\n", + " \n", + " def val(self):\n", + " vloss = 0\n", + " step = 0\n", + " with mindspore._no_grad():\n", + " for batch in tqdm(self.eval_dataset.create_dict_iterator()):\n", + " input_ids = batch[\"input_ids\"]\n", + " pixel_values = batch[\"pixel_values\"].squeeze(1)\n", + " attention_mask = batch[\"attention_mask\"]\n", + "\n", + " outputs = self.net(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, labels=input_ids)\n", + " loss = outputs.loss\n", + " \n", + " vloss = vloss + loss.asnumpy()\n", + " step = step + 1\n", + "\n", + " return vloss / step" + ] + }, + { + "cell_type": "markdown", + "id": "782bd6c0", + "metadata": {}, + "source": [ + "主函数入口,完整训练流程" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "20138545", + "metadata": {}, + "outputs": [], + "source": [ + "def main(args): \n", + " #load the blip model\n", + " print(\"Building model! 
(This might take time if you are running this for first time)\")\n", + " st = time.time()\n", + " mindspore.set_context(device_target=args.device_target, device_id=args.device_id, pynative_synchronize=True)\n", + " processor = AutoProcessor.from_pretrained(args.model_name_or_path)\n", + " model = BlipForConditionalGeneration.from_pretrained(args.model_name_or_path)\n", + " print(\"Done in {} seconds\".format(time.time() - st))\n", + "\n", + " print(\"Creating optimizer objects\")\n", + " st = time.time()\n", + " optimizer = Adam(model.trainable_params(), lr=5e-5)\n", + " print(\"Done in {} seconds\".format(time.time() - st))\n", + "\n", + " #Creating dataloaders\n", + " print(\"Creating train and val dataloaders\")\n", + " st = time.time()\n", + " data = load_dataset(args.dataset_name_or_path)\n", + " train_loader = get_loader(data['train'], processor, args.batch_size, shuffle=True, drop_remainder=True)\n", + " val_loader = get_loader(data['test'], processor, args.batch_size, shuffle=True, drop_remainder=False)\n", + " print(\"Done in {} seconds\".format(time.time() - st))\n", + "\n", + " print(\"Let the training begin\")\n", + " st = time.time()\n", + " trainer = Trainer(net=model, optimizer=optimizer, args=args, train_dataset=train_loader, eval_dataset=val_loader)\n", + " trainer.train(epochs=args.max_eps)\n", + " print(\"Done in {} seconds\".format(time.time() - st))" + ] + }, + { + "cell_type": "markdown", + "id": "cf64a755", + "metadata": {}, + "source": [ + "设置训练参数,开始训练" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "01fecad1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building model! 
(This might take time if you are running this for first time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindnlp/transformers/tokenization_utils_base.py:1526: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted, and will be then set to `False` by default. \n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[MS_ALLOC_CONF]Runtime config: enable_vmm:True vmm_align_size:2MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "BlipTextLMHeadModel has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`.`PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.\n", + " - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).\n", + " - If you are not the owner of the model architecture class, please contact the model code owner to update it.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done in 17.581424474716187 seconds\n", + "Creating optimizer objects\n", + "Done in 0.0065310001373291016 seconds\n", + "Creating train and val dataloaders\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generating train split: 100%|██████████| 162/162 [00:00<00:00, 440.00 examples/s]\n", + "Generating test split: 100%|██████████| 51/51 [00:00<00:00, 728.11 examples/s]\n", + "[WARNING] ME(200:281472890875920,MainProcess):2024-12-28-00:27:44.642.659 [mindspore/dataset/engine/datasets_user_defined.py:796] Input 'source' of 'GeneratorDataset' includes network computing 
operators like in mindspore.nn, mindspore.ops, mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.\n", + "[WARNING] ME(200:281472890875920,MainProcess):2024-12-28-00:27:44.647.061 [mindspore/dataset/engine/datasets_user_defined.py:796] Input 'source' of 'GeneratorDataset' includes network computing operators like in mindspore.nn, mindspore.ops, mindspore.numpy module and etc, which do not support multi-thread compiling, recommend to replace it with python implemented operator like numpy etc. Here decrease 'num_parallel_workers' into 1.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done in 15.54231882095337 seconds\n", + "Let the training begin\n", + "\n", + "Epoch 1/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "0it [00:00, ?it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [01:41, 2.54s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 7.3443\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.77it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1 complete! Validation Loss : 4.915086085979755\n", + "Best validation Loss improved from inf to 4.915086085979755\n", + "\n", + "Epoch 2/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:49, 1.24s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 3.2319\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.89it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 2 complete! 
Validation Loss : 1.8268253069657545\n", + "Best validation Loss improved from 4.915086085979755 to 1.8268253069657545\n", + "\n", + "Epoch 3/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.22s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 1.1534\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.81it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 3 complete! Validation Loss : 0.5436725112108084\n", + "Best validation Loss improved from 1.8268253069657545 to 0.5436725112108084\n", + "\n", + "Epoch 4/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.21s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.3363\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.92it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 4 complete! Validation Loss : 0.20180132755866417\n", + "Best validation Loss improved from 0.5436725112108084 to 0.20180132755866417\n", + "\n", + "Epoch 5/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:52, 1.31s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.1522\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.79it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 5 complete! 
Validation Loss : 0.1140028633750402\n", + "Best validation Loss improved from 0.20180132755866417 to 0.1140028633750402\n", + "\n", + "Epoch 6/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.27s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0940\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.75it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 6 complete! Validation Loss : 0.07747195661067963\n", + "Best validation Loss improved from 0.1140028633750402 to 0.07747195661067963\n", + "\n", + "Epoch 7/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.26s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0668\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.74it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 7 complete! Validation Loss : 0.05752776018702067\n", + "Best validation Loss improved from 0.07747195661067963 to 0.05752776018702067\n", + "\n", + "Epoch 8/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:51, 1.29s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0514\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.92it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 8 complete! 
Validation Loss : 0.045433574284498505\n", + "Best validation Loss improved from 0.05752776018702067 to 0.045433574284498505\n", + "\n", + "Epoch 9/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.27s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0413\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.77it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 9 complete! Validation Loss : 0.03752241713496355\n", + "Best validation Loss improved from 0.045433574284498505 to 0.03752241713496355\n", + "\n", + "Epoch 10/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.25s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0345\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.94it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 10 complete! Validation Loss : 0.03150226190113104\n", + "Best validation Loss improved from 0.03752241713496355 to 0.03150226190113104\n", + "\n", + "Epoch 11/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:49, 1.24s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0294\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.91it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 11 complete! 
Validation Loss : 0.027369202186281864\n", + "Best validation Loss improved from 0.03150226190113104 to 0.027369202186281864\n", + "\n", + "Epoch 12/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:49, 1.23s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0258\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.65it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 12 complete! Validation Loss : 0.024082990936361827\n", + "Best validation Loss improved from 0.027369202186281864 to 0.024082990936361827\n", + "\n", + "Epoch 13/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.21s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0230\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.76it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 13 complete! Validation Loss : 0.021563996345951006\n", + "Best validation Loss improved from 0.024082990936361827 to 0.021563996345951006\n", + "\n", + "Epoch 14/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.26s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0206\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.79it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 14 complete! 
Validation Loss : 0.019490097291194476\n", + "Best validation Loss improved from 0.021563996345951006 to 0.019490097291194476\n", + "\n", + "Epoch 15/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:50, 1.26s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0188\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.95it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 15 complete! Validation Loss : 0.018077760504988525\n", + "Best validation Loss improved from 0.019490097291194476 to 0.018077760504988525\n", + "\n", + "Epoch 16/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.22s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0172\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.78it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 16 complete! Validation Loss : 0.01667449616182309\n", + "Best validation Loss improved from 0.018077760504988525 to 0.01667449616182309\n", + "\n", + "Epoch 17/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.21s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0160\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.77it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 17 complete! 
Validation Loss : 0.015317266162198324\n", + "Best validation Loss improved from 0.01667449616182309 to 0.015317266162198324\n", + "\n", + "Epoch 18/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:48, 1.21s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0149\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.72it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 18 complete! Validation Loss : 0.014371497556567192\n", + "Best validation Loss improved from 0.015317266162198324 to 0.014371497556567192\n", + "\n", + "Epoch 19/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:49, 1.24s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0139\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.84it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 19 complete! Validation Loss : 0.013473815069748806\n", + "Best validation Loss improved from 0.014371497556567192 to 0.013473815069748806\n", + "\n", + "Epoch 20/20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "40it [00:47, 1.19s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\tTrain Loss 0.0132\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "13it [00:04, 2.86it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 20 complete! 
Validation Loss : 0.012598874477239756\n", + "Best validation Loss improved from 0.013473815069748806 to 0.012598874477239756\n", + "Done in 1139.0716316699982 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from types import SimpleNamespace\n", + "\n", + "args = SimpleNamespace()\n", + "args.device_target = 'Ascend'\n", + "args.device_id = 0\n", + "args.model_name_or_path = 'Salesforce/blip-image-captioning-base'\n", + "args.dataset_name_or_path = 'eeshclusive/captionary-dataset'\n", + "args.batch_size = 4\n", + "args.max_eps = 20\n", + "args.save_path = None\n", + "\n", + "main(args)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/llm/finetune/blip/results_visible.ipynb b/llm/finetune/blip/results_visible.ipynb new file mode 100644 index 000000000..c0b823016 --- /dev/null +++ b/llm/finetune/blip/results_visible.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train_loss_orig = [8.1256, 3.1276, 1.4304, 1.3708, 1.3636, 1.3608, 1.3595, 1.3589, 1.3585, 1.3583, 1.3582, 1.3581, 1.3580, 1.3580, 1.3580, 1.3579, 1.3579, 1.3579, 1.3579, 1.3579]\n", + "val_loss_orig = [5.3221, 1.5943, 1.3788, 1.3662, 1.3631, 1.3605, 1.3596, 1.3591, 1.3589, 1.3586, 1.3586, 1.3585, 1.3585, 1.3584, 1.3584, 1.3584, 1.3584, 1.3584, 1.3584, 1.3584]\n", + "\n", + "train_loss_torch = [8.1981, 2.1381, 0.1474, 0.0526, 0.0347, 0.0258, 0.0207, 0.0175, 0.0151, 0.0135, 0.0121, 0.0111, 0.0104, 0.0097, 0.0092, 0.0088, 0.0082, 0.0078, 0.0077, 0.0073]\n", + "val_loss_torch = [4.6963, 0.3585, 0.0691, 0.0406, 0.0290, 0.0228, 0.0187, 0.0163, 0.0140, 0.0125, 0.0115, 0.0105, 0.0098, 0.0095, 0.0090, 0.0084, 0.0078, 0.0078, 0.0076, 0.0073]\n", + "\n", + "train_loss_mindspore = [7.3443, 3.2319, 1.1534, 0.3363, 0.1522, 0.094, 0.0668, 0.0514, 0.0413, 0.0345, 0.0294, 0.0258, 0.023, 0.0206, 0.0188, 0.0172, 0.016, 0.0149, 0.0139, 0.0132]\n", + "val_loss_mindspore = [4.9151, 1.8268, 0.5437, 0.2018, 0.114, 0.0775, 0.0575, 0.0454, 0.0375, 0.0315, 0.0274, 0.0241, 0.0216, 0.0195, 0.0181, 0.0167, 0.0153, 0.0144, 0.0135, 0.0126]\n", + "\n", + "import matplotlib.pyplot as plt\n", + "def plot_losses(epochs_to_plot):\n", + " epochs = range(1, len(train_loss_orig) + 1)\n", + " plt.figure(figsize=(10, 10))\n", + "\n", + " # Plot for original losses\n", + " plt.subplot(2, 2, 1)\n", + " plt.plot(epochs[:epochs_to_plot], train_loss_orig[:epochs_to_plot], 'r-', label='Train Loss Orig')\n", + " plt.plot(epochs[:epochs_to_plot], val_loss_orig[:epochs_to_plot], 'g-', label='Val Loss Orig')\n", + " plt.xlabel('Epochs')\n", + " plt.ylabel('Loss')\n", + " plt.title('Original Losses')\n", + " plt.legend()\n", + " plt.ylim(0, 9) # 设置纵坐标范围\n", + "\n", + " # Plot for torch losses\n", + " plt.subplot(2, 2, 2)\n", + " plt.plot(epochs[:epochs_to_plot], train_loss_torch[:epochs_to_plot], 'r-', label='Train Loss 
Torch')\n", + " plt.plot(epochs[:epochs_to_plot], val_loss_torch[:epochs_to_plot], 'g-', label='Val Loss Torch')\n", + " plt.xlabel('Epochs')\n", + " plt.ylabel('Loss')\n", + " plt.title('Torch Losses')\n", + " plt.legend()\n", + " plt.ylim(0, 9) # 设置纵坐标范围\n", + "\n", + " # Plot for mindspore losses\n", + " plt.subplot(2, 2, 3)\n", + " plt.plot(epochs[:epochs_to_plot], train_loss_mindspore[:epochs_to_plot], 'r-', label='Train Loss Mindspore')\n", + " plt.plot(epochs[:epochs_to_plot], val_loss_mindspore[:epochs_to_plot], 'g-', label='Val Loss Mindspore')\n", + " plt.xlabel('Epochs')\n", + " plt.ylabel('Loss')\n", + " plt.title('Mindspore Losses')\n", + " plt.legend()\n", + " plt.ylim(0, 9) # 设置纵坐标范围\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "# Example usage:\n", + "plot_losses(10)\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm_torch251", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 634c8dae29f4f51fd557c550c0984c8227b3a4f4 Mon Sep 17 00:00:00 2001 From: Alemax067 <2657236382@qq.com> Date: Mon, 24 Feb 2025 15:56:14 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- llm/finetune/blip/blip_finetune.ipynb | 16 ++++++++-------- llm/finetune/blip/results_visible.ipynb | 5 ++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llm/finetune/blip/blip_finetune.ipynb b/llm/finetune/blip/blip_finetune.ipynb index 54f58edcd..0d74744a9 100644 --- a/llm/finetune/blip/blip_finetune.ipynb +++ b/llm/finetune/blip/blip_finetune.ipynb @@ -103,7 +103,7 @@ }, { 
"cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "38a964f9", "metadata": {}, "outputs": [], @@ -121,7 +121,7 @@ " self.run_eval = eval_dataset is not None\n", " if self.run_eval:\n", " self.eval_dataset = eval_dataset\n", - " \n", + "\n", " def forward_fn(self, input_ids, pixel_values, attention_mask):\n", " outputs = self.net(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, labels=input_ids)\n", " loss = outputs.loss\n", @@ -144,15 +144,15 @@ " input_ids = batch[\"input_ids\"]\n", " pixel_values = batch[\"pixel_values\"].squeeze(1)\n", " attention_mask = batch[\"attention_mask\"]\n", - " \n", + "\n", " loss = self.train_single(input_ids, pixel_values, attention_mask)\n", - " \n", + "\n", " tloss = tloss + loss.asnumpy()\n", " step = step + 1\n", - " \n", + "\n", " tloss /= step\n", " print(\"\\tTrain Loss {:.04f}\".format(tloss))\n", - " \n", + "\n", " if self.run_eval:\n", " self.net.set_train(False)\n", " val_loss = self.val()\n", @@ -163,7 +163,7 @@ " if self.args.save_path is not None:\n", " print(\"saving model...\")\n", " save_checkpoint(self.net, self.args.save_path + 'best_model.ckpt')\n", - " \n", + "\n", " def val(self):\n", " vloss = 0\n", " step = 0\n", @@ -175,7 +175,7 @@ "\n", " outputs = self.net(input_ids=input_ids, pixel_values=pixel_values, attention_mask=attention_mask, labels=input_ids)\n", " loss = outputs.loss\n", - " \n", + "\n", " vloss = vloss + loss.asnumpy()\n", " step = step + 1\n", "\n", diff --git a/llm/finetune/blip/results_visible.ipynb b/llm/finetune/blip/results_visible.ipynb index c0b823016..1767c3ebb 100644 --- a/llm/finetune/blip/results_visible.ipynb +++ b/llm/finetune/blip/results_visible.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -65,8 +65,7 @@ " plt.show()\n", "\n", "# Example usage:\n", - "plot_losses(10)\n", - "\n" + "plot_losses(10)" ] } ],