+{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"collapsed":true,"execution":{"iopub.execute_input":"2024-06-23T03:03:11.320620Z","iopub.status.busy":"2024-06-23T03:03:11.320205Z","iopub.status.idle":"2024-06-23T03:04:10.665693Z","shell.execute_reply":"2024-06-23T03:04:10.664546Z","shell.execute_reply.started":"2024-06-23T03:03:11.320587Z"},"jupyter":{"outputs_hidden":true},"trusted":true},"outputs":[],"source":["!pip install -q git+https://github.com/huggingface/peft.git transformers bitsandbytes datasets accelerate\n","!pip install -i https://pypi.org/simple/ bitsandbytes\n","!pip install evaluate"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:04:10.668360Z","iopub.status.busy":"2024-06-23T03:04:10.668005Z","iopub.status.idle":"2024-06-23T03:04:10.673669Z","shell.execute_reply":"2024-06-23T03:04:10.672611Z","shell.execute_reply.started":"2024-06-23T03:04:10.668325Z"},"trusted":true},"outputs":[],"source":["import warnings\n","warnings.filterwarnings(action='ignore')"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:04:10.675163Z","iopub.status.busy":"2024-06-23T03:04:10.674893Z","iopub.status.idle":"2024-06-23T03:07:36.130169Z","shell.execute_reply":"2024-06-23T03:07:36.129386Z","shell.execute_reply.started":"2024-06-23T03:04:10.675139Z"},"trusted":true},"outputs":[],"source":["from datasets import load_dataset\n","import torch\n","from PIL import Image\n","from torch.utils.data import DataLoader\n","from tqdm import tqdm\n","import pickle\n","from transformers import AutoProcessor, Blip2ForConditionalGeneration\n","from peft import prepare_model_for_kbit_training\n","\n","processor = AutoProcessor.from_pretrained(\"Salesforce/blip2-opt-2.7b\")\n","model = Blip2ForConditionalGeneration.from_pretrained(\"Salesforce/blip2-opt-2.7b\", \n"," device_map=\"auto\", )\n"," \n","from peft import LoraConfig, get_peft_model\n","\n","config = LoraConfig(\n"," r=16,\n"," lora_alpha=32,\n"," lora_dropout=0.05,\n"," bias=\"none\",\n"," target_modules=[\"q_proj\", \"k_proj\"]\n",")\n","\n","model = prepare_model_for_kbit_training(model)\n","model = get_peft_model(model, config)\n","model.print_trainable_parameters()"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:07:36.133994Z","iopub.status.busy":"2024-06-23T03:07:36.132833Z","iopub.status.idle":"2024-06-23T03:07:50.988005Z","shell.execute_reply":"2024-06-23T03:07:50.986990Z","shell.execute_reply.started":"2024-06-23T03:07:36.133965Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["loading dataset into memory...\n","0:00:06.067868\n","Training sets: 491479->20000 - Validating set: 70211->2000\n"]}],"source":["import os\n","from shutil import copyfile\n","\n","# setting for kaggle format\n","lib_PATH = '/kaggle/input/vizwiz-dataset'\n","\n","from os import listdir\n","from os.path import isfile, join\n","lib_files = [f for f in listdir(lib_PATH) if isfile(join(lib_PATH, f))]\n","\n","for lib_f in lib_files:\n"," copyfile(src = os.path.join(lib_PATH, lib_f), \n"," dst = os.path.join(\"../working\", lib_f))\n","\n","# import all our functions\n","from preprocessing import *\n","from prepare_data import *\n","from vqa import *\n","\n","#-------------------------------download VIZWIZ dataset--------------------------#\n","\n","vizwiz_data, VIZWIZ_TRAIN_PATH, VIZWIZ_VALIDATION_PATH = load_dataset_vizwiz(\"/kaggle/input/vizwiz\")\n","\n","vizwiz_train_dataset = VQADataset(dataset=vizwiz_data['train'],\n"," processor=processor,\n"," img_path=VIZWIZ_TRAIN_PATH)\n","vizwiz_valid_dataset = VQADataset(dataset=vizwiz_data['valid'],\n"," processor=processor,\n"," img_path=VIZWIZ_VALIDATION_PATH)\n","\n","\n","#-------------------------------download KVQA dataset--------------------------#\n","\n","kvqa_data, KVQA_TRAIN_PATH, KVQA_VALIDATION_PATH = load_dataset_kvqa(\"/kaggle/input/vqa-blind-ko\")\n","\n","kvqa_train_dataset = VQADataset(dataset=kvqa_data['train'],\n"," processor=processor,\n"," img_path=KVQA_TRAIN_PATH)\n","kvqa_valid_dataset = VQADataset(dataset=kvqa_data['valid'],\n"," processor=processor,\n"," img_path=KVQA_VALIDATION_PATH)"]},{"cell_type":"code","execution_count":5,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:07:50.989585Z","iopub.status.busy":"2024-06-23T03:07:50.989036Z","iopub.status.idle":"2024-06-23T03:08:14.742234Z","shell.execute_reply":"2024-06-23T03:08:14.741077Z","shell.execute_reply.started":"2024-06-23T03:07:50.989557Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n","\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n","\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"]},{"name":"stderr","output_type":"stream","text":["\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33msooh-j\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"]},{"data":{"text/html":["wandb version 0.17.2 is available! To upgrade, please run:\n"," $ pip install wandb --upgrade"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.17.0"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in <code>/kaggle/working/wandb/run-20240623_030757-3xm8wtzb</code>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run <strong><a href='https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA/runs/3xm8wtzb' target=\"_blank\">fresh-surf-7</a></strong> to <a href='https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at <a href='https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA' target=\"_blank\">https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA</a>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at <a href='https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA/runs/3xm8wtzb' target=\"_blank\">https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA/runs/3xm8wtzb</a>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/sooh-j/finetuning_kvqa_vizwiz_QA/runs/3xm8wtzb?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>"],"text/plain":["<wandb.sdk.wandb_run.Run at 0x7acf31b6a770>"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["!python -c \"from huggingface_hub.hf_api import HfFolder; \\\n"," HfFolder.save_token('__YOUR/HUGGINGFACE/TOKEN/HERE__')\"\n","!wandb login __YOUR/WANDB/TOKEN/HERE__\n","\n","import wandb\n","\n","wandb.init(\n"," project=\"finetuning_kvqa_vizwiz_QA\",\n",")"]},{"cell_type":"code","execution_count":6,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:08:14.744383Z","iopub.status.busy":"2024-06-23T03:08:14.744035Z","iopub.status.idle":"2024-06-23T03:08:15.555853Z","shell.execute_reply":"2024-06-23T03:08:15.554690Z","shell.execute_reply.started":"2024-06-23T03:08:14.744349Z"},"trusted":true},"outputs":[],"source":["from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score\n","from transformers import TrainingArguments\n","\n","def compute_metrics(pred):\n"," labels = pred.label_ids\n"," preds = pred.predictions.argmax(-1)\n"," precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')\n"," acc = accuracy_score(labels, preds)\n"," return {\n"," 'accuracy': acc,\n"," 'f1': f1,\n"," 'precision': precision,\n"," 'recall': recall,\n"," }\n","\n","vizwiz_training_args = TrainingArguments(\n"," output_dir='../results', # output directory\n"," num_train_epochs=1, # total number of training epochs\n"," per_device_train_batch_size=1, # batch size per device during training\n"," per_device_eval_batch_size=1, # batch size for evaluation\n"," warmup_steps=200, # number of warmup steps for learning rate scheduler\n"," logging_dir='../logs', # directory for storing logs\n"," logging_steps=2000, # How often to print logs\n"," do_train=True, # Perform trainingwandb\n"," fp16=True, # Use mixed precision\n"," fp16_opt_level=\"02\", # mixed precision mode\n"," run_name=\"blip-2_QA_finetuning-VIZWIZ\", # experiment name\n"," seed=3, # Seed for experiment reproducibility 3x3,\n",")\n","\n","KVQA_training_args = TrainingArguments(\n"," output_dir='../results', # output directory\n"," num_train_epochs=1, # total number of training epochs\n"," per_device_train_batch_size=1, # batch size per device during training\n"," per_device_eval_batch_size=1, # batch size for evaluation\n"," warmup_steps=200, # number of warmup steps for learning rate scheduler\n"," logging_dir='../logs', # directory for storing logs\n"," logging_steps=2000, # How often to print logs\n"," do_train=True, # Perform trainingwandb\n"," fp16=True, # Use mixed precision\n"," fp16_opt_level=\"02\", # mixed precision mode\n"," run_name=\"blip-2_QA_finetuning-KVQA\", # experiment name\n"," seed=3, # Seed for experiment reproducibility 3x3,\n"," save_safetensors=False\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["#-------------------------------finetuning with VIZWIZ dataset--------------------------#\n","\n","#initialize Trainer\n","vizwiz_trainer = Trainer(\n"," model=model,\n"," args=vizwiz_training_args,\n"," train_dataset=vizwiz_train_dataset,\n"," eval_dataset=vizwiz_valid_dataset,\n"," compute_metrics=compute_metrics,\n",")\n","\n","vizwiz_trainer.train()\n","\n","import torch\n","import torch.nn as nn\n","\n","model_vizwiz = model.merge_and_unload()\n","# push to the hub\n","model_vizwiz.push_to_hub(\"VQA-for-blind\")"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-23T03:08:15.558027Z","iopub.status.busy":"2024-06-23T03:08:15.557635Z","iopub.status.idle":"2024-06-23T03:08:20.980969Z","shell.execute_reply":"2024-06-23T03:08:20.979362Z","shell.execute_reply.started":"2024-06-23T03:08:15.557990Z"},"trusted":true},"outputs":[],"source":["#-------------------------------finetuning with KVQA dataset--------------------------#\n","\n","#initialize Trainer\n","kvqa_trainer = Trainer(\n"," model=model,\n"," args=KVQA_training_args,\n"," train_dataset=kvqa_train_dataset,\n"," eval_dataset=kvqa_valid_dataset,\n"," compute_metrics=compute_metrics,\n",")\n","\n","kvqa_trainer.train()\n","\n","model = model.merge_and_unload()\n","\n","model.push_to_hub(\"VQA-for-blind\")"]}],"metadata":{"kaggle":{"accelerator":"none","dataSources":[{"datasetId":2310141,"sourceId":3887986,"sourceType":"datasetVersion"},{"datasetId":4884402,"sourceId":8751960,"sourceType":"datasetVersion"},{"datasetId":5253046,"sourceId":8747346,"sourceType":"datasetVersion"}],"dockerImageVersionId":30732,"isGpuEnabled":false,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}
0 commit comments