BODA-KONKUK
diff --git a/‎data/VQA_BLIND_KO/VQA_BLIND_KO.md b/‎data/VQA_BLIND_KO/VQA_BLIND_KO.md
diff --git a/‎data/VQA_BLIND_KO/split.ipynb
+134 b/‎data/VQA_BLIND_KO/split.ipynb
+134
diff --git a/‎data/VizWiz-VQA/VizWiz-VQA.md b/‎data/VizWiz-VQA/VizWiz-VQA.md
@@ -0,0 +1,134 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Split the train dataset file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "file_path = 'train.xlsx'\n",
+    "df = pd.read_excel(file_path)\n",
+    "split_n = 10\n",
+    "\n",
+    "rows_per_file = len(df) // split_n  \n",
+    "remaining_rows = len(df) % split_n\n",
+    "\n",
+    "start_row = 0\n",
+    "\n",
+    "for i in range(split_n):\n",
+    "    end_row = start_row + rows_per_file\n",
+    "    if remaining_rows > 0:\n",
+    "        end_row += 1\n",
+    "        remaining_rows -= 1\n",
+    "    split_df = df.iloc[start_row:end_row, [0,2,4,5]]\n",
+    "    split_df.to_excel(f'train_{i+1}.xlsx', index=False)\n",
+    "    start_row = end_row"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Translate the document from Korean to English using \"[Google Translate](\"https://translate.google.co.kr/?sl=auto&tl=ko&op=translate\")\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Merge the files and save in JSON format"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Columns in final_df: Index(['image_id', ' category', ' question', ' answer', 'image'], dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "# 엑셀 파일 읽기\n",
+    "file_path = 'train.xlsx'\n",
+    "train_df = pd.read_excel(file_path)\n",
+    "split_n = 10\n",
+    "\n",
+    "# 병합할 파일 목록\n",
+    "file_list = [f'train_en_{i+1}.xlsx' for i in range(split_n)]\n",
+    "\n",
+    "# 빈 데이터프레임 생성\n",
+    "merged_df = pd.DataFrame()\n",
+    "\n",
+    "# 각 파일을 읽어서 병합\n",
+    "for file in file_list:\n",
+    "    df_en = pd.read_excel(file)\n",
+    "    merged_df = pd.concat([merged_df, df_en], ignore_index=True)\n",
+    "\n",
+    "train_columns = ['image_id', 'image']\n",
+    "\n",
+    "train_df = train_df[train_columns]\n",
+    "final_df = pd.merge(merged_df, train_df, on='image_id', how='left')\n",
+    "final_df = final_df.drop_duplicates()\n",
+    "print(\"Columns in final_df:\", final_df.columns)\n",
+    "\n",
+    "json_list = []\n",
+    "for _, row in final_df.iterrows():\n",
+    "    json_entry = {\n",
+    "        \"image\": row['image'],\n",
+    "        \"question\": row[' question'].strip(),\n",
+    "        \"answers\": [\n",
+    "            {\n",
+    "                \"answer\": str(row[' answer']).strip(),\n",
+    "                \"answer_confidence\": \"yes\"  \n",
+    "            },\n",
+    "        ],\n",
+    "        \"answer_type\": row[' category'].strip(),  # 예제 데이터에 따라 변경 가능\n",
+    "        \"answerable\": 1  # 예제 데이터에 따라 변경 가능\n",
+    "    }\n",
+    "    json_list.append(json_entry)\n",
+    "\n",
+    "# JSON 형식으로 저장\n",
+    "with open('train_en.json', 'w') as json_file:\n",
+    "    json.dump(json_list, json_file, indent=2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "boda2",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}