Skip to content

Commit

Permalink
Implemented vggish
Browse files Browse the repository at this point in the history
Also cleaned up some stuff in the other embedding scripts and RF script
  • Loading branch information
BenUCL committed Aug 30, 2023
1 parent 08e0d72 commit 39ad7a8
Show file tree
Hide file tree
Showing 16 changed files with 1,053 additions and 308 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,44 +1,111 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"# import torch\n",
"# import torch.nn as nn\n",
"# import torchvision.models as models\n",
"# from torch.utils.data import DataLoader"
"# VGGish\n",
"\n",
"Script to extract embeddings from audio using VGGish. \n",
"\n",
"Note this is far slower than the other embedding scripts as its not using the gpu."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-08-30 21:28:37.542298: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-08-30 21:28:42.233539: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"import numpy as np\n",
"import csv\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import Audio\n",
"from scipy.io import wavfile\n",
"\n",
"# Importing necessary modules\n",
"import json\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-08-30 21:28:51.360074: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
"Skipping registering GPU devices...\n"
]
}
],
"source": [
"# load VGGish\n",
"model = hub.load('https://tfhub.dev/google/vggish/1')\n",
"\n",
"### needs this placeholder for some reason\n",
"# Input: 3 seconds of silence as mono 16 kHz waveform samples.\n",
"waveform = np.zeros(3 * 16000, dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# which dataset to use\n",
"test_dataset = 'test_bermuda'\n",
"\n",
"# Load the JSON file\n",
"# path where json file of data is stored\n",
"json_path = '/home/ben/reef-audio-representation-learning/data/dataset.json'\n",
"with open(json_path, 'r') as f:\n",
" dataset_json = json.load(f)"
"\n",
"# path to the audio files\n",
"dataset_path = '/home/ben/data/full_dataset/'\n",
"\n",
"# path to the results folder, where the csv if embeddings will be saved\n",
"results_path = '/home/ben/reef-audio-representation-learning/code/simclr-pytorch-reefs/evaluation/embeddings/raw_embeddings/'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the right data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# open the json\n",
"with open(json_path, 'r') as f:\n",
" dataset_json = json.load(f)\n",
" \n",
"# Initialize an empty list to store the filtered entries\n",
"filtered_entries = []\n",
"\n",
"# Filter entries based on 'data_type' and 'dataset'\n",
"for entry in dataset_json['audio']:\n",
" if entry['data_type'] == 'test_data' and entry['dataset'] == 'test_australia':\n",
" if entry['data_type'] == 'test_data' and entry['dataset'] == test_dataset:\n",
" # Convert the 'class' to numeric\n",
" numeric_class = int(entry['class'].replace('class', ''))\n",
" \n",
Expand All @@ -49,15 +116,254 @@
" }\n",
" \n",
" # Append the filtered entry to the list\n",
" filtered_entries.append(filtered_entry)"
" filtered_entries.append(filtered_entry) #list objest with dictionaries of {file_name: file, class}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get embeddings"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def ensure_sample_rate(original_sample_rate, waveform,\n",
" desired_sample_rate=16000):\n",
" \"\"\"Resample waveform if required.\"\"\"\n",
" if original_sample_rate != desired_sample_rate:\n",
" desired_length = int(round(float(len(waveform)) /\n",
" original_sample_rate * desired_sample_rate))\n",
" waveform = scipy.signal.resample(waveform, desired_length)\n",
" return desired_sample_rate, waveform"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Initialize an empty list to store the embeddings\n",
"all_embeddings = []\n",
"\n",
"# Initialize an empty list to store the rows for DataFrame\n",
"df_rows = []\n",
"\n",
"# Loop through each filtered entry to read and process the WAV file\n",
"for entry in filtered_entries:\n",
" wav_file_name = dataset_path + entry['file_name']\n",
" \n",
" # Read the WAV file\n",
" sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')\n",
" \n",
" # Ensure sample rate\n",
" sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)\n",
" \n",
" # Pad wav_data with 280 extra zeros\n",
" wav_data = np.pad(wav_data, (0, 280), 'constant')\n",
" \n",
" # Compute the embeddings\n",
" embeddings = model(wav_data)\n",
" \n",
" # Assert the shape of the embeddings\n",
" embeddings.shape.assert_is_compatible_with([None, 128])\n",
"\n",
" # convert embeddings to a numpy array\n",
" second_1 = np.array(embeddings[0])\n",
" second_2 = np.array(embeddings[1])\n",
"\n",
" # take mean of the array for each 1sec, so we average features over the 2 seconds\n",
" mean = np.mean([second_1, second_2], axis=0)\n",
" \n",
" # Create a row for DataFrame\n",
" df_row = {'label': entry['class']}\n",
" for i, feature in enumerate(mean): # Assuming embeddings[0] contains the 128 features\n",
" df_row[f'Feature_{i+1}'] = feature\n",
" \n",
" df_rows.append(df_row)\n",
"\n",
"# Create a DataFrame\n",
"df = pd.DataFrame(df_rows)\n",
"\n",
"# Save the DataFrame to a CSV file\n",
"df.to_csv(results_path + 'VGGish-' + test_dataset[5:] + '-embeddings.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label</th>\n",
" <th>Feature_1</th>\n",
" <th>Feature_2</th>\n",
" <th>Feature_3</th>\n",
" <th>Feature_4</th>\n",
" <th>Feature_5</th>\n",
" <th>Feature_6</th>\n",
" <th>Feature_7</th>\n",
" <th>Feature_8</th>\n",
" <th>Feature_9</th>\n",
" <th>...</th>\n",
" <th>Feature_119</th>\n",
" <th>Feature_120</th>\n",
" <th>Feature_121</th>\n",
" <th>Feature_122</th>\n",
" <th>Feature_123</th>\n",
" <th>Feature_124</th>\n",
" <th>Feature_125</th>\n",
" <th>Feature_126</th>\n",
" <th>Feature_127</th>\n",
" <th>Feature_128</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>-0.755880</td>\n",
" <td>-0.239144</td>\n",
" <td>-0.006482</td>\n",
" <td>-0.660316</td>\n",
" <td>-0.661326</td>\n",
" <td>-1.564038</td>\n",
" <td>0.189483</td>\n",
" <td>-0.150790</td>\n",
" <td>-2.337072</td>\n",
" <td>...</td>\n",
" <td>-0.306457</td>\n",
" <td>0.085061</td>\n",
" <td>-0.065240</td>\n",
" <td>-0.174579</td>\n",
" <td>-0.748717</td>\n",
" <td>-0.202958</td>\n",
" <td>-0.170341</td>\n",
" <td>-0.619031</td>\n",
" <td>0.144040</td>\n",
" <td>0.159795</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>-0.569910</td>\n",
" <td>-0.196253</td>\n",
" <td>-0.012757</td>\n",
" <td>-0.733111</td>\n",
" <td>-0.702112</td>\n",
" <td>-1.603721</td>\n",
" <td>0.293776</td>\n",
" <td>-0.188705</td>\n",
" <td>-2.214564</td>\n",
" <td>...</td>\n",
" <td>-0.384656</td>\n",
" <td>0.058515</td>\n",
" <td>-0.087278</td>\n",
" <td>-0.202737</td>\n",
" <td>-0.680734</td>\n",
" <td>-0.189267</td>\n",
" <td>-0.165939</td>\n",
" <td>-0.563902</td>\n",
" <td>0.084017</td>\n",
" <td>0.065772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>-0.767339</td>\n",
" <td>-0.215024</td>\n",
" <td>0.117208</td>\n",
" <td>-0.570487</td>\n",
" <td>-0.628667</td>\n",
" <td>-1.538399</td>\n",
" <td>0.244541</td>\n",
" <td>-0.060223</td>\n",
" <td>-2.132523</td>\n",
" <td>...</td>\n",
" <td>-0.199617</td>\n",
" <td>0.119985</td>\n",
" <td>-0.073416</td>\n",
" <td>-0.218369</td>\n",
" <td>-0.632460</td>\n",
" <td>-0.165810</td>\n",
" <td>-0.144961</td>\n",
" <td>-0.630340</td>\n",
" <td>0.159019</td>\n",
" <td>0.107950</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 129 columns</p>\n",
"</div>"
],
"text/plain": [
" label Feature_1 Feature_2 Feature_3 Feature_4 Feature_5 Feature_6 \\\n",
"0 0 -0.755880 -0.239144 -0.006482 -0.660316 -0.661326 -1.564038 \n",
"1 0 -0.569910 -0.196253 -0.012757 -0.733111 -0.702112 -1.603721 \n",
"2 0 -0.767339 -0.215024 0.117208 -0.570487 -0.628667 -1.538399 \n",
"\n",
" Feature_7 Feature_8 Feature_9 ... Feature_119 Feature_120 \\\n",
"0 0.189483 -0.150790 -2.337072 ... -0.306457 0.085061 \n",
"1 0.293776 -0.188705 -2.214564 ... -0.384656 0.058515 \n",
"2 0.244541 -0.060223 -2.132523 ... -0.199617 0.119985 \n",
"\n",
" Feature_121 Feature_122 Feature_123 Feature_124 Feature_125 \\\n",
"0 -0.065240 -0.174579 -0.748717 -0.202958 -0.170341 \n",
"1 -0.087278 -0.202737 -0.680734 -0.189267 -0.165939 \n",
"2 -0.073416 -0.218369 -0.632460 -0.165810 -0.144961 \n",
"\n",
" Feature_126 Feature_127 Feature_128 \n",
"0 -0.619031 0.144040 0.159795 \n",
"1 -0.563902 0.084017 0.065772 \n",
"2 -0.630340 0.159019 0.107950 \n",
"\n",
"[3 rows x 129 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# view first 5 entries to check it worked\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# get a summary of the label colum in df\n",
"df['label'].describe()"
]
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 39ad7a8

Please sign in to comment.