Skip to content

Commit

Permalink
feat: copy original id3 tags, close Captain-FLAM#8
Browse files Browse the repository at this point in the history
Add the mutagen package to copy ID3 tags from the original file to the output file (MP3 format only).
  • Loading branch information
lannodev committed May 11, 2024
1 parent ee4a527 commit bc22d15
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
32 changes: 27 additions & 5 deletions App/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os, librosa, subprocess, tempfile, soundfile as sf, numpy as np
from scipy import signal
from mutagen.id3 import ID3

def Load_Audio(file, sample_rate, ffmpeg = None, output_path = None):

Expand Down Expand Up @@ -46,17 +47,22 @@ def Load_Audio(file, sample_rate, ffmpeg = None, output_path = None):

return audio, sample_rate

def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg, audio_file):

# Get output audio file path
output_path = ''

if output_format == 'PCM_16' or output_format == 'FLOAT':
sf.write(file_path + '.wav', audio.T, sample_rate, format='wav', subtype = output_format)
output_path = file_path + '.wav'
sf.write(output_path, audio.T, sample_rate, format='wav', subtype = output_format)
else:
# Create a temporary file
temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)

if output_format == 'FLAC':
output_path = file_path + '.flac'
sf.write(temp, audio.T, sample_rate, format='wav', subtype='FLOAT')
ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a flac -compression_level 5 -ch_mode mid_side -frame_size {sample_rate} -lpc_type cholesky -lpc_passes 1 -exact_rice_parameters 1 "{file_path}.flac"'
ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a flac -compression_level 5 -ch_mode mid_side -frame_size {sample_rate} -lpc_type cholesky -lpc_passes 1 -exact_rice_parameters 1 "{output_path}"'

elif output_format == 'MP3':
# TODO : Fix the BUG of the Lame encoder, which modifies the length of the audio results (~ +30 ms on short songs, -30 ms on long songs) ?!?!
Expand All @@ -70,9 +76,9 @@ def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
# And also, parameters = ['-joint_stereo', '0'] (Separated stereo channels)
# is WORSE than "Joint Stereo" for High Frequencies !
# So let's use it by default for MP3 encoding !!

output_path = file_path + '.mp3'
sf.write(temp, audio.T, sample_rate, format='wav', subtype='PCM_16')
ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a libmp3lame -b:a 320k -q:a 0 -joint_stereo 1 -cutoff {cut_off} "{file_path}.mp3"'
ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a libmp3lame -b:a 320k -q:a 0 -joint_stereo 1 -cutoff {cut_off} "{output_path}"'

try:
subprocess.run(ffmpeg, shell=True, text=True, capture_output=True, check=True)
Expand All @@ -86,6 +92,20 @@ def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
temp.close()
os.remove(temp.name)

# Copy original tags to the output file
# only for MP3 files
input_format = file_format(audio_file)
output_format = file_format(output_path)

if(input_format == '.mp3' and output_format == '.mp3'):
input_tags = ID3(audio_file)
output_tags = ID3(output_path)
for tag in input_tags:
output_tags[tag] = input_tags[tag]

print("► Copying tags...")
output_tags.save()

def Normalize(audio, threshold_dB = -1.0):
"""
Normalize audio to -1.0 dB peak amplitude
Expand Down Expand Up @@ -396,3 +416,5 @@ def to_shape(x, target_shape):
padding_list.append(pad_tuple)

return np.pad(x, tuple(padding_list), mode='constant')

def file_format(file):
    """
    Return the extension of a file path, including the leading dot, in lower case.

    file : path or file name (e.g. "song.MP3")

    Returns '' when the path has no extension.
    The extension is lower-cased so that comparisons against literals
    such as '.mp3' also match files named with upper-case extensions ("song.MP3").
    """
    return os.path.splitext(file)[1].lower()
22 changes: 11 additions & 11 deletions App/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
print(f"► Normalizing audio : {self.normalize} dB")
normalized = App.audio_utils.Normalize(original_audio, self.normalize)

self.Save_Audio(0, normalized)
self.Save_Audio(0, normalized, '', audio_file)
else:
normalized = original_audio

Expand All @@ -373,7 +373,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if audio is None:
audio = self.Extract_with_Model("Music", normalized, model)

self.Save_Audio(1, audio, model['Name'])
self.Save_Audio(1, audio, model['Name'], audio_file)

music_extracts.append(audio)

Expand All @@ -395,7 +395,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if audio is None:
audio = self.Extract_with_Model("Vocal", vocal_sub, model)

self.Save_Audio(2, audio, model['Name'])
self.Save_Audio(2, audio, model['Name'], audio_file)

vocal_extracts.append(audio)

Expand All @@ -419,7 +419,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if audio is None:
audio = self.Extract_with_Model("Bleed_Music", vocal_ensemble, model)

self.Save_Audio(3, audio, model['Name'])
self.Save_Audio(3, audio, model['Name'], audio_file)

bleed_extracts.append(audio)

Expand Down Expand Up @@ -458,7 +458,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if len(self.models['bleed_vocal']) == 0:
music_final = music_sub
else:
if self.DEBUG: self.Save_Audio("4 - Music - SUB", music_sub)
if self.DEBUG: self.Save_Audio("4 - Music - SUB", music_sub, audio_file)

bleed_extracts = []; music_extracts = []

Expand All @@ -469,7 +469,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if audio is None:
audio = self.Extract_with_Model("Bleed_Vocal", music_sub, model)

self.Save_Audio(4, audio, model['Name'])
self.Save_Audio(4, audio, model['Name'], audio_file)

bleed_extracts.append(audio)

Expand All @@ -487,7 +487,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
if audio is None:
audio = self.Extract_with_Model("Bleed_Music", bleed_ensemble, model)

self.Save_Audio(5, audio, model['Name'])
self.Save_Audio(5, audio, model['Name'], audio_file)

music_extracts.append(audio)

Expand All @@ -510,7 +510,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
# Apply silence filter
if self.silent < 0: vocal_final = App.audio_utils.Silent(vocal_final, self.sample_rate, self.silent)

self.Save_Audio(6, vocal_final)
self.Save_Audio(6, vocal_final, '', audio_file)

print("► Save Music FINAL !")

Expand All @@ -520,7 +520,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
# Apply silence filter
if self.silent < 0: music_final = App.audio_utils.Silent(music_final, self.sample_rate, self.silent)

self.Save_Audio(7, music_final)
self.Save_Audio(7, music_final, '', audio_file)

print('<b>--> Processing DONE !</b>')

Expand Down Expand Up @@ -781,7 +781,7 @@ def Check_Already_Processed(self, key, model_name = ""):

return None

def Save_Audio(self, key, audio, model_name = ""):
def Save_Audio(self, key, audio, model_name = "", audio_file = ""):
"""
Key : index of AudioFiles list or "str" (direct filename for test mode)
if Key is a string, it will force saving !
Expand All @@ -804,7 +804,7 @@ def Save_Audio(self, key, audio, model_name = ""):

file = os.path.join(self.song_output_path, filename)

App.audio_utils.Save_Audio(file, audio, self.sample_rate, self.output_format, self.original_cutoff, self.ffmpeg)
App.audio_utils.Save_Audio(file, audio, self.sample_rate, self.output_format, self.original_cutoff, self.ffmpeg, audio_file)


def demix_full(self, mix, use_model, infer_session, bigshifts, pass_number = 0):
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ onnxruntime-gpu
ml_collections
PyYAML

#**** MP3 Tags (for future use) ****
#**** MP3 Tags ****

# mutagen
mutagen

0 comments on commit bc22d15

Please sign in to comment.