feat: copy original id3 tags, close Captain-FLAM#8

Add the mutagen package to copy ID3 tags from the original file to the output file. Tags are copied only when both the input file and the output file are in MP3 format.
lannodev · May 11, 2024 · bc22d15 · bc22d15
1 parent ee4a527
commit bc22d15
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 18 deletions.
diff --git a/App/audio_utils.py b/App/audio_utils.py
@@ -5,6 +5,7 @@
 
 import os, librosa, subprocess, tempfile, soundfile as sf, numpy as np
 from scipy import signal
+from mutagen.id3 import ID3
 
 def Load_Audio(file, sample_rate, ffmpeg = None, output_path = None):
 
@@ -46,17 +47,22 @@ def Load_Audio(file, sample_rate, ffmpeg = None, output_path = None):
 
 	return audio, sample_rate
 
-def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
+def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg, audio_file):
+
+	# Get output audio file path
+	output_path = ''
 
 	if output_format == 'PCM_16' or output_format == 'FLOAT':
-		sf.write(file_path + '.wav',  audio.T, sample_rate, format='wav', subtype = output_format)
+		output_path = file_path + '.wav'
+		sf.write(output_path,  audio.T, sample_rate, format='wav', subtype = output_format)
 	else:
 		# Create a temporary file
 		temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
 
 		if output_format == 'FLAC':
+			output_path = file_path + '.flac'
 			sf.write(temp, audio.T, sample_rate, format='wav', subtype='FLOAT')
-			ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a flac -compression_level 5 -ch_mode mid_side -frame_size {sample_rate} -lpc_type cholesky -lpc_passes 1 -exact_rice_parameters 1 "{file_path}.flac"'
+			ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a flac -compression_level 5 -ch_mode mid_side -frame_size {sample_rate} -lpc_type cholesky -lpc_passes 1 -exact_rice_parameters 1 "{output_path}"'
 
 		elif output_format == 'MP3':
 			# TODO : Correct the BUG of Lame encoder which modify the length of audio results (~ +30 ms on short song, -30 ms on long song) ?!?!
@@ -70,9 +76,9 @@ def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
 			# And also, parameters = ['-joint_stereo', '0'] (Separated stereo channels)
 			# is WORSE than "Joint Stereo" for High Frequencies !
 			# So let's use it by default for MP3 encoding !!
-
+			output_path = file_path + '.mp3'
 			sf.write(temp, audio.T, sample_rate, format='wav', subtype='PCM_16')
-			ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a libmp3lame -b:a 320k -q:a 0 -joint_stereo 1 -cutoff {cut_off} "{file_path}.mp3"'
+			ffmpeg = f'"{ffmpeg}" -y -i "{temp.name}" -codec:a libmp3lame -b:a 320k -q:a 0 -joint_stereo 1 -cutoff {cut_off} "{output_path}"'
 
 		try:
 			subprocess.run(ffmpeg, shell=True, text=True, capture_output=True, check=True)
@@ -86,6 +92,20 @@ def Save_Audio(file_path, audio, sample_rate, output_format, cut_off, ffmpeg):
 		temp.close()
 		os.remove(temp.name)
 
+	# Copy original tags to the output file
+	# only for MP3 files
+	input_format = file_format(audio_file)
+	output_format = file_format(output_path)
+
+	if(input_format == '.mp3' and output_format == '.mp3'):
+		input_tags = ID3(audio_file)
+		output_tags = ID3(output_path)
+		for tag in input_tags:
+			output_tags[tag] = input_tags[tag]
+
+		print("► Copying tags...")
+		output_tags.save()
+
 def Normalize(audio, threshold_dB = -1.0):
 	"""
 	Normalize audio to -1.0 dB peak amplitude
@@ -396,3 +416,5 @@ def to_shape(x, target_shape):
 		padding_list.append(pad_tuple)
 
 	return np.pad(x, tuple(padding_list), mode='constant')
+
+def file_format(file): return os.path.splitext(file)[1]
diff --git a/App/inference.py b/App/inference.py
@@ -358,7 +358,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 				print(f"► Normalizing audio : {self.normalize} dB")
 				normalized = App.audio_utils.Normalize(original_audio, self.normalize)
 
-				self.Save_Audio(0, normalized)
+				self.Save_Audio(0, normalized, '', audio_file)
 		else:
 			normalized = original_audio
 
@@ -373,7 +373,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 				if audio is None:
 					audio = self.Extract_with_Model("Music", normalized, model)
 
-					self.Save_Audio(1, audio, model['Name'])
+					self.Save_Audio(1, audio, model['Name'], audio_file)
 
 				music_extracts.append(audio)
 
@@ -395,7 +395,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 			if audio is None:
 				audio = self.Extract_with_Model("Vocal", vocal_sub, model)
 
-				self.Save_Audio(2, audio, model['Name'])
+				self.Save_Audio(2, audio, model['Name'], audio_file)
 
 			vocal_extracts.append(audio)
 
@@ -419,7 +419,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 				if audio is None:
 					audio = self.Extract_with_Model("Bleed_Music", vocal_ensemble, model)
 
-					self.Save_Audio(3, audio, model['Name'])
+					self.Save_Audio(3, audio, model['Name'], audio_file)
 
 				bleed_extracts.append(audio)
 
@@ -458,7 +458,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 		if len(self.models['bleed_vocal']) == 0:
 			music_final = music_sub
 		else:
-			if self.DEBUG:  self.Save_Audio("4 - Music - SUB", music_sub)
+			if self.DEBUG:  self.Save_Audio("4 - Music - SUB", music_sub, audio_file)
 
 			bleed_extracts = [];  music_extracts = []
 
@@ -469,7 +469,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 				if audio is None:
 					audio = self.Extract_with_Model("Bleed_Vocal", music_sub, model)
 
-					self.Save_Audio(4, audio, model['Name'])
+					self.Save_Audio(4, audio, model['Name'], audio_file)
 
 				bleed_extracts.append(audio)
 
@@ -487,7 +487,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 					if audio is None:
 						audio = self.Extract_with_Model("Bleed_Music", bleed_ensemble, model)
 
-						self.Save_Audio(5, audio, model['Name'])
+						self.Save_Audio(5, audio, model['Name'], audio_file)
 
 					music_extracts.append(audio)
 
@@ -510,7 +510,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 		# Apply silence filter
 		if self.silent < 0:  vocal_final = App.audio_utils.Silent(vocal_final, self.sample_rate, self.silent)
 
-		self.Save_Audio(6, vocal_final)
+		self.Save_Audio(6, vocal_final, '', audio_file)
 
 		print("► Save Music FINAL !")
 
@@ -520,7 +520,7 @@ def SEPARATE(self, audio_file, BATCH_MODE):
 		# Apply silence filter
 		if self.silent < 0:  music_final = App.audio_utils.Silent(music_final, self.sample_rate, self.silent)
 
-		self.Save_Audio(7, music_final)
+		self.Save_Audio(7, music_final, '', audio_file)
 
 		print('<b>--> Processing DONE !</b>')
 
@@ -781,7 +781,7 @@ def Check_Already_Processed(self, key, model_name = ""):
 
 		return None
 
-	def Save_Audio(self, key, audio, model_name = ""):
+	def Save_Audio(self, key, audio, model_name = "", audio_file = ""):
 		"""
 		Key : index of AudioFiles list or "str" (direct filename for test mode)
 		if Key is a string, it will force saving !
@@ -804,7 +804,7 @@ def Save_Audio(self, key, audio, model_name = ""):
 
 		file = os.path.join(self.song_output_path, filename)
 
-		App.audio_utils.Save_Audio(file, audio, self.sample_rate, self.output_format, self.original_cutoff, self.ffmpeg)
+		App.audio_utils.Save_Audio(file, audio, self.sample_rate, self.output_format, self.original_cutoff, self.ffmpeg, audio_file)
 
 
 	def demix_full(self, mix, use_model, infer_session, bigshifts, pass_number = 0):

diff --git a/requirements.txt b/requirements.txt
@@ -20,6 +20,6 @@ onnxruntime-gpu
 ml_collections
 PyYAML
 
-#****  MP3 Tags (for future use)  ****
+#****  MP3 Tags ****
 
-# mutagen
+mutagen