diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d7901e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/__pycache__ \ No newline at end of file diff --git a/AuxIVA.py b/AuxIVA.py new file mode 100644 index 0000000..8c6e6c1 --- /dev/null +++ b/AuxIVA.py @@ -0,0 +1,100 @@ +import numpy as np +from numpy.linalg import inv +from scipy.signal import stft, istft +import sys + + +# suppose that the number of sources and microphones are equal. + +# M : # of channels whose index is m +# K : # of frequency bins whose index is k +# T : # of time frames whose index is t + +class AuxIVA: + + def __init__(self, x, sample_freq, beta=0.2, win='hanning', nperseg=256, noverlap=128,nchannel=3): + ''' + @param(win):str, desired window to use. + @param(nperseg): length of each segment. + @param(noverlap): number of points to overlap between segments. + ''' + self.max_iter = 20 + self.x = np.array(x) + self.sample_freq = sample_freq + self.win = win + self.nperseg = nperseg + self.noverlap = noverlap + self.nchannel = nchannel + self.beta = beta + + def auxiva(self): + ''' + X is complex64-type-3-dementional array whose x axis is microphie , y axis is the segment times, z is frequency respectively. + @output(x_prd): 2 dimensional array whose 1st axis is the source index, 2nd is data of them. + ''' + + f, _, X = stft(self.x, self.sample_freq, self.win, self.nperseg, self.noverlap) + # X is (channel index, freq index, time segment index) + n_bin = len(f) + n_timesegment = len(X[0,0,:]) + W = self.W_estimate(X,n_bin,n_timesegment) + Y = np.zeros(X.shape,dtype='complex64') + for f in range(n_bin): + Y[:,f,:] = np.dot(W[f,:,:],X[:,f,:]) + + _, x_prd = istft(Y, self.sample_freq, self.win, self.nperseg, self.noverlap) + + return x_prd + + + + def W_estimate(self,X,n_bin,n_timesegment): + nchannel = self.nchannel + W = np.zeros((n_bin,nchannel,nchannel),dtype='complex64') + for f in range(n_bin): + W[f,:,:] = np.eye(nchannel, dtype='complex64') + + for i in range(self.max_iter): + for f in range(n_bin): + for k in range(nchannel): + r = self.Y_L2norm(W[f,:,:],X[:,f,:],k,n_bin,n_timesegment) + fai = self.WeigtingFunction(r,self.beta,n_timesegment) + V = self.CovarianceMatrix(fai,X[:,f,:],n_timesegment) + w_k = np.zeros(nchannel) + w_k = np.dot(np.linalg.inv(np.dot(W[f,:,:],V)),np.eye(nchannel)[k].T) + w_k = w_k/np.sqrt(np.dot(np.dot(np.conjugate(w_k.T),V),w_k)) + W[f,k,:] = np.conjugate(w_k) + Y = np.dot(W[f,:,:],X[:,f,:]) + c = 0 + for k in range(nchannel): + for t in range(n_timesegment): + c += np.power(np.linalg.norm(Y[k,t]),2) + c = c/n_timesegment/nchannel/n_bin + W[f,:,:] = W[f,:,:]/np.linalg.norm(c) + print(i) + return W + + + + + def Y_L2norm(self,W,X,k,n_bin,n_timesegment): + r = np.zeros((n_timesegment)) + for i in range(n_bin): + r += np.power(np.absolute(np.dot(W[k],X)),2) + return np.sqrt(r) + + def WeigtingFunction(self,r,beta,n_timesegment,fai0=1000): + for t in range(n_timesegment): + if(r[t]>=fai0): + r[t] = r[t]**(beta-2) + else: + r[t]=fai0 + return r + + def CovarianceMatrix(self,fai,X,n_timesegment): + nchannel = self.nchannel + V = np.zeros((nchannel,nchannel), dtype='complex64') + for i in range(n_timesegment): + V += fai[i]*np.dot(X[:,np.newaxis,i],np.conjugate(X[:,np.newaxis,i].T)) + return V/n_timesegment + \ No newline at end of file diff --git a/FDICA.py b/FDICA.py new file mode 100644 index 0000000..89e1d90 --- /dev/null +++ b/FDICA.py @@ -0,0 +1,249 @@ +import numpy as np +from numpy.linalg import inv +from scipy.signal import stft, istft +from munkres import Munkres, print_matrix +from tqdm import tqdm +import time + + +#suppose that the number of sources and microphones are equal. + +class ICA: + ''' + @func(__fai_func_sigmoid): use sigmoid as fai func. + @func(__fai_func_sign): use sign functio as fai func. + @func(__fai_func_tanh): use tanh as fai func. + ''' + + def __init__(self, num_iter=200): + self.max_iter = num_iter + self.eta = 1.0e-4 # is step size + self.EPS = 1.0e-12 # is epsilon for sign function below. + print('TDICA iteration: {} [times]'.format(self.max_iter)) + + def ica(self, x): + x = np.array(x) + w = self.__optimize(x) + y = np.dot(w, x) + return y, w + + def __sign_scalar(self,x,z): + ''' + @input(z):complex scalar. + @output(x):complex scalar. + ''' + if np.abs(z.real) < self.EPS: + x += 0.0 + elif z.real > 0: + x += 1.0 + else: + x += -1.0 + + if np.abs(z.imag) < self.EPS: + x += 0.0 + elif z.imag > 0: + x += 1.0j + else: + x += -1.0j + return x + + def __sign(self,z): + sign_func = np.vectorize(self.__sign_scalar) + x = np.zeros_like(z) + return sign_func(x,z) + + def __fai_func_sigmoid(self, y): + return 1/(1+np.exp(-y.real)) + 1j*1/(1+np.exp(-y.imag)) + + def __fai_func_sign(self, y): + return self.__sign(y) + + def __fai_func_tanh(self,y): + return np.tanh(100.0 * y) + + def __alpha(self, y): + ''' + You can change the __fai_func_xxxxx from 3 different function above. + ''' + return np.dot(self.__fai_func_sigmoid(y), y.T.conjugate()) + + def __optimize(self, x): + r,c = x.shape + w = np.zeros((r,r), dtype=np.complex64) + w += np.diag(np.ones(r)) + + + for _ in range(self.max_iter): + y = np.dot(w, x) + alpha = self.__alpha(y) + + alpha = alpha/c + w += self.eta * np.dot((np.diag(np.diag(alpha)) - alpha), w) + return w + +class FDICA(ICA): + ''' + The class FDCIA is inherited from ICA + ''' + + def __init__(self, x, sample_freq, num_iter=200, win='boxcar', nperseg=256, noverlap=126): + ''' + @param(n_iter): the times of iteration of TDICA optmization. + @param(win):str, desired window to use. + @param(nperseg): length of each segment. + @param(noverlap): number of points to overlap between segments. + * (nperseg, noverlap) = (1024, 512) + ''' + print('-----------------------------------------') + super().__init__(num_iter=num_iter) + self.m_shit = 5 + self.x = np.array(x) + self.sample_freq = sample_freq + self.win = win + self.nperseg = nperseg + self.noverlap = noverlap + print('The sample frequency: {} [/sec]'.format(sample_freq)) + print('The length of each segment: {}'.format(nperseg)) + print('The number of points to overlap between segments: {}'.format(noverlap)) + + def fdica(self): + ''' + X is complex64-type-3-dementional array whose x axis is microphie , y axis is the segment times, z is frequency respectively. + @output(x_prd): 3 dimensional array whose 1st axis is the source index, 2nd is the microphon index, third is data of them. + ''' + start = time.time() + print('Now... short time discrete fourier transformation') + + f,_,X = stft(self.x, self.sample_freq, self.win, self.nperseg, self.noverlap) + # X is (channel index, freq index, time segment idex) + + y = self.reconstruct(f,X) + + print('Now... inverted short time discrete fourier transformation') + + _,x_prd = istft(y[:,:,:,0], self.sample_freq, self.win, self.nperseg, self.noverlap) + + deltatime = time.time()-start + print('FDICA took {} [sec] to finish'.format(deltatime)) + print('-----------------------------------------') + return x_prd + + + def reconstruct(self,f,X): + ''' + This func is the way of permutation. + @param(f): frequency array. + @param(X): stft of time series x. + @output(y): + v is 4 dementional array whose 1st axis is the source index, 2nd axis is the microphone index, 4th axis is frequency index. + ''' + + epsilon_v = np.zeros(X.shape) + + v = np.zeros((X.shape[0], X.shape[1], X.shape[2], X.shape[0]), dtype=np.complex64) + + print('Now... separation in each {} frequency.'.format(len(f))) + + for i in tqdm(range(len(f))): # i refers to the freq. + U,B = self.ica(X[:,i,:]) + epsilon_v[:,i,:], v[:,i,:,:] = self.get_epsilon(U, B) + + sim = np.zeros_like(f) + for i in range(len(f)): + sim[i] = self.get_sim(epsilon_v, i) + + odr_sim = np.argsort(-sim, kind='heapsort') + + y = np.zeros_like(v, dtype=np.complex64) + epsilon_y = np.zeros_like(epsilon_v) + + n = epsilon_v.shape[0] + + y[:,0,:,:] = v[:,odr_sim[0],:,:] + epsilon_y[:,0,:] = epsilon_v[:,odr_sim[0],:] + + print('Now... permutation in each {} frequency.'.format(len(odr_sim))) + + for k, w_k in enumerate(tqdm(odr_sim)): + if(k==0): + continue + + #create matrix for correlation + crlat = np.zeros((n,n)) + for a in range(n): + for b in range(n): + for j in range(k-1): + w_j = odr_sim[j] + crlat[a][b] += np.sum(epsilon_v[b,w_k,:]*epsilon_y[a,w_j,:]) + + #complete matching with munkres algorithm + munkres = Munkres() + indexes = munkres.compute(-crlat) + + for i in range(n): + y[i,w_k,:,:] = v[indexes[i][1],w_k,:,:] + + epsilon_y[:,w_k,:] = self.make_epsilon(y[:,w_k,:,:]) + + return y + + def get_epsilon(self, U, B): + ''' + for specific frequency w. + @input(U): 2 dimensional complex ndarray. x-axis is channel index, y-axis time segment. + @input(B): 2 dimensional complex ndarray. x,y-axies are channel indices. + @output(v): 3 dimensional ndarray. z-axis is channel index j. + ''' + + n, TS = U.shape + epsilon_v = np.zeros((n, TS)) + v = np.zeros((n,TS,n), dtype=np.complex64) + sum_v = np.zeros((n,TS), dtype=np.complex64) + + for ts in range(TS): + v[:,ts,:] = np.dot(inv(B), np.diag(U[:,ts].flatten())).T + + epsilon_v = self.make_epsilon(v) + + return epsilon_v, v + + + def make_epsilon(self, v): + ''' + This yeilds the epsilon of v from v. + @param(v): 3 dimensional array whose x,z axis are source n, y is segment times. + @output(epsilon_v): real value. + ''' + n, TS, _ = v.shape + epsilon_v = np.zeros((n, TS)) + sum_v = np.sum(np.abs(v), axis=2) + + for ts in range(TS): + for dts in range(np.maximum(0,ts-self.m_shit), np.minimum(TS, ts+self.m_shit+1)): + epsilon_v[:,ts] += 1/(2*self.m_shit) * sum_v[:, dts] + + return epsilon_v + + def epsilon_dot(self, epsilon_v, w1_i, i, w2_i, j): + ''' + @param(epsilon_v): is 3 dimentional array. z-axis denotes the frequency. + @param(w1,i): those are set of frequency index and microphone index, which is also the case with (w2,j) + ''' + return np.sum(epsilon_v[i,w1_i,:] * epsilon_v[j,w2_i,:]) + + def epsilon_abs(self, epsilon_v, w_i, i): + + return np.sqrt(self.epsilon_dot(epsilon_v, w_i, i, w_i, i)) + + + def get_sim(self, epsilon_v , w_i): + ''' + @param(w): frequency indices + @output(sim): cross correlation. + ''' + n = epsilon_v.shape[0] + sim = .0 + for i in range(n-1): + for j in range(i,n): + sim += self.epsilon_dot(epsilon_v, w_i, i, w_i, j)/(self.epsilon_abs(epsilon_v, w_i, i)*self.epsilon_abs(epsilon_v, w_i, j)) + return sim diff --git a/IVA.py b/IVA.py new file mode 100644 index 0000000..a9c6ca9 --- /dev/null +++ b/IVA.py @@ -0,0 +1,119 @@ +import numpy as np +from tqdm import tqdm +from numpy.linalg import inv +from scipy.signal import stft, istft + +epsilon = 1e-6 + + +# suppose that the number of sources and microphones are equal. + +# M : # of channels whose index is m +# K : # of frequency bins whose index is k +# T : # of time frames whose index is t + +class IVA: + + def __init__(self, x, sample_freq, win='hanning', nperseg=256, noverlap=128): + ''' + @param(win):str, desired window to use. + @param(nperseg): length of each segment. + @param(noverlap): number of points to overlap between segments. + ''' + self.max_iter = 1000 + self.eta = 2.5 * 10 ** (-2) # is step size + self.x = np.array(x) + self.sample_freq = sample_freq + self.win = win + self.nperseg = nperseg + self.noverlap = noverlap + + def iva(self): + ''' + X is complex64-type-3-dementional array whose x axis is microphie , y axis is the segment times, z is frequency respectively. + @output(x_prd): 2 dimensional array whose 1st axis is the source index, 2nd is data of them. + ''' + + f, _, X = stft(self.x, self.sample_freq, self.win, self.nperseg, self.noverlap) + # X is (channel index, freq index, time segment index) + + y = self.reconstruct(X) + + _, x_prd = istft(y, self.sample_freq, self.win, self.nperseg, self.noverlap) + + return x_prd + + def reconstruct(self, X): + ''' + This func is the way of permutation. + @param(f): frequency array. + @param(X): stft of time series x. + @output(y):y is 3 dementional array + whose 1st axis is source index 2nd axis is frequency index and 3rd is time segment index. + ''' + + w = self.__optimize(X) + print(w[:2,:,:]) + y = np.empty(X.shape, dtype=np.complex64) + for k in range(X.shape[1]): + y[:,k,:] = np.dot(w[k,:,:], X[:,k,:]) + + return y + + """ + def __phi_func(self, y): + # y is (channel index, freq index) + # return is (channel index, freq index) + ysq = np.sum(np.abs(y)**2, axis=1) + ysq1 = 1/np.sqrt(ysq) + phi = (ysq1*y.T).T + return phi + + + def __alpha(self, y): + # y is (channel index, freq index, time segment index) + M, K, T = y.shape + alpha = np.zeros((K, M, M), dtype=np.complex64) + for t in range(T): + phi = self.__phi_func(y[:,:,t]) + for k in range(K): + alpha[k,:,:] += np.dot(np.matrix(phi[:,k]).T, np.matrix(y[:,k,t].conjugate())) + alpha = alpha / T + return np.array(alpha) + """ + + def __alpha2(self, y): + # y is (channel index, freq index, time segment index) + M, K, T = y.shape + alpha = np.zeros((K, M, M), dtype=np.complex64) + + ysq = np.sum(np.abs(y) ** 2, axis=1) + ysq1 = 1 / np.sqrt(ysq) + for k in range(K): + phi = ysq1*y[:,k,:] + alpha[k,:,:] = np.dot(phi, np.conjugate(y[:,k,:].T))/T + return alpha + + def __adjust(self, w): + w = np.dot(np.diag(np.diag(inv(w))), w) + return w + + def __optimize(self, X): + M, K, T = X.shape + w = np.zeros((K, M, M), dtype=np.complex64) + y = np.empty((M, K, T), dtype=np.complex64) + for k in range(K): + # w[k,:,:] += np.random.randn(M,M) + w[k,:,:] += np.eye(M) + + for _ in tqdm(range(self.max_iter)): + for k in range(K): + y[:,k,:] = np.dot(w[k,:,:], X[:,k,:]) + alpha = self.__alpha2(y) + for k in range(K): + w[k,:,:] += self.eta * np.dot((np.eye(M) - alpha[k,:,:]), w[k,:,:]) + + for k in range(K): + w[k,:,:] = self.__adjust(w[k,:,:]) + + return w diff --git a/a.py b/a.py new file mode 100644 index 0000000..a57babe --- /dev/null +++ b/a.py @@ -0,0 +1,11 @@ +import numpy as np + +A=np.array([[1,2,3],[-2,3,-1],[-3,-1,5]]) +a = np.array([1,2,3]) +b = np.array([-2,3,-1]) +c = np.array([-3,-1,5]) + + +print(np.linalg.norm(A,2)) +print(np.linalg.norm(a,2)+np.linalg.norm(b,2)+np.linalg.norm(c,2)) +print(np.zeros(3).shape) \ No newline at end of file diff --git a/cis.py b/cis.py new file mode 100644 index 0000000..b4390f2 --- /dev/null +++ b/cis.py @@ -0,0 +1,40 @@ +import numpy as np +import simpleaudio as sa +import scipy.io.wavfile as sw + + +def audioplay(fs, y): + yout = np.iinfo(np.int16).max / np.max(np.abs(y)) * y + yout = yout.astype(np.int16) + play_obj = sa.play_buffer(yout, y.ndim, 2, fs) + + +def wavread(wavefile): + fs, y = sw.read(wavefile) + if y.dtype == 'float32' or y.dtype == 'float64': + max_y = 1 + elif y.dtype == 'uint8': + y = y - 128 + max_y = 128 + elif y.dtype == 'int16': + max_y = np.abs(np.iinfo(np.int16).min) + else: + max_y = np.abs(np.iinfo(np.int16).min) + y = y / max_y + y = y.astype(np.float32) + + return fs, y + + +def wavwrite(wavefile, fs, data): + if data.dtype == 'float32' or data.dtype == 'float64': + max_y = np.max(np.abs(data)) + elif data.dtype == 'uint8': + data = data - 128 + max_y = 128 + elif data.dtype == 'int16': + max_y = np.abs(np.iinfo(np.int16).min) + else: + max_y = np.abs(np.iinfo(np.int16).min) + data = np.int16(data / max_y * np.abs(np.iinfo(np.int16).min)) + sw.write(wavefile, fs, data) diff --git a/fastAuxIVA.py b/fastAuxIVA.py new file mode 100644 index 0000000..adecc38 --- /dev/null +++ b/fastAuxIVA.py @@ -0,0 +1,98 @@ +import numpy as np +from numpy.linalg import inv +from scipy.signal import stft, istft +import sys + + +# suppose that the number of sources and microphones are equal. + +# M : # of channels whose index is m +# K : # of frequency bins whose index is k +# T : # of time frames whose index is t + +class AuxIVA: + + def __init__(self, x, sample_freq, beta=0.2, win='hanning', nperseg=256, noverlap=128,nchannel=3): + ''' + @param(win):str, desired window to use. + @param(nperseg): length of each segment. + @param(noverlap): number of points to overlap between segments. + ''' + self.max_iter = 100 + self.x = np.array(x) + self.sample_freq = sample_freq + self.win = win + self.nperseg = nperseg + self.noverlap = noverlap + self.nchannel = nchannel + self.beta = beta + + def auxiva(self): + ''' + X is complex64-type-3-dementional array whose x axis is microphie , y axis is the segment times, z is frequency respectively. + @output(x_prd): 2 dimensional array whose 1st axis is the source index, 2nd is data of them. + ''' + + f, _, X = stft(self.x, self.sample_freq, self.win, self.nperseg, self.noverlap) + # X is (channel index, freq index, time segment index) + n_bin = len(f) + n_timesegment = len(X[0,0,:]) + W = self.W_estimate(X,n_bin,n_timesegment) + Y = np.zeros(X.shape,dtype='complex64') + for f in range(n_bin): + Y[:,f,:] = np.dot(W[f,:,:],X[:,f,:]) + + _, x_prd = istft(Y, self.sample_freq, self.win, self.nperseg, self.noverlap) + + return x_prd + + + + def W_estimate(self,X,n_bin,n_timesegment): + nchannel = self.nchannel + W = np.zeros((n_bin,nchannel,nchannel),dtype='complex64') + Y_k = np.zeros((nchannel,n_bin,n_timesegment),dtype='complex64') + + for f in range(n_bin): + W[f,:,:] = np.eye(nchannel, dtype='complex64') + + for i in range(self.max_iter): + r = self.Y_L2norm(W,X,n_bin,n_timesegment,nchannel) + fai = self.WeigtingFunction(r,self.beta) + for f in range(n_bin): + V = self.CovarianceMatrix(fai,X[:,f,:],n_timesegment) + for k in range(nchannel): + w_k = np.zeros(nchannel,dtype="complex64") + w_k = np.dot(np.linalg.inv(np.dot(W[f,:,:],V[k,:,:])),np.eye(nchannel)[k]) + w_k = w_k/np.sqrt(np.dot(np.dot(np.conjugate(w_k.T),V[k,:,:]),w_k)) + W[f,k,:] = np.conjugate(w_k) + Y_k[:,f,:] = np.dot(W[f,:,:],X[:,f,:]) + self.Scaling(W,Y_k,n_timesegment,nchannel,n_bin) + print(i) + return W + + def Y_L2norm(self,W,X,n_bin,n_timesegment,nchannel): + r = np.zeros((nchannel,n_timesegment)) + for i in range(n_bin): + r += np.power(np.absolute(np.dot(W[i,:,:],X[:,i,:])),2) + return np.sqrt(r) + + def WeigtingFunction(self,r,beta,fai0=1000): + r = np.power(r,beta-2) + return np.clip(r,None,fai0) + + def CovarianceMatrix(self,fai,X,n_timesegment): + nchannel = self.nchannel + V = np.zeros((nchannel,nchannel,nchannel), dtype='complex64') + for k in range(nchannel): + V[k,:,:] = np.dot(fai[k]*X,np.conjugate(X.T)) + return V/n_timesegment + + def Scaling(self,W,Y_k,n_timesegment,nchannel,n_bin): + c = 0 + for i in range(nchannel): + for t in range(n_timesegment): + c += np.linalg.norm(Y_k[i,:,t],2) + c = c/n_timesegment/nchannel/n_bin + return W/c + diff --git a/fdicatest.py b/fdicatest.py new file mode 100644 index 0000000..37534f3 --- /dev/null +++ b/fdicatest.py @@ -0,0 +1,30 @@ +import numpy as np +import sys +import scipy.io.wavfile as wf +from FDICA import FDICA,ICA +from sound_mixing import Preprocessing + +#prepare data +rate1, data1 = wf.read('./samples/raw_1.wav') +rate2, data2 = wf.read('./samples/raw_2.wav') +rate3, data3 = wf.read('./samples/raw_3.wav') +if rate1 != rate2 or rate2 != rate3: + raise ValueError('Sampling_rate_Error') + + +data1 = data1.T[0,400000:800000] +data2 = data2.T[0,400000:800000] +data3 = data3.T[0,400000:800000] + +data = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +x = Preprocessing(data,10).mixing() + + +y = FDICA(x, sample_freq=rate1).fdica() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fdica_1_out.wav', rate1, y[0]) +wf.write('./samples/fdica_2_out.wav', rate2, y[1]) +wf.write('./samples/fdica_3_out.wav', rate3, y[2]) \ No newline at end of file diff --git a/ivatest.py b/ivatest.py new file mode 100644 index 0000000..540fa87 --- /dev/null +++ b/ivatest.py @@ -0,0 +1,21 @@ +import numpy as np +import sys +import cis +import scipy.io.wavfile as wf +from IVA import IVA +from sound_mixing import Preprocessing + +#prepare data +rate1, data1 = cis.wavread('./samples/music1.wav') +rate2, data2 = cis.wavread('./samples/music2.wav') +rate3, data3 = cis.wavread('./samples/music3.wav') +if rate1 != rate2 or rate2 != rate3: + raise ValueError('Sampling_rate_Error') + +fs = rate1 +x = np.array([data1, data2, data3], dtype=np.float32) +y = IVA(x, fs).iva() + +cis.wavwrite('./samples/iva_1_out.wav', fs, y[0]) +cis.wavwrite('./samples/iva_2_out.wav', fs, y[1]) +cis.wavwrite('./samples/iva_3_out.wav', fs, y[2]) \ No newline at end of file diff --git a/mixtest.py b/mixtest.py new file mode 100644 index 0000000..975e541 --- /dev/null +++ b/mixtest.py @@ -0,0 +1,62 @@ +import numpy as np +import sys +import scipy.io.wavfile as wf +from fastAuxIVA import AuxIVA +from sound_mixing import Preprocessing + +#prepare data +rate1, data1 = wf.read('./samples/fmix_1.wav') +rate2, data2 = wf.read('./samples/fmix_2.wav') +rate3, data3 = wf.read('./samples/fmix_3.wav') +if rate1 != rate2 or rate2 != rate3: + raise ValueError('Sampling_rate_Error') + +x = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +y = AuxIVA(x, sample_freq=rate1,beta=0.1).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fmix_1_out_1.wav', rate1, y[0]) +wf.write('./samples/fmix_2_out_1.wav', rate2, y[1]) +wf.write('./samples/fmix_3_out_1.wav', rate3, y[2]) + +x = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +y = AuxIVA(x, sample_freq=rate1,beta=0.2).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fmix_1_out_2.wav', rate1, y[0]) +wf.write('./samples/fmix_2_out_2.wav', rate2, y[1]) +wf.write('./samples/fmix_3_out_2.wav', rate3, y[2]) + +x = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +y = AuxIVA(x, sample_freq=rate1,beta=0.3).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fmix_1_out_3.wav', rate1, y[0]) +wf.write('./samples/fmix_2_out_3.wav', rate2, y[1]) +wf.write('./samples/fmix_3_out_3.wav', rate3, y[2]) + +x = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +y = AuxIVA(x, sample_freq=rate1,beta=0.4).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fmix_1_out_4.wav', rate1, y[0]) +wf.write('./samples/fmix_2_out_4.wav', rate2, y[1]) +wf.write('./samples/fmix_3_out_4.wav', rate3, y[2]) + +x = np.vstack([data1.astype(float), data2.astype(float), data3.astype(float)]) + +y = AuxIVA(x, sample_freq=rate1,beta=0.5).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/fmix_1_out_5.wav', rate1, y[0]) +wf.write('./samples/fmix_2_out_5.wav', rate2, y[1]) +wf.write('./samples/fmix_3_out_5.wav', rate3, y[2]) \ No newline at end of file diff --git a/samples/fanfare.wav b/samples/fanfare.wav new file mode 100755 index 0000000..60368fc Binary files /dev/null and b/samples/fanfare.wav differ diff --git a/samples/fdica_music1.wav b/samples/fdica_music1.wav new file mode 100644 index 0000000..340818b Binary files /dev/null and b/samples/fdica_music1.wav differ diff --git a/samples/fdica_music2.wav b/samples/fdica_music2.wav new file mode 100644 index 0000000..572e943 Binary files /dev/null and b/samples/fdica_music2.wav differ diff --git a/samples/fdica_music3.wav b/samples/fdica_music3.wav new file mode 100644 index 0000000..aeaa907 Binary files /dev/null and b/samples/fdica_music3.wav differ diff --git a/samples/fmix_1.wav b/samples/fmix_1.wav new file mode 100755 index 0000000..480c047 Binary files /dev/null and b/samples/fmix_1.wav differ diff --git a/samples/fmix_1_out.wav b/samples/fmix_1_out.wav new file mode 100644 index 0000000..0804215 Binary files /dev/null and b/samples/fmix_1_out.wav differ diff --git a/samples/fmix_1_out_1.wav b/samples/fmix_1_out_1.wav new file mode 100644 index 0000000..4b1f749 Binary files /dev/null and b/samples/fmix_1_out_1.wav differ diff --git a/samples/fmix_1_out_2.wav b/samples/fmix_1_out_2.wav new file mode 100644 index 0000000..87a97b6 Binary files /dev/null and b/samples/fmix_1_out_2.wav differ diff --git a/samples/fmix_1_out_3.wav b/samples/fmix_1_out_3.wav new file mode 100644 index 0000000..d082fc7 Binary files /dev/null and b/samples/fmix_1_out_3.wav differ diff --git a/samples/fmix_1_out_4.wav b/samples/fmix_1_out_4.wav new file mode 100644 index 0000000..bd01e08 Binary files /dev/null and b/samples/fmix_1_out_4.wav differ diff --git a/samples/fmix_1_out_5.wav b/samples/fmix_1_out_5.wav new file mode 100644 index 0000000..9354710 Binary files /dev/null and b/samples/fmix_1_out_5.wav differ diff --git a/samples/fmix_2.wav b/samples/fmix_2.wav new file mode 100755 index 0000000..110528e Binary files /dev/null and b/samples/fmix_2.wav differ diff --git a/samples/fmix_2_out.wav b/samples/fmix_2_out.wav new file mode 100644 index 0000000..8d3c777 Binary files /dev/null and b/samples/fmix_2_out.wav differ diff --git a/samples/fmix_2_out_1.wav b/samples/fmix_2_out_1.wav new file mode 100644 index 0000000..e01eca6 Binary files /dev/null and b/samples/fmix_2_out_1.wav differ diff --git a/samples/fmix_2_out_2.wav b/samples/fmix_2_out_2.wav new file mode 100644 index 0000000..4a3115a Binary files /dev/null and b/samples/fmix_2_out_2.wav differ diff --git a/samples/fmix_2_out_3.wav b/samples/fmix_2_out_3.wav new file mode 100644 index 0000000..8dab9bd Binary files /dev/null and b/samples/fmix_2_out_3.wav differ diff --git a/samples/fmix_2_out_4.wav b/samples/fmix_2_out_4.wav new file mode 100644 index 0000000..a9a4d3f Binary files /dev/null and b/samples/fmix_2_out_4.wav differ diff --git a/samples/fmix_2_out_5.wav b/samples/fmix_2_out_5.wav new file mode 100644 index 0000000..8f55aca Binary files /dev/null and b/samples/fmix_2_out_5.wav differ diff --git a/samples/fmix_3.wav b/samples/fmix_3.wav new file mode 100755 index 0000000..5e34790 Binary files /dev/null and b/samples/fmix_3.wav differ diff --git a/samples/fmix_3_out.wav b/samples/fmix_3_out.wav new file mode 100644 index 0000000..a7686b7 Binary files /dev/null and b/samples/fmix_3_out.wav differ diff --git a/samples/fmix_3_out_1.wav b/samples/fmix_3_out_1.wav new file mode 100644 index 0000000..c322c96 Binary files /dev/null and b/samples/fmix_3_out_1.wav differ diff --git a/samples/fmix_3_out_2.wav b/samples/fmix_3_out_2.wav new file mode 100644 index 0000000..d2270f8 Binary files /dev/null and b/samples/fmix_3_out_2.wav differ diff --git a/samples/fmix_3_out_3.wav b/samples/fmix_3_out_3.wav new file mode 100644 index 0000000..d25af49 Binary files /dev/null and b/samples/fmix_3_out_3.wav differ diff --git a/samples/fmix_3_out_4.wav b/samples/fmix_3_out_4.wav new file mode 100644 index 0000000..bf69726 Binary files /dev/null and b/samples/fmix_3_out_4.wav differ diff --git a/samples/fmix_3_out_5.wav b/samples/fmix_3_out_5.wav new file mode 100644 index 0000000..735ca00 Binary files /dev/null and b/samples/fmix_3_out_5.wav differ diff --git a/samples/loop1.wav b/samples/loop1.wav new file mode 100755 index 0000000..2b9c0ba Binary files /dev/null and b/samples/loop1.wav differ diff --git a/samples/music1.wav b/samples/music1.wav new file mode 100644 index 0000000..1c94e94 Binary files /dev/null and b/samples/music1.wav differ diff --git a/samples/music1_2.wav b/samples/music1_2.wav new file mode 100644 index 0000000..fbdc037 Binary files /dev/null and b/samples/music1_2.wav differ diff --git a/samples/music2_2.wav b/samples/music2_2.wav new file mode 100644 index 0000000..afa67e7 Binary files /dev/null and b/samples/music2_2.wav differ diff --git a/samples/raw_1.wav b/samples/raw_1.wav new file mode 100755 index 0000000..e405dd3 Binary files /dev/null and b/samples/raw_1.wav differ diff --git a/samples/raw_2.wav b/samples/raw_2.wav new file mode 100755 index 0000000..c1888a3 Binary files /dev/null and b/samples/raw_2.wav differ diff --git a/samples/raw_3.wav b/samples/raw_3.wav new file mode 100755 index 0000000..f7a2885 Binary files /dev/null and b/samples/raw_3.wav differ diff --git a/samples/strings.wav b/samples/strings.wav new file mode 100755 index 0000000..efd1723 Binary files /dev/null and b/samples/strings.wav differ diff --git a/sound_mixing.py b/sound_mixing.py new file mode 100644 index 0000000..0b26105 --- /dev/null +++ b/sound_mixing.py @@ -0,0 +1,37 @@ +import numpy as np +from scipy.signal import stft, istft + +class Preprocessing(): + + def __init__(self,s,r): + ''' + sは音源信号行列((3,441000)) + rは音源が並んでいる円の半径(スカラー) + ''' + self.s = s + self.r = r + + def mixing(self): + all2bDis = self.r + a2cDis = self.r * np.sqrt(2+np.sqrt(2)) + a2aDis = self.r * np.sqrt(2-np.sqrt(2)) + b2aDis = self.r * np.sqrt(2) + + all2bTime = all2bDis/340.5 #これが基準 + a2cTime = a2cDis/340.5 + a2aTime = a2aDis/340.5 + b2aTime = b2aDis/340. + + F,_,S = stft(self.s, 44100, "boxcar", 256, 128) + n_bin = len(F) + X = np.empty_like(S) + for f in range(n_bin): + X[0,f,:] = S[0,f,:]*np.exp(-1j*2*np.pi*F[f]*a2aTime)+S[1,f,:]*np.exp(-1j*2*np.pi*F[f]*b2aTime)+S[2,f,:]*np.exp(-1j*2*np.pi*F[f]*a2cTime) + X[1,f,:] = S[0,f,:]*np.exp(-1j*2*np.pi*F[f]*all2bTime)+S[1,f,:]*np.exp(-1j*2*np.pi*F[f]*all2bTime)+S[2,f,:]*np.exp(-1j*2*np.pi*F[f]*all2bTime) + X[2,f,:] = S[0,f,:]*np.exp(-1j*2*np.pi*F[f]*a2cTime)+S[1,f,:]*np.exp(-1j*2*np.pi*F[f]*b2aTime)+S[2,f,:]*np.exp(-1j*2*np.pi*F[f]*a2aTime) + + _, x = istft(X, 44100, "boxcar", 256, 128) + return x + + + diff --git a/test.py b/test.py new file mode 100644 index 0000000..dae93ee --- /dev/null +++ b/test.py @@ -0,0 +1,19 @@ +import numpy as np +import sys +import scipy.io.wavfile as wf +from fastAuxIVA import AuxIVA +from sound_mixing import Preprocessing + +#prepare data +rate0, data0 = wf.read('./samples/samples/group/output.wav') + +data0 = data0.astype(float).T + + +y = AuxIVA(data0, sample_freq=rate0,beta=0.3).auxiva() + +y = [(y_i * 32767 / max(np.absolute(y_i))).astype(np.int16) for y_i in np.asarray(y)] + +wf.write('./samples/samples/group/auxiva_0.wav', rate0, y[0]) +wf.write('./samples/samples/group/auxiva_1.wav', rate0, y[1]) +wf.write('./samples/samples/group/auxiva_2.wav', rate0, y[2])