-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmultinmf_recons_im.py
62 lines (48 loc) · 1.82 KB
/
multinmf_recons_im.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
def multinmf_recons_im(X, W, H, Q, part):
    """
    Reconstruct source images from their non-negative factorization
    with non-negative channel weights.

    For every microphone, each source receives the fraction of the observed
    STFT given by its share of the modelled non-negative spectrogram at that
    microphone (a ratio mask applied to the mixture).

    Sizes

    * F: number of frequency bins
    * N: number of frames
    * K: number of latent variables in the non-negative decomposition
    * n_c: number of channels
    * n_s: number of sources

    Parameters
    ----------
    X: nd-array
        STFT of multichannel mixture (F x N x n_c)
    W: nd-array
        basis (F x K)
    H: nd-array
        activation coef. (K x N)
    Q: nd-array
        squared magnitude channels filters (F x n_c x n_s)
    part: list
        W and H are shared for all sources, this is a list of size n_s with
        i-th element being the list of indices in W and H corresponding to the
        i-th source

    Returns
    -------
    Im: nd-array
        complex STFT of every source at every microphone
        (F x N x n_s x n_c). Time-frequency bins where the modelled
        spectrogram summed over all sources is exactly zero are set to 0
        instead of NaN.
    """
    F, N, n_c = X.shape  # n_bins, n_frames, n_channels
    n_s = Q.shape[2]  # n_sources

    # NOTE: the `np.float` / `np.complex` aliases were removed from NumPy,
    # so explicit sized dtypes are used (same precision as before).
    P = np.zeros((F, N, n_s), dtype=np.float64)
    Im = np.zeros((F, N, n_s, n_c), dtype=np.complex128)

    # compute the separated spectrogram of all sources
    # from their factorized parts
    for j in range(n_s):
        P[:, :, j] = np.dot(W[:, part[j]], H[part[j], :])

    # Re-synthesize the source images, one microphone at a time
    for i in range(n_c):
        # recompute the approximated non-negative spectrogram at microphone i:
        # (F x 1 x n_s) channel weights broadcast over (F x N x n_s)
        P_at_mic = Q[:, i, np.newaxis, :] * P

        # mix all sources; keepdims so it broadcasts against P_at_mic
        P_at_mic_all_sources = np.sum(P_at_mic, axis=2, keepdims=True)

        # Procedure:
        # 1) Divide the separated source spectrogram by the cumulative
        #    non-negative approximation (left at 0 where the latter is 0,
        #    which avoids 0/0 -> NaN leaking into the output)
        # 2) Multiply by the original mixture STFT at this microphone
        ratio = np.divide(
            P_at_mic,
            P_at_mic_all_sources,
            out=np.zeros_like(P_at_mic),
            where=P_at_mic_all_sources != 0,
        )
        Im[:, :, :, i] = ratio * X[:, :, i, np.newaxis]

    return Im