Skip to content

Commit a342043

Browse files
committed
Add some more tests, and a utility for computing local amplitude.
1 parent cc4775d commit a342043

9 files changed

+453
-3
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*~
2+
[#]*[#]

LICENSE.txt

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Copyright 2019 Daniel Povey
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4+
5+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6+
7+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8+

filter_utils/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
from . multistreamer import Multistreamer
3+

filter_utils/filter_function.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
import numpy as np
33

44
#
5-
# This is the filter-function that is output from ../filter_shape/main.py with D = 256, S = 6, T = 4.
5+
# This is the filter-function that is output from ../filter_shape/optimize_filter.py with D = 256, S = 6, T = 4.
66
#
7-
# Some relevant output from ../filter_shape/main.py with S = 6:
7+
# Some relevant output from ../filter_shape/optimize_filter.py with S = 6:
88
# Iter 2900: relative error in frequency gain is 0.00015374938993331817; integral of energy in banned frequency region is 0.00013761181521137081
99
# f_penalty = 0.0+0.0021310990157878996; integral of abs(highpassed-signal) = 8.324605530421483e-06
1010
# F = tensor([1.0001e+00, 1.0001e+00, 1.0001e+00, ..., 2.9109e-06, 2.3796e-06,
1111
# 1.8225e-06])
1212

13-
# Some relevant output from ../filter_shape/main.py with S = 10 (note:
13+
# Some relevant output from ../filter_shape/optimize_filter.py with S = 10 (note:
1414
#Iter 2400: loss = 0.0002991063477731235 = 0.00010634876963097998 + 0.00019275757814214353
1515
#Iter 2400: relative error in frequency gain is 7.242217090437826e-06; integral of energy in banned frequency region is 1.7746373966239925e-05
1616
#f_penalty = 0.0+0.0008506770552570358; integral of abs(highpassed-signal) = 3.322957247097796e-06

filter_utils/local_amplitude.py

+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# To be run with python3. Caution: this module requires torch!
2+
3+
"""
4+
This module defines an object called Normalizer that can be used to
5+
normalize the output of class Multistreamer (from ./multistreamer.py),
6+
with a view to making the data easier for neural nets to process.
7+
8+
The basic idea is that we compute a moving average of the amplitude of the
9+
signal within each frequency band, and use that to normalize the signal. (The
10+
neural net will see both the normalized signal and the log of the normalization
11+
factor). The idea is that after possibly being modified by the nnet
12+
(e.g. denoised), we then 'un-normalize' the signal with the same normalization
13+
factor.
14+
15+
We also provide a factor that can be used as part of the objective function
16+
if it's desired to put a greater weight on the louder frequency bands for
17+
training purposes.
18+
"""
19+
20+
21+
import numpy as np
22+
import cmath
23+
import math
24+
import torch
25+
from . import filter_function
26+
from . import filters
27+
from . import torch_filter
28+
from . import resampler
29+
30+
import matplotlib.pyplot as plt # TEMP
31+
32+
class LocalAmplitudeComputer:
33+
"""
34+
This class is a utility for computing the smoothed-over-time local amplitude
35+
of a signal, to be used in class Normalizer to compute a normalized form of
36+
the signal.
37+
"""
38+
def __init__(self,
39+
gaussian_stddev = 100.0,
40+
epsilon = 1.0e-05,
41+
block_size = 8,
42+
double_precision = False):
43+
"""
44+
Constructor.
45+
Args:
46+
gaussian_stddev (float): This can be interpreted as a time constant measured
47+
in samples; for instance, if the sampling rate of the signal
48+
we are normalizing is 1kHz, gaussian_stddev = 1000 would mean
49+
we're smoothing with approximately 1 second of data on each
50+
side.
51+
epsilon (float): A constant that is used to smooth the instantaneous
52+
amplitude. Dimensionally this is an amplitude.
53+
block_size A number which should be substantially less than
54+
gaussian_stddev. We first sum the data over blocks and then
55+
do convolutions, efficiency. Any number >= 1 is OK but
56+
numbers approaching gaussian_stddev may start to affect
57+
the output
58+
double_precision If true, create these filters in double precision
59+
(float64), will require input to be double too.
60+
"""
61+
if block_size < 1 or block_size >= gaussian_stddev / 2:
62+
raise ValueError("Invalid values block-size={}, gaussian-stddev={}".format(
63+
block_size, gaussian_stddev))
64+
65+
# reduced_stddev is the stddev after summing over blocks of samples
66+
# (which reduces the sampling rate by that factor).
67+
reduced_stddev = gaussian_stddev / block_size
68+
(f, i) = filters.gaussian_filter(reduced_stddev)
69+
# We'll be summing, not averaging over blocks, so we need
70+
# to correct for that factor.
71+
f *= (1.0 / block_size)
72+
73+
self.epsilon = epsilon
74+
75+
self.dtype = torch.float64 if double_precision else torch.float32
76+
77+
self.gaussian_filter = torch_filter.SymmetricFirFilter(
78+
(f,i), double_precision = double_precision)
79+
80+
81+
self.block_size = block_size
82+
if block_size > 1:
83+
# num_zeros = 4 is a lower-than-normal width for the FIR filter since there
84+
# won't be frequencies near the Nyquist and we don't need a sharp cutoff.
85+
# filter_cutoff_ratio = 9 is to avoid aliasing effects with this less-precise
86+
# filter (default is 0.95).
87+
self.resampler = resampler.Resampler(block_size, num_zeros = 4,
88+
filter_cutoff_ratio = 0.9,
89+
double_precision = double_precision)
90+
91+
92+
def compute(self,
93+
input):
94+
"""
95+
Computes and returns the local energy which is a smoothed version of the
96+
instantaneous amplitude.
97+
98+
Args:
99+
input: a torch.Tensor with dimension
100+
(minibatch_size, 2, num_channels, signal_length)
101+
representing the (real, imaginary) parts of `num_channels`
102+
parallel frequency channels. dtype should be
103+
torch.float32 if constructor had double_precision==False,
104+
else torch.float36.
105+
Returns:
106+
Returns a torch.Tensor with dimension (minibatch_size, num_channels,
107+
signal_length) containing the smoothed local amplitude.
108+
"""
109+
if not isinstance(input, torch.Tensor) or input.dtype != self.dtype:
110+
raise TypeError("Expected input to be of type torch.Tensor with dtype=".format(
111+
self.dtype))
112+
if len(input.shape) != 4 or input.shape[1] != 2:
113+
raise ValueError("Expected input to have 4 axes with the 2nd dim == 2, got {}".format(
114+
input.shape))
115+
(minibatch_size, two, num_channels, signal_length) = input.shape
116+
117+
118+
# We really want shape (minibatch_size, num_channels, signal_length) for
119+
# instantaneous_amplitude, but we want another array of size (signal_length)
120+
# containing all ones, for purposes of normalization after applying the
121+
# Gaussian smoothing (to correct for end effects)..
122+
amplitudes = torch.empty(
123+
(minibatch_size * num_channels + 1), signal_length,
124+
dtype=self.dtype)
125+
126+
# set the last row to all ones.
127+
amplitudes[minibatch_size*num_channels:,:] = 1
128+
129+
instantaneous_amplitude = amplitudes[0:minibatch_size*num_channels,:].view(
130+
minibatch_size, num_channels, signal_length)
131+
instantaneous_amplitude.fill_(self.epsilon*self.epsilon) # set to epsilon...
132+
instantaneous_amplitude += input[:,0,:,:] ** 2
133+
instantaneous_amplitude += input[:,1,:,:] ** 2
134+
instantaneous_amplitude.sqrt_()
135+
136+
137+
# summed_amplitudes has num-cols reduced by about self.block_size,
138+
# which will make convolution with a Gaussian easier.
139+
summed_amplitudes = self._block_sum(amplitudes)
140+
141+
142+
smoothed_amplitudes = self.gaussian_filter.apply(summed_amplitudes)
143+
assert smoothed_amplitudes.shape == summed_amplitudes.shape
144+
145+
upsampled_amplitudes = self.resampler.upsample(smoothed_amplitudes)
146+
assert upsampled_amplitudes.shape[1] >= signal_length
147+
148+
149+
150+
# Truncate to actual signal length (we may have a few extra samples at
151+
# the end.) Remove the first self.block_size samples to avoid small
152+
# phase changes, not that it would really matter since the block
153+
# size will be << the gaussian stddev.
154+
upsampled_amplitudes = upsampled_amplitudes[:,:signal_length]
155+
156+
n = minibatch_size*num_channels
157+
# The following corrects for constant factors, including a
158+
# 1/b factor that we missed when summing over blocks, and also for
159+
# edge effects so that we can interpret the Gaussian convolution as
160+
# an appropriately weighted average near the edges of the signal.
161+
# We took a signal of all-ones and put it through this process
162+
# as the last row of an n+1-row matrix, and we're using that
163+
# to normalize.
164+
# The shapes of the expressions below are, respectively:
165+
# (minibatch_size*num_channels, signal_length) and (1, signal_length)
166+
upsampled_amplitudes[0:n,:] /= upsampled_amplitudes[n:,:]
167+
168+
169+
# the `contiguous()` below would not be necessary if PyTorch had been
170+
# more carefully implemented, since the shapes here are quite compatible
171+
# with zero-copy. (Possibly it's not necessary even now, not 100%
172+
# sure.)
173+
return upsampled_amplitudes[0:n,:].contiguous().view(minibatch_size, num_channels,
174+
signal_length)
175+
176+
def _block_sum(self, amplitudes):
177+
"""
178+
This internal function sums the input amplitudes over blocks
179+
(we do this before the Gaussian filtering to save compute).
180+
181+
Args:
182+
amplitudes: a torch.Tensor with shape (n, s) with s being the
183+
signal length and n being some combination of minibatch
184+
and channel; dtype self.dtype
185+
Returns:
186+
returns a torch.Tensor with shape (n, t) where t = (s+2b-1)//b, where
187+
b is the block_size passed to the constructor. Note that this means
188+
we are padding with two extra outputs, one zero-valued block at the
189+
start and also a partial block sum at the end. This is necessary to
190+
ensure we have enough samples when we upsample the Gaussian-smoothed
191+
version of this. It also means we get the amplitude sum for time t
192+
from a Gaussian centered at about t - block_size/2; this is harmless.
193+
"""
194+
amplitudes = amplitudes.contiguous()
195+
b = self.block_size
196+
(n, s) = amplitudes.shape
197+
t = (s + 2 * b - 1) // b
198+
199+
ans = torch.zeros((n, t), dtype=self.dtype)
200+
201+
# make sure `amplitudes` is contiguous.
202+
203+
# t_end will be t-1 if there is a partial block, otherwise t.
204+
t_whole = s // b # the number of whole sums
205+
t_end = t_whole + 1
206+
s_whole = (s // b) * b
207+
208+
# Sum over the b elements of each block.
209+
ans[:,1:t_end] += amplitudes[:,:s_whole].view(n, t_whole, b).sum(dim=-1)
210+
if t_end != t:
211+
# sum over the left-over columns, i.e. sum over k things where k ==
212+
# s % b
213+
ans[:,t_end] += amplitudes[:,s_whole:].sum(dim=-1)
214+
return ans
215+
216+
217+
218+
219+

filter_utils/resampler.py

+1
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,4 @@ def upsample(self, input):
123123
stride=self.N,
124124
padding=self.padding).squeeze(1)
125125

126+

filter_utils/torch_filter.py

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# To be run with python3
2+
3+
"""
4+
This module defines an object that can be used for upsampling and downsampling
5+
of signals. Note: unlike ./filters.py, this object has a torch dependency.
6+
(It uses ./filters.py for initialization though.)
7+
"""
8+
9+
10+
import numpy as np
11+
from . import filters
12+
import math
13+
import torch
14+
15+
class SymmetricFirFilter:
16+
"""
17+
This class is used for applying symmetric FIR filters using torch 1d
18+
convolution.
19+
"""
20+
21+
def __init__(self, filter,
22+
double_precision = False):
23+
"""
24+
This creates an object that can apply a symmetric FIR filter
25+
based on torch.nn.functional.conv1d.
26+
27+
Args:
28+
filter: A filter as defined in ./filters.py. Expected to be
29+
symmetric, i.e. its (i*2)+1 must equal its filter
30+
length.
31+
double_precision: If true, we'll use float64 for the filter; else float32.
32+
33+
padding: Must be 'zero' or 'reflect'. If 'zero', the output is
34+
as if we padded the signal with zeros to get the same length.
35+
If 'reflect', it's as if we reflected the signal at 0.5 of a
36+
sample past the first and last sample.
37+
"""
38+
filters.check_is_filter(filter)
39+
(f, i) = filter
40+
filt_len = f.shape[0]
41+
assert filt_len == i * 2 + 1 # check it's symmetric
42+
dtype = (torch.float64 if double_precision else torch.float32)
43+
# the shape is (out_channels, in_channels, width),
44+
# where out_channels and in_channels are both 1.
45+
self.filt = torch.tensor(f, dtype=dtype).view(1, 1, filt_len)
46+
self.padding = i
47+
48+
def apply(self, input):
49+
"""
50+
Apply the FIR filter, and return a result of the same shape
51+
52+
Args:
53+
input: a torch.Tensor with dtype torch.float64 if double_precision=True was
54+
supplied to the constructor, else torch.float32.
55+
There must be 2 axes, interpreted as (minibatch_size, sequence_length)
56+
57+
Return: Returns a torch.Tensor with the same dtype and dim as the
58+
input.
59+
"""
60+
61+
# input.unsqueeze(1) changes dim from (minibatch_size, sequence_length) to
62+
# (minibatch_size, num_channels=1, sequence_length)
63+
# the final squeeze(1) removes the num_channels=1 axis
64+
return torch.nn.functional.conv1d(input.unsqueeze(1), self.filt,
65+
padding=self.padding).squeeze(1)
66+
67+
68+

setup.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import setuptools
2+
import sys
3+
4+
with open("README.md", "r") as fh:
5+
long_description = fh.read()
6+
7+
setuptools.setup(
8+
python_requires='>3.0.0',
9+
name="filter_utils",
10+
version="0.0.1",
11+
author="Daniel Povey",
12+
author_email="[email protected]",
13+
description="Utilities for filtering signals",
14+
long_description=long_description,
15+
long_description_content_type="text/markdown",
16+
url="https://github.com/danpovey/filter_utils",
17+
packages=setuptools.find_packages(),
18+
install_requires=[
19+
'numpy', 'torch'
20+
],
21+
classifiers=[
22+
"Programming Language :: Python :: 3",
23+
"License :: OSI Approved :: MIT License",
24+
"Operating System :: OS Independent",
25+
],
26+
)

0 commit comments

Comments
 (0)