Add some more tests, and a utility for computing local amplitude.

danpovey · danpovey · commit a342043805da · 2019-09-03T12:49:26.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*~
+[#]*[#]
diff --git a/LICENSE.txt b/LICENSE.txt
@@ -0,0 +1,8 @@
+Copyright 2019   Daniel Povey
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/filter_utils/__init__.py b/filter_utils/__init__.py
@@ -0,0 +1,3 @@
+
+from . multistreamer import Multistreamer
+
diff --git a/filter_utils/filter_function.py b/filter_utils/filter_function.py
@@ -2,15 +2,15 @@
 import numpy as np
 
 #
-# This is the filter-function that is output from ../filter_shape/main.py with D = 256, S = 6, T = 4.
+# This is the filter-function that is output from ../filter_shape/optimize_filter.py with D = 256, S = 6, T = 4.
 #
-# Some relevant output from ../filter_shape/main.py with S = 6:
+# Some relevant output from ../filter_shape/optimize_filter.py with S = 6:
 # Iter 2900: relative error in frequency gain is 0.00015374938993331817; integral of energy in banned frequency region is 0.00013761181521137081
 # f_penalty = 0.0+0.0021310990157878996; integral of abs(highpassed-signal) = 8.324605530421483e-06
 # F =  tensor([1.0001e+00, 1.0001e+00, 1.0001e+00,  ..., 2.9109e-06, 2.3796e-06,
 #         1.8225e-06])
 
-# Some relevant output from ../filter_shape/main.py with S = 10 (note:
+# Some relevant output from ../filter_shape/optimize_filter.py with S = 10 (note:
 #Iter 2400: loss = 0.0002991063477731235 = 0.00010634876963097998 + 0.00019275757814214353
 #Iter 2400: relative error in frequency gain is 7.242217090437826e-06; integral of energy in banned frequency region is 1.7746373966239925e-05
 #f_penalty = 0.0+0.0008506770552570358; integral of abs(highpassed-signal) = 3.322957247097796e-06
diff --git a/filter_utils/local_amplitude.py b/filter_utils/local_amplitude.py
@@ -0,0 +1,219 @@
+# To be run with python3.  Caution: this module requires torch!
+
+"""
+This module defines class LocalAmplitudeComputer, a utility intended for use by
+an object called Normalizer that normalizes the output of class Multistreamer
+(from ./multistreamer.py), with a view to making the data easier for neural
+nets to process.
+
+The basic idea is that we compute a moving average of the amplitude of the
+signal within each frequency band, and use that to normalize the signal.  (The
+neural net will see both the normalized signal and the log of the normalization
+factor).  The idea is that after possibly being modified by the nnet
+(e.g. denoised), we then 'un-normalize' the signal with the same normalization
+factor.
+
+We also provide a factor that can be used as part of the objective function
+if it's desired to put a greater weight on the louder frequency bands for
+training purposes.
+"""
+
+
+import numpy as np
+import cmath
+import math
+import torch
+from . import filter_function
+from . import filters
+from . import torch_filter
+from . import resampler
+
+import matplotlib.pyplot as plt  # TEMP
+
+class LocalAmplitudeComputer:
+    """
+    This class is a utility for computing the smoothed-over-time local amplitude
+    of a signal, to be used in class Normalizer to compute a normalized form of
+    the signal.
+    """
+    def __init__(self,
+                 gaussian_stddev = 100.0,
+                 epsilon = 1.0e-05,
+                 block_size = 8,
+                 double_precision = False):
+        """
+        Constructor.
+        Args:
+           gaussian_stddev (float):  This can be interpreted as a time constant measured
+                    in samples; for instance, if the sampling rate of the signal
+                    we are normalizing is 1kHz, gaussian_stddev = 1000 would mean
+                    we're smoothing with approximately 1 second of data on each
+                    side.
+           epsilon (float):  A constant that is used to smooth the instantaneous
+                    amplitude.  Dimensionally this is an amplitude.
+           block_size (int):  A number which should be substantially less than
+                    gaussian_stddev.  We first sum the data over blocks and then
+                    do convolutions, for efficiency.  Any number >= 1 is OK as
+                    long as it is less than gaussian_stddev / 2, but numbers
+                    approaching gaussian_stddev may start to affect the output.
+           double_precision (bool):  If true, create these filters in double
+                    precision (float64); this will require the input to be
+                    double too.
+        Raises:
+           ValueError: if block_size < 1 or block_size >= gaussian_stddev / 2.
+        """
+        if block_size < 1 or block_size >= gaussian_stddev / 2:
+            raise ValueError("Invalid values block-size={}, gaussian-stddev={}".format(
+                    block_size, gaussian_stddev))
+
+        # reduced_stddev is the stddev after summing over blocks of samples
+        # (which reduces the sampling rate by that factor).
+        reduced_stddev = gaussian_stddev / block_size
+        (f, i) = filters.gaussian_filter(reduced_stddev)
+        # We'll be summing, not averaging over blocks, so we need
+        # to correct for that factor.
+        f *= (1.0 / block_size)
+
+        self.epsilon = epsilon
+
+        self.dtype = torch.float64 if double_precision else torch.float32
+
+        self.gaussian_filter = torch_filter.SymmetricFirFilter(
+            (f,i), double_precision = double_precision)
+
+
+        self.block_size = block_size
+        # With block_size == 1 there is no reduction in sampling rate, so no
+        # resampler is needed; compute() checks self.block_size before using it.
+        self.resampler = None
+        if block_size > 1:
+            # num_zeros = 4 is a lower-than-normal width for the FIR filter since there
+            # won't be frequencies near the Nyquist and we don't need a sharp cutoff.
+            # filter_cutoff_ratio = 0.9 is to avoid aliasing effects with this less-precise
+            # filter (default is 0.95).
+            self.resampler = resampler.Resampler(block_size, num_zeros = 4,
+                                                 filter_cutoff_ratio = 0.9,
+                                                 double_precision = double_precision)
+
+
+    def compute(self,
+                input):
+        """
+        Computes and returns the local amplitude, which is a smoothed version of
+        the instantaneous amplitude.
+
+        Args:
+          input: a torch.Tensor with dimension
+            (minibatch_size, 2, num_channels, signal_length)
+            representing the (real, imaginary) parts of `num_channels`
+            parallel frequency channels.  dtype should be
+            torch.float32 if constructor had double_precision==False,
+            else torch.float64.
+        Returns:
+           Returns a torch.Tensor with dimension (minibatch_size, num_channels,
+            signal_length) containing the smoothed local amplitude.
+        Raises:
+           TypeError: if input is not a torch.Tensor of the expected dtype.
+           ValueError: if input does not have 4 axes with the 2nd of size 2.
+        """
+        if not isinstance(input, torch.Tensor) or input.dtype != self.dtype:
+            raise TypeError("Expected input to be of type torch.Tensor with dtype={}".format(
+                            self.dtype))
+        if len(input.shape) != 4 or input.shape[1] != 2:
+            raise ValueError("Expected input to have 4 axes with the 2nd dim == 2, got {}".format(
+                    input.shape))
+        (minibatch_size, two, num_channels, signal_length) = input.shape
+
+
+        # We really want shape (minibatch_size, num_channels, signal_length) for
+        # instantaneous_amplitude, but we want another array of size (signal_length)
+        # containing all ones, for purposes of normalization after applying the
+        # Gaussian smoothing (to correct for end effects).
+        amplitudes = torch.empty(
+            (minibatch_size * num_channels + 1), signal_length,
+            dtype=self.dtype)
+
+        # set the last row to all ones.
+        amplitudes[minibatch_size*num_channels:,:] = 1
+
+        # instantaneous_amplitude is a view into the first n rows of `amplitudes`,
+        # so the in-place operations below fill in `amplitudes` itself.
+        instantaneous_amplitude = amplitudes[0:minibatch_size*num_channels,:].view(
+            minibatch_size, num_channels, signal_length)
+        # Computes sqrt(epsilon^2 + real^2 + imag^2).
+        instantaneous_amplitude.fill_(self.epsilon*self.epsilon)  # set to epsilon^2
+        instantaneous_amplitude += input[:,0,:,:] ** 2
+        instantaneous_amplitude += input[:,1,:,:] ** 2
+        instantaneous_amplitude.sqrt_()
+
+
+        # summed_amplitudes has num-cols reduced by about self.block_size,
+        # which will make convolution with a Gaussian easier.
+        summed_amplitudes = self._block_sum(amplitudes)
+
+
+        smoothed_amplitudes = self.gaussian_filter.apply(summed_amplitudes)
+        assert smoothed_amplitudes.shape == summed_amplitudes.shape
+
+        if self.block_size > 1:
+            upsampled_amplitudes = self.resampler.upsample(smoothed_amplitudes)
+        else:
+            # block_size == 1: the sampling rate was never reduced, so there is
+            # nothing to upsample (and the constructor created no resampler).
+            upsampled_amplitudes = smoothed_amplitudes
+        assert upsampled_amplitudes.shape[1] >= signal_length
+
+
+
+        # Truncate to actual signal length (we may have a few extra samples at
+        # the end.)
+        # NOTE(review): an earlier version of this comment said the first
+        # self.block_size samples are removed to avoid small phase changes, but
+        # the code only truncates at the end; confirm which is intended.  It
+        # should not really matter since the block size will be << the gaussian
+        # stddev.
+        upsampled_amplitudes = upsampled_amplitudes[:,:signal_length]
+
+        n = minibatch_size*num_channels
+        # The following corrects for constant factors, including a
+        # 1/b factor that we missed when summing over blocks, and also for
+        # edge effects so that we can interpret the Gaussian convolution as
+        # an appropriately weighted average near the edges of the signal.
+        # We took a signal of all-ones and put it through this process
+        # as the last row of an n+1-row matrix, and we're using that
+        # to normalize.
+        # The shapes of the expressions below are, respectively:
+        #   (minibatch_size*num_channels, signal_length) and (1, signal_length)
+        upsampled_amplitudes[0:n,:] /= upsampled_amplitudes[n:,:]
+
+
+        # the `contiguous()` below would not be necessary if PyTorch had been
+        # more carefully implemented, since the shapes here are quite compatible
+        # with zero-copy.  (Possibly it's not necessary even now, not 100%
+        # sure.)
+        return upsampled_amplitudes[0:n,:].contiguous().view(minibatch_size, num_channels,
+                                                             signal_length)
+
+    def _block_sum(self, amplitudes):
+        """
+        This internal function sums the input amplitudes over blocks
+        (we do this before the Gaussian filtering to save compute).
+
+        Args:
+          amplitudes: a torch.Tensor with shape (n, s) with s being the
+                  signal length and n being some combination of minibatch
+                  and channel; dtype self.dtype
+        Returns:
+          returns a torch.Tensor with shape (n, t) where t = (s+2b-1)//b, where
+          b is the block_size passed to the constructor.  Note that this means
+          we are padding with two extra outputs, one zero-valued block at the
+          start and also a partial block sum at the end.  This is necessary to
+          ensure we have enough samples when we upsample the Gaussian-smoothed
+          version of this.  It also means we get the amplitude sum for time t
+          from a Gaussian centered at about t - block_size/2; this is harmless.
+        """
+        # make sure `amplitudes` is contiguous, so the view() below is valid.
+        amplitudes = amplitudes.contiguous()
+        b = self.block_size
+        (n, s) = amplitudes.shape
+        t = (s + 2 * b - 1) // b
+
+        ans = torch.zeros((n, t), dtype=self.dtype)
+
+        # t_end will be t-1 if there is a partial block, otherwise t.
+        t_whole = s // b      # the number of whole sums
+        t_end = t_whole + 1
+        s_whole = (s // b) * b
+
+        # Sum over the b elements of each block.  Column 0 of `ans` is left as
+        # zero (the padding block at the start).
+        ans[:,1:t_end] += amplitudes[:,:s_whole].view(n, t_whole, b).sum(dim=-1)
+        if t_end != t:
+            # sum over the left-over columns, i.e. sum over k things where k ==
+            # s % b
+            ans[:,t_end] += amplitudes[:,s_whole:].sum(dim=-1)
+        return ans
+
+
+
+
+
diff --git a/filter_utils/resampler.py b/filter_utils/resampler.py
@@ -123,3 +123,4 @@ def upsample(self, input):
                                                     stride=self.N,
                                                     padding=self.padding).squeeze(1)
 
+
diff --git a/filter_utils/torch_filter.py b/filter_utils/torch_filter.py
@@ -0,0 +1,68 @@
+# To be run with python3
+
+"""
+This module defines an object that can be used for applying symmetric FIR
+filters to signals.  Note: unlike ./filters.py, this object has a torch
+dependency.  (It uses ./filters.py for initialization though.)
+"""
+
+
+import numpy as np
+from . import filters
+import math
+import torch
+
+class SymmetricFirFilter:
+    """
+    This class is used for applying symmetric FIR filters using torch 1d
+    convolution.
+    """
+
+    def __init__(self, filter,
+                 double_precision = False):
+        """
+        This creates an object that can apply a symmetric FIR filter
+        based on torch.nn.functional.conv1d.
+
+        Args:
+        filter:  A filter as defined in ./filters.py, i.e. a pair (f, i).
+               Expected to be symmetric, i.e. its (i*2)+1 must equal its
+               filter length.
+        double_precision:  If true, we'll use float64 for the filter; else float32.
+
+        Raises:
+          ValueError: if the filter length is not equal to (i*2)+1.
+
+        Note: the signal is implicitly zero-padded by i samples on each side
+        when the filter is applied (this is what conv1d's `padding` argument
+        does), so the output has the same length as the input.
+        """
+        filters.check_is_filter(filter)
+        (f, i) = filter
+        filt_len = f.shape[0]
+        # Check it's symmetric.  This is an explicit raise rather than an
+        # assert so that the check is not stripped when running with -O.
+        if filt_len != i * 2 + 1:
+            raise ValueError("Expected a symmetric filter with length == 2*i + 1, "
+                             "got length={}, i={}".format(filt_len, i))
+        dtype = (torch.float64 if double_precision else torch.float32)
+        # the shape is (out_channels, in_channels, width),
+        # where out_channels and in_channels are both 1.
+        self.filt = torch.tensor(f, dtype=dtype).view(1, 1, filt_len)
+        self.padding = i
+
+    def apply(self, input):
+        """
+        Apply the FIR filter, and return a result of the same shape
+
+        Args:
+         input: a torch.Tensor with dtype torch.float64 if double_precision=True was
+         supplied to the constructor, else torch.float32.
+         There must be 2 axes, interpreted as (minibatch_size, sequence_length)
+
+        Return:  Returns a torch.Tensor with the same dtype and dim as the
+        input.
+        """
+
+        # input.unsqueeze(1) changes dim from (minibatch_size, sequence_length) to
+        # (minibatch_size, num_channels=1, sequence_length)
+        # the final squeeze(1) removes the num_channels=1 axis
+        return torch.nn.functional.conv1d(input.unsqueeze(1), self.filt,
+                                         padding=self.padding).squeeze(1)
+
+
+
diff --git a/setup.py b/setup.py
@@ -0,0 +1,26 @@
+import setuptools
+import sys
+
+# Use the README as the long description shown on the package index.  The
+# encoding is given explicitly so that reading does not depend on the
+# platform's default locale.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    python_requires='>3.0.0',
+    name="filter_utils",
+    version="0.0.1",
+    author="Daniel Povey",
+    author_email="dpovey@gmail.com",
+    description="Utilities for filtering signals",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/danpovey/filter_utils",
+    packages=setuptools.find_packages(),
+    install_requires=[
+        'numpy', 'torch'
+    ],
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)
diff --git a/tests/test_local_amplitude.py b/tests/test_local_amplitude.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+`
	`2`	`+from . multistreamer import Multistreamer`
	`3`	`+`
Original file line number	Diff line number	Diff line change
`@@ -123,3 +123,4 @@ def upsample(self, input):`
`123`	`123`	`stride=self.N,`
`124`	`124`	`padding=self.padding).squeeze(1)`
`125`	`125`
	`126`	`+`