Merge pull request #70 from mrocamora/dev

v0.1.6
mrocamora · Mar 15, 2022 · 1c054a9 · 1c054a9
2 parents 982af4d + c350d8d
commit 1c054a9
Show file tree

Hide file tree

Showing 18 changed files with 1,165 additions and 58 deletions.
diff --git a/.gitignore b/.gitignore
@@ -44,6 +44,7 @@ Thumbs.db
 docs/generated/
 docs/_build/
 docs/auto_examples/
+docs/source/auto_examples/
 
 # Vim
 *.swp

diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@ Some demonstrations of what you can do with carat:
 
 * [Rhythmic patterns demo notebook](http://nbviewer.ipython.org/github/mrocamora/carat/blob/master/examples/carat_rhythmic_patterns_demo.ipynb): how to extract rhythmic patterns from an audio recording.
 * [Microtiming patterns demo notebook](http://nbviewer.ipython.org/github/mrocamora/carat/blob/master/examples/carat_microtiming_patterns_demo.ipynb): how to extract microtiming patterns from an audio recording.
+* [Onsets detection demo notebook](http://nbviewer.ipython.org/github/mrocamora/carat/blob/master/examples/carat_onsets_detection_demo.ipynb): how to detect onsets from an audio recording.
 
 
 Installation

diff --git a/carat/annotations.py b/carat/annotations.py
@@ -50,7 +50,7 @@ def load_beats(labels_file, delimiter=',', times_col=0, labels_col=1):
     Load an included example file from the candombe dataset.
     http://www.eumus.edu.uy/candombe/datasets/ISMIR2015/
 
-    >>> annotations_file = carat.util.example_beats_file(num_file=1)
+    >>> annotations_file = carat.util.example("ansina_beats")
     >>> beats, beat_labs = annotations.load_beats(annotations_file)
     >>> beats[0]
     0.548571428
@@ -60,7 +60,7 @@ def load_beats(labels_file, delimiter=',', times_col=0, labels_col=1):
     Load an included example file from the samba dataset.
     http://www.smt.ufrj.br/~starel/datasets/brid.html
 
-    >>> annotations_file = carat.util.example_beats_file(num_file=3)
+    >>> annotations_file = carat.util.example("tamborim_beats")
     >>> beats, beat_labs = annotations.load_beats(annotations_file, delimiter=' ')
     >>> beats
     array([ 2.088,  2.559,  3.012,   3.48,  3.933,   4.41,  4.867,   5.32,
@@ -133,7 +133,7 @@ def load_downbeats(labels_file, delimiter=',', times_col=0, labels_col=1, downbe
     Load an included example file from the candombe dataset.
     http://www.eumus.edu.uy/candombe/datasets/ISMIR2015/
 
-    >>> annotations_file = carat.util.example_beats_file(num_file=1)
+    >>> annotations_file = carat.util.example("ansina_beats")
     >>> downbeats, downbeat_labs = carat.annotations.load_downbeats(annotations_file)
     >>> downbeats[:3]
     array([0.54857143, 2.33265306, 4.11530612])
@@ -144,7 +144,7 @@ def load_downbeats(labels_file, delimiter=',', times_col=0, labels_col=1, downbe
     Load an included example file from the samba dataset.
     http://www.smt.ufrj.br/~starel/datasets/brid.html
 
-    >>> annotations_file = carat.util.example_beats_file(num_file=3)
+    >>> annotations_file = carat.util.example("tamborim_beats")
     >>> downbeats, downbeat_labs = annotations.load_downbeats(annotations_file,
                                                               delimiter=' ', downbeat_label='1')
     >>> downbeats
@@ -214,7 +214,7 @@ def load_onsets(labels_file, delimiter=',', times_col=0, labels_col=1):
 
     Load an included example file from the candombe dataset.
 
-    >>> onsets_file = carat.util.example_onsets_file(num_file=2)
+    >>> onsets_file = carat.util.example("chico_onsets")
     >>> onsets, onset_labs = carat.annotations.load_beats(onsets_file)
     >>> onsets[0]
     12.969002267
@@ -224,7 +224,7 @@ def load_onsets(labels_file, delimiter=',', times_col=0, labels_col=1):
     Load an included example file from the samba dataset.
     http://www.smt.ufrj.br/~starel/datasets/brid.html
 
-    >>> onsets_file = carat.util.example_onsets_file(num_file=3)
+    >>> onsets_file = carat.util.example("tamborim_onsets")
     >>> onsets, onset_labs = carat.annotations.load_beats(onsets_file, delimiter='\t')
     >>> onsets[0]
     1.93
@@ -325,3 +325,27 @@ def trim_downbeats(downbeat_times, downbeat_labels, ini_bar, num_bars):
     downbeat_labels_trimmed = downbeat_labels[ini_bar-1:ini_bar-1+num_bars]
 
     return downbeat_times_trimmed, downbeat_labels_trimmed
+
+
+def save_onsets(onsets_file, onset_times, onset_labels=None, delimiter=","):
+    """Save onset times to a text file.
+
+    Only the onset times are written, using `np.savetxt`. Writing the
+    labels is not implemented yet: if labels are given, a notice is
+    printed and they are left out of the output file.
+
+    Parameters
+    ----------
+    onsets_file : str
+        name (including path) of the output file
+    onset_times : np.ndarray
+        time instant of each onset
+    onset_labels : list
+        label for each onset (currently not written)
+    delimiter : str
+        string used as delimiter in the output file
+
+    """
+
+    # explicit None/size test instead of `if onset_labels:`, because the
+    # truth value of an array with more than one element raises ValueError
+    if onset_labels is not None and np.size(onset_labels) > 0:
+        # TODO
+        print("writing onset labels not implemented yet")
+
+    # write onsets as rows
+    np.savetxt(onsets_file, onset_times, delimiter=delimiter)
diff --git a/carat/display.py b/carat/display.py
@@ -33,8 +33,8 @@
 
 
 def wave_plot(y, sr=22050, x_axis='time', beats=None, beat_labs=None,
-              ax=None, **kwargs):
-    '''Plot an audio waveform and beat labels (optinal).
+              onsets=None, ax=None, **kwargs):
+    '''Plot an audio waveform (as well as beats and onsets, optionally).
 
     Parameters
     ----------
@@ -44,6 +44,12 @@ def wave_plot(y, sr=22050, x_axis='time', beats=None, beat_labs=None,
         sampling rate of `y`
     x_axis : str {'time', 'off', 'none'} or None
         If 'time', the x-axis is given time tick-marks.
+    beats : np.ndarray
+        location of beats as time values
+    beat_labs : list
+        labels at the beats (e.g. 1.1, 1.2, etc)
+    onsets : np.ndarray
+        location of onsets as time values
     ax : matplotlib.axes.Axes or None
         Axes to plot on instead of the default `plt.gca()`.
     kwargs
@@ -73,9 +79,14 @@ def wave_plot(y, sr=22050, x_axis='time', beats=None, beat_labs=None,
     # plot waveform
     out = axes.plot(time, y, **kwargs)
 
+    # plot beats
     if beats is not None:
         __plot_beats(beats, max_time, axes, beat_labs=beat_labs, **kwargs)
 
+    # plot onsets
+    if onsets is not None:
+        __plot_onsets(onsets, max_time, axes, **kwargs)
+
     # format x axis
     if x_axis == 'time':
         axes.xaxis.set_major_formatter(TimeFormatter(lag=False))
@@ -89,8 +100,8 @@ def wave_plot(y, sr=22050, x_axis='time', beats=None, beat_labs=None,
 
 
 def feature_plot(feature, time, x_axis='time', beats=None, beat_labs=None,
-                 ax=None, **kwargs):
-    '''Plot an audio waveform and beat labels (optinal).
+                 onsets=None, ax=None, **kwargs):
+    '''Plot a feature function (as well as beats and onsets, optionally).
 
 
     Parameters
@@ -101,6 +112,12 @@ def feature_plot(feature, time, x_axis='time', beats=None, beat_labs=None,
         time instant of the feature values
     x_axis : str {'time', 'off', 'none'} or None
         If 'time', the x-axis is given time tick-marks.
+    beats : np.ndarray
+        location of beats as time values
+    beat_labs : list
+        labels at the beats (e.g. 1.1, 1.2, etc)
+    onsets : np.ndarray
+        location of onsets as time values
     ax : matplotlib.axes.Axes or None
         Axes to plot on instead of the default `plt.gca()`.
     kwargs
@@ -128,9 +145,14 @@ def feature_plot(feature, time, x_axis='time', beats=None, beat_labs=None,
     # plot waveform
     out = axes.plot(time, feature, **kwargs)
 
+    # plot beats
     if beats is not None:
         __plot_beats(beats, max_time, axes, beat_labs=beat_labs, **kwargs)
 
+    # plot onsets
+    if onsets is not None:
+        __plot_onsets(onsets, max_time, axes, **kwargs)
+
     # format x axis
     if x_axis == 'time':
         axes.xaxis.set_major_formatter(TimeFormatter(lag=False))
@@ -229,6 +251,8 @@ def __plot_beats(beats, max_time, ax, beat_labs=None, **kwargs):
     ----------
     beats : np.ndarray
         audio time series
+    max_time : float
+        maximum time value
     beat_labs : list
         beat labels
     x_axis : str {'time', 'off', 'none'} or None
@@ -273,6 +297,42 @@ def __plot_beats(beats, max_time, ax, beat_labs=None, **kwargs):
     return ax2
 
 
+def __plot_onsets(onsets, max_time, ax, **kwargs):
+    '''Plot onsets locations as vertical lines.
+
+    Parameters
+    ----------
+    onsets : np.ndarray
+        location of onsets as time values
+    max_time : float
+        maximum time value (later onsets are not plotted)
+    ax : matplotlib.axes.Axes
+        Axes to plot on.
+    kwargs
+        Additional keyword arguments passed to `ax.axvline`.
+        Note that `color` and `alpha` are always overridden.
+
+    Returns
+    -------
+    None
+
+    '''
+
+    # color and alpha are fixed; linestyle and linewidth are only defaults
+    kwargs['color'] = 'crimson'
+    kwargs.setdefault('linestyle', '-')
+    kwargs['alpha'] = 0.3
+    kwargs.setdefault('linewidth', 2)
+
+    # replace nan values by 0
+    onsets = np.atleast_1d(np.nan_to_num(onsets))
+
+    # consider onsets up to max_time. A boolean mask is used instead of
+    # slicing up to the index of the onset nearest to max_time (assuming
+    # `util.find_nearest` returns the index of the closest element), since
+    # that slice left out the nearest onset itself, e.g. the last onset
+    # when all of them are before max_time.
+    new_ons = onsets[onsets <= max_time]
+
+    # plot onsets annotations
+    for onset in new_ons:
+        ax.axvline(x=onset, **kwargs)
+
+
 def map_show(data, x_coords=None, y_coords=None, ax=None,
              n_tatums=4, clusters=None, **kwargs):
     '''Display a feature map.

diff --git a/carat/features.py b/carat/features.py
@@ -78,6 +78,8 @@ def accentuation_feature(signal, fs, sum_flag=True, log_flag=False, mel_flag=Tru
         feature values
     time : np.array
         time values
+    frequency : np.array
+        frequency values
 
     Notes
     -----
@@ -115,7 +117,7 @@ def accentuation_feature(signal, fs, sum_flag=True, log_flag=False, mel_flag=Tru
     # maximum filter (and difference)
     if maxfilt_flag:
         # maximum filter
-        max_spec = sp.ndimage.filters.maximum_filter(feature, size=(maxbins, 1))
+        max_spec = sp.ndimage.maximum_filter(feature, size=(maxbins, 1))
         # init the diff array
         diff = np.zeros(feature.shape)
         # calculate difference between log spec and max filtered version
@@ -600,29 +602,30 @@ def peak_detection(feature, threshold=0.05, pre_avg=0, pos_avg=0, pre_max=1, pos
         # origin controls the placement of the filter
         avg_origin = int(np.floor((pre_avg - pos_avg) / 2))
         # moving average
-        mov_avg = sp.ndimage.filters.uniform_filter(data, avg_length,
+        mov_avg = sp.ndimage.uniform_filter(data, avg_length,
                                                     mode='constant',
                                                     origin=avg_origin)
     else:
         # do not use a moving average
         mov_avg = 0
-        # candidates above the moving average + the threshold
-        candidates = data * (data >= mov_avg + threshold)
-        # length of moving maximum filter
-        max_length = pre_max + pos_max + 1
-        # compute the moving maximum
-        if max_length > 1:
-            # origin controls the placement of the filter
-            max_origin = int(np.floor((pre_max - pos_max) / 2))
-            # moving maximum
-            mov_max = sp.ndimage.filters.maximum_filter(candidates, max_length,
-                                                        mode='constant',
-                                                        origin=max_origin)
-            # candidates are peak positions
-            candidates *= (candidates == mov_max)
-        # return indices
-        candidates_0 = np.nonzero(candidates)[0]
-        return candidates_0, mov_avg, mov_max
+    # candidates above the moving average + the threshold
+    candidates = data * (data >= mov_avg + threshold)
+    # length of moving maximum filter
+    max_length = pre_max + pos_max + 1
+    # compute the moving maximum
+    if max_length > 1:
+        # origin controls the placement of the filter
+        max_origin = int(np.floor((pre_max - pos_max) / 2))
+        # moving maximum
+        mov_max = sp.ndimage.maximum_filter(candidates, max_length,
+                                                    mode='constant',
+                                                    origin=max_origin)
+        # candidates are peak positions
+        candidates *= (candidates == mov_max)
+    # return indices
+    candidates_0 = np.nonzero(candidates)[0]
+
+    return candidates_0, mov_avg, mov_max
 
 
 #def accentuation_feature(y, sr=22050, hop_length=512, n_fft=2048,

diff --git a/carat/onsets.py b/carat/onsets.py
@@ -0,0 +1,59 @@
+# encoding: utf-8
+# pylint: disable=C0103
+"""
+Onsets
+======
+
+Onsets detection
+----------------
+
+.. autosummary::
+    :toctree: generated/
+
+    detection
+
+"""
+
+from . import util
+from . import features
+
+__all__ = ['detection']
+
+
+def detection(signal, fs=22050, **kwargs):
+    """Detect onsets in an audio signal.
+
+    An accentuation feature is computed from the signal and its peaks
+    are taken as the onsets.
+
+    Parameters
+    ----------
+    signal : np.array
+        input audio signal
+    fs : int
+        sampling rate
+    **kwargs
+        keyword arguments; the ones valid for `features.peak_detection`
+        are passed to it, the other ones are passed to
+        `features.accentuation_feature`
+
+    Returns
+    -------
+    onset_times : np.ndarray
+        time instants of the onsets
+    feature_values : np.ndarray
+        feature values at the onsets
+
+    """
+
+    # split the keywords between peak picking and feature computation
+    peak_args, feature_args = util.getValidKeywords(kwargs, features.peak_detection)
+
+    # accentuation feature and its time instants (third output not needed)
+    novelty, frame_times, _ = features.accentuation_feature(signal, fs, **feature_args)
+
+    # indexes of the peaks of the feature (other outputs not needed)
+    peak_idx, _, _ = features.peak_detection(novelty, **peak_args)
+
+    # time instants and feature values at the onsets
+    return frame_times[peak_idx], novelty[peak_idx]