From 41dbfb39a436c0e8ee3c3234fc9971d25f1d7ebb Mon Sep 17 00:00:00 2001
From: Florian Hermes <Florian.Hermes@vm-webhrv.dhclab.i.hpi.de>
Date: Mon, 14 Feb 2022 13:04:31 +0000
Subject: [PATCH 01/38] Add nonlinear feature extractions

---
 pypg/features.py | 129 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/pypg/features.py b/pypg/features.py
index 47597d3..6d833a0 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -181,6 +181,135 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         plt.show()
     return cycle_features
 
+
+def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
+    """
+    Extracts non-linear features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
+    which each line contains the features for a given valid cycle (as described
+    by Li et al.) from the PPG segment given as input.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    segment_features : pd.DataFrame
+        A dataframe with the non-linear features in seconds for each valid
+        cycle in the PPG segment.
+    """
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    # all signal peaks
+    all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if verbose:
+        print('All Peaks: ', all_peaks)
+    if len(cycles) == 0:
+        return pd.DataFrame()
+
+    cur_index = 0
+    segment_features = pd.DataFrame()
+    for i, cycle in enumerate(cycles):
+        segment_features = segment_features.append(nonlinear_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose),
+                                         ignore_index=True)
+
+    if verbose:
+        print('Cycle Features within Segment:')
+        print(segment_features)
+
+    # remove outliers
+    segment_features = _clean_segment_features_of_outliers(segment_features)
+
+    if verbose:
+        print('Cycle Features within Segment and no Outliers:')
+    return segment_features
+
+
+def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
+    """
+    Extracts nonlinear features for a PPG cycle. Returns a pandas.Series.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    cycle_features : pd.DataFrame
+        A dataframe with the nonlinear features in seconds for the PPG cycle.
+    """
+
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    if not isinstance(ppg.index, pd.DatetimeIndex):
+        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+
+    ppg = ppg.interpolate(method='time')
+    ppg = ppg - ppg.min()
+    ppg_cycle_duration = len(ppg)
+
+    peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    systolic_peak_index = peaks[0]
+
+    if verbose:
+        plt.figure()
+        plt.xlim((ppg.index.min(), ppg.index.max()))
+        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.plot(ppg.index, ppg.values)
+
+    # features
+    cycle_features = pd.DataFrame({
+                        'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg)),
+                        'ratio_SUT_WL_DT': ((systolic_peak_index / ppg_cycle_duration) / (ppg_cycle_duration - systolic_peak_index)),
+                        'ratio_SUT_DT': (systolic_peak_index / (ppg_cycle_duration - systolic_peak_index)),
+                        'ratio_cycle_mean_cycle_var': (np.mean(ppg) / np.var(ppg))
+                        }, index=[0])
+
+    if verbose:
+        plt.show()
+    return cycle_features
+
+
 # takes a dataframe of calculated features and removes the outliers occurring due
 # to inaccuracies in the signal
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):

From 24a1889e4179d468d77bfe1ae7a1780671f1d197 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 14 Feb 2022 16:53:52 +0000
Subject: [PATCH 02/38] Add statistical features extraction

---
 pypg/features.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 127 insertions(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 6d833a0..7058d7c 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -31,7 +31,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from scipy import signal
+from scipy import signal, stats
 
 from .cycles import find_with_template
 
@@ -310,6 +310,132 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
     return cycle_features
 
 
+def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
+    """
+    Extracts statistical features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
+    which each line contains the features for a given valid cycle (as described
+    by Li et al.) from the PPG segment given as input.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    segment_features : pd.DataFrame
+        A dataframe with the statistical features in seconds for each valid
+        cycle in the PPG segment.
+    """
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    # all signal peaks
+    all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if verbose:
+        print('All Peaks: ', all_peaks)
+    if len(cycles) == 0:
+        return pd.DataFrame()
+
+    cur_index = 0
+    segment_features = pd.DataFrame()
+    for i, cycle in enumerate(cycles):
+        segment_features = segment_features.append(statistical_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose),
+                                         ignore_index=True)
+
+    if verbose:
+        print('Cycle Features within Segment:')
+        print(segment_features)
+
+    # remove outliers
+    segment_features = _clean_segment_features_of_outliers(segment_features)
+
+    if verbose:
+        print('Cycle Features within Segment and no Outliers:')
+    return segment_features
+
+
+def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
+    """
+    Extracts statistical features for a PPG cycle. Returns a pandas.Series.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    cycle_features : pd.DataFrame
+        A dataframe with the statistical features in seconds for the PPG cycle.
+    """
+
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    if not isinstance(ppg.index, pd.DatetimeIndex):
+        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+
+    ppg = ppg.interpolate(method='time')
+    ppg = ppg - ppg.min()
+
+    peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+
+    if verbose:
+        plt.figure()
+        plt.xlim((ppg.index.min(), ppg.index.max()))
+        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.plot(ppg.index, ppg.values)
+
+    # features
+    cycle_features = pd.DataFrame({
+                        'ppg_mean': (np.mean(ppg)),
+                        'ppg_var': (np.var(ppg)),
+                        'ppg_skewness': (stats.skew(ppg)),
+                        'ppg_kurtosis': (stats.kurtosis(ppg))
+                        }, index=[0])
+
+    if verbose:
+        plt.show()
+    return cycle_features
+
+
 # takes a dataframe of calculated features and removes the outliers occurring due
 # to inaccuracies in the signal
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):

From cb2eb310c4ba7d102a75487bdf58f0f2000dd34e Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 14 Feb 2022 18:27:44 +0000
Subject: [PATCH 03/38] Add sdppg features extraction

---
 pypg/features.py | 312 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 312 insertions(+)

diff --git a/pypg/features.py b/pypg/features.py
index 7058d7c..d44583c 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -436,6 +436,318 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
     return cycle_features
 
 
+def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
+    """
+    Extracts features from the second derivative (APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
+    which each line contains the features for a given valid cycle (as described
+    by Li et al.) from the PPG segment given as input.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    segment_features : pd.DataFrame
+        A dataframe with the features from the second derivative (APG) of the PPG cycles in seconds for each valid
+        cycle in the PPG segment.
+    """
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    # all signal peaks
+    all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if verbose:
+        print('All Peaks: ', all_peaks)
+    if len(cycles) == 0:
+        return pd.DataFrame()
+
+    cur_index = 0
+    segment_features = pd.DataFrame()
+    for i, cycle in enumerate(cycles):
+        segment_features = segment_features.append(sdppg_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose),
+                                         ignore_index=True)
+
+    if verbose:
+        print('Cycle Features within Segment:')
+        print(segment_features)
+
+    # remove outliers
+    segment_features = _clean_segment_features_of_outliers(segment_features)
+
+    if verbose:
+        print('Cycle Features within Segment and no Outliers:')
+    return segment_features
+
+
+def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
+    """
+    Extracts features from the second derivative (APG) for the PPG cycles. Returns a pandas.Series.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    cycle_features : pd.DataFrame
+        A dataframe with the features from the second derivative (APG) in seconds for the PPG cycle.
+    """
+
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    if not isinstance(ppg.index, pd.DatetimeIndex):
+        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+
+    ppg = ppg.interpolate(method='time')
+    ppg = ppg - ppg.min()
+
+    peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    systolic_peak_index = peaks[0]
+
+    if verbose:
+        plt.figure()
+        plt.xlim((ppg.index.min(), ppg.index.max()))
+        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.plot(ppg.index, ppg.values)
+
+    # second derviative of the PPG signal
+    sdppg_signal = np.gradient(np.gradient(ppg))
+
+    # features
+    # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585
+    # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879
+    # a is global maximum;
+    # b is the global minimum; (maybe get systolic_peak_index and get local minimum between a_index and systolic_peak_index)
+    # e and c are the local maximum;
+    # d is the local minimum in APG between e and c;
+    # D is local minimum in APG after e
+    # 
+    # and Gaurav Paper
+
+    # TODO: Can be improved!
+    # TODO: maybe add systolic peak index as reference for indices detection
+    
+    sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0)
+    sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0)
+    
+    # a wave
+    if len(sdppg_signal_peaks) != 0:
+        
+        a_index = (sdppg_signal_peaks[np.argmax(peak_dict['peak_heights'])])
+        
+        ## subset signal + dictionary to only have left over indeces + magnitudes
+        old_len = len(sdppg_signal_peaks)
+        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]]
+        peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
+        
+        old_len = len(sdppg_signal_valleys)
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index[0]]
+        valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
+    
+    # b wave
+    if len(sdppg_signal_valleys) != 0:
+        
+        b_index = (sdppg_signal_valleys[np.argmax(valley_dict['peak_heights'])])
+        
+        ## subset signal + dictionary to only have left over indeces + magnitudes
+        old_len = len(sdppg_signal_peaks)
+        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]]
+        peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
+        
+        old_len = len(sdppg_signal_valleys)
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index[0]]
+        valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
+
+	# c and e wave
+    if len(sdppg_signal_peaks) >= 2:
+        
+        peaks = peak_dict['peak_heights'].argsort()[-2:]
+        
+        if len(peaks) == 2:
+            if peaks[0] < peaks[1]:
+                c_index = (sdppg_signal_peaks[peaks[0]])
+                e_index = (sdppg_signal_peaks[peaks[1]])
+            else:
+                e_index = (sdppg_signal_peaks[peaks[0]])
+                c_index = (sdppg_signal_peaks[peaks[1]])
+            
+            ## subset signal + dictionary to only have left over indeces + magnitudes
+            old_len = len(sdppg_signal_valleys)
+            sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]]
+            valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
+    
+    # d wave
+    if len(sdppg_signal_valleys) >= 1 and (c_index or e_index):
+        
+        ## get only valleys between c and e
+        old_len = len(sdppg_signal_valleys)
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index[0]]
+        
+        if old_len > len(sdppg_signal_valleys):
+            valley_dict['peak_heights'] = valley_dict['peak_heights'][:-(old_len - len(sdppg_signal_valleys))]
+        
+        if len(valley_dict['peak_heights']) != 0:
+            d_index = (sdppg_signal_valleys[valley_dict['peak_heights'].argmax()])
+    
+    ## SDPPG Signal values
+    ### a
+    if a_index:
+        a_val = (sdppg_signal[a_index])
+    
+    ### b
+    if b_index:
+        b_val = (sdppg_signal[b_index])
+    
+    ### c
+    if c_index:
+        c_val = (sdppg_signal[c_index])
+    
+    ### d
+    if d_index:
+        d_val = (sdppg_signal[d_index])
+    
+    ### e
+    if e_index:
+        e_val = (sdppg_signal[e_index])
+   
+    
+    if verbose:
+        plt.figure(figsize=(17,6))
+        plt.plot(sdppg_signal, label = 'SDPPG Signal')
+        plt.scatter(np.array([a_index, b_index, c_index, d_index, e_index]), np.array([a_val, b_val, c_val, d_val, e_val]), c='g', label='Representative Points')
+        plt.title('SDPPG Signal Wave Detection')
+        plt.xlabel('Time')
+        plt.ylabel('SDPPG')
+        plt.legend()
+    
+    
+    ## Time values of characteristics
+    ### Location of c with respect to time (AD)
+    if c_index:
+        AD = (c_index / sampling_frequency)
+    
+    ### Location of d with respect to time (AE)
+    if d_index:
+        AE = (d_index / sampling_frequency)
+    
+    ### Difference of peak and dicrotic notch with respect to time (CD)
+    if c_index and systolic_peak_index is not None:
+        CD = ((c_index - systolic_peak_index) / sampling_frequency)
+    
+    ### Difference of dicrotic notch and end of signal with respect to time (DF)
+    if c_index:
+        DF = ((len(sdppg_signal) - c_index)  / sampling_frequency)
+    
+    
+    ## PPG Signal values
+    ### Dicrotic notch (D')
+    if c_index:
+        D = (ppg[c_index])
+    
+    ### Diastolic point (E')
+        if d_index:
+            E = (ppg[d_index])
+    
+    
+    ## Ratios
+    ### First valley to the peak value of APG signal (b/a)
+    if b_val and a_val:
+        ratio_b_a = (b_val / a_val)
+    
+    ### Dicrotic notch to the peak value of APG signal (c/a)
+    if c_val and a_val:
+        ratio_c_a = (c_val / a_val)
+    
+    ### Diastolic point to the peak value of APG signal (d/a)
+    if d_val and a_val:
+        ratio_d_a = (d_val / a_val)
+    
+    ### e to the peak value of APG signal (e/a)
+    if e_val and a_val:
+        ratio_e_a = (e_val / a_val)
+    
+    ### Location of dicrotic notch with respect to the length of window (AD/AF)
+    if c_index:
+        ratio_AD_AF = (c_index / len(sdppg_signal))
+    
+    ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF)
+    if c_index and systolic_peak_index is not None:
+        ratio_CD_AF = ((c_index - systolic_peak_index) / len(sdppg_signal))
+    
+    ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF)
+    if d_index:
+        ratio_AE_AF = (d_index / len(sdppg_signal))
+    
+    ### Difference of dichrotic notch and end with  respect ot length of window (DF/AF)
+    if c_index:
+        ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal))
+
+    cycle_features = pd.DataFrame({
+                        'a_val': a_val,
+                        'b_val': b_val,
+                        'c_val': c_val,
+                        'd_val': d_val,
+                        'e_val': e_val,
+                        'AD': AD,
+                        'AE': AE,
+                        'CD': CD,
+                        'DF': DF,
+                        'D': D,
+                        'E': E,
+                        'ratio_b_a': ratio_b_a,
+                        'ratio_c_a': ratio_c_a,
+                        'ratio_d_a': ratio_d_a,
+                        'ratio_e_a': ratio_e_a,
+                        'ratio_AD_AF': ratio_AD_AF,
+                        'ratio_CD_AF': ratio_CD_AF,
+                        'ratio_AE_AF': ratio_AE_AF,
+                        'ratio_DF_AF': ratio_DF_AF,
+                        }, index=[0])
+
+    if verbose:
+        plt.show()
+    return cycle_features
+
 # takes a dataframe of calculated features and removes the outliers occurring due
 # to inaccuracies in the signal
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):

From 3bf1f2835d8cf7b7463b3ad156e533fc2d224a14 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 14 Feb 2022 20:27:02 +0000
Subject: [PATCH 04/38] Add frequency features extraction

---
 pypg/features.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index d44583c..36cb22e 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -31,7 +31,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from scipy import signal, stats
+from scipy import signal, stats, interpolate
 
 from .cycles import find_with_template
 
@@ -748,6 +748,95 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         plt.show()
     return cycle_features
 
+
+def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_size, verbose=False):
+    """
+    Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
+    which each line contains the features for a given valid cycle (as described
+    by Li et al.) from the PPG segment given as input.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    verbose : bool, optional
+        If True, plots the features, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    segment_features : pd.DataFrame
+        A dataframe with the frequency features in seconds for each valid
+        cycle in the PPG segment.
+    """
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+     # Transform signal from time to frequency domain
+    freq, mag = _transformSigfromTimetoFreq(ppg, sampling_frequency, transformMethod, verbose)
+
+	# features
+    segment_features = pd.DataFrame()
+
+    # Cut off frequencies
+    freq_cut = freq[freq <= cutoff_freq]
+    mag_cut = mag[freq <= cutoff_freq]
+
+    if verbose:
+        plt.semilogy(freq_cut, mag_cut)
+        plt.title('CutOff')
+        plt.xlabel('Frequency')
+        plt.ylabel('mag')
+        plt.show()
+
+    ## -> Paper Wang
+    for start_index in np.arange(0, cutoff_freq, interval_size):
+        end_index = start_index + interval_size
+        segment_features.append({
+            'freqInt' + str(start_index) + '_' + str(end_index): 
+            np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])}, 
+            ignore_index=True)
+
+    ## -> Paper Slapnicar
+    sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]]
+    if len(sorted_mag) >= 3:
+        top3_psd = sorted_mag[:3]
+    else:
+        top3_psd = sorted_mag[:len(sorted_mag)]
+
+    for i in np.arange(0, len(top3_psd)):
+        segment_features.append({
+            'mag_top' + str(i+1):
+            top3_psd[i]},
+            ignore_index=True)
+
+    sorted_freq = freq_cut[np.argsort(mag_cut)[::-1]]
+    if len(sorted_freq) >= 3:
+        top3_freq = sorted_freq[:3]
+    else:
+        top3_freq = sorted_freq[:len(sorted_freq)]
+
+    for i in np.arange(0, len(top3_freq)):
+        segment_features.append({
+            'freq_top' + str(i+1):
+            top3_freq[i]},
+            ignore_index=True)
+
+    if verbose:
+        print('Cycle Features within Segment:')
+        print(segment_features)
+
+    return segment_features
+
+
 # takes a dataframe of calculated features and removes the outliers occurring due
 # to inaccuracies in the signal
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
@@ -763,3 +852,30 @@ def _find_xs_for_y(y_s, y_val, sys_peak):
     x_1 = diffs[:sys_peak].idxmin()
     x_2 = diffs[sys_peak:].idxmin()
     return x_1, x_2
+
+# transforms a given temporal signal into the spectral domain
+# using different methods and allowing interpolation if required.
+# returns the frequencies and their magnitudes.
+def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False):
+
+	## TODO: Can maybe be improved?!
+
+	if fft_interpolation:
+		x = np.cumsum(signal)
+		f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
+		t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
+		
+		signal = f_interpol(t_interpol)
+		fs = fft_interpolation
+
+	if transformMethod == 'welch':
+		freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
+
+	if verbose:
+		plt.semilogy(freq, mag)
+		plt.title('Spectral Analysis using Welch Method')
+		plt.xlabel('Frequency')
+		plt.ylabel('PSD')
+		plt.show()
+
+	return freq, mag
\ No newline at end of file

From 6b1edf68ce9d0d4a93b2641b96aad327901cfc88 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Tue, 15 Feb 2022 00:05:35 +0000
Subject: [PATCH 05/38] Add HRV features extraction

---
 pypg/features.py | 281 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 264 insertions(+), 17 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 36cb22e..e37a19b 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -28,9 +28,11 @@
 https://doi.org/10.1088/0967-3334/33/9/1491
 """
 
+from operator import index
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import nolds
 from scipy import signal, stats, interpolate
 
 from .cycles import find_with_template
@@ -837,6 +839,87 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si
     return segment_features
 
 
+def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
+    """
+    Extracts hrv features from PPG cycles in a give PPG segment. 
+    Returns a pandas.DataFrame in which each line contains the features 
+    for a given valid cycle (as described by Li et al.) from the PPG segment given as input.
+
+    Parameters
+    ----------
+    ppg : pandas.Series, ndarray
+        The PPG signal.
+    sampling_frequency : int
+        The sampling frequency of the signal in Hz.
+    factor: float, optional
+        Number that is used to calculate the distance in relation to the
+        sampling_frequency, by default 0.667 (or 66.7%). The factor is based
+        on the paper by Elgendi et al. (2013).
+    unit : str, optional
+        The unit of the index, by default 'ms'.
+    verbose : bool, optional
+        If True, plots the features with and without outliers, by default False.
+
+    Raises
+    ----------
+    Exception
+        When PPG values are neither pandas.Series nor ndarray.
+
+    Returns
+    -------
+    segment_features : pd.DataFrame
+        A dataframe with the hrv-domain features in seconds for each valid
+        cycle in the PPG segment.
+    """
+    if isinstance(ppg, np.ndarray):
+        ppg = pd.Series(ppg)
+    elif not isinstance(ppg, pd.core.series.Series):
+        raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
+
+    cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    # all signal peaks
+    all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if verbose:
+        print('All Peaks: ', all_peaks)
+    if len(cycles) == 0:
+        return pd.DataFrame()
+
+    segment_features = pd.DataFrame()
+
+    cur_index = 0
+    CP = pd.DataFrame()
+    for i, cycle in enumerate(cycles):
+        CP = CP.append(time_cycle(cycle, sampling_frequency,
+                                    factor, unit, verbose=verbose),
+                                    ignore_index=True)
+        if i > 0:
+            CP.loc[cur_index-1, 'CP'] = (
+                CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1,
+                'sys_peak_ts']).total_seconds()
+        cur_index = cur_index + 1
+    # last cycle or only cycle need to relies on the difference between the general peaks
+    all_peaks_index = len(all_peaks)-1
+    CP.loc[cur_index-1, 'CP'] = (
+        all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
+    
+    temporalHRVFeatures = _getTemporalHRVFeatures(CP['CP'])
+    frequencyHRVFeatures = _getFreqencyHRVFeatures(CP['CP'], sampling_frequency)
+
+    segment_features.append(temporalHRVFeatures)
+    segment_features.append(frequencyHRVFeatures)
+ 
+    if verbose:
+        print('Cycle Features within Segment:')
+        print(segment_features)
+
+    # remove outliers
+    segment_features = _clean_segment_features_of_outliers(segment_features)
+
+    if verbose:
+        print('Cycle Features within Segment and no Outliers:')
+    return segment_features
+
+
 # takes a dataframe of calculated features and removes the outliers occurring due
 # to inaccuracies in the signal
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
@@ -858,24 +941,188 @@ def _find_xs_for_y(y_s, y_val, sys_peak):
 # returns the frequencies and their magnitudes.
 def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False):
 
-	## TODO: Can maybe be improved?!
+    ## TODO: Can maybe be improved?!
+    if fft_interpolation:
+        x = np.cumsum(signal)
+        f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
+        t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
+        
+        signal = f_interpol(t_interpol)
+        fs = fft_interpolation
+    
+    if transformMethod == 'welch':
+        freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
+    
+    if verbose:
+        plt.semilogy(freq, mag)
+        plt.title('Spectral Analysis using Welch Method')
+        plt.xlabel('Frequency')
+        plt.ylabel('PSD')
+        plt.show()
+    
+    return freq, mag
 
-	if fft_interpolation:
-		x = np.cumsum(signal)
-		f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
-		t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
-		
-		signal = f_interpol(t_interpol)
-		fs = fft_interpolation
 
-	if transformMethod == 'welch':
-		freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
+def _getTemporalHRVFeatures(signal):
+    
+    if isinstance(signal, np.ndarray):
+        ibi_series = pd.Series(signal)
+    elif not isinstance(signal, pd.core.series.Series):
+        raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
+    
+    window = 5
+    nn_threshold = 50
+    
+    # Prepare data
+    instantaneous_hr = 60 / (ibi_series)
+    rolling_mean_hr = instantaneous_hr.rolling(window).mean()
+    rolling_24h = ibi_series.rolling(5)
+    
+    # Precalculate data for standard indexes
+    nn_diff = np.diff(ibi_series)
+    nn_xx = np.sum(np.abs(nn_diff) > nn_threshold)
+    
+    # Precalculate data for geometrical indeces
+    bin_size = 7.8125
+    hist_middle = (ibi_series.min() + ibi_series.max()) / 2
+    hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.min()) / bin_size)
+    hist_length = int(np.ceil((ibi_series.max() - hist_bound_lower) / bin_size) + 1)
+    hist_bins = hist_bound_lower + np.arange(hist_length) * bin_size
+    hist, _ = np.histogram(ibi_series, hist_bins)
+    hist_height = np.max(hist)
+    
+    # Calculate TINN measure
+    hist_max_index = np.argmax(hist)
+    min_n = 0
+    min_m = len(hist) - 1
+    min_error = np.finfo(np.float64).max
+    # Ignore bins that do not contain any intervals
+    nonzero_indices = np.nonzero(hist)
+    for n in range(hist_max_index):
+        for m in reversed(range(hist_max_index + 1, len(hist))):
+            # Create triangular interpolation function for n and m
+            tri_interp = interpolate.interp1d(
+                [n, hist_max_index, m],
+                [0, hist[hist_max_index], 0],
+                bounds_error=False,
+                fill_value=0
+            )
+            # Square difference of histogram and triangle
+            error = np.trapz(
+                [(hist[t] - tri_interp(t)) ** 2 for t in nonzero_indices],
+                [hist_bins[t] for t in nonzero_indices]
+            )
+            if min_error > error:
+                min_n = n
+                min_m = m
+                min_error = error
+    n = hist_bins[min_n]
+    m = hist_bins[min_m]
+	
+	# Non-Linear Parameters
+    tolerance = ibi_series.std() * 0.2
+	
+    # features
+    temporalHRVFeatures = pd.DataFrame({
+                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)),
+                            'MeanNN': ibi_series.mean(),
+                            'MeanHR': instantaneous_hr.mean(),
+                            'MaxHR': rolling_mean_hr.max(),
+                            'MinHR': rolling_mean_hr.min(),
+                            'STDHR': instantaneous_hr.std(),
+                            'SDNN': np.std(ibi_series),
+                            'SDNNindex': rolling_24h.std().mean(),
+                            'SDANN': rolling_24h.mean().std(),
+                            'RMSSD': np.sqrt(np.mean(nn_diff ** 2)),
+                            f'NN{nn_threshold}': nn_xx,
+                            f'pNN{nn_threshold}': nn_xx / len(ibi_series) * 100,
+                            'HRVTriangularIndex': len(ibi_series) / hist_height,
+                            'TINN': m - n}, 
+                            index=[0])
+    
+    return temporalHRVFeatures
 
-	if verbose:
-		plt.semilogy(freq, mag)
-		plt.title('Spectral Analysis using Welch Method')
-		plt.xlabel('Frequency')
-		plt.ylabel('PSD')
-		plt.show()
 
-	return freq, mag
\ No newline at end of file
+def _getFreqencyHRVFeatures(signal, sampling_frequency):
+    
+    if isinstance(signal, np.ndarray):
+        ibi_series = pd.Series(signal)
+    elif not isinstance(signal, pd.core.series.Series):
+        raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
+	
+	## TODO: HYPERPARAMETERS to config dict
+    fft_interpolation = 4.0
+    use_ulf = False
+    lomb_smoothing = 0.02
+    
+    vlf_limit = 0.04
+    lf_limit = 0.15
+    hf_limit = 0.4
+    
+    if not use_ulf or ibi_series.sum() < 300000: #TODO: check
+		# Do not use ULF band on sample shorter than 5 minutes
+        ulf_limit = 0
+    else:
+        ulf_limit = ulf_limit #TODO: ??????
+	
+	# TODO: export transformMethod
+    transformMethod='welch'
+    freq, mag = _transformSigfromTimetoFreq(ibi_series, sampling_frequency, transformMethod, fft_interpolation)
+    
+    abs_index = freq <= hf_limit
+    ulf_index = freq <= ulf_limit
+    vlf_index = (freq >= ulf_limit) & (freq <= vlf_limit)
+    lf_index = (freq >= vlf_limit) & (freq <= lf_limit)
+    hf_index = (freq >= lf_limit) & (freq <= hf_limit)
+	
+	# Get power for each band by integrating over spectral density
+    abs_power = np.trapz(mag[abs_index], freq[abs_index])
+    ulf = np.trapz(mag[ulf_index], freq[ulf_index])
+    vlf = np.trapz(mag[vlf_index], freq[vlf_index])
+    lf = np.trapz(mag[lf_index], freq[lf_index])
+    hf = np.trapz(mag[hf_index], freq[hf_index])
+	
+	# Normalized power for LF and HF band
+    lf_nu = lf / (abs_power - vlf - ulf) * 100
+    hf_nu = hf / (abs_power - vlf - ulf) * 100
+	
+	# Relative power of each band
+    ulf_perc = (ulf / abs_power) * 100
+    vlf_perc = (vlf / abs_power) * 100
+    lf_perc = (lf / abs_power) * 100
+    hf_perc = (hf / abs_power) * 100
+	
+	# Frequency with highest power
+    vlf_peak = freq[vlf_index][np.argmax(mag[vlf_index])]
+    lf_peak = freq[lf_index][np.argmax(mag[lf_index])]
+    hf_peak = freq[hf_index][np.argmax(mag[hf_index])]
+    
+    freqencyHRVFeatures = pd.DataFrame({
+                            'VLF_peak': vlf_peak,
+                            'VLF_power': vlf,
+                            'VLF_power_log': np.log(vlf),
+                            'VLF_power_rel': vlf_perc,
+                            'LF_peak': lf_peak,
+                            'LF_power': lf,
+                            'LF_power_log': np.log(lf),
+                            'LF_power_rel': lf_perc,
+                            'LF_power_norm': lf_nu,
+                            'HF_peak': hf_peak,
+                            'HF_power': hf,
+                            'HF_power_log': np.log(hf),
+                            'HF_power_rel': hf_perc,
+                            'HF_power_norm': hf_nu,
+                            'ratio_LF_HF': lf/hf}, 
+                            index=[0])
+    
+    # Add ULF parameters, if band available
+    if use_ulf and np.sum(ulf_index) > 0:
+        ulf_peak = freq[np.argmax(mag[ulf_index])]
+        freqencyHRVFeatures.append({
+                                'ULF_peak': ulf_peak,
+                                'ULF_power': ulf,
+                                'ULF_power_log': np.log(ulf),
+                                'ULF_power_rel': ulf_perc}, 
+                                index=[0])
+    
+    return freqencyHRVFeatures

From ea3fb03c09d98092cb6d12923f8d75563dc3769d Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Wed, 16 Feb 2022 12:04:11 +0000
Subject: [PATCH 06/38] Change names of methods; Clean-up

---
 pypg/features.py | 94 ++++++++++++++++++++++++------------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index e37a19b..2e11ccf 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -902,8 +902,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     CP.loc[cur_index-1, 'CP'] = (
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
-    temporalHRVFeatures = _getTemporalHRVFeatures(CP['CP'])
-    frequencyHRVFeatures = _getFreqencyHRVFeatures(CP['CP'], sampling_frequency)
+    temporalHRVFeatures = temporal_hrv(CP['CP'])
+    frequencyHRVFeatures = frequency_hrv(CP['CP'], sampling_frequency)
 
     segment_features.append(temporalHRVFeatures)
     segment_features.append(frequencyHRVFeatures)
@@ -920,50 +920,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     return segment_features
 
 
-# takes a dataframe of calculated features and removes the outliers occurring due
-# to inaccuracies in the signal
-def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
-    quant = segment_df.quantile(treshold)
-    for col in segment_df.columns:
-        if col.find('ts') == -1:
-            segment_df = segment_df[segment_df[col] < quant[col]*2]
-    return segment_df
-
-# returns the x values for those samples in the signal, that are closest to some given y value
-def _find_xs_for_y(y_s, y_val, sys_peak):
-    diffs = abs(y_s - y_val)
-    x_1 = diffs[:sys_peak].idxmin()
-    x_2 = diffs[sys_peak:].idxmin()
-    return x_1, x_2
-
-# transforms a given temporal signal into the spectral domain
-# using different methods and allowing interpolation if required.
-# returns the frequencies and their magnitudes.
-def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False):
-
-    ## TODO: Can maybe be improved?!
-    if fft_interpolation:
-        x = np.cumsum(signal)
-        f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
-        t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
-        
-        signal = f_interpol(t_interpol)
-        fs = fft_interpolation
-    
-    if transformMethod == 'welch':
-        freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
-    
-    if verbose:
-        plt.semilogy(freq, mag)
-        plt.title('Spectral Analysis using Welch Method')
-        plt.xlabel('Frequency')
-        plt.ylabel('PSD')
-        plt.show()
-    
-    return freq, mag
-
-
-def _getTemporalHRVFeatures(signal):
+def temporal_hrv(signal):
     
     if isinstance(signal, np.ndarray):
         ibi_series = pd.Series(signal)
@@ -1043,7 +1000,7 @@ def _getTemporalHRVFeatures(signal):
     return temporalHRVFeatures
 
 
-def _getFreqencyHRVFeatures(signal, sampling_frequency):
+def frequency_hrv(signal, sampling_frequency):
     
     if isinstance(signal, np.ndarray):
         ibi_series = pd.Series(signal)
@@ -1126,3 +1083,46 @@ def _getFreqencyHRVFeatures(signal, sampling_frequency):
                                 index=[0])
     
     return freqencyHRVFeatures
+
+
+# takes a dataframe of calculated features and removes the outliers occurring due
+# to inaccuracies in the signal
+def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
+    quant = segment_df.quantile(treshold)
+    for col in segment_df.columns:
+        if col.find('ts') == -1:
+            segment_df = segment_df[segment_df[col] < quant[col]*2]
+    return segment_df
+
+# returns the x values for those samples in the signal, that are closest to some given y value
+def _find_xs_for_y(y_s, y_val, sys_peak):
+    diffs = abs(y_s - y_val)
+    x_1 = diffs[:sys_peak].idxmin()
+    x_2 = diffs[sys_peak:].idxmin()
+    return x_1, x_2
+
+# transforms a given temporal signal into the spectral domain
+# using different methods and allowing interpolation if required.
+# returns the frequencies and their magnitudes.
+def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False):
+
+    ## TODO: Can maybe be improved?!
+    if fft_interpolation:
+        x = np.cumsum(signal)
+        f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
+        t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
+        
+        signal = f_interpol(t_interpol)
+        fs = fft_interpolation
+    
+    if transformMethod == 'welch':
+        freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
+    
+    if verbose:
+        plt.semilogy(freq, mag)
+        plt.title('Spectral Analysis using Welch Method')
+        plt.xlabel('Frequency')
+        plt.ylabel('PSD')
+        plt.show()
+    
+    return freq, mag
\ No newline at end of file

From 0bf014e786eb8be66512d925c100017009e46bee Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Wed, 16 Feb 2022 14:17:09 +0000
Subject: [PATCH 07/38] Adapt gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 9e92fcc..5b335d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -89,6 +89,8 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+Pipfile
+*.lock
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

From e0367c4b078a650363403231711428e91a9bc7d5 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 15:58:54 +0000
Subject: [PATCH 08/38] review time features; add comments

---
 pypg/features.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 2e11ccf..3453794 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -37,6 +37,7 @@
 
 from .cycles import find_with_template
 
+
 def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts time domain features from PPG cycles in a give PPG segment as
@@ -94,6 +95,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
                 segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1,
                 'sys_peak_ts']).total_seconds()
         cur_index = cur_index + 1
+    
     # last cycle or only cycle need to relies on the difference between the general peaks
     all_peaks_index = len(all_peaks)-1
     segment_features.loc[cur_index-1, 'CP'] = (
@@ -148,14 +150,14 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min()
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???
     max_amplitude = ppg.max()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    sys_peak_ts = ppg.index[peaks[0]]
+    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
 
     if verbose:
         plt.figure()
@@ -168,8 +170,12 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
                         'start_ts': ppg.index.min(),
                         'sys_peak_ts': sys_peak_ts,
                         'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(),
-                        'DT': (ppg.index.max() - sys_peak_ts).total_seconds()
+                        'DT': (ppg.index.max() - sys_peak_ts).total_seconds(),
+                        'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # ??? CHECK KURYLAK DEF ???
+                        'CT': (ppg.index.max() - ppg.index.min()).total_seconds(),
+                        'SUT_VAL': (ppg.values[sys_peak_ts])
                         }, index=[0])
+
     for p_value in [10, 25, 33, 50, 66, 75]:
         p_ampl = p_value / 100 * max_amplitude
         x_1, x_2 = _find_xs_for_y(ppg, p_ampl, peaks[0])
@@ -181,6 +187,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
                                         x_2 - sys_peak_ts) / (sys_peak_ts - x_1)
     if verbose:
         plt.show()
+    
     return cycle_features
 
 

From 73e67831376377ce73c11491a80b7e695838e0aa Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 16:15:36 +0000
Subject: [PATCH 09/38] review nonlinear features; add comments

---
 pypg/features.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 3453794..f46f98d 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -7,8 +7,10 @@
 
 Features
 ----------
-time       - Extract features from the time domain for a PPG segment.
-time_cycle - Extract features from the time domain for a PPG cycle.
+time            - Extract features from the time domain for a PPG segment.
+time_cycle      - Extract features from the time domain for a PPG cycle.
+nonlinear       - Extract features from the time domain and compute non-linear relations for a PPG segment.
+nonlinear_cycle - Extract features from the time domain and compute non-linear relations for a PPG cycle.
 
 References
 ----------
@@ -220,7 +222,7 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     Returns
     -------
     segment_features : pd.DataFrame
-        A dataframe with the non-linear features in seconds for each valid
+        A dataframe with the non-linear features for each valid
         cycle in the PPG segment.
     """
     if isinstance(ppg, np.ndarray):
@@ -252,8 +254,9 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
-    return segment_features
+        print(segment_features)
 
+    return segment_features
 
 def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
@@ -291,14 +294,14 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min()
-    ppg_cycle_duration = len(ppg)
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???
+    ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    systolic_peak_index = peaks[0]
+    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
 
     if verbose:
         plt.figure()
@@ -308,14 +311,15 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
 
     # features
     cycle_features = pd.DataFrame({
-                        'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg)),
-                        'ratio_SUT_WL_DT': ((systolic_peak_index / ppg_cycle_duration) / (ppg_cycle_duration - systolic_peak_index)),
-                        'ratio_SUT_DT': (systolic_peak_index / (ppg_cycle_duration - systolic_peak_index)),
-                        'ratio_cycle_mean_cycle_var': (np.mean(ppg) / np.var(ppg))
+                        'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # ??  WHERE DO DEFINITIONS COME FROM ??
+                        'ratio_SUT_WL_DT': ((sys_peak_ts / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak_ts)),
+                        'ratio_SUT_DT': (sys_peak_ts / (ppg_cycle_duration - sys_peak_ts)),
+                        'ratio_cycle_mean_cycle_var': (np.mean(ppg.values) / np.var(ppg.values))
                         }, index=[0])
 
     if verbose:
         plt.show()
+
     return cycle_features
 
 

From 228408a87c90dee0e20fe036a6983f2c11d47df6 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 16:56:04 +0000
Subject: [PATCH 10/38] review sdppg features; add comments

---
 pypg/features.py | 80 ++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index f46f98d..390d84a 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -7,10 +7,14 @@
 
 Features
 ----------
-time            - Extract features from the time domain for a PPG segment.
-time_cycle      - Extract features from the time domain for a PPG cycle.
-nonlinear       - Extract features from the time domain and compute non-linear relations for a PPG segment.
-nonlinear_cycle - Extract features from the time domain and compute non-linear relations for a PPG cycle.
+time             - Extract features from the time domain for a PPG segment.
+time_cycle       - Extract features from the time domain for a PPG cycle.
+nonlinear        - Extract features from the time domain computing non-linear relations for a PPG segment.
+nonlinear_cycle  - Extract features from the time domain computing non-linear relations for a PPG cycle.
+statistical      - Extract features from the time domain computing statistical values for a PPG segment.
+statistical_cyle - Extract features from the time domain computing statistical values for a PPG cycle.
+sdppg            - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment.
+sdppg_cycle      - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle.
 
 References
 ----------
@@ -155,7 +159,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
     max_amplitude = ppg.max()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
@@ -285,7 +289,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
     Returns
     -------
     cycle_features : pd.DataFrame
-        A dataframe with the nonlinear features in seconds for the PPG cycle.
+        A dataframe with the nonlinear features for the PPG cycle.
     """
 
     if isinstance(ppg, np.ndarray):
@@ -297,7 +301,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
         ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
     ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
@@ -352,7 +356,7 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False
     Returns
     -------
     segment_features : pd.DataFrame
-        A dataframe with the statistical features in seconds for each valid
+        A dataframe with the statistical features for each valid
         cycle in the PPG segment.
     """
     if isinstance(ppg, np.ndarray):
@@ -384,8 +388,9 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
-    return segment_features
+        print(segment_features)
 
+    return segment_features
 
 def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
@@ -414,7 +419,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
     Returns
     -------
     cycle_features : pd.DataFrame
-        A dataframe with the statistical features in seconds for the PPG cycle.
+        A dataframe with the statistical features for the PPG cycle.
     """
 
     if isinstance(ppg, np.ndarray):
@@ -423,10 +428,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min()
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
 
@@ -438,20 +443,21 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
 
     # features
     cycle_features = pd.DataFrame({
-                        'ppg_mean': (np.mean(ppg)),
-                        'ppg_var': (np.var(ppg)),
-                        'ppg_skewness': (stats.skew(ppg)),
-                        'ppg_kurtosis': (stats.kurtosis(ppg))
+                        'ppg_mean': (np.mean(ppg.values)),
+                        'ppg_var': (np.var(ppg.values)),
+                        'ppg_skewness': (stats.skew(ppg.values)),
+                        'ppg_kurtosis': (stats.kurtosis(ppg.values))
                         }, index=[0])
 
     if verbose:
         plt.show()
+
     return cycle_features
 
 
 def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
-    Extracts features from the second derivative (APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
+    Extracts features from the second derivative (SDPPG or APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described
     by Li et al.) from the PPG segment given as input.
 
@@ -478,7 +484,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     Returns
     -------
     segment_features : pd.DataFrame
-        A dataframe with the features from the second derivative (APG) of the PPG cycles in seconds for each valid
+        A dataframe with the features from the second derivative (SDPPG or APG) of the PPG cycles in seconds for each valid
         cycle in the PPG segment.
     """
     if isinstance(ppg, np.ndarray):
@@ -510,12 +516,13 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
-    return segment_features
+        print(segment_features)
 
+    return segment_features
 
 def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
-    Extracts features from the second derivative (APG) for the PPG cycles. Returns a pandas.Series.
+    Extracts features from the second derivative (SDPPG or APG) for the PPG cycles. Returns a pandas.Series.
 
     Parameters
     ----------
@@ -540,7 +547,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     Returns
     -------
     cycle_features : pd.DataFrame
-        A dataframe with the features from the second derivative (APG) in seconds for the PPG cycle.
+        A dataframe with the features from the second derivative (SDPPG or APG) in seconds for the PPG cycle.
     """
 
     if isinstance(ppg, np.ndarray):
@@ -549,13 +556,13 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit)
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min()
+    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    systolic_peak_index = peaks[0]
+    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
 
     if verbose:
         plt.figure()
@@ -564,9 +571,9 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         plt.plot(ppg.index, ppg.values)
 
     # second derviative of the PPG signal
-    sdppg_signal = np.gradient(np.gradient(ppg))
+    sdppg_signal = np.gradient(np.gradient(ppg.values))
 
-    # features
+    # features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!!
     # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585
     # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879
     # a is global maximum;
@@ -588,7 +595,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         
         a_index = (sdppg_signal_peaks[np.argmax(peak_dict['peak_heights'])])
         
-        ## subset signal + dictionary to only have left over indeces + magnitudes
+        ## subset signal + dictionary to exclude a index + magnitude
         old_len = len(sdppg_signal_peaks)
         sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]]
         peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
@@ -602,7 +609,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         
         b_index = (sdppg_signal_valleys[np.argmax(valley_dict['peak_heights'])])
         
-        ## subset signal + dictionary to only have left over indeces + magnitudes
+        ## subset signal + dictionary to exclude b index + magnitude
         old_len = len(sdppg_signal_peaks)
         sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]]
         peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
@@ -624,7 +631,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
                 e_index = (sdppg_signal_peaks[peaks[0]])
                 c_index = (sdppg_signal_peaks[peaks[1]])
             
-            ## subset signal + dictionary to only have left over indeces + magnitudes
+            ## subset signal + dictionary to exlcude c and e indeces + magnitudes
             old_len = len(sdppg_signal_valleys)
             sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]]
             valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
@@ -684,8 +691,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         AE = (d_index / sampling_frequency)
     
     ### Difference of peak and dicrotic notch with respect to time (CD)
-    if c_index and systolic_peak_index is not None:
-        CD = ((c_index - systolic_peak_index) / sampling_frequency)
+    if c_index and sys_peak_ts is not None:
+        CD = ((c_index - sys_peak_ts) / sampling_frequency)
     
     ### Difference of dicrotic notch and end of signal with respect to time (DF)
     if c_index:
@@ -695,11 +702,11 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     ## PPG Signal values
     ### Dicrotic notch (D')
     if c_index:
-        D = (ppg[c_index])
+        D = (ppg.values[c_index])
     
     ### Diastolic point (E')
-        if d_index:
-            E = (ppg[d_index])
+    if d_index:
+        E = (ppg.values[d_index])
     
     
     ## Ratios
@@ -724,8 +731,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         ratio_AD_AF = (c_index / len(sdppg_signal))
     
     ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF)
-    if c_index and systolic_peak_index is not None:
-        ratio_CD_AF = ((c_index - systolic_peak_index) / len(sdppg_signal))
+    if c_index and sys_peak_ts is not None:
+        ratio_CD_AF = ((c_index - sys_peak_ts) / len(sdppg_signal))
     
     ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF)
     if d_index:
@@ -759,6 +766,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
 
     if verbose:
         plt.show()
+
     return cycle_features
 
 

From 286f0f37b745a4558beec88ea5cfa67c34940316 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 17:29:52 +0000
Subject: [PATCH 11/38] review frequency features; Add comments

---
 pypg/features.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 390d84a..f618238 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -15,6 +15,7 @@
 statistical_cyle - Extract features from the time domain computing statistical values for a PPG cycle.
 sdppg            - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment.
 sdppg_cycle      - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle.
+frequency        - Extract features from the frequency domain for a PPG segment.
 
 References
 ----------
@@ -770,7 +771,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     return cycle_features
 
 
-def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_size, verbose=False):
+def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq = 12.5, interval_size = 0.5, verbose=False):
     """
     Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described
@@ -782,6 +783,12 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si
         The PPG signal.
     sampling_frequency : int
         The sampling frequency of the signal in Hz.
+    transformMethod : str
+        The method used for transforming the time signal to the frequency domain.
+    cutoff_freq : int
+        The frequency threshold used to exclude frequencies above threshold from analysis.
+    interval_size : int
+        The size of the interval used to split the frequency spectrum.
     verbose : bool, optional
         If True, plots the features, by default False.
 
@@ -855,6 +862,13 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si
         print('Cycle Features within Segment:')
         print(segment_features)
 
+    # remove outliers
+    segment_features = _clean_segment_features_of_outliers(segment_features)
+
+    if verbose:
+        print('Cycle Features within Segment and no Outliers:')
+        print(segment_features)
+
     return segment_features
 
 

From ce2dcc77013f26fd99d77d0bb99492e36220bb1f Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 17:30:26 +0000
Subject: [PATCH 12/38] Clean up

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index f618238..e6dceb4 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -771,7 +771,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     return cycle_features
 
 
-def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq = 12.5, interval_size = 0.5, verbose=False):
+def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interval_size=0.5, verbose=False):
     """
     Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described

From c4877a7df16f7d33711c4a1dcb6dec24ec25b2d6 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 4 Apr 2022 17:45:59 +0000
Subject: [PATCH 13/38] review hrv features; add comments

---
 pypg/features.py | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index e6dceb4..1af30f3 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -16,6 +16,7 @@
 sdppg            - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment.
 sdppg_cycle      - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle.
 frequency        - Extract features from the frequency domain for a PPG segment.
+hrv              - Extract features from the computed Heart-Rate-Variability signal for a PPG segment.
 
 References
 ----------
@@ -874,9 +875,9 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv
 
 def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
-    Extracts hrv features from PPG cycles in a give PPG segment. 
+    Extracts hrv features from a given PPG segment. 
     Returns a pandas.DataFrame in which each line contains the features 
-    for a given valid cycle (as described by Li et al.) from the PPG segment given as input.
+    from the PPG segment given as input.
 
     Parameters
     ----------
@@ -901,8 +902,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     Returns
     -------
     segment_features : pd.DataFrame
-        A dataframe with the hrv-domain features in seconds for each valid
-        cycle in the PPG segment.
+        A dataframe with the hrv-domain features in seconds for the PPG segment.
     """
     if isinstance(ppg, np.ndarray):
         ppg = pd.Series(ppg)
@@ -917,8 +917,6 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     if len(cycles) == 0:
         return pd.DataFrame()
 
-    segment_features = pd.DataFrame()
-
     cur_index = 0
     CP = pd.DataFrame()
     for i, cycle in enumerate(cycles):
@@ -927,19 +925,17 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
                                     ignore_index=True)
         if i > 0:
             CP.loc[cur_index-1, 'CP'] = (
-                CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1,
-                'sys_peak_ts']).total_seconds()
+                CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, 'sys_peak_ts']).total_seconds()
         cur_index = cur_index + 1
     # last cycle or only cycle need to relies on the difference between the general peaks
     all_peaks_index = len(all_peaks)-1
     CP.loc[cur_index-1, 'CP'] = (
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
-    temporalHRVFeatures = temporal_hrv(CP['CP'])
-    frequencyHRVFeatures = frequency_hrv(CP['CP'], sampling_frequency)
+    temporalHRVFeatures = _temporal_hrv(CP['CP'])
+    frequencyHRVFeatures = _frequency_hrv(CP['CP'], sampling_frequency)
 
-    segment_features.append(temporalHRVFeatures)
-    segment_features.append(frequencyHRVFeatures)
+    segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1)
  
     if verbose:
         print('Cycle Features within Segment:')
@@ -950,14 +946,17 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
+        print(segment_features)
+
     return segment_features
 
 
-def temporal_hrv(signal):
+# compute temporal and frequency features of HRV
+def _temporal_hrv(ibi_series):
     
-    if isinstance(signal, np.ndarray):
-        ibi_series = pd.Series(signal)
-    elif not isinstance(signal, pd.core.series.Series):
+    if isinstance(ibi_series, np.ndarray):
+        ibi_series = pd.Series(ibi_series)
+    elif not isinstance(ibi_series, pd.core.series.Series):
         raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
     
     window = 5
@@ -1032,8 +1031,7 @@ def temporal_hrv(signal):
     
     return temporalHRVFeatures
 
-
-def frequency_hrv(signal, sampling_frequency):
+def _frequency_hrv(signal, sampling_frequency):
     
     if isinstance(signal, np.ndarray):
         ibi_series = pd.Series(signal)

From 7e45e9ab88275b47c7a45b05e71642d115d3e1a9 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Sun, 10 Apr 2022 17:03:51 +0000
Subject: [PATCH 14/38] Double check code and adapt according to tests on
 PPG-BP dataset

---
 pypg/features.py | 185 +++++++++++++++++++++++++----------------------
 1 file changed, 97 insertions(+), 88 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 1af30f3..e9a701e 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -158,14 +158,14 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
+    ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
     max_amplitude = ppg.max()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
+    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
 
     if verbose:
         plt.figure()
@@ -179,9 +179,9 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
                         'sys_peak_ts': sys_peak_ts,
                         'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(),
                         'DT': (ppg.index.max() - sys_peak_ts).total_seconds(),
-                        'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # ??? CHECK KURYLAK DEF ???
+                        'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense?
                         'CT': (ppg.index.max() - ppg.index.min()).total_seconds(),
-                        'SUT_VAL': (ppg.values[sys_peak_ts])
+                        'SUT_VAL': (ppg.values[peaks[0]])
                         }, index=[0])
 
     for p_value in [10, 25, 33, 50, 66, 75]:
@@ -191,8 +191,8 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
             plt.scatter([x_1, x_2], ppg[[x_1, x_2]])
         cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds()
         cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds()
-        cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (
-                                        x_2 - sys_peak_ts) / (sys_peak_ts - x_1)
+        cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1)
+    
     if verbose:
         plt.show()
     
@@ -300,14 +300,15 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
+    ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
     ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
+    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
+    sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
 
     if verbose:
         plt.figure()
@@ -317,9 +318,9 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
 
     # features
     cycle_features = pd.DataFrame({
-                        'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # ??  WHERE DO DEFINITIONS COME FROM ??
-                        'ratio_SUT_WL_DT': ((sys_peak_ts / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak_ts)),
-                        'ratio_SUT_DT': (sys_peak_ts / (ppg_cycle_duration - sys_peak_ts)),
+                        'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # TODO: Add definition to description
+                        'ratio_SUT_WL_DT': ((sys_peak / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak)),
+                        'ratio_SUT_DT': (sys_peak / (ppg_cycle_duration - sys_peak)),
                         'ratio_cycle_mean_cycle_var': (np.mean(ppg.values) / np.var(ppg.values))
                         }, index=[0])
 
@@ -430,10 +431,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
+    ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
 
@@ -558,13 +559,14 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
     if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ???
+        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
 
     ppg = ppg.interpolate(method='time')
-    ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ???  if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
+    ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
-    sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ???
+    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
+    sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
 
     if verbose:
         plt.figure()
@@ -575,7 +577,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     # second derviative of the PPG signal
     sdppg_signal = np.gradient(np.gradient(ppg.values))
 
-    # features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!!
+    # TODO: features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!!
     # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585
     # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879
     # a is global maximum;
@@ -586,11 +588,18 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     # 
     # and Gaurav Paper
 
-    # TODO: Can be improved!
     # TODO: maybe add systolic peak index as reference for indices detection
     
     sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0)
     sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0)
+
+    # initialize SDPPG indices and variables
+    a_index = b_index = c_index = d_index = e_index = None
+    a_val = b_val = c_val = d_val = e_val = None
+    AD = AE = CD = DF = D = E = None
+    ratio_b_a = ratio_c_a = ratio_d_a = ratio_e_a = None
+    ratio_AD_AF = ratio_CD_AF = ratio_AE_AF = ratio_DF_AF = None
+
     
     # a wave
     if len(sdppg_signal_peaks) != 0:
@@ -599,11 +608,11 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         
         ## subset signal + dictionary to exclude a index + magnitude
         old_len = len(sdppg_signal_peaks)
-        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]]
+        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index]
         peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
         
         old_len = len(sdppg_signal_valleys)
-        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index[0]]
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index]
         valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
     
     # b wave
@@ -613,14 +622,14 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         
         ## subset signal + dictionary to exclude b index + magnitude
         old_len = len(sdppg_signal_peaks)
-        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]]
+        sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index]
         peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):]
         
         old_len = len(sdppg_signal_valleys)
-        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index[0]]
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index]
         valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
-
-	# c and e wave
+    
+    # c and e wave
     if len(sdppg_signal_peaks) >= 2:
         
         peaks = peak_dict['peak_heights'].argsort()[-2:]
@@ -635,7 +644,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
             
             ## subset signal + dictionary to exlcude c and e indeces + magnitudes
             old_len = len(sdppg_signal_valleys)
-            sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]]
+            sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index]
             valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):]
     
     # d wave
@@ -643,7 +652,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         
         ## get only valleys between c and e
         old_len = len(sdppg_signal_valleys)
-        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index[0]]
+        sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index]
         
         if old_len > len(sdppg_signal_valleys):
             valley_dict['peak_heights'] = valley_dict['peak_heights'][:-(old_len - len(sdppg_signal_valleys))]
@@ -651,6 +660,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         if len(valley_dict['peak_heights']) != 0:
             d_index = (sdppg_signal_valleys[valley_dict['peak_heights'].argmax()])
     
+
     ## SDPPG Signal values
     ### a
     if a_index:
@@ -693,8 +703,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         AE = (d_index / sampling_frequency)
     
     ### Difference of peak and dicrotic notch with respect to time (CD)
-    if c_index and sys_peak_ts is not None:
-        CD = ((c_index - sys_peak_ts) / sampling_frequency)
+    if c_index and sys_peak:
+        CD = ((c_index - sys_peak) / sampling_frequency)
     
     ### Difference of dicrotic notch and end of signal with respect to time (DF)
     if c_index:
@@ -713,19 +723,19 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     
     ## Ratios
     ### First valley to the peak value of APG signal (b/a)
-    if b_val and a_val:
+    if b_index and a_index:
         ratio_b_a = (b_val / a_val)
     
     ### Dicrotic notch to the peak value of APG signal (c/a)
-    if c_val and a_val:
+    if c_index and a_index:
         ratio_c_a = (c_val / a_val)
     
     ### Diastolic point to the peak value of APG signal (d/a)
-    if d_val and a_val:
+    if d_index and a_index:
         ratio_d_a = (d_val / a_val)
     
     ### e to the peak value of APG signal (e/a)
-    if e_val and a_val:
+    if e_index and a_index:
         ratio_e_a = (e_val / a_val)
     
     ### Location of dicrotic notch with respect to the length of window (AD/AF)
@@ -733,8 +743,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         ratio_AD_AF = (c_index / len(sdppg_signal))
     
     ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF)
-    if c_index and sys_peak_ts is not None:
-        ratio_CD_AF = ((c_index - sys_peak_ts) / len(sdppg_signal))
+    if c_index and sys_peak:
+        ratio_CD_AF = ((c_index - sys_peak) / len(sdppg_signal))
     
     ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF)
     if d_index:
@@ -745,7 +755,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal))
 
     cycle_features = pd.DataFrame({
-                        'a_val': a_val,
+                        'a_val': a_val, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality
                         'b_val': b_val,
                         'c_val': c_val,
                         'd_val': d_val,
@@ -772,7 +782,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     return cycle_features
 
 
-def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interval_size=0.5, verbose=False):
+def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.0, interval_size=5, verbose=False):
     """
     Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described
@@ -810,9 +820,9 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
      # Transform signal from time to frequency domain
-    freq, mag = _transformSigfromTimetoFreq(ppg, sampling_frequency, transformMethod, verbose)
+    freq, mag = _transformSigfromTimetoFreq(ppg.values, sampling_frequency, transformMethod=transformMethod, verbose=verbose)
 
-	# features
+    # features
     segment_features = pd.DataFrame()
 
     # Cut off frequencies
@@ -823,18 +833,17 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv
         plt.semilogy(freq_cut, mag_cut)
         plt.title('CutOff')
         plt.xlabel('Frequency')
-        plt.ylabel('mag')
+        plt.xlim((freq_cut.min(), freq_cut.max()))
+        plt.ylim((mag_cut.min(), mag_cut.max()))
+        plt.ylabel('Magnitude')
         plt.show()
 
-    ## -> Paper Wang
+    ## -> Paper Wang # TODO: Add description to the top
     for start_index in np.arange(0, cutoff_freq, interval_size):
         end_index = start_index + interval_size
-        segment_features.append({
-            'freqInt' + str(start_index) + '_' + str(end_index): 
-            np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])}, 
-            ignore_index=True)
+        segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])
 
-    ## -> Paper Slapnicar
+    ## -> Paper Slapnicar # TODO: Add description to the top
     sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]]
     if len(sorted_mag) >= 3:
         top3_psd = sorted_mag[:3]
@@ -842,10 +851,7 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv
         top3_psd = sorted_mag[:len(sorted_mag)]
 
     for i in np.arange(0, len(top3_psd)):
-        segment_features.append({
-            'mag_top' + str(i+1):
-            top3_psd[i]},
-            ignore_index=True)
+        segment_features.loc[0, 'mag_top' + str(i+1)] = top3_psd[i]
 
     sorted_freq = freq_cut[np.argsort(mag_cut)[::-1]]
     if len(sorted_freq) >= 3:
@@ -854,15 +860,13 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv
         top3_freq = sorted_freq[:len(sorted_freq)]
 
     for i in np.arange(0, len(top3_freq)):
-        segment_features.append({
-            'freq_top' + str(i+1):
-            top3_freq[i]},
-            ignore_index=True)
+        segment_features.loc[0, 'freq_top' + str(i+1)] = top3_freq[i]
 
     if verbose:
         print('Cycle Features within Segment:')
         print(segment_features)
 
+    # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
     # remove outliers
     segment_features = _clean_segment_features_of_outliers(segment_features)
 
@@ -918,22 +922,28 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         return pd.DataFrame()
 
     cur_index = 0
-    CP = pd.DataFrame()
+    time_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
-        CP = CP.append(time_cycle(cycle, sampling_frequency,
+        time_features = time_features.append(time_cycle(cycle, sampling_frequency,
                                     factor, unit, verbose=verbose),
                                     ignore_index=True)
         if i > 0:
-            CP.loc[cur_index-1, 'CP'] = (
-                CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, 'sys_peak_ts']).total_seconds()
+            time_features.loc[cur_index-1, 'CP'] = (
+                time_features.loc[cur_index, 'sys_peak_ts'] - time_features.loc[cur_index-1, 'sys_peak_ts']).total_seconds()
         cur_index = cur_index + 1
     # last cycle or only cycle need to relies on the difference between the general peaks
     all_peaks_index = len(all_peaks)-1
-    CP.loc[cur_index-1, 'CP'] = (
+    time_features.loc[cur_index-1, 'CP'] = (
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
-    temporalHRVFeatures = _temporal_hrv(CP['CP'])
-    frequencyHRVFeatures = _frequency_hrv(CP['CP'], sampling_frequency)
+    ## FOR TESTING WITH PPG-BP DATASET:
+    #ibi_series = time_features['CP'].append(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True)
+    #print(ibi_series)
+
+    # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features
+
+    temporalHRVFeatures = _temporal_hrv(time_features['CP'])
+    frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency)
 
     segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1)
  
@@ -941,6 +951,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         print('Cycle Features within Segment:')
         print(segment_features)
 
+    # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
     # remove outliers
     segment_features = _clean_segment_features_of_outliers(segment_features)
 
@@ -960,12 +971,12 @@ def _temporal_hrv(ibi_series):
         raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
     
     window = 5
-    nn_threshold = 50
+    nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx
     
     # Prepare data
     instantaneous_hr = 60 / (ibi_series)
     rolling_mean_hr = instantaneous_hr.rolling(window).mean()
-    rolling_24h = ibi_series.rolling(5)
+    rolling_24h = ibi_series.rolling(window)
     
     # Precalculate data for standard indexes
     nn_diff = np.diff(ibi_series)
@@ -973,11 +984,11 @@ def _temporal_hrv(ibi_series):
     
     # Precalculate data for geometrical indeces
     bin_size = 7.8125
-    hist_middle = (ibi_series.min() + ibi_series.max()) / 2
-    hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.min()) / bin_size)
-    hist_length = int(np.ceil((ibi_series.max() - hist_bound_lower) / bin_size) + 1)
+    hist_middle = (ibi_series.values.min() + ibi_series.values.max()) / 2
+    hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.values.min()) / bin_size)
+    hist_length = int(np.ceil((ibi_series.values.max() - hist_bound_lower) / bin_size) + 1)
     hist_bins = hist_bound_lower + np.arange(hist_length) * bin_size
-    hist, _ = np.histogram(ibi_series, hist_bins)
+    hist, _ = np.histogram(ibi_series.values, hist_bins)
     hist_height = np.max(hist)
     
     # Calculate TINN measure
@@ -1013,7 +1024,7 @@ def _temporal_hrv(ibi_series):
 	
     # features
     temporalHRVFeatures = pd.DataFrame({
-                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)),
+                            'SampEn': float(nolds.sampen(ibi_series, 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL
                             'MeanNN': ibi_series.mean(),
                             'MeanHR': instantaneous_hr.mean(),
                             'MaxHR': rolling_mean_hr.max(),
@@ -1024,21 +1035,21 @@ def _temporal_hrv(ibi_series):
                             'SDANN': rolling_24h.mean().std(),
                             'RMSSD': np.sqrt(np.mean(nn_diff ** 2)),
                             f'NN{nn_threshold}': nn_xx,
-                            f'pNN{nn_threshold}': nn_xx / len(ibi_series) * 100,
-                            'HRVTriangularIndex': len(ibi_series) / hist_height,
+                            f'pNN{nn_threshold}': nn_xx / len(ibi_series.values) * 100,
+                            'HRVTriangularIndex': len(ibi_series.values) / hist_height,
                             'TINN': m - n}, 
                             index=[0])
-    
+        
     return temporalHRVFeatures
 
-def _frequency_hrv(signal, sampling_frequency):
+def _frequency_hrv(ibi_series, sampling_frequency):
     
-    if isinstance(signal, np.ndarray):
-        ibi_series = pd.Series(signal)
-    elif not isinstance(signal, pd.core.series.Series):
+    if isinstance(ibi_series, np.ndarray):
+        ibi_series = pd.Series(ibi_series)
+    elif not isinstance(ibi_series, pd.core.series.Series):
         raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
 	
-	## TODO: HYPERPARAMETERS to config dict
+	## TODO: @Ari: HYPERPARAMETERS to config dict
     fft_interpolation = 4.0
     use_ulf = False
     lomb_smoothing = 0.02
@@ -1053,9 +1064,7 @@ def _frequency_hrv(signal, sampling_frequency):
     else:
         ulf_limit = ulf_limit #TODO: ??????
 	
-	# TODO: export transformMethod
-    transformMethod='welch'
-    freq, mag = _transformSigfromTimetoFreq(ibi_series, sampling_frequency, transformMethod, fft_interpolation)
+    freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation)
     
     abs_index = freq <= hf_limit
     ulf_index = freq <= ulf_limit
@@ -1121,8 +1130,8 @@ def _frequency_hrv(signal, sampling_frequency):
 def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
     quant = segment_df.quantile(treshold)
     for col in segment_df.columns:
-        if col.find('ts') == -1:
-            segment_df = segment_df[segment_df[col] < quant[col]*2]
+        if col.find('ts') == -1 and len(segment_df[col]) > 1:
+            segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency
     return segment_df
 
 # returns the x values for those samples in the signal, that are closest to some given y value
@@ -1135,19 +1144,19 @@ def _find_xs_for_y(y_s, y_val, sys_peak):
 # transforms a given temporal signal into the spectral domain
 # using different methods and allowing interpolation if required.
 # returns the frequencies and their magnitudes.
-def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False):
+def _transformSigfromTimetoFreq(timeSignal, fs, transformMethod='welch', fft_interpolation=None, verbose=False):
 
-    ## TODO: Can maybe be improved?!
     if fft_interpolation:
-        x = np.cumsum(signal)
-        f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate")
+        x = np.cumsum(timeSignal)
+        f_interpol = interpolate.interp1d(x, timeSignal, kind = "cubic", fill_value="extrapolate")
         t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
         
-        signal = f_interpol(t_interpol)
+        timeSignal = f_interpol(t_interpol)
         fs = fft_interpolation
     
+    # TODO: Maybe add different methods
     if transformMethod == 'welch':
-        freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear')
+        freq, mag = signal.welch(timeSignal, fs, window='hamming', scaling='density', detrend='linear')
     
     if verbose:
         plt.semilogy(freq, mag)

From 69da20402d469495581e9c22ff1b2c9da0b1fe9d Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Mon, 11 Apr 2022 17:43:06 +0000
Subject: [PATCH 15/38] Fix conversion error

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index e9a701e..d52870d 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -1024,7 +1024,7 @@ def _temporal_hrv(ibi_series):
 	
     # features
     temporalHRVFeatures = pd.DataFrame({
-                            'SampEn': float(nolds.sampen(ibi_series, 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL
+                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL
                             'MeanNN': ibi_series.mean(),
                             'MeanHR': instantaneous_hr.mean(),
                             'MaxHR': rolling_mean_hr.max(),

From 773b897d3e70562b7db4278aa6ef27030148c563 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 14 Apr 2022 15:33:47 +0000
Subject: [PATCH 16/38] Add comments; remove cleanSegment for frequency and hrv

---
 pypg/features.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index d52870d..e4860b5 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -179,7 +179,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
                         'sys_peak_ts': sys_peak_ts,
                         'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(),
                         'DT': (ppg.index.max() - sys_peak_ts).total_seconds(),
-                        'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense?
+                        #'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense?
                         'CT': (ppg.index.max() - ppg.index.min()).total_seconds(),
                         'SUT_VAL': (ppg.values[peaks[0]])
                         }, index=[0])
@@ -868,7 +868,7 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.
 
     # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
     # remove outliers
-    segment_features = _clean_segment_features_of_outliers(segment_features)
+    #segment_features = _clean_segment_features_of_outliers(segment_features)
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
@@ -941,6 +941,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     #print(ibi_series)
 
     # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features
+    # PAPER What should be the minimum threshold
 
     temporalHRVFeatures = _temporal_hrv(time_features['CP'])
     frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency)
@@ -953,7 +954,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
     # remove outliers
-    segment_features = _clean_segment_features_of_outliers(segment_features)
+    #segment_features = _clean_segment_features_of_outliers(segment_features)
 
     if verbose:
         print('Cycle Features within Segment and no Outliers:')
@@ -971,7 +972,7 @@ def _temporal_hrv(ibi_series):
         raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
     
     window = 5
-    nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx
+    nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before
     
     # Prepare data
     instantaneous_hr = 60 / (ibi_series)

From 14739af8e5f1e0f2b5fbace274b6b7058bb8c138 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Fri, 15 Apr 2022 14:58:50 +0000
Subject: [PATCH 17/38] Replace append by concat; Clean-up; add check for null
 array to frequency feature generation

---
 pypg/features.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index e4860b5..667fc99 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -247,8 +247,8 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     cur_index = 0
     segment_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
-        segment_features = segment_features.append(nonlinear_cycle(cycle,
-                                         sampling_frequency, factor, unit, verbose=verbose),
+        segment_features = pd.concat([segment_features, nonlinear_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose)],
                                          ignore_index=True)
 
     if verbose:
@@ -378,8 +378,8 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False
     cur_index = 0
     segment_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
-        segment_features = segment_features.append(statistical_cycle(cycle,
-                                         sampling_frequency, factor, unit, verbose=verbose),
+        segment_features = pd.concat([segment_features, statistical_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose)],
                                          ignore_index=True)
 
     if verbose:
@@ -503,11 +503,10 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     if len(cycles) == 0:
         return pd.DataFrame()
 
-    cur_index = 0
     segment_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
-        segment_features = segment_features.append(sdppg_cycle(cycle,
-                                         sampling_frequency, factor, unit, verbose=verbose),
+        segment_features = pd.concat([segment_features, sdppg_cycle(cycle,
+                                         sampling_frequency, factor, unit, verbose=verbose)],
                                          ignore_index=True)
 
     if verbose:
@@ -841,7 +840,10 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.
     ## -> Paper Wang # TODO: Add description to the top
     for start_index in np.arange(0, cutoff_freq, interval_size):
         end_index = start_index + interval_size
-        segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])
+        if mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)].size != 0:
+            segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])
+        else:
+            segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = 0
 
     ## -> Paper Slapnicar # TODO: Add description to the top
     sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]]
@@ -924,8 +926,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     cur_index = 0
     time_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
-        time_features = time_features.append(time_cycle(cycle, sampling_frequency,
-                                    factor, unit, verbose=verbose),
+        time_features = pd.concat([time_features, time_cycle(cycle, sampling_frequency,
+                                    factor, unit, verbose=verbose)],
                                     ignore_index=True)
         if i > 0:
             time_features.loc[cur_index-1, 'CP'] = (
@@ -937,7 +939,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
     ## FOR TESTING WITH PPG-BP DATASET:
-    #ibi_series = time_features['CP'].append(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True)
+    #ibi_series = time_features['CP'].concat(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True)
     #print(ibi_series)
 
     # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features
@@ -1116,7 +1118,7 @@ def _frequency_hrv(ibi_series, sampling_frequency):
     # Add ULF parameters, if band available
     if use_ulf and np.sum(ulf_index) > 0:
         ulf_peak = freq[np.argmax(mag[ulf_index])]
-        freqencyHRVFeatures.append({
+        freqencyHRVFeatures.concat({
                                 'ULF_peak': ulf_peak,
                                 'ULF_power': ulf,
                                 'ULF_power_log': np.log(ulf),

From 141068598ca5d38d34a91a3736280401c0c7b437 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Fri, 15 Apr 2022 20:38:10 +0000
Subject: [PATCH 18/38] Add feature empty check; Add cleanSegment length check

---
 pypg/features.py | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 667fc99..4d60a5a 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -508,7 +508,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         segment_features = pd.concat([segment_features, sdppg_cycle(cycle,
                                          sampling_frequency, factor, unit, verbose=verbose)],
                                          ignore_index=True)
-
+    
     if verbose:
         print('Cycle Features within Segment:')
         print(segment_features)
@@ -754,25 +754,25 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal))
 
     cycle_features = pd.DataFrame({
-                        'a_val': a_val, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality
-                        'b_val': b_val,
-                        'c_val': c_val,
-                        'd_val': d_val,
-                        'e_val': e_val,
-                        'AD': AD,
-                        'AE': AE,
-                        'CD': CD,
-                        'DF': DF,
-                        'D': D,
-                        'E': E,
-                        'ratio_b_a': ratio_b_a,
-                        'ratio_c_a': ratio_c_a,
-                        'ratio_d_a': ratio_d_a,
-                        'ratio_e_a': ratio_e_a,
-                        'ratio_AD_AF': ratio_AD_AF,
-                        'ratio_CD_AF': ratio_CD_AF,
-                        'ratio_AE_AF': ratio_AE_AF,
-                        'ratio_DF_AF': ratio_DF_AF,
+                        'a_val': a_val if a_val else np.nan, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality
+                        'b_val': b_val if b_val else np.nan,
+                        'c_val': c_val if c_val else np.nan,
+                        'd_val': d_val if d_val else np.nan,
+                        'e_val': e_val if e_val else np.nan,
+                        'AD': AD if AD else np.nan,
+                        'AE': AE if AE else np.nan,
+                        'CD': CD if CD else np.nan,
+                        'DF': DF if DF else np.nan,
+                        'D': D if D else np.nan,
+                        'E': E if E else np.nan,
+                        'ratio_b_a': ratio_b_a if ratio_b_a else np.nan,
+                        'ratio_c_a': ratio_c_a if ratio_c_a else np.nan,
+                        'ratio_d_a': ratio_d_a if ratio_d_a else np.nan,
+                        'ratio_e_a': ratio_e_a if ratio_e_a else np.nan,
+                        'ratio_AD_AF': ratio_AD_AF if ratio_AD_AF else np.nan,
+                        'ratio_CD_AF': ratio_CD_AF if ratio_CD_AF else np.nan,
+                        'ratio_AE_AF': ratio_AE_AF if ratio_AE_AF else np.nan,
+                        'ratio_DF_AF': ratio_DF_AF if ratio_DF_AF else np.nan,
                         }, index=[0])
 
     if verbose:
@@ -1135,6 +1135,8 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
     for col in segment_df.columns:
         if col.find('ts') == -1 and len(segment_df[col]) > 1:
             segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency
+            if len(segment_df) < 2:
+                return pd.DataFrame()
     return segment_df
 
 # returns the x values for those samples in the signal, that are closest to some given y value

From f943caa18188eab0892720dae4a4ac50676aef7d Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Sun, 17 Apr 2022 16:58:15 +0000
Subject: [PATCH 19/38] Add checks to feature extraction

---
 pypg/features.py | 100 +++++++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 52 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 4d60a5a..6bc9b4b 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -165,6 +165,8 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     max_amplitude = ppg.max()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if len(peaks) == 0:
+        return pd.DataFrame()
     sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
 
     if verbose:
@@ -179,7 +181,6 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
                         'sys_peak_ts': sys_peak_ts,
                         'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(),
                         'DT': (ppg.index.max() - sys_peak_ts).total_seconds(),
-                        #'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense?
                         'CT': (ppg.index.max() - ppg.index.min()).total_seconds(),
                         'SUT_VAL': (ppg.values[peaks[0]])
                         }, index=[0])
@@ -244,7 +245,6 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     if len(cycles) == 0:
         return pd.DataFrame()
 
-    cur_index = 0
     segment_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
         segment_features = pd.concat([segment_features, nonlinear_cycle(cycle,
@@ -307,6 +307,8 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
     ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if len(peaks) == 0:
+        return pd.DataFrame()
     sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
     sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
 
@@ -375,7 +377,6 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False
     if len(cycles) == 0:
         return pd.DataFrame()
 
-    cur_index = 0
     segment_features = pd.DataFrame()
     for i, cycle in enumerate(cycles):
         segment_features = pd.concat([segment_features, statistical_cycle(cycle,
@@ -437,6 +438,8 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if len(peaks) == 0:
+        return pd.DataFrame()
 
     if verbose:
         plt.figure()
@@ -564,6 +567,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+    if len(peaks) == 0:
+        return pd.DataFrame()
     sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
     sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
 
@@ -575,6 +580,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
 
     # second derviative of the PPG signal
     sdppg_signal = np.gradient(np.gradient(ppg.values))
+    if sdppg_signal.size == 0:
+        return pd.DataFrame()
 
     # TODO: features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!!
     # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585
@@ -591,6 +598,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     
     sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0)
     sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0)
+    if len(sdppg_signal_peaks) == 0 or len(sdppg_signal_valleys) == 0:
+        return pd.DataFrame()
 
     # initialize SDPPG indices and variables
     a_index = b_index = c_index = d_index = e_index = None
@@ -820,8 +829,10 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.
 
      # Transform signal from time to frequency domain
     freq, mag = _transformSigfromTimetoFreq(ppg.values, sampling_frequency, transformMethod=transformMethod, verbose=verbose)
+    if freq.size == 0 or mag.size == 0:
+        return pd.DataFrame()
 
-    # features
+    # Initialize features dataframe
     segment_features = pd.DataFrame()
 
     # Cut off frequencies
@@ -868,14 +879,6 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.
         print('Cycle Features within Segment:')
         print(segment_features)
 
-    # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
-    # remove outliers
-    #segment_features = _clean_segment_features_of_outliers(segment_features)
-
-    if verbose:
-        print('Cycle Features within Segment and no Outliers:')
-        print(segment_features)
-
     return segment_features
 
 
@@ -938,12 +941,9 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     time_features.loc[cur_index-1, 'CP'] = (
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
-    ## FOR TESTING WITH PPG-BP DATASET:
-    #ibi_series = time_features['CP'].concat(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True)
-    #print(ibi_series)
-
-    # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features
-    # PAPER What should be the minimum threshold
+    if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra small HRV)
+        print('PPG Segment too small! Please provide bigger PPG segment')
+        return pd.DataFrame()
 
     temporalHRVFeatures = _temporal_hrv(time_features['CP'])
     frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency)
@@ -954,14 +954,6 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         print('Cycle Features within Segment:')
         print(segment_features)
 
-    # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense
-    # remove outliers
-    #segment_features = _clean_segment_features_of_outliers(segment_features)
-
-    if verbose:
-        print('Cycle Features within Segment and no Outliers:')
-        print(segment_features)
-
     return segment_features
 
 
@@ -1068,7 +1060,9 @@ def _frequency_hrv(ibi_series, sampling_frequency):
         ulf_limit = ulf_limit #TODO: ??????
 	
     freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation)
-    
+    if freq.size == 0 or mag.size == 0:
+        return pd.DataFrame()
+
     abs_index = freq <= hf_limit
     ulf_index = freq <= ulf_limit
     vlf_index = (freq >= ulf_limit) & (freq <= vlf_limit)
@@ -1098,32 +1092,32 @@ def _frequency_hrv(ibi_series, sampling_frequency):
     hf_peak = freq[hf_index][np.argmax(mag[hf_index])]
     
     freqencyHRVFeatures = pd.DataFrame({
-                            'VLF_peak': vlf_peak,
-                            'VLF_power': vlf,
-                            'VLF_power_log': np.log(vlf),
-                            'VLF_power_rel': vlf_perc,
-                            'LF_peak': lf_peak,
-                            'LF_power': lf,
-                            'LF_power_log': np.log(lf),
-                            'LF_power_rel': lf_perc,
-                            'LF_power_norm': lf_nu,
-                            'HF_peak': hf_peak,
-                            'HF_power': hf,
-                            'HF_power_log': np.log(hf),
-                            'HF_power_rel': hf_perc,
-                            'HF_power_norm': hf_nu,
-                            'ratio_LF_HF': lf/hf}, 
-                            index=[0])
+                            'VLF_peak': vlf_peak if vlf_peak else np.nan,
+                            'VLF_power': vlf if vlf else np.nan,
+                            'VLF_power_log': np.log(vlf) if np.log(vlf) else np.nan,
+                            'VLF_power_rel': vlf_perc if vlf_perc else np.nan,
+                            'LF_peak': lf_peak if lf_peak else np.nan,
+                            'LF_power': lf if lf else np.nan,
+                            'LF_power_log': np.log(lf) if np.log(lf) else np.nan,
+                            'LF_power_rel': lf_perc if lf_perc else np.nan,
+                            'LF_power_norm': lf_nu if lf_nu else np.nan,
+                            'HF_peak': hf_peak if hf_peak else np.nan,
+                            'HF_power': hf if hf else np.nan,
+                            'HF_power_log': np.log(hf) if np.log(hf) else np.nan,
+                            'HF_power_rel': hf_perc if hf_perc else np.nan,
+                            'HF_power_norm': hf_nu if hf_nu else np.nan,
+                            'ratio_LF_HF': lf/hf if lf/hf else np.nan
+                            }, index=[0])
     
     # Add ULF parameters, if band available
     if use_ulf and np.sum(ulf_index) > 0:
         ulf_peak = freq[np.argmax(mag[ulf_index])]
         freqencyHRVFeatures.concat({
-                                'ULF_peak': ulf_peak,
-                                'ULF_power': ulf,
-                                'ULF_power_log': np.log(ulf),
-                                'ULF_power_rel': ulf_perc}, 
-                                index=[0])
+                                'ULF_peak': ulf_peak if ulf_peak else np.nan,
+                                'ULF_power': ulf if ulf else np.nan,
+                                'ULF_power_log': np.log(ulf) if np.log(ulf) else np.nan,
+                                'ULF_power_rel': ulf_perc if ulf_perc else np.nan
+                                }, index=[0])
     
     return freqencyHRVFeatures
 
@@ -1149,19 +1143,21 @@ def _find_xs_for_y(y_s, y_val, sys_peak):
 # transforms a given temporal signal into the spectral domain
 # using different methods and allowing interpolation if required.
 # returns the frequencies and their magnitudes.
-def _transformSigfromTimetoFreq(timeSignal, fs, transformMethod='welch', fft_interpolation=None, verbose=False):
+def _transformSigfromTimetoFreq(timeSignal, sampling_frequency, transformMethod='welch', fft_interpolation=None, verbose=False):
 
     if fft_interpolation:
         x = np.cumsum(timeSignal)
         f_interpol = interpolate.interp1d(x, timeSignal, kind = "cubic", fill_value="extrapolate")
-        t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs)
+        t_interpol = np.arange(x[0], x[-1], fft_interpolation/sampling_frequency)
         
         timeSignal = f_interpol(t_interpol)
-        fs = fft_interpolation
+        sampling_frequency = fft_interpolation
     
     # TODO: Maybe add different methods
     if transformMethod == 'welch':
-        freq, mag = signal.welch(timeSignal, fs, window='hamming', scaling='density', detrend='linear')
+        freq, mag = signal.welch(timeSignal, sampling_frequency, window='hamming', scaling='density', detrend='linear')
+    else:
+        print('Transform method not available. Please select one of the following: [welch]')
     
     if verbose:
         plt.semilogy(freq, mag)

From bd9ed2162185772b291bdfcf07009dbca168113b Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Tue, 19 Apr 2022 11:43:26 +0000
Subject: [PATCH 20/38] Cleanup; remove finished TODOs

---
 pypg/features.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 6bc9b4b..e635c2b 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -763,7 +763,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
         ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal))
 
     cycle_features = pd.DataFrame({
-                        'a_val': a_val if a_val else np.nan, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality
+                        'a_val': a_val if a_val else np.nan,
                         'b_val': b_val if b_val else np.nan,
                         'c_val': c_val if c_val else np.nan,
                         'd_val': d_val if d_val else np.nan,
@@ -941,7 +941,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     time_features.loc[cur_index-1, 'CP'] = (
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
-    if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra small HRV)
+    if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV)
         print('PPG Segment too small! Please provide bigger PPG segment')
         return pd.DataFrame()
 
@@ -1019,7 +1019,7 @@ def _temporal_hrv(ibi_series):
 	
     # features
     temporalHRVFeatures = pd.DataFrame({
-                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL
+                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)),
                             'MeanNN': ibi_series.mean(),
                             'MeanHR': instantaneous_hr.mean(),
                             'MaxHR': rolling_mean_hr.max(),
@@ -1128,7 +1128,7 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
     quant = segment_df.quantile(treshold)
     for col in segment_df.columns:
         if col.find('ts') == -1 and len(segment_df[col]) > 1:
-            segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency
+            segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)]
             if len(segment_df) < 2:
                 return pd.DataFrame()
     return segment_df

From 3b68b8fbc963dbfcb4bc3ff3889a96b8819eed7e Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Fri, 22 Apr 2022 10:37:12 +0000
Subject: [PATCH 21/38] Put output in verbose mode; add TODO

---
 pypg/features.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index e635c2b..5cf9001 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -942,7 +942,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
     
     if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV)
-        print('PPG Segment too small! Please provide bigger PPG segment')
+        if verbose:
+            print('PPG Segment too small! Please provide bigger PPG segment')
         return pd.DataFrame()
 
     temporalHRVFeatures = _temporal_hrv(time_features['CP'])
@@ -1129,7 +1130,7 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
     for col in segment_df.columns:
         if col.find('ts') == -1 and len(segment_df[col]) > 1:
             segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)]
-            if len(segment_df) < 2:
+            if len(segment_df) < 2: # TODO: Maybe take out because will remove data (Change generate_featueBAsed_Dataset.py)
                 return pd.DataFrame()
     return segment_df
 

From 0332152ab45573b6b847157f790edcef0b5e0772 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Fri, 22 Apr 2022 13:21:30 +0000
Subject: [PATCH 22/38] Remove len check of cleaned dataframe

---
 pypg/features.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 5cf9001..c1c5ab6 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -1130,8 +1130,6 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8):
     for col in segment_df.columns:
         if col.find('ts') == -1 and len(segment_df[col]) > 1:
             segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)]
-            if len(segment_df) < 2: # TODO: Maybe take out because will remove data (Change generate_featueBAsed_Dataset.py)
-                return pd.DataFrame()
     return segment_df
 
 # returns the x values for those samples in the signal, that are closest to some given y value

From 82967d99b930602d6f0e95ee7cc2778b661e091b Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Fri, 22 Apr 2022 14:35:47 +0000
Subject: [PATCH 23/38] Compare code to ALPS repository

---
 pypg/features.py | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index c1c5ab6..02445c8 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -970,7 +970,7 @@ def _temporal_hrv(ibi_series):
     nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before
     
     # Prepare data
-    instantaneous_hr = 60 / (ibi_series)
+    instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s?
     rolling_mean_hr = instantaneous_hr.rolling(window).mean()
     rolling_24h = ibi_series.rolling(window)
     
@@ -1047,18 +1047,11 @@ def _frequency_hrv(ibi_series, sampling_frequency):
 	
 	## TODO: @Ari: HYPERPARAMETERS to config dict
     fft_interpolation = 4.0
-    use_ulf = False
-    lomb_smoothing = 0.02
     
     vlf_limit = 0.04
     lf_limit = 0.15
     hf_limit = 0.4
-    
-    if not use_ulf or ibi_series.sum() < 300000: #TODO: check
-		# Do not use ULF band on sample shorter than 5 minutes
-        ulf_limit = 0
-    else:
-        ulf_limit = ulf_limit #TODO: ??????
+    ulf_limit = 0
 	
     freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation)
     if freq.size == 0 or mag.size == 0:
@@ -1110,16 +1103,6 @@ def _frequency_hrv(ibi_series, sampling_frequency):
                             'ratio_LF_HF': lf/hf if lf/hf else np.nan
                             }, index=[0])
     
-    # Add ULF parameters, if band available
-    if use_ulf and np.sum(ulf_index) > 0:
-        ulf_peak = freq[np.argmax(mag[ulf_index])]
-        freqencyHRVFeatures.concat({
-                                'ULF_peak': ulf_peak if ulf_peak else np.nan,
-                                'ULF_power': ulf if ulf else np.nan,
-                                'ULF_power_log': np.log(ulf) if np.log(ulf) else np.nan,
-                                'ULF_power_rel': ulf_perc if ulf_perc else np.nan
-                                }, index=[0])
-    
     return freqencyHRVFeatures
 
 

From c3a33ca15847a394c757f19bd145a37a6a7519fb Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Thu, 28 Apr 2022 13:13:18 +0000
Subject: [PATCH 24/38] adapt nn_threshold

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 02445c8..ba961ca 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -967,7 +967,7 @@ def _temporal_hrv(ibi_series):
         raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.')
     
     window = 5
-    nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before
+    nn_threshold = 50 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before
     
     # Prepare data
     instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s?

From e707d1d6a07c46dbc5de58d02b62935a31762408 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Tue, 10 May 2022 16:46:35 +0000
Subject: [PATCH 25/38] Adapt ibi_series computation

---
 pypg/features.py | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index ba961ca..097a3ff 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -43,7 +43,8 @@
 import nolds
 from scipy import signal, stats, interpolate
 
-from .cycles import find_with_template
+from .cycles import find_with_template, find_with_SNR
+from .plots import marks_plot
 
 
 def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
@@ -917,37 +918,33 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         ppg = pd.Series(ppg)
     elif not isinstance(ppg, pd.core.series.Series):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
-
-    cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    
+    cycles = find_with_SNR(ppg, sampling_frequency, factor=0.6667, verbose=verbose)
+    if len(cycles) == 0:
+        return pd.DataFrame()
+    
     # all signal peaks
     all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
+
     if verbose:
         print('All Peaks: ', all_peaks)
-    if len(cycles) == 0:
-        return pd.DataFrame()
 
-    cur_index = 0
-    time_features = pd.DataFrame()
-    for i, cycle in enumerate(cycles):
-        time_features = pd.concat([time_features, time_cycle(cycle, sampling_frequency,
-                                    factor, unit, verbose=verbose)],
-                                    ignore_index=True)
+    sys_peak_ts_curr = 0
+    ibi_series = pd.Series()
+    for i, peak in enumerate(all_peaks):
+        sys_peak_ts_prior = sys_peak_ts_curr
+        sys_peak_ts_curr = ppg.index[peak]
+
         if i > 0:
-            time_features.loc[cur_index-1, 'CP'] = (
-                time_features.loc[cur_index, 'sys_peak_ts'] - time_features.loc[cur_index-1, 'sys_peak_ts']).total_seconds()
-        cur_index = cur_index + 1
-    # last cycle or only cycle need to relies on the difference between the general peaks
-    all_peaks_index = len(all_peaks)-1
-    time_features.loc[cur_index-1, 'CP'] = (
-        all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
+            ibi_series = ibi_series.append(pd.Series([sys_peak_ts_curr - sys_peak_ts_prior]), ignore_index=True)
     
-    if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV)
+    if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV)
         if verbose:
             print('PPG Segment too small! Please provide bigger PPG segment')
         return pd.DataFrame()
 
-    temporalHRVFeatures = _temporal_hrv(time_features['CP'])
-    frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency)
+    temporalHRVFeatures = _temporal_hrv(ibi_series)
+    frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency)
 
     segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1)
  

From f9f5eb0f6f773e8d60cb4a2c6d65db2f3a32003f Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Tue, 7 Jun 2022 11:07:34 +0000
Subject: [PATCH 26/38] add sw features

---
 pypg/features.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pypg/features.py b/pypg/features.py
index 097a3ff..aff4795 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -192,6 +192,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         if verbose:
             plt.scatter([x_1, x_2], ppg[[x_1, x_2]])
         cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds()
+        cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1).total_seconds()
         cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds()
         cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1)
     

From 3fb109dcbd0728c14d331454add6d7b2c2e3e878 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Tue, 7 Jun 2022 12:09:21 +0000
Subject: [PATCH 27/38] Change cutOff-Frequency

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index aff4795..25d52d1 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -792,7 +792,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     return cycle_features
 
 
-def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.0, interval_size=5, verbose=False):
+def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=5, verbose=False):
     """
     Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described

From 3688746a5c2e5037531142a22668cc33440cc7c9 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 11:46:40 +0000
Subject: [PATCH 28/38] adapt computation of time features

---
 pypg/features.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 25d52d1..2faf2e8 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -46,7 +46,7 @@
 from .cycles import find_with_template, find_with_SNR
 from .plots import marks_plot
 
-
+# TODO: unit could be removed
 def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts time domain features from PPG cycles in a give PPG segment as
@@ -102,9 +102,10 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         if i > 0:
             segment_features.loc[cur_index-1, 'CP'] = (
                 segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1,
-                'sys_peak_ts']).total_seconds()
+                'sys_peak_ts'])/sampling_frequency
         cur_index = cur_index + 1
     
+    # TODO: Remove CP because some cycles are skipped which leads to wrong CP values
     # last cycle or only cycle need to relies on the difference between the general peaks
     all_peaks_index = len(all_peaks)-1
     segment_features.loc[cur_index-1, 'CP'] = (
@@ -122,6 +123,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         print(segment_features)
     return segment_features
 
+# TODO: unit could be removed
 def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
     Extracts time domain features for a PPG cycle. Returns a pandas.Series with
@@ -158,31 +160,30 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     elif not isinstance(ppg, pd.core.series.Series):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
-    if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
+    if isinstance(ppg.index, pd.RangeIndex):
+        ppg.index = list(ppg.index)
 
-    ppg = ppg.interpolate(method='time')
+    #ppg = ppg.interpolate(method='time')
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
     max_amplitude = ppg.max()
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
     if len(peaks) == 0:
         return pd.DataFrame()
-    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
+    sys_peak_ts = ppg.index[peaks[0]]
 
     if verbose:
         plt.figure()
-        plt.xlim((ppg.index.min(), ppg.index.max()))
-        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]])
         plt.plot(ppg.index, ppg.values)
-
+        
     # features
     cycle_features = pd.DataFrame({
                         'start_ts': ppg.index.min(),
                         'sys_peak_ts': sys_peak_ts,
-                        'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(),
-                        'DT': (ppg.index.max() - sys_peak_ts).total_seconds(),
-                        'CT': (ppg.index.max() - ppg.index.min()).total_seconds(),
+                        'SUT': (sys_peak_ts - ppg.index.min())/sampling_frequency,
+                        'DT': (ppg.index.max() - sys_peak_ts)/sampling_frequency,
+                        'CT': (ppg.index.max() - ppg.index.min())/sampling_frequency,
                         'SUT_VAL': (ppg.values[peaks[0]])
                         }, index=[0])
 
@@ -190,10 +191,10 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
         p_ampl = p_value / 100 * max_amplitude
         x_1, x_2 = _find_xs_for_y(ppg, p_ampl, peaks[0])
         if verbose:
-            plt.scatter([x_1, x_2], ppg[[x_1, x_2]])
-        cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds()
-        cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1).total_seconds()
-        cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds()
+            plt.scatter([x_1, x_2], [ppg.where(ppg.index==x_1).dropna(), ppg.where(ppg.index==x_2).dropna()])
+        cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts)/sampling_frequency
+        cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1)/sampling_frequency
+        cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1)/sampling_frequency
         cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1)
     
     if verbose:

From 752dce5d82f63783762854a21eb98513958c5d58 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 12:07:38 +0000
Subject: [PATCH 29/38] adapt nonlinear: fs for time computation

---
 pypg/features.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 2faf2e8..21b95d2 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -203,6 +203,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     return cycle_features
 
 
+# TODO: unit could be removed
 def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts non-linear features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
@@ -267,6 +268,7 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     return segment_features
 
+# TODO: unit could be removed
 def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
     Extracts nonlinear features for a PPG cycle. Returns a pandas.Series.
@@ -302,23 +304,22 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
     elif not isinstance(ppg, pd.core.series.Series):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
-    if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
+    if isinstance(ppg.index, pd.RangeIndex):
+        ppg.index = list(ppg.index)
 
-    ppg = ppg.interpolate(method='time')
+    #ppg = ppg.interpolate(method='time')
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
-    ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds()
+    ppg_cycle_duration = (ppg.index.max() - ppg.index.min())/sampling_frequency
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
     if len(peaks) == 0:
         return pd.DataFrame()
-    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
-    sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
+    sys_peak_ts = ppg.index[peaks[0]]
+    sys_peak = (sys_peak_ts - ppg.index.min())/sampling_frequency
 
     if verbose:
         plt.figure()
-        plt.xlim((ppg.index.min(), ppg.index.max()))
-        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]])
         plt.plot(ppg.index, ppg.values)
 
     # features

From 4f8e85009172f85d1b44756a8125c58037dff11d Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 12:19:41 +0000
Subject: [PATCH 30/38] adapt statistical: fs for time computation

---
 pypg/features.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 21b95d2..1640e98 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -336,6 +336,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa
     return cycle_features
 
 
+# TODO: unit could be removed
 def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts statistical features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
@@ -400,6 +401,7 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False
 
     return segment_features
 
+# TODO: unit could be removed
 def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
     Extracts statistical features for a PPG cycle. Returns a pandas.Series.
@@ -435,10 +437,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
     elif not isinstance(ppg, pd.core.series.Series):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
-    if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
-
-    ppg = ppg.interpolate(method='time')
+    if isinstance(ppg.index, pd.RangeIndex):
+        ppg.index = list(ppg.index)
+    
+    #ppg = ppg.interpolate(method='time')
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
@@ -447,8 +449,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
 
     if verbose:
         plt.figure()
-        plt.xlim((ppg.index.min(), ppg.index.max()))
-        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]])
         plt.plot(ppg.index, ppg.values)
 
     # features

From 55c4d7e7c558c079d9bafe799089ebf74d6e3dfd Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 12:40:49 +0000
Subject: [PATCH 31/38] adapt statistical: fs for time computation

---
 pypg/features.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 1640e98..dee9678 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -466,6 +466,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=
     return cycle_features
 
 
+# TODO: unit could be removed
 def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts features from the second derivative (SDPPG or APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
@@ -530,6 +531,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
 
     return segment_features
 
+# TODO: unit could be removed
 def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False):
     """
     Extracts features from the second derivative (SDPPG or APG) for the PPG cycles. Returns a pandas.Series.
@@ -565,22 +567,21 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     elif not isinstance(ppg, pd.core.series.Series):
         raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.')
 
-    if not isinstance(ppg.index, pd.DatetimeIndex):
-        ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered???
+    if isinstance(ppg.index, pd.RangeIndex):
+        ppg.index = list(ppg.index)
 
-    ppg = ppg.interpolate(method='time')
+    #ppg = ppg.interpolate(method='time')
     ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values)
 
     peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0]
     if len(peaks) == 0:
         return pd.DataFrame()
-    sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?)
-    sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds()
+    sys_peak_ts = ppg.index[peaks[0]]
+    sys_peak = (sys_peak_ts - ppg.index.min())
 
     if verbose:
         plt.figure()
-        plt.xlim((ppg.index.min(), ppg.index.max()))
-        plt.scatter(ppg.index[peaks], ppg[peaks])
+        plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]])
         plt.plot(ppg.index, ppg.values)
 
     # second derviative of the PPG signal

From a80498c714e3cf6f13a442ea40ee9281ed7dfe2a Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 12:50:06 +0000
Subject: [PATCH 32/38] adapt hrv: fs for time computation

---
 pypg/features.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index dee9678..b5f4171 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -941,12 +941,15 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         sys_peak_ts_curr = ppg.index[peak]
 
         if i > 0:
-            ibi_series = ibi_series.append(pd.Series([sys_peak_ts_curr - sys_peak_ts_prior]), ignore_index=True)
+            ibi_series = ibi_series.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True)
     
     if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV)
         if verbose:
             print('PPG Segment too small! Please provide bigger PPG segment')
         return pd.DataFrame()
+    else:
+        if verbose:
+            print('IBI-Series: ', ibi_series.values)
 
     temporalHRVFeatures = _temporal_hrv(ibi_series)
     frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency)

From 6a73fb26c7e68ff9c6985442a02d942a941feecf Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 12:50:37 +0000
Subject: [PATCH 33/38] Add TODO

---
 pypg/features.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pypg/features.py b/pypg/features.py
index b5f4171..19f511d 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -888,6 +888,7 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5
     return segment_features
 
 
+# TODO: unit could be removed
 def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     """
     Extracts hrv features from a given PPG segment. 

From 7d38d7f196973c6a39a63ee099121cc7fd7cdb06 Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Thu, 9 Jun 2022 22:20:35 +0000
Subject: [PATCH 34/38] Change interval size for frfrequency blocks

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 19f511d..0dec31f 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -796,7 +796,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False)
     return cycle_features
 
 
-def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=5, verbose=False):
+def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=0.5, verbose=False):
     """
     Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in
     which each line contains the features for a given valid cycle (as described

From 321baf547b16f0bf52ba71563e5943942f8d634a Mon Sep 17 00:00:00 2001
From: Florian Hermes <florian.hermes@student.hpi.de>
Date: Sat, 11 Jun 2022 11:02:37 +0000
Subject: [PATCH 35/38] Add SampEn check

---
 pypg/features.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 0dec31f..7104a53 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -1023,10 +1023,14 @@ def _temporal_hrv(ibi_series):
 	
 	# Non-Linear Parameters
     tolerance = ibi_series.std() * 0.2
+
+    SampEn = float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance))
+    if SampEn == np.inf:
+        SampEn = np.nan
 	
     # features
     temporalHRVFeatures = pd.DataFrame({
-                            'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)),
+                            'SampEn': SampEn,
                             'MeanNN': ibi_series.mean(),
                             'MeanHR': instantaneous_hr.mean(),
                             'MaxHR': rolling_mean_hr.max(),

From f4ec044f4572f802243db5baeb26b97e247e0eef Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Sat, 18 Jun 2022 13:31:28 +0000
Subject: [PATCH 36/38] add CP mean and var; remove CP from time features

---
 pypg/features.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pypg/features.py b/pypg/features.py
index 7104a53..c4ab034 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -99,7 +99,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         segment_features = pd.concat([segment_features, time_cycle(cycle,
                                          sampling_frequency, factor, unit, verbose=verbose)],
                                          ignore_index=True)
-        if i > 0:
+        '''if i > 0:
             segment_features.loc[cur_index-1, 'CP'] = (
                 segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1,
                 'sys_peak_ts'])/sampling_frequency
@@ -109,7 +109,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     # last cycle or only cycle need to relies on the difference between the general peaks
     all_peaks_index = len(all_peaks)-1
     segment_features.loc[cur_index-1, 'CP'] = (
-        all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency
+        all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency'''
 
     if verbose:
         print('Cycle Features within Segment:')
@@ -936,27 +936,28 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
         print('All Peaks: ', all_peaks)
 
     sys_peak_ts_curr = 0
-    ibi_series = pd.Series()
+    CP = pd.Series()
     for i, peak in enumerate(all_peaks):
         sys_peak_ts_prior = sys_peak_ts_curr
         sys_peak_ts_curr = ppg.index[peak]
 
         if i > 0:
-            ibi_series = ibi_series.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True)
+            CP = CP.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True)
     
-    if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV)
+    if len(CP) < 5: # TODO: Check for minimum size (Ultra short HRV)
         if verbose:
             print('PPG Segment too small! Please provide bigger PPG segment')
         return pd.DataFrame()
     else:
         if verbose:
-            print('IBI-Series: ', ibi_series.values)
+            print('IBI-Series: ', CP.values)
 
-    temporalHRVFeatures = _temporal_hrv(ibi_series)
-    frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency)
+    temporalHRVFeatures = _temporal_hrv(CP)
+    frequencyHRVFeatures = _frequency_hrv(CP, sampling_frequency)
 
     segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1)
- 
+    segment_features = pd.concat([segment_features, pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()})])
+
     if verbose:
         print('Cycle Features within Segment:')
         print(segment_features)

From 514163088a94eb3a5718e41a4fd12f742ea12ce6 Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Sat, 18 Jun 2022 17:34:45 +0000
Subject: [PATCH 37/38] fix concatinating error

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index c4ab034..7b31766 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -956,7 +956,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False):
     frequencyHRVFeatures = _frequency_hrv(CP, sampling_frequency)
 
     segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1)
-    segment_features = pd.concat([segment_features, pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()})])
+    segment_features = pd.concat([segment_features.reset_index(drop=True), pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()}, index=[0])], axis=1)
 
     if verbose:
         print('Cycle Features within Segment:')

From 16eb9093ba163fd93dfc0157eff0b4ce9368e88e Mon Sep 17 00:00:00 2001
From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com>
Date: Sat, 18 Jun 2022 17:50:42 +0000
Subject: [PATCH 38/38] Adapt computation of HR

---
 pypg/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypg/features.py b/pypg/features.py
index 7b31766..90e7118 100644
--- a/pypg/features.py
+++ b/pypg/features.py
@@ -977,7 +977,7 @@ def _temporal_hrv(ibi_series):
     nn_threshold = 50 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before
     
     # Prepare data
-    instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s?
+    instantaneous_hr = 60 / ibi_series #/1000  # TODO: @Ari: why divided by 1000? from ms to s? > if ibi series in ms but for MIMIC it is in s
     rolling_mean_hr = instantaneous_hr.rolling(window).mean()
     rolling_24h = ibi_series.rolling(window)