From 41dbfb39a436c0e8ee3c3234fc9971d25f1d7ebb Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Mon, 14 Feb 2022 13:04:31 +0000 Subject: [PATCH 01/38] Add nonlinear feature extractions --- pypg/features.py | 129 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/pypg/features.py b/pypg/features.py index 47597d3..6d833a0 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -181,6 +181,135 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): plt.show() return cycle_features + +def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): + """ + Extracts non-linear features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in + which each line contains the features for a given valid cycle (as described + by Li et al.) from the PPG segment given as input. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + segment_features : pd.DataFrame + A dataframe with the non-linear features in seconds for each valid + cycle in the PPG segment. + """ + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + # all signal peaks + all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if verbose: + print('All Peaks: ', all_peaks) + if len(cycles) == 0: + return pd.DataFrame() + + cur_index = 0 + segment_features = pd.DataFrame() + for i, cycle in enumerate(cycles): + segment_features = segment_features.append(nonlinear_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose), + ignore_index=True) + + if verbose: + print('Cycle Features within Segment:') + print(segment_features) + + # remove outliers + segment_features = _clean_segment_features_of_outliers(segment_features) + + if verbose: + print('Cycle Features within Segment and no Outliers:') + return segment_features + + +def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): + """ + Extracts nonlinear features for a PPG cycle. Returns a pandas.Series. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + cycle_features : pd.DataFrame + A dataframe with the nonlinear features in seconds for the PPG cycle. + """ + + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + if not isinstance(ppg.index, pd.DatetimeIndex): + ppg.index = pd.to_datetime(ppg.index, unit=unit) + + ppg = ppg.interpolate(method='time') + ppg = ppg - ppg.min() + ppg_cycle_duration = len(ppg) + + peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + systolic_peak_index = peaks[0] + + if verbose: + plt.figure() + plt.xlim((ppg.index.min(), ppg.index.max())) + plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.plot(ppg.index, ppg.values) + + # features + cycle_features = pd.DataFrame({ + 'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg)), + 'ratio_SUT_WL_DT': ((systolic_peak_index / ppg_cycle_duration) / (ppg_cycle_duration - systolic_peak_index)), + 'ratio_SUT_DT': (systolic_peak_index / (ppg_cycle_duration - systolic_peak_index)), + 'ratio_cycle_mean_cycle_var': (np.mean(ppg) / np.var(ppg)) + }, index=[0]) + + if verbose: + plt.show() + return cycle_features + + # takes a dataframe of calculated features and removes the outliers occurring due # to inaccuracies in the signal def _clean_segment_features_of_outliers(segment_df, treshold=0.8): From 24a1889e4179d468d77bfe1ae7a1780671f1d197 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 14 Feb 2022 16:53:52 +0000 Subject: [PATCH 02/38] Add statistical features extraction --- pypg/features.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 6d833a0..7058d7c 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -31,7 +31,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from scipy import signal +from scipy import signal, stats from .cycles import find_with_template @@ -310,6 +310,132 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa return cycle_features +def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): + """ + Extracts statistical features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in + which each line contains the features for a given valid cycle (as described + by Li et al.) from the PPG segment given as input. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + segment_features : pd.DataFrame + A dataframe with the statistical features in seconds for each valid + cycle in the PPG segment. + """ + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + # all signal peaks + all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if verbose: + print('All Peaks: ', all_peaks) + if len(cycles) == 0: + return pd.DataFrame() + + cur_index = 0 + segment_features = pd.DataFrame() + for i, cycle in enumerate(cycles): + segment_features = segment_features.append(statistical_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose), + ignore_index=True) + + if verbose: + print('Cycle Features within Segment:') + print(segment_features) + + # remove outliers + segment_features = _clean_segment_features_of_outliers(segment_features) + + if verbose: + print('Cycle Features within Segment and no Outliers:') + return segment_features + + +def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): + """ + Extracts statistical features for a PPG cycle. Returns a pandas.Series. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + cycle_features : pd.DataFrame + A dataframe with the statistical features in seconds for the PPG cycle. + """ + + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + if not isinstance(ppg.index, pd.DatetimeIndex): + ppg.index = pd.to_datetime(ppg.index, unit=unit) + + ppg = ppg.interpolate(method='time') + ppg = ppg - ppg.min() + + peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + + if verbose: + plt.figure() + plt.xlim((ppg.index.min(), ppg.index.max())) + plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.plot(ppg.index, ppg.values) + + # features + cycle_features = pd.DataFrame({ + 'ppg_mean': (np.mean(ppg)), + 'ppg_var': (np.var(ppg)), + 'ppg_skewness': (stats.skew(ppg)), + 'ppg_kurtosis': (stats.kurtosis(ppg)) + }, index=[0]) + + if verbose: + plt.show() + return cycle_features + + # takes a dataframe of calculated features and removes the outliers occurring due # to inaccuracies in the signal def _clean_segment_features_of_outliers(segment_df, treshold=0.8): From cb2eb310c4ba7d102a75487bdf58f0f2000dd34e Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 14 Feb 2022 18:27:44 +0000 Subject: [PATCH 03/38] Add sdppg features extraction --- pypg/features.py | 312 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/pypg/features.py b/pypg/features.py index 7058d7c..d44583c 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -436,6 +436,318 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= return cycle_features +def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): + """ + Extracts features from the second derivative (APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in + which each line contains the features for a given valid cycle (as described + by Li et al.) from the PPG segment given as input. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + segment_features : pd.DataFrame + A dataframe with the features from the second derivative (APG) of the PPG cycles in seconds for each valid + cycle in the PPG segment. + """ + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + # all signal peaks + all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if verbose: + print('All Peaks: ', all_peaks) + if len(cycles) == 0: + return pd.DataFrame() + + cur_index = 0 + segment_features = pd.DataFrame() + for i, cycle in enumerate(cycles): + segment_features = segment_features.append(sdppg_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose), + ignore_index=True) + + if verbose: + print('Cycle Features within Segment:') + print(segment_features) + + # remove outliers + segment_features = _clean_segment_features_of_outliers(segment_features) + + if verbose: + print('Cycle Features within Segment and no Outliers:') + return segment_features + + +def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): + """ + Extracts features from the second derivative (APG) for the PPG cycles. Returns a pandas.Series. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + cycle_features : pd.DataFrame + A dataframe with the features from the second derivative (APG) in seconds for the PPG cycle. + """ + + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + if not isinstance(ppg.index, pd.DatetimeIndex): + ppg.index = pd.to_datetime(ppg.index, unit=unit) + + ppg = ppg.interpolate(method='time') + ppg = ppg - ppg.min() + + peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + systolic_peak_index = peaks[0] + + if verbose: + plt.figure() + plt.xlim((ppg.index.min(), ppg.index.max())) + plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.plot(ppg.index, ppg.values) + + # second derviative of the PPG signal + sdppg_signal = np.gradient(np.gradient(ppg)) + + # features + # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585 + # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879 + # a is global maximum; + # b is the global minimum; (maybe get systolic_peak_index and get local minimum between a_index and systolic_peak_index) + # e and c are the local maximum; + # d is the local minimum in APG between e and c; + # D is local minimum in APG after e + # + # and Gaurav Paper + + # TODO: Can be improved! + # TODO: maybe add systolic peak index as reference for indices detection + + sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0) + sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0) + + # a wave + if len(sdppg_signal_peaks) != 0: + + a_index = (sdppg_signal_peaks[np.argmax(peak_dict['peak_heights'])]) + + ## subset signal + dictionary to only have left over indeces + magnitudes + old_len = len(sdppg_signal_peaks) + sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]] + peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] + + old_len = len(sdppg_signal_valleys) + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index[0]] + valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] + + # b wave + if len(sdppg_signal_valleys) != 0: + + b_index = (sdppg_signal_valleys[np.argmax(valley_dict['peak_heights'])]) + + ## subset signal + dictionary to only have left over indeces + magnitudes + old_len = len(sdppg_signal_peaks) + sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]] + peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] + + old_len = len(sdppg_signal_valleys) + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index[0]] + valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] + + # c and e wave + if len(sdppg_signal_peaks) >= 2: + + peaks = peak_dict['peak_heights'].argsort()[-2:] + + if len(peaks) == 2: + if peaks[0] < peaks[1]: + c_index = (sdppg_signal_peaks[peaks[0]]) + e_index = (sdppg_signal_peaks[peaks[1]]) + else: + e_index = (sdppg_signal_peaks[peaks[0]]) + c_index = (sdppg_signal_peaks[peaks[1]]) + + ## subset signal + dictionary to only have left over indeces + magnitudes + old_len = len(sdppg_signal_valleys) + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]] + valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] + + # d wave + if len(sdppg_signal_valleys) >= 1 and (c_index or e_index): + + ## get only valleys between c and e + old_len = len(sdppg_signal_valleys) + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index[0]] + + if old_len > len(sdppg_signal_valleys): + valley_dict['peak_heights'] = valley_dict['peak_heights'][:-(old_len - len(sdppg_signal_valleys))] + + if len(valley_dict['peak_heights']) != 0: + d_index = (sdppg_signal_valleys[valley_dict['peak_heights'].argmax()]) + + ## SDPPG Signal values + ### a + if a_index: + a_val = (sdppg_signal[a_index]) + + ### b + if b_index: + b_val = (sdppg_signal[b_index]) + + ### c + if c_index: + c_val = (sdppg_signal[c_index]) + + ### d + if d_index: + d_val = (sdppg_signal[d_index]) + + ### e + if e_index: + e_val = (sdppg_signal[e_index]) + + + if verbose: + plt.figure(figsize=(17,6)) + plt.plot(sdppg_signal, label = 'SDPPG Signal') + plt.scatter(np.array([a_index, b_index, c_index, d_index, e_index]), np.array([a_val, b_val, c_val, d_val, e_val]), c='g', label='Representative Points') + plt.title('SDPPG Signal Wave Detection') + plt.xlabel('Time') + plt.ylabel('SDPPG') + plt.legend() + + + ## Time values of characteristics + ### Location of c with respect to time (AD) + if c_index: + AD = (c_index / sampling_frequency) + + ### Location of d with respect to time (AE) + if d_index: + AE = (d_index / sampling_frequency) + + ### Difference of peak and dicrotic notch with respect to time (CD) + if c_index and systolic_peak_index is not None: + CD = ((c_index - systolic_peak_index) / sampling_frequency) + + ### Difference of dicrotic notch and end of signal with respect to time (DF) + if c_index: + DF = ((len(sdppg_signal) - c_index) / sampling_frequency) + + + ## PPG Signal values + ### Dicrotic notch (D') + if c_index: + D = (ppg[c_index]) + + ### Diastolic point (E') + if d_index: + E = (ppg[d_index]) + + + ## Ratios + ### First valley to the peak value of APG signal (b/a) + if b_val and a_val: + ratio_b_a = (b_val / a_val) + + ### Dicrotic notch to the peak value of APG signal (c/a) + if c_val and a_val: + ratio_c_a = (c_val / a_val) + + ### Diastolic point to the peak value of APG signal (d/a) + if d_val and a_val: + ratio_d_a = (d_val / a_val) + + ### e to the peak value of APG signal (e/a) + if e_val and a_val: + ratio_e_a = (e_val / a_val) + + ### Location of dicrotic notch with respect to the length of window (AD/AF) + if c_index: + ratio_AD_AF = (c_index / len(sdppg_signal)) + + ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF) + if c_index and systolic_peak_index is not None: + ratio_CD_AF = ((c_index - systolic_peak_index) / len(sdppg_signal)) + + ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF) + if d_index: + ratio_AE_AF = (d_index / len(sdppg_signal)) + + ### Difference of dichrotic notch and end with respect ot length of window (DF/AF) + if c_index: + ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal)) + + cycle_features = pd.DataFrame({ + 'a_val': a_val, + 'b_val': b_val, + 'c_val': c_val, + 'd_val': d_val, + 'e_val': e_val, + 'AD': AD, + 'AE': AE, + 'CD': CD, + 'DF': DF, + 'D': D, + 'E': E, + 'ratio_b_a': ratio_b_a, + 'ratio_c_a': ratio_c_a, + 'ratio_d_a': ratio_d_a, + 'ratio_e_a': ratio_e_a, + 'ratio_AD_AF': ratio_AD_AF, + 'ratio_CD_AF': ratio_CD_AF, + 'ratio_AE_AF': ratio_AE_AF, + 'ratio_DF_AF': ratio_DF_AF, + }, index=[0]) + + if verbose: + plt.show() + return cycle_features + # takes a dataframe of calculated features and removes the outliers occurring due # to inaccuracies in the signal def _clean_segment_features_of_outliers(segment_df, treshold=0.8): From 3bf1f2835d8cf7b7463b3ad156e533fc2d224a14 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 14 Feb 2022 20:27:02 +0000 Subject: [PATCH 04/38] Add frequency features extraction --- pypg/features.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index d44583c..36cb22e 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -31,7 +31,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from scipy import signal, stats +from scipy import signal, stats, interpolate from .cycles import find_with_template @@ -748,6 +748,95 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) plt.show() return cycle_features + +def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_size, verbose=False): + """ + Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in + which each line contains the features for a given valid cycle (as described + by Li et al.) from the PPG segment given as input. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + verbose : bool, optional + If True, plots the features, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + segment_features : pd.DataFrame + A dataframe with the frequency features in seconds for each valid + cycle in the PPG segment. + """ + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + # Transform signal from time to frequency domain + freq, mag = _transformSigfromTimetoFreq(ppg, sampling_frequency, transformMethod, verbose) + + # features + segment_features = pd.DataFrame() + + # Cut off frequencies + freq_cut = freq[freq <= cutoff_freq] + mag_cut = mag[freq <= cutoff_freq] + + if verbose: + plt.semilogy(freq_cut, mag_cut) + plt.title('CutOff') + plt.xlabel('Frequency') + plt.ylabel('mag') + plt.show() + + ## -> Paper Wang + for start_index in np.arange(0, cutoff_freq, interval_size): + end_index = start_index + interval_size + segment_features.append({ + 'freqInt' + str(start_index) + '_' + str(end_index): + np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])}, + ignore_index=True) + + ## -> Paper Slapnicar + sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]] + if len(sorted_mag) >= 3: + top3_psd = sorted_mag[:3] + else: + top3_psd = sorted_mag[:len(sorted_mag)] + + for i in np.arange(0, len(top3_psd)): + segment_features.append({ + 'mag_top' + str(i+1): + top3_psd[i]}, + ignore_index=True) + + sorted_freq = freq_cut[np.argsort(mag_cut)[::-1]] + if len(sorted_freq) >= 3: + top3_freq = sorted_freq[:3] + else: + top3_freq = sorted_freq[:len(sorted_freq)] + + for i in np.arange(0, len(top3_freq)): + segment_features.append({ + 'freq_top' + str(i+1): + top3_freq[i]}, + ignore_index=True) + + if verbose: + print('Cycle Features within Segment:') + print(segment_features) + + return segment_features + + # takes a dataframe of calculated features and removes the outliers occurring due # to inaccuracies in the signal def _clean_segment_features_of_outliers(segment_df, treshold=0.8): @@ -763,3 +852,30 @@ def _find_xs_for_y(y_s, y_val, sys_peak): x_1 = diffs[:sys_peak].idxmin() x_2 = diffs[sys_peak:].idxmin() return x_1, x_2 + +# transforms a given temporal signal into the spectral domain +# using different methods and allowing interpolation if required. +# returns the frequencies and their magnitudes. +def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False): + + ## TODO: Can maybe be improved?! + + if fft_interpolation: + x = np.cumsum(signal) + f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") + t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) + + signal = f_interpol(t_interpol) + fs = fft_interpolation + + if transformMethod == 'welch': + freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') + + if verbose: + plt.semilogy(freq, mag) + plt.title('Spectral Analysis using Welch Method') + plt.xlabel('Frequency') + plt.ylabel('PSD') + plt.show() + + return freq, mag \ No newline at end of file From 6b1edf68ce9d0d4a93b2641b96aad327901cfc88 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Tue, 15 Feb 2022 00:05:35 +0000 Subject: [PATCH 05/38] Add HRV features extraction --- pypg/features.py | 281 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 264 insertions(+), 17 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 36cb22e..e37a19b 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -28,9 +28,11 @@ https://doi.org/10.1088/0967-3334/33/9/1491 """ +from operator import index import matplotlib.pyplot as plt import numpy as np import pandas as pd +import nolds from scipy import signal, stats, interpolate from .cycles import find_with_template @@ -837,6 +839,87 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si return segment_features +def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): + """ + Extracts hrv features from PPG cycles in a give PPG segment. + Returns a pandas.DataFrame in which each line contains the features + for a given valid cycle (as described by Li et al.) from the PPG segment given as input. + + Parameters + ---------- + ppg : pandas.Series, ndarray + The PPG signal. + sampling_frequency : int + The sampling frequency of the signal in Hz. + factor: float, optional + Number that is used to calculate the distance in relation to the + sampling_frequency, by default 0.667 (or 66.7%). The factor is based + on the paper by Elgendi et al. (2013). + unit : str, optional + The unit of the index, by default 'ms'. + verbose : bool, optional + If True, plots the features with and without outliers, by default False. + + Raises + ---------- + Exception + When PPG values are neither pandas.Series nor ndarray. + + Returns + ------- + segment_features : pd.DataFrame + A dataframe with the hrv-domain features in seconds for each valid + cycle in the PPG segment. + """ + if isinstance(ppg, np.ndarray): + ppg = pd.Series(ppg) + elif not isinstance(ppg, pd.core.series.Series): + raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') + + cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + # all signal peaks + all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if verbose: + print('All Peaks: ', all_peaks) + if len(cycles) == 0: + return pd.DataFrame() + + segment_features = pd.DataFrame() + + cur_index = 0 + CP = pd.DataFrame() + for i, cycle in enumerate(cycles): + CP = CP.append(time_cycle(cycle, sampling_frequency, + factor, unit, verbose=verbose), + ignore_index=True) + if i > 0: + CP.loc[cur_index-1, 'CP'] = ( + CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, + 'sys_peak_ts']).total_seconds() + cur_index = cur_index + 1 + # last cycle or only cycle need to relies on the difference between the general peaks + all_peaks_index = len(all_peaks)-1 + CP.loc[cur_index-1, 'CP'] = ( + all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency + + temporalHRVFeatures = _getTemporalHRVFeatures(CP['CP']) + frequencyHRVFeatures = _getFreqencyHRVFeatures(CP['CP'], sampling_frequency) + + segment_features.append(temporalHRVFeatures) + segment_features.append(frequencyHRVFeatures) + + if verbose: + print('Cycle Features within Segment:') + print(segment_features) + + # remove outliers + segment_features = _clean_segment_features_of_outliers(segment_features) + + if verbose: + print('Cycle Features within Segment and no Outliers:') + return segment_features + + # takes a dataframe of calculated features and removes the outliers occurring due # to inaccuracies in the signal def _clean_segment_features_of_outliers(segment_df, treshold=0.8): @@ -858,24 +941,188 @@ def _find_xs_for_y(y_s, y_val, sys_peak): # returns the frequencies and their magnitudes. def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False): - ## TODO: Can maybe be improved?! + ## TODO: Can maybe be improved?! + if fft_interpolation: + x = np.cumsum(signal) + f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") + t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) + + signal = f_interpol(t_interpol) + fs = fft_interpolation + + if transformMethod == 'welch': + freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') + + if verbose: + plt.semilogy(freq, mag) + plt.title('Spectral Analysis using Welch Method') + plt.xlabel('Frequency') + plt.ylabel('PSD') + plt.show() + + return freq, mag - if fft_interpolation: - x = np.cumsum(signal) - f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") - t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) - - signal = f_interpol(t_interpol) - fs = fft_interpolation - if transformMethod == 'welch': - freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') +def _getTemporalHRVFeatures(signal): + + if isinstance(signal, np.ndarray): + ibi_series = pd.Series(signal) + elif not isinstance(signal, pd.core.series.Series): + raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') + + window = 5 + nn_threshold = 50 + + # Prepare data + instantaneous_hr = 60 / (ibi_series) + rolling_mean_hr = instantaneous_hr.rolling(window).mean() + rolling_24h = ibi_series.rolling(5) + + # Precalculate data for standard indexes + nn_diff = np.diff(ibi_series) + nn_xx = np.sum(np.abs(nn_diff) > nn_threshold) + + # Precalculate data for geometrical indeces + bin_size = 7.8125 + hist_middle = (ibi_series.min() + ibi_series.max()) / 2 + hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.min()) / bin_size) + hist_length = int(np.ceil((ibi_series.max() - hist_bound_lower) / bin_size) + 1) + hist_bins = hist_bound_lower + np.arange(hist_length) * bin_size + hist, _ = np.histogram(ibi_series, hist_bins) + hist_height = np.max(hist) + + # Calculate TINN measure + hist_max_index = np.argmax(hist) + min_n = 0 + min_m = len(hist) - 1 + min_error = np.finfo(np.float64).max + # Ignore bins that do not contain any intervals + nonzero_indices = np.nonzero(hist) + for n in range(hist_max_index): + for m in reversed(range(hist_max_index + 1, len(hist))): + # Create triangular interpolation function for n and m + tri_interp = interpolate.interp1d( + [n, hist_max_index, m], + [0, hist[hist_max_index], 0], + bounds_error=False, + fill_value=0 + ) + # Square difference of histogram and triangle + error = np.trapz( + [(hist[t] - tri_interp(t)) ** 2 for t in nonzero_indices], + [hist_bins[t] for t in nonzero_indices] + ) + if min_error > error: + min_n = n + min_m = m + min_error = error + n = hist_bins[min_n] + m = hist_bins[min_m] + + # Non-Linear Parameters + tolerance = ibi_series.std() * 0.2 + + # features + temporalHRVFeatures = pd.DataFrame({ + 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), + 'MeanNN': ibi_series.mean(), + 'MeanHR': instantaneous_hr.mean(), + 'MaxHR': rolling_mean_hr.max(), + 'MinHR': rolling_mean_hr.min(), + 'STDHR': instantaneous_hr.std(), + 'SDNN': np.std(ibi_series), + 'SDNNindex': rolling_24h.std().mean(), + 'SDANN': rolling_24h.mean().std(), + 'RMSSD': np.sqrt(np.mean(nn_diff ** 2)), + f'NN{nn_threshold}': nn_xx, + f'pNN{nn_threshold}': nn_xx / len(ibi_series) * 100, + 'HRVTriangularIndex': len(ibi_series) / hist_height, + 'TINN': m - n}, + index=[0]) + + return temporalHRVFeatures - if verbose: - plt.semilogy(freq, mag) - plt.title('Spectral Analysis using Welch Method') - plt.xlabel('Frequency') - plt.ylabel('PSD') - plt.show() - return freq, mag \ No newline at end of file +def _getFreqencyHRVFeatures(signal, sampling_frequency): + + if isinstance(signal, np.ndarray): + ibi_series = pd.Series(signal) + elif not isinstance(signal, pd.core.series.Series): + raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') + + ## TODO: HYPERPARAMETERS to config dict + fft_interpolation = 4.0 + use_ulf = False + lomb_smoothing = 0.02 + + vlf_limit = 0.04 + lf_limit = 0.15 + hf_limit = 0.4 + + if not use_ulf or ibi_series.sum() < 300000: #TODO: check + # Do not use ULF band on sample shorter than 5 minutes + ulf_limit = 0 + else: + ulf_limit = ulf_limit #TODO: ?????? + + # TODO: export transformMethod + transformMethod='welch' + freq, mag = _transformSigfromTimetoFreq(ibi_series, sampling_frequency, transformMethod, fft_interpolation) + + abs_index = freq <= hf_limit + ulf_index = freq <= ulf_limit + vlf_index = (freq >= ulf_limit) & (freq <= vlf_limit) + lf_index = (freq >= vlf_limit) & (freq <= lf_limit) + hf_index = (freq >= lf_limit) & (freq <= hf_limit) + + # Get power for each band by integrating over spectral density + abs_power = np.trapz(mag[abs_index], freq[abs_index]) + ulf = np.trapz(mag[ulf_index], freq[ulf_index]) + vlf = np.trapz(mag[vlf_index], freq[vlf_index]) + lf = np.trapz(mag[lf_index], freq[lf_index]) + hf = np.trapz(mag[hf_index], freq[hf_index]) + + # Normalized power for LF and HF band + lf_nu = lf / (abs_power - vlf - ulf) * 100 + hf_nu = hf / (abs_power - vlf - ulf) * 100 + + # Relative power of each band + ulf_perc = (ulf / abs_power) * 100 + vlf_perc = (vlf / abs_power) * 100 + lf_perc = (lf / abs_power) * 100 + hf_perc = (hf / abs_power) * 100 + + # Frequency with highest power + vlf_peak = freq[vlf_index][np.argmax(mag[vlf_index])] + lf_peak = freq[lf_index][np.argmax(mag[lf_index])] + hf_peak = freq[hf_index][np.argmax(mag[hf_index])] + + freqencyHRVFeatures = pd.DataFrame({ + 'VLF_peak': vlf_peak, + 'VLF_power': vlf, + 'VLF_power_log': np.log(vlf), + 'VLF_power_rel': vlf_perc, + 'LF_peak': lf_peak, + 'LF_power': lf, + 'LF_power_log': np.log(lf), + 'LF_power_rel': lf_perc, + 'LF_power_norm': lf_nu, + 'HF_peak': hf_peak, + 'HF_power': hf, + 'HF_power_log': np.log(hf), + 'HF_power_rel': hf_perc, + 'HF_power_norm': hf_nu, + 'ratio_LF_HF': lf/hf}, + index=[0]) + + # Add ULF parameters, if band available + if use_ulf and np.sum(ulf_index) > 0: + ulf_peak = freq[np.argmax(mag[ulf_index])] + freqencyHRVFeatures.append({ + 'ULF_peak': ulf_peak, + 'ULF_power': ulf, + 'ULF_power_log': np.log(ulf), + 'ULF_power_rel': ulf_perc}, + index=[0]) + + return freqencyHRVFeatures From ea3fb03c09d98092cb6d12923f8d75563dc3769d Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Wed, 16 Feb 2022 12:04:11 +0000 Subject: [PATCH 06/38] Change names of methods; Clean-up --- pypg/features.py | 94 ++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index e37a19b..2e11ccf 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -902,8 +902,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): CP.loc[cur_index-1, 'CP'] = ( all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency - temporalHRVFeatures = _getTemporalHRVFeatures(CP['CP']) - frequencyHRVFeatures = _getFreqencyHRVFeatures(CP['CP'], sampling_frequency) + temporalHRVFeatures = temporal_hrv(CP['CP']) + frequencyHRVFeatures = frequency_hrv(CP['CP'], sampling_frequency) segment_features.append(temporalHRVFeatures) segment_features.append(frequencyHRVFeatures) @@ -920,50 +920,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): return segment_features -# takes a dataframe of calculated features and removes the outliers occurring due -# to inaccuracies in the signal -def _clean_segment_features_of_outliers(segment_df, treshold=0.8): - quant = segment_df.quantile(treshold) - for col in segment_df.columns: - if col.find('ts') == -1: - segment_df = segment_df[segment_df[col] < quant[col]*2] - return segment_df - -# returns the x values for those samples in the signal, that are closest to some given y value -def _find_xs_for_y(y_s, y_val, sys_peak): - diffs = abs(y_s - y_val) - x_1 = diffs[:sys_peak].idxmin() - x_2 = diffs[sys_peak:].idxmin() - return x_1, x_2 - -# transforms a given temporal signal into the spectral domain -# using different methods and allowing interpolation if required. -# returns the frequencies and their magnitudes. -def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False): - - ## TODO: Can maybe be improved?! - if fft_interpolation: - x = np.cumsum(signal) - f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") - t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) - - signal = f_interpol(t_interpol) - fs = fft_interpolation - - if transformMethod == 'welch': - freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') - - if verbose: - plt.semilogy(freq, mag) - plt.title('Spectral Analysis using Welch Method') - plt.xlabel('Frequency') - plt.ylabel('PSD') - plt.show() - - return freq, mag - - -def _getTemporalHRVFeatures(signal): +def temporal_hrv(signal): if isinstance(signal, np.ndarray): ibi_series = pd.Series(signal) @@ -1043,7 +1000,7 @@ def _getTemporalHRVFeatures(signal): return temporalHRVFeatures -def _getFreqencyHRVFeatures(signal, sampling_frequency): +def frequency_hrv(signal, sampling_frequency): if isinstance(signal, np.ndarray): ibi_series = pd.Series(signal) @@ -1126,3 +1083,46 @@ def _getFreqencyHRVFeatures(signal, sampling_frequency): index=[0]) return freqencyHRVFeatures + + +# takes a dataframe of calculated features and removes the outliers occurring due +# to inaccuracies in the signal +def _clean_segment_features_of_outliers(segment_df, treshold=0.8): + quant = segment_df.quantile(treshold) + for col in segment_df.columns: + if col.find('ts') == -1: + segment_df = segment_df[segment_df[col] < quant[col]*2] + return segment_df + +# returns the x values for those samples in the signal, that are closest to some given y value +def _find_xs_for_y(y_s, y_val, sys_peak): + diffs = abs(y_s - y_val) + x_1 = diffs[:sys_peak].idxmin() + x_2 = diffs[sys_peak:].idxmin() + return x_1, x_2 + +# transforms a given temporal signal into the spectral domain +# using different methods and allowing interpolation if required. +# returns the frequencies and their magnitudes. +def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False): + + ## TODO: Can maybe be improved?! + if fft_interpolation: + x = np.cumsum(signal) + f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") + t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) + + signal = f_interpol(t_interpol) + fs = fft_interpolation + + if transformMethod == 'welch': + freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') + + if verbose: + plt.semilogy(freq, mag) + plt.title('Spectral Analysis using Welch Method') + plt.xlabel('Frequency') + plt.ylabel('PSD') + plt.show() + + return freq, mag \ No newline at end of file From 0bf014e786eb8be66512d925c100017009e46bee Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Wed, 16 Feb 2022 14:17:09 +0000 Subject: [PATCH 07/38] Adapt gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9e92fcc..5b335d9 100644 --- a/.gitignore +++ b/.gitignore @@ -89,6 +89,8 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +Pipfile +*.lock # PyInstaller # Usually these files are written by a python script from a template From e0367c4b078a650363403231711428e91a9bc7d5 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 15:58:54 +0000 Subject: [PATCH 08/38] review time features; add comments --- pypg/features.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 2e11ccf..3453794 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -37,6 +37,7 @@ from .cycles import find_with_template + def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts time domain features from PPG cycles in a give PPG segment as @@ -94,6 +95,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1, 'sys_peak_ts']).total_seconds() cur_index = cur_index + 1 + # last cycle or only cycle need to relies on the difference between the general peaks all_peaks_index = len(all_peaks)-1 segment_features.loc[cur_index-1, 'CP'] = ( @@ -148,14 +150,14 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) + ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? max_amplitude = ppg.max() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - sys_peak_ts = ppg.index[peaks[0]] + sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? if verbose: plt.figure() @@ -168,8 +170,12 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): 'start_ts': ppg.index.min(), 'sys_peak_ts': sys_peak_ts, 'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(), - 'DT': (ppg.index.max() - sys_peak_ts).total_seconds() + 'DT': (ppg.index.max() - sys_peak_ts).total_seconds(), + 'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # ??? CHECK KURYLAK DEF ??? + 'CT': (ppg.index.max() - ppg.index.min()).total_seconds(), + 'SUT_VAL': (ppg.values[sys_peak_ts]) }, index=[0]) + for p_value in [10, 25, 33, 50, 66, 75]: p_ampl = p_value / 100 * max_amplitude x_1, x_2 = _find_xs_for_y(ppg, p_ampl, peaks[0]) @@ -181,6 +187,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): x_2 - sys_peak_ts) / (sys_peak_ts - x_1) if verbose: plt.show() + return cycle_features From 73e67831376377ce73c11491a80b7e695838e0aa Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 16:15:36 +0000 Subject: [PATCH 09/38] review nonlinear features; add comments --- pypg/features.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 3453794..f46f98d 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -7,8 +7,10 @@ Features ---------- -time - Extract features from the time domain for a PPG segment. -time_cycle - Extract features from the time domain for a PPG cycle. +time - Extract features from the time domain for a PPG segment. +time_cycle - Extract features from the time domain for a PPG cycle. +nonlinear - Extract features from the time domain and compute non-linear relations for a PPG segment. +nonlinear_cycle - Extract features from the time domain and compute non-linear relations for a PPG cycle. References ---------- @@ -220,7 +222,7 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): Returns ------- segment_features : pd.DataFrame - A dataframe with the non-linear features in seconds for each valid + A dataframe with the non-linear features for each valid cycle in the PPG segment. """ if isinstance(ppg, np.ndarray): @@ -252,8 +254,9 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if verbose: print('Cycle Features within Segment and no Outliers:') - return segment_features + print(segment_features) + return segment_features def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ @@ -291,14 +294,14 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) + ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() - ppg_cycle_duration = len(ppg) + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? + ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - systolic_peak_index = peaks[0] + sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? if verbose: plt.figure() @@ -308,14 +311,15 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa # features cycle_features = pd.DataFrame({ - 'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg)), - 'ratio_SUT_WL_DT': ((systolic_peak_index / ppg_cycle_duration) / (ppg_cycle_duration - systolic_peak_index)), - 'ratio_SUT_DT': (systolic_peak_index / (ppg_cycle_duration - systolic_peak_index)), - 'ratio_cycle_mean_cycle_var': (np.mean(ppg) / np.var(ppg)) + 'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # ?? WHERE DO DEFINITIONS COME FROM ?? + 'ratio_SUT_WL_DT': ((sys_peak_ts / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak_ts)), + 'ratio_SUT_DT': (sys_peak_ts / (ppg_cycle_duration - sys_peak_ts)), + 'ratio_cycle_mean_cycle_var': (np.mean(ppg.values) / np.var(ppg.values)) }, index=[0]) if verbose: plt.show() + return cycle_features From 228408a87c90dee0e20fe036a6983f2c11d47df6 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 16:56:04 +0000 Subject: [PATCH 10/38] review sdppg features; add comments --- pypg/features.py | 80 ++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index f46f98d..390d84a 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -7,10 +7,14 @@ Features ---------- -time - Extract features from the time domain for a PPG segment. -time_cycle - Extract features from the time domain for a PPG cycle. -nonlinear - Extract features from the time domain and compute non-linear relations for a PPG segment. -nonlinear_cycle - Extract features from the time domain and compute non-linear relations for a PPG cycle. +time - Extract features from the time domain for a PPG segment. +time_cycle - Extract features from the time domain for a PPG cycle. +nonlinear - Extract features from the time domain computing non-linear relations for a PPG segment. +nonlinear_cycle - Extract features from the time domain computing non-linear relations for a PPG cycle. +statistical - Extract features from the time domain computing statistical values for a PPG segment. +statistical_cyle - Extract features from the time domain computing statistical values for a PPG cycle. +sdppg - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment. +sdppg_cycle - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle. References ---------- @@ -155,7 +159,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) max_amplitude = ppg.max() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] @@ -285,7 +289,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa Returns ------- cycle_features : pd.DataFrame - A dataframe with the nonlinear features in seconds for the PPG cycle. + A dataframe with the nonlinear features for the PPG cycle. """ if isinstance(ppg, np.ndarray): @@ -297,7 +301,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] @@ -352,7 +356,7 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False Returns ------- segment_features : pd.DataFrame - A dataframe with the statistical features in seconds for each valid + A dataframe with the statistical features for each valid cycle in the PPG segment. """ if isinstance(ppg, np.ndarray): @@ -384,8 +388,9 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False if verbose: print('Cycle Features within Segment and no Outliers:') - return segment_features + print(segment_features) + return segment_features def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ @@ -414,7 +419,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= Returns ------- cycle_features : pd.DataFrame - A dataframe with the statistical features in seconds for the PPG cycle. + A dataframe with the statistical features for the PPG cycle. """ if isinstance(ppg, np.ndarray): @@ -423,10 +428,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) + ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] @@ -438,20 +443,21 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= # features cycle_features = pd.DataFrame({ - 'ppg_mean': (np.mean(ppg)), - 'ppg_var': (np.var(ppg)), - 'ppg_skewness': (stats.skew(ppg)), - 'ppg_kurtosis': (stats.kurtosis(ppg)) + 'ppg_mean': (np.mean(ppg.values)), + 'ppg_var': (np.var(ppg.values)), + 'ppg_skewness': (stats.skew(ppg.values)), + 'ppg_kurtosis': (stats.kurtosis(ppg.values)) }, index=[0]) if verbose: plt.show() + return cycle_features def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ - Extracts features from the second derivative (APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in + Extracts features from the second derivative (SDPPG or APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described by Li et al.) from the PPG segment given as input. @@ -478,7 +484,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): Returns ------- segment_features : pd.DataFrame - A dataframe with the features from the second derivative (APG) of the PPG cycles in seconds for each valid + A dataframe with the features from the second derivative (SDPPG or APG) of the PPG cycles in seconds for each valid cycle in the PPG segment. """ if isinstance(ppg, np.ndarray): @@ -510,12 +516,13 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if verbose: print('Cycle Features within Segment and no Outliers:') - return segment_features + print(segment_features) + return segment_features def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ - Extracts features from the second derivative (APG) for the PPG cycles. Returns a pandas.Series. + Extracts features from the second derivative (SDPPG or APG) for the PPG cycles. Returns a pandas.Series. Parameters ---------- @@ -540,7 +547,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) Returns ------- cycle_features : pd.DataFrame - A dataframe with the features from the second derivative (APG) in seconds for the PPG cycle. + A dataframe with the features from the second derivative (SDPPG or APG) in seconds for the PPG cycle. """ if isinstance(ppg, np.ndarray): @@ -549,13 +556,13 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) + ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() + ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - systolic_peak_index = peaks[0] + sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? if verbose: plt.figure() @@ -564,9 +571,9 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) plt.plot(ppg.index, ppg.values) # second derviative of the PPG signal - sdppg_signal = np.gradient(np.gradient(ppg)) + sdppg_signal = np.gradient(np.gradient(ppg.values)) - # features + # features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!! # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585 # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879 # a is global maximum; @@ -588,7 +595,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) a_index = (sdppg_signal_peaks[np.argmax(peak_dict['peak_heights'])]) - ## subset signal + dictionary to only have left over indeces + magnitudes + ## subset signal + dictionary to exclude a index + magnitude old_len = len(sdppg_signal_peaks) sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]] peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] @@ -602,7 +609,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) b_index = (sdppg_signal_valleys[np.argmax(valley_dict['peak_heights'])]) - ## subset signal + dictionary to only have left over indeces + magnitudes + ## subset signal + dictionary to exclude b index + magnitude old_len = len(sdppg_signal_peaks) sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]] peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] @@ -624,7 +631,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) e_index = (sdppg_signal_peaks[peaks[0]]) c_index = (sdppg_signal_peaks[peaks[1]]) - ## subset signal + dictionary to only have left over indeces + magnitudes + ## subset signal + dictionary to exlcude c and e indeces + magnitudes old_len = len(sdppg_signal_valleys) sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]] valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] @@ -684,8 +691,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) AE = (d_index / sampling_frequency) ### Difference of peak and dicrotic notch with respect to time (CD) - if c_index and systolic_peak_index is not None: - CD = ((c_index - systolic_peak_index) / sampling_frequency) + if c_index and sys_peak_ts is not None: + CD = ((c_index - sys_peak_ts) / sampling_frequency) ### Difference of dicrotic notch and end of signal with respect to time (DF) if c_index: @@ -695,11 +702,11 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## PPG Signal values ### Dicrotic notch (D') if c_index: - D = (ppg[c_index]) + D = (ppg.values[c_index]) ### Diastolic point (E') - if d_index: - E = (ppg[d_index]) + if d_index: + E = (ppg.values[d_index]) ## Ratios @@ -724,8 +731,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ratio_AD_AF = (c_index / len(sdppg_signal)) ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF) - if c_index and systolic_peak_index is not None: - ratio_CD_AF = ((c_index - systolic_peak_index) / len(sdppg_signal)) + if c_index and sys_peak_ts is not None: + ratio_CD_AF = ((c_index - sys_peak_ts) / len(sdppg_signal)) ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF) if d_index: @@ -759,6 +766,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) if verbose: plt.show() + return cycle_features From 286f0f37b745a4558beec88ea5cfa67c34940316 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 17:29:52 +0000 Subject: [PATCH 11/38] review frequency features; Add comments --- pypg/features.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 390d84a..f618238 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -15,6 +15,7 @@ statistical_cyle - Extract features from the time domain computing statistical values for a PPG cycle. sdppg - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment. sdppg_cycle - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle. +frequency - Extract features from the frequency domain for a PPG segment. References ---------- @@ -770,7 +771,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) return cycle_features -def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_size, verbose=False): +def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq = 12.5, interval_size = 0.5, verbose=False): """ Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described @@ -782,6 +783,12 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si The PPG signal. sampling_frequency : int The sampling frequency of the signal in Hz. + transformMethod : str + The method used for transforming the time signal to the frequency domain. + cutoff_freq : int + The frequency threshold used to exclude frequencies above threshold from analysis. + interval_size : int + The size of the interval used to split the frequency spectrum. verbose : bool, optional If True, plots the features, by default False. @@ -855,6 +862,13 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq, interval_si print('Cycle Features within Segment:') print(segment_features) + # remove outliers + segment_features = _clean_segment_features_of_outliers(segment_features) + + if verbose: + print('Cycle Features within Segment and no Outliers:') + print(segment_features) + return segment_features From ce2dcc77013f26fd99d77d0bb99492e36220bb1f Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 17:30:26 +0000 Subject: [PATCH 12/38] Clean up --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index f618238..e6dceb4 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -771,7 +771,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) return cycle_features -def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq = 12.5, interval_size = 0.5, verbose=False): +def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interval_size=0.5, verbose=False): """ Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described From c4877a7df16f7d33711c4a1dcb6dec24ec25b2d6 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 4 Apr 2022 17:45:59 +0000 Subject: [PATCH 13/38] review hrv features; add comments --- pypg/features.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index e6dceb4..1af30f3 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -16,6 +16,7 @@ sdppg - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG segment. sdppg_cycle - Extract features from the time domain's second derivative (SDPPG or APG) for a PPG cycle. frequency - Extract features from the frequency domain for a PPG segment. +hrv - Extract features from the computed Heart-Rate-Variability signal for a PPG segment. References ---------- @@ -874,9 +875,9 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ - Extracts hrv features from PPG cycles in a give PPG segment. + Extracts hrv features from a given PPG segment. Returns a pandas.DataFrame in which each line contains the features - for a given valid cycle (as described by Li et al.) from the PPG segment given as input. + from the PPG segment given as input. Parameters ---------- @@ -901,8 +902,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): Returns ------- segment_features : pd.DataFrame - A dataframe with the hrv-domain features in seconds for each valid - cycle in the PPG segment. + A dataframe with the hrv-domain features in seconds for the PPG segment. """ if isinstance(ppg, np.ndarray): ppg = pd.Series(ppg) @@ -917,8 +917,6 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if len(cycles) == 0: return pd.DataFrame() - segment_features = pd.DataFrame() - cur_index = 0 CP = pd.DataFrame() for i, cycle in enumerate(cycles): @@ -927,19 +925,17 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): ignore_index=True) if i > 0: CP.loc[cur_index-1, 'CP'] = ( - CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, - 'sys_peak_ts']).total_seconds() + CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, 'sys_peak_ts']).total_seconds() cur_index = cur_index + 1 # last cycle or only cycle need to relies on the difference between the general peaks all_peaks_index = len(all_peaks)-1 CP.loc[cur_index-1, 'CP'] = ( all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency - temporalHRVFeatures = temporal_hrv(CP['CP']) - frequencyHRVFeatures = frequency_hrv(CP['CP'], sampling_frequency) + temporalHRVFeatures = _temporal_hrv(CP['CP']) + frequencyHRVFeatures = _frequency_hrv(CP['CP'], sampling_frequency) - segment_features.append(temporalHRVFeatures) - segment_features.append(frequencyHRVFeatures) + segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1) if verbose: print('Cycle Features within Segment:') @@ -950,14 +946,17 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if verbose: print('Cycle Features within Segment and no Outliers:') + print(segment_features) + return segment_features -def temporal_hrv(signal): +# compute temporal and frequency features of HRV +def _temporal_hrv(ibi_series): - if isinstance(signal, np.ndarray): - ibi_series = pd.Series(signal) - elif not isinstance(signal, pd.core.series.Series): + if isinstance(ibi_series, np.ndarray): + ibi_series = pd.Series(ibi_series) + elif not isinstance(ibi_series, pd.core.series.Series): raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') window = 5 @@ -1032,8 +1031,7 @@ def temporal_hrv(signal): return temporalHRVFeatures - -def frequency_hrv(signal, sampling_frequency): +def _frequency_hrv(signal, sampling_frequency): if isinstance(signal, np.ndarray): ibi_series = pd.Series(signal) From 7e45e9ab88275b47c7a45b05e71642d115d3e1a9 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Sun, 10 Apr 2022 17:03:51 +0000 Subject: [PATCH 14/38] Double check code and adapt according to tests on PPG-BP dataset --- pypg/features.py | 185 +++++++++++++++++++++++++---------------------- 1 file changed, 97 insertions(+), 88 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 1af30f3..e9a701e 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -158,14 +158,14 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? + ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) + ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) max_amplitude = ppg.max() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? + sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) if verbose: plt.figure() @@ -179,9 +179,9 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): 'sys_peak_ts': sys_peak_ts, 'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(), 'DT': (ppg.index.max() - sys_peak_ts).total_seconds(), - 'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # ??? CHECK KURYLAK DEF ??? + 'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense? 'CT': (ppg.index.max() - ppg.index.min()).total_seconds(), - 'SUT_VAL': (ppg.values[sys_peak_ts]) + 'SUT_VAL': (ppg.values[peaks[0]]) }, index=[0]) for p_value in [10, 25, 33, 50, 66, 75]: @@ -191,8 +191,8 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): plt.scatter([x_1, x_2], ppg[[x_1, x_2]]) cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds() cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds() - cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = ( - x_2 - sys_peak_ts) / (sys_peak_ts - x_1) + cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1) + if verbose: plt.show() @@ -300,14 +300,15 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? + ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) + ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? + sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) + sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() if verbose: plt.figure() @@ -317,9 +318,9 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa # features cycle_features = pd.DataFrame({ - 'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # ?? WHERE DO DEFINITIONS COME FROM ?? - 'ratio_SUT_WL_DT': ((sys_peak_ts / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak_ts)), - 'ratio_SUT_DT': (sys_peak_ts / (ppg_cycle_duration - sys_peak_ts)), + 'ratio_WL_cycle_mean': (ppg_cycle_duration / np.mean(ppg.values)), # TODO: Add definition to description + 'ratio_SUT_WL_DT': ((sys_peak / ppg_cycle_duration) / (ppg_cycle_duration - sys_peak)), + 'ratio_SUT_DT': (sys_peak / (ppg_cycle_duration - sys_peak)), 'ratio_cycle_mean_cycle_var': (np.mean(ppg.values) / np.var(ppg.values)) }, index=[0]) @@ -430,10 +431,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? + ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) + ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] @@ -558,13 +559,14 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # ??? Sampling Frequncy considered ??? + ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? ppg = ppg.interpolate(method='time') - ppg = ppg - ppg.min() ## ??? SHOULDNT IT BE MEAN VALUE: ppg - ppg.mean() ??? if we do it for each cycle individually features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) + ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] - sys_peak_ts = ppg.index[peaks[0]] # ??? ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks) ??? + sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) + sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() if verbose: plt.figure() @@ -575,7 +577,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) # second derviative of the PPG signal sdppg_signal = np.gradient(np.gradient(ppg.values)) - # features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!! + # TODO: features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!! # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585 # https://www.researchgate.net/figure/Important-points-on-PPG-VPG-and-AVG-The-first-top-signal-is-an-epoch-of-PPG-The_fig4_339604879 # a is global maximum; @@ -586,11 +588,18 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) # # and Gaurav Paper - # TODO: Can be improved! # TODO: maybe add systolic peak index as reference for indices detection sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0) sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0) + + # initialize SDPPG indices and variables + a_index = b_index = c_index = d_index = e_index = None + a_val = b_val = c_val = d_val = e_val = None + AD = AE = CD = DF = D = E = None + ratio_b_a = ratio_c_a = ratio_d_a = ratio_e_a = None + ratio_AD_AF = ratio_CD_AF = ratio_AE_AF = ratio_DF_AF = None + # a wave if len(sdppg_signal_peaks) != 0: @@ -599,11 +608,11 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## subset signal + dictionary to exclude a index + magnitude old_len = len(sdppg_signal_peaks) - sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index[0]] + sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > a_index] peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] old_len = len(sdppg_signal_valleys) - sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index[0]] + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > a_index] valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] # b wave @@ -613,14 +622,14 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## subset signal + dictionary to exclude b index + magnitude old_len = len(sdppg_signal_peaks) - sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index[0]] + sdppg_signal_peaks = sdppg_signal_peaks[sdppg_signal_peaks > b_index] peak_dict['peak_heights'] = peak_dict['peak_heights'][old_len - len(sdppg_signal_peaks):] old_len = len(sdppg_signal_valleys) - sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index[0]] + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > b_index] valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] - - # c and e wave + + # c and e wave if len(sdppg_signal_peaks) >= 2: peaks = peak_dict['peak_heights'].argsort()[-2:] @@ -635,7 +644,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## subset signal + dictionary to exlcude c and e indeces + magnitudes old_len = len(sdppg_signal_valleys) - sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index[0]] + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys > c_index] valley_dict['peak_heights'] = valley_dict['peak_heights'][old_len - len(sdppg_signal_valleys):] # d wave @@ -643,7 +652,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## get only valleys between c and e old_len = len(sdppg_signal_valleys) - sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index[0]] + sdppg_signal_valleys = sdppg_signal_valleys[sdppg_signal_valleys < e_index] if old_len > len(sdppg_signal_valleys): valley_dict['peak_heights'] = valley_dict['peak_heights'][:-(old_len - len(sdppg_signal_valleys))] @@ -651,6 +660,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) if len(valley_dict['peak_heights']) != 0: d_index = (sdppg_signal_valleys[valley_dict['peak_heights'].argmax()]) + ## SDPPG Signal values ### a if a_index: @@ -693,8 +703,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) AE = (d_index / sampling_frequency) ### Difference of peak and dicrotic notch with respect to time (CD) - if c_index and sys_peak_ts is not None: - CD = ((c_index - sys_peak_ts) / sampling_frequency) + if c_index and sys_peak: + CD = ((c_index - sys_peak) / sampling_frequency) ### Difference of dicrotic notch and end of signal with respect to time (DF) if c_index: @@ -713,19 +723,19 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ## Ratios ### First valley to the peak value of APG signal (b/a) - if b_val and a_val: + if b_index and a_index: ratio_b_a = (b_val / a_val) ### Dicrotic notch to the peak value of APG signal (c/a) - if c_val and a_val: + if c_index and a_index: ratio_c_a = (c_val / a_val) ### Diastolic point to the peak value of APG signal (d/a) - if d_val and a_val: + if d_index and a_index: ratio_d_a = (d_val / a_val) ### e to the peak value of APG signal (e/a) - if e_val and a_val: + if e_index and a_index: ratio_e_a = (e_val / a_val) ### Location of dicrotic notch with respect to the length of window (AD/AF) @@ -733,8 +743,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ratio_AD_AF = (c_index / len(sdppg_signal)) ### Difference of location of peak and dicrotic notch with respect to the length of window (CD/AF) - if c_index and sys_peak_ts is not None: - ratio_CD_AF = ((c_index - sys_peak_ts) / len(sdppg_signal)) + if c_index and sys_peak: + ratio_CD_AF = ((c_index - sys_peak) / len(sdppg_signal)) ### Location of diastolic point on PPG signal with respect to the length of window (AE/AF) if d_index: @@ -745,7 +755,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal)) cycle_features = pd.DataFrame({ - 'a_val': a_val, + 'a_val': a_val, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality 'b_val': b_val, 'c_val': c_val, 'd_val': d_val, @@ -772,7 +782,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) return cycle_features -def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interval_size=0.5, verbose=False): +def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.0, interval_size=5, verbose=False): """ Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described @@ -810,9 +820,9 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') # Transform signal from time to frequency domain - freq, mag = _transformSigfromTimetoFreq(ppg, sampling_frequency, transformMethod, verbose) + freq, mag = _transformSigfromTimetoFreq(ppg.values, sampling_frequency, transformMethod=transformMethod, verbose=verbose) - # features + # features segment_features = pd.DataFrame() # Cut off frequencies @@ -823,18 +833,17 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv plt.semilogy(freq_cut, mag_cut) plt.title('CutOff') plt.xlabel('Frequency') - plt.ylabel('mag') + plt.xlim((freq_cut.min(), freq_cut.max())) + plt.ylim((mag_cut.min(), mag_cut.max())) + plt.ylabel('Magnitude') plt.show() - ## -> Paper Wang + ## -> Paper Wang # TODO: Add description to the top for start_index in np.arange(0, cutoff_freq, interval_size): end_index = start_index + interval_size - segment_features.append({ - 'freqInt' + str(start_index) + '_' + str(end_index): - np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)])}, - ignore_index=True) + segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)]) - ## -> Paper Slapnicar + ## -> Paper Slapnicar # TODO: Add description to the top sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]] if len(sorted_mag) >= 3: top3_psd = sorted_mag[:3] @@ -842,10 +851,7 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv top3_psd = sorted_mag[:len(sorted_mag)] for i in np.arange(0, len(top3_psd)): - segment_features.append({ - 'mag_top' + str(i+1): - top3_psd[i]}, - ignore_index=True) + segment_features.loc[0, 'mag_top' + str(i+1)] = top3_psd[i] sorted_freq = freq_cut[np.argsort(mag_cut)[::-1]] if len(sorted_freq) >= 3: @@ -854,15 +860,13 @@ def frequency(ppg, sampling_frequency, transformMethod, cutoff_freq=12.5, interv top3_freq = sorted_freq[:len(sorted_freq)] for i in np.arange(0, len(top3_freq)): - segment_features.append({ - 'freq_top' + str(i+1): - top3_freq[i]}, - ignore_index=True) + segment_features.loc[0, 'freq_top' + str(i+1)] = top3_freq[i] if verbose: print('Cycle Features within Segment:') print(segment_features) + # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense # remove outliers segment_features = _clean_segment_features_of_outliers(segment_features) @@ -918,22 +922,28 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): return pd.DataFrame() cur_index = 0 - CP = pd.DataFrame() + time_features = pd.DataFrame() for i, cycle in enumerate(cycles): - CP = CP.append(time_cycle(cycle, sampling_frequency, + time_features = time_features.append(time_cycle(cycle, sampling_frequency, factor, unit, verbose=verbose), ignore_index=True) if i > 0: - CP.loc[cur_index-1, 'CP'] = ( - CP.loc[cur_index, 'sys_peak_ts'] - CP.loc[cur_index-1, 'sys_peak_ts']).total_seconds() + time_features.loc[cur_index-1, 'CP'] = ( + time_features.loc[cur_index, 'sys_peak_ts'] - time_features.loc[cur_index-1, 'sys_peak_ts']).total_seconds() cur_index = cur_index + 1 # last cycle or only cycle need to relies on the difference between the general peaks all_peaks_index = len(all_peaks)-1 - CP.loc[cur_index-1, 'CP'] = ( + time_features.loc[cur_index-1, 'CP'] = ( all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency - temporalHRVFeatures = _temporal_hrv(CP['CP']) - frequencyHRVFeatures = _frequency_hrv(CP['CP'], sampling_frequency) + ## FOR TESTING WITH PPG-BP DATASET: + #ibi_series = time_features['CP'].append(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True) + #print(ibi_series) + + # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features + + temporalHRVFeatures = _temporal_hrv(time_features['CP']) + frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency) segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1) @@ -941,6 +951,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): print('Cycle Features within Segment:') print(segment_features) + # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense # remove outliers segment_features = _clean_segment_features_of_outliers(segment_features) @@ -960,12 +971,12 @@ def _temporal_hrv(ibi_series): raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') window = 5 - nn_threshold = 50 + nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx # Prepare data instantaneous_hr = 60 / (ibi_series) rolling_mean_hr = instantaneous_hr.rolling(window).mean() - rolling_24h = ibi_series.rolling(5) + rolling_24h = ibi_series.rolling(window) # Precalculate data for standard indexes nn_diff = np.diff(ibi_series) @@ -973,11 +984,11 @@ def _temporal_hrv(ibi_series): # Precalculate data for geometrical indeces bin_size = 7.8125 - hist_middle = (ibi_series.min() + ibi_series.max()) / 2 - hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.min()) / bin_size) - hist_length = int(np.ceil((ibi_series.max() - hist_bound_lower) / bin_size) + 1) + hist_middle = (ibi_series.values.min() + ibi_series.values.max()) / 2 + hist_bound_lower = hist_middle - bin_size * np.ceil((hist_middle - ibi_series.values.min()) / bin_size) + hist_length = int(np.ceil((ibi_series.values.max() - hist_bound_lower) / bin_size) + 1) hist_bins = hist_bound_lower + np.arange(hist_length) * bin_size - hist, _ = np.histogram(ibi_series, hist_bins) + hist, _ = np.histogram(ibi_series.values, hist_bins) hist_height = np.max(hist) # Calculate TINN measure @@ -1013,7 +1024,7 @@ def _temporal_hrv(ibi_series): # features temporalHRVFeatures = pd.DataFrame({ - 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), + 'SampEn': float(nolds.sampen(ibi_series, 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL 'MeanNN': ibi_series.mean(), 'MeanHR': instantaneous_hr.mean(), 'MaxHR': rolling_mean_hr.max(), @@ -1024,21 +1035,21 @@ def _temporal_hrv(ibi_series): 'SDANN': rolling_24h.mean().std(), 'RMSSD': np.sqrt(np.mean(nn_diff ** 2)), f'NN{nn_threshold}': nn_xx, - f'pNN{nn_threshold}': nn_xx / len(ibi_series) * 100, - 'HRVTriangularIndex': len(ibi_series) / hist_height, + f'pNN{nn_threshold}': nn_xx / len(ibi_series.values) * 100, + 'HRVTriangularIndex': len(ibi_series.values) / hist_height, 'TINN': m - n}, index=[0]) - + return temporalHRVFeatures -def _frequency_hrv(signal, sampling_frequency): +def _frequency_hrv(ibi_series, sampling_frequency): - if isinstance(signal, np.ndarray): - ibi_series = pd.Series(signal) - elif not isinstance(signal, pd.core.series.Series): + if isinstance(ibi_series, np.ndarray): + ibi_series = pd.Series(ibi_series) + elif not isinstance(ibi_series, pd.core.series.Series): raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') - ## TODO: HYPERPARAMETERS to config dict + ## TODO: @Ari: HYPERPARAMETERS to config dict fft_interpolation = 4.0 use_ulf = False lomb_smoothing = 0.02 @@ -1053,9 +1064,7 @@ def _frequency_hrv(signal, sampling_frequency): else: ulf_limit = ulf_limit #TODO: ?????? - # TODO: export transformMethod - transformMethod='welch' - freq, mag = _transformSigfromTimetoFreq(ibi_series, sampling_frequency, transformMethod, fft_interpolation) + freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation) abs_index = freq <= hf_limit ulf_index = freq <= ulf_limit @@ -1121,8 +1130,8 @@ def _frequency_hrv(signal, sampling_frequency): def _clean_segment_features_of_outliers(segment_df, treshold=0.8): quant = segment_df.quantile(treshold) for col in segment_df.columns: - if col.find('ts') == -1: - segment_df = segment_df[segment_df[col] < quant[col]*2] + if col.find('ts') == -1 and len(segment_df[col]) > 1: + segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency return segment_df # returns the x values for those samples in the signal, that are closest to some given y value @@ -1135,19 +1144,19 @@ def _find_xs_for_y(y_s, y_val, sys_peak): # transforms a given temporal signal into the spectral domain # using different methods and allowing interpolation if required. # returns the frequencies and their magnitudes. -def _transformSigfromTimetoFreq(signal, fs, transformMethod, fft_interpolation=None, verbose=False): +def _transformSigfromTimetoFreq(timeSignal, fs, transformMethod='welch', fft_interpolation=None, verbose=False): - ## TODO: Can maybe be improved?! if fft_interpolation: - x = np.cumsum(signal) - f_interpol = interpolate.interp1d(x, signal, kind = "cubic", fill_value="extrapolate") + x = np.cumsum(timeSignal) + f_interpol = interpolate.interp1d(x, timeSignal, kind = "cubic", fill_value="extrapolate") t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) - signal = f_interpol(t_interpol) + timeSignal = f_interpol(t_interpol) fs = fft_interpolation + # TODO: Maybe add different methods if transformMethod == 'welch': - freq, mag = signal.welch(signal, fs, window='hamming', scaling='density', detrend='linear') + freq, mag = signal.welch(timeSignal, fs, window='hamming', scaling='density', detrend='linear') if verbose: plt.semilogy(freq, mag) From 69da20402d469495581e9c22ff1b2c9da0b1fe9d Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Mon, 11 Apr 2022 17:43:06 +0000 Subject: [PATCH 15/38] Fix conversion error --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index e9a701e..d52870d 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -1024,7 +1024,7 @@ def _temporal_hrv(ibi_series): # features temporalHRVFeatures = pd.DataFrame({ - 'SampEn': float(nolds.sampen(ibi_series, 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL + 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL 'MeanNN': ibi_series.mean(), 'MeanHR': instantaneous_hr.mean(), 'MaxHR': rolling_mean_hr.max(), From 773b897d3e70562b7db4278aa6ef27030148c563 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 14 Apr 2022 15:33:47 +0000 Subject: [PATCH 16/38] Add comments; remove cleanSegment for frequency and hrv --- pypg/features.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index d52870d..e4860b5 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -179,7 +179,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): 'sys_peak_ts': sys_peak_ts, 'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(), 'DT': (ppg.index.max() - sys_peak_ts).total_seconds(), - 'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense? + #'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense? 'CT': (ppg.index.max() - ppg.index.min()).total_seconds(), 'SUT_VAL': (ppg.values[peaks[0]]) }, index=[0]) @@ -868,7 +868,7 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125. # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense # remove outliers - segment_features = _clean_segment_features_of_outliers(segment_features) + #segment_features = _clean_segment_features_of_outliers(segment_features) if verbose: print('Cycle Features within Segment and no Outliers:') @@ -941,6 +941,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): #print(ibi_series) # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features + # PAPER What should be the minimum threshold temporalHRVFeatures = _temporal_hrv(time_features['CP']) frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency) @@ -953,7 +954,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense # remove outliers - segment_features = _clean_segment_features_of_outliers(segment_features) + #segment_features = _clean_segment_features_of_outliers(segment_features) if verbose: print('Cycle Features within Segment and no Outliers:') @@ -971,7 +972,7 @@ def _temporal_hrv(ibi_series): raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') window = 5 - nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx + nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before # Prepare data instantaneous_hr = 60 / (ibi_series) From 14739af8e5f1e0f2b5fbace274b6b7058bb8c138 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Fri, 15 Apr 2022 14:58:50 +0000 Subject: [PATCH 17/38] Replace append by concat; Clean-up; add check for null array to frequency feature generation --- pypg/features.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index e4860b5..667fc99 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -247,8 +247,8 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): cur_index = 0 segment_features = pd.DataFrame() for i, cycle in enumerate(cycles): - segment_features = segment_features.append(nonlinear_cycle(cycle, - sampling_frequency, factor, unit, verbose=verbose), + segment_features = pd.concat([segment_features, nonlinear_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose)], ignore_index=True) if verbose: @@ -378,8 +378,8 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False cur_index = 0 segment_features = pd.DataFrame() for i, cycle in enumerate(cycles): - segment_features = segment_features.append(statistical_cycle(cycle, - sampling_frequency, factor, unit, verbose=verbose), + segment_features = pd.concat([segment_features, statistical_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose)], ignore_index=True) if verbose: @@ -503,11 +503,10 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if len(cycles) == 0: return pd.DataFrame() - cur_index = 0 segment_features = pd.DataFrame() for i, cycle in enumerate(cycles): - segment_features = segment_features.append(sdppg_cycle(cycle, - sampling_frequency, factor, unit, verbose=verbose), + segment_features = pd.concat([segment_features, sdppg_cycle(cycle, + sampling_frequency, factor, unit, verbose=verbose)], ignore_index=True) if verbose: @@ -841,7 +840,10 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125. ## -> Paper Wang # TODO: Add description to the top for start_index in np.arange(0, cutoff_freq, interval_size): end_index = start_index + interval_size - segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)]) + if mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)].size != 0: + segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = np.nanmean(mag_cut[(freq_cut >= start_index) & (freq_cut < end_index)]) + else: + segment_features.loc[0, 'freqInt' + str(start_index) + '_' + str(end_index)] = 0 ## -> Paper Slapnicar # TODO: Add description to the top sorted_mag = mag_cut[np.argsort(mag_cut)[::-1]] @@ -924,8 +926,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): cur_index = 0 time_features = pd.DataFrame() for i, cycle in enumerate(cycles): - time_features = time_features.append(time_cycle(cycle, sampling_frequency, - factor, unit, verbose=verbose), + time_features = pd.concat([time_features, time_cycle(cycle, sampling_frequency, + factor, unit, verbose=verbose)], ignore_index=True) if i > 0: time_features.loc[cur_index-1, 'CP'] = ( @@ -937,7 +939,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency ## FOR TESTING WITH PPG-BP DATASET: - #ibi_series = time_features['CP'].append(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True) + #ibi_series = time_features['CP'].concat(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True) #print(ibi_series) # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features @@ -1116,7 +1118,7 @@ def _frequency_hrv(ibi_series, sampling_frequency): # Add ULF parameters, if band available if use_ulf and np.sum(ulf_index) > 0: ulf_peak = freq[np.argmax(mag[ulf_index])] - freqencyHRVFeatures.append({ + freqencyHRVFeatures.concat({ 'ULF_peak': ulf_peak, 'ULF_power': ulf, 'ULF_power_log': np.log(ulf), From 141068598ca5d38d34a91a3736280401c0c7b437 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Fri, 15 Apr 2022 20:38:10 +0000 Subject: [PATCH 18/38] Add feature empty check; Add cleanSegment length check --- pypg/features.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 667fc99..4d60a5a 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -508,7 +508,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): segment_features = pd.concat([segment_features, sdppg_cycle(cycle, sampling_frequency, factor, unit, verbose=verbose)], ignore_index=True) - + if verbose: print('Cycle Features within Segment:') print(segment_features) @@ -754,25 +754,25 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal)) cycle_features = pd.DataFrame({ - 'a_val': a_val, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality - 'b_val': b_val, - 'c_val': c_val, - 'd_val': d_val, - 'e_val': e_val, - 'AD': AD, - 'AE': AE, - 'CD': CD, - 'DF': DF, - 'D': D, - 'E': E, - 'ratio_b_a': ratio_b_a, - 'ratio_c_a': ratio_c_a, - 'ratio_d_a': ratio_d_a, - 'ratio_e_a': ratio_e_a, - 'ratio_AD_AF': ratio_AD_AF, - 'ratio_CD_AF': ratio_CD_AF, - 'ratio_AE_AF': ratio_AE_AF, - 'ratio_DF_AF': ratio_DF_AF, + 'a_val': a_val if a_val else np.nan, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality + 'b_val': b_val if b_val else np.nan, + 'c_val': c_val if c_val else np.nan, + 'd_val': d_val if d_val else np.nan, + 'e_val': e_val if e_val else np.nan, + 'AD': AD if AD else np.nan, + 'AE': AE if AE else np.nan, + 'CD': CD if CD else np.nan, + 'DF': DF if DF else np.nan, + 'D': D if D else np.nan, + 'E': E if E else np.nan, + 'ratio_b_a': ratio_b_a if ratio_b_a else np.nan, + 'ratio_c_a': ratio_c_a if ratio_c_a else np.nan, + 'ratio_d_a': ratio_d_a if ratio_d_a else np.nan, + 'ratio_e_a': ratio_e_a if ratio_e_a else np.nan, + 'ratio_AD_AF': ratio_AD_AF if ratio_AD_AF else np.nan, + 'ratio_CD_AF': ratio_CD_AF if ratio_CD_AF else np.nan, + 'ratio_AE_AF': ratio_AE_AF if ratio_AE_AF else np.nan, + 'ratio_DF_AF': ratio_DF_AF if ratio_DF_AF else np.nan, }, index=[0]) if verbose: @@ -1135,6 +1135,8 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8): for col in segment_df.columns: if col.find('ts') == -1 and len(segment_df[col]) > 1: segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency + if len(segment_df) < 2: + return pd.DataFrame() return segment_df # returns the x values for those samples in the signal, that are closest to some given y value From f943caa18188eab0892720dae4a4ac50676aef7d Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Sun, 17 Apr 2022 16:58:15 +0000 Subject: [PATCH 19/38] Add checks to feature extraction --- pypg/features.py | 100 +++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 4d60a5a..6bc9b4b 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -165,6 +165,8 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): max_amplitude = ppg.max() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if len(peaks) == 0: + return pd.DataFrame() sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) if verbose: @@ -179,7 +181,6 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): 'sys_peak_ts': sys_peak_ts, 'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(), 'DT': (ppg.index.max() - sys_peak_ts).total_seconds(), - #'BPM': (60 / (ppg.index.max() - ppg.index.min()).total_seconds()), # TODO: CHECK DEF! @Ari: even use? makes sense? 'CT': (ppg.index.max() - ppg.index.min()).total_seconds(), 'SUT_VAL': (ppg.values[peaks[0]]) }, index=[0]) @@ -244,7 +245,6 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if len(cycles) == 0: return pd.DataFrame() - cur_index = 0 segment_features = pd.DataFrame() for i, cycle in enumerate(cycles): segment_features = pd.concat([segment_features, nonlinear_cycle(cycle, @@ -307,6 +307,8 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if len(peaks) == 0: + return pd.DataFrame() sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() @@ -375,7 +377,6 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False if len(cycles) == 0: return pd.DataFrame() - cur_index = 0 segment_features = pd.DataFrame() for i, cycle in enumerate(cycles): segment_features = pd.concat([segment_features, statistical_cycle(cycle, @@ -437,6 +438,8 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if len(peaks) == 0: + return pd.DataFrame() if verbose: plt.figure() @@ -564,6 +567,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if len(peaks) == 0: + return pd.DataFrame() sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() @@ -575,6 +580,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) # second derviative of the PPG signal sdppg_signal = np.gradient(np.gradient(ppg.values)) + if sdppg_signal.size == 0: + return pd.DataFrame() # TODO: features !!! WRITE DOWN REFERENCE NICELY IN TOP DESCRIPTION !!! # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076585 @@ -591,6 +598,8 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) sdppg_signal_peaks, peak_dict = signal.find_peaks(sdppg_signal, height=0) sdppg_signal_valleys, valley_dict = signal.find_peaks(-sdppg_signal, height=0) + if len(sdppg_signal_peaks) == 0 or len(sdppg_signal_valleys) == 0: + return pd.DataFrame() # initialize SDPPG indices and variables a_index = b_index = c_index = d_index = e_index = None @@ -820,8 +829,10 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125. # Transform signal from time to frequency domain freq, mag = _transformSigfromTimetoFreq(ppg.values, sampling_frequency, transformMethod=transformMethod, verbose=verbose) + if freq.size == 0 or mag.size == 0: + return pd.DataFrame() - # features + # Initialize features dataframe segment_features = pd.DataFrame() # Cut off frequencies @@ -868,14 +879,6 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125. print('Cycle Features within Segment:') print(segment_features) - # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense - # remove outliers - #segment_features = _clean_segment_features_of_outliers(segment_features) - - if verbose: - print('Cycle Features within Segment and no Outliers:') - print(segment_features) - return segment_features @@ -938,12 +941,9 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): time_features.loc[cur_index-1, 'CP'] = ( all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency - ## FOR TESTING WITH PPG-BP DATASET: - #ibi_series = time_features['CP'].concat(pd.Series([0.77, 0.943, 0.854, 0.644, 0.693, 0.832, 0.999, 0.942]), ignore_index=True) - #print(ibi_series) - - # TODO: @Ari: if len(time_features['CP']) < 5 don't compute hrv features - # PAPER What should be the minimum threshold + if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra small HRV) + print('PPG Segment too small! Please provide bigger PPG segment') + return pd.DataFrame() temporalHRVFeatures = _temporal_hrv(time_features['CP']) frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency) @@ -954,14 +954,6 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): print('Cycle Features within Segment:') print(segment_features) - # TODO: @Ari: OTHER CLEANING METRIC FOR HRV > computes only one value per window, therefore quantile doesnt make sense - # remove outliers - #segment_features = _clean_segment_features_of_outliers(segment_features) - - if verbose: - print('Cycle Features within Segment and no Outliers:') - print(segment_features) - return segment_features @@ -1068,7 +1060,9 @@ def _frequency_hrv(ibi_series, sampling_frequency): ulf_limit = ulf_limit #TODO: ?????? freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation) - + if freq.size == 0 or mag.size == 0: + return pd.DataFrame() + abs_index = freq <= hf_limit ulf_index = freq <= ulf_limit vlf_index = (freq >= ulf_limit) & (freq <= vlf_limit) @@ -1098,32 +1092,32 @@ def _frequency_hrv(ibi_series, sampling_frequency): hf_peak = freq[hf_index][np.argmax(mag[hf_index])] freqencyHRVFeatures = pd.DataFrame({ - 'VLF_peak': vlf_peak, - 'VLF_power': vlf, - 'VLF_power_log': np.log(vlf), - 'VLF_power_rel': vlf_perc, - 'LF_peak': lf_peak, - 'LF_power': lf, - 'LF_power_log': np.log(lf), - 'LF_power_rel': lf_perc, - 'LF_power_norm': lf_nu, - 'HF_peak': hf_peak, - 'HF_power': hf, - 'HF_power_log': np.log(hf), - 'HF_power_rel': hf_perc, - 'HF_power_norm': hf_nu, - 'ratio_LF_HF': lf/hf}, - index=[0]) + 'VLF_peak': vlf_peak if vlf_peak else np.nan, + 'VLF_power': vlf if vlf else np.nan, + 'VLF_power_log': np.log(vlf) if np.log(vlf) else np.nan, + 'VLF_power_rel': vlf_perc if vlf_perc else np.nan, + 'LF_peak': lf_peak if lf_peak else np.nan, + 'LF_power': lf if lf else np.nan, + 'LF_power_log': np.log(lf) if np.log(lf) else np.nan, + 'LF_power_rel': lf_perc if lf_perc else np.nan, + 'LF_power_norm': lf_nu if lf_nu else np.nan, + 'HF_peak': hf_peak if hf_peak else np.nan, + 'HF_power': hf if hf else np.nan, + 'HF_power_log': np.log(hf) if np.log(hf) else np.nan, + 'HF_power_rel': hf_perc if hf_perc else np.nan, + 'HF_power_norm': hf_nu if hf_nu else np.nan, + 'ratio_LF_HF': lf/hf if lf/hf else np.nan + }, index=[0]) # Add ULF parameters, if band available if use_ulf and np.sum(ulf_index) > 0: ulf_peak = freq[np.argmax(mag[ulf_index])] freqencyHRVFeatures.concat({ - 'ULF_peak': ulf_peak, - 'ULF_power': ulf, - 'ULF_power_log': np.log(ulf), - 'ULF_power_rel': ulf_perc}, - index=[0]) + 'ULF_peak': ulf_peak if ulf_peak else np.nan, + 'ULF_power': ulf if ulf else np.nan, + 'ULF_power_log': np.log(ulf) if np.log(ulf) else np.nan, + 'ULF_power_rel': ulf_perc if ulf_perc else np.nan + }, index=[0]) return freqencyHRVFeatures @@ -1149,19 +1143,21 @@ def _find_xs_for_y(y_s, y_val, sys_peak): # transforms a given temporal signal into the spectral domain # using different methods and allowing interpolation if required. # returns the frequencies and their magnitudes. -def _transformSigfromTimetoFreq(timeSignal, fs, transformMethod='welch', fft_interpolation=None, verbose=False): +def _transformSigfromTimetoFreq(timeSignal, sampling_frequency, transformMethod='welch', fft_interpolation=None, verbose=False): if fft_interpolation: x = np.cumsum(timeSignal) f_interpol = interpolate.interp1d(x, timeSignal, kind = "cubic", fill_value="extrapolate") - t_interpol = np.arange(x[0], x[-1], fft_interpolation/fs) + t_interpol = np.arange(x[0], x[-1], fft_interpolation/sampling_frequency) timeSignal = f_interpol(t_interpol) - fs = fft_interpolation + sampling_frequency = fft_interpolation # TODO: Maybe add different methods if transformMethod == 'welch': - freq, mag = signal.welch(timeSignal, fs, window='hamming', scaling='density', detrend='linear') + freq, mag = signal.welch(timeSignal, sampling_frequency, window='hamming', scaling='density', detrend='linear') + else: + print('Transform method not available. Please select one of the following: [welch]') if verbose: plt.semilogy(freq, mag) From bd9ed2162185772b291bdfcf07009dbca168113b Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Tue, 19 Apr 2022 11:43:26 +0000 Subject: [PATCH 20/38] Cleanup; remove finished TODOs --- pypg/features.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 6bc9b4b..e635c2b 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -763,7 +763,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) ratio_DF_AF = ((len(sdppg_signal) - c_index) / len(sdppg_signal)) cycle_features = pd.DataFrame({ - 'a_val': a_val if a_val else np.nan, # TODO: @Ari: What to put if NaN, i.e. not able to detect in cycle due to bad quality + 'a_val': a_val if a_val else np.nan, 'b_val': b_val if b_val else np.nan, 'c_val': c_val if c_val else np.nan, 'd_val': d_val if d_val else np.nan, @@ -941,7 +941,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): time_features.loc[cur_index-1, 'CP'] = ( all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency - if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra small HRV) + if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV) print('PPG Segment too small! Please provide bigger PPG segment') return pd.DataFrame() @@ -1019,7 +1019,7 @@ def _temporal_hrv(ibi_series): # features temporalHRVFeatures = pd.DataFrame({ - 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), # TODO: @Ari: RETURNS INF AS VALUE, MAYBE SIGNAL SEGMENT TOO SMALL + 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), 'MeanNN': ibi_series.mean(), 'MeanHR': instantaneous_hr.mean(), 'MaxHR': rolling_mean_hr.max(), @@ -1128,7 +1128,7 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8): quant = segment_df.quantile(treshold) for col in segment_df.columns: if col.find('ts') == -1 and len(segment_df[col]) > 1: - segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] ## TODO: @Ari: DOES THIS ALWAYS APPLY? e.g. HRV/ Frequency + segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] if len(segment_df) < 2: return pd.DataFrame() return segment_df From 3b68b8fbc963dbfcb4bc3ff3889a96b8819eed7e Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Fri, 22 Apr 2022 10:37:12 +0000 Subject: [PATCH 21/38] Put output in verbose mode; add TODO --- pypg/features.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index e635c2b..5cf9001 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -942,7 +942,8 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV) - print('PPG Segment too small! Please provide bigger PPG segment') + if verbose: + print('PPG Segment too small! Please provide bigger PPG segment') return pd.DataFrame() temporalHRVFeatures = _temporal_hrv(time_features['CP']) @@ -1129,7 +1130,7 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8): for col in segment_df.columns: if col.find('ts') == -1 and len(segment_df[col]) > 1: segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] - if len(segment_df) < 2: + if len(segment_df) < 2: # TODO: Maybe take out because will remove data (Change generate_featueBAsed_Dataset.py) return pd.DataFrame() return segment_df From 0332152ab45573b6b847157f790edcef0b5e0772 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Fri, 22 Apr 2022 13:21:30 +0000 Subject: [PATCH 22/38] Remove len check of cleaned dataframe --- pypg/features.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 5cf9001..c1c5ab6 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -1130,8 +1130,6 @@ def _clean_segment_features_of_outliers(segment_df, treshold=0.8): for col in segment_df.columns: if col.find('ts') == -1 and len(segment_df[col]) > 1: segment_df = segment_df[np.abs(segment_df[col]) < np.abs(quant[col]*2)] - if len(segment_df) < 2: # TODO: Maybe take out because will remove data (Change generate_featueBAsed_Dataset.py) - return pd.DataFrame() return segment_df # returns the x values for those samples in the signal, that are closest to some given y value From 82967d99b930602d6f0e95ee7cc2778b661e091b Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Fri, 22 Apr 2022 14:35:47 +0000 Subject: [PATCH 23/38] Compare code to ALPS repository --- pypg/features.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index c1c5ab6..02445c8 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -970,7 +970,7 @@ def _temporal_hrv(ibi_series): nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before # Prepare data - instantaneous_hr = 60 / (ibi_series) + instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s? rolling_mean_hr = instantaneous_hr.rolling(window).mean() rolling_24h = ibi_series.rolling(window) @@ -1047,18 +1047,11 @@ def _frequency_hrv(ibi_series, sampling_frequency): ## TODO: @Ari: HYPERPARAMETERS to config dict fft_interpolation = 4.0 - use_ulf = False - lomb_smoothing = 0.02 vlf_limit = 0.04 lf_limit = 0.15 hf_limit = 0.4 - - if not use_ulf or ibi_series.sum() < 300000: #TODO: check - # Do not use ULF band on sample shorter than 5 minutes - ulf_limit = 0 - else: - ulf_limit = ulf_limit #TODO: ?????? + ulf_limit = 0 freq, mag = _transformSigfromTimetoFreq(ibi_series.values, sampling_frequency, fft_interpolation=fft_interpolation) if freq.size == 0 or mag.size == 0: @@ -1110,16 +1103,6 @@ def _frequency_hrv(ibi_series, sampling_frequency): 'ratio_LF_HF': lf/hf if lf/hf else np.nan }, index=[0]) - # Add ULF parameters, if band available - if use_ulf and np.sum(ulf_index) > 0: - ulf_peak = freq[np.argmax(mag[ulf_index])] - freqencyHRVFeatures.concat({ - 'ULF_peak': ulf_peak if ulf_peak else np.nan, - 'ULF_power': ulf if ulf else np.nan, - 'ULF_power_log': np.log(ulf) if np.log(ulf) else np.nan, - 'ULF_power_rel': ulf_perc if ulf_perc else np.nan - }, index=[0]) - return freqencyHRVFeatures From c3a33ca15847a394c757f19bd145a37a6a7519fb Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Thu, 28 Apr 2022 13:13:18 +0000 Subject: [PATCH 24/38] adapt nn_threshold --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 02445c8..ba961ca 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -967,7 +967,7 @@ def _temporal_hrv(ibi_series): raise Exception('Signal values not accepted, enter a pandas.Series or ndarray.') window = 5 - nn_threshold = 0.05 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before + nn_threshold = 50 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before # Prepare data instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s? From e707d1d6a07c46dbc5de58d02b62935a31762408 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Tue, 10 May 2022 16:46:35 +0000 Subject: [PATCH 25/38] Adapt ibi_series computation --- pypg/features.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index ba961ca..097a3ff 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -43,7 +43,8 @@ import nolds from scipy import signal, stats, interpolate -from .cycles import find_with_template +from .cycles import find_with_template, find_with_SNR +from .plots import marks_plot def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): @@ -917,37 +918,33 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): ppg = pd.Series(ppg) elif not isinstance(ppg, pd.core.series.Series): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') - - cycles = find_with_template(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + + cycles = find_with_SNR(ppg, sampling_frequency, factor=0.6667, verbose=verbose) + if len(cycles) == 0: + return pd.DataFrame() + # all signal peaks all_peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] + if verbose: print('All Peaks: ', all_peaks) - if len(cycles) == 0: - return pd.DataFrame() - cur_index = 0 - time_features = pd.DataFrame() - for i, cycle in enumerate(cycles): - time_features = pd.concat([time_features, time_cycle(cycle, sampling_frequency, - factor, unit, verbose=verbose)], - ignore_index=True) + sys_peak_ts_curr = 0 + ibi_series = pd.Series() + for i, peak in enumerate(all_peaks): + sys_peak_ts_prior = sys_peak_ts_curr + sys_peak_ts_curr = ppg.index[peak] + if i > 0: - time_features.loc[cur_index-1, 'CP'] = ( - time_features.loc[cur_index, 'sys_peak_ts'] - time_features.loc[cur_index-1, 'sys_peak_ts']).total_seconds() - cur_index = cur_index + 1 - # last cycle or only cycle need to relies on the difference between the general peaks - all_peaks_index = len(all_peaks)-1 - time_features.loc[cur_index-1, 'CP'] = ( - all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency + ibi_series = ibi_series.append(pd.Series([sys_peak_ts_curr - sys_peak_ts_prior]), ignore_index=True) - if len(time_features['CP']) < 5: # TODO: Check for minimum size (Ultra short HRV) + if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV) if verbose: print('PPG Segment too small! Please provide bigger PPG segment') return pd.DataFrame() - temporalHRVFeatures = _temporal_hrv(time_features['CP']) - frequencyHRVFeatures = _frequency_hrv(time_features['CP'], sampling_frequency) + temporalHRVFeatures = _temporal_hrv(ibi_series) + frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency) segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1) From f9f5eb0f6f773e8d60cb4a2c6d65db2f3a32003f Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Tue, 7 Jun 2022 11:07:34 +0000 Subject: [PATCH 26/38] add sw features --- pypg/features.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pypg/features.py b/pypg/features.py index 097a3ff..aff4795 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -192,6 +192,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): if verbose: plt.scatter([x_1, x_2], ppg[[x_1, x_2]]) cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds() + cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1).total_seconds() cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds() cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1) From 3fb109dcbd0728c14d331454add6d7b2c2e3e878 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Tue, 7 Jun 2022 12:09:21 +0000 Subject: [PATCH 27/38] Change cutOff-Frequency --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index aff4795..25d52d1 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -792,7 +792,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) return cycle_features -def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=125.0, interval_size=5, verbose=False): +def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=5, verbose=False): """ Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described From 3688746a5c2e5037531142a22668cc33440cc7c9 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 11:46:40 +0000 Subject: [PATCH 28/38] adapt computation of time features --- pypg/features.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 25d52d1..2faf2e8 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -46,7 +46,7 @@ from .cycles import find_with_template, find_with_SNR from .plots import marks_plot - +# TODO: unit could be removed def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts time domain features from PPG cycles in a give PPG segment as @@ -102,9 +102,10 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): if i > 0: segment_features.loc[cur_index-1, 'CP'] = ( segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1, - 'sys_peak_ts']).total_seconds() + 'sys_peak_ts'])/sampling_frequency cur_index = cur_index + 1 + # TODO: Remove CP because some cycles are skipped which leads to wrong CP values # last cycle or only cycle need to relies on the difference between the general peaks all_peaks_index = len(all_peaks)-1 segment_features.loc[cur_index-1, 'CP'] = ( @@ -122,6 +123,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): print(segment_features) return segment_features +# TODO: unit could be removed def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ Extracts time domain features for a PPG cycle. Returns a pandas.Series with @@ -158,31 +160,30 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): elif not isinstance(ppg, pd.core.series.Series): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') - if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? + if isinstance(ppg.index, pd.RangeIndex): + ppg.index = list(ppg.index) - ppg = ppg.interpolate(method='time') + #ppg = ppg.interpolate(method='time') ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) max_amplitude = ppg.max() peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] if len(peaks) == 0: return pd.DataFrame() - sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) + sys_peak_ts = ppg.index[peaks[0]] if verbose: plt.figure() - plt.xlim((ppg.index.min(), ppg.index.max())) - plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]]) plt.plot(ppg.index, ppg.values) - + # features cycle_features = pd.DataFrame({ 'start_ts': ppg.index.min(), 'sys_peak_ts': sys_peak_ts, - 'SUT': (sys_peak_ts - ppg.index.min()).total_seconds(), - 'DT': (ppg.index.max() - sys_peak_ts).total_seconds(), - 'CT': (ppg.index.max() - ppg.index.min()).total_seconds(), + 'SUT': (sys_peak_ts - ppg.index.min())/sampling_frequency, + 'DT': (ppg.index.max() - sys_peak_ts)/sampling_frequency, + 'CT': (ppg.index.max() - ppg.index.min())/sampling_frequency, 'SUT_VAL': (ppg.values[peaks[0]]) }, index=[0]) @@ -190,10 +191,10 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): p_ampl = p_value / 100 * max_amplitude x_1, x_2 = _find_xs_for_y(ppg, p_ampl, peaks[0]) if verbose: - plt.scatter([x_1, x_2], ppg[[x_1, x_2]]) - cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts).total_seconds() - cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1).total_seconds() - cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1).total_seconds() + plt.scatter([x_1, x_2], [ppg.where(ppg.index==x_1).dropna(), ppg.where(ppg.index==x_2).dropna()]) + cycle_features.loc[0, 'DW_'+str(p_value)] = (x_2 - sys_peak_ts)/sampling_frequency + cycle_features.loc[0, 'SW_'+str(p_value)] = (sys_peak_ts - x_1)/sampling_frequency + cycle_features.loc[0, 'DW_SW_sum_'+str(p_value)] = (x_2 - x_1)/sampling_frequency cycle_features.loc[0, 'DW_SW_ratio_'+str(p_value)] = (x_2 - sys_peak_ts) / (sys_peak_ts - x_1) if verbose: From 752dce5d82f63783762854a21eb98513958c5d58 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 12:07:38 +0000 Subject: [PATCH 29/38] adapt nonlinear: fs for time computation --- pypg/features.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 2faf2e8..21b95d2 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -203,6 +203,7 @@ def time_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): return cycle_features +# TODO: unit could be removed def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts non-linear features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in @@ -267,6 +268,7 @@ def nonlinear(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): return segment_features +# TODO: unit could be removed def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ Extracts nonlinear features for a PPG cycle. Returns a pandas.Series. @@ -302,23 +304,22 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa elif not isinstance(ppg, pd.core.series.Series): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') - if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? + if isinstance(ppg.index, pd.RangeIndex): + ppg.index = list(ppg.index) - ppg = ppg.interpolate(method='time') + #ppg = ppg.interpolate(method='time') ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) - ppg_cycle_duration = (ppg.index.max() - ppg.index.min()).total_seconds() + ppg_cycle_duration = (ppg.index.max() - ppg.index.min())/sampling_frequency peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] if len(peaks) == 0: return pd.DataFrame() - sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) - sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() + sys_peak_ts = ppg.index[peaks[0]] + sys_peak = (sys_peak_ts - ppg.index.min())/sampling_frequency if verbose: plt.figure() - plt.xlim((ppg.index.min(), ppg.index.max())) - plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]]) plt.plot(ppg.index, ppg.values) # features From 4f8e85009172f85d1b44756a8125c58037dff11d Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 12:19:41 +0000 Subject: [PATCH 30/38] adapt statistical: fs for time computation --- pypg/features.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 21b95d2..1640e98 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -336,6 +336,7 @@ def nonlinear_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=Fa return cycle_features +# TODO: unit could be removed def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts statistical features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in @@ -400,6 +401,7 @@ def statistical(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False return segment_features +# TODO: unit could be removed def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ Extracts statistical features for a PPG cycle. Returns a pandas.Series. @@ -435,10 +437,10 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= elif not isinstance(ppg, pd.core.series.Series): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') - if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? - - ppg = ppg.interpolate(method='time') + if isinstance(ppg.index, pd.RangeIndex): + ppg.index = list(ppg.index) + + #ppg = ppg.interpolate(method='time') ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] @@ -447,8 +449,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= if verbose: plt.figure() - plt.xlim((ppg.index.min(), ppg.index.max())) - plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]]) plt.plot(ppg.index, ppg.values) # features From 55c4d7e7c558c079d9bafe799089ebf74d6e3dfd Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 12:40:49 +0000 Subject: [PATCH 31/38] adapt statistical: fs for time computation --- pypg/features.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 1640e98..dee9678 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -466,6 +466,7 @@ def statistical_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose= return cycle_features +# TODO: unit could be removed def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts features from the second derivative (SDPPG or APG) of the PPG cycles in a give PPG segment. Returns a pandas.DataFrame in @@ -530,6 +531,7 @@ def sdppg(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): return segment_features +# TODO: unit could be removed def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False): """ Extracts features from the second derivative (SDPPG or APG) for the PPG cycles. Returns a pandas.Series. @@ -565,22 +567,21 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) elif not isinstance(ppg, pd.core.series.Series): raise Exception('PPG values not accepted, enter a pandas.Series or ndarray.') - if not isinstance(ppg.index, pd.DatetimeIndex): - ppg.index = pd.to_datetime(ppg.index, unit=unit) # TODO: @Ari: Sampling Frequncy considered??? + if isinstance(ppg.index, pd.RangeIndex): + ppg.index = list(ppg.index) - ppg = ppg.interpolate(method='time') + #ppg = ppg.interpolate(method='time') ppg = ppg - ppg.min() # TODO: @Ari: If we do it for each cycle seperately, features based on ppg values might be wrong because offset will be different!!! (e.g. SUT_VAL or statistical values) peaks = signal.find_peaks(ppg.values, distance=factor*sampling_frequency)[0] if len(peaks) == 0: return pd.DataFrame() - sys_peak_ts = ppg.index[peaks[0]] # TODO: @Ari: ASSUMING SYS PEAK IS ALWAYS FIRST MAXIMA > clean signal assumption (maybe add checks e.g. check peak height dictionary?) - sys_peak = (sys_peak_ts - ppg.index.min()).total_seconds() + sys_peak_ts = ppg.index[peaks[0]] + sys_peak = (sys_peak_ts - ppg.index.min()) if verbose: plt.figure() - plt.xlim((ppg.index.min(), ppg.index.max())) - plt.scatter(ppg.index[peaks], ppg[peaks]) + plt.scatter(ppg.index[peaks[0]], ppg.values[peaks[0]]) plt.plot(ppg.index, ppg.values) # second derviative of the PPG signal From a80498c714e3cf6f13a442ea40ee9281ed7dfe2a Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 12:50:06 +0000 Subject: [PATCH 32/38] adapt hrv: fs for time computation --- pypg/features.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index dee9678..b5f4171 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -941,12 +941,15 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): sys_peak_ts_curr = ppg.index[peak] if i > 0: - ibi_series = ibi_series.append(pd.Series([sys_peak_ts_curr - sys_peak_ts_prior]), ignore_index=True) + ibi_series = ibi_series.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True) if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV) if verbose: print('PPG Segment too small! Please provide bigger PPG segment') return pd.DataFrame() + else: + if verbose: + print('IBI-Series: ', ibi_series.values) temporalHRVFeatures = _temporal_hrv(ibi_series) frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency) From 6a73fb26c7e68ff9c6985442a02d942a941feecf Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 12:50:37 +0000 Subject: [PATCH 33/38] Add TODO --- pypg/features.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pypg/features.py b/pypg/features.py index b5f4171..19f511d 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -888,6 +888,7 @@ def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5 return segment_features +# TODO: unit could be removed def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): """ Extracts hrv features from a given PPG segment. From 7d38d7f196973c6a39a63ee099121cc7fd7cdb06 Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Thu, 9 Jun 2022 22:20:35 +0000 Subject: [PATCH 34/38] Change interval size for frfrequency blocks --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 19f511d..0dec31f 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -796,7 +796,7 @@ def sdppg_cycle(ppg, sampling_frequency, factor=0.667, unit='ms', verbose=False) return cycle_features -def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=5, verbose=False): +def frequency(ppg, sampling_frequency, transformMethod='welch', cutoff_freq=12.5, interval_size=0.5, verbose=False): """ Extracts frequency features from PPG cycles in a give PPG segment. Returns a pandas.DataFrame in which each line contains the features for a given valid cycle (as described From 321baf547b16f0bf52ba71563e5943942f8d634a Mon Sep 17 00:00:00 2001 From: Florian Hermes Date: Sat, 11 Jun 2022 11:02:37 +0000 Subject: [PATCH 35/38] Add SampEn check --- pypg/features.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 0dec31f..7104a53 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -1023,10 +1023,14 @@ def _temporal_hrv(ibi_series): # Non-Linear Parameters tolerance = ibi_series.std() * 0.2 + + SampEn = float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)) + if SampEn == np.inf: + SampEn = np.nan # features temporalHRVFeatures = pd.DataFrame({ - 'SampEn': float(nolds.sampen(ibi_series.to_numpy(), 2, tolerance)), + 'SampEn': SampEn, 'MeanNN': ibi_series.mean(), 'MeanHR': instantaneous_hr.mean(), 'MaxHR': rolling_mean_hr.max(), From f4ec044f4572f802243db5baeb26b97e247e0eef Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Sat, 18 Jun 2022 13:31:28 +0000 Subject: [PATCH 36/38] add CP mean and var; remove CP from time features --- pypg/features.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pypg/features.py b/pypg/features.py index 7104a53..c4ab034 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -99,7 +99,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): segment_features = pd.concat([segment_features, time_cycle(cycle, sampling_frequency, factor, unit, verbose=verbose)], ignore_index=True) - if i > 0: + '''if i > 0: segment_features.loc[cur_index-1, 'CP'] = ( segment_features.loc[cur_index, 'sys_peak_ts'] - segment_features.loc[cur_index-1, 'sys_peak_ts'])/sampling_frequency @@ -109,7 +109,7 @@ def time(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): # last cycle or only cycle need to relies on the difference between the general peaks all_peaks_index = len(all_peaks)-1 segment_features.loc[cur_index-1, 'CP'] = ( - all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency + all_peaks[all_peaks_index] - all_peaks[all_peaks_index-1])/sampling_frequency''' if verbose: print('Cycle Features within Segment:') @@ -936,27 +936,28 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): print('All Peaks: ', all_peaks) sys_peak_ts_curr = 0 - ibi_series = pd.Series() + CP = pd.Series() for i, peak in enumerate(all_peaks): sys_peak_ts_prior = sys_peak_ts_curr sys_peak_ts_curr = ppg.index[peak] if i > 0: - ibi_series = ibi_series.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True) + CP = CP.append(pd.Series([(sys_peak_ts_curr - sys_peak_ts_prior)/sampling_frequency]), ignore_index=True) - if len(ibi_series) < 5: # TODO: Check for minimum size (Ultra short HRV) + if len(CP) < 5: # TODO: Check for minimum size (Ultra short HRV) if verbose: print('PPG Segment too small! Please provide bigger PPG segment') return pd.DataFrame() else: if verbose: - print('IBI-Series: ', ibi_series.values) + print('IBI-Series: ', CP.values) - temporalHRVFeatures = _temporal_hrv(ibi_series) - frequencyHRVFeatures = _frequency_hrv(ibi_series, sampling_frequency) + temporalHRVFeatures = _temporal_hrv(CP) + frequencyHRVFeatures = _frequency_hrv(CP, sampling_frequency) segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1) - + segment_features = pd.concat([segment_features, pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()})]) + if verbose: print('Cycle Features within Segment:') print(segment_features) From 514163088a94eb3a5718e41a4fd12f742ea12ce6 Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Sat, 18 Jun 2022 17:34:45 +0000 Subject: [PATCH 37/38] fix concatinating error --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index c4ab034..7b31766 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -956,7 +956,7 @@ def hrv(ppg, sampling_frequency, factor=0.6667, unit='ms', verbose=False): frequencyHRVFeatures = _frequency_hrv(CP, sampling_frequency) segment_features = pd.concat([temporalHRVFeatures.reset_index(drop=True), frequencyHRVFeatures], axis=1) - segment_features = pd.concat([segment_features, pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()})]) + segment_features = pd.concat([segment_features.reset_index(drop=True), pd.DataFrame({'CP_mean': CP.mean(), 'CP_var': CP.var()}, index=[0])], axis=1) if verbose: print('Cycle Features within Segment:') From 16eb9093ba163fd93dfc0157eff0b4ce9368e88e Mon Sep 17 00:00:00 2001 From: Florian Hermes <43293808+GoWithTheFlo95@users.noreply.github.com> Date: Sat, 18 Jun 2022 17:50:42 +0000 Subject: [PATCH 38/38] Adapt computation of HR --- pypg/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypg/features.py b/pypg/features.py index 7b31766..90e7118 100644 --- a/pypg/features.py +++ b/pypg/features.py @@ -977,7 +977,7 @@ def _temporal_hrv(ibi_series): nn_threshold = 50 # TODO: @Ari: HOW TO SET THIS VALUE? > IBI_SERIES VALUES around 0.88 ish. Affect computation of nn_xx /// was 50 before # Prepare data - instantaneous_hr = 60 / (ibi_series / 1000) # TODO: @Ari: why divided by 1000? from ms to s? + instantaneous_hr = 60 / ibi_series #/1000 # TODO: @Ari: why divided by 1000? from ms to s? > if ibi series in ms but for MIMIC it is in s rolling_mean_hr = instantaneous_hr.rolling(window).mean() rolling_24h = ibi_series.rolling(window)