Merge pull request #17 from MD2Korg/3.0
Bug fixes and added graphic feature
nasirali1 committed Aug 8, 2019
2 parents 4cf168e + 6a4f21f commit da3bdd1
Showing 35 changed files with 2,085 additions and 275 deletions.
8 changes: 7 additions & 1 deletion cerebralcortex/algorithms/__init__.py
@@ -1,2 +1,8 @@
 from cerebralcortex.algorithms.gps import gps_clusters
-__all__ = ["gps_clusters"]
+from cerebralcortex.algorithms.ecg.ecg_signal_processing import process_ecg
+from cerebralcortex.algorithms.stress_prediction.stress_prediction import stress_prediction
+from cerebralcortex.algorithms.stress_prediction.stress_episodes import stress_episodes_estimation
+from cerebralcortex.algorithms.rr_intervals.rr_interval_feature_extraction import rr_interval_feature_extraction
+
+__all__ = ["gps_clusters", "process_ecg", "rr_interval_feature_extraction", "stress_prediction", "stress_episodes_estimation"]

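With these re-exports in place, downstream code can import the new algorithms straight from the package root; a minimal sketch (illustrative usage, not part of the commit):

    from cerebralcortex.algorithms import process_ecg, rr_interval_feature_extraction
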
2 changes: 2 additions & 0 deletions cerebralcortex/algorithms/ecg/__init__.py
@@ -0,0 +1,2 @@
from cerebralcortex.algorithms.ecg.ecg_signal_processing import process_ecg
__all__ = ["process_ecg"]
460 changes: 460 additions & 0 deletions cerebralcortex/algorithms/ecg/ecg_signal_processing.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions cerebralcortex/algorithms/gps/gps_clustering.py
@@ -32,14 +32,14 @@
 from pyspark.sql.types import StructField, StructType, StringType, FloatType


-EPSILON_CONSTANT = 1000
+EPSILON_CONSTANT = 1000/100.0
 LATITUDE = 0
 LONGITUDE = 1
 ACCURACY = -1
 GPS_ACCURACY_THRESHOLD = 41.0
 KM_PER_RADIAN = 6371.0088
 GEO_FENCE_DISTANCE = 2
-MINIMUM_POINTS_IN_CLUSTER = 500
+MINIMUM_POINTS_IN_CLUSTER = 50

 def get_centermost_point(cluster: object) -> object:
     """
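Net effect of the constant changes: EPSILON_CONSTANT becomes 10.0 and the minimum cluster size drops from 500 points to 50. A minimal sketch of how such constants are typically wired into a haversine DBSCAN (an illustrative assumption; the actual clustering call in gps_clustering.py is outside the visible diff):

    import numpy as np
    from sklearn.cluster import DBSCAN

    # coords: (n, 2) array of [latitude, longitude] in degrees (assumed input)
    epsilon = GEO_FENCE_DISTANCE / (EPSILON_CONSTANT * KM_PER_RADIAN)
    db = DBSCAN(eps=epsilon, min_samples=MINIMUM_POINTS_IN_CLUSTER,
                algorithm='ball_tree', metric='haversine').fit(np.radians(coords))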
Empty file.
218 changes: 218 additions & 0 deletions cerebralcortex/algorithms/rr_intervals/rr_interval_feature_extraction.py
@@ -0,0 +1,218 @@
# Copyright (c) 2017, MD2K Center of Excellence
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import List
import numpy as np
from scipy import signal
from scipy.stats import iqr
from scipy.stats.mstats import winsorize
from enum import Enum
from pyspark.sql.types import StructField, StructType, StringType, FloatType, TimestampType, ArrayType
from pyspark.sql.functions import pandas_udf, PandasUDFType
import pandas as pd
import datetime

def lomb(time_stamps: List,
         samples: List,
         low_frequency: float,
         high_frequency: float):
    """
    Lomb-Scargle periodogram implementation
    :param time_stamps: List of sample timestamps
    :param samples: List of sample values
    :param low_frequency: float, lower bound of the frequency grid
    :param high_frequency: float, upper bound of the frequency grid
    :return: Lomb-Scargle periodogram and the frequency grid it was evaluated on
    """

    frequency_range = np.linspace(low_frequency, high_frequency, len(time_stamps))
    result = signal.lombscargle(time_stamps, samples, frequency_range)
    return result, frequency_range
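# Illustrative check (an assumption, not part of this module): scipy's
# lombscargle interprets the frequency grid as *angular* frequencies, so a
# 0.1 Hz tone sampled at irregular times peaks near 2*pi*0.1 ~= 0.63:
#   ts = np.sort(np.random.uniform(0, 600, 300))
#   pgram, freqs = lomb(ts, np.sin(2 * np.pi * 0.1 * ts), 0.01, 0.7)
#   freqs[np.argmax(pgram)]  # ~0.63 rad/s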


def heart_rate_power(power: np.ndarray,
                     frequency: np.ndarray,
                     low_rate: float,
                     high_rate: float):
    """
    Compute heart rate power for a specific frequency range
    :param power: np.ndarray
    :param frequency: np.ndarray
    :param low_rate: float, lower bound of the frequency range
    :param high_rate: float, upper bound of the frequency range
    :return: sum of power over the frequency range
    """
    result_power = float(0.0)
    for i, value in enumerate(power):
        if low_rate <= frequency[i] <= high_rate:
            result_power += value
    return result_power
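# For reference, an equivalent vectorized form of the loop above:
#   result_power = power[(frequency >= low_rate) & (frequency <= high_rate)].sum()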



def rr_feature_computation(timestamp: list,
                           value: list,
                           low_frequency: float = 0.01,
                           high_frequency: float = 0.7,
                           low_rate_vlf: float = 0.0009,
                           high_rate_vlf: float = 0.04,
                           low_rate_hf: float = 0.15,
                           high_rate_hf: float = 0.4,
                           low_rate_lf: float = 0.04,
                           high_rate_lf: float = 0.15):
    """
    ECG feature implementation. The frequency ranges for the high, low, and very low
    heart rate variability bands are derived from the following paper:
    'Heart rate variability: standards of measurement, physiological interpretation
    and clinical use'
    :param timestamp: list of RR-interval timestamps for one window
    :param value: list of RR-interval values for one window
    :param low_frequency: float, lower bound of the Lomb-Scargle frequency grid
    :param high_frequency: float, upper bound of the Lomb-Scargle frequency grid
    :param low_rate_vlf: float
    :param high_rate_vlf: float
    :param low_rate_hf: float
    :param high_rate_hf: float
    :param low_rate_lf: float
    :param high_rate_lf: float
    :return: list of the 11 ECG features for the window
    """

    # windowing is performed upstream in get_windows(); this function receives the
    # timestamps and RR intervals of a single window

    # initialize each ecg feature array
    rr_variance_data = []
    rr_mean_data = []
    rr_median_data = []
    rr_80percentile_data = []
    rr_20percentile_data = []
    rr_quartile_deviation_data = []
    rr_HF_data = []
    rr_LF_data = []
    rr_VLF_data = []
    rr_LF_HF_data = []
    rr_heart_rate_data = []

    reference_data = value

    rr_variance_data.append(np.var(reference_data))

    power, frequency = lomb(time_stamps=timestamp, samples=value,
                            low_frequency=low_frequency, high_frequency=high_frequency)

    # band powers: very low, high, and low frequency
    rr_VLF_data.append(heart_rate_power(power, frequency, low_rate_vlf, high_rate_vlf))
    rr_HF_data.append(heart_rate_power(power, frequency, low_rate_hf, high_rate_hf))
    rr_LF_data.append(heart_rate_power(power, frequency, low_rate_lf, high_rate_lf))

    # LF/HF ratio, guarding against division by zero
    hf_power = heart_rate_power(power, frequency, low_rate_hf, high_rate_hf)
    if hf_power != 0:
        lf_hf = float(heart_rate_power(power, frequency, low_rate_lf, high_rate_lf) / hf_power)
        rr_LF_HF_data.append(lf_hf)
    else:
        rr_LF_HF_data.append(0)

    rr_mean_data.append(np.mean(reference_data))
    rr_median_data.append(np.median(reference_data))
    rr_quartile_deviation_data.append(
        0.5 * (np.percentile(reference_data, 75) - np.percentile(reference_data, 25)))
    rr_heart_rate_data.append(np.median(60000 / np.asarray(reference_data)))

    return [rr_variance_data[0], rr_VLF_data[0], rr_HF_data[0], rr_LF_data[0], rr_LF_HF_data[0],
            rr_mean_data[0], rr_median_data[0], rr_quartile_deviation_data[0], rr_heart_rate_data[0],
            np.percentile(value, 80), np.percentile(value, 20)]
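# The 11 returned features, in order: RR variance, VLF power, HF power, LF power,
# LF/HF ratio, mean, median, quartile deviation, median heart rate (assuming RR
# intervals in milliseconds, 60000/RR gives beats per minute), and the 80th and
# 20th percentiles of the RR intervals.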


def get_windows(data):
    """Segment a sorted RR-interval dataframe into 60-second windows."""
    window_col, ts_col = [], []
    rr_interval = data.sort_values(by=['timestamp'])
    # first and last timestamps in epoch milliseconds
    st = (rr_interval['timestamp'].values[0].astype('int64') / 1e9) * 1000
    et = (rr_interval['timestamp'].values[-1].astype('int64') / 1e9) * 1000
    ts_array = np.arange(st, et, 60000)

    x = [(i.astype('int64') / 1e9) * 1000 for i in rr_interval['timestamp'].values]
    y = [i for i in rr_interval['rr_interval'].values]

    # pack (epoch_ms, rr_interval) pairs into one 2-column array
    tmp_rri = np.zeros((len(x), 2))
    for c in range(len(x)):
        tmp_rri[c][0] = x[c]
        tmp_rri[c][1] = y[c]

    for t in ts_array:
        index = np.where((tmp_rri[:, 0] >= t) & (tmp_rri[:, 0] <= t + 60000))[0]

        # skip windows with too many or too few RR intervals
        if len(index) > 100 or len(index) < 20:
            continue

        window_col.append(tmp_rri[index, :])
        ts_col.append(t)
    return window_col, ts_col
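# Resulting shapes (for orientation): each entry of window_col is an (n, 2) array
# of [epoch_ms, rr_interval] rows with 20 <= n <= 100; ts_col holds the matching
# window-start times in epoch milliseconds.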

def combine_data(window_col):
    """Compute the 11 RR-interval features for every window and stack them
    row-wise into a (len(window_col), 11) matrix."""
    feature_matrix = np.zeros((0, 11))
    for item in window_col:
        feature = rr_feature_computation(item[:, 0], item[:, 1])
        feature_matrix = np.concatenate((feature_matrix, np.array(feature).reshape(-1, 11)))
    return feature_matrix



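# Output row type for the grouped-map UDF below: one row per accepted
# 60-second window, with the 11 features packed into an array column.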
schema = StructType([
    StructField("user", StringType()),
    StructField("timestamp", TimestampType()),
    StructField("rr_feature", ArrayType(FloatType())),
])


@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def rr_interval_feature_extraction(data: object) -> object:
    winsor_limit = 0.1  # FIXME - this must be passed or configurable

    # z-score the RR intervals before windowing
    mean = data['rr_interval'].mean()
    std = data['rr_interval'].std()
    data['rr_interval'] = (data['rr_interval'] - mean) / std

    window_col, ts_col = get_windows(data)
    X = combine_data(window_col)
    # winsorize each feature column to damp outliers
    for k in range(X.shape[1]):
        X[:, k] = winsorize(X[:, k], limits=[winsor_limit, winsor_limit])

    # one output row per accepted window
    df = pd.DataFrame(index=np.arange(0, len(ts_col)), columns=['user', 'timestamp', 'rr_feature'])
    user = data['user'].values[0]
    for c in range(len(ts_col)):
        df.loc[c] = [user, np.datetime64(int(ts_col[c]), 'ms'), X[c]]

    return df
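Being a GROUPED_MAP pandas UDF, the extractor is applied per group on a Spark DataFrame; a minimal usage sketch (the DataFrame and column names here are assumptions):

    # rr_df: Spark DataFrame with 'user', 'timestamp' and 'rr_interval' columns
    features = rr_df.groupBy("user").apply(rr_interval_feature_extraction)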

Empty file.
