
Commit 0aaa370

Versioning + Minor adaptations
1 parent 5548e67 commit 0aaa370

File tree: 6 files changed (+246, -125 lines)


requirements.txt

Lines changed: 2 additions & 0 deletions

@@ -4,3 +4,5 @@ neurokit2
 openpyxl
 pandas
 matlabengine
+tensorflow
+wfdb

src/icentia11k/icentia11k_dataset_builder.py

Lines changed: 36 additions & 33 deletions

@@ -11,13 +11,12 @@
 project_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 sys.path.append(project_path)

-
 class Builder(tfds.core.GeneratorBasedBuilder):
   """DatasetBuilder for icentia11k dataset."""

-  VERSION = tfds.core.Version('1.0.0')
+  VERSION = tfds.core.Version('1.0.3')
   RELEASE_NOTES = {
-    '1.0.0': 'Initial release.',
+    '1.0.3': 'Initial release.',
   }

   def _info(self) -> tfds.core.DatasetInfo:

@@ -44,41 +43,45 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):

     # TODO: Adjust the path to your needs - due to the size of the dataset (> 1 TB unzipped) no automatic
     # download is provided
-    path = './data/'
-    for k in range(0, 11000):
+    path = '/mnt/sdb/icentia11k-single-lead-continuous-raw-electrocardiogram-dataset-1.0/'
+    np.random.seed(42)
+    ids = np.random.choice(range(11000), 100, replace=False)
+    for k in ids:
       result.update({str(k): self._generate_examples(path, k)})

     return result

   def _generate_examples(self, path, subject):

     preprocessor = Preprocess(125, 125, peak='R', final_length=500)
-
     for segment in range(0, 50):
-      t = segment // 1000
-      filename = path + 'p' + f"{t :02d}" + '/p' + f"{subject :05d}" + '/p' + f"{subject :05d}" + '_s' + f"{segment :02d}"
-      rec = wfdb.rdrecord(filename)
-      ann = wfdb.rdann(filename, "atr")
-
-      ann.symbol = np.array(ann.symbol)
-      ann.sample = np.array(ann.sample)
-
-      signal = rec.p_signal.flatten()
-      rpeaks = pd.DataFrame(ann.sample[ann.symbol != '+'], columns=['ECG_R_Peaks'])
-      labels = ann.symbol[ann.symbol != '+']
-
-      ecg_clean, q, ind = preprocessor.preprocess(data=signal, sampling_rate=ann.fs, rpeaks=rpeaks)
-      labels = labels[ind]
-
-      for i, k in enumerate(ecg_clean[:, :, 0]):
-        yield str(subject) + '_' + str(segment) + '_' + str(i), {
-          'ecg': {
-            'I': k,
-          },
-          'patient': int(subject),
-          'segment': int(segment),
-          # TODO: the rhythm information is now statically set to N
-          'rhythm': 'N',
-          'beat': labels[i],
-          'quality': q[i],
-        }
+      try:
+        t = subject // 1000
+        filename = path + 'p' + f"{t :02d}" + '/p' + f"{subject :05d}" + '/p' + f"{subject :05d}" + '_s' + f"{segment :02d}"
+        rec = wfdb.rdrecord(filename)
+        ann = wfdb.rdann(filename, "atr")
+
+        ann.symbol = np.array(ann.symbol)
+        ann.sample = np.array(ann.sample)
+
+        signal = rec.p_signal.flatten()
+        rpeaks = pd.DataFrame(ann.sample[ann.symbol != '+'], columns=['ECG_R_Peaks'])
+        labels = ann.symbol[ann.symbol != '+']
+
+        ecg_clean, q, ind = preprocessor.preprocess(data=signal, sampling_rate=ann.fs, rpeaks=rpeaks)
+        labels = labels[ind]
+
+        for i, k in enumerate(ecg_clean[:, :, 0]):
+          yield str(subject) + '_' + str(segment) + '_' + str(i), {
+            'ecg': {
+              'I': k,
+            },
+            'patient': int(subject),
+            'segment': int(segment),
+            # TODO: the rhythm information is now statically set to N
+            'rhythm': 'N',
+            'beat': labels[i],
+            'quality': q[i],
+          }
+      except:
+        continue
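
Note on the hunk above: with version 1.0.3 the builder no longer walks all 11,000 patients but a seeded random subset of 100, and each segment read is wrapped in a try/except so missing records are skipped. A minimal sketch that reproduces the draw outside the builder (the dataset root used here is a placeholder, not the path from the commit, and must point at a local copy):

import numpy as np

# Reproduce the seeded subsampling from _split_generators: 100 patient ids
# drawn without replacement from the 11,000 available subjects.
np.random.seed(42)
ids = np.random.choice(range(11000), 100, replace=False)

# Each selected patient contributes up to 50 segments; records follow the
# icentia11k layout pXX/pXXXXX/pXXXXX_sYY. The root path below is an
# assumption and must match your local copy of the dataset.
path = './data/'
subject, segment = int(ids[0]), 0
record = path + f"p{subject // 1000:02d}/p{subject:05d}/p{subject:05d}_s{segment:02d}"
print(sorted(ids)[:10], record)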

src/medalcare/medalcare_dataset_builder.py

Lines changed: 43 additions & 34 deletions

@@ -10,9 +10,9 @@
 class Builder(tfds.core.GeneratorBasedBuilder):
   """DatasetBuilder for medalcare dataset."""

-  VERSION = tfds.core.Version('1.0.0')
+  VERSION = tfds.core.Version('1.0.4')
   RELEASE_NOTES = {
-    '1.0.0': 'Initial release.',
+    '1.0.4': 'Initial release.',
   }

   def _info(self) -> tfds.core.DatasetInfo:

@@ -23,6 +23,8 @@ def _info(self) -> tfds.core.DatasetInfo:
       'ecg': tfds.features.Sequence({
         'I': np.float64,
       }),
+      'subject': np.int32,
+      'diagnosis': tfds.features.ClassLabel(names=['avblock', 'fam', 'iab', 'lae', 'lbbb', 'mi', 'rbbb', 'sinus']),
       'v_G.lungs': np.float64,
       'v_G.ischemia': np.float64,
       'v_G.torso': np.float64,

@@ -191,7 +193,7 @@ def _generate_examples(self, path, mode):
     files = glob.glob(path + '/*.txt', recursive=True)
     preprocessor = Preprocess(250, 250, peak='R', final_length=500)

-    for k in list(set([item[:item.rfind('_')] for item in files])):
+    for subject, k in enumerate(list(set([item[:item.rfind('_')] for item in files]))):
       try:
         atrial = pd.read_csv(k + '_AtrialParameters.txt', sep=' ', skiprows=2).ffill(axis=1)
         ventricular = pd.read_csv(k + '_VentricularParameters.txt', sep=' ').ffill(axis=1)

@@ -201,39 +203,46 @@
         data_prep, q, ind = preprocessor.preprocess(data=signal, sampling_rate=500)
         atrial.iloc[:, -1] = atrial.iloc[:, -1].apply(
           lambda x: x.replace('mm/s', '').replace('deg', '').replace('mm', ''))
-        dict = {
-          'ecg': {
-            'I': np.median(data_prep, axis=0).flatten(),
-          },
-        }

-        for j in allowed:
-          dict.update({j: 0.0})
-
-        for j, val in ventricular.iterrows():
-          ke = 'v_' + val.iloc[0]
-
-          if ke in allowed:
+        result = [part for part in k.split("/") if part]
+        print(result[10])
+
+        for i, t in enumerate(data_prep):
+          dict = {
+            'ecg': {
+              'I': t.flatten(),
+            },
+            'subject': subject,
+            'diagnosis': result[10] #TODO: 10 is set statically --> needs to be adapt
+          }
+
+          for j in allowed:
+            dict.update({j: 0.0})
+
+          for j, val in ventricular.iterrows():
+            ke = 'v_' + val.iloc[0]
+
+            if ke in allowed:
+              va = 0.0
+              try:
+                va = np.float64(val.iloc[-1])
+              except:
+                if (val.iloc[-1] == 'True') | (val.iloc[-1] == 'False'):
+                  va = val.iloc[-1]
+              dict.update({ke: va})
+
+          for j, val in atrial.iterrows():
+            ke = 'a_' + val.iloc[0]
             va = 0.0
-            try:
-              va = np.float64(val.iloc[-1])
-            except:
-              if (val.iloc[-1] == 'True') | (val.iloc[-1] == 'False'):
-                va = val.iloc[-1]
-            dict.update({ke: va})
-
-        for j, val in atrial.iterrows():
-          ke = 'a_' + val.iloc[0]
-          va = 0.0
-          if ke in allowed:
-            try:
-              va = np.float64(val.iloc[-1])
-            except:
-              if (val.iloc[-1] == 'True') | (val.iloc[-1] == 'False'):
-                va = val.iloc[-1]
-            dict.update({ke: va})
-
-        yield k, dict
+            if ke in allowed:
+              try:
+                va = np.float64(val.iloc[-1])
+              except:
+                if (val.iloc[-1] == 'True') | (val.iloc[-1] == 'False'):
+                  va = val.iloc[-1]
+              dict.update({ke: va})
+
+          yield k + '_' + str(i), dict

       except Exception as e:
         print(e)
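
On the result[10] lookup flagged by the new TODO: the diagnosis label is currently taken from a fixed path component. A hedged sketch of an alternative (the helper name and path layout are assumptions, not part of the commit) that matches path components against the ClassLabel names instead:

# Hypothetical helper: derive the diagnosis from a file path by matching its
# components against the ClassLabel names added in this commit, rather than
# indexing a fixed position (result[10]).
DIAGNOSES = ('avblock', 'fam', 'iab', 'lae', 'lbbb', 'mi', 'rbbb', 'sinus')

def diagnosis_from_path(k: str) -> str:
    parts = [part for part in k.split('/') if part]
    matches = [part for part in parts if part in DIAGNOSES]
    if len(matches) != 1:
        raise ValueError(f'cannot infer diagnosis from path: {k}')
    return matches[0]

# Example with a made-up path layout:
# diagnosis_from_path('/data/medalcare/mi/run_0042_signal') -> 'mi'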

src/ptb/ptb_dataset_builder.py

Lines changed: 16 additions & 14 deletions

@@ -16,9 +16,9 @@
 class Builder(tfds.core.GeneratorBasedBuilder):
   """DatasetBuilder for ptb dataset."""

-  VERSION = tfds.core.Version('1.0.0')
+  VERSION = tfds.core.Version('1.0.3')
   RELEASE_NOTES = {
-    '1.0.0': 'Initial release.',
+    '1.0.3': 'Initial release.',
   }

   def _info(self) -> tfds.core.DatasetInfo:

@@ -28,6 +28,7 @@ def _info(self) -> tfds.core.DatasetInfo:
       'ecg': tfds.features.Sequence({
         'I': np.float64,
       }),
+      'subject': np.int32,
       'quality': tfds.features.ClassLabel(names=['Unacceptable', 'Barely acceptable', 'Excellent', '[]']),
       'age': np.uint8,
       'gender': np.uint8,

@@ -67,15 +68,16 @@ def _generate_examples(self, path):
       data = wfdb.rdsamp(str(path) + '/' + row['filename_hr'])[0][:, 0]
       data_prep, q, ind = preprocessor.preprocess(data=data, sampling_rate=500)

-      #for j, k in enumerate(data_prep):
-      key = str(row['patient_id']) + "_" + str(index)
-      diagnostic = "NAV" if len(row['diagnostic_superclass']) == 0 else row['diagnostic_superclass'][0]
-      yield key, {
-        'ecg': {
-          'I':np.median(data_prep, axis=0).flatten(),
-        },
-        'quality': str(q[0]),
-        'age': row['age'],
-        'gender': row['sex'],
-        'diagnostic': diagnostic,
-      }
+      for j, k in enumerate(data_prep):
+        key = str(row['patient_id']) + "_" + str(index) + "_" + str(j)
+        diagnostic = "NAV" if len(row['diagnostic_superclass']) == 0 else row['diagnostic_superclass'][0]
+        yield key, {
+          'ecg': {
+            'I': k.flatten(),
+          },
+          'subject': index,
+          'quality': str(q[0]),
+          'age': row['age'],
+          'gender': row['sex'],
+          'diagnostic': diagnostic,
+        }
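
With version 1.0.3 the ptb builder yields one example per preprocessed beat and adds the 'subject' feature. A small sketch of how the rebuilt dataset could be inspected (assumes the builder has been built locally, e.g. with tfds build from src/ptb; the split name 'train' is an assumption):

import tensorflow_datasets as tfds

# Load the locally built ptb dataset; the name follows the builder directory,
# the split name is assumed.
ds = tfds.load('ptb', split='train')

for example in ds.take(3):
    # 'subject' is the per-record index added in this commit; 'ecg/I' now
    # holds a single preprocessed beat instead of the per-record median beat.
    print(int(example['subject']), example['ecg']['I'].shape)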
