-
Notifications
You must be signed in to change notification settings - Fork 14
/
data_prepare.py
104 lines (72 loc) · 5.54 KB
/
data_prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import absolute_import, division
from tensorflow.python.framework import dtypes
from tensorflow.contrib.learn.python.learn.datasets import base
import scipy.io as sio
import numpy as np
def load_data(image_file, label_file, image_key='Kennedy176', label_key='KSC_gt'):
    """Load an image and its label map from MATLAB files and min-max scale the image.

    Args:
        image_file: File handed to ``scipy.io.loadmat`` that contains the image.
        label_file: File handed to ``scipy.io.loadmat`` that contains the labels.
        image_key: Name of the variable holding the image inside ``image_file``.
            Defaults to ``'Kennedy176'``.
        label_key: Name of the variable holding the labels inside ``label_file``.
            Defaults to ``'KSC_gt'``.

    Returns:
        A tuple ``(image, label)``. ``image`` is a float32 array rescaled with a
        single global minimum/maximum so its values span [0, 1]; ``label`` is
        returned exactly as stored in the file.

    Raises:
        KeyError: If ``image_key`` or ``label_key`` is not present in its file.
    """
    image = sio.loadmat(image_file)[image_key].astype(np.float32)
    label = sio.loadmat(label_file)[label_key]

    lowest = np.min(image)
    value_range = np.max(image) - lowest
    if value_range == 0:
        # A constant image would otherwise evaluate 0/0 and come back as all NaN.
        return np.zeros_like(image), label

    image = (image - lowest) / value_range
    return image, label
def one_hot_transform(x, length):
    """Return a ``1 x length`` one-hot row for the 1-based class label ``x``.

    Args:
        x: Class label; it is truncated with ``int()`` and class ``k`` sets
            column ``k - 1``.
        length: Number of columns in the returned row.

    Returns:
        A float array of shape ``(1, length)`` that is zero everywhere except
        for a single 1 at column ``int(x) - 1``.

    Raises:
        IndexError: If ``int(x)`` lies outside ``1..length``.
    """
    one_hot_array = np.zeros([1, length])
    position = int(x) - 1
    # Reject labels below 1 explicitly: a negative index would otherwise wrap
    # around and silently mark a class counted from the end of the row.
    if not 0 <= position < length:
        raise IndexError('class label {} is outside the range 1..{}'.format(int(x), length))
    one_hot_array[0, position] = 1
    return one_hot_array
def readdata(image_file, label_file, train_nsamples=600, validation_nsamples=300,
             windowsize=7, istraining=True, shuffle_number=None, batchnumber=5000, times=0):
    """Cut square windows around labelled pixels for training or batched testing.

    The image and label map are read with ``load_data``. Labels closer than
    ``halfsize`` pixels to the image border are zeroed so that every remaining
    labelled pixel has a complete window around it. The remaining labelled
    pixels are addressed through a permutation (``shuffle_number``): its first
    ``train_nsamples`` entries are the training set, the next
    ``validation_nsamples`` the validation set, and everything after that the
    test set.

    Args:
        image_file: Image file forwarded to ``load_data``.
        label_file: Label file forwarded to ``load_data``.
        train_nsamples: Number of training samples.
        validation_nsamples: Number of validation samples.
        windowsize: Side length of each window. NOTE(review): the extracted
            slice is ``2 * halfsize + 1`` wide, so this appears to require an
            odd value - confirm with callers.
        istraining: When true, draw a fresh random permutation and return the
            training and validation sets; otherwise return one test batch.
        shuffle_number: Permutation returned by a previous training-mode call.
            Ignored (overwritten) in training mode, required in test mode.
        batchnumber: Number of samples per test batch.
        times: Zero-based index of the test batch to return.

    Returns:
        Training mode: ``([train_image, train_label, validation_image,
        validation_label], shuffle_number)``.
        Test mode: ``[test_image, test_label]`` for batch ``times``; the batch
        with index ``test_nsamples // batchnumber`` holds the remainder and may
        be empty. ``None`` when ``times`` is past that last batch.
        Images are float32 arrays of shape ``(n, windowsize, windowsize,
        bands)``; labels are uint8 one-hot arrays of shape ``(n, number_class)``.

    Raises:
        ValueError: If ``train_nsamples + validation_nsamples`` is not smaller
            than the number of usable labelled pixels.
        TypeError: If a test batch is requested without ``shuffle_number``.
    """
    image, label = load_data(image_file, label_file)
    shape = np.shape(image)
    halfsize = int((windowsize - 1) / 2)
    # int() keeps this usable as an array dimension even when the label map is
    # stored as floating point.
    number_class = int(np.max(label))

    # Zero out labels whose window would reach past the image border.
    interior = np.zeros([shape[0], shape[1]])
    interior[halfsize:shape[0] - halfsize, halfsize:shape[1] - halfsize] = 1
    label = label * interior

    not_zero_raw, not_zero_col = label.nonzero()
    number_samples = len(not_zero_raw)
    if train_nsamples + validation_nsamples >= number_samples:
        raise ValueError('train_nsamples + validation_nsamples bigger than total samples')
    test_nsamples = number_samples - train_nsamples - validation_nsamples

    def extract(order, offset, count):
        # Build `count` windows and one-hot labels from order[offset:offset + count].
        patches = np.zeros([count, windowsize, windowsize, shape[2]], dtype=np.float32)
        onehots = np.zeros([count, number_class], dtype=np.uint8)
        for i in range(count):
            sample = order[offset + i]
            row = not_zero_raw[sample]
            col = not_zero_col[sample]
            patches[i, :, :, :] = image[(row - halfsize):(row + halfsize + 1),
                                        (col - halfsize):(col + halfsize + 1), :]
            onehots[i, :] = one_hot_transform(label[row, col], number_class)
        return patches, onehots

    if istraining:
        shuffle_number = np.arange(number_samples)
        np.random.shuffle(shuffle_number)
        train_image, train_label = extract(shuffle_number, 0, train_nsamples)
        validation_image, validation_label = extract(
            shuffle_number, train_nsamples, validation_nsamples)
        return [train_image, train_label, validation_image, validation_label], shuffle_number

    n_batch = test_nsamples // batchnumber
    if times > n_batch:
        return None
    if shuffle_number is None:
        raise TypeError('shuffle_number from the training call is required when istraining is False')

    # Full batches come first; batch number n_batch carries whatever is left.
    if times < n_batch:
        batch_size = batchnumber
    else:
        batch_size = test_nsamples - n_batch * batchnumber
    test_offset = batchnumber * times + train_nsamples + validation_nsamples
    test_image, test_label = extract(shuffle_number, test_offset, batch_size)
    return [test_image, test_label]