"""Importing the essential packages"""
import scipy.io as sio
import numpy as np
import math
def load_data(common_param):
    """Read the PaviaU data set and ground truth from .mat files and return
    (training_set, validation_set, test_set, test_truth_set).

    validation_set holds the ground-truth labels of the training pixels;
    test_truth_set holds the labels of the test pixels."""
    # Data set, ground truth files and their .mat header names
    data_set_file_name = "PaviaU"
    ground_truth_file_name = "PaviaU_gt"
    data_set_header_required = "paviaU"
    ground_truth_set_header_required = "paviaU_gt"
    # Load the data set values (loadmat appends the .mat extension)
    data_set = sio.loadmat(data_set_file_name)
    # Load the ground truth values
    ground_truth_set = sio.loadmat(ground_truth_file_name)
"""Printing the values in the data_set
for key,value in data_set.items():
print (key)
array = np.array(value)
print (array.shape)
#print ("Length : " + str(len(value)))
print (value)
print (type(value))
"""
""" -> Deleting the items (eg. __header__, __globals__,__version__)
-> Dictionary is changed to a numpy array"""
data_set = data_set[data_set_header_required]
ground_truth_set = ground_truth_set[ground_truth_set_header_required]
data_set_shape_tuple = data_set.shape
ground_truth_shape_tuple = ground_truth_set.shape
"""Initializing parameters"""
common_param.x_axis = ground_truth_shape_tuple[0]
common_param.y_axis = ground_truth_shape_tuple[1]
total_sample_count = data_set_shape_tuple[0]*data_set_shape_tuple[1]
training_sample_count = common_param.no_of_classes*common_param.count_of_each_class
test_sample_count = total_sample_count
"""Reshaping the data_set
data_set_reshaped_set = data_set.reshape(data_set_shape_tuple[0]*data_set_shape_tuple[1],data_set_shape_tuple[2])
Reshaping the ground_truth_set
ground_truth_reshaped_set = ground_truth_set.reshape(ground_truth_shape_tuple[0]*ground_truth_shape_tuple[1])
"""
"""Creating the training, validation and test sets"""
training_set = np.ndarray(shape = (training_sample_count,data_set_shape_tuple[2]))
validation_set = np.ndarray(shape = (training_sample_count,1))
test_set = np.ndarray(shape = (test_sample_count,data_set_shape_tuple[2]))
test_truth_set = np.ndarray(shape = (test_sample_count,1))
count_set = np.zeros((1,common_param.no_of_classes+1))
"""Finding the class count
for row in range(0,ground_truth_set.shape[0]):
for column in range(0,ground_truth_set.shape[1]):
count_set[0][ground_truth_set[row][column]] += 1
print (count_set)
"""
"""Finding the class count --> Wrong answer
for row in range(0,ground_truth_set.shape[0]):
count_set[0][ground_truth_reshaped_set[row]] += 1
print (count_set)
"""
"""Scaling the reflectance values to -1.0 to +1.0"""
"""Finding the high and low values for scaling"""
#max_reflectance_val = float(np.amax(data_set))
#min_reflectance_val = float(np.amin(data_set))
#print (max_reflectance_val)
#print (min_reflectance_val)
"""Actual scaling
data_set = np.float64(data_set)
mean = np.mean(data_set)
standard_deviation = np.std(data_set)
for i in range(0,data_set_shape_tuple[0]):
for j in range(0,data_set_shape_tuple[1]):
for k in range(0,data_set_shape_tuple[2]):
Scaling Formula 1
Formula A = (A / max)*2 - 1
data_set[i][j][k] = (data_set[i][j][k] / max_reflectance_val) * 2.0 - 1.0
Scaling Formula 2
Formula (b-a) + ((x - minx)/(maxx - minx)) + a
data_set[i][j][k] = 2*((data_set[i][j][k] - min_reflectance_val)/(max_reflectance_val - min_reflectance_val)) - 1
Normalization Formula 3 (Min - max normalization)
Xnormalized = (Xcurrent - ((Xmax + Xmin)/2))/((Xmax - Xmin)/2)
data_set[i][j][k] = ((data_set[i][j][k] - ((max_reflectance_val + min_reflectance_val)/2))/((max_reflectance_val - min_reflectance_val)/2))
Guassian normalization
Xnormalized = Xcurrent - mean / standard deviation
data_set[i][j][k] = (data_set[i][j][k] - mean)/standard_deviation
print (data_set[i][j][k])
#print ("Mean : ", (sum_value / (data_set_shape_tuple[0]*data_set_shape_tuple[1]*data_set_shape_tuple[2])))
print ("Mean : ", np.mean(data_set))
print ("std : ",np.std(data_set))
"""
"""Normalizing the pixels locally within the band values"""
data_set = np.float64(data_set)
for i in range(0,data_set_shape_tuple[0]):
for j in range(0,data_set_shape_tuple[1]):
temp = data_set[i][j]
#min_val = float(np.amin(data_set[i][j]))
#max_val = float(np.amax(data_set[i][j]))
mean = np.mean(data_set[i][j])
std_dev = np.std(data_set[i][j])
for k in range(0,data_set_shape_tuple[2]):
#data_set[i][j][k] = ((data_set[i][j][k] - ((max_val + min_val)/2))/((max_val - min_val)/2))
data_set[i][j][k] = (data_set[i][j][k] - mean)/std_dev
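    # The triple loop above collapses into vectorized NumPy (a sketch of an
    # equivalent form, not enabled here to keep the original flow):
    #   mean = data_set.mean(axis=2, keepdims=True)
    #   std_dev = data_set.std(axis=2, keepdims=True)
    #   data_set = (data_set - mean) / std_dev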
    row_count_1 = 0  # rows filled in the training/validation sets
    row_count_2 = 0  # rows filled in the test sets
    for row in range(0, ground_truth_set.shape[0]):
        for column in range(0, ground_truth_set.shape[1]):
            # print(count_set[0][ground_truth_set[row][column]])
            # Skip unlabelled pixels (class 0)
            if ground_truth_set[row][column] != 0:
                # Take at most count_of_each_class training pixels per class
                if count_set[0][ground_truth_set[row][column]] < common_param.count_of_each_class:
                    count_set[0][ground_truth_set[row][column]] += 1
                    training_set[row_count_1] = data_set[row][column]
                    validation_set[row_count_1] = ground_truth_set[row][column]
                    row_count_1 += 1
                # Every labelled pixel goes into the test set
                test_set[row_count_2] = data_set[row][column]
                test_truth_set[row_count_2] = ground_truth_set[row][column]
                row_count_2 += 1
"""Normalizing the training set
mean = np.mean(training_set)
standard_deviation = np.std(training_set)
for row in range(0,training_set.shape[0]):
for column in range(0,training_set.shape[1]):
training_set[row][column] = (training_set[row][column] - mean)/standard_deviation
"""
"""Normalizing the test set
mean = np.mean(test_set)
standard_deviation = np.std(test_set)
for row in range(0,test_set.shape[0]):
for column in range(0,test_set.shape[1]):
test_set[row][column] = (test_set[row][column] - mean)/standard_deviation
"""
    # print(row_count_2)
    # Checking the generated data:
    # for row in range(0, common_param.no_of_classes * common_param.count_of_each_class):
    #     print(test_set[row][0], " ", test_truth_set[row])
    # Trim away the rows that were never filled: only labelled pixels reach
    # the test set, and classes with fewer than count_of_each_class pixels
    # contribute fewer training rows.
    training_set = training_set[:row_count_1]
    validation_set = validation_set[:row_count_1]
    test_set = test_set[:row_count_2]
    test_truth_set = test_truth_set[:row_count_2]
    return training_set, validation_set, test_set, test_truth_set
def shuffle_in_unison(a, b):
    """Shuffle a and b in place with the same permutation so rows stay paired."""
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    np.random.set_state(rng_state)
    np.random.shuffle(b)
    return a, b
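# A single index permutation (a sketch of an equivalent form) avoids relying
# on get_state/set_state replaying the same shuffle on both arrays:
#   idx = np.random.permutation(a.shape[0])
#   a, b = a[idx], b[idx]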
def shuffle_in_order_of_class(a, b, common_param):
    """Reorder (a, b) so the samples cycle through the classes
    1, 2, ..., no_of_classes, assuming a holds count_of_each_class
    samples of each class."""
    set_a = np.empty(shape=(a.shape[0], a.shape[1]))
    set_b = np.empty(shape=(b.shape[0], b.shape[1]))
    offset = 0
    for i in range(1, common_param.no_of_classes + 1):
        individual_class_count = 0
        for j in range(0, a.shape[0]):
            if individual_class_count < common_param.count_of_each_class:
                if b[j] == i:
                    # The n-th sample of class i lands in the i-th slot of
                    # the n-th block of no_of_classes rows.
                    set_a[individual_class_count * common_param.no_of_classes + offset] = a[j]
                    set_b[individual_class_count * common_param.no_of_classes + offset] = b[j]
                    individual_class_count += 1
        offset += 1
    return set_a, set_b
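# Minimal usage sketch. CommonParam is a stand-in for the parameter object
# used elsewhere in this project (an assumption; only the attributes that
# load_data touches are modelled). PaviaU labels its pixels 1..9, so
# no_of_classes is 9; count_of_each_class is an arbitrary choice here.
if __name__ == "__main__":
    class CommonParam(object):
        x_axis = 0
        y_axis = 0
        no_of_classes = 9
        count_of_each_class = 200

    params = CommonParam()
    train_x, train_y, test_x, test_y = load_data(params)
    train_x, train_y = shuffle_in_unison(train_x, train_y)
    print("training set:", train_x.shape, "test set:", test_x.shape)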