-
Notifications
You must be signed in to change notification settings - Fork 0
/
augmentation.py
executable file
·146 lines (78 loc) · 3.35 KB
/
augmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# project configuration module; this file reads fixed_seed, aug_dir, data_dir,
# class_total, class0, class1 and aug_factor from it
import config
# seed the NumPy and TensorFlow random generators with the configured seed
# at import time, before the remaining imports are executed
# NOTE(review): `from tensorflow import set_random_seed` looks like the TF 1.x
# API - confirm against the TensorFlow version this project pins
from numpy.random import seed
from tensorflow import set_random_seed
seed(config.fixed_seed)
set_random_seed(config.fixed_seed)
# standard library / NumPy helpers used by the augmentation functions below
import os
import math
import numpy as np
def augmentation(dataset, labels):
    """
    Applies augmentation on a given dataset and appends the new images to the original dataset and labels.
    Each category (0 and 1) is filled up to config.class_total images by cycling over the
    category's ids and mixing every picked image with a random image of the same category.
    A category with fewer than two images cannot be mixed and is skipped with a notice.

    INPUT:
        dataset, labels - original dataset (iterable of ids) and labels (dict: id -> 0/1)
                          on which augmentation should be performed
    OUTPUT:
        dataset, labels - new sets including augmented images; labels is updated in place,
                          dataset is returned unchanged when no image was generated
        saves the augmented images as "<config.aug_dir><aug_name>.npy"
    """
    print("Augmentation")

    # if necessary create aug dir and make sure it's empty
    if not os.path.exists(config.aug_dir):
        os.makedirs(config.aug_dir)
    else:
        _clear_directory(config.aug_dir)

    # sort ids based on category
    split_categories = {0: [], 1: []}
    for subject_id in dataset:
        split_categories[labels[subject_id]].append(subject_id)

    # calculate the amount of missing images to be augmented
    missing = {
        cat: max(0, config.class_total - len(ids))
        for cat, ids in split_categories.items()
    }
    print(" missing " + config.class0 + " data: ", missing[0])
    print(" missing " + config.class1 + " data: ", missing[1])

    new_names = []
    cnt = 0

    # loop over categories
    for cat, ids in split_categories.items():
        if missing[cat] == 0:
            continue
        if len(ids) < 2:
            # mixing needs a second image of the same category; an empty category
            # would also make the repetition count below divide by zero
            print(" cannot augment category", cat, "- at least two images are required")
            continue

        # whole passes over the category followed by a partial pass for the rest
        full_reps, remainder = divmod(missing[cat], len(ids))
        sources = ids * full_reps + ids[:remainder]

        for source_id in sources:
            aug_name = "aug" + str(cnt) + "_" + source_id
            # augment image + save first, so labels never reference an image that
            # failed to be written
            aug_image = mixing(source_id, ids)
            # NOTE: plain concatenation - config.aug_dir is expected to end with a path separator
            np.save(config.aug_dir + aug_name + ".npy", aug_image)
            # update labels + remember the new dataset entry
            labels[aug_name] = cat
            new_names.append(aug_name)
            cnt += 1

    # append all new ids at once instead of re-allocating the array per image
    if new_names:
        dataset = np.append(dataset, new_names)

    return dataset, labels


def _clear_directory(path):
    """
    Removes every non-hidden entry inside a directory while keeping the directory itself.
    Matches what the shell glob "<path>/*" selects (dot-files are left untouched) but does
    not spawn a shell, so it works on any platform and with arbitrary characters in the path.

    INPUT:
        path - directory to be emptied
    """
    import shutil  # local import: only needed by this helper

    for entry in os.listdir(path):
        if entry.startswith("."):
            continue
        target = os.path.join(path, entry)
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        else:
            os.remove(target)
def mixing(id, list_ids):
    """
    Applies augmentation on an image by mixing the original image with a random image of the same category.
    The augmentation factor (config.aug_factor) is the fraction of the random image in the result;
    the original image contributes the remaining (1 - config.aug_factor).

    INPUT:
        id - id of subject to be augmented
        list_ids - list with all ids to pick mix image
    OUTPUT:
        aug_image - the augmented image
    RAISES:
        ValueError - if list_ids contains no id other than the given one
    """
    # load original image
    image = np.load(config.data_dir + id + ".npy")

    # candidate partners: every id except the subject itself
    # (np.setdiff1d yields the unique remaining ids in sorted order, which keeps
    # the random pick reproducible for a fixed seed)
    candidates = np.setdiff1d(list_ids, [id])
    if candidates.size == 0:
        # fail with an explicit message instead of the opaque error np.random.choice
        # raises for an empty array
        raise ValueError(
            "cannot mix image '%s': no other id available in list_ids" % id
        )

    # load random image from same category
    id_mix = np.random.choice(candidates)
    image_mix = np.load(config.data_dir + id_mix + ".npy")

    # mix images
    aug_image = (1 - config.aug_factor) * image + config.aug_factor * image_mix

    return aug_image