# train.py
import os
import sys
import numpy as np
import tensorflow as tf
from custom_vgg19 import Vgg19
from data_loader import TXTLoader
from thumbnail_generartion import GCA, RPN
from ops import gen_anchors_op, overlaps_op, norm_bbox_op, rpn_class_loss_op, rpn_bbox_loss_op
from ops import bbox_refinement_op
from utils import py_rpn_match
# Select which GPU is visible to CUDA for this process (device index "2").
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
# NOTE(review): this entry is appended after the project-local imports above
# have already run, so it has no effect on how those imports were resolved —
# confirm whether it is still needed (or should move above the imports).
sys.path.append("/home/shizai/xushiqi/projects/tg/")
# NOTE(review): not referenced anywhere else in this script. It is also created
# before `graph` exists, so this constant presumably lives in TensorFlow's
# implicit default graph rather than in `graph` — confirm before using it in
# any op built inside `graph`.
RPN_BBOX_STD_DEV = tf.constant([0.1, 0.1, 0.2, 0.2])
# Build the model in an explicit graph. When no graph is made current,
# TensorFlow places every op in its implicit default graph instead.
graph = tf.Graph()
with graph.as_default():
    # ---- Inputs (all shapes are fixed; batch size is 1) ----
    image_ph = tf.placeholder(tf.float32, [1, 650, 800, 3])  # [batch, h, w, c]
    ratio_ph = tf.placeholder(tf.float32, [1, 1])  # [batch, num_ratios]
    gt_bbox_ph = tf.placeholder(tf.float32, [1, 1, 4])  # [batch, num_bbox, (y1, x1, y2, x2)]

    # ---- Backbone: VGG19 ----
    vgg19 = Vgg19(vgg19_npy_path='./model/vgg19_imagenet_pretrained.npy')
    vgg19.build(image_ph)

    # ---- GCA applied to conv5_4 ----
    # Author's note: f_attn is [batch, h, w, 512].
    f_attn = GCA(vgg19.conv5_4)

    # ---- RPN heads ----
    # Author's notes: bbox_op is [batch, h, w, num_scales, 4] and
    # objectness_op is [batch, h, w, num_scales, 2].
    bbox_op, objectness_op = RPN(f_attn, ratio_ph)

    # Dynamic height/width of the feature map (b and c are read as well;
    # c is not used further in this script).
    feat_shape = tf.shape(f_attn)
    b = feat_shape[0]
    h = feat_shape[1]
    w = feat_shape[2]
    c = feat_shape[3]
    feature_shape = tf.stack([h, w])

    # Generate every anchor for a feature map of that height/width.
    scales = [128, 256, 512]
    feature_stride = 16
    anchor_stride = 1
    # Author's note: the result is [batch, h, w, num_scales, 4].
    anchor_grid = gen_anchors_op(scales, ratio_ph, feature_shape, feature_stride, anchor_stride)

    # Flatten predictions, ground truth and anchors to one row per box.
    rpn_bbox = tf.reshape(bbox_op, [-1, 4])  # [batch*num_anchor, 4]
    # Keeps an explicit leading axis of size 1: [1, num_anchor, 2].
    rpn_objectness = tf.reshape(objectness_op, [1, -1, 2])
    gt_bbox = tf.reshape(gt_bbox_ph, [-1, 4])  # [batch*num_bbox, 4]
    anchors = tf.reshape(anchor_grid, [-1, 4])  # [batch*num_anchor, 4]

    # IoU between every anchor and every ground-truth box.
    # Author's note: overlaps_op does not support batching yet, so the batch
    # size is assumed to be 1; the result is [batch*num_anchor, 1].
    overlaps = overlaps_op(anchors, gt_bbox)

    # For each anchor, pick the ground-truth column with the largest IoU and
    # read that IoU back out with (row, column) index pairs. This selects the
    # same values as a row-wise maximum over `overlaps`.
    best_gt = tf.argmax(overlaps, axis=1)
    best_gt_col = tf.reshape(best_gt, [-1, 1])
    anchor_rows = tf.reshape(tf.range(tf.shape(overlaps)[0]), [-1, 1])
    anchor_rows = tf.cast(anchor_rows, tf.int64)  # match argmax's int64 output
    row_col_pairs = tf.concat([anchor_rows, best_gt_col], axis=1)
    iou_max = tf.gather_nd(overlaps, row_col_pairs)

    # Every anchor is paired with its best-matching ground-truth box.
    matched_gt_bbox = tf.gather(gt_bbox, best_gt)  # [batch*num_anchor, 4]

    # Positive anchors: IoU >= 0.5.
    pos_indices = tf.where(tf.greater_equal(iou_max, 0.5))  # [num_pos, 1]
    pos_indices = tf.reshape(pos_indices, [-1])  # [num_pos]

    # Neutral anchors: 0.3 < IoU < 0.7.
    # NOTE(review): this range overlaps the positive threshold above, and
    # neu_indices is not used anywhere later in this script.
    is_neutral = tf.logical_and(tf.greater(iou_max, 0.3), tf.less(iou_max, 0.7))
    neu_indices = tf.where(is_neutral)  # [num_neu, 1]
    neu_indices = tf.reshape(neu_indices, [-1])  # [num_neu]

    # Negative anchors: IoU <= 0.3.
    neg_indices = tf.where(tf.less_equal(iou_max, 0.3))  # [num_neg, 1]
    neg_indices = tf.reshape(neg_indices, [-1])  # [num_neg]

    # Randomly sample at most 256 anchors in total: up to 128 positives, and
    # negatives fill whatever remains of the 256 budget.
    shuffled_pos = tf.random_shuffle(pos_indices)
    shuffle_pos_indices = shuffled_pos[:128]
    pos_count = tf.shape(shuffle_pos_indices)[0]
    shuffled_neg = tf.random_shuffle(neg_indices)
    neg_budget = 256 - pos_count
    shuffle_neg_indices = shuffled_neg[:neg_budget]
    neg_count = tf.shape(shuffle_neg_indices)[0]

    # Build the per-anchor match labels in Python from the sampled indices.
    # Author's note: these are the 0/1 labels of the RPN anchors; the exact
    # encoding is defined by py_rpn_match.
    match_inputs = [tf.shape(anchors)[0], shuffle_pos_indices, shuffle_neg_indices]
    rpn_match = tf.py_func(py_rpn_match, match_inputs, tf.int32)

    # RPN classification loss.
    class_match = tf.reshape(rpn_match, [b, tf.shape(anchors)[0], 1])
    rpn_class_loss = rpn_class_loss_op(class_match, rpn_objectness)

    # RPN box-regression loss against the refinement targets.
    target_bbox = bbox_refinement_op(anchors, matched_gt_bbox)
    bbox_targets = tf.reshape(target_bbox, [1, -1, 4])
    bbox_match = tf.reshape(rpn_match, [1, -1, 1])
    bbox_preds = tf.reshape(rpn_bbox, [1, -1, 4])
    rpn_bbox_loss = rpn_bbox_loss_op(bbox_targets, bbox_match, bbox_preds)

    # Total loss: the box term is weighted 10x relative to the class term.
    loss = rpn_class_loss + 10 * rpn_bbox_loss
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
# Training schedule: number of epochs and number of steps per epoch.
num_epoch = 10
num_batch = 60000

# Session options: allow ops to fall back to another device, do not log
# device placement, and let GPU memory be allocated on demand.
config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=False,
    gpu_options=tf.GPUOptions(allow_growth=True),
)

# Data loader: one sample per batch, no shuffling.
# An earlier variant (commented out in the original) used
# root='./data/Thumbnail Data Set/PQ_Set' together with shuffle=True.
loader = TXTLoader(
    root='/Users/aiyoj/Downloads/Thumbnail Data Set/PQ_Set',
    txt_path='./data/train_set.txt',
    batch_size=1,
    shuffle=False,
)
# Hand the graph built above to the session so that the session can run the
# ops defined in `graph`.
with tf.Session(graph=graph, config=config) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(num_epoch):
        first_step = epoch * num_batch
        for step in range(first_step, first_step + num_batch):
            # loader.batch() yields six values; only the image, the
            # ground-truth box and the ratio are fed to the graph.
            (image_batch, gt_bbox_batch, _thumbnail_dim_batch,
             ratio_batch, _meta_batch, _name_batch) = loader.batch()

            feed = {
                image_ph: image_batch,
                ratio_ph: np.reshape(ratio_batch, [1, 1]),
                gt_bbox_ph: np.reshape(gt_bbox_batch, [1, 1, 4]),
            }

            # NOTE(review): only iou_max is fetched here; train_step is never
            # run, so this loop inspects IoU values and updates no weights.
            fetched = sess.run([iou_max], feed_dict=feed)
            print(fetched[0])

            # NOTE(review): the original indentation was lost. The print and
            # break are assumed to belong to the inner loop, i.e. one batch is
            # processed per epoch — confirm against the original file.
            break