A TensorFlow implementation of SPP-Net
Code source: https://github.com/peace195/sppnet
Code walkthrough
Importing packages
```python
import numpy as np
```
Parameter settings
```python
DROPOUT = 0.5  # dropout probability
```
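Only the first line of the settings block survives in the snippet. A minimal sketch of the kind of constants the rest of the script refers to; `LEARNING_RATE` and `WEIGHT_DECAY` appear in the loss section below, while the concrete values and `NUM_CLASSES` are assumptions:

```python
DROPOUT = 0.5          # dropout probability for fc6/fc7 during training
LEARNING_RATE = 0.001  # initial learning rate, decayed later via tf.train.exponential_decay
WEIGHT_DECAY = 0.0005  # L2 regularization coefficient applied to all weights and biases
NUM_CLASSES = 40       # assumed class count; must match fc8 and the one-hot label width
```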
Loading the data
```python
net_data = load("bvlc_alexnet.npy",allow_pickle=True).item()
```
Helper functions
Printing output during training iterations
```python
def print_activations(t):
```
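The body is not shown; judging by the name, it likely follows the common TensorFlow idiom of printing a tensor's name and static shape, something like:

```python
def print_activations(t):
    # Print the tensor's op name and its static shape, e.g. "conv1  [None, 56, 56, 96]"
    print(t.op.name, ' ', t.get_shape().as_list())
```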
Preparing the labels
```python
def dense_to_one_hot(labels_dense, num_classes):
```
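A plausible body, following the classic dense-to-one-hot helper from the TensorFlow MNIST examples (the exact implementation in the repo may differ):

```python
def dense_to_one_hot(labels_dense, num_classes):
    # Convert a vector of integer class ids into a (num_labels, num_classes) one-hot matrix.
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
```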
Reading the training images
```python
def read_images_from_disk(input_queue):
```
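A sketch of the usual queue-based reader this signature suggests, assuming `input_queue` comes from `tf.train.slice_input_producer([image_paths, labels])`; the decode call and channel count are assumptions:

```python
def read_images_from_disk(input_queue):
    # input_queue[0]: file name tensor, input_queue[1]: label tensor
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(file_contents, channels=3)  # decode to an H x W x 3 uint8 tensor
    return image, label
```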
Generating the weights via a helper function
```python
def weight_variable(shape, name):
```
Generating the biases via a helper function
```python
def bias_variable(shape, name):
```
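Typical bodies for the two helpers above; the initializer choices and constants are assumptions, not taken from the repo:

```python
def weight_variable(shape, name):
    # Truncated-normal initialization for convolution / fully connected weights.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)

def bias_variable(shape, name):
    # Small constant initialization for biases.
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)
```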
Caffe-style grouped (multi-channel) convolution; when the window does not fit, the remainder is dropped (VALID padding)
```python
def conv(input, kernel, biases, k_h, k_w, c_o, s_h, s_w, padding = "VALID", group = 1):
```
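Only the signature is shown. This is the grouped-convolution helper popularized by the Caffe-to-TensorFlow AlexNet ports: with `group > 1` the input and the kernel are split along the channel axis, convolved per group, and concatenated again. A sketch along those lines (the repo's exact body may differ slightly):

```python
def conv(input, kernel, biases, k_h, k_w, c_o, s_h, s_w, padding="VALID", group=1):
    # k_h/k_w: kernel size, c_o: output channels, s_h/s_w: strides.
    c_i = input.get_shape()[-1]
    assert c_i % group == 0
    assert c_o % group == 0
    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    if group == 1:
        conv = convolve(input, kernel)
    else:
        # Caffe-style grouped convolution: split inputs and kernels along the channel axis.
        input_groups = tf.split(input, group, 3)
        kernel_groups = tf.split(kernel, group, 3)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
        conv = tf.concat(output_groups, 3)
    return tf.reshape(tf.nn.bias_add(conv, biases), [-1] + conv.get_shape().as_list()[1:])
```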
Single-group convolution; when the window does not fit, the input is padded (SAME padding)
```python
def conv2d(x, W, stride_h, stride_w, padding='SAME'):
```
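This one is plausibly just a thin wrapper around `tf.nn.conv2d`:

```python
def conv2d(x, W, stride_h, stride_w, padding='SAME'):
    # SAME padding zero-pads the input so partial windows at the border are still convolved.
    return tf.nn.conv2d(x, W, strides=[1, stride_h, stride_w, 1], padding=padding)
```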
2×2 pooling; when the window does not fit, the input is padded (SAME padding)
```python
def max_pool_2x2(x):
```
3×3 pooling; when the window does not fit, the input is padded (SAME padding)
```python
def max_pool_3x3(x):
```
4×4 pooling; when the window does not fit, the input is padded (SAME padding)
```python
def max_pool_4x4(x):
```
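The three pooling helpers are shown only by name. A sketch covering all of them: the window sizes come from the headings above, while the strides are assumptions (a stride equal to the window size is the usual choice):

```python
def max_pool_2x2(x):
    # 2x2 window; SAME padding keeps partial windows at the border.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def max_pool_3x3(x):
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1], padding='SAME')

def max_pool_4x4(x):
    return tf.nn.max_pool(x, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')
```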
Spatial pyramid pooling
Pools an input of shape batch_size × height × width × channels into a fixed-length feature vector per image; the output length depends only on out_pool_size and the channel count, not on the input height and width.
```python
# Spatial Pyramid Pooling block
# https://arxiv.org/abs/1406.4729
def spatial_pyramid_pool(previous_conv, num_sample, previous_conv_size, out_pool_size):
    """
    previous_conv: a tensor vector of previous convolution layer
    num_sample: an int number of images in the batch
    previous_conv_size: an int vector [height, width] of the feature map size of the previous convolution layer
    out_pool_size: an int vector of expected output sizes of the max pooling layers
    returns: a tensor vector with shape [1 x n] which is the concatenation of multi-level pooling
    """
    for i in range(len(out_pool_size)):
        # Window size and stride are rounded up so that out_pool_size[i] bins cover the whole map.
        h_strd = h_size = math.ceil(float(previous_conv_size[0]) / out_pool_size[i])
        w_strd = w_size = math.ceil(float(previous_conv_size[1]) / out_pool_size[i])
        # Pad the bottom/right so height and width become exact multiples of the window size.
        pad_h = int(out_pool_size[i] * h_size - previous_conv_size[0])
        pad_w = int(out_pool_size[i] * w_size - previous_conv_size[1])
        new_previous_conv = tf.pad(previous_conv, tf.constant([[0, 0], [0, pad_h], [0, pad_w], [0, 0]]))
        max_pool = tf.nn.max_pool(new_previous_conv,
                                  ksize=[1, h_size, w_size, 1],   # one pooling window per output bin
                                  strides=[1, h_strd, w_strd, 1],
                                  padding='SAME')
        if (i == 0):
            spp = tf.reshape(max_pool, [num_sample, -1])
        else:
            # Concatenate this pyramid level's flattened bins with the previous levels.
            spp = tf.concat(axis=1, values=[spp, tf.reshape(max_pool, [num_sample, -1])])
    return spp
```
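A hypothetical usage example: pooling the last convolutional feature map into a fixed-length vector. The variable names and the `[4, 2, 1]` pyramid are illustrative; the point is that the output length depends only on `out_pool_size` and the channel count, which is what lets the fully connected layers accept variable-size images.

```python
# conv5: [batch_size, conv5_h, conv5_w, 256] feature map from the last conv layer (names assumed)
spp5 = spatial_pyramid_pool(conv5, batch_size, [conv5_h, conv5_w], [4, 2, 1])
# Each image now contributes (4*4 + 2*2 + 1*1) * 256 = 5376 features, whatever its original size.
```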
Training the model
Setting up the batches
Images whose size in a given dimension differs by no more than 10 pixels are clustered together and trained as one batch.
```python
size_cluster = defaultdict(list)
```
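Only the `defaultdict` line survives; a sketch of the clustering the text describes, with the bucketing key and variable names as assumptions:

```python
size_cluster = defaultdict(list)
for idx, (height, width) in enumerate(image_sizes):  # image_sizes: (h, w) per training image (assumed)
    size_cluster[height // 10].append(idx)           # images within a 10-pixel height band share a bucket
size_cluster_keys = sorted(size_cluster.keys())      # iterate the clusters from small images to large
```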
Initializing variables
```python
train_accuracies = []
```
Training loop
Iteration counting
```python
# Training block
```
Loop body
Building the batch
Each batch contains a different number of images, and training proceeds from small image sizes to large ones; before the images are fed in, their height and width are first shrunk by a factor of 2 (see the sketch below).
```python
y_train = labels[size_cluster[size_cluster_keys[it%len(size_cluster_keys)]]]
```
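The label line above is the only one shown. The half-size step it mentions could look like the following hypothetical helper; `tf.image.resize_images` is the standard way to rescale a batch, and the helper name is made up:

```python
def half_size(image_batch):
    # image_batch: [batch, height, width, channels]; within one size cluster the images
    # differ by at most 10 px, so a single target size per batch is good enough.
    h, w = image_batch.get_shape().as_list()[1:3]
    return tf.image.resize_images(image_batch, [h // 2, w // 2])
```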
Loading the pretrained AlexNet model
The weights of AlexNet's sixth (fully connected) layer are discarded and re-initialized, and the parameters of the eighth fully connected layer are changed to match the training set's classes.
```python
x = tf.placeholder('float', shape = x_train.get_shape())  # input placeholder
```
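A sketch of how the pretrained weights in `net_data` are typically wired up to match the description above: the convolutional layers (and fc7) reuse the `bvlc_alexnet.npy` arrays, fc6 is re-initialized because its input is now the SPP vector, and fc8 is resized to the dataset's class count. The shapes, the `[4, 2, 1]` pyramid and `NUM_CLASSES` are assumptions:

```python
conv1W = tf.Variable(net_data["conv1"][0])  # pretrained kernel, shape [11, 11, 3, 96]
conv1b = tf.Variable(net_data["conv1"][1])  # pretrained bias
# ... conv2W/b through conv5W/b and fc7W/b are loaded from net_data the same way ...

# fc6 is re-initialized: its input is now the SPP output, e.g. (4*4 + 2*2 + 1*1) * 256 = 5376
fc6W = weight_variable([5376, 4096], 'fc6W')
fc6b = bias_variable([4096], 'fc6b')

# fc8 is resized so the output matches the training set's number of classes
fc8W = weight_variable([4096, NUM_CLASSES], 'fc8W')
fc8b = bias_variable([NUM_CLASSES], 'fc8b')
```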
Forward pass
- Layer 1
  - conv1: 11×11 kernel, stride 4, SAME (padded) convolution, ReLU activation
  - lrn1: local response normalization
  - maxpool1: 3×3 pooling, stride 2, VALID (partial windows dropped)
- Layer 2
  - conv2: 5×5 kernel, stride 1, SAME convolution, ReLU activation
  - lrn2: local response normalization
  - maxpool2: 3×3 pooling, stride 2, VALID
- Layer 3
  - conv3: 3×3 kernel, stride 1, SAME convolution, ReLU activation
- Layer 4
  - conv4: 3×3 kernel, stride 1, SAME convolution, ReLU activation
- Layer 5
  - conv5: 3×3 kernel, stride 1, SAME convolution, ReLU activation
  - spp5: spatial pyramid pooling
- Layer 6
  - fc6: fully connected, ReLU activation, dropout during training
- Layer 7
  - fc7: fully connected, ReLU activation, dropout during training
- Layer 8
  - fc8: fully connected, no activation
```python
def model(x):
```
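Only the `def model(x):` line is shown; below is a condensed sketch that follows the layer list above. The group sizes for conv2/conv4/conv5 follow the original AlexNet, the LRN parameters are the usual AlexNet values, and the `[4, 2, 1]` pyramid is illustrative; all weights/biases and `keep_prob` are assumed to be created as in the previous sections:

```python
def model(x):
    # Layer 1: 11x11 conv, stride 4, SAME, ReLU -> LRN -> 3x3/2 max pool (VALID)
    conv1 = tf.nn.relu(conv(x, conv1W, conv1b, 11, 11, 96, 4, 4, padding="SAME", group=1))
    lrn1 = tf.nn.local_response_normalization(conv1, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
    maxpool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layer 2: 5x5 conv, stride 1, SAME, ReLU -> LRN -> 3x3/2 max pool (VALID)
    conv2 = tf.nn.relu(conv(maxpool1, conv2W, conv2b, 5, 5, 256, 1, 1, padding="SAME", group=2))
    lrn2 = tf.nn.local_response_normalization(conv2, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
    maxpool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layers 3-5: 3x3 convs, stride 1, SAME, ReLU
    conv3 = tf.nn.relu(conv(maxpool2, conv3W, conv3b, 3, 3, 384, 1, 1, padding="SAME", group=1))
    conv4 = tf.nn.relu(conv(conv3, conv4W, conv4b, 3, 3, 384, 1, 1, padding="SAME", group=2))
    conv5 = tf.nn.relu(conv(conv4, conv5W, conv5b, 3, 3, 256, 1, 1, padding="SAME", group=2))

    # Layer 5 output goes through spatial pyramid pooling instead of a fixed-size max pool
    shape = conv5.get_shape().as_list()
    spp5 = spatial_pyramid_pool(conv5, shape[0], [shape[1], shape[2]], [4, 2, 1])

    # Layers 6-8: fully connected; dropout only during training, no activation on fc8
    fc6 = tf.nn.dropout(tf.nn.relu(tf.nn.xw_plus_b(spp5, fc6W, fc6b)), keep_prob)
    fc7 = tf.nn.dropout(tf.nn.relu(tf.nn.xw_plus_b(fc6, fc7W, fc7b)), keep_prob)
    fc8 = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
    return fc8
```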
Loss and backpropagation
The scores go through softmax cross-entropy; the loss is the cross-entropy loss plus the weight-decay coefficient times the L2 norm of all the weights, and the learning rate is decayed dynamically.
```python
logits = model(x)
# Softmax cross-entropy between the network scores and the one-hot labels
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
# L2 regularization over every weight and bias in the network
regularizers = tf.nn.l2_loss(conv1W) + tf.nn.l2_loss(conv1b) + \
               tf.nn.l2_loss(conv2W) + tf.nn.l2_loss(conv2b) + \
               tf.nn.l2_loss(conv3W) + tf.nn.l2_loss(conv3b) + \
               tf.nn.l2_loss(conv4W) + tf.nn.l2_loss(conv4b) + \
               tf.nn.l2_loss(conv5W) + tf.nn.l2_loss(conv5b) + \
               tf.nn.l2_loss(fc6W) + tf.nn.l2_loss(fc6b) + \
               tf.nn.l2_loss(fc7W) + tf.nn.l2_loss(fc7b) + \
               tf.nn.l2_loss(fc8W) + tf.nn.l2_loss(fc8b)
loss = tf.reduce_mean(cross_entropy + WEIGHT_DECAY * regularizers)

# optimisation loss function
global_step = tf.Variable(0)
learning_rate = tf.train.exponential_decay(LEARNING_RATE, global_step, 1000, 0.9, staircase=True)  # dynamically decayed learning rate
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(loss)
```
##### Evaluating the model
Computing the model's accuracy
```python
# evaluation
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
predict = tf.argmax(logits, 1)
saver = tf.train.Saver({v.op.name: v for v in [conv1W, conv1b,
                                               conv2W, conv2b,
                                               conv3W, conv3b,
                                               conv4W, conv4b,
                                               conv5W, conv5b,
                                               fc6W, fc6b,
                                               fc7W, fc7b,
                                               fc8W, fc8b]})
```
##### Training setup
```python
with tf.Session(graph=graph) as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    if os.path.exists('./alex_model_spp.ckpt'):
        saver.restore(sess, './alex_model_spp.ckpt')
    cnt_tmp = 0
    xtrain, ytrain = sess.run([x_train, y_train])
    for i in range(10):
        it = it + 1
        _, train_accuracy, cost = sess.run([train_step, accuracy, cross_entropy],
                                           feed_dict={x: xtrain,
                                                      y_: ytrain,
                                                      keep_prob: 1.0})
        print('training_accuracy => %.4f, cost value => %.4f for step %d'
              % (train_accuracy, cost, it))
        if (train_accuracy > 0.95):
            cnt_tmp = cnt_tmp + 1
        if (cnt_tmp > 10):
            break
        train_accuracies.append(train_accuracy)
        x_range.append(it)
        train_cost.append(cost)
    saver.save(sess, './alex_model_spp.ckpt')
    coord.request_stop()
    coord.join(threads)
sess.close()
del sess
```
##### Plotting the evaluation curves
```python
# Plot accuracy and loss curve
plt.plot(x_range, train_cost, '-b')
plt.ylabel('spp_cost')
plt.xlabel('step')
plt.savefig('spp_cost.png')
plt.close()

plt.plot(x_range, train_accuracies, '-b')
plt.ylabel('spp_accuracies')
plt.ylim(ymax=1.1)
plt.xlabel('step')
plt.savefig('spp_accuracy.png')
```
Testing the model
```python
# Testing block
```
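The testing block is also collapsed to its first line. A minimal sketch, assuming a test batch `x_test`/`y_test` prepared the same way as the training clusters and reusing the `accuracy` node defined above:

```python
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, './alex_model_spp.ckpt')          # reload the trained weights
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    xtest, ytest = sess.run([x_test, y_test])              # x_test / y_test: assumed test pipeline
    test_accuracy = sess.run(accuracy,
                             feed_dict={x: xtest, y_: ytest, keep_prob: 1.0})
    print('test accuracy => %.4f' % test_accuracy)
    coord.request_stop()
    coord.join(threads)
```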