代码来源:https://github.com/jwyang/faster-rcnn.pytorch

代码解读

把python2的新版本特性导入到python2当前版本

1
2
3
from __future__ import absolute_import #设置成绝对引用
from __future__ import division #使用精确除法
from __future__ import print_function #打印函数

导入库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import _init_paths #自定义包含文件 用于设置环境路径
import os
import sys
import numpy as np
import argparse
import pprint #美化打印库
import pdb #python调试库
import time #python时间函数库,用于计算运行时间

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

import torchvision.transforms as transforms #pytorch处理图像视频的torchvision
from torch.utils.data.sampler import Sampler

from roi_data_layer.roidb import combined_roidb
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.utils.net_utils import weights_normal_init, save_net, load_net, \
adjust_learning_rate, save_checkpoint, clip_gradient

from model.faster_rcnn.vgg16 import vgg16
from model.faster_rcnn.resnet import resnet

命令行参数定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def parse_args():
"""
Parse input arguments
"""
parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
parser.add_argument('--dataset', dest='dataset',
help='training dataset',
default='pascal_voc', type=str)
parser.add_argument('--net', dest='net',
help='vgg16, res101',
default='vgg16', type=str)
parser.add_argument('--start_epoch', dest='start_epoch',
help='starting epoch',
default=1, type=int)
parser.add_argument('--epochs', dest='max_epochs',
help='number of epochs to train',
default=20, type=int)
parser.add_argument('--disp_interval', dest='disp_interval',
help='number of iterations to display',
default=100, type=int)
parser.add_argument('--checkpoint_interval', dest='checkpoint_interval',
help='number of iterations to display',
default=10000, type=int)

parser.add_argument('--save_dir', dest='save_dir',
help='directory to save models', default="models",
type=str)
parser.add_argument('--nw', dest='num_workers',
help='number of workers to load data',
default=0, type=int)
parser.add_argument('--cuda', dest='cuda',
help='whether use CUDA',
action='store_true')
parser.add_argument('--ls', dest='large_scale',
help='whether use large imag scale',
action='store_true')
parser.add_argument('--mGPUs', dest='mGPUs',
help='whether use multiple GPUs',
action='store_true')
parser.add_argument('--bs', dest='batch_size',
help='batch_size',
default=1, type=int)
parser.add_argument('--cag', dest='class_agnostic',
help='whether to perform class_agnostic bbox regression',
action='store_true')

# config optimization
parser.add_argument('--o', dest='optimizer',
help='training optimizer',
default="sgd", type=str)
parser.add_argument('--lr', dest='lr',
help='starting learning rate',
default=0.001, type=float)
parser.add_argument('--lr_decay_step', dest='lr_decay_step',
help='step to do learning rate decay, unit is epoch',
default=5, type=int)
parser.add_argument('--lr_decay_gamma', dest='lr_decay_gamma',
help='learning rate decay ratio',
default=0.1, type=float)

# set training session
parser.add_argument('--s', dest='session',
help='training session',
default=1, type=int)

# resume trained model
parser.add_argument('--r', dest='resume',
help='resume checkpoint or not',
default=False, type=bool)
parser.add_argument('--checksession', dest='checksession',
help='checksession to load model',
default=1, type=int)
parser.add_argument('--checkepoch', dest='checkepoch',
help='checkepoch to load model',
default=1, type=int)
parser.add_argument('--checkpoint', dest='checkpoint',
help='checkpoint to load model',
default=0, type=int)
# log and display
parser.add_argument('--use_tfb', dest='use_tfboard',
help='whether use tensorboard',
action='store_true')

args = parser.parse_args()
return args

定义类

样本类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class sampler(Sampler):
def __init__(self, train_size, batch_size):
self.num_data = train_size
self.num_per_batch = int(train_size / batch_size)
self.batch_size = batch_size
self.range = torch.arange(0,batch_size).view(1, batch_size).long()
self.leftover_flag = False
if train_size % batch_size:
self.leftover = torch.arange(self.num_per_batch*batch_size, train_size).long()
self.leftover_flag = True

def __iter__(self):
rand_num = torch.randperm(self.num_per_batch).view(-1,1) * self.batch_size
self.rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range

self.rand_num_view = self.rand_num.view(-1)

if self.leftover_flag:
self.rand_num_view = torch.cat((self.rand_num_view, self.leftover),0)

return iter(self.rand_num_view)

def __len__(self):
return self.num_data

训练主体

训练设置载入

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
args = parse_args()

print('Called with args:')
print(args)

if args.dataset == "pascal_voc":
args.imdb_name = "voc_2007_trainval"
args.imdbval_name = "voc_2007_test"
args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
elif args.dataset == "pascal_voc_0712":
args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
args.imdbval_name = "voc_2007_test"
args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
elif args.dataset == "coco":
args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
args.imdbval_name = "coco_2014_minival"
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']
elif args.dataset == "imagenet":
args.imdb_name = "imagenet_train"
args.imdbval_name = "imagenet_val"
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']
elif args.dataset == "vg":
# train sizes: train, smalltrain, minitrain
# train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
args.imdb_name = "vg_150-50-50_minitrain"
args.imdbval_name = "vg_150-50-50_minival"
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']

args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

#torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda")

训练集准备

1
2
3
4
5
6
7
8
# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
# imdb 是数据本身
# roidb 是ROI数据
train_size = len(roidb)