# Python vgg16() usage examples (snippets collected from several projects).
# Common imports assumed below; project-specific helpers (RPN, FC, Conv2d, RoIPool,
# get_upsampling_weight, network, YOLO, resnet, _get_model_instance, load_pretrained,
# modify_vggs, save_checkpoint, train_epoch, test_epoch, and the various checkpoint
# paths) are defined elsewhere in their original repositories.
import sys
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.models as M  # some snippets alias torchvision.models as M

def reinitialize_fc_layers(self):
print('Reinitialize the fc layers...', end=' ')
weight_multiplier = 4096. / self.nhidden
vgg16 = models.vgg16(pretrained=True)
self.fc6_obj.fc.weight.data.copy_(vgg16.classifier[0].weight.data[:self.nhidden] * weight_multiplier)
self.fc6_obj.fc.bias.data.copy_(vgg16.classifier[0].bias.data[:self.nhidden] * weight_multiplier)
self.fc6_phrase.fc.weight.data.copy_(vgg16.classifier[0].weight.data[:self.nhidden] * weight_multiplier)
self.fc6_phrase.fc.bias.data.copy_(vgg16.classifier[0].bias.data[:self.nhidden] * weight_multiplier)
self.fc6_region.fc.weight.data.copy_(vgg16.classifier[0].weight.data[:self.nhidden] * weight_multiplier)
self.fc6_region.fc.bias.data.copy_(vgg16.classifier[0].bias.data[:self.nhidden] * weight_multiplier)
self.fc7_obj.fc.weight.data.copy_(vgg16.classifier[3].weight.data[:self.nhidden, :self.nhidden] * weight_multiplier)
self.fc7_obj.fc.bias.data.copy_(vgg16.classifier[3].bias.data[:self.nhidden])
self.fc7_phrase.fc.weight.data.copy_(vgg16.classifier[3].weight.data[:self.nhidden, :self.nhidden] * weight_multiplier)
self.fc7_phrase.fc.bias.data.copy_(vgg16.classifier[3].bias.data[:self.nhidden])
self.fc7_region.fc.weight.data.copy_(vgg16.classifier[3].weight.data[:self.nhidden, :self.nhidden] * weight_multiplier)
self.fc7_region.fc.bias.data.copy_(vgg16.classifier[3].bias.data[:self.nhidden])
# network.weights_normal_init(self.caption_prediction, 0.01)
print('Done.')
def __init__(self, num_classes, pretrained=True):
super(FCN32VGG, self).__init__()
vgg = models.vgg16()
if pretrained:
vgg.load_state_dict(torch.load(vgg16_caffe_path))
features, classifier = list(vgg.features.children()), list(vgg.classifier.children())
features[0].padding = (100, 100)
for f in features:
if 'MaxPool' in f.__class__.__name__:
f.ceil_mode = True
elif 'ReLU' in f.__class__.__name__:
f.inplace = True
self.features5 = nn.Sequential(*features)
fc6 = nn.Conv2d(512, 4096, kernel_size=7)
fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
fc6.bias.data.copy_(classifier[0].bias.data)
fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
fc7.bias.data.copy_(classifier[3].bias.data)
score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
score_fr.weight.data.zero_()
score_fr.bias.data.zero_()
self.score_fr = nn.Sequential(
fc6, nn.ReLU(inplace=True), nn.Dropout(), fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr
)
self.upscore = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=64, stride=32, bias=False)
self.upscore.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 64))
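# The matching forward pass is not shown in this snippet; a minimal sketch in the
# usual FCN-32s style (assumed, not part of the original file -- the crop offset of
# 19 follows the original Caffe FCN-32s definition):
def forward(self, x):
    x_size = x.size()
    pool5 = self.features5(x)                     # 1/32-resolution features (100-padding included)
    upscore = self.upscore(self.score_fr(pool5))  # per-class scores, upsampled x32
    # crop away the extra border introduced by the 100-padding
    return upscore[:, :, 19:19 + x_size[2], 19:19 + x_size[3]].contiguous()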
def __init__(self, requires_grad=False):
super(Vgg16, self).__init__()
vgg_pretrained_features = models.vgg16(pretrained=True).features
self.slice1 = torch.nn.Sequential()
self.slice2 = torch.nn.Sequential()
self.slice3 = torch.nn.Sequential()
self.slice4 = torch.nn.Sequential()
for x in range(4):
self.slice1.add_module(str(x), vgg_pretrained_features[x])
for x in range(4, 9):
self.slice2.add_module(str(x), vgg_pretrained_features[x])
for x in range(9, 16):
self.slice3.add_module(str(x), vgg_pretrained_features[x])
for x in range(16, 23):
self.slice4.add_module(str(x), vgg_pretrained_features[x])
if not requires_grad:
for param in self.parameters():
param.requires_grad = False
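# Slice boundaries 4/9/16/23 stop right after relu1_2, relu2_2, relu3_3 and relu4_3.
# A forward that returns the four activations might look like this (a sketch, not
# from the original file):
def forward(self, x):
    h_relu1_2 = self.slice1(x)
    h_relu2_2 = self.slice2(h_relu1_2)
    h_relu3_3 = self.slice3(h_relu2_2)
    h_relu4_3 = self.slice4(h_relu3_3)
    return h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3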
def get_model(name, n_classes):
model = _get_model_instance(name)
if name in ['frrnA', 'frrnB']:
model = model(n_classes, model_type=name[-1])
elif name in ['fcn32s', 'fcn16s', 'fcn8s']:
model = model(n_classes=n_classes)
vgg16 = models.vgg16(pretrained=True)
model.init_vgg16_params(vgg16)
elif name == 'segnet':
model = model(n_classes=n_classes,
is_unpooling=True)
vgg16 = models.vgg16(pretrained=True)
model.init_vgg16_params(vgg16)
elif name == 'unet':
model = model(n_classes=n_classes,
is_batchnorm=True,
in_channels=3,
is_deconv=True)
else:
model = model(n_classes=n_classes)
return model
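# Hypothetical usage, assuming _get_model_instance maps these names to the model
# classes of this repo:
# model = get_model('fcn8s', n_classes=21)   # FCN-8s with VGG16-initialized weights
# model = get_model('segnet', n_classes=21)  # SegNet, likewise initialized from VGG16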
def def_netF():
vgg16 = M.vgg16()
vgg16.load_state_dict(torch.load('vgg16-397923af.pth'))
vgg16.features = nn.Sequential(
*list(vgg16.features.children())[:9]
)
for param in vgg16.parameters():
param.requires_grad = False
return vgg16.features
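# features[:9] keeps conv1_1 through relu2_2 (one max-pool), i.e. 128-channel maps at
# half resolution. A quick sanity check (assumes vgg16-397923af.pth is in the working
# directory; the 256x256 input size is arbitrary):
# netF = def_netF()
# x = torch.randn(1, 3, 256, 256)
# print(netF(x).shape)  # torch.Size([1, 128, 128, 128])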
def def_netF():
vgg16 = M.vgg16()
vgg16.load_state_dict(torch.load('vgg16-397923af.pth'))
vgg16.features = nn.Sequential(
*list(vgg16.features.children())[:5]
)
for param in vgg16.parameters():
param.requires_grad = False
return vgg16.features
def def_netF2():
vgg16 = M.vgg16()
vgg16.load_state_dict(torch.load('vgg16-397923af.pth'))
vgg16.features = nn.Sequential(
*list(vgg16.features.children())[:23]
)
for param in vgg16.parameters():
param.requires_grad = False
return vgg16.features
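# For reference, the truncation points used by the def_netF variants above map onto
# torchvision's vgg16.features as follows (sketch; shapes are for a 224x224 input):
#   [:5]  -> through pool1   : (1, 64, 112, 112)
#   [:9]  -> through relu2_2 : (1, 128, 112, 112)
#   [:23] -> through relu4_3 : (1, 512, 28, 28)
# feats = models.vgg16(pretrained=True).features
# x = torch.randn(1, 3, 224, 224)
# for n in (5, 9, 23):
#     print(n, tuple(nn.Sequential(*list(feats.children())[:n])(x).shape))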
def __init__(self, embed_ndim):
super(VisualSemanticEmbedding, self).__init__()
self.embed_ndim = embed_ndim
# image feature
self.img_encoder = models.vgg16(pretrained=True)
for param in self.img_encoder.parameters():
param.requires_grad = False
self.feat_extractor = nn.Sequential(*(self.img_encoder.classifier[i] for i in range(6)))
self.W = nn.Linear(4096, embed_ndim, False)
# text feature
self.txt_encoder = nn.GRU(embed_ndim, embed_ndim, 1)
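# The snippet only builds the modules; a plausible forward pass (a sketch -- the
# original file may differ; txt is assumed to be (seq_len, batch, embed_ndim)):
def forward(self, img, txt):
    img_feat = self.img_encoder.features(img)
    img_feat = self.feat_extractor(img_feat.view(img_feat.size(0), -1))
    img_feat = self.W(img_feat)          # project 4096-d fc7 features to embed_ndim
    txt_feat, _ = self.txt_encoder(txt)  # GRU over word embeddings
    return img_feat, txt_feat[-1]        # last GRU output as the sentence embedding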
def __init__(self, num_classes, pretrained=True):
super(FCN16VGG, self).__init__()
vgg = models.vgg16()
if pretrained:
vgg.load_state_dict(torch.load(vgg16_caffe_path))
features, classifier = list(vgg.features.children()), list(vgg.classifier.children())
features[0].padding = (100, 100)
for f in features:
if 'MaxPool' in f.__class__.__name__:
f.ceil_mode = True
elif 'ReLU' in f.__class__.__name__:
f.inplace = True
self.features4 = nn.Sequential(*features[: 24])
self.features5 = nn.Sequential(*features[24:])
self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
self.score_pool4.weight.data.zero_()
self.score_pool4.bias.data.zero_()
fc6 = nn.Conv2d(512, 4096, kernel_size=7)
fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
fc6.bias.data.copy_(classifier[0].bias.data)
fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
fc7.bias.data.copy_(classifier[3].bias.data)
score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
score_fr.weight.data.zero_()
score_fr.bias.data.zero_()
self.score_fr = nn.Sequential(
fc6, nn.ReLU(inplace=True), nn.Dropout(), fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr
)
self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
self.upscore16 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=32, stride=16, bias=False)
self.upscore2.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 4))
self.upscore16.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 32))
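# A matching forward sketch (assumed): upsample the final scores x2, fuse with the
# pool4 skip, then upsample x16. The crop offsets (5 and 27) and the 0.01 scaling of
# pool4 follow the original Caffe FCN-16s nets:
def forward(self, x):
    x_size = x.size()
    pool4 = self.features4(x)
    pool5 = self.features5(pool4)
    upscore2 = self.upscore2(self.score_fr(pool5))
    score_pool4 = self.score_pool4(0.01 * pool4)
    upscore16 = self.upscore16(
        score_pool4[:, :, 5:5 + upscore2.size(2), 5:5 + upscore2.size(3)] + upscore2)
    return upscore16[:, :, 27:27 + x_size[2], 27:27 + x_size[3]].contiguous()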
def __init__(self):
super(EncoderCNN, self).__init__()
self.vgg = models.vgg16()
self.vgg.load_state_dict(torch.load(vgg_checkpoint))
# Drop the last fc layer of VGG's classifier; the encoder then outputs 4096-d fc7 features
self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier.children())[:-1])
def _init_modules(self):
vgg = models.vgg16()
if self.pretrained:
print("Loading pretrained weights from %s" %(self.model_path))
state_dict = torch.load(self.model_path)
vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()})
vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
# not using the last maxpool layer
self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
# Fix the layers before conv3:
for layer in range(10):
for p in self.RCNN_base[layer].parameters(): p.requires_grad = False
# self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model)
self.RCNN_top = vgg.classifier
self.RCNN_cls_score = nn.Linear(4096, self.n_classes)
if self.class_agnostic:
self.RCNN_bbox_pred = nn.Linear(4096, 4)
else:
self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes)
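# RCNN_top consumes flattened 7x7x512 RoI features; in this codebase style the
# companion method typically looks like (a sketch):
def _head_to_tail(self, pool5):
    pool5_flat = pool5.view(pool5.size(0), -1)  # (N, 512 * 7 * 7)
    return self.RCNN_top(pool5_flat)            # (N, 4096) fc7 features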
def getNetwork(args):
if (args.net_type == 'alexnet'):
net = models.alexnet(pretrained=args.finetune)
file_name = 'alexnet'
elif (args.net_type == 'vggnet'):
if(args.depth == 11):
net = models.vgg11(pretrained=args.finetune)
elif(args.depth == 13):
net = models.vgg13(pretrained=args.finetune)
elif(args.depth == 16):
net = models.vgg16(pretrained=args.finetune)
elif(args.depth == 19):
net = models.vgg19(pretrained=args.finetune)
else:
print('Error : VGGnet should have depth of either [11, 13, 16, 19]')
sys.exit(1)
file_name = 'vgg-%s' %(args.depth)
elif (args.net_type == 'resnet'):
net = resnet(args.finetune, args.depth)
file_name = 'resnet-%s' %(args.depth)
else:
print('Error : Network should be either [alexnet / vggnet / resnet]')
sys.exit(1)
return net, file_name
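# Hypothetical usage with a simple namespace standing in for the argparse result:
# from types import SimpleNamespace
# args = SimpleNamespace(net_type='vggnet', depth=16, finetune=True)
# net, file_name = getNetwork(args)  # -> pretrained VGG-16, file_name == 'vgg-16'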
def vgg16(num_classes=1000, pretrained='imagenet'):
"""VGG 16-layer model (configuration "D")
"""
model = models.vgg16(pretrained=False)
if pretrained is not None:
settings = pretrained_settings['vgg16'][pretrained]
model = load_pretrained(model, num_classes, settings)
model = modify_vggs(model)
return model
def load_pretrained_npy(faster_rcnn_model, fname):
params = np.load(fname, allow_pickle=True).item()  # pickled dict; allow_pickle needed on newer numpy
# vgg16
vgg16_dict = faster_rcnn_model.rpn.features.state_dict()
for name, val in vgg16_dict.items():
# # print name
# # print val.size()
# # print param.size()
if name.find('bn.') >= 0:
continue
i, j = int(name[4]), int(name[6]) + 1
ptype = 'weights' if name[-1] == 't' else 'biases'
key = 'conv{}_{}'.format(i, j)
param = torch.from_numpy(params[key][ptype])
if ptype == 'weights':
param = param.permute(3, 2, 0, 1)
val.copy_(param)
# fc6 fc7
frcnn_dict = faster_rcnn_model.state_dict()
pairs = {'fc6.fc': 'fc6', 'fc7.fc': 'fc7'}
for k, v in pairs.items():
key = '{}.weight'.format(k)
param = torch.from_numpy(params[v]['weights']).permute(1, 0)
frcnn_dict[key].copy_(param)
key = '{}.bias'.format(k)
param = torch.from_numpy(params[v]['biases'])
frcnn_dict[key].copy_(param)
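# The permutes convert weight layouts: conv kernels in the .npy file are assumed to
# be HWIO (height, width, in_ch, out_ch), while PyTorch expects OIHW; fc weights are
# stored (in, out) and PyTorch expects (out, in). For illustration:
# w_hwio = np.zeros((3, 3, 3, 64), dtype=np.float32)     # (kh, kw, in_c, out_c)
# w_oihw = torch.from_numpy(w_hwio).permute(3, 2, 0, 1)  # -> (64, 3, 3, 3)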
def load_pretrained_model(faster_rcnn_model, model_name='vgg16'):
if model_name == 'vgg16':
model = models.vgg16(pretrained=True)
faster_rcnn_model.rpn.features = model.features
mod = list(model.classifier.children())[:-1]
faster_rcnn_model.fcs = nn.Sequential(*mod)
elif model_name == 'resnet101':
model = models.resnet101(pretrained=True)
faster_rcnn_model.rpn.features = nn.Sequential(model.conv1, model.bn1, model.relu, model.maxpool,
model.layer1, model.layer2, model.layer3, model.layer4,
model.avgpool)
faster_rcnn_model.fcs = model.fc
def __init__(self, classes=None, debug=False, arch='vgg16'):
super(FasterRCNN, self).__init__()
if classes is not None:
self.classes = classes
self.n_classes = len(classes)
print('n_classes: {}\n{}'.format(self.n_classes, self.classes))
if arch == 'vgg16':
cnn_arch = models.vgg16(pretrained=False) # w/o bn
self.rpn = RPN(features=cnn_arch.features)
self.fcs = nn.Sequential(
nn.Linear(512 * 7 * 7, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout()
)
self.roi_pool = RoIPool(7, 7, 1.0/16)
# self.fc6 = FC(512 * 7 * 7, 4096)
# self.fc7 = FC(4096, 4096)
self.score_fc = FC(4096, self.n_classes, relu=False)
self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)
# loss
self.cross_entropy = None
self.loss_box = None
# for log
self.debug = debug
def train(args):
print('Batch size is {}'.format(args.batch_size))
vgg = models.vgg16(pretrained=True)
model = YOLO(vgg.features)
if args.use_cuda:
model = torch.nn.DataParallel(model)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
best = 1e+30
for epoch in range(1, args.epochs+1):
l = train_epoch(epoch, model, optimizer, args)
upperleft, bottomright, classes, confs = test_epoch(model, jpg='../data/1.jpg')
is_best = l < best
best = min(l, best)
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'optimizer' : optimizer.state_dict(),
}, is_best)
checkpoint = torch.load('./model_best.pth.tar')
state_dict = checkpoint['state_dict']
new_state_dict = OrderedDict()
for k, v in state_dict.items():
name = k[7:]
new_state_dict[name] = v
model.load_state_dict(new_state_dict)
model.cpu()
torch.save(model.state_dict(), 'model_cpu.pth.tar')
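# The k[7:] slice above strips the 'module.' prefix that nn.DataParallel prepends to
# every parameter name, so the checkpoint can be loaded into the bare model, e.g.:
# 'module.features.0.weight'[7:] == 'features.0.weight'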
def _init_head_tail(self):
self.vgg = models.vgg16()
# Remove fc8
self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1])
# Fix the layers before conv3:
for layer in range(10):
for p in self.vgg.features[layer].parameters(): p.requires_grad = False
# not using the last maxpool layer
self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1])
def __init__(self, use_kmeans_anchors=False):
super(RPN, self).__init__()
if use_kmeans_anchors:
print('using k-means anchors')
self.anchor_scales = self.anchor_scales_kmeans
self.anchor_ratios = self.anchor_ratios_kmeans
self.anchor_scales_region = self.anchor_scales_kmeans_region
self.anchor_ratios_region = self.anchor_ratios_kmeans_region
else:
print('using normal anchors')
self.anchor_scales, self.anchor_ratios = \
np.meshgrid(self.anchor_scales_normal, self.anchor_ratios_normal, indexing='ij')
self.anchor_scales = self.anchor_scales.reshape(-1)
self.anchor_ratios = self.anchor_ratios.reshape(-1)
self.anchor_scales_region, self.anchor_ratios_region = \
np.meshgrid(self.anchor_scales_normal_region, self.anchor_ratios_normal_region, indexing='ij')
self.anchor_scales_region = self.anchor_scales_region.reshape(-1)
self.anchor_ratios_region = self.anchor_ratios_region.reshape(-1)
self.anchor_num = len(self.anchor_scales)
self.anchor_num_region = len(self.anchor_scales_region)
# self.features = VGG16(bn=False)
self.features = models.vgg16(pretrained=True).features
self.features.__delattr__('30') # to delete the max pooling
# by default, fix the first four layers
network.set_trainable_param(list(self.features.parameters())[:8], requires_grad=False)
# self.features = models.vgg16().features
self.conv1 = Conv2d(512, 512, 3, same_padding=True)
self.score_conv = Conv2d(512, self.anchor_num * 2, 1, relu=False, same_padding=False)
self.bbox_conv = Conv2d(512, self.anchor_num * 4, 1, relu=False, same_padding=False)
self.conv1_region = Conv2d(512, 512, 3, same_padding=True)
self.score_conv_region = Conv2d(512, self.anchor_num_region * 2, 1, relu=False, same_padding=False)
self.bbox_conv_region = Conv2d(512, self.anchor_num_region * 4, 1, relu=False, same_padding=False)
# loss
self.cross_entropy = None
self.loss_box = None
self.cross_entropy_region = None
self.loss_box_region = None
# initialize the parameters
self.initialize_parameters()
def __init__(self, num_classes, pretrained=True, caffe=False):
super(FCN8s, self).__init__()
vgg = models.vgg16()
if pretrained:
if caffe:
# load the pretrained vgg16 used by the paper's author
vgg.load_state_dict(torch.load(vgg16_caffe_path))
else:
vgg.load_state_dict(torch.load(vgg16_path))
features, classifier = list(vgg.features.children()), list(vgg.classifier.children())
'''
100 padding for 2 reasons:
1) support very small input size
2) allow cropping in order to match size of different layers' feature maps
Note that the cropped part corresponds to a part of the 100 padding
Spatial information of different layers' feature maps cannot be aligned exactly because of the cropping, which is bad
'''
features[0].padding = (100, 100)
for f in features:
if 'MaxPool' in f.__class__.__name__:
f.ceil_mode = True
elif 'ReLU' in f.__class__.__name__:
f.inplace = True
self.features3 = nn.Sequential(*features[: 17])
self.features4 = nn.Sequential(*features[17: 24])
self.features5 = nn.Sequential(*features[24:])
self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)
self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
self.score_pool3.weight.data.zero_()
self.score_pool3.bias.data.zero_()
self.score_pool4.weight.data.zero_()
self.score_pool4.bias.data.zero_()
fc6 = nn.Conv2d(512, 4096, kernel_size=7)
fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
fc6.bias.data.copy_(classifier[0].bias.data)
fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
fc7.bias.data.copy_(classifier[3].bias.data)
score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
score_fr.weight.data.zero_()
score_fr.bias.data.zero_()
self.score_fr = nn.Sequential(
fc6, nn.ReLU(inplace=True), nn.Dropout(), fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr
)
self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
self.upscore_pool4 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=16, stride=8, bias=False)
self.upscore2.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 4))
self.upscore_pool4.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 4))
self.upscore8.weight.data.copy_(get_upsampling_weight(num_classes, num_classes, 16))
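# A matching forward sketch (assumed): fuse pool4 and pool3 skips with progressively
# upsampled scores. The crop offsets (5, 9, 31) and the 0.01 / 0.0001 scalings follow
# the original "at-once" Caffe FCN-8s nets:
def forward(self, x):
    x_size = x.size()
    pool3 = self.features3(x)
    pool4 = self.features4(pool3)
    pool5 = self.features5(pool4)
    upscore2 = self.upscore2(self.score_fr(pool5))
    score_pool4 = self.score_pool4(0.01 * pool4)
    upscore_pool4 = self.upscore_pool4(
        score_pool4[:, :, 5:5 + upscore2.size(2), 5:5 + upscore2.size(3)] + upscore2)
    score_pool3 = self.score_pool3(0.0001 * pool3)
    upscore8 = self.upscore8(
        score_pool3[:, :, 9:9 + upscore_pool4.size(2), 9:9 + upscore_pool4.size(3)]
        + upscore_pool4)
    return upscore8[:, :, 31:31 + x_size[2], 31:31 + x_size[3]].contiguous()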