forked from BR-IDL/PaddleViT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backbone.py
179 lines (152 loc) · 6.99 KB
/
backbone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Copyright (c) 2021 PPViT Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Backbone-related classes and methods for DETR.
The backbone currently supports ResNet50 and ResNet101.
The 'build_backbone' function returns the ResNet backbone joined with its position embedding.
ResNet is implemented in ./resnet.py
"""
from collections import OrderedDict
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from position_embedding import build_position_encoding
import resnet
from utils import NestedTensor
class IntermediateLayerGetter(nn.LayerDict):
    """Collect the outputs of selected child layers during a forward pass.

    Only the direct children of `model` up to and including the last
    requested layer are stored; children after that one are dropped.
    The forward method feeds the input through the stored children in order
    and returns an OrderedDict mapping each output name to the tensor
    produced by the corresponding layer.

    Arguments:
        model: nn.Layer, backbone model, e.g., resnet50
        return_layers: dict, maps a child-layer name of `model` to the key
            under which that layer's output is returned

    Raises:
        ValueError: if a key of return_layers is not a direct child of model
    """

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError('return_layers are not present in model')

        # Bookkeeping is done on a private copy so that the mapping handed in
        # by the caller (kept for forward) is never emptied.
        pending = dict(return_layers.items())
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            if not pending:
                # every requested layer has been collected; skip the rest
                break

        super(IntermediateLayerGetter, self).__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Run x through the stored layers and gather the requested outputs."""
        outputs = OrderedDict()
        for layer_name, layer in self.named_children():
            # each layer consumes the output of the previous one
            x = layer(x)
            if layer_name not in self.return_layers:
                continue
            outputs[self.return_layers[layer_name]] = x
        return outputs
class FrozenBatchNorm2D(nn.Layer):
    """Batch normalization with fixed statistics and affine terms.

    This layer can be used in place of nn.BatchNorm2D in order to freeze
    the normalization, which is usually done for the backbone during training.
    weight, bias, _mean and _variance are registered as buffers (not as
    trainable parameters) and forward only reads them.

    The output is computed as
        x * scale + shift
    with scale = weight / sqrt(_variance + eps) and shift = bias - _mean * scale.

    Arguments:
        n: int, number of channels (length of each buffer)
        eps: float, value added to the variance before the reciprocal square
            root, default: 1e-5 (the value that used to be hard-coded)
    """

    def __init__(self, n, eps=1e-5):
        super(FrozenBatchNorm2D, self).__init__()
        self.register_buffer('weight', paddle.ones([n]))
        self.register_buffer('bias', paddle.zeros([n]))
        self.register_buffer('_mean', paddle.zeros([n]))
        self.register_buffer('_variance', paddle.ones([n]))
        # plain Python float, so it is not part of the layer's buffers
        self.eps = eps

    def forward(self, x):
        # Reshape the per-channel buffers to [1, n, 1, 1] so they broadcast
        # over x (assumes x is laid out as [batch, channels, h, w]).
        w = self.weight.reshape([1, -1, 1, 1])
        b = self.bias.reshape([1, -1, 1, 1])
        rv = self._variance.reshape([1, -1, 1, 1])
        rm = self._mean.reshape([1, -1, 1, 1])
        # Fold normalization and affine transform into one scale and one shift.
        scale = w * (rv + self.eps).rsqrt()
        shift = b - rm * scale
        return x * scale + shift
class BackboneBase(nn.Layer):
    """Backbone base class for NestedTensor input and multiple outputs

    This class takes a NestedTensor as input, runs its tensors through the
    backbone and returns one NestedTensor (feature map + resized mask) for
    each selected layer.

    Arguments:
        backbone: nn.Layer, backbone model whose children are named like
            'layer4' (see return_layers below)
        train_backbone: bool, accepted for interface compatibility
        num_channels: int, stored as self.num_channels
        return_interm_layers: bool, if True outputs of several layers are
            returned, otherwise only the output of 'layer4'
    """

    def __init__(self,
                 backbone: nn.Layer,
                 train_backbone: bool,
                 num_channels: int,
                 return_interm_layers: bool):
        super().__init__()
        # NOTE(review): train_backbone is not used in this class; no
        # parameters are frozen here. Confirm whether that is intended.
        if return_interm_layers:
            # NOTE(review): the first key is 'layer0' -- verify against the
            # child names defined in resnet.py.
            return_layers = {'layer0': '0', 'layer2': '1', 'layer3':'2', 'layer4':'3'}
        else:
            return_layers = {'layer4': '0'}
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        self.num_channels = num_channels

    def forward(self, tensor_list):
        """Return a dict: output name -> NestedTensor(feature, resized mask).

        tensor_list contains .tensors (paddle.Tensor) and .mask (paddle.Tensor)
        for the batch inputs; .mask must not be None.
        """
        # Inference through the backbone, which takes a paddle.Tensor as input
        xs = self.body(tensor_list.tensors)

        # The mask preparation does not depend on the feature map, so it is
        # done once instead of once per returned layer.
        mask = tensor_list.mask  # [batch_size, orig_h, orig_w]
        assert mask is not None
        mask = mask.unsqueeze(0).astype('float32')  # [1, batch_size, orig_h, orig_w]

        out = {}
        for name, x in xs.items():
            # x.shape: [batch_size, feat_dim, feat_h, feat_w]
            # resize the mask to the spatial size of this feature map
            resized = F.interpolate(mask, size=x.shape[-2:])[0]  # [batch_size, feat_h, feat_w]
            out[name] = NestedTensor(x, resized)
        return out
class Backbone(BackboneBase):
    """ResNet backbone taken from resnet.py and wrapped as a BackboneBase.

    Arguments:
        name: str, name of the constructor looked up in resnet.py,
            e.g., 'resnet50'
        train_backbone: bool, forwarded to BackboneBase
        return_interm_layers: bool, forwarded to BackboneBase
        dilation: bool, used as the last entry of replace_stride_with_dilation
    """

    def __init__(self, name, train_backbone, return_interm_layers, dilation):
        resnet_factory = getattr(resnet, name)
        # pretrained is requested only on the process with rank 0
        load_pretrained = paddle.distributed.get_rank() == 0
        body = resnet_factory(pretrained=load_pretrained,
                              norm_layer=FrozenBatchNorm2D,
                              replace_stride_with_dilation=[False, False, dilation])
        # resnet18/resnet34 report 512 channels, every other name 2048
        if name in ('resnet18', 'resnet34'):
            feat_channels = 512
        else:
            feat_channels = 2048
        super().__init__(body, train_backbone, feat_channels, return_interm_layers)
class Joiner(nn.Sequential):
    """Pair a backbone with a position embedding layer (nn.Sequential)

    Index 0 holds the backbone and index 1 the position embedding.

    Arguments:
        backbone: nn.Layer, backbone layer (resnet)
        position_embedding: nn.Layer, position_embedding(learned, or sine)
    """

    def __init__(self, backbone, position_embedding):
        super().__init__(backbone, position_embedding)

    def forward(self, x):
        """Return (features, position embeddings) as two parallel lists."""
        backbone, position_embedding = self[0], self[1]
        # dict of backbone outputs, one entry per returned layer
        features = backbone(x)
        out, pos = [], []
        for feat in features.values():
            out.append(feat)
            # the embedding is cast to the dtype of the feature tensors
            pos.append(position_embedding(feat).astype(feat.tensors.dtype))
        return out, pos
def build_backbone(config):
    """Build the resnet backbone joined with its position embedding.

    Reads config.MODEL.BACKBONE ('resnet50' or 'resnet101'), config.EVAL and
    config.MODEL.TRANS.HIDDEN_SIZE. The returned Joiner additionally carries
    the backbone's num_channels attribute.
    """
    backbone_name = config.MODEL.BACKBONE
    assert backbone_name in ('resnet50', 'resnet101'), "backbone name is not supported!"

    train_backbone = not config.EVAL
    # The position embedding is created before the backbone.
    position_embedding = build_position_encoding(config.MODEL.TRANS.HIDDEN_SIZE)
    backbone = Backbone(
        backbone_name,
        train_backbone=train_backbone,
        return_interm_layers=False,  # TODO: impl case True for segmentation
        dilation=False,
    )

    joined = Joiner(backbone, position_embedding)
    joined.num_channels = backbone.num_channels
    return joined