최인훈

Distance Estimation Module
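The files below make up the distance estimation pipeline: data configuration files for the cafe datasets, three YOLOv3-tiny style network cfgs, a PyTorch Darknet implementation modified to also expose an intermediate feature map, an ROIPool head that regresses a per-object distance from that feature map, and the supporting dataset, augmentation, logging, and config-parsing utilities.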

1 +classes=12
2 +train=data/cafe/train.txt
3 +valid=data/cafe/valid.txt
4 +names=data/cafe/classes.names
1 +classes=5
2 +train=data/cafe_distance/train.txt
3 +valid=data/cafe_distance/valid.txt
4 +names=data/cafe_distance/classes.names
5 +
6 +
7 +
8 +
1 +classes=5
2 +train=data/testdata/train.txt
3 +valid=data/testdata/valid.txt
4 +names=data/testdata/classes.names
5 +
6 +
7 +
8 +
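The .data files above only name the file lists and the class-name file; the list files themselves are expected to hold one image path per line, and ListDataset (further down) derives each label path by replacing "images" with "labels" and the image extension with ".txt". A hedged sketch of the layout these configs appear to assume, with hypothetical file names:

# data/cafe_distance/train.txt       one image path per line, e.g. data/cafe_distance/images/0001.jpg
# data/cafe_distance/labels/0001.txt one object per line:
#   class x_center y_center width height distance   (box coordinates normalized to [0, 1])
# data/cafe_distance/classes.names   one class name per line, in class-id order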
1 +[net]
2 +# Testing
3 +# batch=1
4 +# subdivisions=1
5 +# Training
6 +batch=8
7 +subdivisions=2
8 +width=416
9 +height=416
10 +channels=3
11 +momentum=0.9
12 +decay=0.0005
13 +angle=0
14 +saturation = 1.5
15 +exposure = 1.5
16 +hue=.1
17 +
18 +learning_rate=0.001
19 +burn_in=1000
20 +max_batches = 500200
21 +policy=steps
22 +steps=400000,450000
23 +scales=.1,.1
24 +
25 +# 0
26 +[convolutional]
27 +batch_normalize=1
28 +filters=16
29 +size=3
30 +stride=1
31 +pad=1
32 +activation=leaky
33 +
34 +# 1
35 +[maxpool]
36 +size=2
37 +stride=2
38 +
39 +# 2
40 +[convolutional]
41 +batch_normalize=1
42 +filters=32
43 +size=3
44 +stride=1
45 +pad=1
46 +activation=leaky
47 +
48 +# 3
49 +[maxpool]
50 +size=2
51 +stride=2
52 +
53 +# 4
54 +[convolutional]
55 +batch_normalize=1
56 +filters=64
57 +size=3
58 +stride=1
59 +pad=1
60 +activation=leaky
61 +
62 +# 5
63 +[maxpool]
64 +size=2
65 +stride=2
66 +
67 +# 6
68 +[convolutional]
69 +batch_normalize=1
70 +filters=128
71 +size=3
72 +stride=1
73 +pad=1
74 +activation=leaky
75 +
76 +# 7
77 +[maxpool]
78 +size=2
79 +stride=2
80 +
81 +# 8
82 +[convolutional]
83 +batch_normalize=1
84 +filters=256
85 +size=3
86 +stride=1
87 +pad=1
88 +activation=leaky
89 +
90 +# 9
91 +[convolutional]
92 +batch_normalize=1
93 +filters=512
94 +size=3
95 +stride=1
96 +pad=1
97 +activation=leaky
98 +
99 +# 10
100 +[convolutional]
101 +size=1
102 +stride=1
103 +pad=1
104 +filters=42
105 +activation=linear
106 +
107 +# 11
108 +[yolo]
109 +mask = 0, 1, 2
110 +anchors = 37,58, 81,82, 135,169
111 +classes=9
112 +num=3
113 +jitter=.3
114 +ignore_thresh = .7
115 +truth_thresh = 1
116 +random=1
117 +
118 +
119 +
120 +
121 +
1 +[net]
2 +# Testing
3 +# batch=1
4 +# subdivisions=1
5 +# Training
6 +batch=8
7 +subdivisions=2
8 +width=416
9 +height=416
10 +channels=3
11 +momentum=0.9
12 +decay=0.0005
13 +angle=0
14 +saturation = 1.5
15 +exposure = 1.5
16 +hue=.1
17 +
18 +learning_rate=0.001
19 +burn_in=1000
20 +max_batches = 500200
21 +policy=steps
22 +steps=400000,450000
23 +scales=.1,.1
24 +
25 +# 0
26 +[convolutional]
27 +batch_normalize=1
28 +filters=16
29 +size=3
30 +stride=1
31 +pad=1
32 +activation=leaky
33 +
34 +# 1
35 +[maxpool]
36 +size=2
37 +stride=2
38 +
39 +# 2
40 +[convolutional]
41 +batch_normalize=1
42 +filters=32
43 +size=3
44 +stride=1
45 +pad=1
46 +activation=leaky
47 +
48 +# 3
49 +[maxpool]
50 +size=2
51 +stride=2
52 +
53 +# 4
54 +[convolutional]
55 +batch_normalize=1
56 +filters=64
57 +size=3
58 +stride=1
59 +pad=1
60 +activation=leaky
61 +
62 +# 5
63 +[maxpool]
64 +size=2
65 +stride=2
66 +
67 +# 6
68 +[convolutional]
69 +batch_normalize=1
70 +filters=128
71 +size=3
72 +stride=1
73 +pad=1
74 +activation=leaky
75 +
76 +# 7
77 +[maxpool]
78 +size=2
79 +stride=2
80 +
81 +# 8
82 +[convolutional]
83 +batch_normalize=1
84 +filters=256
85 +size=3
86 +stride=1
87 +pad=1
88 +activation=leaky
89 +
90 +# 9
91 +[convolutional]
92 +size=1
93 +stride=1
94 +pad=1
95 +filters=42
96 +activation=linear
97 +
98 +# 10
99 +[yolo]
100 +mask = 0, 1, 2
101 +anchors = 59,119, 81,82, 135,169
102 +classes=9
103 +num=3
104 +jitter=.3
105 +ignore_thresh = .7
106 +truth_thresh = 1
107 +random=1
108 +
109 +
110 +
111 +
112 +
1 +[net]
2 +# Testing
3 +# batch=1
4 +# subdivisions=1
5 +# Training
6 +batch=8
7 +subdivisions=2
8 +width=416
9 +height=416
10 +channels=3
11 +momentum=0.9
12 +decay=0.0005
13 +angle=0
14 +saturation = 1.5
15 +exposure = 1.5
16 +hue=.1
17 +
18 +learning_rate=0.001
19 +burn_in=1000
20 +max_batches = 500200
21 +policy=steps
22 +steps=400000,450000
23 +scales=.1,.1
24 +
25 +# 0
26 +[convolutional]
27 +batch_normalize=1
28 +filters=16
29 +size=3
30 +stride=1
31 +pad=1
32 +activation=leaky
33 +
34 +# 1
35 +[maxpool]
36 +size=2
37 +stride=2
38 +
39 +# 2
40 +[convolutional]
41 +batch_normalize=1
42 +filters=32
43 +size=3
44 +stride=1
45 +pad=1
46 +activation=leaky
47 +
48 +# 3
49 +[maxpool]
50 +size=2
51 +stride=2
52 +
53 +# 4
54 +[convolutional]
55 +batch_normalize=1
56 +filters=64
57 +size=3
58 +stride=1
59 +pad=1
60 +activation=leaky
61 +
62 +# 5
63 +[maxpool]
64 +size=2
65 +stride=2
66 +
67 +# 6
68 +[convolutional]
69 +batch_normalize=1
70 +filters=128
71 +size=3
72 +stride=1
73 +pad=1
74 +activation=leaky
75 +
76 +# 7
77 +[maxpool]
78 +size=2
79 +stride=2
80 +
81 +# 8
82 +[convolutional]
83 +batch_normalize=1
84 +filters=256
85 +size=3
86 +stride=1
87 +pad=1
88 +activation=leaky
89 +
90 +# 9
91 +[maxpool]
92 +size=2
93 +stride=2
94 +
95 +# 10
96 +[convolutional]
97 +batch_normalize=1
98 +filters=512
99 +size=3
100 +stride=1
101 +pad=1
102 +activation=leaky
103 +
104 +# 11
105 +[maxpool]
106 +size=2
107 +stride=1
108 +
109 +# 12
110 +[convolutional]
111 +batch_normalize=1
112 +filters=1024
113 +size=3
114 +stride=1
115 +pad=1
116 +activation=leaky
117 +
118 +###########
119 +
120 +# 13
121 +[convolutional]
122 +batch_normalize=1
123 +filters=256
124 +size=1
125 +stride=1
126 +pad=1
127 +activation=leaky
128 +
129 +# 14
130 +[convolutional]
131 +batch_normalize=1
132 +filters=512
133 +size=3
134 +stride=1
135 +pad=1
136 +activation=leaky
137 +
138 +# 15
139 +[convolutional]
140 +size=1
141 +stride=1
142 +pad=1
143 +filters=30
144 +activation=linear
145 +
146 +
147 +
148 +# 16
149 +[yolo]
150 +mask = 3,4,5
151 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
152 +classes=5
153 +num=6
154 +jitter=.3
155 +ignore_thresh = .7
156 +truth_thresh = 1
157 +random=1
158 +
159 +# 17
160 +[route]
161 +layers = -4
162 +
163 +# 18
164 +[convolutional]
165 +batch_normalize=1
166 +filters=128
167 +size=1
168 +stride=1
169 +pad=1
170 +activation=leaky
171 +
172 +# 19
173 +[upsample]
174 +stride=2
175 +
176 +# 20
177 +[route]
178 +layers = -1, 8
179 +
180 +# 21
181 +[convolutional]
182 +batch_normalize=1
183 +filters=256
184 +size=3
185 +stride=1
186 +pad=1
187 +activation=leaky
188 +
189 +# 22
190 +[convolutional]
191 +size=1
192 +stride=1
193 +pad=1
194 +filters=30
195 +activation=linear
196 +
197 +# 23
198 +[yolo]
199 +mask = 0,1,2
200 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
201 +classes=5
202 +num=6
203 +jitter=.3
204 +ignore_thresh = .7
205 +truth_thresh = 1
206 +random=1
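In all three cfgs the 1x1 convolution feeding each [yolo] block follows the usual Darknet rule that its filter count equals anchors-per-scale times (5 + classes), which is why the 9-class heads use 42 filters and the 5-class heads use 30. A quick sanity check of that arithmetic (the function name is only for illustration):

def yolo_head_filters(anchors_per_scale, num_classes):
    # tx, ty, tw, th, objectness plus one score per class, for every anchor at this scale
    return anchors_per_scale * (5 + num_classes)

assert yolo_head_filters(3, 9) == 42   # first two cfgs: classes=9, mask = 0, 1, 2
assert yolo_head_filters(3, 5) == 30   # third cfg: classes=5, two heads with three anchors each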
1 +from __future__ import division
2 +
3 +import torch
4 +import torch.nn as nn
5 +import torch.nn.functional as F
6 +from torch.autograd import Variable
7 +import numpy as np
8 +
9 +from utils.parse_config import *
10 +from utils.utils import build_targets, to_cpu, non_max_suppression
11 +
12 +import matplotlib.pyplot as plt
13 +import matplotlib.patches as patches
14 +
15 +
16 +def create_modules(module_defs):
17 + """
18 + Constructs module list of layer blocks from module configuration in module_defs
19 + """
20 + hyperparams = module_defs.pop(0)
21 + output_filters = [int(hyperparams["channels"])]
22 + module_list = nn.ModuleList()
23 + for module_i, module_def in enumerate(module_defs):
24 + modules = nn.Sequential()
25 +
26 + if module_def["type"] == "convolutional":
27 + bn = int(module_def["batch_normalize"])
28 + filters = int(module_def["filters"])
29 + kernel_size = int(module_def["size"])
30 + pad = (kernel_size - 1) // 2
31 + modules.add_module(
32 + "conv_{}".format(module_i),
33 + nn.Conv2d(
34 + in_channels=output_filters[-1],
35 + out_channels=filters,
36 + kernel_size=kernel_size,
37 + stride=int(module_def["stride"]),
38 + padding=pad,
39 + bias=not bn,
40 + ),
41 + )
42 + if bn:
43 + modules.add_module("batch_norm_{}".format(module_i), nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
44 + if module_def["activation"] == "leaky":
45 + modules.add_module("leaky_{}".format(module_i), nn.LeakyReLU(0.1))
46 +
47 + elif module_def["type"] == "maxpool":
48 + kernel_size = int(module_def["size"])
49 + stride = int(module_def["stride"])
50 + if kernel_size == 2 and stride == 1:
51 + modules.add_module("_debug_padding_{}".format(module_i), nn.ZeroPad2d((0, 1, 0, 1)))
52 + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
53 + modules.add_module("maxpool_{}".format(module_i), maxpool)
54 +
55 + elif module_def["type"] == "upsample":
56 + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
57 + modules.add_module("upsample_{}".format(module_i), upsample)
58 +
59 + elif module_def["type"] == "route":
60 + layers = [int(x) for x in module_def["layers"].split(",")]
61 + filters = sum([output_filters[1:][i] for i in layers])
62 + modules.add_module("route_{}".format(module_i), EmptyLayer())
63 +
64 +        elif module_def["type"] == "shortcut":  # no shortcut layers in the tiny cfgs above
65 + filters = output_filters[1:][int(module_def["from"])]
66 + modules.add_module("shortcut_{}".format(module_i), EmptyLayer())
67 +
68 + elif module_def["type"] == "yolo":
69 + anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
70 + # Extract anchors
71 + anchors = [int(x) for x in module_def["anchors"].split(",")]
72 + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
73 + anchors = [anchors[i] for i in anchor_idxs]
74 + num_classes = int(module_def["classes"])
75 + img_size = int(hyperparams["height"])
76 + # Define detection layer
77 + yolo_layer = YOLOLayer(anchors, num_classes, img_size)
78 + modules.add_module("yolo_{}".format(module_i), yolo_layer)
79 + # Register module list and number of output filters
80 + module_list.append(modules)
81 + output_filters.append(filters)
82 +
83 + return hyperparams, module_list
84 +
85 +
86 +class Upsample(nn.Module):
87 + """ nn.Upsample is deprecated """
88 +
89 + def __init__(self, scale_factor, mode="nearest"):
90 + super(Upsample, self).__init__()
91 + self.scale_factor = scale_factor
92 + self.mode = mode
93 +
94 + def forward(self, x):
95 + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
96 + return x
97 +
98 +
99 +class EmptyLayer(nn.Module):
100 + """Placeholder for 'route' and 'shortcut' layers"""
101 +
102 + def __init__(self):
103 + super(EmptyLayer, self).__init__()
104 +
105 +
106 +class YOLOLayer(nn.Module):
107 + """Detection layer"""
108 +
109 + def __init__(self, anchors, num_classes, img_dim=416):
110 + super(YOLOLayer, self).__init__()
111 + self.anchors = anchors
112 + self.num_anchors = len(anchors)
113 + self.num_classes = num_classes
114 + self.ignore_thres = 0.5
115 + self.mse_loss = nn.MSELoss()
116 + self.bce_loss = nn.BCELoss()
117 + self.obj_scale = 1
118 + self.noobj_scale = 100
119 + self.metrics = {}
120 + self.img_dim = img_dim
121 + self.grid_size = 0 # grid size
122 +
123 + def compute_grid_offsets(self, grid_size, cuda=True):
124 + self.grid_size = grid_size
125 + g = self.grid_size
126 + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
127 + self.stride = self.img_dim / self.grid_size
128 + # Calculate offsets for each grid
129 + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
130 + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
131 + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
132 + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
133 + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
134 +
135 + def forward(self, x, targets=None, img_dim=None):
136 +
137 + # Tensors for cuda support
138 + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
139 + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
140 + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
141 +
142 + self.img_dim = img_dim
143 + num_samples = x.size(0)
144 + grid_size = x.size(2)
145 +
146 + prediction = (
147 + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
148 + .permute(0, 1, 3, 4, 2)
149 + .contiguous()
150 + )
151 +
152 + # Get outputs
153 + x = torch.sigmoid(prediction[..., 0]) # Center x
154 + y = torch.sigmoid(prediction[..., 1]) # Center y
155 + w = prediction[..., 2] # Width
156 + h = prediction[..., 3] # Height
157 + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
158 + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
159 +
160 + # If grid size does not match current we compute new offsets
161 + if grid_size != self.grid_size:
162 + self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
163 +
164 + # Add offset and scale with anchors
165 + pred_boxes = FloatTensor(prediction[..., :4].shape)
166 + pred_boxes[..., 0] = x.data + self.grid_x
167 + pred_boxes[..., 1] = y.data + self.grid_y
168 + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
169 + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
170 +
171 + output = torch.cat(
172 + (
173 + pred_boxes.view(num_samples, -1, 4) * self.stride,
174 + pred_conf.view(num_samples, -1, 1),
175 + pred_cls.view(num_samples, -1, self.num_classes),
176 + ),
177 + -1,
178 + )
179 +
180 + if targets is None:
181 + return output, 0
182 + else:
183 + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
184 + pred_boxes=pred_boxes,
185 + pred_cls=pred_cls,
186 + target=targets,
187 + anchors=self.scaled_anchors,
188 + ignore_thres=self.ignore_thres,
189 + )
190 +
191 + # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
192 + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
193 + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
194 + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
195 + loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
196 + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
197 + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
198 + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
199 + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
200 + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
201 +
202 + # Metrics
203 + cls_acc = 100 * class_mask[obj_mask].mean()
204 + conf_obj = pred_conf[obj_mask].mean()
205 + conf_noobj = pred_conf[noobj_mask].mean()
206 + conf50 = (pred_conf > 0.5).float()
207 + iou50 = (iou_scores > 0.5).float()
208 + iou75 = (iou_scores > 0.75).float()
209 + detected_mask = conf50 * class_mask * tconf
210 + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
211 + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
212 + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
213 +
214 + self.metrics = {
215 + "loss": to_cpu(total_loss).item(),
216 + "x": to_cpu(loss_x).item(),
217 + "y": to_cpu(loss_y).item(),
218 + "w": to_cpu(loss_w).item(),
219 + "h": to_cpu(loss_h).item(),
220 + "conf": to_cpu(loss_conf).item(),
221 + "cls": to_cpu(loss_cls).item(),
222 + "cls_acc": to_cpu(cls_acc).item(),
223 + "recall50": to_cpu(recall50).item(),
224 + "recall75": to_cpu(recall75).item(),
225 + "precision": to_cpu(precision).item(),
226 + "conf_obj": to_cpu(conf_obj).item(),
227 + "conf_noobj": to_cpu(conf_noobj).item(),
228 + "grid_size": grid_size,
229 + }
230 +
231 + return output, total_loss
232 +
233 +
234 +
235 +
236 +class Darknet(nn.Module):
237 + """YOLOv3 object detection model"""
238 +
239 + def __init__(self, config_path, img_size=416):
240 + super(Darknet, self).__init__()
241 + self.module_defs = parse_model_config(config_path)
242 + self.hyperparams, self.module_list = create_modules(self.module_defs)
243 + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
244 + self.img_size = img_size
245 + self.seen = 0
246 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
247 +
248 + def forward(self, x, targets=None):
249 + img_dim = x.shape[2]
250 + loss = 0
251 + layer_outputs, yolo_outputs = [], []
252 + isfeature = False
253 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
254 + if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
255 + x = module(x)
256 + elif module_def["type"] == "route":
257 + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
258 + if (not isfeature):
259 + featuremap = x
260 + isfeature = True
261 +            elif module_def["type"] == "shortcut":  # no shortcut layers in the tiny cfgs
262 + layer_i = int(module_def["from"])
263 + x = layer_outputs[-1] + layer_outputs[layer_i]
264 + elif module_def["type"] == "yolo":
265 +
266 + x, layer_loss = module[0](x, targets, img_dim)
267 + loss += layer_loss
268 + yolo_outputs.append(x)
269 + layer_outputs.append(x)
270 + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
271 +
272 + # yolo_outputs = non_max_suppression(yolo_outputs, 0.8, 0.4)
273 + # if yolo_outputs is not None:
274 + # res = self.roipool(featuremap, yolo_outputs, targets)
275 +        # always returns the routed feature map first; the conditional only selects the second element
276 +        return featuremap, (yolo_outputs if targets is None else (loss, yolo_outputs))
277 +
278 + def load_darknet_weights(self, weights_path):
279 + """Parses and loads the weights stored in 'weights_path'"""
280 +
281 + # Open the weights file
282 + with open(weights_path, "rb") as f:
283 + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values
284 + self.header_info = header # Needed to write header when saving weights
285 + self.seen = header[3] # number of images seen during training
286 + weights = np.fromfile(f, dtype=np.float32) # The rest are weights
287 +
288 + # Establish cutoff for loading backbone weights
289 + cutoff = None
290 + if "darknet53.conv.74" in weights_path:
291 + cutoff = 75
292 +
293 + ptr = 0
294 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
295 + if i == cutoff:
296 + break
297 + if module_def["type"] == "convolutional":
298 + conv_layer = module[0]
299 + if module_def["batch_normalize"]:
300 + # Load BN bias, weights, running mean and running variance
301 + bn_layer = module[1]
302 + num_b = bn_layer.bias.numel() # Number of biases
303 + # Bias
304 + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
305 + bn_layer.bias.data.copy_(bn_b)
306 + ptr += num_b
307 + # Weight
308 + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
309 + bn_layer.weight.data.copy_(bn_w)
310 + ptr += num_b
311 + # Running Mean
312 + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
313 + bn_layer.running_mean.data.copy_(bn_rm)
314 + ptr += num_b
315 + # Running Var
316 + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
317 + bn_layer.running_var.data.copy_(bn_rv)
318 + ptr += num_b
319 + else:
320 + # Load conv. bias
321 + num_b = conv_layer.bias.numel()
322 + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
323 + conv_layer.bias.data.copy_(conv_b)
324 + ptr += num_b
325 + # Load conv. weights
326 + num_w = conv_layer.weight.numel()
327 + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
328 + conv_layer.weight.data.copy_(conv_w)
329 + ptr += num_w
330 +
331 + def save_darknet_weights(self, path, cutoff=-1):
332 + """
333 + @:param path - path of the new weights file
334 + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
335 + """
336 + fp = open(path, "wb")
337 + self.header_info[3] = self.seen
338 + self.header_info.tofile(fp)
339 +
340 + # Iterate through layers
341 + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
342 + if module_def["type"] == "convolutional":
343 + conv_layer = module[0]
344 + # If batch norm, load bn first
345 + if module_def["batch_normalize"]:
346 + bn_layer = module[1]
347 + bn_layer.bias.data.cpu().numpy().tofile(fp)
348 + bn_layer.weight.data.cpu().numpy().tofile(fp)
349 + bn_layer.running_mean.data.cpu().numpy().tofile(fp)
350 + bn_layer.running_var.data.cpu().numpy().tofile(fp)
351 + # Load conv bias
352 + else:
353 + conv_layer.bias.data.cpu().numpy().tofile(fp)
354 + # Load conv weights
355 + conv_layer.weight.data.cpu().numpy().tofile(fp)
356 +
357 + fp.close()
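A minimal, hedged sketch of driving the modified Darknet above: unlike the upstream YOLOv3 code, forward() also returns the feature map captured at the first [route] layer so the distance head can pool from it, so it needs a cfg that contains a route (such as the third cfg above). Paths and tensors here are placeholders:

import torch

model = Darknet("config/yolov3-tiny_distance.cfg", img_size=416)  # hypothetical cfg path
model.eval()
dummy = torch.zeros(1, 3, 416, 416)
with torch.no_grad():
    featuremap, detections = model(dummy)    # no targets: (routed feature map, raw detections)
print(featuremap.shape, detections.shape)    # e.g. (1, 256, 13, 13) and (1, num_boxes, 5 + classes)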
1 +from __future__ import division
2 +
3 +import torch
4 +import torch.nn as nn
5 +import torch.nn.functional as F
6 +from torch.nn.modules import module
7 +
8 +from utils.utils import *
9 +
10 +
11 +class ROIPool(nn.Module):
12 + def __init__(self, output_size):
13 + super(ROIPool, self).__init__()
14 + self.maxpool = nn.AdaptiveMaxPool2d(output_size)
15 + self.size = output_size
16 + self.fc1 = nn.Linear(2304, 1024)
17 + self.fc2 = nn.Linear(1024, 512)
18 + self.fc3 = nn.Linear(512, 1)
19 + self.softplus = nn.Softplus()
20 + self.smoothl1 = nn.SmoothL1Loss()
21 + self.mse = nn.MSELoss()
22 +
23 +
24 + def target_detection_iou(self, box1, box2):
25 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
26 + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
27 +
28 + # get the corrdinates of the intersection rectangle
29 + b1_x1 = b1_x1.type(torch.float64)
30 + b1_y1 = b1_y1.type(torch.float64)
31 + b1_x2 = b1_x2.type(torch.float64)
32 + b1_y2 = b1_y2.type(torch.float64)
33 +
34 + inter_rect_x1 = torch.max(b1_x1, b2_x1)
35 + inter_rect_y1 = torch.max(b1_y1, b2_y1)
36 + inter_rect_x2 = torch.min(b1_x2, b2_x2)
37 + inter_rect_y2 = torch.min(b1_y2, b2_y2)
38 + # Intersection area
39 + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
40 + inter_rect_y2 - inter_rect_y1 + 1, min=0
41 + )
42 + # Union Area
43 + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
44 + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
45 +
46 + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
47 +
48 + return iou
49 +
50 + def similar_bbox(self, detections, targets):
51 + rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
52 + similar_box = list(range(len(rescaled_boxes)))
53 + for i in range(len(rescaled_boxes)):
54 + for j in range(len(targets)):
55 + target_xyxy = [(targets[j][0]-(targets[j][2]/2))*640, (targets[j][1]-(targets[j][3]/2))*480, (targets[j][0]+(targets[j][2]/2))*640, (targets[j][1]+(targets[j][3]/2))*480]
56 + target_xyxy = torch.tensor(target_xyxy)
57 + iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
58 + if iou > 0.01:
59 + similar_box[i] = targets[j][-1]
60 + break
61 + else:
62 + similar_box[i] = -1
63 + return similar_box
64 +
65 +
66 + def cal_scale(self, x, detections, targets):
67 + targets_distance = targets[:, :4]
68 + square_targets = []
69 +
70 + for target_distance in targets_distance:
71 + x1 = (target_distance[0]-(target_distance[2]/2))*416
72 + y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15
73 + x2 = (target_distance[0]+(target_distance[2]/2))*416
74 + y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15
75 +
76 + square_targets.append([x1, y1, x2, y2])
77 + square_targets = torch.tensor(square_targets)
78 +
79 + scale = get_scale(square_targets)
80 + output_distance = []
81 +
82 + roi_results = []
83 + for i in scale:
84 + x1_scale = i[0]
85 + y1_scale = i[1]
86 + x2_scale = i[2]
87 + y2_scale = i[3]
88 +
89 +            output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]  # note: slices [..., x, y]; cal_scale_evaL below slices [..., y, x]
90 +
91 + output = self.maxpool(output)
92 +
93 + output = output.view(1, -1)
94 + # print(output)
95 + roi_results.append(output)
96 + return roi_results
97 +
98 + def cal_scale_evaL(self, x, detections):
99 + detections = detections[:, :4]
100 + scale = get_scale(detections)
101 + output_distance = []
102 + roi_results = []
103 + for i in scale:
104 + x1_scale = i[0]
105 + y1_scale = i[1]
106 + x2_scale = i[2]
107 + y2_scale = i[3]
108 +
109 + output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
110 + output = self.maxpool(output)
111 + output = output.view(1, -1)
112 + roi_results.append(output)
113 + return roi_results
114 +
115 + def forward(self, x, detections, targets=None):
116 + if targets is not None:
117 + distances = targets[:, 4]
118 + distances = distances * 10
119 +            roi_results = self.cal_scale(x, detections, targets)
120 +
121 +            output = torch.cat(roi_results, 0)
122 +            # three fully connected layers regress one distance per RoI; Softplus keeps it positive
123 +            output = self.fc1(output)
124 +            output = self.fc2(output)
125 +            output = self.fc3(output)
126 +            output = self.softplus(output)
127 +
128 +            distances = distances.cuda()
129 +            # squeeze the (N, 1) regression output to (N,) so the loss compares matching shapes
130 +            loss = self.smoothl1(output.squeeze(-1), distances.float())
131 +            return loss, output
132 +
133 +        else:
134 +            roi_results = self.cal_scale_evaL(x, detections)
135 +            output = torch.cat(roi_results, 0)
136 +            output = self.fc1(output)
137 +            output = self.fc2(output)
138 +            output = self.fc3(output)
139 +            output = self.softplus(output)
140 +            return output
269 +
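ROIPool is the distance head: for each target box, cal_scale converts the normalized box to feature-grid cells via get_scale, crops the shared feature map, adaptive-max-pools the crop to a fixed size, and three FC layers with a Softplus regress a single positive distance (the fc1 input of 2304 matches a 256-channel map pooled to 3x3, so output_size=3 is assumed below). A hedged usage sketch with made-up tensors standing in for the detector outputs; the forward pass calls .cuda() on the targets, so a GPU is assumed:

import torch

roipool = ROIPool(output_size=3).cuda()          # 256 channels x 3 x 3 pooled = 2304 = fc1 in_features
featuremap = torch.randn(1, 256, 13, 13).cuda()  # routed feature map returned by Darknet.forward

# rows of (x, y, w, h, distance) in normalized coordinates, as ListDataset's targets_distance provides;
# detections are not used by cal_scale in the training branch
targets = torch.tensor([[0.5, 0.5, 0.2, 0.3, 1.2]])
loss, pred_distance = roipool(featuremap, detections=None, targets=targets)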
1 +import torch
2 +import torch.nn.functional as F
3 +import numpy as np
4 +
5 +
6 +def horisontal_flip(images, targets):
7 + images = torch.flip(images, [-1])
8 + targets[:, 2] = 1 - targets[:, 2]
9 + return images, targets
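horisontal_flip mirrors the image tensor along its last (width) axis and mirrors the normalized x-centre stored in column 2 of each target row (sample_idx, class, x, y, w, h). A small hedged check:

import torch

img = torch.arange(12.0).view(1, 3, 4)                      # toy 1-channel 3x4 image
targets = torch.tensor([[0.0, 1.0, 0.25, 0.5, 0.1, 0.2]])
img_f, targets_f = horisontal_flip(img, targets)
assert torch.equal(img_f, torch.flip(img, [-1]))
assert targets_f[0, 2] == 0.75                              # x-centre mirrored: 1 - 0.25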
1 +import glob
2 +import random
3 +import os
4 +import sys
5 +import numpy as np
6 +from PIL import Image
7 +import torch
8 +import torch.nn.functional as F
9 +import time
10 +
11 +from utils.augmentations import horisontal_flip
12 +from torch.utils.data import Dataset
13 +import torchvision.transforms as transforms
14 +
15 +
16 +def pad_to_square(img, pad_value):
17 + c, h, w = img.shape
18 + dim_diff = np.abs(h - w)
19 + # (upper / left) padding and (lower / right) padding
20 + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
21 + # Determine padding
22 + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
23 + # Add padding
24 + img = F.pad(img, pad, "constant", value=pad_value)
25 +
26 + return img, pad
27 +
28 +
29 +def resize(image, size):
30 + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
31 + return image
32 +
33 +
34 +def random_resize(images, min_size=288, max_size=448):
35 + new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
36 + images = F.interpolate(images, size=new_size, mode="nearest")
37 + return images
38 +
39 +
40 +class ImageFolder(Dataset):
41 + def __init__(self, folder_path, img_size=416):
42 + self.files = sorted(glob.glob("%s/*.*" % folder_path))
43 + self.img_size = img_size
44 +
45 + def __getitem__(self, index):
46 + img_path = self.files[index % len(self.files)]
47 + # Extract image as PyTorch tensor
48 + img = transforms.ToTensor()(Image.open(img_path))
49 + # Pad to square resolution
50 + img, _ = pad_to_square(img, 0)
51 + # Resize
52 + img = resize(img, self.img_size)
53 +
54 + return img_path, img
55 +
56 + def __len__(self):
57 + return len(self.files)
58 +
59 +
60 +class ListDataset(Dataset):
61 + def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
62 + with open(list_path, "r") as file:
63 + self.img_files = file.readlines()
64 +
65 + self.label_files = [
66 + path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
67 + for path in self.img_files
68 + ]
69 + self.img_size = img_size
70 + self.max_objects = 100
71 + self.augment = augment
72 + self.multiscale = multiscale
73 + self.normalized_labels = normalized_labels
74 + self.min_size = self.img_size - 3 * 32
75 + self.max_size = self.img_size + 3 * 32
76 + self.batch_count = 0
77 +
78 + def __getitem__(self, index):
79 +
80 + # ---------
81 + # Image
82 + # ---------
83 +
84 + img_path = self.img_files[index % len(self.img_files)].rstrip()
85 + # Extract image as PyTorch tensor
86 + img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
87 +
88 + # Handle images with less than three channels
89 + if len(img.shape) != 3:
90 + img = img.unsqueeze(0)
91 +            img = img.expand(3, *img.shape[1:])
92 +
93 + _, h, w = img.shape
94 + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
95 + # Pad to square resolution
96 + img, pad = pad_to_square(img, 0)
97 + _, padded_h, padded_w = img.shape
98 +
99 + # ---------
100 + # Label
101 + # ---------
102 +
103 + label_path = self.label_files[index % len(self.img_files)].rstrip()
104 +
105 + targets = None
106 + targets_distance = None
107 + if os.path.exists(label_path):
108 +            label_data = np.loadtxt(label_path)  # columns: class, x, y, w, h, distance
109 +            # the first five columns are the detection target (class, x, y, w, h)
110 +            boxes = torch.from_numpy(
111 +                (label_data[:, :-1] if label_data.ndim == 2 else label_data[:-1]).reshape(-1, 5))
112 + # Extract coordinates for unpadded + unscaled image
113 + x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
114 + y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
115 + x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
116 + y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
117 + # Adjust for added padding
118 + x1 += pad[0]
119 + y1 += pad[2]
120 + x2 += pad[1]
121 + y2 += pad[3]
122 + # Returns (x, y, w, h)
123 + boxes[:, 1] = ((x1 + x2) / 2) / padded_w
124 + boxes[:, 2] = ((y1 + y2) / 2) / padded_h
125 + boxes[:, 3] *= w_factor / padded_w
126 + boxes[:, 4] *= h_factor / padded_h
127 +
128 + targets = torch.zeros((len(boxes), 6))
129 + targets[:, 1:] = boxes
130 +
131 +            # the last five columns (x, y, w, h, distance) are kept separately
132 +            # for the ROIPool distance head
133 +            targets_distance = torch.from_numpy(
134 +                (label_data[:, 1:] if label_data.ndim == 2 else label_data[1:]).reshape(-1, 5))
135 +
136 + # Apply augmentations
137 + # if self.augment:
138 + # if np.random.random() < 0.5:
139 + # img, targets = horisontal_flip(img, targets)
140 +
141 + return img_path, img, targets, targets_distance
142 +
143 + def collate_fn(self, batch):
144 + paths, imgs, targets, targets_distance = list(zip(*batch))
145 + # Remove empty placeholder targets
146 + targets = [boxes for boxes in targets if boxes is not None]
147 + # Add sample index to targets
148 + for i, boxes in enumerate(targets):
149 + boxes[:, 0] = i
150 + targets = torch.cat(targets, 0)
151 + # Selects new image size every tenth batch
152 + if self.multiscale and self.batch_count % 10 == 0:
153 + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
154 + # Resize images to input shape
155 + imgs = torch.stack([resize(img, self.img_size) for img in imgs])
156 + self.batch_count += 1
157 + return paths, imgs, targets, targets_distance
158 +
159 + def __len__(self):
160 + return len(self.img_files)
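ListDataset returns a 4-tuple per sample (path, image, detection targets, distance targets), so a DataLoader has to use the dataset's own collate_fn. A hedged wiring sketch with a placeholder list file:

from torch.utils.data import DataLoader

dataset = ListDataset("data/cafe_distance/train.txt", img_size=416, augment=False, multiscale=False)
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)

for paths, imgs, targets, targets_distance in loader:
    # imgs: (B, 3, 416, 416); targets: (num_boxes, 6) with the sample index in column 0;
    # targets_distance: a tuple of per-image (n_i, 5) tensors holding (x, y, w, h, distance)
    break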
1 +import tensorflow as tf
2 +
3 +
4 +class Logger(object):
5 + def __init__(self, log_dir):
6 + """Create a summary writer logging to log_dir."""
7 + self.writer = tf.summary.create_file_writer(log_dir)
8 +
9 + def scalar_summary(self, tag, value, step):
10 + """Log a scalar variable."""
11 +        with self.writer.as_default():
12 +            tf.summary.scalar(tag, value, step=step)
13 +            self.writer.flush()
14 +
15 +    def list_of_scalars_summary(self, tag_value_pairs, step):
16 +        """Log scalar variables."""
17 +        with self.writer.as_default():
18 +            for tag, value in tag_value_pairs:
19 +                tf.summary.scalar(tag, value, step=step)
20 +            self.writer.flush()
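A hedged sketch of how this Logger is typically driven from a training loop (the directory and tags are placeholders):

logger = Logger("logs")
logger.scalar_summary("loss/total", 0.123, step=1)
logger.list_of_scalars_summary([("loss/x", 0.01), ("loss/y", 0.02)], step=1)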
1 +
2 +
3 +def parse_model_config(path):
4 + """Parses the yolo-v3 layer configuration file and returns module definitions"""
5 + file = open(path, 'r')
6 + lines = file.read().split('\n')
7 + lines = [x for x in lines if x and not x.startswith('#')]
8 + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
9 + module_defs = []
10 + for line in lines:
11 + if line.startswith('['): # This marks the start of a new block
12 + module_defs.append({})
13 + module_defs[-1]['type'] = line[1:-1].rstrip()
14 + if module_defs[-1]['type'] == 'convolutional':
15 + module_defs[-1]['batch_normalize'] = 0
16 + else:
17 + key, value = line.split("=")
18 + value = value.strip()
19 + module_defs[-1][key.rstrip()] = value.strip()
20 +
21 + return module_defs
22 +
23 +def parse_data_config(path):
24 + """Parses the data configuration file"""
25 + options = dict()
26 + options['gpus'] = '0,1,2,3'
27 + options['num_workers'] = '10'
28 + with open(path, 'r') as fp:
29 + lines = fp.readlines()
30 + for line in lines:
31 + line = line.strip()
32 + if line == '' or line.startswith('#'):
33 + continue
34 + key, value = line.split('=')
35 + options[key.strip()] = value.strip()
36 + return options
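Both parsers return plain Python structures, so the .data and .cfg files at the top of this diff can be inspected directly. A hedged sketch (file paths are hypothetical; values come back as strings):

opts = parse_data_config("data/cafe_distance.data")
print(opts["classes"], opts["train"])                   # '5', 'data/cafe_distance/train.txt'

module_defs = parse_model_config("config/yolov3-tiny_distance.cfg")
print(module_defs[0]["type"], module_defs[1]["type"])   # 'net', 'convolutional'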
1 +from __future__ import division
2 +import math
3 +import time
4 +import tqdm
5 +import torch
6 +import torch.nn as nn
7 +import torch.nn.functional as F
8 +from torch.autograd import Variable
9 +import numpy as np
10 +import matplotlib.pyplot as plt
11 +import matplotlib.patches as patches
12 +
13 +
14 +def to_cpu(tensor):
15 + return tensor.detach().cpu()
16 +
17 +
18 +def load_classes(path):
19 + """
20 + Loads class labels at 'path'
21 + """
22 + fp = open(path, "r")
23 + names = fp.read().split("\n")[:-1]
24 + return names
25 +
26 +
27 +def weights_init_normal(m):
28 + classname = m.__class__.__name__
29 + if classname.find("Conv") != -1:
30 + torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
31 + elif classname.find("BatchNorm2d") != -1:
32 + torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
33 + torch.nn.init.constant_(m.bias.data, 0.0)
34 +
35 +
36 +def rescale_boxes(boxes, current_dim, original_shape):
37 + """ Rescales bounding boxes to the original shape """
38 + orig_h, orig_w = original_shape
39 + # The amount of padding that was added
40 + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
41 + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
42 + # Image height and width after padding is removed
43 + unpad_h = current_dim - pad_y
44 + unpad_w = current_dim - pad_x
45 + # Rescale bounding boxes to dimension of original image
46 + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
47 + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
48 + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
49 + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
50 + return boxes
51 +
52 +
53 +def xywh2xyxy(x):
54 + y = x.new(x.shape)
55 + y[..., 0] = x[..., 0] - x[..., 2] / 2
56 + y[..., 1] = x[..., 1] - x[..., 3] / 2
57 + y[..., 2] = x[..., 0] + x[..., 2] / 2
58 + y[..., 3] = x[..., 1] + x[..., 3] / 2
59 + return y
60 +
61 +
62 +def ap_per_class(tp, conf, pred_cls, target_cls):
63 + """ Compute the average precision, given the recall and precision curves.
64 + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
65 + # Arguments
66 + tp: True positives (list).
67 + conf: Objectness value from 0-1 (list).
68 + pred_cls: Predicted object classes (list).
69 + target_cls: True object classes (list).
70 + # Returns
71 + The average precision as computed in py-faster-rcnn.
72 + """
73 +
74 + # Sort by objectness
75 + i = np.argsort(-conf)
76 + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
77 +
78 + # Find unique classes
79 + unique_classes = np.unique(target_cls)
80 +
81 + # Create Precision-Recall curve and compute AP for each class
82 + ap, p, r = [], [], []
83 + for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
84 + i = pred_cls == c
85 + n_gt = (target_cls == c).sum() # Number of ground truth objects
86 + n_p = i.sum() # Number of predicted objects
87 +
88 + if n_p == 0 and n_gt == 0:
89 + continue
90 + elif n_p == 0 or n_gt == 0:
91 + ap.append(0)
92 + r.append(0)
93 + p.append(0)
94 + else:
95 + # Accumulate FPs and TPs
96 + fpc = (1 - tp[i]).cumsum()
97 + tpc = (tp[i]).cumsum()
98 +
99 + # Recall
100 + recall_curve = tpc / (n_gt + 1e-16)
101 + r.append(recall_curve[-1])
102 +
103 + # Precision
104 + precision_curve = tpc / (tpc + fpc)
105 + p.append(precision_curve[-1])
106 +
107 + # AP from recall-precision curve
108 + ap.append(compute_ap(recall_curve, precision_curve))
109 +
110 + # Compute F1 score (harmonic mean of precision and recall)
111 + p, r, ap = np.array(p), np.array(r), np.array(ap)
112 + f1 = 2 * p * r / (p + r + 1e-16)
113 +
114 + return p, r, ap, f1, unique_classes.astype("int32")
115 +
116 +
117 +def compute_ap(recall, precision):
118 + """ Compute the average precision, given the recall and precision curves.
119 + Code originally from https://github.com/rbgirshick/py-faster-rcnn.
120 +
121 + # Arguments
122 + recall: The recall curve (list).
123 + precision: The precision curve (list).
124 + # Returns
125 + The average precision as computed in py-faster-rcnn.
126 + """
127 + # correct AP calculation
128 + # first append sentinel values at the end
129 + mrec = np.concatenate(([0.0], recall, [1.0]))
130 + mpre = np.concatenate(([0.0], precision, [0.0]))
131 +
132 + # compute the precision envelope
133 + for i in range(mpre.size - 1, 0, -1):
134 + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
135 +
136 + # to calculate area under PR curve, look for points
137 + # where X axis (recall) changes value
138 + i = np.where(mrec[1:] != mrec[:-1])[0]
139 +
140 + # and sum (\Delta recall) * prec
141 + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
142 + return ap
143 +
144 +
145 +def get_batch_statistics(outputs, targets, iou_threshold):
146 + """ Compute true positives, predicted scores and predicted labels per sample """
147 + batch_metrics = []
148 + for sample_i in range(len(outputs)):
149 +
150 + if outputs[sample_i] is None:
151 + continue
152 +
153 + output = outputs[sample_i]
154 + pred_boxes = output[:, :4]
155 + pred_scores = output[:, 4]
156 + pred_labels = output[:, -1]
157 +
158 + true_positives = np.zeros(pred_boxes.shape[0])
159 +
160 + annotations = targets[targets[:, 0] == sample_i][:, 1:]
161 + target_labels = annotations[:, 0] if len(annotations) else []
162 + if len(annotations):
163 + detected_boxes = []
164 + target_boxes = annotations[:, 1:]
165 +
166 + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
167 +
168 + # If targets are found break
169 + if len(detected_boxes) == len(annotations):
170 + break
171 +
172 + # Ignore if label is not one of the target labels
173 + if pred_label not in target_labels:
174 + continue
175 +
176 + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
177 + if iou >= iou_threshold and box_index not in detected_boxes:
178 + true_positives[pred_i] = 1
179 + detected_boxes += [box_index]
180 + batch_metrics.append([true_positives, pred_scores, pred_labels])
181 + return batch_metrics
182 +
183 +
184 +def bbox_wh_iou(wh1, wh2):
185 + wh2 = wh2.t()
186 + w1, h1 = wh1[0], wh1[1]
187 + w2, h2 = wh2[0], wh2[1]
188 + inter_area = torch.min(w1, w2) * torch.min(h1, h2)
189 + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
190 + return inter_area / union_area
191 +
192 +
193 +def bbox_iou(box1, box2, x1y1x2y2=True):
194 + """
195 + Returns the IoU of two bounding boxes
196 + """
197 + if not x1y1x2y2:
198 + # Transform from center and width to exact coordinates
199 + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
200 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
201 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
202 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
203 + else:
204 + # Get the coordinates of bounding boxes
205 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
206 + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
207 +
208 + # get the corrdinates of the intersection rectangle
209 + inter_rect_x1 = torch.max(b1_x1, b2_x1)
210 + inter_rect_y1 = torch.max(b1_y1, b2_y1)
211 + inter_rect_x2 = torch.min(b1_x2, b2_x2)
212 + inter_rect_y2 = torch.min(b1_y2, b2_y2)
213 + # Intersection area
214 + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
215 + inter_rect_y2 - inter_rect_y1 + 1, min=0
216 + )
217 + # Union Area
218 + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
219 + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
220 +
221 + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
222 +
223 + return iou
224 +
225 +
226 +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
227 + """
228 + Removes detections with lower object confidence score than 'conf_thres' and performs
229 + Non-Maximum Suppression to further filter detections.
230 + Returns detections with shape:
231 + (x1, y1, x2, y2, object_conf, class_score, class_pred)
232 + """
233 +
234 + # From (center x, center y, width, height) to (x1, y1, x2, y2)
235 + prediction[..., :4] = xywh2xyxy(prediction[..., :4])
236 + output = [None for _ in range(len(prediction))]
237 + for image_i, image_pred in enumerate(prediction):
238 + # Filter out confidence scores below threshold
239 + image_pred = image_pred[image_pred[:, 4] >= conf_thres]
240 + # If none are remaining => process next image
241 + if not image_pred.size(0):
242 + continue
243 + # Object confidence times class confidence
244 + score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
245 + # Sort by it
246 + image_pred = image_pred[(-score).argsort()]
247 + class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
248 + detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
249 + # Perform non-maximum suppression
250 + keep_boxes = []
251 + while detections.size(0):
252 + large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
253 + label_match = detections[0, -1] == detections[:, -1]
254 + # Indices of boxes with lower confidence scores, large IOUs and matching labels
255 + invalid = large_overlap & label_match
256 + weights = detections[invalid, 4:5]
257 + # Merge overlapping bboxes by order of confidence
258 + detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
259 + keep_boxes += [detections[0]]
260 + detections = detections[~invalid]
261 + if keep_boxes:
262 + output[image_i] = torch.stack(keep_boxes)
263 +
264 + return output
265 +
266 +
267 +def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
268 +
269 + ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
270 + FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
271 +
272 + nB = pred_boxes.size(0)
273 + nA = pred_boxes.size(1)
274 + nC = pred_cls.size(-1)
275 + nG = pred_boxes.size(2)
276 +
277 + # Output tensors
278 + obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
279 + noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
280 + class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
281 + iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
282 + tx = FloatTensor(nB, nA, nG, nG).fill_(0)
283 + ty = FloatTensor(nB, nA, nG, nG).fill_(0)
284 + tw = FloatTensor(nB, nA, nG, nG).fill_(0)
285 + th = FloatTensor(nB, nA, nG, nG).fill_(0)
286 + tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
287 +
288 + # Convert to position relative to box
289 + target_boxes = target[:, 2:6] * nG
290 + gxy = target_boxes[:, :2]
291 + gwh = target_boxes[:, 2:]
292 + # Get anchors with best iou
293 + ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
294 + best_ious, best_n = ious.max(0)
295 + # Separate target values
296 + b, target_labels = target[:, :2].long().t()
297 + gx, gy = gxy.t()
298 + gw, gh = gwh.t()
299 + gi, gj = gxy.long().t()
300 + # Set masks
301 + obj_mask[b, best_n, gj, gi] = 1
302 + noobj_mask[b, best_n, gj, gi] = 0
303 +
304 + # Set noobj mask to zero where iou exceeds ignore threshold
305 + for i, anchor_ious in enumerate(ious.t()):
306 + noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
307 +
308 + # Coordinates
309 + tx[b, best_n, gj, gi] = gx - gx.floor()
310 + ty[b, best_n, gj, gi] = gy - gy.floor()
311 + # Width and height
312 + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
313 + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
314 + # One-hot encoding of label
315 + tcls[b, best_n, gj, gi, target_labels] = 1
316 + # Compute label correctness and iou at best anchor
317 + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
318 + iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
319 +
320 + tconf = obj_mask.float()
321 + return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
322 +
323 +def count_parameters(model):
324 + return sum(p.numel() for p in model.parameters() if p.requires_grad)
325 +
326 +def get_scale(detections):
327 + num_roi = detections.size(0)
328 + outputs = []
329 + for num in range(num_roi):
330 + x1, y1, x2, y2 = detections[num]
331 + standard = 416/7
332 + x1_scale = math.floor(x1/standard)
333 + y1_scale = math.floor(y1/standard)
334 + x2_scale = math.ceil(x2/standard)
335 + y2_scale = math.ceil(y2/standard)
336 + outputs.append([x1_scale, y1_scale, x2_scale, y2_scale])
337 +
338 + return outputs
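get_scale maps pixel-space boxes onto a coarse grid by dividing each coordinate by 416/7 (about 59.4) and taking floor for the top-left corner and ceil for the bottom-right; cal_scale and cal_scale_evaL use the resulting cell indices to crop the feature map. A worked, hedged example:

import torch

boxes = torch.tensor([[100.0, 120.0, 220.0, 300.0]])   # (x1, y1, x2, y2) in 416x416 pixel coordinates
print(get_scale(boxes))
# [[1, 2, 4, 6]]: floor(100/59.4) = 1, floor(120/59.4) = 2, ceil(220/59.4) = 4, ceil(300/59.4) = 6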