Showing 25 changed files with 1645 additions and 0 deletions
source_code/config/cafe.data
0 → 100644
source_code/config/cafe_distance.data
0 → 100644
source_code/config/testdata.data
0 → 100644
source_code/config/tiny1.cfg
0 → 100644
1 | +[net] | ||
2 | +# Testing | ||
3 | +# batch=1 | ||
4 | +# subdivisions=1 | ||
5 | +# Training | ||
6 | +batch=8 | ||
7 | +subdivisions=2 | ||
8 | +width=416 | ||
9 | +height=416 | ||
10 | +channels=3 | ||
11 | +momentum=0.9 | ||
12 | +decay=0.0005 | ||
13 | +angle=0 | ||
14 | +saturation = 1.5 | ||
15 | +exposure = 1.5 | ||
16 | +hue=.1 | ||
17 | + | ||
18 | +learning_rate=0.001 | ||
19 | +burn_in=1000 | ||
20 | +max_batches = 500200 | ||
21 | +policy=steps | ||
22 | +steps=400000,450000 | ||
23 | +scales=.1,.1 | ||
24 | + | ||
25 | +# 0 | ||
26 | +[convolutional] | ||
27 | +batch_normalize=1 | ||
28 | +filters=16 | ||
29 | +size=3 | ||
30 | +stride=1 | ||
31 | +pad=1 | ||
32 | +activation=leaky | ||
33 | + | ||
34 | +# 1 | ||
35 | +[maxpool] | ||
36 | +size=2 | ||
37 | +stride=2 | ||
38 | + | ||
39 | +# 2 | ||
40 | +[convolutional] | ||
41 | +batch_normalize=1 | ||
42 | +filters=32 | ||
43 | +size=3 | ||
44 | +stride=1 | ||
45 | +pad=1 | ||
46 | +activation=leaky | ||
47 | + | ||
48 | +# 3 | ||
49 | +[maxpool] | ||
50 | +size=2 | ||
51 | +stride=2 | ||
52 | + | ||
53 | +# 4 | ||
54 | +[convolutional] | ||
55 | +batch_normalize=1 | ||
56 | +filters=64 | ||
57 | +size=3 | ||
58 | +stride=1 | ||
59 | +pad=1 | ||
60 | +activation=leaky | ||
61 | + | ||
62 | +# 5 | ||
63 | +[maxpool] | ||
64 | +size=2 | ||
65 | +stride=2 | ||
66 | + | ||
67 | +# 6 | ||
68 | +[convolutional] | ||
69 | +batch_normalize=1 | ||
70 | +filters=128 | ||
71 | +size=3 | ||
72 | +stride=1 | ||
73 | +pad=1 | ||
74 | +activation=leaky | ||
75 | + | ||
76 | +# 7 | ||
77 | +[maxpool] | ||
78 | +size=2 | ||
79 | +stride=2 | ||
80 | + | ||
81 | +# 8 | ||
82 | +[convolutional] | ||
83 | +batch_normalize=1 | ||
84 | +filters=256 | ||
85 | +size=3 | ||
86 | +stride=1 | ||
87 | +pad=1 | ||
88 | +activation=leaky | ||
89 | + | ||
90 | +# 9 | ||
91 | +[convolutional] | ||
92 | +batch_normalize=1 | ||
93 | +filters=512 | ||
94 | +size=3 | ||
95 | +stride=1 | ||
96 | +pad=1 | ||
97 | +activation=leaky | ||
98 | + | ||
99 | +# 10 | ||
100 | +[convolutional] | ||
101 | +size=1 | ||
102 | +stride=1 | ||
103 | +pad=1 | ||
104 | +filters=42 | ||
105 | +activation=linear | ||
106 | + | ||
107 | +# 11 | ||
108 | +[yolo] | ||
109 | +mask = 0, 1, 2 | ||
110 | +anchors = 37,58, 81,82, 135,169 | ||
111 | +classes=9 | ||
112 | +num=3 | ||
113 | +jitter=.3 | ||
114 | +ignore_thresh = .7 | ||
115 | +truth_thresh = 1 | ||
116 | +random=1 | ||
117 | + | ||
118 | + | ||
119 | + | ||
120 | + | ||
121 | + |
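The last convolutional layer in tiny1.cfg (filters=42) follows the usual darknet sizing rule for a YOLO head: filters = len(mask) × (classes + 5). A minimal sketch of the arithmetic, with illustrative variable names that are not part of the repository:

```python
# Sizing check for the pre-YOLO convolution in tiny1.cfg (names are illustrative).
num_mask_anchors = 3   # mask = 0, 1, 2
num_classes = 9        # classes=9
box_attrs = 5          # x, y, w, h, objectness
assert num_mask_anchors * (num_classes + box_attrs) == 42  # matches filters=42 above
```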
source_code/config/tiny2.cfg
0 → 100644
1 | +[net] | ||
2 | +# Testing | ||
3 | +# batch=1 | ||
4 | +# subdivisions=1 | ||
5 | +# Training | ||
6 | +batch=8 | ||
7 | +subdivisions=2 | ||
8 | +width=416 | ||
9 | +height=416 | ||
10 | +channels=3 | ||
11 | +momentum=0.9 | ||
12 | +decay=0.0005 | ||
13 | +angle=0 | ||
14 | +saturation = 1.5 | ||
15 | +exposure = 1.5 | ||
16 | +hue=.1 | ||
17 | + | ||
18 | +learning_rate=0.001 | ||
19 | +burn_in=1000 | ||
20 | +max_batches = 500200 | ||
21 | +policy=steps | ||
22 | +steps=400000,450000 | ||
23 | +scales=.1,.1 | ||
24 | + | ||
25 | +# 0 | ||
26 | +[convolutional] | ||
27 | +batch_normalize=1 | ||
28 | +filters=16 | ||
29 | +size=3 | ||
30 | +stride=1 | ||
31 | +pad=1 | ||
32 | +activation=leaky | ||
33 | + | ||
34 | +# 1 | ||
35 | +[maxpool] | ||
36 | +size=2 | ||
37 | +stride=2 | ||
38 | + | ||
39 | +# 2 | ||
40 | +[convolutional] | ||
41 | +batch_normalize=1 | ||
42 | +filters=32 | ||
43 | +size=3 | ||
44 | +stride=1 | ||
45 | +pad=1 | ||
46 | +activation=leaky | ||
47 | + | ||
48 | +# 3 | ||
49 | +[maxpool] | ||
50 | +size=2 | ||
51 | +stride=2 | ||
52 | + | ||
53 | +# 4 | ||
54 | +[convolutional] | ||
55 | +batch_normalize=1 | ||
56 | +filters=64 | ||
57 | +size=3 | ||
58 | +stride=1 | ||
59 | +pad=1 | ||
60 | +activation=leaky | ||
61 | + | ||
62 | +# 5 | ||
63 | +[maxpool] | ||
64 | +size=2 | ||
65 | +stride=2 | ||
66 | + | ||
67 | +# 6 | ||
68 | +[convolutional] | ||
69 | +batch_normalize=1 | ||
70 | +filters=128 | ||
71 | +size=3 | ||
72 | +stride=1 | ||
73 | +pad=1 | ||
74 | +activation=leaky | ||
75 | + | ||
76 | +# 7 | ||
77 | +[maxpool] | ||
78 | +size=2 | ||
79 | +stride=2 | ||
80 | + | ||
81 | +# 8 | ||
82 | +[convolutional] | ||
83 | +batch_normalize=1 | ||
84 | +filters=256 | ||
85 | +size=3 | ||
86 | +stride=1 | ||
87 | +pad=1 | ||
88 | +activation=leaky | ||
89 | + | ||
90 | +# 9 | ||
91 | +[convolutional] | ||
92 | +size=1 | ||
93 | +stride=1 | ||
94 | +pad=1 | ||
95 | +filters=42 | ||
96 | +activation=linear | ||
97 | + | ||
98 | +# 10 | ||
99 | +[yolo] | ||
100 | +mask = 0, 1, 2 | ||
101 | +anchors = 59,119, 81,82, 135,169 | ||
102 | +classes=9 | ||
103 | +num=3 | ||
104 | +jitter=.3 | ||
105 | +ignore_thresh = .7 | ||
106 | +truth_thresh = 1 | ||
107 | +random=1 | ||
108 | + | ||
109 | + | ||
110 | + | ||
111 | + | ||
112 | + |
source_code/config/yolov3-tiny.cfg
0 → 100644
1 | +[net] | ||
2 | +# Testing | ||
3 | +# batch=1 | ||
4 | +# subdivisions=1 | ||
5 | +# Training | ||
6 | +batch=8 | ||
7 | +subdivisions=2 | ||
8 | +width=416 | ||
9 | +height=416 | ||
10 | +channels=3 | ||
11 | +momentum=0.9 | ||
12 | +decay=0.0005 | ||
13 | +angle=0 | ||
14 | +saturation = 1.5 | ||
15 | +exposure = 1.5 | ||
16 | +hue=.1 | ||
17 | + | ||
18 | +learning_rate=0.001 | ||
19 | +burn_in=1000 | ||
20 | +max_batches = 500200 | ||
21 | +policy=steps | ||
22 | +steps=400000,450000 | ||
23 | +scales=.1,.1 | ||
24 | + | ||
25 | +# 0 | ||
26 | +[convolutional] | ||
27 | +batch_normalize=1 | ||
28 | +filters=16 | ||
29 | +size=3 | ||
30 | +stride=1 | ||
31 | +pad=1 | ||
32 | +activation=leaky | ||
33 | + | ||
34 | +# 1 | ||
35 | +[maxpool] | ||
36 | +size=2 | ||
37 | +stride=2 | ||
38 | + | ||
39 | +# 2 | ||
40 | +[convolutional] | ||
41 | +batch_normalize=1 | ||
42 | +filters=32 | ||
43 | +size=3 | ||
44 | +stride=1 | ||
45 | +pad=1 | ||
46 | +activation=leaky | ||
47 | + | ||
48 | +# 3 | ||
49 | +[maxpool] | ||
50 | +size=2 | ||
51 | +stride=2 | ||
52 | + | ||
53 | +# 4 | ||
54 | +[convolutional] | ||
55 | +batch_normalize=1 | ||
56 | +filters=64 | ||
57 | +size=3 | ||
58 | +stride=1 | ||
59 | +pad=1 | ||
60 | +activation=leaky | ||
61 | + | ||
62 | +# 5 | ||
63 | +[maxpool] | ||
64 | +size=2 | ||
65 | +stride=2 | ||
66 | + | ||
67 | +# 6 | ||
68 | +[convolutional] | ||
69 | +batch_normalize=1 | ||
70 | +filters=128 | ||
71 | +size=3 | ||
72 | +stride=1 | ||
73 | +pad=1 | ||
74 | +activation=leaky | ||
75 | + | ||
76 | +# 7 | ||
77 | +[maxpool] | ||
78 | +size=2 | ||
79 | +stride=2 | ||
80 | + | ||
81 | +# 8 | ||
82 | +[convolutional] | ||
83 | +batch_normalize=1 | ||
84 | +filters=256 | ||
85 | +size=3 | ||
86 | +stride=1 | ||
87 | +pad=1 | ||
88 | +activation=leaky | ||
89 | + | ||
90 | +# 9 | ||
91 | +[maxpool] | ||
92 | +size=2 | ||
93 | +stride=2 | ||
94 | + | ||
95 | +# 10 | ||
96 | +[convolutional] | ||
97 | +batch_normalize=1 | ||
98 | +filters=512 | ||
99 | +size=3 | ||
100 | +stride=1 | ||
101 | +pad=1 | ||
102 | +activation=leaky | ||
103 | + | ||
104 | +# 11 | ||
105 | +[maxpool] | ||
106 | +size=2 | ||
107 | +stride=1 | ||
108 | + | ||
109 | +# 12 | ||
110 | +[convolutional] | ||
111 | +batch_normalize=1 | ||
112 | +filters=1024 | ||
113 | +size=3 | ||
114 | +stride=1 | ||
115 | +pad=1 | ||
116 | +activation=leaky | ||
117 | + | ||
118 | +########### | ||
119 | + | ||
120 | +# 13 | ||
121 | +[convolutional] | ||
122 | +batch_normalize=1 | ||
123 | +filters=256 | ||
124 | +size=1 | ||
125 | +stride=1 | ||
126 | +pad=1 | ||
127 | +activation=leaky | ||
128 | + | ||
129 | +# 14 | ||
130 | +[convolutional] | ||
131 | +batch_normalize=1 | ||
132 | +filters=512 | ||
133 | +size=3 | ||
134 | +stride=1 | ||
135 | +pad=1 | ||
136 | +activation=leaky | ||
137 | + | ||
138 | +# 15 | ||
139 | +[convolutional] | ||
140 | +size=1 | ||
141 | +stride=1 | ||
142 | +pad=1 | ||
143 | +filters=30 | ||
144 | +activation=linear | ||
145 | + | ||
146 | + | ||
147 | + | ||
148 | +# 16 | ||
149 | +[yolo] | ||
150 | +mask = 3,4,5 | ||
151 | +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 | ||
152 | +classes=5 | ||
153 | +num=6 | ||
154 | +jitter=.3 | ||
155 | +ignore_thresh = .7 | ||
156 | +truth_thresh = 1 | ||
157 | +random=1 | ||
158 | + | ||
159 | +# 17 | ||
160 | +[route] | ||
161 | +layers = -4 | ||
162 | + | ||
163 | +# 18 | ||
164 | +[convolutional] | ||
165 | +batch_normalize=1 | ||
166 | +filters=128 | ||
167 | +size=1 | ||
168 | +stride=1 | ||
169 | +pad=1 | ||
170 | +activation=leaky | ||
171 | + | ||
172 | +# 19 | ||
173 | +[upsample] | ||
174 | +stride=2 | ||
175 | + | ||
176 | +# 20 | ||
177 | +[route] | ||
178 | +layers = -1, 8 | ||
179 | + | ||
180 | +# 21 | ||
181 | +[convolutional] | ||
182 | +batch_normalize=1 | ||
183 | +filters=256 | ||
184 | +size=3 | ||
185 | +stride=1 | ||
186 | +pad=1 | ||
187 | +activation=leaky | ||
188 | + | ||
189 | +# 22 | ||
190 | +[convolutional] | ||
191 | +size=1 | ||
192 | +stride=1 | ||
193 | +pad=1 | ||
194 | +filters=30 | ||
195 | +activation=linear | ||
196 | + | ||
197 | +# 23 | ||
198 | +[yolo] | ||
199 | +mask = 0,1,2 | ||
200 | +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 | ||
201 | +classes=5 | ||
202 | +num=6 | ||
203 | +jitter=.3 | ||
204 | +ignore_thresh = .7 | ||
205 | +truth_thresh = 1 | ||
206 | +random=1 |
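yolov3-tiny.cfg defines two detection heads: the first [yolo] block takes the three largest anchors (mask = 3,4,5) on the coarse 13×13 grid, and the second takes the three smallest (mask = 0,1,2) on the upsampled 26×26 grid; with classes=5, both pre-YOLO convolutions therefore use filters = 3 × (5 + 5) = 30. A small standalone sketch of how the anchors/mask strings select per-head anchors (it mirrors the parsing done later in create_modules):

```python
# Standalone illustration of anchor selection by mask in yolov3-tiny.cfg.
anchors_str = "10,14, 23,27, 37,58, 81,82, 135,169, 344,319"
values = [int(v) for v in anchors_str.split(",")]
anchors = [(values[i], values[i + 1]) for i in range(0, len(values), 2)]

coarse_head = [anchors[i] for i in (3, 4, 5)]  # mask = 3,4,5 -> 13x13 grid
fine_head   = [anchors[i] for i in (0, 1, 2)]  # mask = 0,1,2 -> 26x26 grid
print(coarse_head)  # [(81, 82), (135, 169), (344, 319)]
print(fine_head)    # [(10, 14), (23, 27), (37, 58)]
```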
source_code/models.py
0 → 100644
1 | +from __future__ import division | ||
2 | + | ||
3 | +import torch | ||
4 | +import torch.nn as nn | ||
5 | +import torch.nn.functional as F | ||
6 | +from torch.autograd import Variable | ||
7 | +import numpy as np | ||
8 | + | ||
9 | +from utils.parse_config import * | ||
10 | +from utils.utils import build_targets, to_cpu, non_max_suppression | ||
11 | + | ||
12 | +import matplotlib.pyplot as plt | ||
13 | +import matplotlib.patches as patches | ||
14 | + | ||
15 | + | ||
16 | +def create_modules(module_defs): | ||
17 | + """ | ||
18 | + Constructs module list of layer blocks from module configuration in module_defs | ||
19 | + """ | ||
20 | + hyperparams = module_defs.pop(0) | ||
21 | + output_filters = [int(hyperparams["channels"])] | ||
22 | + module_list = nn.ModuleList() | ||
23 | + for module_i, module_def in enumerate(module_defs): | ||
24 | + modules = nn.Sequential() | ||
25 | + | ||
26 | + if module_def["type"] == "convolutional": | ||
27 | + bn = int(module_def["batch_normalize"]) | ||
28 | + filters = int(module_def["filters"]) | ||
29 | + kernel_size = int(module_def["size"]) | ||
30 | + pad = (kernel_size - 1) // 2 | ||
31 | + modules.add_module( | ||
32 | + "conv_{}".format(module_i), | ||
33 | + nn.Conv2d( | ||
34 | + in_channels=output_filters[-1], | ||
35 | + out_channels=filters, | ||
36 | + kernel_size=kernel_size, | ||
37 | + stride=int(module_def["stride"]), | ||
38 | + padding=pad, | ||
39 | + bias=not bn, | ||
40 | + ), | ||
41 | + ) | ||
42 | + if bn: | ||
43 | + modules.add_module("batch_norm_{}".format(module_i), nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) | ||
44 | + if module_def["activation"] == "leaky": | ||
45 | + modules.add_module("leaky_{}".format(module_i), nn.LeakyReLU(0.1)) | ||
46 | + | ||
47 | + elif module_def["type"] == "maxpool": | ||
48 | + kernel_size = int(module_def["size"]) | ||
49 | + stride = int(module_def["stride"]) | ||
50 | + if kernel_size == 2 and stride == 1: | ||
51 | + modules.add_module("_debug_padding_{}".format(module_i), nn.ZeroPad2d((0, 1, 0, 1))) | ||
52 | + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) | ||
53 | + modules.add_module("maxpool_{}".format(module_i), maxpool) | ||
54 | + | ||
55 | + elif module_def["type"] == "upsample": | ||
56 | + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") | ||
57 | + modules.add_module("upsample_{}".format(module_i), upsample) | ||
58 | + | ||
59 | + elif module_def["type"] == "route": | ||
60 | + layers = [int(x) for x in module_def["layers"].split(",")] | ||
61 | + filters = sum([output_filters[1:][i] for i in layers]) | ||
62 | + modules.add_module("route_{}".format(module_i), EmptyLayer()) | ||
63 | + | ||
64 | + elif module_def["type"] == "shortcut":  # not used by the configs in this commit | ||
65 | + filters = output_filters[1:][int(module_def["from"])] | ||
66 | + modules.add_module("shortcut_{}".format(module_i), EmptyLayer()) | ||
67 | + | ||
68 | + elif module_def["type"] == "yolo": | ||
69 | + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] | ||
70 | + # Extract anchors | ||
71 | + anchors = [int(x) for x in module_def["anchors"].split(",")] | ||
72 | + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] | ||
73 | + anchors = [anchors[i] for i in anchor_idxs] | ||
74 | + num_classes = int(module_def["classes"]) | ||
75 | + img_size = int(hyperparams["height"]) | ||
76 | + # Define detection layer | ||
77 | + yolo_layer = YOLOLayer(anchors, num_classes, img_size) | ||
78 | + modules.add_module("yolo_{}".format(module_i), yolo_layer) | ||
79 | + # Register module list and number of output filters | ||
80 | + module_list.append(modules) | ||
81 | + output_filters.append(filters) | ||
82 | + | ||
83 | + return hyperparams, module_list | ||
84 | + | ||
85 | + | ||
86 | +class Upsample(nn.Module): | ||
87 | + """ nn.Upsample is deprecated """ | ||
88 | + | ||
89 | + def __init__(self, scale_factor, mode="nearest"): | ||
90 | + super(Upsample, self).__init__() | ||
91 | + self.scale_factor = scale_factor | ||
92 | + self.mode = mode | ||
93 | + | ||
94 | + def forward(self, x): | ||
95 | + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) | ||
96 | + return x | ||
97 | + | ||
98 | + | ||
99 | +class EmptyLayer(nn.Module): | ||
100 | + """Placeholder for 'route' and 'shortcut' layers""" | ||
101 | + | ||
102 | + def __init__(self): | ||
103 | + super(EmptyLayer, self).__init__() | ||
104 | + | ||
105 | + | ||
106 | +class YOLOLayer(nn.Module): | ||
107 | + """Detection layer""" | ||
108 | + | ||
109 | + def __init__(self, anchors, num_classes, img_dim=416): | ||
110 | + super(YOLOLayer, self).__init__() | ||
111 | + self.anchors = anchors | ||
112 | + self.num_anchors = len(anchors) | ||
113 | + self.num_classes = num_classes | ||
114 | + self.ignore_thres = 0.5 | ||
115 | + self.mse_loss = nn.MSELoss() | ||
116 | + self.bce_loss = nn.BCELoss() | ||
117 | + self.obj_scale = 1 | ||
118 | + self.noobj_scale = 100 | ||
119 | + self.metrics = {} | ||
120 | + self.img_dim = img_dim | ||
121 | + self.grid_size = 0 # grid size | ||
122 | + | ||
123 | + def compute_grid_offsets(self, grid_size, cuda=True): | ||
124 | + self.grid_size = grid_size | ||
125 | + g = self.grid_size | ||
126 | + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor | ||
127 | + self.stride = self.img_dim / self.grid_size | ||
128 | + # Calculate offsets for each grid | ||
129 | + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) | ||
130 | + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) | ||
131 | + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) | ||
132 | + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) | ||
133 | + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) | ||
134 | + | ||
135 | + def forward(self, x, targets=None, img_dim=None): | ||
136 | + | ||
137 | + # Tensors for cuda support | ||
138 | + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor | ||
139 | + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor | ||
140 | + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor | ||
141 | + | ||
142 | + self.img_dim = img_dim | ||
143 | + num_samples = x.size(0) | ||
144 | + grid_size = x.size(2) | ||
145 | + | ||
146 | + prediction = ( | ||
147 | + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) | ||
148 | + .permute(0, 1, 3, 4, 2) | ||
149 | + .contiguous() | ||
150 | + ) | ||
151 | + | ||
152 | + # Get outputs | ||
153 | + x = torch.sigmoid(prediction[..., 0]) # Center x | ||
154 | + y = torch.sigmoid(prediction[..., 1]) # Center y | ||
155 | + w = prediction[..., 2] # Width | ||
156 | + h = prediction[..., 3] # Height | ||
157 | + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf | ||
158 | + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. | ||
159 | + | ||
160 | + # If grid size does not match current we compute new offsets | ||
161 | + if grid_size != self.grid_size: | ||
162 | + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) | ||
163 | + | ||
164 | + # Add offset and scale with anchors | ||
165 | + pred_boxes = FloatTensor(prediction[..., :4].shape) | ||
166 | + pred_boxes[..., 0] = x.data + self.grid_x | ||
167 | + pred_boxes[..., 1] = y.data + self.grid_y | ||
168 | + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w | ||
169 | + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h | ||
170 | + | ||
171 | + output = torch.cat( | ||
172 | + ( | ||
173 | + pred_boxes.view(num_samples, -1, 4) * self.stride, | ||
174 | + pred_conf.view(num_samples, -1, 1), | ||
175 | + pred_cls.view(num_samples, -1, self.num_classes), | ||
176 | + ), | ||
177 | + -1, | ||
178 | + ) | ||
179 | + | ||
180 | + if targets is None: | ||
181 | + return output, 0 | ||
182 | + else: | ||
183 | + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( | ||
184 | + pred_boxes=pred_boxes, | ||
185 | + pred_cls=pred_cls, | ||
186 | + target=targets, | ||
187 | + anchors=self.scaled_anchors, | ||
188 | + ignore_thres=self.ignore_thres, | ||
189 | + ) | ||
190 | + | ||
191 | + # Loss : Mask outputs to ignore non-existing objects (except with conf. loss) | ||
192 | + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) | ||
193 | + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) | ||
194 | + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) | ||
195 | + loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) | ||
196 | + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) | ||
197 | + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) | ||
198 | + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj | ||
199 | + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) | ||
200 | + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls | ||
201 | + | ||
202 | + # Metrics | ||
203 | + cls_acc = 100 * class_mask[obj_mask].mean() | ||
204 | + conf_obj = pred_conf[obj_mask].mean() | ||
205 | + conf_noobj = pred_conf[noobj_mask].mean() | ||
206 | + conf50 = (pred_conf > 0.5).float() | ||
207 | + iou50 = (iou_scores > 0.5).float() | ||
208 | + iou75 = (iou_scores > 0.75).float() | ||
209 | + detected_mask = conf50 * class_mask * tconf | ||
210 | + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) | ||
211 | + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) | ||
212 | + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) | ||
213 | + | ||
214 | + self.metrics = { | ||
215 | + "loss": to_cpu(total_loss).item(), | ||
216 | + "x": to_cpu(loss_x).item(), | ||
217 | + "y": to_cpu(loss_y).item(), | ||
218 | + "w": to_cpu(loss_w).item(), | ||
219 | + "h": to_cpu(loss_h).item(), | ||
220 | + "conf": to_cpu(loss_conf).item(), | ||
221 | + "cls": to_cpu(loss_cls).item(), | ||
222 | + "cls_acc": to_cpu(cls_acc).item(), | ||
223 | + "recall50": to_cpu(recall50).item(), | ||
224 | + "recall75": to_cpu(recall75).item(), | ||
225 | + "precision": to_cpu(precision).item(), | ||
226 | + "conf_obj": to_cpu(conf_obj).item(), | ||
227 | + "conf_noobj": to_cpu(conf_noobj).item(), | ||
228 | + "grid_size": grid_size, | ||
229 | + } | ||
230 | + | ||
231 | + return output, total_loss | ||
232 | + | ||
233 | + | ||
234 | + | ||
235 | + | ||
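For reference, the box decode in YOLOLayer.forward above is the standard YOLOv3 formulation: with σ the sigmoid, (c_x, c_y) the grid-cell offsets and (p_w, p_h) the anchor sizes in grid units,

```latex
b_x = \sigma(t_x) + c_x, \qquad
b_y = \sigma(t_y) + c_y, \qquad
b_w = p_w \, e^{t_w}, \qquad
b_h = p_h \, e^{t_h}
```

and the resulting boxes are multiplied by stride = img_dim / grid_size to map them back to input-image pixels.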
236 | +class Darknet(nn.Module): | ||
237 | + """YOLOv3 object detection model""" | ||
238 | + | ||
239 | + def __init__(self, config_path, img_size=416): | ||
240 | + super(Darknet, self).__init__() | ||
241 | + self.module_defs = parse_model_config(config_path) | ||
242 | + self.hyperparams, self.module_list = create_modules(self.module_defs) | ||
243 | + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] | ||
244 | + self.img_size = img_size | ||
245 | + self.seen = 0 | ||
246 | + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) | ||
247 | + | ||
248 | + def forward(self, x, targets=None): | ||
249 | + img_dim = x.shape[2] | ||
250 | + loss = 0 | ||
251 | + layer_outputs, yolo_outputs = [], [] | ||
252 | + isfeature, featuremap = False, None  # featuremap stays None when the config has no route layer (e.g. tiny1/tiny2) | ||
253 | + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): | ||
254 | + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: | ||
255 | + x = module(x) | ||
256 | + elif module_def["type"] == "route": | ||
257 | + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) | ||
258 | + if (not isfeature): | ||
259 | + featuremap = x | ||
260 | + isfeature = True | ||
261 | + elif module_def["type"] == "shortcut":  # no shortcut layers in these configs | ||
262 | + layer_i = int(module_def["from"]) | ||
263 | + x = layer_outputs[-1] + layer_outputs[layer_i] | ||
264 | + elif module_def["type"] == "yolo": | ||
265 | + | ||
266 | + x, layer_loss = module[0](x, targets, img_dim) | ||
267 | + loss += layer_loss | ||
268 | + yolo_outputs.append(x) | ||
269 | + layer_outputs.append(x) | ||
270 | + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) | ||
271 | + | ||
272 | + # yolo_outputs = non_max_suppression(yolo_outputs, 0.8, 0.4) | ||
273 | + # if yolo_outputs is not None: | ||
274 | + # res = self.roipool(featuremap, yolo_outputs, targets) | ||
275 | + | ||
276 | + return featuremap, (yolo_outputs if targets is None else (loss, yolo_outputs))  # parentheses make the conditional's grouping explicit | ||
277 | + | ||
278 | + def load_darknet_weights(self, weights_path): | ||
279 | + """Parses and loads the weights stored in 'weights_path'""" | ||
280 | + | ||
281 | + # Open the weights file | ||
282 | + with open(weights_path, "rb") as f: | ||
283 | + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values | ||
284 | + self.header_info = header # Needed to write header when saving weights | ||
285 | + self.seen = header[3] # number of images seen during training | ||
286 | + weights = np.fromfile(f, dtype=np.float32) # The rest are weights | ||
287 | + | ||
288 | + # Establish cutoff for loading backbone weights | ||
289 | + cutoff = None | ||
290 | + if "darknet53.conv.74" in weights_path: | ||
291 | + cutoff = 75 | ||
292 | + | ||
293 | + ptr = 0 | ||
294 | + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): | ||
295 | + if i == cutoff: | ||
296 | + break | ||
297 | + if module_def["type"] == "convolutional": | ||
298 | + conv_layer = module[0] | ||
299 | + if module_def["batch_normalize"]: | ||
300 | + # Load BN bias, weights, running mean and running variance | ||
301 | + bn_layer = module[1] | ||
302 | + num_b = bn_layer.bias.numel() # Number of biases | ||
303 | + # Bias | ||
304 | + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) | ||
305 | + bn_layer.bias.data.copy_(bn_b) | ||
306 | + ptr += num_b | ||
307 | + # Weight | ||
308 | + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) | ||
309 | + bn_layer.weight.data.copy_(bn_w) | ||
310 | + ptr += num_b | ||
311 | + # Running Mean | ||
312 | + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) | ||
313 | + bn_layer.running_mean.data.copy_(bn_rm) | ||
314 | + ptr += num_b | ||
315 | + # Running Var | ||
316 | + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) | ||
317 | + bn_layer.running_var.data.copy_(bn_rv) | ||
318 | + ptr += num_b | ||
319 | + else: | ||
320 | + # Load conv. bias | ||
321 | + num_b = conv_layer.bias.numel() | ||
322 | + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) | ||
323 | + conv_layer.bias.data.copy_(conv_b) | ||
324 | + ptr += num_b | ||
325 | + # Load conv. weights | ||
326 | + num_w = conv_layer.weight.numel() | ||
327 | + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) | ||
328 | + conv_layer.weight.data.copy_(conv_w) | ||
329 | + ptr += num_w | ||
330 | + | ||
331 | + def save_darknet_weights(self, path, cutoff=-1): | ||
332 | + """ | ||
333 | + @:param path - path of the new weights file | ||
334 | + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) | ||
335 | + """ | ||
336 | + fp = open(path, "wb") | ||
337 | + self.header_info[3] = self.seen | ||
338 | + self.header_info.tofile(fp) | ||
339 | + | ||
340 | + # Iterate through layers | ||
341 | + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): | ||
342 | + if module_def["type"] == "convolutional": | ||
343 | + conv_layer = module[0] | ||
344 | + # If batch norm, load bn first | ||
345 | + if module_def["batch_normalize"]: | ||
346 | + bn_layer = module[1] | ||
347 | + bn_layer.bias.data.cpu().numpy().tofile(fp) | ||
348 | + bn_layer.weight.data.cpu().numpy().tofile(fp) | ||
349 | + bn_layer.running_mean.data.cpu().numpy().tofile(fp) | ||
350 | + bn_layer.running_var.data.cpu().numpy().tofile(fp) | ||
351 | + # Load conv bias | ||
352 | + else: | ||
353 | + conv_layer.bias.data.cpu().numpy().tofile(fp) | ||
354 | + # Load conv weights | ||
355 | + conv_layer.weight.data.cpu().numpy().tofile(fp) | ||
356 | + | ||
357 | + fp.close() |
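A minimal inference sketch for the Darknet class above, assuming the paths shown (they are placeholders, not files from this commit). Note that with targets=None the forward above returns a (featuremap, detections) pair, and that featuremap is only populated by configs containing a route layer such as yolov3-tiny.cfg:

```python
# Minimal inference sketch (paths are placeholders).
import torch
from models import Darknet
from utils.utils import non_max_suppression

model = Darknet("config/yolov3-tiny.cfg", img_size=416)
# model.load_darknet_weights("weights/custom-tiny.weights")  # weights must match this config's layer sizes
model.eval()

dummy = torch.rand(1, 3, 416, 416)          # one 416x416 RGB image
with torch.no_grad():
    featuremap, detections = model(dummy)   # targets=None -> (featuremap, yolo_outputs)
detections = non_max_suppression(detections, conf_thres=0.5, nms_thres=0.4)
```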
source_code/roipool.py
0 → 100644
1 | +from __future__ import division | ||
2 | + | ||
3 | +import torch | ||
4 | +import torch.nn as nn | ||
5 | +import torch.nn.functional as F | ||
6 | +from torch.nn.modules import module | ||
7 | + | ||
8 | +from utils.utils import * | ||
9 | + | ||
10 | + | ||
11 | +class ROIPool(nn.Module): | ||
12 | + def __init__(self, output_size): | ||
13 | + super(ROIPool, self).__init__() | ||
14 | + self.maxpool = nn.AdaptiveMaxPool2d(output_size) | ||
15 | + self.size = output_size | ||
16 | + self.fc1 = nn.Linear(2304, 1024) | ||
17 | + self.fc2 = nn.Linear(1024, 512) | ||
18 | + self.fc3 = nn.Linear(512, 1) | ||
19 | + self.softplus = nn.Softplus() | ||
20 | + self.smoothl1 = nn.SmoothL1Loss() | ||
21 | + self.mse = nn.MSELoss() | ||
22 | + | ||
23 | + | ||
24 | + def target_detection_iou(self, box1, box2): | ||
25 | + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] | ||
26 | + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] | ||
27 | + | ||
28 | + # get the coordinates of the intersection rectangle | ||
29 | + b1_x1 = b1_x1.type(torch.float64) | ||
30 | + b1_y1 = b1_y1.type(torch.float64) | ||
31 | + b1_x2 = b1_x2.type(torch.float64) | ||
32 | + b1_y2 = b1_y2.type(torch.float64) | ||
33 | + | ||
34 | + inter_rect_x1 = torch.max(b1_x1, b2_x1) | ||
35 | + inter_rect_y1 = torch.max(b1_y1, b2_y1) | ||
36 | + inter_rect_x2 = torch.min(b1_x2, b2_x2) | ||
37 | + inter_rect_y2 = torch.min(b1_y2, b2_y2) | ||
38 | + # Intersection area | ||
39 | + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( | ||
40 | + inter_rect_y2 - inter_rect_y1 + 1, min=0 | ||
41 | + ) | ||
42 | + # Union Area | ||
43 | + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) | ||
44 | + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) | ||
45 | + | ||
46 | + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) | ||
47 | + | ||
48 | + return iou | ||
49 | + | ||
50 | + def similar_bbox(self, detections, targets): | ||
51 | + rescaled_boxes = rescale_boxes(detections, 416, (480, 640)) | ||
52 | + similar_box = list(range(len(rescaled_boxes))) | ||
53 | + for i in range(len(rescaled_boxes)): | ||
54 | + for j in range(len(targets)): | ||
55 | + target_xyxy = [(targets[j][0]-(targets[j][2]/2))*640, (targets[j][1]-(targets[j][3]/2))*480, (targets[j][0]+(targets[j][2]/2))*640, (targets[j][1]+(targets[j][3]/2))*480] | ||
56 | + target_xyxy = torch.tensor(target_xyxy) | ||
57 | + iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy) | ||
58 | + if iou > 0.01: | ||
59 | + similar_box[i] = targets[j][-1] | ||
60 | + break | ||
61 | + else: | ||
62 | + similar_box[i] = -1 | ||
63 | + return similar_box | ||
64 | + | ||
65 | + | ||
66 | + def cal_scale(self, x, detections, targets): | ||
67 | + targets_distance = targets[:, :4] | ||
68 | + square_targets = [] | ||
69 | + | ||
70 | + for target_distance in targets_distance: | ||
71 | + x1 = (target_distance[0]-(target_distance[2]/2))*416 | ||
72 | + y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15 | ||
73 | + x2 = (target_distance[0]+(target_distance[2]/2))*416 | ||
74 | + y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15 | ||
75 | + | ||
76 | + square_targets.append([x1, y1, x2, y2]) | ||
77 | + square_targets = torch.tensor(square_targets) | ||
78 | + | ||
79 | + scale = get_scale(square_targets) | ||
80 | + output_distance = [] | ||
81 | + | ||
82 | + roi_results = [] | ||
83 | + for i in scale: | ||
84 | + x1_scale = i[0] | ||
85 | + y1_scale = i[1] | ||
86 | + x2_scale = i[2] | ||
87 | + y2_scale = i[3] | ||
88 | + | ||
89 | + output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1] | ||
90 | + | ||
91 | + output = self.maxpool(output) | ||
92 | + | ||
93 | + output = output.view(1, -1) | ||
94 | + # print(output) | ||
95 | + roi_results.append(output) | ||
96 | + return roi_results | ||
97 | + | ||
98 | + def cal_scale_evaL(self, x, detections): | ||
99 | + detections = detections[:, :4] | ||
100 | + scale = get_scale(detections) | ||
101 | + output_distance = [] | ||
102 | + roi_results = [] | ||
103 | + for i in scale: | ||
104 | + x1_scale = i[0] | ||
105 | + y1_scale = i[1] | ||
106 | + x2_scale = i[2] | ||
107 | + y2_scale = i[3] | ||
108 | + | ||
109 | + output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1] | ||
110 | + output = self.maxpool(output) | ||
111 | + output = output.view(1, -1) | ||
112 | + roi_results.append(output) | ||
113 | + return roi_results | ||
114 | + | ||
115 | + def forward(self, x, detections, targets=None): | ||
116 | + if targets is not None: | ||
117 | + distances = targets[:, 4] | ||
118 | + distances = distances * 10 | ||
119 | + # distances = distances * 10 | ||
120 | + # print(f'distances = {distances}') | ||
121 | + # targets_distance = targets[:, :4] | ||
122 | + # square_targets = [] | ||
123 | + | ||
124 | + # for target_distance in targets_distance: | ||
125 | + # x1 = (target_distance[0]-(target_distance[2]/2))*416 | ||
126 | + # y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15 | ||
127 | + # x2 = (target_distance[0]+(target_distance[2]/2))*416 | ||
128 | + # y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15 | ||
129 | + | ||
130 | + # square_targets.append([x1, y1, x2, y2]) | ||
131 | + # square_targets = torch.tensor(square_targets) | ||
132 | + | ||
133 | + # scale = get_scale(square_targets) | ||
134 | + # output_distance = [] | ||
135 | + | ||
136 | + # roi_results = [] | ||
137 | + # for i in scale: | ||
138 | + # x1_scale = i[0] | ||
139 | + # y1_scale = i[1] | ||
140 | + # x2_scale = i[2] | ||
141 | + # y2_scale = i[3] | ||
142 | + | ||
143 | + # output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1] | ||
144 | + | ||
145 | + # output = self.maxpool(output) | ||
146 | + | ||
147 | + # output = output.view(1, -1).cuda() | ||
148 | + # # print(output) | ||
149 | + # roi_results.append(output) | ||
150 | + roi_results = self.cal_scale(x, detections, targets) | ||
151 | + | ||
152 | + output = torch.cat(roi_results, 0) | ||
153 | + # print(output.shape) | ||
154 | + # print(output.shape) | ||
155 | + output = self.fc1(output) | ||
156 | + output = self.fc2(output) | ||
157 | + output = self.fc3(output) | ||
158 | + output = self.softplus(output) | ||
159 | + # print(f'output = {output}') | ||
160 | + #loss = 0 | ||
161 | + # output_distance = torch.tensor(output, requires_grad=True) | ||
162 | + | ||
163 | + | ||
164 | + ''' | ||
165 | + output = x | ||
166 | + # output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1] | ||
167 | + output = self.maxpool(output) | ||
168 | + output = output.view(1, -1).cuda() | ||
169 | + # print(output.shape) | ||
170 | + output = self.fc1(output) | ||
171 | + output = self.fc2(output) | ||
172 | + output = self.fc3(output) | ||
173 | + output = self.softplus(output) | ||
174 | + ''' | ||
175 | + | ||
176 | + # output_distance = torch.cuda.FloatTensor(output_distance, requires_grad=True)#.to('cpu') | ||
177 | + | ||
178 | + #print(f'output_distance = {output_distance}') | ||
179 | + #print(output_distance.shape) | ||
180 | + #print(f'distances = {distances}') | ||
181 | + #print(distances.shape) | ||
182 | + distances = distances.cuda() | ||
183 | + # print(f'output = {output}') | ||
184 | + # print(f'output = {output}') | ||
185 | + # print(f'distances = {distances}') | ||
186 | + loss = self.smoothl1(output, distances.float()) | ||
187 | + # print(f'loss = {loss}') | ||
188 | + | ||
189 | + # print(f'output_distance = {output_distance}') | ||
190 | + # print(f'distances = {distances}') | ||
191 | + # print(f'loss = {loss}') | ||
192 | + return loss, output | ||
193 | + | ||
194 | + else: | ||
195 | + | ||
196 | + ''' | ||
197 | + detections = detections[:, :4] | ||
198 | + scale = get_scale(detections) | ||
199 | + output_distance = [] | ||
200 | + for i in scale: | ||
201 | + x1_scale = i[0] | ||
202 | + y1_scale = i[1] | ||
203 | + x2_scale = i[2] | ||
204 | + y2_scale = i[3] | ||
205 | + | ||
206 | + output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1] | ||
207 | + output = self.maxpool(output) | ||
208 | + output = output.view(1, -1).cuda() | ||
209 | + ''' | ||
210 | + roi_results = self.cal_scale_evaL(x, detections) | ||
211 | + output = torch.cat(roi_results, 0) | ||
212 | + # print(f'output = {output.shape}') | ||
213 | + output = self.fc1(output) | ||
214 | + output = self.fc2(output) | ||
215 | + output = self.fc3(output) | ||
216 | + output = self.softplus(output) | ||
217 | + # print(f'output = {output}') | ||
218 | + | ||
219 | + | ||
220 | + return output | ||
221 | + | ||
222 | + | ||
223 | + ''' | ||
224 | + scale = get_scale(detections) | ||
225 | + | ||
226 | + | ||
227 | + output_distance = [] | ||
228 | + for i in scale: | ||
229 | + x1_scale = i[0] | ||
230 | + y1_scale = i[1] | ||
231 | + x2_scale = i[2] | ||
232 | + y2_scale = i[3] | ||
233 | + | ||
234 | + output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1] | ||
235 | + # output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1] | ||
236 | + output = self.maxpool(output) | ||
237 | + output = output.view(1, -1).cuda() | ||
238 | + output = self.fc1(output) | ||
239 | + output = self.fc2(output) | ||
240 | + | ||
241 | + output_distance.append(output) | ||
242 | + | ||
243 | + if targets is None: | ||
244 | + return output_distance, 0 | ||
245 | + | ||
246 | + else: | ||
247 | + loss = 0 | ||
248 | + box_similar_distance = self.similar_bbox(detections, targets) | ||
249 | + for i in range(len(box_similar_distance)): | ||
250 | + if box_similar_distance[i] == -1: | ||
251 | + output_distance[i] = -1 | ||
252 | + | ||
253 | + | ||
254 | + output_distance = torch.FloatTensor(output_distance).to('cpu') | ||
255 | + box_similar_distance = torch.FloatTensor(box_similar_distance).to('cpu') | ||
256 | + | ||
257 | + | ||
258 | + # print(f'output_distance = {output_distance}') | ||
259 | + # print(f'target_distance = {box_similar_distance}') | ||
260 | + loss = self.smoothl1(output_distance, box_similar_distance) | ||
261 | + ''' | ||
262 | + | ||
263 | + | ||
264 | + | ||
265 | + | ||
266 | + | ||
267 | + | ||
268 | + | ||
269 | + |
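ROIPool.fc1 expects a flattened input of 2304 features, which is consistent with pooling a 256-channel feature map down to 3×3 (256 × 3 × 3 = 2304); the pooled size is supplied by the caller, so those two numbers are an inference from fc1's input dimension rather than something stated in this diff. A small shape sketch under that assumption:

```python
# Shape sketch for ROIPool, assuming output_size=3 and a 256-channel feature map.
import torch
from roipool import ROIPool

pool = ROIPool(output_size=3)
crop = torch.rand(1, 256, 7, 5)            # an arbitrary ROI crop of the feature map
flat = pool.maxpool(crop).view(1, -1)      # -> (1, 2304), matching fc1's input size
print(flat.shape, pool.fc1(flat).shape)    # torch.Size([1, 2304]) torch.Size([1, 1024])
```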
source_code/utils/__init__.py
0 → 100644
File mode changed
(binary files added: no preview available)
source_code/utils/augmentations.py
0 → 100644
source_code/utils/datasets.py
0 → 100644
1 | +import glob | ||
2 | +import random | ||
3 | +import os | ||
4 | +import sys | ||
5 | +import numpy as np | ||
6 | +from PIL import Image | ||
7 | +import torch | ||
8 | +import torch.nn.functional as F | ||
9 | +import time | ||
10 | + | ||
11 | +from utils.augmentations import horisontal_flip | ||
12 | +from torch.utils.data import Dataset | ||
13 | +import torchvision.transforms as transforms | ||
14 | + | ||
15 | + | ||
16 | +def pad_to_square(img, pad_value): | ||
17 | + c, h, w = img.shape | ||
18 | + dim_diff = np.abs(h - w) | ||
19 | + # (upper / left) padding and (lower / right) padding | ||
20 | + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 | ||
21 | + # Determine padding | ||
22 | + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) | ||
23 | + # Add padding | ||
24 | + img = F.pad(img, pad, "constant", value=pad_value) | ||
25 | + | ||
26 | + return img, pad | ||
27 | + | ||
28 | + | ||
29 | +def resize(image, size): | ||
30 | + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) | ||
31 | + return image | ||
32 | + | ||
33 | + | ||
34 | +def random_resize(images, min_size=288, max_size=448): | ||
35 | + new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] | ||
36 | + images = F.interpolate(images, size=new_size, mode="nearest") | ||
37 | + return images | ||
38 | + | ||
39 | + | ||
40 | +class ImageFolder(Dataset): | ||
41 | + def __init__(self, folder_path, img_size=416): | ||
42 | + self.files = sorted(glob.glob("%s/*.*" % folder_path)) | ||
43 | + self.img_size = img_size | ||
44 | + | ||
45 | + def __getitem__(self, index): | ||
46 | + img_path = self.files[index % len(self.files)] | ||
47 | + # Extract image as PyTorch tensor | ||
48 | + img = transforms.ToTensor()(Image.open(img_path)) | ||
49 | + # Pad to square resolution | ||
50 | + img, _ = pad_to_square(img, 0) | ||
51 | + # Resize | ||
52 | + img = resize(img, self.img_size) | ||
53 | + | ||
54 | + return img_path, img | ||
55 | + | ||
56 | + def __len__(self): | ||
57 | + return len(self.files) | ||
58 | + | ||
59 | + | ||
60 | +class ListDataset(Dataset): | ||
61 | + def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): | ||
62 | + with open(list_path, "r") as file: | ||
63 | + self.img_files = file.readlines() | ||
64 | + | ||
65 | + self.label_files = [ | ||
66 | + path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") | ||
67 | + for path in self.img_files | ||
68 | + ] | ||
69 | + self.img_size = img_size | ||
70 | + self.max_objects = 100 | ||
71 | + self.augment = augment | ||
72 | + self.multiscale = multiscale | ||
73 | + self.normalized_labels = normalized_labels | ||
74 | + self.min_size = self.img_size - 3 * 32 | ||
75 | + self.max_size = self.img_size + 3 * 32 | ||
76 | + self.batch_count = 0 | ||
77 | + | ||
78 | + def __getitem__(self, index): | ||
79 | + | ||
80 | + # --------- | ||
81 | + # Image | ||
82 | + # --------- | ||
83 | + | ||
84 | + img_path = self.img_files[index % len(self.img_files)].rstrip() | ||
85 | + # Extract image as PyTorch tensor | ||
86 | + img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB')) | ||
87 | + | ||
88 | + # Handle images with less than three channels | ||
89 | + if len(img.shape) != 3: | ||
90 | + img = img.unsqueeze(0) | ||
91 | + img = img.expand(3, *img.shape[1:])  # broadcast a single-channel image to three channels | ||
92 | + | ||
93 | + _, h, w = img.shape | ||
94 | + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) | ||
95 | + # Pad to square resolution | ||
96 | + img, pad = pad_to_square(img, 0) | ||
97 | + _, padded_h, padded_w = img.shape | ||
98 | + | ||
99 | + # --------- | ||
100 | + # Label | ||
101 | + # --------- | ||
102 | + | ||
103 | + label_path = self.label_files[index % len(self.img_files)].rstrip() | ||
104 | + | ||
105 | + targets = None | ||
106 | + targets_distance = None | ||
107 | + if os.path.exists(label_path): | ||
108 | + if torch.from_numpy(np.loadtxt(label_path)).ndim == 2: | ||
109 | + boxes = torch.from_numpy(np.loadtxt(label_path)[:,:-1].reshape(-1, 5)) | ||
110 | + else: | ||
111 | + boxes = torch.from_numpy(np.loadtxt(label_path)[:-1].reshape(-1, 5)) | ||
112 | + # Extract coordinates for unpadded + unscaled image | ||
113 | + x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) | ||
114 | + y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) | ||
115 | + x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) | ||
116 | + y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) | ||
117 | + # Adjust for added padding | ||
118 | + x1 += pad[0] | ||
119 | + y1 += pad[2] | ||
120 | + x2 += pad[1] | ||
121 | + y2 += pad[3] | ||
122 | + # Returns (x, y, w, h) | ||
123 | + boxes[:, 1] = ((x1 + x2) / 2) / padded_w | ||
124 | + boxes[:, 2] = ((y1 + y2) / 2) / padded_h | ||
125 | + boxes[:, 3] *= w_factor / padded_w | ||
126 | + boxes[:, 4] *= h_factor / padded_h | ||
127 | + | ||
128 | + targets = torch.zeros((len(boxes), 6)) | ||
129 | + targets[:, 1:] = boxes | ||
130 | + | ||
131 | + if torch.from_numpy(np.loadtxt(label_path)).ndim == 2: | ||
132 | + targets_distance = torch.from_numpy(np.loadtxt(label_path)[:,1:].reshape(-1, 5)) | ||
133 | + else: | ||
134 | + targets_distance = torch.from_numpy(np.loadtxt(label_path)[1:].reshape(-1, 5)) | ||
135 | + | ||
136 | + # Apply augmentations | ||
137 | + # if self.augment: | ||
138 | + # if np.random.random() < 0.5: | ||
139 | + # img, targets = horisontal_flip(img, targets) | ||
140 | + | ||
141 | + return img_path, img, targets, targets_distance | ||
142 | + | ||
143 | + def collate_fn(self, batch): | ||
144 | + paths, imgs, targets, targets_distance = list(zip(*batch)) | ||
145 | + # Remove empty placeholder targets | ||
146 | + targets = [boxes for boxes in targets if boxes is not None] | ||
147 | + # Add sample index to targets | ||
148 | + for i, boxes in enumerate(targets): | ||
149 | + boxes[:, 0] = i | ||
150 | + targets = torch.cat(targets, 0) | ||
151 | + # Selects new image size every tenth batch | ||
152 | + if self.multiscale and self.batch_count % 10 == 0: | ||
153 | + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) | ||
154 | + # Resize images to input shape | ||
155 | + imgs = torch.stack([resize(img, self.img_size) for img in imgs]) | ||
156 | + self.batch_count += 1 | ||
157 | + return paths, imgs, targets, targets_distance | ||
158 | + | ||
159 | + def __len__(self): | ||
160 | + return len(self.img_files) |
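A minimal loader sketch for ListDataset, using its collate_fn so multiscale resizing and target indexing behave as defined above (the list-file path is a placeholder):

```python
# Minimal training-loader sketch (the list-file path is a placeholder).
from torch.utils.data import DataLoader
from utils.datasets import ListDataset

dataset = ListDataset("data/train.txt", img_size=416, augment=False, multiscale=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)

for paths, imgs, targets, targets_distance in loader:
    # imgs: (B, 3, S, S); targets: (N, 6) rows of (sample_idx, class, x, y, w, h);
    # targets_distance is left as a tuple of per-image tensors by collate_fn
    break
```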
source_code/utils/logger.py
0 → 100644
1 | +import tensorflow as tf | ||
2 | + | ||
3 | + | ||
4 | +class Logger(object): | ||
5 | + def __init__(self, log_dir): | ||
6 | + """Create a summary writer logging to log_dir.""" | ||
7 | + self.writer = tf.summary.create_file_writer(log_dir) | ||
8 | + | ||
9 | + def scalar_summary(self, tag, value, step): | ||
10 | + """Log a scalar variable (TF2 summary API, matching create_file_writer above).""" | ||
11 | + with self.writer.as_default(): | ||
12 | + tf.summary.scalar(tag, value, step=step) | ||
13 | + self.writer.flush() | ||
14 | + | ||
15 | + def list_of_scalars_summary(self, tag_value_pairs, step): | ||
16 | + """Log several scalar variables at once.""" | ||
17 | + with self.writer.as_default(): | ||
18 | + for tag, value in tag_value_pairs: | ||
19 | + tf.summary.scalar(tag, value, step=step) | ||
20 | + self.writer.flush() |
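Assuming the TensorBoard-style logging this wrapper is written for, a short usage sketch (the log directory name is illustrative):

```python
# Illustrative usage of the Logger wrapper above.
from utils.logger import Logger

logger = Logger("logs")
logger.scalar_summary("loss", 1.234, step=0)
logger.list_of_scalars_summary([("precision", 0.5), ("recall", 0.4)], step=0)
```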
source_code/utils/parse_config.py
0 → 100644
1 | + | ||
2 | + | ||
3 | +def parse_model_config(path): | ||
4 | + """Parses the yolo-v3 layer configuration file and returns module definitions""" | ||
5 | + file = open(path, 'r') | ||
6 | + lines = file.read().split('\n') | ||
7 | + lines = [x for x in lines if x and not x.startswith('#')] | ||
8 | + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces | ||
9 | + module_defs = [] | ||
10 | + for line in lines: | ||
11 | + if line.startswith('['): # This marks the start of a new block | ||
12 | + module_defs.append({}) | ||
13 | + module_defs[-1]['type'] = line[1:-1].rstrip() | ||
14 | + if module_defs[-1]['type'] == 'convolutional': | ||
15 | + module_defs[-1]['batch_normalize'] = 0 | ||
16 | + else: | ||
17 | + key, value = line.split("=") | ||
18 | + value = value.strip() | ||
19 | + module_defs[-1][key.rstrip()] = value.strip() | ||
20 | + | ||
21 | + return module_defs | ||
22 | + | ||
23 | +def parse_data_config(path): | ||
24 | + """Parses the data configuration file""" | ||
25 | + options = dict() | ||
26 | + options['gpus'] = '0,1,2,3' | ||
27 | + options['num_workers'] = '10' | ||
28 | + with open(path, 'r') as fp: | ||
29 | + lines = fp.readlines() | ||
30 | + for line in lines: | ||
31 | + line = line.strip() | ||
32 | + if line == '' or line.startswith('#'): | ||
33 | + continue | ||
34 | + key, value = line.split('=') | ||
35 | + options[key.strip()] = value.strip() | ||
36 | + return options |
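parse_data_config reads plain key=value lines, merging them over the gpus/num_workers defaults. The .data files added at the top of this diff (config/cafe.data etc.) have no preview here, so the keys and paths in the following sketch are typical darknet-style entries and only an assumption about their contents:

```python
# Sketch of the key=value format parse_data_config expects (contents are assumed,
# not taken from config/cafe.data, which is not previewed in this diff).
example = """classes=9
train=data/train.txt
valid=data/valid.txt
names=data/classes.names
"""

with open("/tmp/example.data", "w") as f:
    f.write(example)

from utils.parse_config import parse_data_config
options = parse_data_config("/tmp/example.data")
print(options["classes"], options["names"], options["gpus"])  # '9' data/classes.names 0,1,2,3
```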
source_code/utils/utils.py
0 → 100644
1 | +from __future__ import division | ||
2 | +import math | ||
3 | +import time | ||
4 | +import tqdm | ||
5 | +import torch | ||
6 | +import torch.nn as nn | ||
7 | +import torch.nn.functional as F | ||
8 | +from torch.autograd import Variable | ||
9 | +import numpy as np | ||
10 | +import matplotlib.pyplot as plt | ||
11 | +import matplotlib.patches as patches | ||
12 | + | ||
13 | + | ||
14 | +def to_cpu(tensor): | ||
15 | + return tensor.detach().cpu() | ||
16 | + | ||
17 | + | ||
18 | +def load_classes(path): | ||
19 | + """ | ||
20 | + Loads class labels at 'path' | ||
21 | + """ | ||
22 | + fp = open(path, "r") | ||
23 | + names = fp.read().split("\n")[:-1] | ||
24 | + return names | ||
25 | + | ||
26 | + | ||
27 | +def weights_init_normal(m): | ||
28 | + classname = m.__class__.__name__ | ||
29 | + if classname.find("Conv") != -1: | ||
30 | + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) | ||
31 | + elif classname.find("BatchNorm2d") != -1: | ||
32 | + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) | ||
33 | + torch.nn.init.constant_(m.bias.data, 0.0) | ||
34 | + | ||
35 | + | ||
36 | +def rescale_boxes(boxes, current_dim, original_shape): | ||
37 | + """ Rescales bounding boxes to the original shape """ | ||
38 | + orig_h, orig_w = original_shape | ||
39 | + # The amount of padding that was added | ||
40 | + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) | ||
41 | + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) | ||
42 | + # Image height and width after padding is removed | ||
43 | + unpad_h = current_dim - pad_y | ||
44 | + unpad_w = current_dim - pad_x | ||
45 | + # Rescale bounding boxes to dimension of original image | ||
46 | + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w | ||
47 | + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h | ||
48 | + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w | ||
49 | + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h | ||
50 | + return boxes | ||
51 | + | ||
52 | + | ||
53 | +def xywh2xyxy(x): | ||
54 | + y = x.new(x.shape) | ||
55 | + y[..., 0] = x[..., 0] - x[..., 2] / 2 | ||
56 | + y[..., 1] = x[..., 1] - x[..., 3] / 2 | ||
57 | + y[..., 2] = x[..., 0] + x[..., 2] / 2 | ||
58 | + y[..., 3] = x[..., 1] + x[..., 3] / 2 | ||
59 | + return y | ||
60 | + | ||
61 | + | ||
62 | +def ap_per_class(tp, conf, pred_cls, target_cls): | ||
63 | + """ Compute the average precision, given the recall and precision curves. | ||
64 | + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. | ||
65 | + # Arguments | ||
66 | + tp: True positives (list). | ||
67 | + conf: Objectness value from 0-1 (list). | ||
68 | + pred_cls: Predicted object classes (list). | ||
69 | + target_cls: True object classes (list). | ||
70 | + # Returns | ||
71 | + The average precision as computed in py-faster-rcnn. | ||
72 | + """ | ||
73 | + | ||
74 | + # Sort by objectness | ||
75 | + i = np.argsort(-conf) | ||
76 | + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] | ||
77 | + | ||
78 | + # Find unique classes | ||
79 | + unique_classes = np.unique(target_cls) | ||
80 | + | ||
81 | + # Create Precision-Recall curve and compute AP for each class | ||
82 | + ap, p, r = [], [], [] | ||
83 | + for c in tqdm.tqdm(unique_classes, desc="Computing AP"): | ||
84 | + i = pred_cls == c | ||
85 | + n_gt = (target_cls == c).sum() # Number of ground truth objects | ||
86 | + n_p = i.sum() # Number of predicted objects | ||
87 | + | ||
88 | + if n_p == 0 and n_gt == 0: | ||
89 | + continue | ||
90 | + elif n_p == 0 or n_gt == 0: | ||
91 | + ap.append(0) | ||
92 | + r.append(0) | ||
93 | + p.append(0) | ||
94 | + else: | ||
95 | + # Accumulate FPs and TPs | ||
96 | + fpc = (1 - tp[i]).cumsum() | ||
97 | + tpc = (tp[i]).cumsum() | ||
98 | + | ||
99 | + # Recall | ||
100 | + recall_curve = tpc / (n_gt + 1e-16) | ||
101 | + r.append(recall_curve[-1]) | ||
102 | + | ||
103 | + # Precision | ||
104 | + precision_curve = tpc / (tpc + fpc) | ||
105 | + p.append(precision_curve[-1]) | ||
106 | + | ||
107 | + # AP from recall-precision curve | ||
108 | + ap.append(compute_ap(recall_curve, precision_curve)) | ||
109 | + | ||
110 | + # Compute F1 score (harmonic mean of precision and recall) | ||
111 | + p, r, ap = np.array(p), np.array(r), np.array(ap) | ||
112 | + f1 = 2 * p * r / (p + r + 1e-16) | ||
113 | + | ||
114 | + return p, r, ap, f1, unique_classes.astype("int32") | ||
115 | + | ||
116 | + | ||
117 | +def compute_ap(recall, precision): | ||
118 | + """ Compute the average precision, given the recall and precision curves. | ||
119 | + Code originally from https://github.com/rbgirshick/py-faster-rcnn. | ||
120 | + | ||
121 | + # Arguments | ||
122 | + recall: The recall curve (list). | ||
123 | + precision: The precision curve (list). | ||
124 | + # Returns | ||
125 | + The average precision as computed in py-faster-rcnn. | ||
126 | + """ | ||
127 | + # correct AP calculation | ||
128 | + # first append sentinel values at the end | ||
129 | + mrec = np.concatenate(([0.0], recall, [1.0])) | ||
130 | + mpre = np.concatenate(([0.0], precision, [0.0])) | ||
131 | + | ||
132 | + # compute the precision envelope | ||
133 | + for i in range(mpre.size - 1, 0, -1): | ||
134 | + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) | ||
135 | + | ||
136 | + # to calculate area under PR curve, look for points | ||
137 | + # where X axis (recall) changes value | ||
138 | + i = np.where(mrec[1:] != mrec[:-1])[0] | ||
139 | + | ||
140 | + # and sum (\Delta recall) * prec | ||
141 | + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) | ||
142 | + return ap | ||
143 | + | ||
144 | + | ||
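compute_ap above implements the "correct AP" integration from py-faster-rcnn: precision is first made monotonically non-increasing in recall, and the area under the curve is summed only where recall changes,

```latex
\mathrm{AP} \;=\; \sum_{i \,:\, r_{i+1} \neq r_i} \left(r_{i+1} - r_i\right)\, \tilde{p}(r_{i+1}),
\qquad \tilde{p}(r) \;=\; \max_{r' \ge r} p(r'),
```

with sentinel points r = 0 and r = 1 appended to the recall curve as in the code.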
145 | +def get_batch_statistics(outputs, targets, iou_threshold): | ||
146 | + """ Compute true positives, predicted scores and predicted labels per sample """ | ||
147 | + batch_metrics = [] | ||
148 | + for sample_i in range(len(outputs)): | ||
149 | + | ||
150 | + if outputs[sample_i] is None: | ||
151 | + continue | ||
152 | + | ||
153 | + output = outputs[sample_i] | ||
154 | + pred_boxes = output[:, :4] | ||
155 | + pred_scores = output[:, 4] | ||
156 | + pred_labels = output[:, -1] | ||
157 | + | ||
158 | + true_positives = np.zeros(pred_boxes.shape[0]) | ||
159 | + | ||
160 | + annotations = targets[targets[:, 0] == sample_i][:, 1:] | ||
161 | + target_labels = annotations[:, 0] if len(annotations) else [] | ||
162 | + if len(annotations): | ||
163 | + detected_boxes = [] | ||
164 | + target_boxes = annotations[:, 1:] | ||
165 | + | ||
166 | + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): | ||
167 | + | ||
168 | + # If targets are found break | ||
169 | + if len(detected_boxes) == len(annotations): | ||
170 | + break | ||
171 | + | ||
172 | + # Ignore if label is not one of the target labels | ||
173 | + if pred_label not in target_labels: | ||
174 | + continue | ||
175 | + | ||
176 | + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) | ||
177 | + if iou >= iou_threshold and box_index not in detected_boxes: | ||
178 | + true_positives[pred_i] = 1 | ||
179 | + detected_boxes += [box_index] | ||
180 | + batch_metrics.append([true_positives, pred_scores, pred_labels]) | ||
181 | + return batch_metrics | ||
182 | + | ||
183 | + | ||
184 | +def bbox_wh_iou(wh1, wh2): | ||
185 | + wh2 = wh2.t() | ||
186 | + w1, h1 = wh1[0], wh1[1] | ||
187 | + w2, h2 = wh2[0], wh2[1] | ||
188 | + inter_area = torch.min(w1, w2) * torch.min(h1, h2) | ||
189 | + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area | ||
190 | + return inter_area / union_area | ||
191 | + | ||
192 | + | ||
193 | +def bbox_iou(box1, box2, x1y1x2y2=True): | ||
194 | + """ | ||
195 | + Returns the IoU of two bounding boxes | ||
196 | + """ | ||
197 | + if not x1y1x2y2: | ||
198 | + # Transform from center and width to exact coordinates | ||
199 | + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 | ||
200 | + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 | ||
201 | + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 | ||
202 | + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 | ||
203 | + else: | ||
204 | + # Get the coordinates of bounding boxes | ||
205 | + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] | ||
206 | + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] | ||
207 | + | ||
208 | + # get the coordinates of the intersection rectangle | ||
209 | + inter_rect_x1 = torch.max(b1_x1, b2_x1) | ||
210 | + inter_rect_y1 = torch.max(b1_y1, b2_y1) | ||
211 | + inter_rect_x2 = torch.min(b1_x2, b2_x2) | ||
212 | + inter_rect_y2 = torch.min(b1_y2, b2_y2) | ||
213 | + # Intersection area | ||
214 | + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( | ||
215 | + inter_rect_y2 - inter_rect_y1 + 1, min=0 | ||
216 | + ) | ||
217 | + # Union Area | ||
218 | + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) | ||
219 | + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) | ||
220 | + | ||
221 | + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) | ||
222 | + | ||
223 | + return iou | ||
224 | + | ||
225 | + | ||
226 | +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): | ||
227 | + """ | ||
228 | + Removes detections with lower object confidence score than 'conf_thres' and performs | ||
229 | + Non-Maximum Suppression to further filter detections. | ||
230 | + Returns detections with shape: | ||
231 | + (x1, y1, x2, y2, object_conf, class_score, class_pred) | ||
232 | + """ | ||
233 | + | ||
234 | + # From (center x, center y, width, height) to (x1, y1, x2, y2) | ||
235 | + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) | ||
236 | + output = [None for _ in range(len(prediction))] | ||
237 | + for image_i, image_pred in enumerate(prediction): | ||
238 | + # Filter out confidence scores below threshold | ||
239 | + image_pred = image_pred[image_pred[:, 4] >= conf_thres] | ||
240 | + # If none are remaining => process next image | ||
241 | + if not image_pred.size(0): | ||
242 | + continue | ||
243 | + # Object confidence times class confidence | ||
244 | + score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] | ||
245 | + # Sort by it | ||
246 | + image_pred = image_pred[(-score).argsort()] | ||
247 | + class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) | ||
248 | + detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) | ||
249 | + # Perform non-maximum suppression | ||
250 | + keep_boxes = [] | ||
251 | + while detections.size(0): | ||
252 | + large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres | ||
253 | + label_match = detections[0, -1] == detections[:, -1] | ||
254 | + # Indices of boxes with lower confidence scores, large IOUs and matching labels | ||
255 | + invalid = large_overlap & label_match | ||
256 | + weights = detections[invalid, 4:5] | ||
257 | + # Merge overlapping bboxes by order of confidence | ||
258 | + detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() | ||
259 | + keep_boxes += [detections[0]] | ||
260 | + detections = detections[~invalid] | ||
261 | + if keep_boxes: | ||
262 | + output[image_i] = torch.stack(keep_boxes) | ||
263 | + | ||
264 | + return output | ||
265 | + | ||
266 | + | ||
267 | +def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): | ||
268 | + | ||
269 | + ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor | ||
270 | + FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor | ||
271 | + | ||
272 | + nB = pred_boxes.size(0) | ||
273 | + nA = pred_boxes.size(1) | ||
274 | + nC = pred_cls.size(-1) | ||
275 | + nG = pred_boxes.size(2) | ||
276 | + | ||
277 | + # Output tensors | ||
278 | + obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) | ||
279 | + noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) | ||
280 | + class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
281 | + iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
282 | + tx = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
283 | + ty = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
284 | + tw = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
285 | + th = FloatTensor(nB, nA, nG, nG).fill_(0) | ||
286 | + tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) | ||
287 | + | ||
288 | + # Convert to position relative to box | ||
289 | + target_boxes = target[:, 2:6] * nG | ||
290 | + gxy = target_boxes[:, :2] | ||
291 | + gwh = target_boxes[:, 2:] | ||
292 | + # Get anchors with best iou | ||
293 | + ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) | ||
294 | + best_ious, best_n = ious.max(0) | ||
295 | + # Separate target values | ||
296 | + b, target_labels = target[:, :2].long().t() | ||
297 | + gx, gy = gxy.t() | ||
298 | + gw, gh = gwh.t() | ||
299 | + gi, gj = gxy.long().t() | ||
300 | + # Set masks | ||
301 | + obj_mask[b, best_n, gj, gi] = 1 | ||
302 | + noobj_mask[b, best_n, gj, gi] = 0 | ||
303 | + | ||
304 | + # Set noobj mask to zero where iou exceeds ignore threshold | ||
305 | + for i, anchor_ious in enumerate(ious.t()): | ||
306 | + noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0 | ||
307 | + | ||
308 | + # Coordinates | ||
309 | + tx[b, best_n, gj, gi] = gx - gx.floor() | ||
310 | + ty[b, best_n, gj, gi] = gy - gy.floor() | ||
311 | + # Width and height | ||
312 | + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) | ||
313 | + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) | ||
314 | + # One-hot encoding of label | ||
315 | + tcls[b, best_n, gj, gi, target_labels] = 1 | ||
316 | + # Compute label correctness and iou at best anchor | ||
317 | + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() | ||
318 | + iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) | ||
319 | + | ||
320 | + tconf = obj_mask.float() | ||
321 | + return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf | ||
322 | + | ||
323 | +def count_parameters(model): | ||
324 | + return sum(p.numel() for p in model.parameters() if p.requires_grad) | ||
325 | + | ||
326 | +def get_scale(detections): | ||
327 | + num_roi = detections.size(0) | ||
328 | + outputs = [] | ||
329 | + for num in range(num_roi): | ||
330 | + x1, y1, x2, y2 = detections[num] | ||
331 | + standard = 416/7 | ||
332 | + x1_scale = math.floor(x1/standard) | ||
333 | + y1_scale = math.floor(y1/standard) | ||
334 | + x2_scale = math.ceil(x2/standard) | ||
335 | + y2_scale = math.ceil(y2/standard) | ||
336 | + outputs.append([x1_scale, y1_scale, x2_scale, y2_scale]) | ||
337 | + | ||
338 | + return outputs |
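get_scale maps a box given in 416-pixel coordinates to inclusive cell-index ranges on a 7×7 grid (standard = 416/7); ROIPool.cal_scale then uses those indices to slice the feature map. A tiny sketch of the output:

```python
# get_scale returns floor/ceil cell indices on a 7x7 grid for each x1,y1,x2,y2 box.
import torch
from utils.utils import get_scale

boxes = torch.tensor([[100.0, 150.0, 220.0, 300.0]])  # one box in 416-pixel space
scales = get_scale(boxes)                              # cell range [x1, y1, x2, y2]: 1, 2, 4, 6 for this box
print(scales)
```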