최인훈

Distance Estimation Module

classes=12
train=data/cafe/train.txt
valid=data/cafe/valid.txt
names=data/cafe/classes.names

classes=5
train=data/cafe_distance/train.txt
valid=data/cafe_distance/valid.txt
names=data/cafe_distance/classes.names

classes=5
train=data/testdata/train.txt
valid=data/testdata/valid.txt
names=data/testdata/classes.names

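These *.data files follow the Darknet data-config convention (key=value pairs naming the class count, the train/valid image lists, and the class-name file) and are read by the parse_data_config helper added at the end of this diff. A minimal consumption sketch; the module path utils/parse_config and the .data file name are assumptions, since the diff shows file contents but not file names:

from utils.parse_config import parse_data_config

opts = parse_data_config("config/cafe_distance.data")  # hypothetical file name
num_classes = int(opts["classes"])   # -> 5
train_list = opts["train"]           # -> "data/cafe_distance/train.txt"
names_file = opts["names"]           # -> "data/cafe_distance/classes.names"
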
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

# 1
[maxpool]
size=2
stride=2

# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# 3
[maxpool]
size=2
stride=2

# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

# 5
[maxpool]
size=2
stride=2

# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

# 7
[maxpool]
size=2
stride=2

# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

# 9
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

# 10
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear

# 11
[yolo]
mask = 0, 1, 2
anchors = 37,58, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

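The 42 filters in the final 1x1 convolution are not arbitrary: a Darknet [yolo] layer consumes (classes + 5) channels per anchor in its mask, the 5 covering the four box coordinates plus objectness. A quick check, which also accounts for the 42 in the next config (classes=9 again) and the 30 in the two-head config further down (classes=5):

mask_anchors = 3                          # mask = 0, 1, 2
classes = 9
filters = (classes + 5) * mask_anchors    # 5 = x, y, w, h, objectness
assert filters == 42                      # matches the [convolutional] before [yolo]
assert (5 + 5) * 3 == 30                  # the classes=5 config below
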
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

# 1
[maxpool]
size=2
stride=2

# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# 3
[maxpool]
size=2
stride=2

# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

# 5
[maxpool]
size=2
stride=2

# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

# 7
[maxpool]
size=2
stride=2

# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

# 9
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear

# 10
[yolo]
mask = 0, 1, 2
anchors = 59,119, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

# 1
[maxpool]
size=2
stride=2

# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# 3
[maxpool]
size=2
stride=2

# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

# 5
[maxpool]
size=2
stride=2

# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

# 7
[maxpool]
size=2
stride=2

# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

# 9
[maxpool]
size=2
stride=2

# 10
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

# 11
[maxpool]
size=2
stride=1

# 12
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

# 13
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

# 14
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

# 15
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

# 16
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

# 17
[route]
layers = -4

# 18
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

# 19
[upsample]
stride=2

# 20
[route]
layers = -1, 8

# 21
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

# 22
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

# 23
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
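
This third configuration is the standard two-head yolov3-tiny layout: the first [yolo] layer predicts on the coarse grid with the three largest anchors (mask 3,4,5), then a route back to layer 8 plus a 2x upsample feeds a second head on the finer grid (mask 0,1,2). A sketch of the bookkeeping for the 416x416 input; the grid sizes follow from the five stride-2 maxpools (the stride=1 maxpool at layer 11 does not downsample):

width = 416                                # from the [net] block
coarse_grid = width // 32                  # 13x13 grid at the first [yolo] head
fine_grid = width // 16                    # 26x26 grid after the 2x upsample
anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
head1 = [anchors[i] for i in (3, 4, 5)]    # large boxes on the 13x13 grid
head2 = [anchors[i] for i in (0, 1, 2)]    # small boxes on the 26x26 grid
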
from __future__ import division

import torch
import torch.nn as nn

from utils.utils import *  # provides rescale_boxes and get_scale


class ROIPool(nn.Module):
    """ROI-pooling head that regresses one distance value per box.

    Feature-map crops are max-pooled to a fixed grid, flattened, and passed
    through three fully connected layers; Softplus keeps the output positive.
    """

    def __init__(self, output_size):
        super(ROIPool, self).__init__()
        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
        self.size = output_size
        # 2304 = channels * output_size**2 of the pooled crop
        # (e.g. 256 feature channels with a 3x3 pooled grid).
        self.fc1 = nn.Linear(2304, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)
        self.softplus = nn.Softplus()
        self.smoothl1 = nn.SmoothL1Loss()
        self.mse = nn.MSELoss()  # alternative loss, currently unused

    def target_detection_iou(self, box1, box2):
        """IoU between two single boxes in (x1, y1, x2, y2) corner format."""
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Promote both boxes to float64 so the min/max comparisons below never
        # mix dtypes (the original promoted only box1).
        b1_x1, b1_y1 = b1_x1.type(torch.float64), b1_y1.type(torch.float64)
        b1_x2, b1_y2 = b1_x2.type(torch.float64), b1_y2.type(torch.float64)
        b2_x1, b2_y1 = b2_x1.type(torch.float64), b2_y1.type(torch.float64)
        b2_x2, b2_y2 = b2_x2.type(torch.float64), b2_y2.type(torch.float64)

        # Coordinates of the intersection rectangle
        inter_rect_x1 = torch.max(b1_x1, b2_x1)
        inter_rect_y1 = torch.max(b1_y1, b2_y1)
        inter_rect_x2 = torch.min(b1_x2, b2_x2)
        inter_rect_y2 = torch.min(b1_y2, b2_y2)
        # Intersection area (+1: boxes are inclusive pixel ranges)
        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
            inter_rect_y2 - inter_rect_y1 + 1, min=0
        )
        # Union area
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

        return inter_area / (b1_area + b2_area - inter_area + 1e-16)

    def similar_bbox(self, detections, targets):
        """Match each detection to the distance label of the first target it
        overlaps (IoU > 0.01); unmatched detections get -1.

        The 640x480 image size and 416 network size are hard-coded for this
        dataset.
        """
        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
        similar_box = list(range(len(rescaled_boxes)))
        for i in range(len(rescaled_boxes)):
            for j in range(len(targets)):
                # Normalized (cx, cy, w, h) target -> pixel (x1, y1, x2, y2)
                target_xyxy = torch.tensor([
                    (targets[j][0] - (targets[j][2] / 2)) * 640,
                    (targets[j][1] - (targets[j][3] / 2)) * 480,
                    (targets[j][0] + (targets[j][2] / 2)) * 640,
                    (targets[j][1] + (targets[j][3] / 2)) * 480,
                ])
                iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
                if iou > 0.01:
                    similar_box[i] = targets[j][-1]
                    break
                else:
                    similar_box[i] = -1
        return similar_box

    def cal_scale(self, x, detections, targets):
        """Crop and pool the feature map at each ground-truth box (training).

        detections is unused here: during training the crops are taken at the
        labeled boxes so that every crop has a distance label.
        """
        targets_distance = targets[:, :4]
        square_targets = []

        for target_distance in targets_distance:
            # Empirical mapping of the normalized (cx, cy, w, h) labels onto
            # the 416x416 network frame (this dataset's images are 640x480,
            # letterboxed vertically).
            x1 = (target_distance[0] - (target_distance[2] / 2)) * 416
            y1 = ((target_distance[1] - (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            x2 = (target_distance[0] + (target_distance[2] / 2)) * 416
            y2 = ((target_distance[1] + (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            square_targets.append([x1, y1, x2, y2])
        square_targets = torch.tensor(square_targets)

        scale = get_scale(square_targets)

        roi_results = []
        for i in scale:
            # int() tolerates both Python ints and 0-dim tensors from get_scale.
            x1_scale, y1_scale = int(i[0]), int(i[1])
            x2_scale, y2_scale = int(i[2]), int(i[3])
            # Index as [batch, channel, rows (y), cols (x)]. The original sliced
            # the row axis with x here, inconsistent with cal_scale_eval; y-first
            # matches the NCHW layout.
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            roi_results.append(output.view(1, -1))
        return roi_results

    def cal_scale_eval(self, x, detections):
        """Crop and pool the feature map at each detected box (evaluation)."""
        detections = detections[:, :4]
        scale = get_scale(detections)
        roi_results = []
        for i in scale:
            x1_scale, y1_scale = int(i[0]), int(i[1])
            x2_scale, y2_scale = int(i[2]), int(i[3])
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            roi_results.append(output.view(1, -1))
        return roi_results

    def forward(self, x, detections, targets=None):
        if targets is not None:
            # Column 4 of targets holds the labeled distance; scaling by 10
            # keeps the regression target in a convenient numeric range.
            distances = targets[:, 4] * 10

            roi_results = self.cal_scale(x, detections, targets)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)

            distances = distances.to(output.device)
            # Flatten (N, 1) -> (N,) so each prediction pairs with its own
            # label instead of broadcasting to an (N, N) loss matrix.
            loss = self.smoothl1(output.view(-1), distances.float())
            return loss, output

        else:
            roi_results = self.cal_scale_eval(x, detections)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)
            return output
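
Since target_detection_iou depends only on torch, its inclusive +1 pixel convention is easy to verify in isolation; the boxes below are illustrative:

import torch

head = ROIPool(output_size=3)                   # pooled grid size; any value works here
det = torch.tensor([0.0, 0.0, 100.0, 100.0])    # detection (x1, y1, x2, y2)
tgt = torch.tensor([50.0, 50.0, 150.0, 150.0])  # target box
iou = head.target_detection_iou(det, tgt)
# intersection 51 * 51 = 2601; union 2 * 101 * 101 - 2601 = 17801; IoU ~ 0.146
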
import torch


def horisontal_flip(images, targets):
    # Flip the image left-right and mirror the normalized x-center of each box.
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
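
A quick self-contained check of the flip convention; in this repo's target layout (see ListDataset below) column 2 is the normalized box x-center:

import torch

img = torch.arange(12.0).reshape(1, 3, 4)              # toy 1-channel 3x4 image
tgt = torch.tensor([[0.0, 0.0, 0.25, 0.5, 0.2, 0.1]])  # (sample, class, cx, cy, w, h)
img, tgt = horisontal_flip(img, tgt)
assert tgt[0, 2] == 0.75                               # cx mirrored about 0.5
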
import glob
import random
import os
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F

from utils.augmentations import horisontal_flip
from torch.utils.data import Dataset
import torchvision.transforms as transforms


def pad_to_square(img, pad_value):
    c, h, w = img.shape
    dim_diff = np.abs(h - w)
    # (upper / left) padding and (lower / right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Determine padding
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    # Add padding
    img = F.pad(img, pad, "constant", value=pad_value)

    return img, pad


def resize(image, size):
    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
    return image


def random_resize(images, min_size=288, max_size=448):
    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
    images = F.interpolate(images, size=new_size, mode="nearest")
    return images


class ImageFolder(Dataset):
    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob("%s/*.*" % folder_path))
        self.img_size = img_size

    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path))
        # Pad to square resolution
        img, _ = pad_to_square(img, 0)
        # Resize
        img = resize(img, self.img_size)

        return img_path, img

    def __len__(self):
        return len(self.files)


class ListDataset(Dataset):
    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
        with open(list_path, "r") as file:
            self.img_files = file.readlines()

        self.label_files = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.img_files
        ]
        self.img_size = img_size
        self.max_objects = 100
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        self.min_size = self.img_size - 3 * 32
        self.max_size = self.img_size + 3 * 32
        self.batch_count = 0

    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))

        # Handle images with fewer than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        targets_distance = None
        if os.path.exists(label_path):
            # Each label row is (class, cx, cy, w, h, distance); reshaping to
            # (-1, 6) handles single-row and multi-row files uniformly.
            labels = np.loadtxt(label_path).reshape(-1, 6)
            boxes = torch.from_numpy(labels[:, :-1])  # drop the distance column
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

            # (cx, cy, w, h, distance) per object, with the class column dropped
            targets_distance = torch.from_numpy(labels[:, 1:])

        # Apply augmentations
        # if self.augment:
        #     if np.random.random() < 0.5:
        #         img, targets = horisontal_flip(img, targets)

        return img_path, img, targets, targets_distance

    def collate_fn(self, batch):
        paths, imgs, targets, targets_distance = list(zip(*batch))
        # Remove empty placeholder targets
        targets = [boxes for boxes in targets if boxes is not None]
        # Add sample index to targets
        for i, boxes in enumerate(targets):
            boxes[:, 0] = i
        targets = torch.cat(targets, 0)
        # Select a new image size every tenth batch
        if self.multiscale and self.batch_count % 10 == 0:
            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
        # Resize images to input shape
        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
        self.batch_count += 1
        return paths, imgs, targets, targets_distance

    def __len__(self):
        return len(self.img_files)
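
A sketch of the intended wiring; the list path comes from the cafe_distance data config above, and the batch size mirrors batch=8 in the cfg files:

from torch.utils.data import DataLoader

dataset = ListDataset("data/cafe_distance/train.txt", img_size=416)
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    collate_fn=dataset.collate_fn)
for paths, imgs, targets, targets_distance in loader:
    # imgs: (8, 3, S, S) with S re-chosen every 10 batches (multiscale);
    # targets: (num_boxes, 6); targets_distance: tuple of per-image tensors.
    break
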
import tensorflow as tf


class Logger(object):
    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.create_file_writer(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        # create_file_writer is the TF2 API, so scalars are written with
        # tf.summary.scalar inside the writer's context (the original mixed in
        # the TF1 tf.Summary protocol, which no longer exists in TF2).
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step=step)
            self.writer.flush()

    def list_of_scalars_summary(self, tag_value_pairs, step):
        """Log scalar variables."""
        with self.writer.as_default():
            for tag, value in tag_value_pairs:
                tf.summary.scalar(tag, value, step=step)
            self.writer.flush()
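
Usage sketch; tag names and values are illustrative:

logger = Logger("logs")
logger.scalar_summary("loss/distance", 0.42, step=100)
logger.list_of_scalars_summary([("loss/total", 1.3), ("recall", 0.8)], step=100)
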
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespace
    module_defs = []
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs


def parse_data_config(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options
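
Applied to any of the cfg files in this diff, parse_model_config yields one dict per block with the [net] hyperparameters first; the cfg path below is hypothetical:

module_defs = parse_model_config("config/yolov3-tiny-distance.cfg")  # hypothetical path
hyperparams = module_defs.pop(0)             # the [net] block
print(hyperparams["learning_rate"])          # -> '0.001' (values stay strings)
print([m["type"] for m in module_defs[:3]])  # -> ['convolutional', 'maxpool', 'convolutional']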