Showing
25 changed files
with
950 additions
and
0 deletions
source_code/config/cafe.data
0 → 100644
source_code/config/cafe_distance.data
0 → 100644
source_code/config/testdata.data
0 → 100644
source_code/config/tiny1.cfg
0 → 100644
| 1 | +[net] | ||
| 2 | +# Testing | ||
| 3 | +# batch=1 | ||
| 4 | +# subdivisions=1 | ||
| 5 | +# Training | ||
| 6 | +batch=8 | ||
| 7 | +subdivisions=2 | ||
| 8 | +width=416 | ||
| 9 | +height=416 | ||
| 10 | +channels=3 | ||
| 11 | +momentum=0.9 | ||
| 12 | +decay=0.0005 | ||
| 13 | +angle=0 | ||
| 14 | +saturation = 1.5 | ||
| 15 | +exposure = 1.5 | ||
| 16 | +hue=.1 | ||
| 17 | + | ||
| 18 | +learning_rate=0.001 | ||
| 19 | +burn_in=1000 | ||
| 20 | +max_batches = 500200 | ||
| 21 | +policy=steps | ||
| 22 | +steps=400000,450000 | ||
| 23 | +scales=.1,.1 | ||
| 24 | + | ||
| 25 | +# 0 | ||
| 26 | +[convolutional] | ||
| 27 | +batch_normalize=1 | ||
| 28 | +filters=16 | ||
| 29 | +size=3 | ||
| 30 | +stride=1 | ||
| 31 | +pad=1 | ||
| 32 | +activation=leaky | ||
| 33 | + | ||
| 34 | +# 1 | ||
| 35 | +[maxpool] | ||
| 36 | +size=2 | ||
| 37 | +stride=2 | ||
| 38 | + | ||
| 39 | +# 2 | ||
| 40 | +[convolutional] | ||
| 41 | +batch_normalize=1 | ||
| 42 | +filters=32 | ||
| 43 | +size=3 | ||
| 44 | +stride=1 | ||
| 45 | +pad=1 | ||
| 46 | +activation=leaky | ||
| 47 | + | ||
| 48 | +# 3 | ||
| 49 | +[maxpool] | ||
| 50 | +size=2 | ||
| 51 | +stride=2 | ||
| 52 | + | ||
| 53 | +# 4 | ||
| 54 | +[convolutional] | ||
| 55 | +batch_normalize=1 | ||
| 56 | +filters=64 | ||
| 57 | +size=3 | ||
| 58 | +stride=1 | ||
| 59 | +pad=1 | ||
| 60 | +activation=leaky | ||
| 61 | + | ||
| 62 | +# 5 | ||
| 63 | +[maxpool] | ||
| 64 | +size=2 | ||
| 65 | +stride=2 | ||
| 66 | + | ||
| 67 | +# 6 | ||
| 68 | +[convolutional] | ||
| 69 | +batch_normalize=1 | ||
| 70 | +filters=128 | ||
| 71 | +size=3 | ||
| 72 | +stride=1 | ||
| 73 | +pad=1 | ||
| 74 | +activation=leaky | ||
| 75 | + | ||
| 76 | +# 7 | ||
| 77 | +[maxpool] | ||
| 78 | +size=2 | ||
| 79 | +stride=2 | ||
| 80 | + | ||
| 81 | +# 8 | ||
| 82 | +[convolutional] | ||
| 83 | +batch_normalize=1 | ||
| 84 | +filters=256 | ||
| 85 | +size=3 | ||
| 86 | +stride=1 | ||
| 87 | +pad=1 | ||
| 88 | +activation=leaky | ||
| 89 | + | ||
| 90 | +# 9 | ||
| 91 | +[convolutional] | ||
| 92 | +batch_normalize=1 | ||
| 93 | +filters=512 | ||
| 94 | +size=3 | ||
| 95 | +stride=1 | ||
| 96 | +pad=1 | ||
| 97 | +activation=leaky | ||
| 98 | + | ||
| 99 | +# 10 | ||
| 100 | +[convolutional] | ||
| 101 | +size=1 | ||
| 102 | +stride=1 | ||
| 103 | +pad=1 | ||
| 104 | +filters=42 | ||
| 105 | +activation=linear | ||
| 106 | + | ||
| 107 | +# 11 | ||
| 108 | +[yolo] | ||
| 109 | +mask = 0, 1, 2 | ||
| 110 | +anchors = 37,58, 81,82, 135,169 | ||
| 111 | +classes=9 | ||
| 112 | +num=3 | ||
| 113 | +jitter=.3 | ||
| 114 | +ignore_thresh = .7 | ||
| 115 | +truth_thresh = 1 | ||
| 116 | +random=1 | ||
| 117 | + | ||
| 118 | + | ||
| 119 | + | ||
| 120 | + | ||
| 121 | + |
source_code/config/tiny2.cfg
0 → 100644
| 1 | +[net] | ||
| 2 | +# Testing | ||
| 3 | +# batch=1 | ||
| 4 | +# subdivisions=1 | ||
| 5 | +# Training | ||
| 6 | +batch=8 | ||
| 7 | +subdivisions=2 | ||
| 8 | +width=416 | ||
| 9 | +height=416 | ||
| 10 | +channels=3 | ||
| 11 | +momentum=0.9 | ||
| 12 | +decay=0.0005 | ||
| 13 | +angle=0 | ||
| 14 | +saturation = 1.5 | ||
| 15 | +exposure = 1.5 | ||
| 16 | +hue=.1 | ||
| 17 | + | ||
| 18 | +learning_rate=0.001 | ||
| 19 | +burn_in=1000 | ||
| 20 | +max_batches = 500200 | ||
| 21 | +policy=steps | ||
| 22 | +steps=400000,450000 | ||
| 23 | +scales=.1,.1 | ||
| 24 | + | ||
| 25 | +# 0 | ||
| 26 | +[convolutional] | ||
| 27 | +batch_normalize=1 | ||
| 28 | +filters=16 | ||
| 29 | +size=3 | ||
| 30 | +stride=1 | ||
| 31 | +pad=1 | ||
| 32 | +activation=leaky | ||
| 33 | + | ||
| 34 | +# 1 | ||
| 35 | +[maxpool] | ||
| 36 | +size=2 | ||
| 37 | +stride=2 | ||
| 38 | + | ||
| 39 | +# 2 | ||
| 40 | +[convolutional] | ||
| 41 | +batch_normalize=1 | ||
| 42 | +filters=32 | ||
| 43 | +size=3 | ||
| 44 | +stride=1 | ||
| 45 | +pad=1 | ||
| 46 | +activation=leaky | ||
| 47 | + | ||
| 48 | +# 3 | ||
| 49 | +[maxpool] | ||
| 50 | +size=2 | ||
| 51 | +stride=2 | ||
| 52 | + | ||
| 53 | +# 4 | ||
| 54 | +[convolutional] | ||
| 55 | +batch_normalize=1 | ||
| 56 | +filters=64 | ||
| 57 | +size=3 | ||
| 58 | +stride=1 | ||
| 59 | +pad=1 | ||
| 60 | +activation=leaky | ||
| 61 | + | ||
| 62 | +# 5 | ||
| 63 | +[maxpool] | ||
| 64 | +size=2 | ||
| 65 | +stride=2 | ||
| 66 | + | ||
| 67 | +# 6 | ||
| 68 | +[convolutional] | ||
| 69 | +batch_normalize=1 | ||
| 70 | +filters=128 | ||
| 71 | +size=3 | ||
| 72 | +stride=1 | ||
| 73 | +pad=1 | ||
| 74 | +activation=leaky | ||
| 75 | + | ||
| 76 | +# 7 | ||
| 77 | +[maxpool] | ||
| 78 | +size=2 | ||
| 79 | +stride=2 | ||
| 80 | + | ||
| 81 | +# 8 | ||
| 82 | +[convolutional] | ||
| 83 | +batch_normalize=1 | ||
| 84 | +filters=256 | ||
| 85 | +size=3 | ||
| 86 | +stride=1 | ||
| 87 | +pad=1 | ||
| 88 | +activation=leaky | ||
| 89 | + | ||
| 90 | +# 9 | ||
| 91 | +[convolutional] | ||
| 92 | +size=1 | ||
| 93 | +stride=1 | ||
| 94 | +pad=1 | ||
| 95 | +filters=42 | ||
| 96 | +activation=linear | ||
| 97 | + | ||
| 98 | +# 10 | ||
| 99 | +[yolo] | ||
| 100 | +mask = 0, 1, 2 | ||
| 101 | +anchors = 59,119, 81,82, 135,169 | ||
| 102 | +classes=9 | ||
| 103 | +num=3 | ||
| 104 | +jitter=.3 | ||
| 105 | +ignore_thresh = .7 | ||
| 106 | +truth_thresh = 1 | ||
| 107 | +random=1 | ||
| 108 | + | ||
| 109 | + | ||
| 110 | + | ||
| 111 | + | ||
| 112 | + |
source_code/config/yolov3-tiny.cfg
0 → 100644
| 1 | +[net] | ||
| 2 | +# Testing | ||
| 3 | +# batch=1 | ||
| 4 | +# subdivisions=1 | ||
| 5 | +# Training | ||
| 6 | +batch=8 | ||
| 7 | +subdivisions=2 | ||
| 8 | +width=416 | ||
| 9 | +height=416 | ||
| 10 | +channels=3 | ||
| 11 | +momentum=0.9 | ||
| 12 | +decay=0.0005 | ||
| 13 | +angle=0 | ||
| 14 | +saturation = 1.5 | ||
| 15 | +exposure = 1.5 | ||
| 16 | +hue=.1 | ||
| 17 | + | ||
| 18 | +learning_rate=0.001 | ||
| 19 | +burn_in=1000 | ||
| 20 | +max_batches = 500200 | ||
| 21 | +policy=steps | ||
| 22 | +steps=400000,450000 | ||
| 23 | +scales=.1,.1 | ||
| 24 | + | ||
| 25 | +# 0 | ||
| 26 | +[convolutional] | ||
| 27 | +batch_normalize=1 | ||
| 28 | +filters=16 | ||
| 29 | +size=3 | ||
| 30 | +stride=1 | ||
| 31 | +pad=1 | ||
| 32 | +activation=leaky | ||
| 33 | + | ||
| 34 | +# 1 | ||
| 35 | +[maxpool] | ||
| 36 | +size=2 | ||
| 37 | +stride=2 | ||
| 38 | + | ||
| 39 | +# 2 | ||
| 40 | +[convolutional] | ||
| 41 | +batch_normalize=1 | ||
| 42 | +filters=32 | ||
| 43 | +size=3 | ||
| 44 | +stride=1 | ||
| 45 | +pad=1 | ||
| 46 | +activation=leaky | ||
| 47 | + | ||
| 48 | +# 3 | ||
| 49 | +[maxpool] | ||
| 50 | +size=2 | ||
| 51 | +stride=2 | ||
| 52 | + | ||
| 53 | +# 4 | ||
| 54 | +[convolutional] | ||
| 55 | +batch_normalize=1 | ||
| 56 | +filters=64 | ||
| 57 | +size=3 | ||
| 58 | +stride=1 | ||
| 59 | +pad=1 | ||
| 60 | +activation=leaky | ||
| 61 | + | ||
| 62 | +# 5 | ||
| 63 | +[maxpool] | ||
| 64 | +size=2 | ||
| 65 | +stride=2 | ||
| 66 | + | ||
| 67 | +# 6 | ||
| 68 | +[convolutional] | ||
| 69 | +batch_normalize=1 | ||
| 70 | +filters=128 | ||
| 71 | +size=3 | ||
| 72 | +stride=1 | ||
| 73 | +pad=1 | ||
| 74 | +activation=leaky | ||
| 75 | + | ||
| 76 | +# 7 | ||
| 77 | +[maxpool] | ||
| 78 | +size=2 | ||
| 79 | +stride=2 | ||
| 80 | + | ||
| 81 | +# 8 | ||
| 82 | +[convolutional] | ||
| 83 | +batch_normalize=1 | ||
| 84 | +filters=256 | ||
| 85 | +size=3 | ||
| 86 | +stride=1 | ||
| 87 | +pad=1 | ||
| 88 | +activation=leaky | ||
| 89 | + | ||
| 90 | +# 9 | ||
| 91 | +[maxpool] | ||
| 92 | +size=2 | ||
| 93 | +stride=2 | ||
| 94 | + | ||
| 95 | +# 10 | ||
| 96 | +[convolutional] | ||
| 97 | +batch_normalize=1 | ||
| 98 | +filters=512 | ||
| 99 | +size=3 | ||
| 100 | +stride=1 | ||
| 101 | +pad=1 | ||
| 102 | +activation=leaky | ||
| 103 | + | ||
| 104 | +# 11 | ||
| 105 | +[maxpool] | ||
| 106 | +size=2 | ||
| 107 | +stride=1 | ||
| 108 | + | ||
| 109 | +# 12 | ||
| 110 | +[convolutional] | ||
| 111 | +batch_normalize=1 | ||
| 112 | +filters=1024 | ||
| 113 | +size=3 | ||
| 114 | +stride=1 | ||
| 115 | +pad=1 | ||
| 116 | +activation=leaky | ||
| 117 | + | ||
| 118 | +########### | ||
| 119 | + | ||
| 120 | +# 13 | ||
| 121 | +[convolutional] | ||
| 122 | +batch_normalize=1 | ||
| 123 | +filters=256 | ||
| 124 | +size=1 | ||
| 125 | +stride=1 | ||
| 126 | +pad=1 | ||
| 127 | +activation=leaky | ||
| 128 | + | ||
| 129 | +# 14 | ||
| 130 | +[convolutional] | ||
| 131 | +batch_normalize=1 | ||
| 132 | +filters=512 | ||
| 133 | +size=3 | ||
| 134 | +stride=1 | ||
| 135 | +pad=1 | ||
| 136 | +activation=leaky | ||
| 137 | + | ||
| 138 | +# 15 | ||
| 139 | +[convolutional] | ||
| 140 | +size=1 | ||
| 141 | +stride=1 | ||
| 142 | +pad=1 | ||
| 143 | +filters=30 | ||
| 144 | +activation=linear | ||
| 145 | + | ||
| 146 | + | ||
| 147 | + | ||
| 148 | +# 16 | ||
| 149 | +[yolo] | ||
| 150 | +mask = 3,4,5 | ||
| 151 | +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 | ||
| 152 | +classes=5 | ||
| 153 | +num=6 | ||
| 154 | +jitter=.3 | ||
| 155 | +ignore_thresh = .7 | ||
| 156 | +truth_thresh = 1 | ||
| 157 | +random=1 | ||
| 158 | + | ||
| 159 | +# 17 | ||
| 160 | +[route] | ||
| 161 | +layers = -4 | ||
| 162 | + | ||
| 163 | +# 18 | ||
| 164 | +[convolutional] | ||
| 165 | +batch_normalize=1 | ||
| 166 | +filters=128 | ||
| 167 | +size=1 | ||
| 168 | +stride=1 | ||
| 169 | +pad=1 | ||
| 170 | +activation=leaky | ||
| 171 | + | ||
| 172 | +# 19 | ||
| 173 | +[upsample] | ||
| 174 | +stride=2 | ||
| 175 | + | ||
| 176 | +# 20 | ||
| 177 | +[route] | ||
| 178 | +layers = -1, 8 | ||
| 179 | + | ||
| 180 | +# 21 | ||
| 181 | +[convolutional] | ||
| 182 | +batch_normalize=1 | ||
| 183 | +filters=256 | ||
| 184 | +size=3 | ||
| 185 | +stride=1 | ||
| 186 | +pad=1 | ||
| 187 | +activation=leaky | ||
| 188 | + | ||
| 189 | +# 22 | ||
| 190 | +[convolutional] | ||
| 191 | +size=1 | ||
| 192 | +stride=1 | ||
| 193 | +pad=1 | ||
| 194 | +filters=30 | ||
| 195 | +activation=linear | ||
| 196 | + | ||
| 197 | +# 23 | ||
| 198 | +[yolo] | ||
| 199 | +mask = 0,1,2 | ||
| 200 | +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 | ||
| 201 | +classes=5 | ||
| 202 | +num=6 | ||
| 203 | +jitter=.3 | ||
| 204 | +ignore_thresh = .7 | ||
| 205 | +truth_thresh = 1 | ||
| 206 | +random=1 |
source_code/models.py
0 → 100644
This diff is collapsed. Click to expand it.
source_code/roipool.py
0 → 100644
| 1 | +from __future__ import division | ||
| 2 | + | ||
| 3 | +import torch | ||
| 4 | +import torch.nn as nn | ||
| 5 | +import torch.nn.functional as F | ||
| 6 | +from torch.nn.modules import module | ||
| 7 | + | ||
| 8 | +from utils.utils import * | ||
| 9 | + | ||
| 10 | + | ||
class ROIPool(nn.Module):
    """ROI-pooling head that regresses one positive scalar per region.

    Every region of the feature map is adaptively max-pooled to
    ``output_size``, flattened, and pushed through three linear layers
    followed by a Softplus.  When ``targets`` is supplied to ``forward`` the
    prediction is additionally scored against column 4 of ``targets``
    (multiplied by 10) with a Smooth-L1 loss.
    """

    def __init__(self, output_size):
        """Build the pooling layer and the regression layers.

        Args:
            output_size: target size handed to ``nn.AdaptiveMaxPool2d``.
                The flattened pooled region must contain 2304 values, which
                is what ``fc1`` expects (e.g. 256 channels pooled to 3x3 --
                TODO confirm against the backbone).
        """
        super(ROIPool, self).__init__()
        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
        self.size = output_size
        self.fc1 = nn.Linear(2304, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)
        self.softplus = nn.Softplus()
        self.smoothl1 = nn.SmoothL1Loss()
        self.mse = nn.MSELoss()

    def target_detection_iou(self, box1, box2):
        """Return the intersection-over-union of two corner-format boxes.

        Both boxes are indexed as ``(x1, y1, x2, y2)``.  Widths and heights
        are computed with a ``+ 1`` (inclusive pixel) convention.

        Args:
            box1: tensor whose first four entries are the box corners; its
                coordinates are cast to ``float64`` before use.
            box2: tensor-like with the same layout; used as given.

        Returns:
            The IoU as a tensor.
        """
        b1_x1, b1_y1, b1_x2, b1_y2 = (
            box1[k].type(torch.float64) for k in range(4)
        )
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Corners of the intersection rectangle.
        inter_x1 = torch.max(b1_x1, b2_x1)
        inter_y1 = torch.max(b1_y1, b2_y1)
        inter_x2 = torch.min(b1_x2, b2_x2)
        inter_y2 = torch.min(b1_y2, b2_y2)

        # Clamp at zero so disjoint boxes contribute no area.
        inter_w = torch.clamp(inter_x2 - inter_x1 + 1, min=0)
        inter_h = torch.clamp(inter_y2 - inter_y1 + 1, min=0)
        inter_area = inter_w * inter_h

        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

        # The epsilon keeps the division defined for degenerate boxes.
        return inter_area / (b1_area + b2_area - inter_area + 1e-16)

    def similar_bbox(self, detections, targets):
        """Match every detection to the first target that overlaps it.

        Detections are rescaled from the 416 network input to a 480x640
        image with ``rescale_boxes``; targets are read as normalised
        ``(cx, cy, w, h, ..., value)`` rows and converted to pixel corners
        on the same 640x480 image.

        Returns:
            A list with one entry per detection: the last element of the
            first target whose IoU with the detection exceeds 0.01, or
            ``-1`` when no target matches (including when ``targets`` is
            empty).
        """
        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
        similar_box = []
        for box in rescaled_boxes:
            matched = -1
            for target in targets:
                half_w = target[2] / 2
                half_h = target[3] / 2
                target_xyxy = torch.tensor([
                    (target[0] - half_w) * 640,
                    (target[1] - half_h) * 480,
                    (target[0] + half_w) * 640,
                    (target[1] + half_h) * 480,
                ])
                if self.target_detection_iou(box[:4], target_xyxy) > 0.01:
                    matched = target[-1]
                    break
            similar_box.append(matched)
        return similar_box

    def _pool_regions(self, x, scale, x_on_dim2):
        """Crop each region of ``scale`` out of ``x`` and max-pool it.

        Args:
            x: feature map indexed as ``x[:, :, a, b]``.
            scale: iterable of ``(x1, y1, x2, y2)`` index quadruples as
                returned by ``get_scale``; bounds are inclusive.
            x_on_dim2: when true the x-range slices dimension 2 and the
                y-range dimension 3; otherwise the other way round.

        Returns:
            A list of ``(1, features)`` tensors, one per region.
        """
        pooled = []
        for region in scale:
            x1, y1, x2, y2 = region[0], region[1], region[2], region[3]
            if x_on_dim2:
                crop = x[:, :, x1:x2 + 1, y1:y2 + 1]
            else:
                crop = x[:, :, y1:y2 + 1, x1:x2 + 1]
            pooled.append(self.maxpool(crop).view(1, -1))
        return pooled

    def _regress(self, roi_results):
        """Stack pooled regions and run them through fc1-fc3 and Softplus."""
        output = torch.cat(roi_results, 0)
        output = self.fc1(output)
        output = self.fc2(output)
        output = self.fc3(output)
        return self.softplus(output)

    def cal_scale(self, x, detections, targets):
        """Pool the feature-map regions described by ground-truth targets.

        The first four columns of ``targets`` are read as normalised
        ``(cx, cy, w, h)`` and mapped to corner coordinates: x by a factor
        of 416, y through ``(y * 480 + 80) * 13 / 15``.  ``detections`` is
        accepted for signature compatibility but not used.

        Returns:
            A list of ``(1, features)`` tensors, one per target.
        """
        square_targets = []
        for target in targets[:, :4]:
            half_w = target[2] / 2
            half_h = target[3] / 2
            x1 = (target[0] - half_w) * 416
            y1 = ((target[1] - half_h) * 480 + 80) * 13 / 15
            x2 = (target[0] + half_w) * 416
            y2 = ((target[1] + half_h) * 480 + 80) * 13 / 15
            square_targets.append([x1, y1, x2, y2])
        square_targets = torch.tensor(square_targets)

        scale = get_scale(square_targets)
        # NOTE(review): this path slices dim 2 with the x-range and dim 3
        # with the y-range, the opposite of cal_scale_evaL.  Kept as in the
        # original; confirm which axis order get_scale/the backbone expect.
        return self._pool_regions(x, scale, x_on_dim2=True)

    def cal_scale_evaL(self, x, detections):
        """Pool the feature-map regions described by detections.

        Only the first four columns of ``detections`` are passed to
        ``get_scale``.

        Returns:
            A list of ``(1, features)`` tensors, one per detection.
        """
        scale = get_scale(detections[:, :4])
        return self._pool_regions(x, scale, x_on_dim2=False)

    def forward(self, x, detections, targets=None):
        """Predict one value per region, optionally with a training loss.

        Args:
            x: feature map to pool from.
            detections: detection boxes; used for pooling when ``targets``
                is ``None``.
            targets: optional ground-truth rows.  Columns 0-3 give the box,
                column 4 the regression target (scaled by 10 before the
                loss).

        Returns:
            ``output`` of shape ``(regions, 1)`` when ``targets`` is
            ``None``; otherwise the tuple ``(loss, output)``.
        """
        if targets is None:
            return self._regress(self.cal_scale_evaL(x, detections))

        distances = targets[:, 4] * 10
        output = self._regress(self.cal_scale(x, detections, targets))

        # Follow the prediction's device instead of forcing CUDA, and give
        # the target the prediction's (N, 1) shape: comparing (N, 1) with
        # (N,) would broadcast to (N, N) and average over mismatched pairs.
        distances = distances.to(output.device).float().reshape(output.shape)
        loss = self.smoothl1(output, distances)
        return loss, output
| 262 | + | ||
| 263 | + | ||
| 264 | + | ||
| 265 | + | ||
| 266 | + | ||
| 267 | + | ||
| 268 | + | ||
| 269 | + |
source_code/utils/__init__.py
0 → 100644
File mode changed
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
source_code/utils/augmentations.py
0 → 100644
source_code/utils/datasets.py
0 → 100644
| 1 | +import glob | ||
| 2 | +import random | ||
| 3 | +import os | ||
| 4 | +import sys | ||
| 5 | +import numpy as np | ||
| 6 | +from PIL import Image | ||
| 7 | +import torch | ||
| 8 | +import torch.nn.functional as F | ||
| 9 | +import time | ||
| 10 | + | ||
| 11 | +from utils.augmentations import horisontal_flip | ||
| 12 | +from torch.utils.data import Dataset | ||
| 13 | +import torchvision.transforms as transforms | ||
| 14 | + | ||
| 15 | + | ||
def pad_to_square(img, pad_value):
    """Pad the shorter spatial side of a (C, H, W) tensor so that H == W.

    The padding is split between both sides of the short axis, with any odd
    remainder going to the lower/right side.

    Returns:
        The padded tensor and the ``F.pad`` tuple
        ``(left, right, top, bottom)`` that was applied.
    """
    _, height, width = img.shape
    gap = np.abs(height - width)
    before = gap // 2
    after = gap - before
    if height <= width:
        # Wider than tall (or already square): grow top and bottom.
        pad = (0, 0, before, after)
    else:
        # Taller than wide: grow left and right.
        pad = (before, after, 0, 0)
    padded = F.pad(img, pad, "constant", value=pad_value)
    return padded, pad
| 27 | + | ||
| 28 | + | ||
def resize(image, size):
    """Resize a single unbatched image tensor with nearest-neighbour sampling.

    A leading batch axis is added for ``F.interpolate`` and removed again
    before returning.
    """
    batched = image.unsqueeze(0)
    scaled = F.interpolate(batched, size=size, mode="nearest")
    return scaled.squeeze(0)
| 32 | + | ||
| 33 | + | ||
def random_resize(images, min_size=288, max_size=448):
    """Resize a batch to a randomly drawn size with nearest-neighbour sampling.

    The size is drawn from ``min_size, min_size + 32, ...`` up to and
    including ``max_size`` when it lies on that grid.
    """
    candidates = list(range(min_size, max_size + 1, 32))
    (new_size,) = random.sample(candidates, 1)
    return F.interpolate(images, size=new_size, mode="nearest")
| 38 | + | ||
| 39 | + | ||
class ImageFolder(Dataset):
    """Dataset over every ``*.*`` file in a folder, in sorted path order."""

    def __init__(self, folder_path, img_size=416):
        pattern = "%s/*.*" % folder_path
        self.files = sorted(glob.glob(pattern))
        self.img_size = img_size

    def __getitem__(self, index):
        """Return ``(path, image)`` with the image padded square and resized.

        The index wraps around the number of files.
        """
        img_path = self.files[index % len(self.files)]
        # Load the image as a tensor.
        to_tensor = transforms.ToTensor()
        tensor = to_tensor(Image.open(img_path))
        # Zero-pad to a square, then scale to the configured size.
        squared, _ = pad_to_square(tensor, 0)
        return img_path, resize(squared, self.img_size)

    def __len__(self):
        return len(self.files)
| 58 | + | ||
| 59 | + | ||
class ListDataset(Dataset):
    """Dataset driven by a text file that lists one image path per line.

    The label file of an image is found by replacing ``images`` with
    ``labels`` and the ``.png``/``.jpg`` suffix with ``.txt`` in its path.
    Each label file is read with ``np.loadtxt``; all columns but the last
    become the detection boxes and all columns but the first become
    ``targets_distance`` (both reshaped to five columns).
    """

    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
        """Read the image list and store the loading options.

        Args:
            list_path: text file with one image path per line.
            img_size: side length used when batches are collated.
            augment: stored on the instance; no augmentation is applied by
                this class at present.
            multiscale: when true, ``collate_fn`` re-draws ``img_size``
                every tenth batch.
            normalized_labels: when true, box coordinates are treated as
                fractions of the unpadded image size.
        """
        with open(list_path, "r") as file:
            self.img_files = file.readlines()

        self.label_files = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.img_files
        ]
        self.img_size = img_size
        self.max_objects = 100
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        self.min_size = self.img_size - 3 * 32
        self.max_size = self.img_size + 3 * 32
        self.batch_count = 0

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(img_path, img, targets, targets_distance)``.  ``img`` is the
            zero-padded square image tensor.  ``targets`` has six columns
            (a sample-index slot filled in by ``collate_fn`` followed by
            the five box columns, re-expressed relative to the padded
            image).  ``targets`` and ``targets_distance`` are ``None`` when
            the label file does not exist.
        """
        index %= len(self.img_files)

        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index].rstrip()
        # convert('RGB') always yields three channels, so no separate
        # handling of grey-scale images is required afterwards.
        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution.
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------
        label_path = self.label_files[index].rstrip()

        targets = None
        targets_distance = None
        if os.path.exists(label_path):
            # Parse the file once; atleast_2d makes a single-row file look
            # like the multi-row case.  The copies keep the two tensors
            # independent, because the box columns are edited in place.
            raw = np.atleast_2d(np.loadtxt(label_path))
            boxes = torch.from_numpy(raw[:, :-1].reshape(-1, 5).copy())
            targets_distance = torch.from_numpy(raw[:, 1:].reshape(-1, 5).copy())

            # Corner coordinates on the unpadded, unscaled image.
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Shift by the padding that was added.
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Back to (x, y, w, h), now relative to the padded image.
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        return img_path, img, targets, targets_distance

    def collate_fn(self, batch):
        """Merge samples into a batch.

        Column 0 of every target row is set to the position of its image
        inside the batch, so rows keep pointing at the right image even
        when other samples have no labels.  When no sample has labels an
        empty ``(0, 6)`` tensor is returned for ``targets``.

        Returns:
            ``(paths, imgs, targets, targets_distance)`` where ``imgs`` is
            the stacked, resized image tensor and ``targets_distance`` is
            the untouched per-sample tuple.
        """
        paths, imgs, targets, targets_distance = list(zip(*batch))

        labelled = []
        for sample_idx, boxes in enumerate(targets):
            if boxes is None:
                continue
            boxes[:, 0] = sample_idx
            labelled.append(boxes)
        targets = torch.cat(labelled, 0) if labelled else torch.zeros((0, 6))

        # Select a new image size every tenth batch.
        if self.multiscale and self.batch_count % 10 == 0:
            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
        # Resize images to the input shape.
        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
        self.batch_count += 1
        return paths, imgs, targets, targets_distance

    def __len__(self):
        return len(self.img_files)
source_code/utils/logger.py
0 → 100644
| 1 | +import tensorflow as tf | ||
| 2 | + | ||
| 3 | + | ||
class Logger(object):
    """Minimal scalar logger on top of a TensorFlow summary file writer.

    The writer is created with the TensorFlow 2 API
    (``tf.summary.create_file_writer``), so scalars are written with the
    matching ``tf.summary.scalar`` call inside the writer's default
    context rather than with the TF1-only ``tf.Summary`` protobuf and
    ``add_summary`` method, which that writer does not provide.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.create_file_writer(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        with self.writer.as_default():
            tf.summary.scalar(tag, float(value), step=step)
        self.writer.flush()

    def list_of_scalars_summary(self, tag_value_pairs, step):
        """Log scalar variables.

        Args:
            tag_value_pairs: iterable of ``(tag, value)`` pairs.
            step: step under which all pairs are recorded.
        """
        with self.writer.as_default():
            for tag, value in tag_value_pairs:
                tf.summary.scalar(tag, float(value), step=step)
        self.writer.flush()
source_code/utils/parse_config.py
0 → 100644
| 1 | + | ||
| 2 | + | ||
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    Args:
        path: path of the ``.cfg`` file.

    Returns:
        A list with one dict per ``[section]``.  Each dict holds the
        section name under ``'type'`` and every ``key=value`` line of the
        section as stripped strings.  ``convolutional`` sections start
        with ``'batch_normalize'`` set to the integer ``0`` so the key is
        always present.
    """
    # The context manager closes the file even if parsing fails.
    with open(path, 'r') as file:
        raw_lines = file.read().split('\n')

    module_defs = []
    for raw in raw_lines:
        # Strip first so indented comments and whitespace-only lines are
        # recognised and skipped.
        line = raw.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):  # This marks the start of a new block
            block = {'type': line[1:-1].rstrip()}
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
            module_defs.append(block)
        else:
            # Split on the first '=' only, so values may contain '='.
            key, value = line.split("=", 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
| 22 | + | ||
def parse_data_config(path):
    """Parses the data configuration file.

    Args:
        path: path of a file with ``key=value`` lines; blank lines and
            lines starting with ``#`` are ignored.

    Returns:
        A dict of stripped string keys and values.  ``'gpus'`` and
        ``'num_workers'`` default to ``'0,1,2,3'`` and ``'10'`` unless the
        file overrides them.
    """
    options = {'gpus': '0,1,2,3', 'num_workers': '10'}
    with open(path, 'r') as fp:
        for raw in fp:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue
            # Split on the first '=' only, so values may contain '='.
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options
source_code/utils/utils.py
0 → 100644
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment