최인훈

Distance Estimation Module

classes=12
train=data/cafe/train.txt
valid=data/cafe/valid.txt
names=data/cafe/classes.names
\ No newline at end of file
classes=5
train=data/cafe_distance/train.txt
valid=data/cafe_distance/valid.txt
names=data/cafe_distance/classes.names
classes=5
train=data/testdata/train.txt
valid=data/testdata/valid.txt
names=data/testdata/classes.names
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 10
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 11
[yolo]
mask = 0, 1, 2
anchors = 37,58, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 10
[yolo]
mask = 0, 1, 2
anchors = 59,119, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[maxpool]
size=2
stride=2
# 10
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 11
[maxpool]
size=2
stride=1
# 12
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
# 13
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
# 14
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 15
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 16
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
# 17
[route]
layers = -4
# 18
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
# 19
[upsample]
stride=2
# 20
[route]
layers = -1, 8
# 21
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 22
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 23
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from utils.parse_config import *
from utils.utils import build_targets, to_cpu, non_max_suppression
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def create_modules(module_defs):
"""
Constructs module list of layer blocks from module configuration in module_defs
"""
hyperparams = module_defs.pop(0)
output_filters = [int(hyperparams["channels"])]
module_list = nn.ModuleList()
for module_i, module_def in enumerate(module_defs):
modules = nn.Sequential()
if module_def["type"] == "convolutional":
bn = int(module_def["batch_normalize"])
filters = int(module_def["filters"])
kernel_size = int(module_def["size"])
pad = (kernel_size - 1) // 2
modules.add_module(
"conv_{}".format(module_i),
nn.Conv2d(
in_channels=output_filters[-1],
out_channels=filters,
kernel_size=kernel_size,
stride=int(module_def["stride"]),
padding=pad,
bias=not bn,
),
)
if bn:
modules.add_module("batch_norm_{}".format(module_i), nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
if module_def["activation"] == "leaky":
modules.add_module("leaky_{}".format(module_i), nn.LeakyReLU(0.1))
elif module_def["type"] == "maxpool":
kernel_size = int(module_def["size"])
stride = int(module_def["stride"])
if kernel_size == 2 and stride == 1:
modules.add_module("_debug_padding_{}".format(module_i), nn.ZeroPad2d((0, 1, 0, 1)))
maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
modules.add_module("maxpool_{}".format(module_i), maxpool)
elif module_def["type"] == "upsample":
upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
modules.add_module("upsample_{}".format(module_i), upsample)
elif module_def["type"] == "route":
layers = [int(x) for x in module_def["layers"].split(",")]
filters = sum([output_filters[1:][i] for i in layers])
modules.add_module("route_{}".format(module_i), EmptyLayer())
elif module_def["type"] == "shortcut": # 없음
filters = output_filters[1:][int(module_def["from"])]
modules.add_module("shortcut_{}".format(module_i), EmptyLayer())
elif module_def["type"] == "yolo":
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
# Extract anchors
anchors = [int(x) for x in module_def["anchors"].split(",")]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
anchors = [anchors[i] for i in anchor_idxs]
num_classes = int(module_def["classes"])
img_size = int(hyperparams["height"])
# Define detection layer
yolo_layer = YOLOLayer(anchors, num_classes, img_size)
modules.add_module("yolo_{}".format(module_i), yolo_layer)
# Register module list and number of output filters
module_list.append(modules)
output_filters.append(filters)
return hyperparams, module_list
class Upsample(nn.Module):
""" nn.Upsample is deprecated """
def __init__(self, scale_factor, mode="nearest"):
super(Upsample, self).__init__()
self.scale_factor = scale_factor
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
return x
class EmptyLayer(nn.Module):
"""Placeholder for 'route' and 'shortcut' layers"""
def __init__(self):
super(EmptyLayer, self).__init__()
class YOLOLayer(nn.Module):
"""Detection layer"""
def __init__(self, anchors, num_classes, img_dim=416):
super(YOLOLayer, self).__init__()
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
self.ignore_thres = 0.5
self.mse_loss = nn.MSELoss()
self.bce_loss = nn.BCELoss()
self.obj_scale = 1
self.noobj_scale = 100
self.metrics = {}
self.img_dim = img_dim
self.grid_size = 0 # grid size
def compute_grid_offsets(self, grid_size, cuda=True):
self.grid_size = grid_size
g = self.grid_size
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
self.stride = self.img_dim / self.grid_size
# Calculate offsets for each grid
self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
def forward(self, x, targets=None, img_dim=None):
# Tensors for cuda support
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
self.img_dim = img_dim
num_samples = x.size(0)
grid_size = x.size(2)
prediction = (
x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
.permute(0, 1, 3, 4, 2)
.contiguous()
)
# Get outputs
x = torch.sigmoid(prediction[..., 0]) # Center x
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
# If grid size does not match current we compute new offsets
if grid_size != self.grid_size:
self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
# Add offset and scale with anchors
pred_boxes = FloatTensor(prediction[..., :4].shape)
pred_boxes[..., 0] = x.data + self.grid_x
pred_boxes[..., 1] = y.data + self.grid_y
pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
output = torch.cat(
(
pred_boxes.view(num_samples, -1, 4) * self.stride,
pred_conf.view(num_samples, -1, 1),
pred_cls.view(num_samples, -1, self.num_classes),
),
-1,
)
if targets is None:
return output, 0
else:
iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
pred_boxes=pred_boxes,
pred_cls=pred_cls,
target=targets,
anchors=self.scaled_anchors,
ignore_thres=self.ignore_thres,
)
# Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
# Metrics
cls_acc = 100 * class_mask[obj_mask].mean()
conf_obj = pred_conf[obj_mask].mean()
conf_noobj = pred_conf[noobj_mask].mean()
conf50 = (pred_conf > 0.5).float()
iou50 = (iou_scores > 0.5).float()
iou75 = (iou_scores > 0.75).float()
detected_mask = conf50 * class_mask * tconf
precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
self.metrics = {
"loss": to_cpu(total_loss).item(),
"x": to_cpu(loss_x).item(),
"y": to_cpu(loss_y).item(),
"w": to_cpu(loss_w).item(),
"h": to_cpu(loss_h).item(),
"conf": to_cpu(loss_conf).item(),
"cls": to_cpu(loss_cls).item(),
"cls_acc": to_cpu(cls_acc).item(),
"recall50": to_cpu(recall50).item(),
"recall75": to_cpu(recall75).item(),
"precision": to_cpu(precision).item(),
"conf_obj": to_cpu(conf_obj).item(),
"conf_noobj": to_cpu(conf_noobj).item(),
"grid_size": grid_size,
}
return output, total_loss
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, config_path, img_size=416):
super(Darknet, self).__init__()
self.module_defs = parse_model_config(config_path)
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
self.img_size = img_size
self.seen = 0
self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
def forward(self, x, targets=None):
img_dim = x.shape[2]
loss = 0
layer_outputs, yolo_outputs = [], []
isfeature = False
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
x = module(x)
elif module_def["type"] == "route":
x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
if (not isfeature):
featuremap = x
isfeature = True
elif module_def["type"] == "shortcut": # shortcut없음
layer_i = int(module_def["from"])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif module_def["type"] == "yolo":
x, layer_loss = module[0](x, targets, img_dim)
loss += layer_loss
yolo_outputs.append(x)
layer_outputs.append(x)
yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
# yolo_outputs = non_max_suppression(yolo_outputs, 0.8, 0.4)
# if yolo_outputs is not None:
# res = self.roipool(featuremap, yolo_outputs, targets)
return featuremap, yolo_outputs if targets is None else (loss, yolo_outputs)
def load_darknet_weights(self, weights_path):
"""Parses and loads the weights stored in 'weights_path'"""
# Open the weights file
with open(weights_path, "rb") as f:
header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values
self.header_info = header # Needed to write header when saving weights
self.seen = header[3] # number of images seen during training
weights = np.fromfile(f, dtype=np.float32) # The rest are weights
# Establish cutoff for loading backbone weights
cutoff = None
if "darknet53.conv.74" in weights_path:
cutoff = 75
ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if i == cutoff:
break
if module_def["type"] == "convolutional":
conv_layer = module[0]
if module_def["batch_normalize"]:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w
def save_darknet_weights(self, path, cutoff=-1):
"""
@:param path - path of the new weights file
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""
fp = open(path, "wb")
self.header_info[3] = self.seen
self.header_info.tofile(fp)
# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def["type"] == "convolutional":
conv_layer = module[0]
# If batch norm, load bn first
if module_def["batch_normalize"]:
bn_layer = module[1]
bn_layer.bias.data.cpu().numpy().tofile(fp)
bn_layer.weight.data.cpu().numpy().tofile(fp)
bn_layer.running_mean.data.cpu().numpy().tofile(fp)
bn_layer.running_var.data.cpu().numpy().tofile(fp)
# Load conv bias
else:
conv_layer.bias.data.cpu().numpy().tofile(fp)
# Load conv weights
conv_layer.weight.data.cpu().numpy().tofile(fp)
fp.close()
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules import module
from utils.utils import *
class ROIPool(nn.Module):
def __init__(self, output_size):
super(ROIPool, self).__init__()
self.maxpool = nn.AdaptiveMaxPool2d(output_size)
self.size = output_size
self.fc1 = nn.Linear(2304, 1024)
self.fc2 = nn.Linear(1024, 512)
self.fc3 = nn.Linear(512, 1)
self.softplus = nn.Softplus()
self.smoothl1 = nn.SmoothL1Loss()
self.mse = nn.MSELoss()
def target_detection_iou(self, box1, box2):
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
# get the corrdinates of the intersection rectangle
b1_x1 = b1_x1.type(torch.float64)
b1_y1 = b1_y1.type(torch.float64)
b1_x2 = b1_x2.type(torch.float64)
b1_y2 = b1_y2.type(torch.float64)
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)
# Intersection area
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
inter_rect_y2 - inter_rect_y1 + 1, min=0
)
# Union Area
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
return iou
def similar_bbox(self, detections, targets):
rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
similar_box = list(range(len(rescaled_boxes)))
for i in range(len(rescaled_boxes)):
for j in range(len(targets)):
target_xyxy = [(targets[j][0]-(targets[j][2]/2))*640, (targets[j][1]-(targets[j][3]/2))*480, (targets[j][0]+(targets[j][2]/2))*640, (targets[j][1]+(targets[j][3]/2))*480]
target_xyxy = torch.tensor(target_xyxy)
iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
if iou > 0.01:
similar_box[i] = targets[j][-1]
break
else:
similar_box[i] = -1
return similar_box
def cal_scale(self, x, detections, targets):
targets_distance = targets[:, :4]
square_targets = []
for target_distance in targets_distance:
x1 = (target_distance[0]-(target_distance[2]/2))*416
y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15
x2 = (target_distance[0]+(target_distance[2]/2))*416
y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15
square_targets.append([x1, y1, x2, y2])
square_targets = torch.tensor(square_targets)
scale = get_scale(square_targets)
output_distance = []
roi_results = []
for i in scale:
x1_scale = i[0]
y1_scale = i[1]
x2_scale = i[2]
y2_scale = i[3]
output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
output = self.maxpool(output)
output = output.view(1, -1)
# print(output)
roi_results.append(output)
return roi_results
def cal_scale_evaL(self, x, detections):
detections = detections[:, :4]
scale = get_scale(detections)
output_distance = []
roi_results = []
for i in scale:
x1_scale = i[0]
y1_scale = i[1]
x2_scale = i[2]
y2_scale = i[3]
output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
output = self.maxpool(output)
output = output.view(1, -1)
roi_results.append(output)
return roi_results
def forward(self, x, detections, targets=None):
if targets is not None:
distances = targets[:, 4]
distances = distances * 10
# distances = distances * 10
# print(f'disatnces = {distances}')
# targets_distance = targets[:, :4]
# square_targets = []
# for target_distance in targets_distance:
# x1 = (target_distance[0]-(target_distance[2]/2))*416
# y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15
# x2 = (target_distance[0]+(target_distance[2]/2))*416
# y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15
# square_targets.append([x1, y1, x2, y2])
# square_targets = torch.tensor(square_targets)
# scale = get_scale(square_targets)
# output_distance = []
# roi_results = []
# for i in scale:
# x1_scale = i[0]
# y1_scale = i[1]
# x2_scale = i[2]
# y2_scale = i[3]
# output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
# output = self.maxpool(output)
# output = output.view(1, -1).cuda()
# # print(output)
# roi_results.append(output)
roi_results = self.cal_scale(x, detections, targets)
output = torch.cat(roi_results, 0)
# print(output.shape)
# print(output.shape)
output = self.fc1(output)
output = self.fc2(output)
output = self.fc3(output)
output = self.softplus(output)
# print(f'output = {output}')
#loss = 0
# output_distance = torch.tensor(output, requires_grad=True)
'''
output = x
# output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
output = self.maxpool(output)
output = output.view(1, -1).cuda()
# print(output.shape)
output = self.fc1(output)
output = self.fc2(output)
output = self.fc3(output)
output = self.softplus(output)
'''
# output_distance = torch.cuda.FloatTensor(output_distance, requires_grad=True)#.to('cpu')
#print(f'output_distance = {output_distance}')
#print(output_distance.shape)
#print(f'distances = {distances}')
#print(distances.shape)
distances = distances.cuda()
# print(f'output = {output}')
# print(f'output = {output}')
# print(f'distances = {distances}')
loss = self.smoothl1(output, distances.float())
# print(f'loss = {loss}')
# print(f'output_distance = {output_distance}')
# print(f'distances = {distances}')
# print(f'loss = {loss}')
return loss, output
else:
'''
detections = detections[:, :4]
scale = get_scale(detections)
output_distance = []
for i in scale:
x1_scale = i[0]
y1_scale = i[1]
x2_scale = i[2]
y2_scale = i[3]
output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
output = self.maxpool(output)
output = output.view(1, -1).cuda()
'''
roi_results = self.cal_scale_evaL(x, detections)
output = torch.cat(roi_results, 0)
# print(f'output = {output.shape}')
output = self.fc1(output)
output = self.fc2(output)
output = self.fc3(output)
output = self.softplus(output)
# print(f'output = {output}')
return output
'''
scale = get_scale(detections)
output_distance = []
for i in scale:
x1_scale = i[0]
y1_scale = i[1]
x2_scale = i[2]
y2_scale = i[3]
output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
# output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
output = self.maxpool(output)
output = output.view(1, -1).cuda()
output = self.fc1(output)
output = self.fc2(output)
output_distance.append(output)
if targets is None:
return output_distance, 0
else:
loss = 0
box_similar_distance = self.similar_bbox(detections, targets)
for i in range(len(box_similar_distance)):
if box_similar_distance[i] == -1:
output_distance[i] = -1
output_distance = torch.FloatTensor(output_distance).to('cpu')
box_similar_distance = torch.FloatTensor(box_similar_distance).to('cpu')
# print(f'output_distance = {output_distance}')
# print(f'target_distance = {box_similar_distance}')
loss = self.smoothl1(output_distance, box_similar_distance)
'''
import torch
import torch.nn.functional as F
import numpy as np
def horisontal_flip(images, targets):
images = torch.flip(images, [-1])
targets[:, 2] = 1 - targets[:, 2]
return images, targets
import glob
import random
import os
import sys
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
import time
from utils.augmentations import horisontal_flip
from torch.utils.data import Dataset
import torchvision.transforms as transforms
def pad_to_square(img, pad_value):
c, h, w = img.shape
dim_diff = np.abs(h - w)
# (upper / left) padding and (lower / right) padding
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
# Determine padding
pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
# Add padding
img = F.pad(img, pad, "constant", value=pad_value)
return img, pad
def resize(image, size):
image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
return image
def random_resize(images, min_size=288, max_size=448):
new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
images = F.interpolate(images, size=new_size, mode="nearest")
return images
class ImageFolder(Dataset):
def __init__(self, folder_path, img_size=416):
self.files = sorted(glob.glob("%s/*.*" % folder_path))
self.img_size = img_size
def __getitem__(self, index):
img_path = self.files[index % len(self.files)]
# Extract image as PyTorch tensor
img = transforms.ToTensor()(Image.open(img_path))
# Pad to square resolution
img, _ = pad_to_square(img, 0)
# Resize
img = resize(img, self.img_size)
return img_path, img
def __len__(self):
return len(self.files)
class ListDataset(Dataset):
def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
with open(list_path, "r") as file:
self.img_files = file.readlines()
self.label_files = [
path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
for path in self.img_files
]
self.img_size = img_size
self.max_objects = 100
self.augment = augment
self.multiscale = multiscale
self.normalized_labels = normalized_labels
self.min_size = self.img_size - 3 * 32
self.max_size = self.img_size + 3 * 32
self.batch_count = 0
def __getitem__(self, index):
# ---------
# Image
# ---------
img_path = self.img_files[index % len(self.img_files)].rstrip()
# Extract image as PyTorch tensor
img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
# Handle images with less than three channels
if len(img.shape) != 3:
img = img.unsqueeze(0)
img = img.expand((3, img.shape[1:]))
_, h, w = img.shape
h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
# Pad to square resolution
img, pad = pad_to_square(img, 0)
_, padded_h, padded_w = img.shape
# ---------
# Label
# ---------
label_path = self.label_files[index % len(self.img_files)].rstrip()
targets = None
targets_distance = None
if os.path.exists(label_path):
if torch.from_numpy(np.loadtxt(label_path)).ndim == 2:
boxes = torch.from_numpy(np.loadtxt(label_path)[:,:-1].reshape(-1, 5))
else:
boxes = torch.from_numpy(np.loadtxt(label_path)[:-1].reshape(-1, 5))
# Extract coordinates for unpadded + unscaled image
x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
# Adjust for added padding
x1 += pad[0]
y1 += pad[2]
x2 += pad[1]
y2 += pad[3]
# Returns (x, y, w, h)
boxes[:, 1] = ((x1 + x2) / 2) / padded_w
boxes[:, 2] = ((y1 + y2) / 2) / padded_h
boxes[:, 3] *= w_factor / padded_w
boxes[:, 4] *= h_factor / padded_h
targets = torch.zeros((len(boxes), 6))
targets[:, 1:] = boxes
if torch.from_numpy(np.loadtxt(label_path)).ndim == 2:
targets_distance = torch.from_numpy(np.loadtxt(label_path)[:,1:].reshape(-1, 5))
else:
targets_distance = torch.from_numpy(np.loadtxt(label_path)[1:].reshape(-1, 5))
# Apply augmentations
# if self.augment:
# if np.random.random() < 0.5:
# img, targets = horisontal_flip(img, targets)
return img_path, img, targets, targets_distance
def collate_fn(self, batch):
paths, imgs, targets, targets_distance = list(zip(*batch))
# Remove empty placeholder targets
targets = [boxes for boxes in targets if boxes is not None]
# Add sample index to targets
for i, boxes in enumerate(targets):
boxes[:, 0] = i
targets = torch.cat(targets, 0)
# Selects new image size every tenth batch
if self.multiscale and self.batch_count % 10 == 0:
self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
# Resize images to input shape
imgs = torch.stack([resize(img, self.img_size) for img in imgs])
self.batch_count += 1
return paths, imgs, targets, targets_distance
def __len__(self):
return len(self.img_files)
import tensorflow as tf
class Logger(object):
def __init__(self, log_dir):
"""Create a summary writer logging to log_dir."""
self.writer = tf.summary.create_file_writer(log_dir)
def scalar_summary(self, tag, value, step):
"""Log a scalar variable."""
summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
self.writer.add_summary(summary, step)
def list_of_scalars_summary(self, tag_value_pairs, step):
"""Log scalar variables."""
# summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs])
# self.writer.add_summary(summary, step)
def parse_model_config(path):
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
file = open(path, 'r')
lines = file.read().split('\n')
lines = [x for x in lines if x and not x.startswith('#')]
lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
module_defs = []
for line in lines:
if line.startswith('['): # This marks the start of a new block
module_defs.append({})
module_defs[-1]['type'] = line[1:-1].rstrip()
if module_defs[-1]['type'] == 'convolutional':
module_defs[-1]['batch_normalize'] = 0
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
return module_defs
def parse_data_config(path):
"""Parses the data configuration file"""
options = dict()
options['gpus'] = '0,1,2,3'
options['num_workers'] = '10'
with open(path, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.strip()
if line == '' or line.startswith('#'):
continue
key, value = line.split('=')
options[key.strip()] = value.strip()
return options
from __future__ import division
import math
import time
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def to_cpu(tensor):
return tensor.detach().cpu()
def load_classes(path):
"""
Loads class labels at 'path'
"""
fp = open(path, "r")
names = fp.read().split("\n")[:-1]
return names
def weights_init_normal(m):
classname = m.__class__.__name__
if classname.find("Conv") != -1:
torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find("BatchNorm2d") != -1:
torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
torch.nn.init.constant_(m.bias.data, 0.0)
def rescale_boxes(boxes, current_dim, original_shape):
""" Rescales bounding boxes to the original shape """
orig_h, orig_w = original_shape
# The amount of padding that was added
pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
# Image height and width after padding is removed
unpad_h = current_dim - pad_y
unpad_w = current_dim - pad_x
# Rescale bounding boxes to dimension of original image
boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
return boxes
def xywh2xyxy(x):
y = x.new(x.shape)
y[..., 0] = x[..., 0] - x[..., 2] / 2
y[..., 1] = x[..., 1] - x[..., 3] / 2
y[..., 2] = x[..., 0] + x[..., 2] / 2
y[..., 3] = x[..., 1] + x[..., 3] / 2
return y
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
i = pred_cls == c
n_gt = (target_cls == c).sum() # Number of ground truth objects
n_p = i.sum() # Number of predicted objects
if n_p == 0 and n_gt == 0:
continue
elif n_p == 0 or n_gt == 0:
ap.append(0)
r.append(0)
p.append(0)
else:
# Accumulate FPs and TPs
fpc = (1 - tp[i]).cumsum()
tpc = (tp[i]).cumsum()
# Recall
recall_curve = tpc / (n_gt + 1e-16)
r.append(recall_curve[-1])
# Precision
precision_curve = tpc / (tpc + fpc)
p.append(precision_curve[-1])
# AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve))
# Compute F1 score (harmonic mean of precision and recall)
p, r, ap = np.array(p), np.array(r), np.array(ap)
f1 = 2 * p * r / (p + r + 1e-16)
return p, r, ap, f1, unique_classes.astype("int32")
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.0], recall, [1.0]))
mpre = np.concatenate(([0.0], precision, [0.0]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def get_batch_statistics(outputs, targets, iou_threshold):
""" Compute true positives, predicted scores and predicted labels per sample """
batch_metrics = []
for sample_i in range(len(outputs)):
if outputs[sample_i] is None:
continue
output = outputs[sample_i]
pred_boxes = output[:, :4]
pred_scores = output[:, 4]
pred_labels = output[:, -1]
true_positives = np.zeros(pred_boxes.shape[0])
annotations = targets[targets[:, 0] == sample_i][:, 1:]
target_labels = annotations[:, 0] if len(annotations) else []
if len(annotations):
detected_boxes = []
target_boxes = annotations[:, 1:]
for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
# If targets are found break
if len(detected_boxes) == len(annotations):
break
# Ignore if label is not one of the target labels
if pred_label not in target_labels:
continue
iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
if iou >= iou_threshold and box_index not in detected_boxes:
true_positives[pred_i] = 1
detected_boxes += [box_index]
batch_metrics.append([true_positives, pred_scores, pred_labels])
return batch_metrics
def bbox_wh_iou(wh1, wh2):
wh2 = wh2.t()
w1, h1 = wh1[0], wh1[1]
w2, h2 = wh2[0], wh2[1]
inter_area = torch.min(w1, w2) * torch.min(h1, h2)
union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
return inter_area / union_area
def bbox_iou(box1, box2, x1y1x2y2=True):
"""
Returns the IoU of two bounding boxes
"""
if not x1y1x2y2:
# Transform from center and width to exact coordinates
b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
else:
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
# get the corrdinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)
# Intersection area
inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
inter_rect_y2 - inter_rect_y1 + 1, min=0
)
# Union Area
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
return iou
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
"""
Removes detections with lower object confidence score than 'conf_thres' and performs
Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred)
"""
# From (center x, center y, width, height) to (x1, y1, x2, y2)
prediction[..., :4] = xywh2xyxy(prediction[..., :4])
output = [None for _ in range(len(prediction))]
for image_i, image_pred in enumerate(prediction):
# Filter out confidence scores below threshold
image_pred = image_pred[image_pred[:, 4] >= conf_thres]
# If none are remaining => process next image
if not image_pred.size(0):
continue
# Object confidence times class confidence
score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
# Sort by it
image_pred = image_pred[(-score).argsort()]
class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
# Perform non-maximum suppression
keep_boxes = []
while detections.size(0):
large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
label_match = detections[0, -1] == detections[:, -1]
# Indices of boxes with lower confidence scores, large IOUs and matching labels
invalid = large_overlap & label_match
weights = detections[invalid, 4:5]
# Merge overlapping bboxes by order of confidence
detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
keep_boxes += [detections[0]]
detections = detections[~invalid]
if keep_boxes:
output[image_i] = torch.stack(keep_boxes)
return output
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
nB = pred_boxes.size(0)
nA = pred_boxes.size(1)
nC = pred_cls.size(-1)
nG = pred_boxes.size(2)
# Output tensors
obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
tx = FloatTensor(nB, nA, nG, nG).fill_(0)
ty = FloatTensor(nB, nA, nG, nG).fill_(0)
tw = FloatTensor(nB, nA, nG, nG).fill_(0)
th = FloatTensor(nB, nA, nG, nG).fill_(0)
tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
# Convert to position relative to box
target_boxes = target[:, 2:6] * nG
gxy = target_boxes[:, :2]
gwh = target_boxes[:, 2:]
# Get anchors with best iou
ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
best_ious, best_n = ious.max(0)
# Separate target values
b, target_labels = target[:, :2].long().t()
gx, gy = gxy.t()
gw, gh = gwh.t()
gi, gj = gxy.long().t()
# Set masks
obj_mask[b, best_n, gj, gi] = 1
noobj_mask[b, best_n, gj, gi] = 0
# Set noobj mask to zero where iou exceeds ignore threshold
for i, anchor_ious in enumerate(ious.t()):
noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
# Coordinates
tx[b, best_n, gj, gi] = gx - gx.floor()
ty[b, best_n, gj, gi] = gy - gy.floor()
# Width and height
tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
# One-hot encoding of label
tcls[b, best_n, gj, gi, target_labels] = 1
# Compute label correctness and iou at best anchor
class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
tconf = obj_mask.float()
return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
def get_scale(detections):
num_roi = detections.size(0)
outputs = []
for num in range(num_roi):
x1, y1, x2, y2 = detections[num]
standard = 416/7
x1_scale = math.floor(x1/standard)
y1_scale = math.floor(y1/standard)
x2_scale = math.ceil(x2/standard)
y2_scale = math.ceil(y2/standard)
outputs.append([x1_scale, y1_scale, x2_scale, y2_scale])
return outputs