Distance Estimation Module

Dataset definitions, YOLOv3-tiny network configs, and the PyTorch code (RoI-pooling distance head, data loading, logging, and config parsing) that together estimate the distance to each detected object.

classes=12
train=data/cafe/train.txt
valid=data/cafe/valid.txt
names=data/cafe/classes.names

classes=5
train=data/cafe_distance/train.txt
valid=data/cafe_distance/valid.txt
names=data/cafe_distance/classes.names

classes=5
train=data/testdata/train.txt
valid=data/testdata/valid.txt
names=data/testdata/classes.names
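Each block above is a separate darknet .data file (cafe, cafe_distance, testdata). They are consumed by parse_data_config, defined at the end of this section. A minimal usage sketch, assuming the first block is saved as config/cafe.data (the path and module location are assumptions):

from utils.parse_config import parse_data_config  # module path is an assumption

config = parse_data_config("config/cafe.data")  # hypothetical save path
num_classes = int(config["classes"])            # 12 for the cafe dataset
train_list = config["train"]                    # data/cafe/train.txt
names_file = config["names"]                    # data/cafe/classes.names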
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 10
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 11
[yolo]
mask = 0, 1, 2
anchors = 37,58, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
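In these cfgs, the convolutional layer feeding each [yolo] layer must have filters = (classes + 5) × (anchors per scale), where the 5 covers x, y, w, h, and objectness. A quick sanity check against the values above and below:

def yolo_filters(classes, boxes_per_scale):
    # 5 = x, y, w, h, objectness
    return (classes + 5) * boxes_per_scale

assert yolo_filters(9, 3) == 42  # classes=9, mask of 3 anchors -> filters=42
assert yolo_filters(5, 3) == 30  # classes=5 two-scale cfg below -> filters=30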
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 10
[yolo]
mask = 0, 1, 2
anchors = 59,119, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[maxpool]
size=2
stride=2
# 10
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 11
[maxpool]
size=2
stride=1
# 12
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
# 13
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
# 14
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 15
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 16
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
# 17
[route]
layers = -4
# 18
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
# 19
[upsample]
stride=2
# 20
[route]
layers = -1, 8
# 21
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 22
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 23
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
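The two [yolo] heads in this last cfg share one six-anchor list and split it with mask: the coarse 13×13 head takes the three largest anchors, the upsampled 26×26 head the three smallest:

anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
coarse_head = [anchors[i] for i in (3, 4, 5)]  # large objects, 13x13 grid
fine_head = [anchors[i] for i in (0, 1, 2)]    # small objects, 26x26 grid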
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.utils import *


class ROIPool(nn.Module):
    def __init__(self, output_size):
        super(ROIPool, self).__init__()
        # Adaptive pooling maps each variable-sized RoI crop to a fixed grid.
        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
        self.size = output_size
        # Regression head: pooled RoI features -> a single distance value
        # (2304 presumably = 256 feature channels x a 3x3 pooled grid).
        self.fc1 = nn.Linear(2304, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)
        # Softplus keeps the predicted distance positive.
        self.softplus = nn.Softplus()
        self.smoothl1 = nn.SmoothL1Loss()
        self.mse = nn.MSELoss()
    def target_detection_iou(self, box1, box2):
        """IoU between one detection box and one target box, both in xyxy format."""
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
        # Cast to float64 so the comparisons below match the target dtype
        b1_x1 = b1_x1.type(torch.float64)
        b1_y1 = b1_y1.type(torch.float64)
        b1_x2 = b1_x2.type(torch.float64)
        b1_y2 = b1_y2.type(torch.float64)
        # Get the coordinates of the intersection rectangle
        inter_rect_x1 = torch.max(b1_x1, b2_x1)
        inter_rect_y1 = torch.max(b1_y1, b2_y1)
        inter_rect_x2 = torch.min(b1_x2, b2_x2)
        inter_rect_y2 = torch.min(b1_y2, b2_y2)
        # Intersection area
        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
            inter_rect_y2 - inter_rect_y1 + 1, min=0
        )
        # Union area
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
        return iou
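    # Quick sanity check (hypothetical usage): identical boxes give IoU ~1.
    #   pool = ROIPool(output_size=3)
    #   box = torch.tensor([0.0, 0.0, 99.0, 99.0])
    #   pool.target_detection_iou(box, box)  # tensor(1., dtype=torch.float64)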
    def similar_bbox(self, detections, targets):
        """Match each detection to an overlapping target and return that target's
        distance label, or -1 when no target overlaps the detection."""
        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
        similar_box = list(range(len(rescaled_boxes)))
        for i in range(len(rescaled_boxes)):
            for j in range(len(targets)):
                # Convert the normalised xywh target to xyxy pixel coordinates
                target_xyxy = [
                    (targets[j][0] - (targets[j][2] / 2)) * 640,
                    (targets[j][1] - (targets[j][3] / 2)) * 480,
                    (targets[j][0] + (targets[j][2] / 2)) * 640,
                    (targets[j][1] + (targets[j][3] / 2)) * 480,
                ]
                target_xyxy = torch.tensor(target_xyxy)
                iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
                if iou > 0.01:
                    similar_box[i] = targets[j][-1]
                    break
                else:
                    similar_box[i] = -1
        return similar_box
    def cal_scale(self, x, detections, targets):
        """Pool feature-map patches under each ground-truth box (training)."""
        targets_distance = targets[:, :4]
        square_targets = []
        for target_distance in targets_distance:
            # Map normalised xywh labels onto the network's coordinate frame
            # (the constants assume 480x640 source frames padded to square).
            x1 = (target_distance[0] - (target_distance[2] / 2)) * 416
            y1 = ((target_distance[1] - (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            x2 = (target_distance[0] + (target_distance[2] / 2)) * 416
            y2 = ((target_distance[1] + (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            square_targets.append([x1, y1, x2, y2])
        square_targets = torch.tensor(square_targets)
        scale = get_scale(square_targets)
        roi_results = []
        for i in scale:
            x1_scale, y1_scale, x2_scale, y2_scale = i[0], i[1], i[2], i[3]
            # NCHW layout: dim 2 is y (rows), dim 3 is x (columns), so index y
            # first, matching cal_scale_eval below.
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            output = output.view(1, -1)
            roi_results.append(output)
        return roi_results
    def cal_scale_eval(self, x, detections):
        """Pool feature-map patches under each detected box (inference)."""
        detections = detections[:, :4]
        scale = get_scale(detections)
        roi_results = []
        for i in scale:
            x1_scale, y1_scale, x2_scale, y2_scale = i[0], i[1], i[2], i[3]
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            output = output.view(1, -1)
            roi_results.append(output)
        return roi_results
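    # Why AdaptiveMaxPool2d: RoI crops vary in size, but fc1 needs a fixed
    # 2304-dim input (2304 = 256 channels * 3 * 3, assuming output_size=3 on
    # 256-channel feature maps). For example:
    #   pool = nn.AdaptiveMaxPool2d(3)
    #   pool(torch.randn(1, 256, 17, 9)).view(1, -1).shape   # [1, 2304]
    #   pool(torch.randn(1, 256, 40, 25)).view(1, -1).shape  # [1, 2304]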
    def forward(self, x, detections, targets=None):
        if targets is not None:
            # Training: the ground-truth distance is the fifth label column,
            # scaled by 10 (presumably to match the head's output range).
            distances = targets[:, 4]
            distances = distances * 10
            roi_results = self.cal_scale(x, detections, targets)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)
            distances = distances.cuda()
            # Flatten (N, 1) -> (N,) so each prediction pairs with its own
            # target instead of broadcasting to an (N, N) loss.
            loss = self.smoothl1(output.view(-1), distances.float())
            return loss, output
        else:
            # Inference: pool features under each detected box and regress
            # one distance per detection.
            roi_results = self.cal_scale_eval(x, detections)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)
            return output
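How the head is wired in, as a hedged sketch; the names features, detections, and targets are assumptions based on forward()'s signature:

roi_head = ROIPool(output_size=3)

# Training: targets rows hold [x, y, w, h, distance]; returns (loss, predictions).
#   loss, pred = roi_head(features, detections, targets)

# Inference: detections are xyxy boxes; returns one distance per detection.
#   pred = roi_head(features, detections)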
import torch
import torch.nn.functional as F
import numpy as np


def horisontal_flip(images, targets):
    # Mirror the image along its last (width) axis and flip the normalised
    # x-centre (column 2 of [sample_idx, class, x, y, w, h]) to match.
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
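A horizontal flip changes only the normalised x-centre; for example:

img = torch.rand(3, 416, 416)
targets = torch.tensor([[0.0, 1.0, 0.25, 0.5, 0.1, 0.2]])
img_f, targets_f = horisontal_flip(img, targets)
print(targets_f[0, 2])  # tensor(0.7500): mirrored about the image mid-line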
import glob
import random
import os
import sys
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
import time
from utils.augmentations import horisontal_flip
from torch.utils.data import Dataset
import torchvision.transforms as transforms
def pad_to_square(img, pad_value):
    c, h, w = img.shape
    dim_diff = np.abs(h - w)
    # (upper / left) padding and (lower / right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Determine padding
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    # Add padding
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad
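Worked example: a 480×640 frame (the capture size assumed elsewhere in this module) needs 160 rows of padding, split 80 top / 80 bottom:

img = torch.zeros(3, 480, 640)
padded, pad = pad_to_square(img, 0)
print(padded.shape)  # torch.Size([3, 640, 640])
print(pad)           # (0, 0, 80, 80)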
def resize(image, size):
    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
    return image


def random_resize(images, min_size=288, max_size=448):
    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
    images = F.interpolate(images, size=new_size, mode="nearest")
    return images
class ImageFolder(Dataset):
    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob("%s/*.*" % folder_path))
        self.img_size = img_size

    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path))
        # Pad to square resolution
        img, _ = pad_to_square(img, 0)
        # Resize
        img = resize(img, self.img_size)
        return img_path, img

    def __len__(self):
        return len(self.files)
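ImageFolder pairs with a DataLoader for inference over a directory of images; the folder path below is an assumption:

from torch.utils.data import DataLoader

loader = DataLoader(ImageFolder("data/samples", img_size=416), batch_size=1)
for img_path, img in loader:
    print(img_path, img.shape)  # torch.Size([1, 3, 416, 416])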
class ListDataset(Dataset):
    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
        with open(list_path, "r") as file:
            self.img_files = file.readlines()
        self.label_files = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.img_files
        ]
        self.img_size = img_size
        self.max_objects = 100
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        self.min_size = self.img_size - 3 * 32
        self.max_size = self.img_size + 3 * 32
        self.batch_count = 0
    def __getitem__(self, index):
        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index % len(self.img_files)].rstrip()
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand((3, *img.shape[1:]))
        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape
        # ---------
        #  Label
        # ---------
        label_path = self.label_files[index % len(self.img_files)].rstrip()
        targets = None
        targets_distance = None
        if os.path.exists(label_path):
            # Each label row is [class, x, y, w, h, distance]; load once and
            # reshape so single-row files are handled like multi-row ones.
            labels = np.loadtxt(label_path).reshape(-1, 6)
            boxes = torch.from_numpy(labels[:, :-1].copy())
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes
            # [x, y, w, h, distance] in the original (unpadded) frame
            targets_distance = torch.from_numpy(labels[:, 1:].copy())
        # Apply augmentations
        # if self.augment:
        #     if np.random.random() < 0.5:
        #         img, targets = horisontal_flip(img, targets)
        return img_path, img, targets, targets_distance
    def collate_fn(self, batch):
        paths, imgs, targets, targets_distance = list(zip(*batch))
        # Remove empty placeholder targets
        targets = [boxes for boxes in targets if boxes is not None]
        # Add sample index to targets
        for i, boxes in enumerate(targets):
            boxes[:, 0] = i
        targets = torch.cat(targets, 0)
        # Selects new image size every tenth batch
        if self.multiscale and self.batch_count % 10 == 0:
            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
        # Resize images to input shape
        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
        self.batch_count += 1
        return paths, imgs, targets, targets_distance

    def __len__(self):
        return len(self.img_files)
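ListDataset must be used with its own collate_fn, which stamps each sample's batch index into column 0 of the concatenated targets. A sketch reusing the train list from the data configs above:

from torch.utils.data import DataLoader

dataset = ListDataset("data/cafe_distance/train.txt", augment=False, multiscale=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)
paths, imgs, targets, targets_distance = next(iter(loader))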
import tensorflow as tf


class Logger(object):
    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.create_file_writer(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        # TF2-style summaries; tf.Summary / add_summary are TF1-only APIs and
        # do not work with a create_file_writer writer.
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step=step)
            self.writer.flush()

    def list_of_scalars_summary(self, tag_value_pairs, step):
        """Log several scalar variables at once."""
        with self.writer.as_default():
            for tag, value in tag_value_pairs:
                tf.summary.scalar(tag, value, step=step)
            self.writer.flush()
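Typical Logger usage during training; the log directory and tag names are assumptions:

logger = Logger("logs")
logger.scalar_summary("loss", 0.42, step=1)
logger.list_of_scalars_summary([("precision", 0.9), ("recall", 0.8)], step=1)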
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespace
    module_defs = []
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()
    return module_defs
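parse_model_config turns a cfg like those above into an ordered list of dicts, one per block, with every value kept as a string; the path below is hypothetical:

module_defs = parse_model_config("config/yolov3-tiny-distance.cfg")  # hypothetical
print(module_defs[0]["type"])     # 'net'
print(module_defs[1]["filters"])  # '16' (values are cast when modules are built)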
def parse_data_config(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options