Distance Estimation Module

Dataset definitions, YOLOv3-tiny network configs, and the PyTorch code (RoI-pooling distance head, data loading, logging, and config parsing) that together estimate the distance to each detected object.

classes=12
train=data/cafe/train.txt
valid=data/cafe/valid.txt
names=data/cafe/classes.names

classes=5
train=data/cafe_distance/train.txt
valid=data/cafe_distance/valid.txt
names=data/cafe_distance/classes.names

classes=5
train=data/testdata/train.txt
valid=data/testdata/valid.txt
names=data/testdata/classes.names
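Each block above is a separate darknet .data file (cafe, cafe_distance, testdata). They are consumed by parse_data_config, defined at the end of this section. A minimal usage sketch, assuming the first block is saved as config/cafe.data (the path and module location are assumptions):

from utils.parse_config import parse_data_config  # module path is an assumption

config = parse_data_config("config/cafe.data")  # hypothetical save path
num_classes = int(config["classes"])            # 12 for the cafe dataset
train_list = config["train"]                    # data/cafe/train.txt
names_file = config["names"]                    # data/cafe/classes.names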
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 10
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 11
[yolo]
mask = 0, 1, 2
anchors = 37,58, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
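In these cfgs, the convolutional layer feeding each [yolo] layer must have filters = (classes + 5) × (anchors per scale), where the 5 covers x, y, w, h, and objectness. A quick sanity check against the values above and below:

def yolo_filters(classes, boxes_per_scale):
    # 5 = x, y, w, h, objectness
    return (classes + 5) * boxes_per_scale

assert yolo_filters(9, 3) == 42  # classes=9, mask of 3 anchors -> filters=42
assert yolo_filters(5, 3) == 30  # classes=5 two-scale cfg below -> filters=30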
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[convolutional]
size=1
stride=1
pad=1
filters=42
activation=linear
# 10
[yolo]
mask = 0, 1, 2
anchors = 59,119, 81,82, 135,169
classes=9
num=3
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=8
subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
# 0
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
# 1
[maxpool]
size=2
stride=2
# 2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# 3
[maxpool]
size=2
stride=2
# 4
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
# 5
[maxpool]
size=2
stride=2
# 6
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
# 7
[maxpool]
size=2
stride=2
# 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 9
[maxpool]
size=2
stride=2
# 10
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 11
[maxpool]
size=2
stride=1
# 12
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
# 13
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
# 14
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
# 15
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 16
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
# 17
[route]
layers = -4
# 18
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
# 19
[upsample]
stride=2
# 20
[route]
layers = -1, 8
# 21
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
# 22
[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear
# 23
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
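The two [yolo] heads in this last cfg share one six-anchor list and split it with mask: the coarse 13×13 head takes the three largest anchors, the upsampled 26×26 head the three smallest:

anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
coarse_head = [anchors[i] for i in (3, 4, 5)]  # large objects, 13x13 grid
fine_head = [anchors[i] for i in (0, 1, 2)]    # small objects, 26x26 grid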
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.utils import *


class ROIPool(nn.Module):
    def __init__(self, output_size):
        super(ROIPool, self).__init__()
        # Adaptive pooling maps each variable-sized RoI crop to a fixed grid.
        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
        self.size = output_size
        # Regression head: pooled RoI features -> a single distance value
        # (2304 presumably = 256 feature channels x a 3x3 pooled grid).
        self.fc1 = nn.Linear(2304, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 1)
        # Softplus keeps the predicted distance positive.
        self.softplus = nn.Softplus()
        self.smoothl1 = nn.SmoothL1Loss()
        self.mse = nn.MSELoss()
    def target_detection_iou(self, box1, box2):
        """IoU between one detection box and one target box, both in xyxy format."""
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
        # Cast to float64 so the comparisons below match the target dtype
        b1_x1 = b1_x1.type(torch.float64)
        b1_y1 = b1_y1.type(torch.float64)
        b1_x2 = b1_x2.type(torch.float64)
        b1_y2 = b1_y2.type(torch.float64)
        # Get the coordinates of the intersection rectangle
        inter_rect_x1 = torch.max(b1_x1, b2_x1)
        inter_rect_y1 = torch.max(b1_y1, b2_y1)
        inter_rect_x2 = torch.min(b1_x2, b2_x2)
        inter_rect_y2 = torch.min(b1_y2, b2_y2)
        # Intersection area
        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
            inter_rect_y2 - inter_rect_y1 + 1, min=0
        )
        # Union area
        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
        return iou
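    # Quick sanity check (hypothetical usage): identical boxes give IoU ~1.
    #   pool = ROIPool(output_size=3)
    #   box = torch.tensor([0.0, 0.0, 99.0, 99.0])
    #   pool.target_detection_iou(box, box)  # tensor(1., dtype=torch.float64)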
    def similar_bbox(self, detections, targets):
        """Match each detection to an overlapping target and return that target's
        distance label, or -1 when no target overlaps the detection."""
        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
        similar_box = list(range(len(rescaled_boxes)))
        for i in range(len(rescaled_boxes)):
            for j in range(len(targets)):
                # Convert the normalised xywh target to xyxy pixel coordinates
                target_xyxy = [
                    (targets[j][0] - (targets[j][2] / 2)) * 640,
                    (targets[j][1] - (targets[j][3] / 2)) * 480,
                    (targets[j][0] + (targets[j][2] / 2)) * 640,
                    (targets[j][1] + (targets[j][3] / 2)) * 480,
                ]
                target_xyxy = torch.tensor(target_xyxy)
                iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
                if iou > 0.01:
                    similar_box[i] = targets[j][-1]
                    break
                else:
                    similar_box[i] = -1
        return similar_box
    def cal_scale(self, x, detections, targets):
        """Pool feature-map patches under each ground-truth box (training)."""
        targets_distance = targets[:, :4]
        square_targets = []
        for target_distance in targets_distance:
            # Map normalised xywh labels onto the network's coordinate frame
            # (the constants assume 480x640 source frames padded to square).
            x1 = (target_distance[0] - (target_distance[2] / 2)) * 416
            y1 = ((target_distance[1] - (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            x2 = (target_distance[0] + (target_distance[2] / 2)) * 416
            y2 = ((target_distance[1] + (target_distance[3] / 2)) * 480 + 80) * 13 / 15
            square_targets.append([x1, y1, x2, y2])
        square_targets = torch.tensor(square_targets)
        scale = get_scale(square_targets)
        roi_results = []
        for i in scale:
            x1_scale, y1_scale, x2_scale, y2_scale = i[0], i[1], i[2], i[3]
            # NCHW layout: dim 2 is y (rows), dim 3 is x (columns), so index y
            # first, matching cal_scale_eval below.
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            output = output.view(1, -1)
            roi_results.append(output)
        return roi_results
    def cal_scale_eval(self, x, detections):
        """Pool feature-map patches under each detected box (inference)."""
        detections = detections[:, :4]
        scale = get_scale(detections)
        roi_results = []
        for i in scale:
            x1_scale, y1_scale, x2_scale, y2_scale = i[0], i[1], i[2], i[3]
            output = x[:, :, y1_scale:y2_scale + 1, x1_scale:x2_scale + 1]
            output = self.maxpool(output)
            output = output.view(1, -1)
            roi_results.append(output)
        return roi_results
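    # Why AdaptiveMaxPool2d: RoI crops vary in size, but fc1 needs a fixed
    # 2304-dim input (2304 = 256 channels * 3 * 3, assuming output_size=3 on
    # 256-channel feature maps). For example:
    #   pool = nn.AdaptiveMaxPool2d(3)
    #   pool(torch.randn(1, 256, 17, 9)).view(1, -1).shape   # [1, 2304]
    #   pool(torch.randn(1, 256, 40, 25)).view(1, -1).shape  # [1, 2304]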
    def forward(self, x, detections, targets=None):
        if targets is not None:
            # Training: the ground-truth distance is the fifth label column,
            # scaled by 10 (presumably to match the head's output range).
            distances = targets[:, 4]
            distances = distances * 10
            roi_results = self.cal_scale(x, detections, targets)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)
            distances = distances.cuda()
            # Flatten (N, 1) -> (N,) so each prediction pairs with its own
            # target instead of broadcasting to an (N, N) loss.
            loss = self.smoothl1(output.view(-1), distances.float())
            return loss, output
        else:
            # Inference: pool features under each detected box and regress
            # one distance per detection.
            roi_results = self.cal_scale_eval(x, detections)
            output = torch.cat(roi_results, 0)
            output = self.fc1(output)
            output = self.fc2(output)
            output = self.fc3(output)
            output = self.softplus(output)
            return output
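How the head is wired in, as a hedged sketch; the names features, detections, and targets are assumptions based on forward()'s signature:

roi_head = ROIPool(output_size=3)

# Training: targets rows hold [x, y, w, h, distance]; returns (loss, predictions).
#   loss, pred = roi_head(features, detections, targets)

# Inference: detections are xyxy boxes; returns one distance per detection.
#   pred = roi_head(features, detections)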
import torch
import torch.nn.functional as F
import numpy as np


def horisontal_flip(images, targets):
    # Mirror the image along its last (width) axis and flip the normalised
    # x-centre (column 2 of [sample_idx, class, x, y, w, h]) to match.
    images = torch.flip(images, [-1])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
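A horizontal flip changes only the normalised x-centre; for example:

img = torch.rand(3, 416, 416)
targets = torch.tensor([[0.0, 1.0, 0.25, 0.5, 0.1, 0.2]])
img_f, targets_f = horisontal_flip(img, targets)
print(targets_f[0, 2])  # tensor(0.7500): mirrored about the image mid-line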
import glob
import random
import os
import sys
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
import time
from utils.augmentations import horisontal_flip
from torch.utils.data import Dataset
import torchvision.transforms as transforms
def pad_to_square(img, pad_value):
    c, h, w = img.shape
    dim_diff = np.abs(h - w)
    # (upper / left) padding and (lower / right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Determine padding
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    # Add padding
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad
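Worked example: a 480×640 frame (the capture size assumed elsewhere in this module) needs 160 rows of padding, split 80 top / 80 bottom:

img = torch.zeros(3, 480, 640)
padded, pad = pad_to_square(img, 0)
print(padded.shape)  # torch.Size([3, 640, 640])
print(pad)           # (0, 0, 80, 80)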
def resize(image, size):
    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
    return image


def random_resize(images, min_size=288, max_size=448):
    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
    images = F.interpolate(images, size=new_size, mode="nearest")
    return images
class ImageFolder(Dataset):
    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob("%s/*.*" % folder_path))
        self.img_size = img_size

    def __getitem__(self, index):
        img_path = self.files[index % len(self.files)]
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path))
        # Pad to square resolution
        img, _ = pad_to_square(img, 0)
        # Resize
        img = resize(img, self.img_size)
        return img_path, img

    def __len__(self):
        return len(self.files)
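ImageFolder pairs with a DataLoader for inference over a directory of images; the folder path below is an assumption:

from torch.utils.data import DataLoader

loader = DataLoader(ImageFolder("data/samples", img_size=416), batch_size=1)
for img_path, img in loader:
    print(img_path, img.shape)  # torch.Size([1, 3, 416, 416])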
class ListDataset(Dataset):
    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
        with open(list_path, "r") as file:
            self.img_files = file.readlines()
        self.label_files = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.img_files
        ]
        self.img_size = img_size
        self.max_objects = 100
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        self.min_size = self.img_size - 3 * 32
        self.max_size = self.img_size + 3 * 32
        self.batch_count = 0
    def __getitem__(self, index):
        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index % len(self.img_files)].rstrip()
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand((3, *img.shape[1:]))
        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape
        # ---------
        #  Label
        # ---------
        label_path = self.label_files[index % len(self.img_files)].rstrip()
        targets = None
        targets_distance = None
        if os.path.exists(label_path):
            # Each label row is [class, x, y, w, h, distance]; load once and
            # reshape so single-row files are handled like multi-row ones.
            labels = np.loadtxt(label_path).reshape(-1, 6)
            boxes = torch.from_numpy(labels[:, :-1].copy())
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes
            # [x, y, w, h, distance] in the original (unpadded) frame
            targets_distance = torch.from_numpy(labels[:, 1:].copy())
        # Apply augmentations
        # if self.augment:
        #     if np.random.random() < 0.5:
        #         img, targets = horisontal_flip(img, targets)
        return img_path, img, targets, targets_distance
    def collate_fn(self, batch):
        paths, imgs, targets, targets_distance = list(zip(*batch))
        # Remove empty placeholder targets
        targets = [boxes for boxes in targets if boxes is not None]
        # Add sample index to targets
        for i, boxes in enumerate(targets):
            boxes[:, 0] = i
        targets = torch.cat(targets, 0)
        # Selects new image size every tenth batch
        if self.multiscale and self.batch_count % 10 == 0:
            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
        # Resize images to input shape
        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
        self.batch_count += 1
        return paths, imgs, targets, targets_distance

    def __len__(self):
        return len(self.img_files)
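ListDataset must be used with its own collate_fn, which stamps each sample's batch index into column 0 of the concatenated targets. A sketch reusing the train list from the data configs above:

from torch.utils.data import DataLoader

dataset = ListDataset("data/cafe_distance/train.txt", augment=False, multiscale=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)
paths, imgs, targets, targets_distance = next(iter(loader))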
import tensorflow as tf


class Logger(object):
    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.create_file_writer(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        # TF2-style summaries; tf.Summary / add_summary are TF1-only APIs and
        # do not work with a create_file_writer writer.
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step=step)
            self.writer.flush()

    def list_of_scalars_summary(self, tag_value_pairs, step):
        """Log several scalar variables at once."""
        with self.writer.as_default():
            for tag, value in tag_value_pairs:
                tf.summary.scalar(tag, value, step=step)
            self.writer.flush()
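Typical Logger usage during training; the log directory and tag names are assumptions:

logger = Logger("logs")
logger.scalar_summary("loss", 0.42, step=1)
logger.list_of_scalars_summary([("precision", 0.9), ("recall", 0.8)], step=1)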
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespace
    module_defs = []
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()
    return module_defs
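parse_model_config turns a cfg like those above into an ordered list of dicts, one per block, with every value kept as a string; the path below is hypothetical:

module_defs = parse_model_config("config/yolov3-tiny-distance.cfg")  # hypothetical
print(module_defs[0]["type"])     # 'net'
print(module_defs[1]["filters"])  # '16' (values are cast when modules are built)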
def parse_data_config(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options