Distance Estimation Module

최인훈
Commit 36c95f70f9680bb0faa761b00e914331b1218c08 36c95f70 1 parent 7520fbf2
Showing 25 changed files with 950 additions and 0 deletions
source_code/config/cafe.data
source_code/config/cafe_distance.data
source_code/config/testdata.data
source_code/config/tiny1.cfg
source_code/config/tiny2.cfg
source_code/config/yolov3-tiny.cfg
source_code/models.py
source_code/roipool.py
source_code/utils/__init__.py
source_code/utils/__pycache__/__init__.cpython-36.pyc
source_code/utils/__pycache__/__init__.cpython-38.pyc
source_code/utils/__pycache__/augmentations.cpython-36.pyc
source_code/utils/__pycache__/augmentations.cpython-38.pyc
source_code/utils/__pycache__/datasets.cpython-36.pyc
source_code/utils/__pycache__/datasets.cpython-38.pyc
source_code/utils/__pycache__/logger.cpython-36.pyc
source_code/utils/__pycache__/parse_config.cpython-36.pyc
source_code/utils/__pycache__/parse_config.cpython-38.pyc
source_code/utils/__pycache__/utils.cpython-36.pyc
source_code/utils/__pycache__/utils.cpython-38.pyc
--- a/source_code/config/cafe.data 0 → 100644
View file @36c95f7
+++ b/source_code/config/cafe.data 0 → 100644
View file @36c95f7
+classes=12
+train=data/cafe/train.txt
+valid=data/cafe/valid.txt
+names=data/cafe/classes.names
\ No newline at end of file
--- a/source_code/config/cafe_distance.data 0 → 100644
View file @36c95f7
+++ b/source_code/config/cafe_distance.data 0 → 100644
View file @36c95f7
+classes=5
+train=data/cafe_distance/train.txt
+valid=data/cafe_distance/valid.txt
+names=data/cafe_distance/classes.names
+
+
+
+
--- a/source_code/config/testdata.data 0 → 100644
View file @36c95f7
+++ b/source_code/config/testdata.data 0 → 100644
View file @36c95f7
+classes=5
+train=data/testdata/train.txt
+valid=data/testdata/valid.txt
+names=data/testdata/classes.names
+
+
+
+
--- a/source_code/config/tiny1.cfg 0 → 100644
View file @36c95f7
+++ b/source_code/config/tiny1.cfg 0 → 100644
View file @36c95f7
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 10
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=42
+activation=linear
+
+# 11
+[yolo]
+mask = 0, 1, 2
+anchors = 37,58, 81,82, 135,169
+classes=9
+num=3
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+
+
+
+
--- a/source_code/config/tiny2.cfg 0 → 100644
View file @36c95f7
+++ b/source_code/config/tiny2.cfg 0 → 100644
View file @36c95f7
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=42
+activation=linear
+
+# 10
+[yolo]
+mask = 0, 1, 2
+anchors = 59,119, 81,82, 135,169
+classes=9
+num=3
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+
+
+
+
--- a/source_code/config/yolov3-tiny.cfg 0 → 100644
View file @36c95f7
+++ b/source_code/config/yolov3-tiny.cfg 0 → 100644
View file @36c95f7
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=8
+subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 1
+[maxpool]
+size=2
+stride=2
+
+# 2
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 3
+[maxpool]
+size=2
+stride=2
+
+# 4
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 5
+[maxpool]
+size=2
+stride=2
+
+# 6
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 7
+[maxpool]
+size=2
+stride=2
+
+# 8
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 9
+[maxpool]
+size=2
+stride=2
+
+# 10
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 11
+[maxpool]
+size=2
+stride=1
+
+# 12
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+# 13
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+# 14
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 15
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=30
+activation=linear
+
+
+
+# 16
+[yolo]
+mask = 3,4,5
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=5
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+# 17
+[route]
+layers = -4 
+
+# 18
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+# 19
+[upsample]
+stride=2
+
+# 20
+[route]
+layers = -1, 8
+
+# 21
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# 22
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=30
+activation=linear
+
+# 23
+[yolo]
+mask = 0,1,2
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=5
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
--- a/source_code/models.py 0 → 100644
View file @36c95f7
+++ b/source_code/models.py 0 → 100644
View file @36c95f7
--- a/source_code/roipool.py 0 → 100644
View file @36c95f7
+++ b/source_code/roipool.py 0 → 100644
View file @36c95f7
+from __future__ import division
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules import module
+
+from utils.utils import *
+
+
+class ROIPool(nn.Module):
+    def __init__(self, output_size):
+        super(ROIPool, self).__init__()
+        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
+        self.size = output_size
+        self.fc1 = nn.Linear(2304, 1024)
+        self.fc2 = nn.Linear(1024, 512)
+        self.fc3 = nn.Linear(512, 1)
+        self.softplus = nn.Softplus()
+        self.smoothl1 = nn.SmoothL1Loss()
+        self.mse = nn.MSELoss()
+
+
+    def target_detection_iou(self, box1, box2):
+        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+        # get the corrdinates of the intersection rectangle
+        b1_x1 = b1_x1.type(torch.float64)
+        b1_y1 = b1_y1.type(torch.float64)
+        b1_x2 = b1_x2.type(torch.float64)
+        b1_y2 = b1_y2.type(torch.float64)
+
+        inter_rect_x1 = torch.max(b1_x1, b2_x1)
+        inter_rect_y1 = torch.max(b1_y1, b2_y1)
+        inter_rect_x2 = torch.min(b1_x2, b2_x2)
+        inter_rect_y2 = torch.min(b1_y2, b2_y2)
+        # Intersection area
+        inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
+            inter_rect_y2 - inter_rect_y1 + 1, min=0
+        )
+        # Union Area
+        b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
+        b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
+
+        iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
+
+        return iou
+
+    def similar_bbox(self, detections, targets):
+        rescaled_boxes = rescale_boxes(detections, 416, (480, 640))
+        similar_box = list(range(len(rescaled_boxes)))
+        for i in range(len(rescaled_boxes)):
+            for j in range(len(targets)):
+                target_xyxy = [(targets[j][0]-(targets[j][2]/2))*640, (targets[j][1]-(targets[j][3]/2))*480, (targets[j][0]+(targets[j][2]/2))*640, (targets[j][1]+(targets[j][3]/2))*480]
+                target_xyxy = torch.tensor(target_xyxy)
+                iou = self.target_detection_iou(rescaled_boxes[i][:4], target_xyxy)
+                if iou > 0.01:
+                    similar_box[i] = targets[j][-1]
+                    break
+                else:
+                    similar_box[i] = -1
+        return similar_box
+
+
+    def cal_scale(self, x, detections, targets):
+        targets_distance = targets[:, :4]
+        square_targets = []
+        
+        for target_distance in targets_distance:
+            x1 = (target_distance[0]-(target_distance[2]/2))*416
+            y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15
+            x2 = (target_distance[0]+(target_distance[2]/2))*416
+            y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15
+        
+            square_targets.append([x1, y1, x2, y2])
+        square_targets = torch.tensor(square_targets)
+        
+        scale = get_scale(square_targets)
+        output_distance = []
+
+        roi_results = []
+        for i in scale:
+            x1_scale = i[0]
+            y1_scale = i[1]
+            x2_scale = i[2]
+            y2_scale = i[3]
+            
+            output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
+        
+            output = self.maxpool(output)
+            
+            output = output.view(1, -1)
+            # print(output)
+            roi_results.append(output)
+        return roi_results
+
+    def cal_scale_evaL(self, x, detections):
+        detections = detections[:, :4]
+        scale = get_scale(detections)
+        output_distance = []
+        roi_results = []
+        for i in scale:
+            x1_scale = i[0]
+            y1_scale = i[1]
+            x2_scale = i[2]
+            y2_scale = i[3]
+
+            output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
+            output = self.maxpool(output)
+            output = output.view(1, -1)
+            roi_results.append(output)
+        return roi_results
+
+    def forward(self, x, detections, targets=None):
+        if targets is not None:
+            distances = targets[:, 4]
+            distances = distances * 10
+            # distances = distances * 10
+            # print(f'disatnces = {distances}')
+            # targets_distance = targets[:, :4]
+            # square_targets = []
+            
+            # for target_distance in targets_distance:
+            #     x1 = (target_distance[0]-(target_distance[2]/2))*416
+            #     y1 = ((target_distance[1]-(target_distance[3]/2))*480+80)*13/15
+            #     x2 = (target_distance[0]+(target_distance[2]/2))*416
+            #     y2 = ((target_distance[1]+(target_distance[3]/2))*480+80)*13/15
+            
+            #     square_targets.append([x1, y1, x2, y2])
+            # square_targets = torch.tensor(square_targets)
+            
+            # scale = get_scale(square_targets)
+            # output_distance = []
+
+            # roi_results = []
+            # for i in scale:
+            #     x1_scale = i[0]
+            #     y1_scale = i[1]
+            #     x2_scale = i[2]
+            #     y2_scale = i[3]
+                
+            #     output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
+            
+            #     output = self.maxpool(output)
+                
+            #     output = output.view(1, -1).cuda()
+            #     # print(output)
+            #     roi_results.append(output)
+            roi_results = self.cal_scale(x, detections, targets)
+
+            output = torch.cat(roi_results, 0)
+            # print(output.shape)
+            # print(output.shape)
+            output = self.fc1(output)
+            output = self.fc2(output)
+            output = self.fc3(output)
+            output = self.softplus(output)
+            # print(f'output = {output}')
+            #loss = 0
+            # output_distance = torch.tensor(output, requires_grad=True)
+
+
+            '''
+            output = x
+            # output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
+            output = self.maxpool(output)
+            output = output.view(1, -1).cuda()
+            # print(output.shape)
+            output = self.fc1(output)
+            output = self.fc2(output)
+            output = self.fc3(output)
+            output = self.softplus(output)
+            '''
+
+            # output_distance = torch.cuda.FloatTensor(output_distance, requires_grad=True)#.to('cpu')
+            
+            #print(f'output_distance = {output_distance}')
+            #print(output_distance.shape)
+            #print(f'distances = {distances}')
+            #print(distances.shape)
+            distances = distances.cuda()
+            # print(f'output = {output}')
+            # print(f'output = {output}')
+            # print(f'distances = {distances}')
+            loss = self.smoothl1(output, distances.float())
+            # print(f'loss = {loss}')
+            
+            # print(f'output_distance = {output_distance}')
+            # print(f'distances = {distances}')
+            # print(f'loss = {loss}')
+            return loss, output
+
+        else:
+
+            '''
+            detections = detections[:, :4]
+            scale = get_scale(detections)
+            output_distance = []
+            for i in scale:
+                x1_scale = i[0]
+                y1_scale = i[1]
+                x2_scale = i[2]
+                y2_scale = i[3]
+
+                output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
+                output = self.maxpool(output)
+                output = output.view(1, -1).cuda()
+            '''
+            roi_results = self.cal_scale_evaL(x, detections)
+            output = torch.cat(roi_results, 0)
+                #   print(f'output = {output.shape}')
+            output = self.fc1(output)
+            output = self.fc2(output)
+            output = self.fc3(output)
+            output = self.softplus(output)
+            # print(f'output = {output}')
+            
+
+            return output
+
+
+        '''
+        scale = get_scale(detections)
+
+        
+        output_distance = []
+        for i in scale:
+            x1_scale = i[0]
+            y1_scale = i[1]
+            x2_scale = i[2]
+            y2_scale = i[3]
+
+            output = x[:, :, y1_scale:y2_scale+1, x1_scale:x2_scale+1]
+            # output = x[:, :, x1_scale:x2_scale+1, y1_scale:y2_scale+1]
+            output = self.maxpool(output)
+            output = output.view(1, -1).cuda()
+            output = self.fc1(output)
+            output = self.fc2(output)
+
+            output_distance.append(output)
+        
+        if targets is None:
+            return output_distance, 0
+            
+        else:
+            loss = 0
+            box_similar_distance = self.similar_bbox(detections, targets)
+            for i in range(len(box_similar_distance)):
+                if box_similar_distance[i] == -1:
+                    output_distance[i] = -1
+            
+            
+            output_distance = torch.FloatTensor(output_distance).to('cpu')
+            box_similar_distance = torch.FloatTensor(box_similar_distance).to('cpu')
+
+            
+            # print(f'output_distance = {output_distance}')
+            # print(f'target_distance = {box_similar_distance}')
+            loss = self.smoothl1(output_distance, box_similar_distance)
+        '''
+
+
+
+
+        
+
+
+        
--- a/source_code/utils/__init__.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__init__.py 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/__init__.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/__init__.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/__init__.cpython-38.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/__init__.cpython-38.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/augmentations.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/augmentations.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/augmentations.cpython-38.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/augmentations.cpython-38.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/datasets.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/datasets.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/datasets.cpython-38.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/datasets.cpython-38.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/logger.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/logger.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/parse_config.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/parse_config.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/parse_config.cpython-38.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/parse_config.cpython-38.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/utils.cpython-36.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/utils.cpython-36.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/__pycache__/utils.cpython-38.pyc 0 → 100644
View file @36c95f7
+++ b/source_code/utils/__pycache__/utils.cpython-38.pyc 0 → 100644
View file @36c95f7
--- a/source_code/utils/augmentations.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/augmentations.py 0 → 100644
View file @36c95f7
+import torch
+import torch.nn.functional as F
+import numpy as np
+
+
+def horisontal_flip(images, targets):
+    images = torch.flip(images, [-1])
+    targets[:, 2] = 1 - targets[:, 2]
+    return images, targets
--- a/source_code/utils/datasets.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/datasets.py 0 → 100644
View file @36c95f7
+import glob
+import random
+import os
+import sys
+import numpy as np
+from PIL import Image
+import torch
+import torch.nn.functional as F
+import time
+
+from utils.augmentations import horisontal_flip
+from torch.utils.data import Dataset
+import torchvision.transforms as transforms
+
+
+def pad_to_square(img, pad_value):
+    c, h, w = img.shape
+    dim_diff = np.abs(h - w)
+    # (upper / left) padding and (lower / right) padding
+    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
+    # Determine padding
+    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
+    # Add padding
+    img = F.pad(img, pad, "constant", value=pad_value)
+
+    return img, pad
+
+
+def resize(image, size):
+    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
+    return image
+
+
+def random_resize(images, min_size=288, max_size=448):
+    new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
+    images = F.interpolate(images, size=new_size, mode="nearest")
+    return images
+
+
+class ImageFolder(Dataset):
+    def __init__(self, folder_path, img_size=416):
+        self.files = sorted(glob.glob("%s/*.*" % folder_path))
+        self.img_size = img_size
+
+    def __getitem__(self, index):
+        img_path = self.files[index % len(self.files)]
+        # Extract image as PyTorch tensor
+        img = transforms.ToTensor()(Image.open(img_path))
+        # Pad to square resolution
+        img, _ = pad_to_square(img, 0)
+        # Resize
+        img = resize(img, self.img_size)
+
+        return img_path, img
+
+    def __len__(self):
+        return len(self.files)
+
+
+class ListDataset(Dataset):
+    def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
+        with open(list_path, "r") as file:
+            self.img_files = file.readlines()
+
+        self.label_files = [
+            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
+            for path in self.img_files
+        ]
+        self.img_size = img_size
+        self.max_objects = 100
+        self.augment = augment
+        self.multiscale = multiscale
+        self.normalized_labels = normalized_labels
+        self.min_size = self.img_size - 3 * 32
+        self.max_size = self.img_size + 3 * 32
+        self.batch_count = 0
+
+    def __getitem__(self, index):
+
+        # ---------
+        #  Image
+        # ---------
+
+        img_path = self.img_files[index % len(self.img_files)].rstrip()
+        # Extract image as PyTorch tensor
+        img = transforms.ToTensor()(Image.open(img_path, 'r').convert('RGB'))
+
+        # Handle images with less than three channels
+        if len(img.shape) != 3:
+            img = img.unsqueeze(0)
+            img = img.expand((3, img.shape[1:]))
+
+        _, h, w = img.shape
+        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
+        # Pad to square resolution
+        img, pad = pad_to_square(img, 0)
+        _, padded_h, padded_w = img.shape
+
+        # ---------
+        #  Label
+        # ---------
+
+        label_path = self.label_files[index % len(self.img_files)].rstrip()
+
+        targets = None
+        targets_distance = None
+        if os.path.exists(label_path):
+            if torch.from_numpy(np.loadtxt(label_path)).ndim == 2:
+                boxes = torch.from_numpy(np.loadtxt(label_path)[:,:-1].reshape(-1, 5))
+            else:
+                boxes = torch.from_numpy(np.loadtxt(label_path)[:-1].reshape(-1, 5))
+            # Extract coordinates for unpadded + unscaled image
+            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
+            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
+            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
+            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
+            # Adjust for added padding
+            x1 += pad[0]
+            y1 += pad[2]
+            x2 += pad[1]
+            y2 += pad[3]
+            # Returns (x, y, w, h)
+            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
+            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
+            boxes[:, 3] *= w_factor / padded_w
+            boxes[:, 4] *= h_factor / padded_h
+
+            targets = torch.zeros((len(boxes), 6))
+            targets[:, 1:] = boxes
+
+            if torch.from_numpy(np.loadtxt(label_path)).ndim == 2:
+                targets_distance = torch.from_numpy(np.loadtxt(label_path)[:,1:].reshape(-1, 5))
+            else:
+                targets_distance = torch.from_numpy(np.loadtxt(label_path)[1:].reshape(-1, 5))
+            
+        # Apply augmentations
+        # if self.augment:
+        #    if np.random.random() < 0.5:
+        #        img, targets = horisontal_flip(img, targets)
+        
+        return img_path, img, targets, targets_distance
+
+    def collate_fn(self, batch):
+        paths, imgs, targets, targets_distance = list(zip(*batch))
+        # Remove empty placeholder targets
+        targets = [boxes for boxes in targets if boxes is not None]
+        # Add sample index to targets
+        for i, boxes in enumerate(targets):
+            boxes[:, 0] = i
+        targets = torch.cat(targets, 0)
+        # Selects new image size every tenth batch
+        if self.multiscale and self.batch_count % 10 == 0:
+            self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
+        # Resize images to input shape
+        imgs = torch.stack([resize(img, self.img_size) for img in imgs])
+        self.batch_count += 1
+        return paths, imgs, targets, targets_distance
+
+    def __len__(self):
+        return len(self.img_files)
--- a/source_code/utils/logger.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/logger.py 0 → 100644
View file @36c95f7
+import tensorflow as tf
+
+
+class Logger(object):
+    def __init__(self, log_dir):
+        """Create a summary writer logging to log_dir."""
+        self.writer = tf.summary.create_file_writer(log_dir)
+
+    def scalar_summary(self, tag, value, step):
+        """Log a scalar variable."""
+        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+        self.writer.add_summary(summary, step)
+
+    def list_of_scalars_summary(self, tag_value_pairs, step):
+        """Log scalar variables."""
+        # summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs])
+        # self.writer.add_summary(summary, step)
--- a/source_code/utils/parse_config.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/parse_config.py 0 → 100644
View file @36c95f7
+
+
+def parse_model_config(path):
+    """Parses the yolo-v3 layer configuration file and returns module definitions"""
+    file = open(path, 'r')
+    lines = file.read().split('\n')
+    lines = [x for x in lines if x and not x.startswith('#')]
+    lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
+    module_defs = []
+    for line in lines:
+        if line.startswith('['): # This marks the start of a new block
+            module_defs.append({})
+            module_defs[-1]['type'] = line[1:-1].rstrip()
+            if module_defs[-1]['type'] == 'convolutional':
+                module_defs[-1]['batch_normalize'] = 0
+        else:
+            key, value = line.split("=")
+            value = value.strip()
+            module_defs[-1][key.rstrip()] = value.strip()
+
+    return module_defs
+
+def parse_data_config(path):
+    """Parses the data configuration file"""
+    options = dict()
+    options['gpus'] = '0,1,2,3'
+    options['num_workers'] = '10'
+    with open(path, 'r') as fp:
+        lines = fp.readlines()
+    for line in lines:
+        line = line.strip()
+        if line == '' or line.startswith('#'):
+            continue
+        key, value = line.split('=')
+        options[key.strip()] = value.strip()
+    return options
--- a/source_code/utils/utils.py 0 → 100644
View file @36c95f7
+++ b/source_code/utils/utils.py 0 → 100644
View file @36c95f7