added the modified version of yolov3 python code

김성주
Commit 31684832905a17921aecdf952bb3c9f6dd31c457 31684832 1 parent a8e4d7c6
Showing 12 changed files with 891 additions and 0 deletions
code/yolov3/args.py
code/yolov3/data_utils.py
code/yolov3/eval.py
code/yolov3/eval_utils.py
code/yolov3/misc_utils.py
code/yolov3/model.py
code/yolov3/nms_utils.py
code/yolov3/plot_utils.py
code/yolov3/test_single_image.py
code/yolov3/tfrecord_utils.py
code/yolov3/train.py
code/yolov3/video_test.py
--- a/code/yolov3/args.py 0 → 100644
View file @3168483
+++ b/code/yolov3/args.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import numpy as np
+import tensorflow as tf
+import random
+import math
+
+from misc_utils import parse_anchors, read_class_names
+from tfrecord_utils import TFRecordIterator
+
+### Some paths
+data_path = '../../data/'
+train_file = data_path + 'train.tfrecord'  # The path of the training TFRecord file (read with GZIP compression below).
+val_file = data_path + 'val.tfrecord'  # The path of the validation TFRecord file (read with GZIP compression below).
+restore_path = data_path + 'darknet_weights/yolov3.ckpt'  # The path of the weights to restore.
+save_dir = '../../checkpoint/'  # The directory of the weights to save.
+
+### NOTE: TensorBoard logs are not used in this code.
+
+log_dir = data_path + 'logs/'  # The directory to store the tensorboard log files.
+progress_log_path = data_path + 'progress.log'  # The path to record the training progress.
+
+anchor_path = data_path + 'yolo_anchors.txt'  # The path of the anchor txt file.
+class_name_path = data_path + 'classes.txt'  # The path of the class names.
+
+### Training related numbers
+batch_size = 6
+img_size = [416, 416]  # Images will be resized to `img_size` and fed to the network, size format: [width, height]
+letterbox_resize = True  # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
+total_epoches = 50
+train_evaluation_step = 10  # Evaluate on the training batch after some steps.
+val_evaluation_epoch = 2  # Evaluate on the whole validation dataset every this many epochs. train.py computes `epoch % val_evaluation_epoch`, so this must be a non-zero int.
+save_epoch = 5  # Save the model after some epochs.
+batch_norm_decay = 0.99  # decay in bn ops
+weight_decay = 5e-4  # l2 weight decay
+global_step = 0  # used when resuming training
+
+### tf.data parameters
+num_threads = 10  # Number of threads for image processing used in tf.data pipeline.
+prefetech_buffer = 5  # Prefetch buffer size used in tf.data pipeline.
+
+### Learning rate and optimizer
+optimizer_name = 'momentum'  # Chosen from [sgd, momentum, adam, rmsprop]
+save_optimizer = True  # Whether to save the optimizer parameters into the checkpoint file.
+learning_rate_init = 1e-4
+lr_type = 'piecewise'  # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
+lr_decay_epoch = 5  # Epochs after which learning rate decays. Int or float. Used by the `exponential` and `cosine_decay_restart` lr_type.
+lr_decay_factor = 0.96  # The learning rate decay factor. Used by the `exponential` lr_type.
+lr_lower_bound = 1e-6  # The minimum learning rate.
+# only used in piecewise lr type
+pw_boundaries = [30, 50]  # epoch based boundaries (converted to step counts at the bottom of this file)
+pw_values = [learning_rate_init, 3e-5, 1e-5]
+
+### Load and finetune
+# Choose the parts you want to restore the weights. List form.
+# restore_include: None, restore_exclude: None  => restore the whole model
+# restore_include: None, restore_exclude: scope  => restore the whole model except `scope`
+# restore_include: scope1, restore_exclude: scope2  => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)
+# choice 1: only restore the darknet body
+# restore_include = ['yolov3/darknet53_body']
+# restore_exclude = None
+# choice 2: restore all layers except the last 3 conv2d layers in 3 scale
+restore_include = None
+restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
+# Choose the parts you want to finetune. List form.
+# Set to None to train the whole model.
+
+update_part = ['yolov3/yolov3_head']
+
+### other training strategies
+multi_scale_train = True  # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
+use_label_smooth = True # Whether to use class label smoothing strategy.
+use_focal_loss = True  # Whether to apply focal loss on the conf loss.
+use_mix_up = True  # Whether to use mix up data augmentation strategy.
+use_warm_up = True  # Whether to use warm up strategy to prevent the gradient from exploding.
+warm_up_epoch = 3  # Number of warm up training epochs. Set to a larger value if gradient explodes.
+
+### some constants in validation
+# nms
+nms_threshold = 0.45  # iou threshold in nms operation
+score_threshold = 0.01  # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.
+nms_topk = 150  # keep at most nms_topk outputs after nms
+# mAP eval
+eval_threshold = 0.5  # the iou threshold applied in mAP evaluation
+use_voc_07_metric = False  # whether to use voc 2007 evaluation metric, i.e. the 11-point metric
+
+### parse some params
+anchors = parse_anchors(anchor_path)
+classes = read_class_names(class_name_path)
+class_num = len(classes)
+# Count the records stored in each GZIP-compressed TFRecord file.
+train_img_cnt = TFRecordIterator(train_file, 'GZIP').count()
+val_img_cnt = TFRecordIterator(val_file, 'GZIP').count()
+# Number of batches per epoch (rounded up, so the last batch may be partial).
+train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))
+
+# Convert the epoch-based schedule settings into step counts.
+lr_decay_freq = int(train_batch_num * lr_decay_epoch)
+pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
--- a/code/yolov3/data_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/data_utils.py 0 → 100644
View file @3168483
--- a/code/yolov3/eval.py 0 → 100644
View file @3168483
+++ b/code/yolov3/eval.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+from tqdm import trange
+import os
+
+from data_utils import get_batch_data
+from misc_utils import parse_anchors, read_class_names, AverageMeter
+from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
+from nms_utils import gpu_nms
+
+from model import yolov3
+
+### ArgumentParser
+parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")
+
+# paths
+parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt",
+                    help="The path of the GZIP-compressed validation or test TFRecord file.")
+
+parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+                    help="The path of the weights to restore.")
+
+parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+                    help="The path of the anchor txt file.")
+
+parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
+                    help="The path of the class names.")
+
+# some numbers
+parser.add_argument("--img_size", nargs='*', type=int, default=[416, 416],
+                    help="Resize the input image to `img_size`, size format: [width, height]")
+
+parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=False,
+                    help="Whether to use the letterbox resize, i.e., keep the original image aspect ratio.")
+
+parser.add_argument("--num_threads", type=int, default=10,
+                    help="Number of threads for image processing used in tf.data pipeline.")
+
+parser.add_argument("--prefetech_buffer", type=int, default=5,
+                    help="Prefetech_buffer used in tf.data pipeline.")
+
+parser.add_argument("--nms_threshold", type=float, default=0.45,
+                    help="IOU threshold in nms operation.")
+
+parser.add_argument("--score_threshold", type=float, default=0.01,
+                    help="Threshold of the probability of the classes in nms operation.")
+
+parser.add_argument("--nms_topk", type=int, default=400,
+                    help="Keep at most nms_topk outputs after nms.")
+
+parser.add_argument("--use_voc_07_metric", type=lambda x: (str(x).lower() == 'true'), default=False,
+                    help="Whether to use the voc 2007 mAP metrics.")
+
+args = parser.parse_args()
+
+# args params
+args.anchors = parse_anchors(args.anchor_path)
+args.classes = read_class_names(args.class_name_path)
+args.class_num = len(args.classes)
+# `eval_file` is consumed below as a GZIP-compressed TFRecord file, so the
+# number of evaluation samples is the number of records it holds, not the
+# number of text lines in the binary file.
+args.img_cnt = sum(1 for _ in tf.python_io.tf_record_iterator(
+    args.eval_file, tf.python_io.TFRecordOptions('GZIP')))
+
+# setting placeholders
+is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
+handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
+pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
+pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
+gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
+
+### tf.data pipeline
+val_dataset = tf.data.TFRecordDataset(filenames=args.eval_file, compression_type='GZIP')
+val_dataset = val_dataset.batch(1)
+val_dataset = val_dataset.map(
+    lambda x: tf.py_func(get_batch_data, [x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize], [tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
+    num_parallel_calls=args.num_threads
+)
+# Dataset transformations return a new dataset; the result must be kept,
+# otherwise prefetching is never applied.
+val_dataset = val_dataset.prefetch(args.prefetech_buffer)
+iterator = val_dataset.make_one_shot_iterator()
+
+image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
+image_ids.set_shape([None])
+y_true = [y_true_13, y_true_26, y_true_52]
+image.set_shape([None, args.img_size[1], args.img_size[0], 3])
+for y in y_true:
+    y.set_shape([None, None, None, None, None])
+
+### Model definition
+yolo_model = yolov3(args.class_num, args.anchors)
+with tf.variable_scope('yolov3'):
+    pred_feature_maps = yolo_model.forward(image, is_training=is_training)
+loss = yolo_model.compute_loss(pred_feature_maps, y_true)
+y_pred = yolo_model.predict(pred_feature_maps)
+
+saver_to_restore = tf.train.Saver()
+
+with tf.Session() as sess:
+    sess.run([tf.global_variables_initializer()])
+    # Always restore: `restore_path` is a checkpoint prefix that need not
+    # exist as a literal file, and evaluating freshly initialised weights
+    # would silently report meaningless metrics.
+    saver_to_restore.restore(sess, args.restore_path)
+
+    print('\nStart evaluation...\n')
+
+    val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
+        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+    val_preds = []
+
+    # The dataset is batched with size 1, so one run consumes one record.
+    for j in trange(args.img_cnt):
+        __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss], feed_dict={is_training: False})
+        pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)
+
+        val_preds.extend(pred_content)
+        val_loss_total.update(__loss[0])
+        val_loss_xy.update(__loss[1])
+        val_loss_wh.update(__loss[2])
+        val_loss_conf.update(__loss[3])
+        val_loss_class.update(__loss[4])
+
+    rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
+    gt_dict = parse_gt_rec(args.eval_file, 'GZIP', args.img_size, args.letterbox_resize)
+    print('mAP eval:')
+    for ii in range(args.class_num):
+        npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=0.5, use_07_metric=args.use_voc_07_metric)
+        # Recall is weighted by `npos` and precision by `nd`, both returned
+        # by voc_eval; AP is averaged uniformly over the classes.
+        rec_total.update(rec, npos)
+        prec_total.update(prec, nd)
+        ap_total.update(ap, 1)
+        print('Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}'.format(ii, rec, prec, ap))
+
+    mAP = ap_total.average
+    print('final mAP: {:.4f}'.format(mAP))
+    print("recall: {:.3f}, precision: {:.3f}".format(rec_total.average, prec_total.average))
+    print("total_loss: {:.3f}, loss_xy: {:.3f}, loss_wh: {:.3f}, loss_conf: {:.3f}, loss_class: {:.3f}".format(
+        val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average
+    ))
\ No newline at end of file
--- a/code/yolov3/eval_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/eval_utils.py 0 → 100644
View file @3168483
--- a/code/yolov3/misc_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/misc_utils.py 0 → 100644
View file @3168483
+import numpy as np
+import tensorflow as tf
+import random
+
+class AverageMeter(object):
+    """Track the latest value and the running weighted average of a metric.
+
+    Attributes:
+        val: the value passed to the most recent `update` call.
+        sum: the weighted sum of all values seen since the last `reset`.
+        count: the total weight seen since the last `reset`.
+        average: `sum / count`, or 0 while no weight has been accumulated.
+    """
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """Forget everything recorded so far."""
+        self.val = 0
+        self.average = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        """Record `val` with weight `n` and refresh the running average."""
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        # A total weight of zero (e.g. `update(x, 0)` on a fresh meter) has
+        # no defined mean; keep the previous average instead of dividing by
+        # zero.
+        if self.count:
+            self.average = self.sum / float(self.count)
+
+
+def parse_anchors(anchor_path):
+    """Load comma-separated anchor values and return them as an (N, 2) array.
+
+    Args:
+        anchor_path: path of a text file holding comma-separated numbers.
+
+    Returns:
+        A float32 numpy array reshaped to [-1, 2].
+    """
+    # Use a context manager so the file handle is closed deterministically.
+    with open(anchor_path, 'r') as anchor_file:
+        values = anchor_file.read().split(',')
+    anchors = np.reshape(np.asarray(values, np.float32), [-1, 2])
+    return anchors
+
+
+def read_class_names(class_name_path):
+    """Return a dict mapping each zero-based line index to that line's text.
+
+    Only newline characters are stripped from the ends of each line.
+    """
+    with open(class_name_path, 'r') as name_file:
+        return {index: line.strip('\n') for index, line in enumerate(name_file)}
+
+
+def shuffle_and_overwrite(file_name):
+    """Shuffle the lines of `file_name` in place.
+
+    The file is read fully, its lines are shuffled with `random.shuffle`,
+    and the file is rewritten with the shuffled lines.
+    """
+    # Close the read handle before reopening the same path for writing.
+    with open(file_name, 'r') as f:
+        content = f.readlines()
+    random.shuffle(content)
+    with open(file_name, 'w') as f:
+        f.writelines(content)
+
+
+def update_dict(ori_dict, new_dict):
+    """Accumulate `new_dict` into `ori_dict` key by key.
+
+    An empty (falsy) `ori_dict` yields `new_dict` itself; otherwise every
+    key of `ori_dict` is increased in place by the matching `new_dict`
+    entry and `ori_dict` is returned.
+    """
+    if ori_dict:
+        for name in ori_dict:
+            ori_dict[name] += new_dict[name]
+        return ori_dict
+    return new_dict
+
+
+def list_add(ori_list, new_list):
+    """Add `new_list` element-wise onto `ori_list` in place and return it."""
+    for position, _ in enumerate(ori_list):
+        ori_list[position] += new_list[position]
+    return ori_list
+
+
+def load_weights(var_list, weights_file):
+    # Build the list of `tf.assign` ops that copy the values stored in the
+    # binary `weights_file` into the variables of `var_list`.
+    # The variables are consumed in list order, so `var_list` must be ordered
+    # the same way the values are laid out in the file.
+    with open(weights_file, "rb") as fp:
+        # The first five int32 values are read and discarded
+        # (presumably a file header -- TODO confirm against the weights format).
+        np.fromfile(fp, dtype=np.int32, count=5)
+        # Everything after that is one flat float32 array.
+        weights = np.fromfile(fp, dtype=np.float32)
+
+    ptr = 0  # read position inside `weights`
+    i = 0  # index of the variable currently being processed
+    assign_ops = []
+    # NOTE(review): `i` is only advanced inside the 'Conv' branch below; if
+    # `var_list[i]` is ever not a Conv variable this loop never terminates.
+    while i < len(var_list) - 1:
+        var1 = var_list[i]
+        var2 = var_list[i + 1]
+        if 'Conv' in var1.name.split('/')[-2]:
+            if 'BatchNorm' in var2.name.split('/')[-2]:
+                # The four variables after the kernel are unpacked as
+                # gamma, beta, mean, var, but the file is read in the order
+                # beta, gamma, mean, var.
+                gamma, beta, mean, var = var_list[i + 1:i + 5]
+                batch_norm_vars = [beta, gamma, mean, var]
+                # Note: the loop variable reuses the name `var`.
+                for var in batch_norm_vars:
+                    shape = var.shape.as_list()
+                    num_params = np.prod(shape)
+                    var_weights = weights[ptr:ptr + num_params].reshape(shape)
+                    ptr += num_params
+                    assign_ops.append(tf.assign(var, var_weights, validate_shape=True))
+                # skip the four batch-norm variables just loaded
+                i += 4
+            elif 'Conv' in var2.name.split('/')[-2]:
+                # No batch norm follows: `var2` is treated as this layer's bias.
+                # load biases
+                bias = var2
+                bias_shape = bias.shape.as_list()
+                bias_params = np.prod(bias_shape)
+                bias_weights = weights[ptr:ptr +
+                                       bias_params].reshape(bias_shape)
+                ptr += bias_params
+                assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
+                # skip the bias variable just loaded
+                i += 1
+
+            # The kernel values come after the batch-norm/bias values.
+            shape = var1.shape.as_list()
+            num_params = np.prod(shape)
+
+            # Read the kernel as (shape[3], shape[2], shape[0], shape[1]) ...
+            var_weights = weights[ptr:ptr + num_params].reshape(
+                (shape[3], shape[2], shape[0], shape[1]))
+
+            # ... and permute the axes back to the variable's own `shape`.
+            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
+            ptr += num_params
+            assign_ops.append(
+                tf.assign(var1, var_weights, validate_shape=True))
+            i += 1
+
+    return assign_ops
+
+
+def config_learning_rate(args, global_step):
+    """Build the learning-rate tensor selected by `args.lr_type`.
+
+    Supported types: 'fixed', 'piecewise', 'exponential', 'cosine_decay'
+    and 'cosine_decay_restart'.
+
+    Raises:
+        ValueError: if `args.lr_type` is none of the supported types.
+    """
+    schedule = args.lr_type
+
+    if schedule == 'fixed':
+        return tf.convert_to_tensor(args.learning_rate_init, name='fixed_learning_rate')
+
+    if schedule == 'piecewise':
+        return tf.train.piecewise_constant(
+            global_step,
+            boundaries=args.pw_boundaries,
+            values=args.pw_values,
+            name='piecewise_learning_rate')
+
+    if schedule == 'exponential':
+        decayed = tf.train.exponential_decay(
+            args.learning_rate_init,
+            global_step,
+            args.lr_decay_freq,
+            args.lr_decay_factor,
+            staircase=True,
+            name='exponential_learning_rate')
+        # never drop below the configured lower bound
+        return tf.maximum(decayed, args.lr_lower_bound)
+
+    if schedule == 'cosine_decay':
+        # steps that remain once the optional warm-up epochs are excluded
+        train_steps = (args.total_epoches - float(args.use_warm_up) * args.warm_up_epoch) * args.train_batch_num
+        lr_span = args.learning_rate_init - args.lr_lower_bound
+        cosine_term = 1 + tf.cos(global_step / train_steps * np.pi)
+        return args.lr_lower_bound + 0.5 * lr_span * cosine_term
+
+    if schedule == 'cosine_decay_restart':
+        return tf.train.cosine_decay_restarts(
+            args.learning_rate_init,
+            global_step,
+            args.lr_decay_freq,
+            t_mul=2.0,
+            m_mul=1.0,
+            name='cosine_decay_learning_rate_restart')
+
+    raise ValueError('Unsupported learning rate type!')
+
+
+def config_optimizer(optimizer_name, learning_rate, decay=0.9, momentum=0.9):
+    """Create the TensorFlow optimizer named by `optimizer_name`.
+
+    `momentum` is used by 'momentum' and 'rmsprop'; `decay` only by
+    'rmsprop'.
+
+    Raises:
+        ValueError: if `optimizer_name` is not one of
+            'sgd', 'adam', 'rmsprop' or 'momentum'.
+    """
+    if optimizer_name == 'sgd':
+        return tf.train.GradientDescentOptimizer(learning_rate)
+    if optimizer_name == 'adam':
+        return tf.train.AdamOptimizer(learning_rate)
+    if optimizer_name == 'rmsprop':
+        return tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum)
+    if optimizer_name == 'momentum':
+        return tf.train.MomentumOptimizer(learning_rate, momentum=momentum)
+    raise ValueError('Unsupported optimizer type!')
\ No newline at end of file
--- a/code/yolov3/model.py 0 → 100644
View file @3168483
+++ b/code/yolov3/model.py 0 → 100644
View file @3168483
--- a/code/yolov3/nms_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/nms_utils.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import numpy as np
+import tensorflow as tf
+
+def gpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, nms_thresh=0.5):
+    """Build TensorFlow ops for per-class non-max suppression.
+
+    `boxes` is flattened to [-1, 4] and `scores` to [-1, num_classes].
+    For every class, entries scoring at least `score_thresh` go through
+    `tf.image.non_max_suppression` (at most `max_boxes` kept per class).
+
+    Returns:
+        (boxes, score, label): the per-class results concatenated along
+        axis 0, in class order.
+    """
+    max_boxes = tf.constant(max_boxes, dtype='int32')
+
+    # '-1' because the exact number of boxes is not known in advance
+    flat_boxes = tf.reshape(boxes, [-1, 4])
+    flat_scores = tf.reshape(scores, [-1, num_classes])
+
+    # Boolean mask of (box, class) pairs that reach the score threshold.
+    keep_mask = tf.greater_equal(flat_scores, tf.constant(score_thresh))
+
+    per_class_boxes, per_class_labels, per_class_scores = [], [], []
+    for class_id in range(num_classes):
+        class_mask = keep_mask[:, class_id]
+        candidate_boxes = tf.boolean_mask(flat_boxes, class_mask)
+        candidate_scores = tf.boolean_mask(flat_scores[:, class_id], class_mask)
+        selected = tf.image.non_max_suppression(
+            boxes=candidate_boxes,
+            scores=candidate_scores,
+            max_output_size=max_boxes,
+            iou_threshold=nms_thresh,
+            name='nms_indices')
+        # one int32 label per surviving box, filled with the class index
+        per_class_labels.append(
+            tf.ones_like(tf.gather(candidate_scores, selected), 'int32') * class_id)
+        per_class_boxes.append(tf.gather(candidate_boxes, selected))
+        per_class_scores.append(tf.gather(candidate_scores, selected))
+
+    boxes = tf.concat(per_class_boxes, axis=0)
+    score = tf.concat(per_class_scores, axis=0)
+    label = tf.concat(per_class_labels, axis=0)
+
+    return boxes, score, label
+
+
+def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):
+    """Greedy non-max suppression for a single class, in pure numpy.
+
+    Args:
+        boxes: array of shape [N, 4] holding (x1, y1, x2, y2) per row.
+        scores: array of shape [N].
+        max_boxes: maximum number of indices to return.
+        iou_thresh: a box is dropped when its IoU with an already kept,
+            higher-scoring box is greater than this value.
+
+    Returns:
+        A list of indices into `boxes`, ordered by decreasing score.
+    """
+    assert boxes.shape[1] == 4 and len(scores.shape) == 1
+
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+
+    areas = (x2 - x1) * (y2 - y1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    # Stop as soon as enough boxes are kept; later picks would be cut anyway.
+    while order.size > 0 and len(keep) < max_boxes:
+        i = order[0]
+        keep.append(i)
+        rest = order[1:]
+
+        xx1 = np.maximum(x1[i], x1[rest])
+        yy1 = np.maximum(y1[i], y1[rest])
+        xx2 = np.minimum(x2[i], x2[rest])
+        yy2 = np.minimum(y2[i], y2[rest])
+
+        # The intersection must use the same (continuous) convention as
+        # `areas`. Mixing a "+ 1" pixel convention here with areas computed
+        # without it makes the IoU exceed 1 or turn negative for small
+        # boxes, so exact duplicates were not suppressed.
+        w = np.maximum(0.0, xx2 - xx1)
+        h = np.maximum(0.0, yy2 - yy1)
+        inter = w * h
+        union = areas[i] + areas[rest] - inter
+
+        # Degenerate (zero-area) pairs get IoU 0 instead of nan/inf.
+        valid = union > 0
+        ovr = np.where(valid, inter / np.where(valid, union, 1.0), 0.0)
+
+        order = rest[np.where(ovr <= iou_thresh)[0]]
+
+    return keep
+
+
+def cpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
+    """Run `py_nms` class by class on numpy inputs.
+
+    `boxes` is reshaped to [-1, 4] and `scores` to [-1, num_classes]. For
+    each class only entries scoring at least `score_thresh` are passed to
+    `py_nms`.
+
+    Returns:
+        (boxes, score, label) concatenated over all classes, or
+        (None, None, None) when no entry reaches `score_thresh`.
+    """
+    flat_boxes = boxes.reshape(-1, 4)
+    flat_scores = scores.reshape(-1, num_classes)
+    kept_boxes, kept_scores, kept_labels = [], [], []
+
+    for class_id in range(num_classes):
+        class_scores = flat_scores[:, class_id]
+        above = np.where(class_scores >= score_thresh)
+        candidates = flat_boxes[above]
+        if len(candidates) == 0:
+            continue
+        candidate_scores = class_scores[above]
+
+        selected = py_nms(candidates, candidate_scores,
+                          max_boxes=max_boxes, iou_thresh=iou_thresh)
+        kept_boxes.append(candidates[selected])
+        kept_scores.append(candidate_scores[selected])
+        kept_labels.append(np.full(len(selected), class_id, dtype='int32'))
+
+    if not kept_boxes:
+        return None, None, None
+
+    boxes = np.concatenate(kept_boxes, axis=0)
+    score = np.concatenate(kept_scores, axis=0)
+    label = np.concatenate(kept_labels, axis=0)
+
+    return boxes, score, label
\ No newline at end of file
--- a/code/yolov3/plot_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/plot_utils.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import cv2
+import random
+
+
+def get_color_table(class_num, seed=2):
+    """Return {class index: [c0, c1, c2]} with reproducible random colors.
+
+    The global `random` generator is re-seeded with `seed`, so the same
+    arguments always give the same table.
+    """
+    random.seed(seed)
+    return {
+        class_id: [random.randint(0, 255) for _ in range(3)]
+        for class_id in range(class_num)
+    }
+
+
+def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
+    """Draw one box, and optionally a filled label tag, onto `img` in place.
+
+    Args:
+        img: image array; only `img.shape[0:2]` is read to size the lines.
+        coord: four numbers; (coord[0], coord[1]) and (coord[2], coord[3])
+            are the two opposite corners of the box.
+        label: optional text drawn just above the first corner.
+        color: three-element color; a random one is picked when falsy.
+        line_thickness: line thickness; derived from the image size when
+            falsy.
+    """
+    # Clamp the automatic thickness to at least 1: for images whose longer
+    # side is under 250 px the rounded value would be 0, which in turn gives
+    # a font scale of 0 and an invisible label.
+    tl = line_thickness or max(int(round(0.002 * max(img.shape[0:2]))), 1)  # line thickness
+    color = color or [random.randint(0, 255) for _ in range(3)]
+    c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))
+    cv2.rectangle(img, c1, c2, color, thickness=tl)
+    if label:
+        font_thickness = max(tl - 1, 1)
+        font_scale = float(tl) / 3
+        t_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
+        # background tag sized to the text, sitting above the first corner
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.rectangle(img, c1, c2, color, -1)  # filled
+        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, font_scale, [0, 0, 0],
+                    thickness=font_thickness, lineType=cv2.LINE_AA)
\ No newline at end of file
--- a/code/yolov3/test_single_image.py 0 → 100644
View file @3168483
+++ b/code/yolov3/test_single_image.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import cv2
+
+from misc_utils import parse_anchors, read_class_names
+from nms_utils import gpu_nms
+from plot_utils import get_color_table, plot_one_box
+from data_utils import letterbox_resize
+
+from model import yolov3
+
+parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
+parser.add_argument("input_image", type=str,
+                    help="The path of the input image.")
+parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+                    help="The path of the anchor txt file.")
+parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
+                    help="Resize the input image with `new_size`, size format: [width, height]")
+parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
+                    help="Whether to use the letterbox resize.")
+parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
+                    help="The path of the class names.")
+parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+                    help="The path of the weights to restore.")
+args = parser.parse_args()
+
+args.anchors = parse_anchors(args.anchor_path)
+args.classes = read_class_names(args.class_name_path)
+args.num_class = len(args.classes)
+
+color_table = get_color_table(args.num_class)
+
+img_ori = cv2.imread(args.input_image)
+# cv2.imread does not raise on a missing or unreadable file, it returns None;
+# fail here with a clear message instead of an obscure error further down.
+if img_ori is None:
+    raise IOError('Failed to read the input image: {}'.format(args.input_image))
+
+if args.letterbox_resize:
+    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
+else:
+    height_ori, width_ori = img_ori.shape[:2]
+    img = cv2.resize(img_ori, tuple(args.new_size))
+# Convert BGR -> RGB, scale to [0, 1] and add the batch axis.
+img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+img = np.asarray(img, np.float32)
+img = img[np.newaxis, :] / 255.
+
+with tf.Session() as sess:
+    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
+    yolo_model = yolov3(args.num_class, args.anchors)
+    with tf.variable_scope('yolov3'):
+        pred_feature_maps = yolo_model.forward(input_data, False)
+    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
+
+    pred_scores = pred_confs * pred_probs
+
+    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)
+
+    saver = tf.train.Saver()
+    saver.restore(sess, args.restore_path)
+
+    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
+
+    # Map the boxes from the resized network input back to the original image.
+    if args.letterbox_resize:
+        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
+        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
+    else:
+        boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
+        boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))
+
+    print("box coords:")
+    print(boxes_)
+    print('*' * 30)
+    print("scores:")
+    print(scores_)
+    print('*' * 30)
+    print("labels:")
+    print(labels_)
+
+    for i in range(len(boxes_)):
+        x0, y0, x1, y1 = boxes_[i]
+        plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])
+    # Save before showing, so the result is written even when no display is
+    # available and `imshow` fails.
+    cv2.imwrite('detection_result.jpg', img_ori)
+    cv2.imshow('Detection result', img_ori)
+    cv2.waitKey(0)
\ No newline at end of file
--- a/code/yolov3/tfrecord_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/tfrecord_utils.py 0 → 100644
View file @3168483
+import tensorflow as tf
+from itertools import tee
+
+class TFRecordIterator:
+    """Iterate over a TFRecord file, yielding each record as a plain dict.
+
+    Every record is parsed as a `tf.train.Example`; the yielded dict maps
+    each feature name to its int64, bytes or float value list (or None when
+    all three lists are empty). The number of records is counted once at
+    construction time and exposed through `count()`.
+    """
+
+    def __init__(self, path, compression=None):
+        options = tf.python_io.TFRecordOptions(compression)
+        # Count with a dedicated pass over the file. Draining one branch of
+        # `itertools.tee` instead would buffer every serialized record in
+        # memory for the other branch.
+        self._total_cnt = sum(1 for _ in tf.python_io.tf_record_iterator(path, options))
+        self._core = tf.python_io.tf_record_iterator(path, options)
+        self._iterator = iter(self._core)
+
+    def _read_value(self, feature):
+        """Return the first non-empty value list of `feature`, else None."""
+        if len(feature.int64_list.value) > 0:
+            return feature.int64_list.value
+
+        if len(feature.bytes_list.value) > 0:
+            return feature.bytes_list.value
+
+        if len(feature.float_list.value) > 0:
+            return feature.float_list.value
+
+        return None
+
+    def _read_features(self, features):
+        """Convert a feature mapping into {name: value list}."""
+        return {name: self._read_value(features[name]) for name in features}
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        # Nothing to release explicitly.
+        pass
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        record = next(self._iterator)
+        example = tf.train.Example()
+        example.ParseFromString(record)
+        return self._read_features(example.features.feature)
+
+    # Python 2 looks up `next` instead of `__next__`.
+    next = __next__
+
+    def count(self):
+        """Return the number of records counted at construction time."""
+        return self._total_cnt
+
+    
\ No newline at end of file
--- a/code/yolov3/train.py 0 → 100644
View file @3168483
+++ b/code/yolov3/train.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import tensorflow as tf
+import numpy as np
+import os
+from tqdm import trange
+
+import args
+
+from misc_utils import shuffle_and_overwrite, config_learning_rate, config_optimizer, AverageMeter
+from data_utils import get_batch_data
+from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
+from nms_utils import gpu_nms
+
+from model import yolov3
+
+# Placeholders used by the training / evaluation loop below. They mirror the
+# ones defined in eval.py: `is_training` switches the model between train and
+# inference mode, the two `pred_*_flag` placeholders feed the NMS op.
+is_training = tf.placeholder(tf.bool, name="phase_train")
+pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
+pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
+gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
+
+### tf.data pipeline
+# Every setting lives in the `args` module (imported as a module, so each
+# name must be qualified with `args.`).
+train_dataset = tf.data.TFRecordDataset(filenames=args.train_file, compression_type='GZIP')
+train_dataset = train_dataset.shuffle(args.train_img_cnt)
+train_dataset = train_dataset.batch(args.batch_size)
+train_dataset = train_dataset.map(
+    lambda x: tf.py_func(get_batch_data,
+                         inp=[x, args.class_num, args.img_size, args.anchors, True, args.multi_scale_train, args.use_mix_up, args.letterbox_resize],
+                         Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
+    num_parallel_calls=args.num_threads
+)
+train_dataset = train_dataset.prefetch(args.prefetech_buffer)
+
+val_dataset = tf.data.TFRecordDataset(filenames=args.val_file, compression_type='GZIP')
+val_dataset = val_dataset.batch(1)
+val_dataset = val_dataset.map(
+    lambda x: tf.py_func(get_batch_data,
+                         inp=[x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize],
+                         Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
+    num_parallel_calls=args.num_threads
+)
+# Dataset transformations return a new dataset; keep the result, otherwise
+# prefetching is never applied.
+val_dataset = val_dataset.prefetch(args.prefetech_buffer)
+
+# One re-initialisable iterator shared by the train and validation datasets.
+iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
+train_init_op = iterator.make_initializer(train_dataset)
+val_init_op = iterator.make_initializer(val_dataset)
+
+image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
+y_true = [y_true_13, y_true_26, y_true_52]
+
+# tf.py_func outputs carry no static shape information; restore the ranks.
+image_ids.set_shape([None])
+image.set_shape([None, None, None, 3])
+for y in y_true:
+    y.set_shape([None, None, None, None, None])
+
+
+### Model definition
+yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss,
+                    args.batch_norm_decay, args.weight_decay, use_static_shape=False)
+
+with tf.variable_scope('yolov3'):
+    pred_feature_maps = yolo_model.forward(image, is_training=is_training)
+
+loss = yolo_model.compute_loss(pred_feature_maps, y_true)
+y_pred = yolo_model.predict(pred_feature_maps)
+
+l2_loss = tf.losses.get_regularization_loss()
+
+saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to_restore(include=args.restore_include, exclude=args.restore_exclude))
+update_vars = tf.contrib.framework.get_variables_to_restore(include=args.update_part)
+
+
+# `global_step` is the TF step counter from here on; its initial value comes
+# from `args.global_step`.
+global_step = tf.Variable(float(args.global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
+if args.use_warm_up:
+    # Linear ramp from 0 to `learning_rate_init` during the warm-up steps,
+    # then the configured schedule counted from the end of the warm-up.
+    learning_rate = tf.cond(tf.less(global_step, args.train_batch_num * args.warm_up_epoch),
+                            lambda: args.learning_rate_init * global_step / (args.train_batch_num * args.warm_up_epoch),
+                            lambda: config_learning_rate(args, global_step - args.train_batch_num * args.warm_up_epoch))
+else:
+    learning_rate = config_learning_rate(args, global_step)
+
+optimizer = config_optimizer(args.optimizer_name, learning_rate)
+
+update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+
+with tf.control_dependencies(update_ops):
+    gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)
+    # Clip every available gradient by norm; entries without a gradient are
+    # passed through unchanged.
+    clip_grad_var = [gv if gv[0] is None else [
+          tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]
+    train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step)
+
+if args.save_optimizer:
+    print('Saving optimizer parameters: ON')
+    saver_to_save = tf.train.Saver()
+    saver_best = tf.train.Saver()
+else:
+    print('Saving optimizer parameters: OFF')
+
+##### Start training
+
+# Training driver: initialise variables, optionally restore pretrained weights, then for
+# every epoch run the training batches, periodically report batch recall/precision, save
+# a periodic checkpoint, and evaluate mAP on the validation set (keeping the best model).
+with tf.Session() as sess:
+    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
+
+    # `restore_path` is a checkpoint *prefix*. V2-format checkpoints are stored as
+    # `<prefix>.index` / `<prefix>.data-*`, so no file carries the bare prefix name;
+    # testing only the prefix would silently skip the restore for such checkpoints.
+    if os.path.exists(args.restore_path) or os.path.exists(args.restore_path + '.index'):
+        saver_to_restore.restore(sess, args.restore_path)
+    else:
+        print('Checkpoint not found at {}, training without restoring weights.'.format(args.restore_path))
+
+    print('\nStart training...\n')
+
+    best_mAP = -np.Inf
+
+    # args.py documents `val_evaluation_epoch = None` as "evaluate every epoch";
+    # `epoch % None` would raise a TypeError, so map None to an interval of 1.
+    val_interval = args.val_evaluation_epoch or 1
+
+    for epoch in range(args.total_epoches):
+        sess.run(train_init_op)
+        loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+
+        ### train part
+        for i in trange(args.train_batch_num):
+            _, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
+                [train_op, y_pred, y_true, loss, global_step, learning_rate],
+                feed_dict={is_training: True})
+
+            # Weight the running averages by the number of samples in this batch.
+            batch_len = len(__y_pred[0])
+            loss_total.update(__loss[0], batch_len)
+            loss_xy.update(__loss[1], batch_len)
+            loss_wh.update(__loss[2], batch_len)
+            loss_conf.update(__loss[3], batch_len)
+            loss_class.update(__loss[4], batch_len)
+
+            if __global_step % args.train_evaluation_step == 0 and __global_step > 0:
+                recall, precision = evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __y_pred, __y_true, args.class_num, args.nms_threshold)
+
+                info = "Epoch: {}, global_step: {} | loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f} | ".format(
+                        epoch, int(__global_step), loss_total.average, loss_xy.average, loss_wh.average, loss_conf.average, loss_class.average)
+                info += 'Last batch: rec: {:.3f}, prec: {:.3f} | lr: {:.5g}'.format(recall, precision, __lr)
+                print(info)
+
+                # A NaN running loss means training has diverged; abort instead of
+                # continuing to train on garbage.
+                if np.isnan(loss_total.average):
+                    print('****' * 10)
+                    raise ArithmeticError('Gradient exploded!')
+
+        ## train end (saving parameters)
+        # Periodic checkpoint, written only once the average training loss is low enough.
+        if args.save_optimizer and epoch % args.save_epoch == 0 and epoch > 0:
+            if loss_total.average <= 2.:
+                saver_to_save.save(sess, args.save_dir + 'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format(epoch, int(__global_step), loss_total.average, __lr))
+
+        ### validation part
+        # Validation is skipped during the warm-up epochs.
+        if epoch % val_interval == 0 and epoch >= args.warm_up_epoch:
+            sess.run(val_init_op)
+
+            val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+
+            val_preds = []
+
+            for j in trange(args.val_img_cnt):
+                __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss],
+                                                         feed_dict={is_training: False})
+                pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)
+                val_preds.extend(pred_content)
+                val_loss_total.update(__loss[0])
+                val_loss_xy.update(__loss[1])
+                val_loss_wh.update(__loss[2])
+                val_loss_conf.update(__loss[3])
+                val_loss_class.update(__loss[4])
+
+            # calc mAP
+            rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
+            gt_dict = parse_gt_rec(args.val_file, args.img_size, args.letterbox_resize)
+
+            # The step counter is a float variable; print it as an int like every other message.
+            info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\n'.format(epoch, int(__global_step), __lr)
+
+            for ii in range(args.class_num):
+                npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=args.eval_threshold, use_07_metric=args.use_voc_07_metric)
+                info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(ii, rec, prec, ap)
+                # Recall is weighted by npos and precision by nd (both returned by voc_eval);
+                # AP is a plain per-class mean.
+                rec_total.update(rec, npos)
+                prec_total.update(prec, nd)
+                ap_total.update(ap, 1)
+
+            mAP = ap_total.average
+            info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\n'.format(rec_total.average, prec_total.average, mAP)
+            info += 'EVAL: loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f}\n'.format(
+                val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average)
+            print(info)
+
+            # Keep a separate checkpoint whenever the validation mAP improves.
+            if args.save_optimizer and mAP > best_mAP:
+                best_mAP = mAP
+                saver_best.save(sess, args.save_dir + 'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'.format(
+                                   epoch, int(__global_step), best_mAP, val_loss_total.average, __lr))
\ No newline at end of file
--- a/code/yolov3/video_test.py 0 → 100644
View file @3168483
+++ b/code/yolov3/video_test.py 0 → 100644
View file @3168483
+from __future__ import division, print_function
+
+import tensorflow as tf
+import numpy as np
+import argparse
+import cv2
+import time
+
+from misc_utils import parse_anchors, read_class_names
+from nms_utils import gpu_nms
+from plot_utils import get_color_table, plot_one_box
+from data_utils import letterbox_resize
+
+from model import yolov3
+
+# ---- Command-line interface ----
+parser = argparse.ArgumentParser(description="YOLO-V3 video test procedure.")
+parser.add_argument("input_video", type=str,
+                    help="The path of the input video.")
+parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+                    help="The path of the anchor txt file.")
+parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
+                    help="Resize the input image with `new_size`, size format: [width, height]")
+# Booleans are parsed from text: only the string 'true' (case-insensitive) enables a flag.
+parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
+                    help="Whether to use the letterbox resize.")
+parser.add_argument("--class_name_path", type=str, default="./data/classes.txt",
+                    help="The path of the class names.")
+parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+                    help="The path of the weights to restore.")
+parser.add_argument("--save_video", type=lambda x: (str(x).lower() == 'true'), default=False,
+                    help="Whether to save the video detection results.")
+args = parser.parse_args()
+
+# Derived settings are stored on `args` so the rest of the script reads them from one place.
+args.anchors = parse_anchors(args.anchor_path)
+args.classes = read_class_names(args.class_name_path)
+args.num_class = len(args.classes)
+
+color_table = get_color_table(args.num_class)
+
+# ---- Video input / output ----
+vid = cv2.VideoCapture(args.input_video)
+# Fail fast on a missing or unreadable file instead of building the whole model and
+# then exiting silently because the reported frame count is 0.
+if not vid.isOpened():
+    raise IOError('Cannot open input video: {}'.format(args.input_video))
+
+video_frame_cnt = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
+video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+# Keep the fractional frame rate (e.g. 29.97): truncating it to an int makes the saved
+# video play at a slightly different speed than the input.
+video_fps = vid.get(cv2.CAP_PROP_FPS)
+
+if args.save_video:
+    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+    # Output frames have the same size as the source frames (boxes are drawn on the originals).
+    videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
+
+# Build the inference graph once, restore the weights, then run detection frame by
+# frame: resize -> network + NMS -> map boxes back to the original frame -> draw/show/save.
+with tf.Session() as sess:
+    # Placeholder layout is [batch, height, width, channels]; `new_size` is [width, height].
+    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
+    yolo_model = yolov3(args.num_class, args.anchors)
+    with tf.variable_scope('yolov3'):
+        pred_feature_maps = yolo_model.forward(input_data, False)
+    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
+
+    pred_scores = pred_confs * pred_probs
+
+    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)
+
+    saver = tf.train.Saver()
+    saver.restore(sess, args.restore_path)
+
+    try:
+        for _ in range(video_frame_cnt):
+            ret, img_ori = vid.read()
+            # The container's frame count can overstate what is decodable; stop cleanly
+            # when a read fails instead of passing `None` to the resize below.
+            if not ret:
+                break
+
+            if args.letterbox_resize:
+                img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
+            else:
+                height_ori, width_ori = img_ori.shape[:2]
+                img = cv2.resize(img_ori, tuple(args.new_size))
+            # OpenCV decodes to BGR; convert to RGB and scale to [0, 1] with a batch axis.
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            img = np.asarray(img, np.float32)
+            img = img[np.newaxis, :] / 255.
+
+            start_time = time.time()
+            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
+            end_time = time.time()
+
+            # rescale the coordinates to the original image
+            if args.letterbox_resize:
+                # Undo the letterbox: remove the padding offset, then the uniform scale.
+                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
+                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
+            else:
+                boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
+                boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))
+
+            # Draw every detection; a dedicated loop variable avoids shadowing the frame loop.
+            for box, score, label in zip(boxes_, scores_, labels_):
+                x0, y0, x1, y1 = box
+                plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[label] + ', {:.2f}%'.format(score * 100), color=color_table[label])
+            # Overlay the pure inference time (sess.run only) in milliseconds.
+            cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000), (40, 40), 0,
+                        fontScale=1, color=(0, 255, 0), thickness=2)
+            cv2.imshow('image', img_ori)
+            if args.save_video:
+                videoWriter.write(img_ori)
+            # Press 'q' in the preview window to stop early.
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+    finally:
+        # Release the capture (and finalise the output file) even if a frame fails mid-run.
+        vid.release()
+        if args.save_video:
+            videoWriter.release()
\ No newline at end of file