김성주

added the modified version of yolov3 python code

1 +from __future__ import division, print_function
2 +
3 +import numpy as np
4 +import tensorflow as tf
5 +import random
6 +import math
7 +
8 +from misc_utils import parse_anchors, read_class_names
9 +from tfrecord_utils import TFRecordIterator
10 +
### Some paths
data_path = '../../data/'
train_file = data_path + 'train.tfrecord'  # Path of the training TFRecord file (counted below with GZIP compression).
val_file = data_path + 'val.tfrecord'  # Path of the validation TFRecord file (counted below with GZIP compression).
restore_path = data_path + 'darknet_weights/yolov3.ckpt'  # The path of the weights to restore.
save_dir = '../../checkpoint/'  # The directory of the weights to save.

### we are not using tensorboard logs in this code

log_dir = data_path + 'logs/'  # The directory to store the tensorboard log files.
progress_log_path = data_path + 'progress.log'  # The path to record the training progress.

anchor_path = data_path + 'yolo_anchors.txt'  # The path of the anchor txt file.
class_name_path = data_path + 'classes.txt'  # The path of the class names.

### Training related numbers
batch_size = 6
img_size = [416, 416]  # Images will be resized to `img_size` and fed to the network, size format: [width, height]
letterbox_resize = True  # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
total_epoches = 50
train_evaluation_step = 10  # Evaluate on the training batch after some steps.
val_evaluation_epoch = 2  # Evaluate on the whole validation dataset every this many epochs. Use 1 to evaluate every epoch.
save_epoch = 5  # Save the model after some epochs.
batch_norm_decay = 0.99  # decay in bn ops
weight_decay = 5e-4  # l2 weight decay
global_step = 0  # used when resuming training

### tf.data parameters
num_threads = 10  # Number of threads for image processing used in tf.data pipeline.
prefetech_buffer = 5  # Prefetch buffer size used in tf.data pipeline.

### Learning rate and optimizer
optimizer_name = 'momentum'  # Chosen from [sgd, momentum, adam, rmsprop]
save_optimizer = True  # Whether to save the optimizer parameters into the checkpoint file.
learning_rate_init = 1e-4
lr_type = 'piecewise'  # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
lr_decay_epoch = 5  # Epochs after which learning rate decays. Int or float. Used when chosen `exponential` and `cosine_decay_restart` lr_type.
lr_decay_factor = 0.96  # The learning rate decay factor. Used when chosen `exponential` lr_type.
lr_lower_bound = 1e-6  # The minimum learning rate.
# only used in piecewise lr type
pw_boundaries = [30, 50]  # epoch based boundaries (converted to step based boundaries at the bottom of this file)
pw_values = [learning_rate_init, 3e-5, 1e-5]  # one more value than there are boundaries

### Load and finetune
# Choose the parts you want to restore the weights. List form.
# restore_include: None, restore_exclude: None  => restore the whole model
# restore_include: None, restore_exclude: scope  => restore the whole model except `scope`
# restore_include: scope1, restore_exclude: scope2  => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)
# choice 1: only restore the darknet body
# restore_include = ['yolov3/darknet53_body']
# restore_exclude = None
# choice 2: restore all layers except the last 3 conv2d layers in 3 scale
restore_include = None
restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
# Choose the parts you want to finetune. List form.
# Set to None to train the whole model.

update_part = ['yolov3/yolov3_head']

### other training strategies
multi_scale_train = True  # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
use_label_smooth = True  # Whether to use class label smoothing strategy.
use_focal_loss = True  # Whether to apply focal loss on the conf loss.
use_mix_up = True  # Whether to use mix up data augmentation strategy.
use_warm_up = True  # Whether to use warm up strategy to prevent the gradient from exploding.
warm_up_epoch = 3  # Warm up training epochs. Set to a larger value if gradient explodes.

### some constants in validation
# nms
nms_threshold = 0.45  # iou threshold in nms operation
score_threshold = 0.01  # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.
nms_topk = 150  # keep at most nms_topk outputs after nms
# mAP eval
eval_threshold = 0.5  # the iou threshold applied in mAP evaluation
use_voc_07_metric = False  # whether to use voc 2007 evaluation metric, i.e. the 11-point metric

### parse some params
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
class_num = len(classes)
# Both record counts are obtained by iterating over the GZIP-compressed TFRecord files once.
train_img_cnt = TFRecordIterator(train_file, 'GZIP').count()
val_img_cnt = TFRecordIterator(val_file, 'GZIP').count()
# Number of batches per epoch; the last batch may be partial, hence the ceil.
train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))

# Decay frequency expressed in steps instead of epochs.
lr_decay_freq = int(train_batch_num * lr_decay_epoch)
# Convert the epoch based boundaries into step based ones, shifted by the resumed global_step.
pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
This diff is collapsed. Click to expand it.
1 +from __future__ import division, print_function
2 +
3 +import tensorflow as tf
4 +import numpy as np
5 +import argparse
6 +from tqdm import trange
7 +import os
8 +
9 +from data_utils import get_batch_data
10 +from misc_utils import parse_anchors, read_class_names, AverageMeter
11 +from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
12 +from nms_utils import gpu_nms
13 +
14 +from model import yolov3
15 +
### ArgumentParser
parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")

# paths
# NOTE(review): the default still points at a .txt path although the file is
# read as a GZIP-compressed TFRecord below -- pass --eval_file explicitly.
parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt",
                    help="The path of the GZIP-compressed TFRecord file to evaluate on.")

parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                    help="The path of the weights to restore.")

parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")

parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
                    help="The path of the class names.")

# some numbers
parser.add_argument("--img_size", nargs='*', type=int, default=[416, 416],
                    help="Resize the input image to `img_size`, size format: [width, height]")

parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=False,
                    help="Whether to use the letterbox resize, i.e., keep the original image aspect ratio.")

parser.add_argument("--num_threads", type=int, default=10,
                    help="Number of threads for image processing used in tf.data pipeline.")

parser.add_argument("--prefetech_buffer", type=int, default=5,
                    help="Prefetech_buffer used in tf.data pipeline.")

parser.add_argument("--nms_threshold", type=float, default=0.45,
                    help="IOU threshold in nms operation.")

parser.add_argument("--score_threshold", type=float, default=0.01,
                    help="Threshold of the probability of the classes in nms operation.")

parser.add_argument("--nms_topk", type=int, default=400,
                    help="Keep at most nms_topk outputs after nms.")

parser.add_argument("--use_voc_07_metric", type=lambda x: (str(x).lower() == 'true'), default=False,
                    help="Whether to use the voc 2007 mAP metrics.")

args = parser.parse_args()

# args params
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.class_num = len(args.classes)
# The eval file is a GZIP-compressed TFRecord (see the dataset below), so the
# number of images is the number of records, not the number of text lines.
_record_options = tf.python_io.TFRecordOptions('GZIP')
args.img_cnt = sum(1 for _ in tf.python_io.tf_record_iterator(args.eval_file, _record_options))

# setting placeholders
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)

### tf.data pipeline
val_dataset = tf.data.TFRecordDataset(filenames=args.eval_file, compression_type='GZIP')
val_dataset = val_dataset.batch(1)
val_dataset = val_dataset.map(
    lambda x: tf.py_func(get_batch_data, [x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize], [tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads
)
# Dataset transformations return a new dataset; the result must be kept.
val_dataset = val_dataset.prefetch(args.prefetech_buffer)
iterator = val_dataset.make_one_shot_iterator()

image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
image_ids.set_shape([None])
y_true = [y_true_13, y_true_26, y_true_52]
image.set_shape([None, args.img_size[1], args.img_size[0], 3])
for y in y_true:
    y.set_shape([None, None, None, None, None])

### Model definition
yolo_model = yolov3(args.class_num, args.anchors)
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image, is_training=is_training)
loss = yolo_model.compute_loss(pred_feature_maps, y_true)
y_pred = yolo_model.predict(pred_feature_maps)

saver_to_restore = tf.train.Saver()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer()])
    # A checkpoint path is a prefix shared by several files, so a plain
    # os.path.exists() on it is usually False; ask TensorFlow instead.
    if tf.train.checkpoint_exists(args.restore_path):
        saver_to_restore.restore(sess, args.restore_path)
    else:
        print('WARNING: no checkpoint found at {}, evaluating freshly initialized weights.'.format(args.restore_path))

    print('\nStart evaluation...\n')

    val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    val_preds = []

    # One image per step because the dataset is batched with size 1.
    for j in trange(args.img_cnt):
        __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss], feed_dict={is_training: False})
        pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)

        val_preds.extend(pred_content)
        val_loss_total.update(__loss[0])
        val_loss_xy.update(__loss[1])
        val_loss_wh.update(__loss[2])
        val_loss_conf.update(__loss[3])
        val_loss_class.update(__loss[4])

    rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
    gt_dict = parse_gt_rec(args.eval_file, 'GZIP', args.img_size, args.letterbox_resize)
    print('mAP eval:')
    for ii in range(args.class_num):
        npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=0.5, use_07_metric=args.use_voc_07_metric)
        # Recall is weighted by the first returned count and precision by the second.
        rec_total.update(rec, npos)
        prec_total.update(prec, nd)
        ap_total.update(ap, 1)
        print('Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}'.format(ii, rec, prec, ap))

    mAP = ap_total.average
    print('final mAP: {:.4f}'.format(mAP))
    print("recall: {:.3f}, precision: {:.3f}".format(rec_total.average, prec_total.average))
    print("total_loss: {:.3f}, loss_xy: {:.3f}, loss_wh: {:.3f}, loss_conf: {:.3f}, loss_class: {:.3f}".format(
        val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average
    ))
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.
1 +import numpy as np
2 +import tensorflow as tf
3 +import random
4 +
class AverageMeter(object):
    """Keep the latest value and the running weighted average of a metric.

    Attributes:
        val: the value passed to the most recent `update` call.
        sum: the weighted sum of all values seen since the last `reset`.
        count: the total weight seen since the last `reset`.
        average: `sum / count`, or 0 while `count` is still 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = 0
        self.average = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` with weight `n` and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A total weight of zero (e.g. the first update comes with n == 0)
        # must not divide by zero; the average simply stays at 0.
        if self.count:
            self.average = self.sum / float(self.count)
        else:
            self.average = 0
20 +
21 +
def parse_anchors(anchor_path):
    """Read a comma separated anchor file into a float32 array of shape [N, 2].

    Args:
        anchor_path: path of a text file holding comma separated numbers.

    Returns:
        A numpy float32 array in which every row holds two consecutive
        numbers from the file.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(anchor_path, 'r') as anchor_file:
        raw_values = anchor_file.read().split(',')
    anchors = np.reshape(np.asarray(raw_values, np.float32), [-1, 2])
    return anchors
25 +
26 +
def read_class_names(class_name_path):
    """Map the zero-based line index of the class-name file to the name on that line.

    Only newline characters are stripped from each name.
    """
    with open(class_name_path, 'r') as handle:
        return {index: line.strip('\n') for index, line in enumerate(handle)}
33 +
34 +
def shuffle_and_overwrite(file_name):
    """Shuffle the lines of a text file in place.

    The whole file is read into memory, its lines are shuffled with the
    global `random` generator and the file is rewritten with the new order.
    """
    # Read through a context manager so the handle is closed before the
    # same path is reopened for writing.
    with open(file_name, 'r') as f:
        content = f.readlines()
    random.shuffle(content)
    with open(file_name, 'w') as f:
        f.writelines(content)
41 +
42 +
def update_dict(ori_dict, new_dict):
    """Accumulate `new_dict` into `ori_dict` key by key.

    An empty (or otherwise falsy) `ori_dict` is replaced by `new_dict`.
    Otherwise every key of `ori_dict` is incremented in place by the value
    stored under the same key in `new_dict`, and `ori_dict` is returned.
    """
    if ori_dict:
        for name in ori_dict:
            ori_dict[name] += new_dict[name]
        return ori_dict
    return new_dict
49 +
50 +
def list_add(ori_list, new_list):
    """Add `new_list` element-wise into `ori_list` in place and return `ori_list`.

    Only the positions that exist in `ori_list` are touched.
    """
    for position, _ in enumerate(ori_list):
        ori_list[position] += new_list[position]
    return ori_list
55 +
56 +
def load_weights(var_list, weights_file):
    """Build assign ops that load a flat binary weight file into `var_list`.

    The file is read as 5 int32 header values (discarded) followed by
    float32 values that are consumed sequentially.

    Args:
        var_list: ordered list of variables; each must expose `.name` and
            `.shape.as_list()`. A variable whose parent scope contains
            'Conv' is treated as a convolution kernel; it may be followed
            by four 'BatchNorm' variables or by one 'Conv' bias variable.
        weights_file: path of the binary weight file.

    Returns:
        A list of `tf.assign` ops, one per loaded variable.
    """
    with open(weights_file, "rb") as fp:
        # Skip the header: 5 int32 values whose content is not used here.
        np.fromfile(fp, dtype=np.int32, count=5)
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0
    i = 0
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]
        if 'Conv' not in var1.name.split('/')[-2]:
            # Always move on from variables that are not conv kernels so the
            # loop cannot stall on them.
            i += 1
            continue

        if 'BatchNorm' in var2.name.split('/')[-2]:
            # The variables are listed as gamma, beta, mean, variance but the
            # file is consumed in the order beta, gamma, mean, variance.
            gamma, beta, mean, variance = var_list[i + 1:i + 5]
            for bn_var in (beta, gamma, mean, variance):
                shape = bn_var.shape.as_list()
                num_params = np.prod(shape)
                var_weights = weights[ptr:ptr + num_params].reshape(shape)
                ptr += num_params
                assign_ops.append(tf.assign(bn_var, var_weights, validate_shape=True))
            # Four batch-norm variables were consumed.
            i += 4
        elif 'Conv' in var2.name.split('/')[-2]:
            # load biases
            bias = var2
            bias_shape = bias.shape.as_list()
            bias_params = np.prod(bias_shape)
            bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
            ptr += bias_params
            assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
            # One bias variable was consumed.
            i += 1

        # Kernel values are read with the variable's axes in the order
        # (3, 2, 0, 1) and then transposed back into the variable's own order.
        # NOTE(review): presumably var1 is (height, width, in, out) -- confirm.
        shape = var1.shape.as_list()
        num_params = np.prod(shape)

        var_weights = weights[ptr:ptr + num_params].reshape(
            (shape[3], shape[2], shape[0], shape[1]))

        var_weights = np.transpose(var_weights, (2, 3, 1, 0))
        ptr += num_params
        assign_ops.append(
            tf.assign(var1, var_weights, validate_shape=True))
        i += 1

    return assign_ops
103 +
104 +
def config_learning_rate(args, global_step):
    """Return the learning-rate tensor for the schedule named by `args.lr_type`.

    Supported schedules are 'exponential', 'cosine_decay',
    'cosine_decay_restart', 'fixed' and 'piecewise'; every schedule reads its
    hyper-parameters from `args`.

    Raises:
        ValueError: if `args.lr_type` is none of the supported names.
    """
    schedule = args.lr_type

    if schedule == 'exponential':
        decayed = tf.train.exponential_decay(
            args.learning_rate_init, global_step, args.lr_decay_freq,
            args.lr_decay_factor, staircase=True, name='exponential_learning_rate')
        # Never let the decayed rate fall below the configured floor.
        return tf.maximum(decayed, args.lr_lower_bound)

    if schedule == 'cosine_decay':
        # Steps left for the cosine curve once the warm-up epochs are removed.
        warm_up_epochs = float(args.use_warm_up) * args.warm_up_epoch
        train_steps = (args.total_epoches - warm_up_epochs) * args.train_batch_num
        half_range = 0.5 * (args.learning_rate_init - args.lr_lower_bound)
        return args.lr_lower_bound + half_range * (1 + tf.cos(global_step / train_steps * np.pi))

    if schedule == 'cosine_decay_restart':
        return tf.train.cosine_decay_restarts(
            args.learning_rate_init, global_step, args.lr_decay_freq,
            t_mul=2.0, m_mul=1.0, name='cosine_decay_learning_rate_restart')

    if schedule == 'fixed':
        return tf.convert_to_tensor(args.learning_rate_init, name='fixed_learning_rate')

    if schedule == 'piecewise':
        return tf.train.piecewise_constant(
            global_step, boundaries=args.pw_boundaries, values=args.pw_values,
            name='piecewise_learning_rate')

    raise ValueError('Unsupported learning rate type!')
125 +
126 +
def config_optimizer(optimizer_name, learning_rate, decay=0.9, momentum=0.9):
    """Create the TensorFlow optimizer selected by `optimizer_name`.

    `momentum` is used by 'momentum' and 'rmsprop'; `decay` only by 'rmsprop'.

    Raises:
        ValueError: if `optimizer_name` is not one of 'momentum', 'rmsprop',
            'adam' or 'sgd'.
    """
    if optimizer_name == 'momentum':
        return tf.train.MomentumOptimizer(learning_rate, momentum=momentum)
    if optimizer_name == 'rmsprop':
        return tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum)
    if optimizer_name == 'adam':
        return tf.train.AdamOptimizer(learning_rate)
    if optimizer_name == 'sgd':
        return tf.train.GradientDescentOptimizer(learning_rate)
    raise ValueError('Unsupported optimizer type!')
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.
1 +from __future__ import division, print_function
2 +
3 +import numpy as np
4 +import tensorflow as tf
5 +
def gpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, nms_thresh=0.5):
    """Build graph ops that run per-class non-max suppression.

    Args:
        boxes: tensor that is reshaped to [-1, 4].
        scores: tensor that is reshaped to [-1, num_classes].
        num_classes: number of classes; one NMS op is created per class.
        max_boxes: maximum number of boxes kept by each per-class NMS op.
        score_thresh: entries with a score below this value are dropped
            before NMS.
        nms_thresh: IoU threshold handed to `tf.image.non_max_suppression`.

    Returns:
        Tuple `(boxes, score, label)` of tensors holding the kept boxes, their
        scores and their class indices, concatenated over all classes.
    """
    boxes_list, label_list, score_list = [], [], []
    max_boxes = tf.constant(max_boxes, dtype='int32')

    boxes = tf.reshape(boxes, [-1, 4])  # '-1' means we don't know the exact number of boxes
    score = tf.reshape(scores, [-1, num_classes])

    # Step 1: Create a filtering mask based on "box_class_scores" by using "threshold".
    mask = tf.greater_equal(score, tf.constant(score_thresh))
    # Step 2: Do non_max_suppression for each class
    for i in range(num_classes):
        # Step 3: Apply the mask to scores, boxes and pick them out
        filter_boxes = tf.boolean_mask(boxes, mask[:,i])
        filter_score = tf.boolean_mask(score[:,i], mask[:,i])
        nms_indices = tf.image.non_max_suppression(boxes=filter_boxes,
                                                   scores=filter_score,
                                                   max_output_size=max_boxes,
                                                   iou_threshold=nms_thresh, name='nms_indices')
        # Every kept box of this iteration is labelled with the class index i.
        label_list.append(tf.ones_like(tf.gather(filter_score, nms_indices), 'int32')*i)
        boxes_list.append(tf.gather(filter_boxes, nms_indices))
        score_list.append(tf.gather(filter_score, nms_indices))

    # Merge the per-class results into single tensors.
    boxes = tf.concat(boxes_list, axis=0)
    score = tf.concat(score_list, axis=0)
    label = tf.concat(label_list, axis=0)

    return boxes, score, label
33 +
34 +
def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):
    """Pure numpy non-max suppression for a single class.

    Args:
        boxes: array of shape [N, 4]; the columns are read as x1, y1, x2, y2.
        scores: array of shape [N].
        max_boxes: maximum number of indices to return.
        iou_thresh: a candidate is dropped when its IoU with an already kept,
            higher scoring box is above this value.

    Returns:
        A list of indices into `boxes`, ordered by decreasing score.
    """
    assert boxes.shape[1] == 4 and len(scores.shape) == 1

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # The intersection must use the same width/height convention as
        # `areas` (no "+ 1"), otherwise the IoU can exceed 1 or turn negative.
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        union = areas[i] + areas[rest] - inter
        # Degenerate boxes give a zero union; treat their overlap as 0
        # instead of dividing by zero.
        ovr = np.divide(inter, union,
                        out=np.zeros(inter.shape, dtype=np.float64),
                        where=union > 0)

        order = rest[ovr <= iou_thresh]

    return keep[:max_boxes]
64 +
65 +
def cpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
    """Run `py_nms` independently for every class and merge the results.

    `boxes` is reshaped to [-1, 4] and `scores` to [-1, num_classes]. For each
    class only the entries with a score of at least `score_thresh` are handed
    to `py_nms`.

    Returns:
        `(boxes, score, label)` as concatenated numpy arrays, or
        `(None, None, None)` when no class has an entry above the threshold.
    """
    flat_boxes = boxes.reshape(-1, 4)
    flat_scores = scores.reshape(-1, num_classes)

    per_class = []
    for class_id in range(num_classes):
        class_scores = flat_scores[:, class_id]
        selected = class_scores >= score_thresh
        candidate_boxes = flat_boxes[selected]
        candidate_scores = class_scores[selected]
        if not len(candidate_boxes):
            continue

        kept = py_nms(candidate_boxes, candidate_scores,
                      max_boxes=max_boxes, iou_thresh=iou_thresh)
        per_class.append((candidate_boxes[kept],
                          candidate_scores[kept],
                          np.full(len(kept), class_id, dtype='int32')))

    if not per_class:
        return None, None, None

    all_boxes, all_scores, all_labels = zip(*per_class)
    boxes = np.concatenate(all_boxes, axis=0)
    score = np.concatenate(all_scores, axis=0)
    label = np.concatenate(all_labels, axis=0)

    return boxes, score, label
...\ No newline at end of file ...\ No newline at end of file
1 +from __future__ import division, print_function
2 +
3 +import cv2
4 +import random
5 +
6 +
def get_color_table(class_num, seed=2):
    """Return a reproducible mapping from class index to a 3-value color.

    Args:
        class_num: number of classes; the keys are 0 .. class_num - 1.
        seed: seed of the private random generator used for the colors.

    Returns:
        Dict mapping each class index to a list of three ints in [0, 255].
    """
    # A private generator yields the same colors as seeding the module-level
    # one, without resetting the global random state for other users.
    rng = random.Random(seed)
    color_table = {}
    for i in range(class_num):
        color_table[i] = [rng.randint(0, 255) for _ in range(3)]
    return color_table
13 +
14 +
def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
    """Draw one box, and optionally a filled label tag, onto `img` in place.

    Args:
        img: image array; only `img.shape[0:2]` is read here, the drawing is
            delegated to OpenCV.
        coord: four numbers, used as the two corners (coord[0], coord[1]) and
            (coord[2], coord[3]) after conversion to int.
        label: optional text drawn above the first corner.
        color: optional color; a random one is picked when omitted.
        line_thickness: optional line thickness; derived from the image size
            when omitted.
    """
    # At least 1, otherwise images smaller than 250 px end up with a zero
    # thickness and a zero font scale, i.e. an invisible label.
    tl = line_thickness or max(1, int(round(0.002 * max(img.shape[0:2]))))  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        font_thickness = max(tl - 1, 1)
        t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=font_thickness)[0]
        # Corner of the filled background rectangle placed above the box corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0], thickness=font_thickness, lineType=cv2.LINE_AA)
...\ No newline at end of file ...\ No newline at end of file
1 +from __future__ import division, print_function
2 +
3 +import tensorflow as tf
4 +import numpy as np
5 +import argparse
6 +import cv2
7 +
8 +from misc_utils import parse_anchors, read_class_names
9 +from nms_utils import gpu_nms
10 +from plot_utils import get_color_table, plot_one_box
11 +from data_utils import letterbox_resize
12 +
13 +from model import yolov3
14 +
parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
parser.add_argument("input_image", type=str,
                    help="The path of the input image.")
parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")
parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
                    help="Resize the input image with `new_size`, size format: [width, height]")
parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
                    help="Whether to use the letterbox resize.")
parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
                    help="The path of the class names.")
parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                    help="The path of the weights to restore.")
args = parser.parse_args()

args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)

color_table = get_color_table(args.num_class)

img_ori = cv2.imread(args.input_image)
# cv2.imread signals an unreadable or missing file by returning None.
if img_ori is None:
    raise IOError('Cannot read the input image: {}'.format(args.input_image))

# Bring the image to the network input size and remember how to map the
# predicted boxes back onto the original image.
if args.letterbox_resize:
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
else:
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(args.new_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
# Add the batch axis and scale the pixel values by 1/255.
img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

    # Map the boxes from the resized image back to the original image.
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))

    print("box coords:")
    print(boxes_)
    print('*' * 30)
    print("scores:")
    print(scores_)
    print('*' * 30)
    print("labels:")
    print(labels_)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
...\ No newline at end of file ...\ No newline at end of file
1 +import tensorflow as tf
2 +from itertools import tee
3 +
class TFRecordIterator(object):
    """Iterate over the examples of a TFRecord file as plain dicts.

    Every item is a dict mapping a feature name to the value list stored in
    that feature (int64, bytes or float), or to None for an empty feature.
    The number of records is counted once at construction time and is
    available through `count()`.
    """

    def __init__(self, path, compression=None):
        options = tf.python_io.TFRecordOptions(compression)
        # Count with a separate pass over the file. Counting through
        # itertools.tee would buffer every record in memory until the second
        # iterator consumes it.
        self._total_cnt = sum(1 for _ in tf.python_io.tf_record_iterator(path, options))
        self._core = tf.python_io.tf_record_iterator(path, options)
        self._iterator = iter(self._core)

    def _read_value(self, feature):
        """Return the first non-empty value list of `feature`, or None."""
        if len(feature.int64_list.value) > 0:
            return feature.int64_list.value

        if len(feature.bytes_list.value) > 0:
            return feature.bytes_list.value

        if len(feature.float_list.value) > 0:
            return feature.float_list.value

        return None

    def _read_features(self, features):
        """Convert a feature mapping into a dict of plain value lists."""
        d = dict()
        for data in features:
            d[data] = self._read_value(features[data])
        return d

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        # Nothing is released here; the context manager form is a convenience.
        pass

    def __iter__(self):
        return self

    def __next__(self):
        record = next(self._iterator)
        example = tf.train.Example()
        example.ParseFromString(record)
        return self._read_features(example.features.feature)

    # Python 2 looks up `next` instead of `__next__`.
    next = __next__

    def count(self):
        """Return the number of records found in the file at construction time."""
        return self._total_cnt
46 +
47 +
...\ No newline at end of file ...\ No newline at end of file
1 +from __future__ import division, print_function
2 +
3 +import tensorflow as tf
4 +import numpy as np
5 +import os
6 +from tqdm import trange
7 +
8 +import args
9 +
10 +from misc_utils import shuffle_and_overwrite, config_learning_rate, config_optimizer, AverageMeter
11 +from data_utils import get_batch_data
12 +from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
13 +from nms_utils import gpu_nms
14 +
15 +from model import yolov3
16 +
### Placeholders
# `is_training` switches the model between training and inference behaviour;
# the two `pred_*_flag` placeholders feed numpy predictions back into the NMS op.
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num,
                     args.nms_topk, args.score_threshold, args.nms_threshold)

### tf.data pipeline
# All settings live in the `args` module, so every name is read through it.
train_dataset = tf.data.TFRecordDataset(filenames=args.train_file, compression_type='GZIP')
train_dataset = train_dataset.shuffle(args.train_img_cnt)
train_dataset = train_dataset.batch(args.batch_size)
train_dataset = train_dataset.map(
    lambda x: tf.py_func(get_batch_data,
                         inp=[x, args.class_num, args.img_size, args.anchors, True, args.multi_scale_train, args.use_mix_up, args.letterbox_resize],
                         Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads
)
train_dataset = train_dataset.prefetch(args.prefetech_buffer)

val_dataset = tf.data.TFRecordDataset(filenames=args.val_file, compression_type='GZIP')
val_dataset = val_dataset.batch(1)
val_dataset = val_dataset.map(
    lambda x: tf.py_func(get_batch_data,
                         inp=[x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize],
                         Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads
)
# Dataset transformations return a new dataset; the result must be kept.
val_dataset = val_dataset.prefetch(args.prefetech_buffer)

# One reinitializable iterator serves both datasets.
iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_init_op = iterator.make_initializer(train_dataset)
val_init_op = iterator.make_initializer(val_dataset)

image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
y_true = [y_true_13, y_true_26, y_true_52]

# tf.py_func loses the static shapes, so they are set by hand.
image_ids.set_shape([None])
image.set_shape([None, None, None, 3])
for y in y_true:
    y.set_shape([None, None, None, None, None])


### Model definition
yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss,
                    args.batch_norm_decay, args.weight_decay, use_static_shape=False)

with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image, is_training=is_training)

loss = yolo_model.compute_loss(pred_feature_maps, y_true)
y_pred = yolo_model.predict(pred_feature_maps)

l2_loss = tf.losses.get_regularization_loss()

saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to_restore(
    include=args.restore_include, exclude=args.restore_exclude))
update_vars = tf.contrib.framework.get_variables_to_restore(include=args.update_part)

if not args.save_optimizer:
    # Create the savers before the optimizer exists so that the optimizer
    # variables created later are left out of the saved checkpoints.
    print('Saving optimizer parameters: OFF')
    saver_to_save = tf.train.Saver()
    saver_best = tf.train.Saver()

global_step = tf.Variable(float(args.global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
warm_up_steps = args.train_batch_num * args.warm_up_epoch
if args.use_warm_up:
    # Linear ramp during warm-up, then the configured schedule counted from
    # the end of the warm-up phase.
    learning_rate = tf.cond(tf.less(global_step, warm_up_steps),
                            lambda: args.learning_rate_init * global_step / warm_up_steps,
                            lambda: config_learning_rate(args, global_step - warm_up_steps))
else:
    learning_rate = config_learning_rate(args, global_step)

optimizer = config_optimizer(args.optimizer_name, learning_rate)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

# Run the collected update ops together with every training step.
with tf.control_dependencies(update_ops):
    gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)
    # Clip every available gradient to a norm of 100.
    clip_grad_var = [gv if gv[0] is None else [
        tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]
    train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step)

if args.save_optimizer:
    print('Saving optimizer parameters: ON')
    saver_to_save = tf.train.Saver()
    saver_best = tf.train.Saver()

# `val_evaluation_epoch` may be None, meaning "evaluate every epoch".
val_every = args.val_evaluation_epoch or 1


##### Start training

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    # A checkpoint path is a prefix shared by several files, so a plain
    # os.path.exists() on it is usually False; ask TensorFlow instead.
    if tf.train.checkpoint_exists(args.restore_path):
        saver_to_restore.restore(sess, args.restore_path)
    else:
        print('WARNING: no checkpoint found at {}, training from the initial weights.'.format(args.restore_path))

    print('\nStart training...\n')

    best_mAP = -np.Inf

    for epoch in range(args.total_epoches):
        sess.run(train_init_op)
        loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

        ### train part
        for i in trange(args.train_batch_num):
            _, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
                [train_op, y_pred, y_true, loss, global_step, learning_rate],
                feed_dict={is_training: True})

            # Weight every loss by the number of images in the batch.
            loss_total.update(__loss[0], len(__y_pred[0]))
            loss_xy.update(__loss[1], len(__y_pred[0]))
            loss_wh.update(__loss[2], len(__y_pred[0]))
            loss_conf.update(__loss[3], len(__y_pred[0]))
            loss_class.update(__loss[4], len(__y_pred[0]))

            if __global_step % args.train_evaluation_step == 0 and __global_step > 0:
                recall, precision = evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __y_pred, __y_true, args.class_num, args.nms_threshold)

                info = "Epoch: {}, global_step: {} | loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f} | ".format(
                    epoch, int(__global_step), loss_total.average, loss_xy.average, loss_wh.average, loss_conf.average, loss_class.average)
                info += 'Last batch: rec: {:.3f}, prec: {:.3f} | lr: {:.5g}'.format(recall, precision, __lr)
                print(info)

            if np.isnan(loss_total.average):
                print('****' * 10)
                raise ArithmeticError('Gradient exploded!')

        ## train end (saving parameters)
        if epoch % args.save_epoch == 0 and epoch > 0:
            if loss_total.average <= 2.:
                saver_to_save.save(sess, args.save_dir + 'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format(epoch, int(__global_step), loss_total.average, __lr))

        ### validation part
        if epoch % val_every == 0 and epoch >= args.warm_up_epoch:
            sess.run(val_init_op)

            val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

            val_preds = []

            # One image per step because the validation dataset uses batch size 1.
            for j in trange(args.val_img_cnt):
                __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss],
                                                         feed_dict={is_training: False})
                pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)
                val_preds.extend(pred_content)
                val_loss_total.update(__loss[0])
                val_loss_xy.update(__loss[1])
                val_loss_wh.update(__loss[2])
                val_loss_conf.update(__loss[3])
                val_loss_class.update(__loss[4])

            # calc mAP
            rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
            # NOTE(review): the compression argument mirrors the call in the
            # eval script of this commit -- confirm against eval_utils.parse_gt_rec.
            gt_dict = parse_gt_rec(args.val_file, 'GZIP', args.img_size, args.letterbox_resize)

            info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\n'.format(epoch, __global_step, __lr)

            for ii in range(args.class_num):
                npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=args.eval_threshold, use_07_metric=args.use_voc_07_metric)
                info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(ii, rec, prec, ap)
                rec_total.update(rec, npos)
                prec_total.update(prec, nd)
                ap_total.update(ap, 1)

            mAP = ap_total.average
            info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\n'.format(rec_total.average, prec_total.average, mAP)
            info += 'EVAL: loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f}\n'.format(
                val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average)
            print(info)

            if mAP > best_mAP:
                best_mAP = mAP
                saver_best.save(sess, args.save_dir + 'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'.format(
                    epoch, int(__global_step), best_mAP, val_loss_total.average, __lr))
...\ No newline at end of file ...\ No newline at end of file
1 +from __future__ import division, print_function
2 +
3 +import tensorflow as tf
4 +import numpy as np
5 +import argparse
6 +import cv2
7 +import time
8 +
9 +from misc_utils import parse_anchors, read_class_names
10 +from nms_utils import gpu_nms
11 +from plot_utils import get_color_table, plot_one_box
12 +from data_utils import letterbox_resize
13 +
14 +from model import yolov3
15 +
def _str2bool(value):
    """Return True only when *value* spells 'true' (case-insensitive); anything else is False."""
    return str(value).lower() == 'true'


# Command-line interface of the video detection script.
parser = argparse.ArgumentParser(description="YOLO-V3 video test procedure.")
parser.add_argument("input_video", type=str,
                    help="The path of the input video.")
parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")
# Exactly two integers are required: the script indexes new_size[0] (width)
# and new_size[1] (height) unconditionally, so any other count is rejected
# here instead of failing later with an IndexError.
parser.add_argument("--new_size", nargs=2, type=int, default=[416, 416],
                    metavar=("WIDTH", "HEIGHT"),
                    help="Resize the input image with `new_size`, size format: [width, height]")
parser.add_argument("--letterbox_resize", type=_str2bool, default=True,
                    help="Whether to use the letterbox resize.")
parser.add_argument("--class_name_path", type=str, default="./data/classes.txt",
                    help="The path of the class names.")
parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                    help="The path of the weights to restore.")
parser.add_argument("--save_video", type=_str2bool, default=False,
                    help="Whether to save the video detection results.")
# Parse the command line, then derive the model configuration from the
# anchor and class-name files it points to.
args = parser.parse_args()

args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)

# One drawing colour per class.
color_table = get_color_table(args.num_class)

vid = cv2.VideoCapture(args.input_video)
# An unopenable source reports 0 frames, which would make the script build
# the whole model and then exit without doing anything; fail loudly instead.
if not vid.isOpened():
    raise IOError('Cannot open input video: {}'.format(args.input_video))

# Named property ids instead of the bare integers 7, 3, 4 and 5.
video_frame_cnt = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_fps = int(vid.get(cv2.CAP_PROP_FPS))

if args.save_video:
    # The output keeps the frame size and (integer-truncated) frame rate of the input.
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
48 +
# Build the inference graph once, restore the weights, then run detection
# on the video frame by frame.
with tf.Session() as sess:
    # The placeholder is [batch, height, width, channels]; `new_size` is
    # given as [width, height], hence the swapped indices.
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)

    try:
        for _ in range(video_frame_cnt):
            ret, img_ori = vid.read()
            # The reported frame count can exceed what is actually readable;
            # a failed read yields no image, so stop instead of crashing.
            if not ret:
                break

            if args.letterbox_resize:
                # NOTE(review): dw/dh look like the padding offsets added by the
                # letterbox resize - confirm against data_utils.letterbox_resize.
                img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            # Add the batch axis and scale pixel values by 1/255.
            img = img[np.newaxis, :] / 255.

            # Only the network run itself is timed.
            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
            end_time = time.time()

            # rescale the coordinates to the original image
            if args.letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

            # Draw every detection; a dedicated loop variable avoids clobbering
            # the frame loop's own counter.
            for box, score, label in zip(boxes_, scores_, labels_):
                x0, y0, x1, y1 = box
                plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[label] + ', {:.2f}%'.format(score * 100), color=color_table[label])
            cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000), (40, 40), 0,
                        fontScale=1, color=(0, 255, 0), thickness=2)
            cv2.imshow('image', img_ori)
            if args.save_video:
                videoWriter.write(img_ori)
            # Pressing 'q' in the preview window stops the run early.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and the writer even if a frame fails mid-run,
        # so the output file is finalized.
        vid.release()
        if args.save_video:
            videoWriter.release()
...\ No newline at end of file ...\ No newline at end of file