added the modified version of yolov3 python code

김성주
Commit 31684832905a17921aecdf952bb3c9f6dd31c457 31684832 1 parent a8e4d7c6
Showing 12 changed files with 1995 additions and 0 deletions
code/yolov3/args.py
code/yolov3/data_utils.py
code/yolov3/eval.py
code/yolov3/eval_utils.py
code/yolov3/misc_utils.py
code/yolov3/model.py
code/yolov3/nms_utils.py
code/yolov3/plot_utils.py
code/yolov3/test_single_image.py
code/yolov3/tfrecord_utils.py
code/yolov3/train.py
code/yolov3/video_test.py
--- a/code/yolov3/args.py 0 → 100644
View file @3168483
+++ b/code/yolov3/args.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import numpy as np
+ import tensorflow as tf
+ import random
+ import math
+ 
+ from misc_utils import parse_anchors, read_class_names
+ from tfrecord_utils import TFRecordIterator
+ 
### Some paths
data_path = '../../data/'
# Training set (a TFRecord file, not a txt listing).
train_file = data_path + 'train.tfrecord'
# Validation set (a TFRecord file, not a txt listing).
val_file = data_path + 'val.tfrecord'
# Checkpoint prefix of the weights to restore.
restore_path = data_path + 'darknet_weights/yolov3.ckpt'
# Directory where trained weights are saved.
save_dir = '../../checkpoint/'

### we are not using tensorboard logs in this code

# Directory to store the tensorboard log files.
log_dir = data_path + 'logs/'
# File that records the training progress.
progress_log_path = data_path + 'progress.log'

# Anchor txt file.
anchor_path = data_path + 'yolo_anchors.txt'
# Class-name file.
class_name_path = data_path + 'classes.txt'

### Training related numbers
batch_size = 6
# Images are resized to `img_size` before being fed to the network.
# Size format: [width, height].
img_size = [416, 416]
# Whether to use the letterbox resize, i.e. keep the original aspect ratio
# in the resized image.
letterbox_resize = True
total_epoches = 50
# Evaluate on the training batch every this many steps.
train_evaluation_step = 10
# Evaluate on the whole validation dataset every this many epochs.
# Set to None to evaluate every epoch.
val_evaluation_epoch = 2
# Save the model every this many epochs.
save_epoch = 5
# Decay used in batch-norm ops.
batch_norm_decay = 0.99
# L2 weight decay.
weight_decay = 5e-4
# Starting global step; used when resuming training.
global_step = 0

### tf.data parameters
# Number of threads for image processing used in the tf.data pipeline.
num_threads = 10
# Prefetch buffer used in the tf.data pipeline.
prefetech_buffer = 5

### Learning rate and optimizer
# Chosen from [sgd, momentum, adam, rmsprop].
optimizer_name = 'momentum'
# Whether to save the optimizer parameters into the checkpoint file.
save_optimizer = True
learning_rate_init = 1e-4
# Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise].
lr_type = 'piecewise'
# Epochs after which the learning rate decays. Int or float.
# Used by the `exponential` and `cosine_decay_restart` lr types.
lr_decay_epoch = 5
# Learning rate decay factor. Used by the `exponential` lr type.
lr_decay_factor = 0.96
# The minimum learning rate.
lr_lower_bound = 1e-6
# The two settings below are only used by the `piecewise` lr type.
# Boundaries are expressed in epochs here.
pw_boundaries = [30, 50]
pw_values = [learning_rate_init, 3e-5, 1e-5]

### Load and finetune
# Choose the parts whose weights you want to restore. List form.
# restore_include: None, restore_exclude: None  => restore the whole model
# restore_include: None, restore_exclude: scope  => restore the whole model except `scope`
# restore_include: scope1, restore_exclude: scope2  => if scope1 contains scope2,
#     restore scope1 but not scope2 (scope1 - scope2)
# choice 1: only restore the darknet body
# restore_include = ['yolov3/darknet53_body']
# restore_exclude = None
# choice 2: restore all layers except the last conv2d layer of each of the 3 scales
restore_include = None
restore_exclude = [
    'yolov3/yolov3_head/Conv_14',
    'yolov3/yolov3_head/Conv_6',
    'yolov3/yolov3_head/Conv_22',
]
# Choose the parts you want to finetune. List form.
# Set to None to train the whole model.

update_part = ['yolov3/yolov3_head']

### other training strategies
# Whether to apply the multi-scale training strategy.
# NOTE(review): data_utils.get_batch_data samples square sizes that are
# multiples of 32 from 320 up to 608.
multi_scale_train = True
# Whether to use the class label smoothing strategy.
use_label_smooth = True
# Whether to apply focal loss on the conf loss.
use_focal_loss = True
# Whether to use the mix-up data augmentation strategy.
# NOTE(review): the mix-up branch was removed from data_utils.py, so this
# flag may have no effect there - confirm against train.py.
use_mix_up = True
# Whether to use the warm-up strategy to prevent gradient explosion.
use_warm_up = True
# Warm-up training epochs. Set to a larger value if gradients explode.
warm_up_epoch = 3

### some constants in validation
# nms
# IoU threshold in the nms operation.
nms_threshold = 0.45
# Threshold on the class probability in the nms operation,
# i.e. score = pred_confs * pred_probs. Set lower for higher recall.
score_threshold = 0.01
# Keep at most nms_topk outputs after nms.
nms_topk = 150
# mAP eval
# IoU threshold applied in the mAP evaluation.
eval_threshold = 0.5
# Whether to use the VOC 2007 evaluation metric, i.e. the 11-point metric.
use_voc_07_metric = False
+ 
### parse some params
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
class_num = len(classes)

# Record counts are read from the GZIP-compressed TFRecord files.
train_img_cnt = TFRecordIterator(train_file, 'GZIP').count()
val_img_cnt = TFRecordIterator(val_file, 'GZIP').count()

# Number of batches per epoch; the last batch may be partial.
train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))

# Convert epoch-based schedule settings into step-based ones.
lr_decay_freq = int(train_batch_num * lr_decay_epoch)
# `pw_boundaries` is rebound here: it goes in as epochs and comes out as
# global steps, offset by the step training resumes from.
pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
--- a/code/yolov3/data_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/data_utils.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import cv2
+ import sys
+ import random
+ 
# Major version of the running interpreter.
PY_VERSION = sys.version_info.major
# Counts the batches produced by get_batch_data(); drives multi-scale resizing.
iter_cnt = 0

# Schema of one serialized example in the TFRecord files.
_INT64_SCALAR = tf.FixedLenFeature([], tf.int64)
_STRING_SCALAR = tf.FixedLenFeature([], tf.string)

FEATURE_DESCRIPTION = dict(
    index=_INT64_SCALAR,
    image=_STRING_SCALAR,
    width=_INT64_SCALAR,
    height=_INT64_SCALAR,
    boxes=tf.VarLenFeature(tf.int64),
)
+ 
def _split_boxes(flat_boxes):
    """Split a flat annotation list into box coordinates and labels.

    Args:
        flat_boxes: flat sequence laid out as repeated groups of
            ``label, x_min, y_min, x_max, y_max``.

    Returns:
        (aligned_boxes, labels): a float32 array of shape [N, 4] holding
        ``x_min, y_min, x_max, y_max`` and an int64 array of shape [N].
        An empty input yields arrays of shape [0, 4] and [0].

    Raises:
        ValueError: if the sequence length is not a multiple of 5.
    """
    flat = np.asarray(flat_boxes, dtype=np.float64).reshape(-1)
    if flat.size % 5 != 0:
        raise ValueError('Annotation error occured in box array.')

    records = flat.reshape(flat.size // 5, 5)
    labels = records[:, 0].astype(np.int64)
    # Columns 1..4 are the corners. The previous code read only three of them
    # while unpacking into four names, so it failed on every box.
    aligned_boxes = records[:, 1:5].astype(np.float32)
    return aligned_boxes, labels


def parse_tfrecord(data):
    """Decode one serialized ``tf.train.Example`` into plain Python/NumPy values.

    The record is decoded with the protobuf API rather than
    ``tf.parse_single_example``: the latter returns graph tensors, which
    cannot be passed to ``int()`` or ``np.frombuffer`` when this function runs
    as ordinary Python (e.g. inside ``tf.py_func``).

    Args:
        data: serialized example bytes holding the features ``index``,
            ``image``, ``width``, ``height`` and ``boxes`` (the same keys as
            FEATURE_DESCRIPTION).

    Returns:
        (index, encoded_image, aligned_boxes, labels, width, height) where
        ``encoded_image`` is a uint8 array of the still-encoded image bytes,
        ``aligned_boxes`` is float32 [N, 4] and ``labels`` is int64 [N].

    Raises:
        ValueError: if the box list length is not a multiple of 5.
    """
    feature = tf.train.Example.FromString(data).features.feature

    index = int(feature['index'].int64_list.value[0])
    encoded_image = np.frombuffer(feature['image'].bytes_list.value[0], dtype=np.uint8)
    width = int(feature['width'].int64_list.value[0])
    height = int(feature['height'].int64_list.value[0])

    aligned_boxes, labels = _split_boxes(list(feature['boxes'].int64_list.value))

    return index, encoded_image, aligned_boxes, labels, width, height


def parse_record(features):
    """Convert an already-decoded record (primitive values) into arrays.

    Args:
        features: mapping with keys ``index``, ``image`` (encoded image
            bytes), ``width``, ``height`` and ``boxes`` (flat list, five
            values per box), as produced by TFRecordIterator.

    Returns:
        (index, encoded_image, aligned_boxes, labels, width, height) with the
        same layout as parse_tfrecord().

    Raises:
        ValueError: if the box list length is not a multiple of 5.
    """
    index = int(features['index'])
    encoded_image = np.frombuffer(features['image'], dtype=np.uint8)
    width = int(features['width'])
    height = int(features['height'])

    aligned_boxes, labels = _split_boxes(features['boxes'])

    return index, encoded_image, aligned_boxes, labels, width, height
+ 
def bbox_crop(bbox, crop_box=None, allow_outside_center=True):
    """Clip boxes to a crop window and shift them into the window's frame.

    Args:
        bbox: array of shape [N, 4+]; the first four columns are
            (x_min, y_min, x_max, y_max). Extra columns are carried along.
        crop_box: (left, top, width, height) of the window, or None for no
            crop. Falsy entries mean "unbounded" on that side.
        allow_outside_center: when False, boxes whose center lies outside the
            window are dropped.

    Returns:
        A new array with the surviving boxes; the input is not modified.

    Raises:
        ValueError: if `crop_box` does not have exactly four entries.
    """
    cropped = bbox.copy()
    if crop_box is None:
        return cropped
    if len(crop_box) != 4:
        raise ValueError(
            "Invalid crop_box parameter, requires length 4, given {}".format(str(crop_box)))
    if all(c is None for c in crop_box):
        return cropped

    l, t, w, h = crop_box
    x0 = l if l else 0
    y0 = t if t else 0
    x1 = x0 + (w if w else np.inf)
    y1 = y0 + (h if h else np.inf)
    window = np.array((x0, y0, x1, y1))

    if allow_outside_center:
        keep = np.ones(cropped.shape[0], dtype=bool)
    else:
        mid = (cropped[:, :2] + cropped[:, 2:4]) / 2
        keep = np.logical_and(window[:2] <= mid, mid < window[2:]).all(axis=1)

    # Clip to the window, then express the corners relative to its origin.
    cropped[:, 2:4] = np.minimum(cropped[:, 2:4], window[2:4])
    cropped[:, :2] = np.maximum(cropped[:, :2], window[:2])
    cropped[:, 2:4] -= window[:2]
    cropped[:, :2] -= window[:2]

    # Boxes that collapsed to zero or negative extent are discarded.
    non_empty = (cropped[:, :2] < cropped[:, 2:4]).all(axis=1)
    return cropped[np.logical_and(keep, non_empty)]
+ 
def bbox_iou(bbox_a, bbox_b, offset=0):
    """Pairwise IoU between two sets of corner-format boxes.

    Args:
        bbox_a: array of shape [N, 4+] as (x_min, y_min, x_max, y_max, ...).
        bbox_b: array of shape [M, 4+] in the same layout.
        offset: value added to every side length before taking areas.

    Returns:
        Array of shape [N, M] with the IoU of each (a, b) pair.

    Raises:
        IndexError: if either input has fewer than 4 columns.
    """
    if min(bbox_a.shape[1], bbox_b.shape[1]) < 4:
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    # Broadcast [N, 1, 2] against [M, 2] to get every pair's overlap corners.
    top_left = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    bottom_right = np.minimum(bbox_a[:, None, 2:4], bbox_b[:, 2:4])

    # Pairs that do not overlap get an intersection area of zero.
    overlapping = (top_left < bottom_right).all(axis=2)
    inter = np.prod(bottom_right - top_left + offset, axis=2) * overlapping

    size_a = bbox_a[:, 2:4] - bbox_a[:, :2] + offset
    size_b = bbox_b[:, 2:4] - bbox_b[:, :2] + offset
    area_a = np.prod(size_a, axis=1)
    area_b = np.prod(size_b, axis=1)

    return inter / (area_a[:, None] + area_b - inter)
+ 
+ 
def random_crop_with_constraints(bbox, size, min_scale=0.3, max_scale=1,
                                 max_aspect_ratio=2, constraints=None,
                                 max_trial=50):
    """Pick a random crop window subject to IoU constraints against the boxes.

    For each (min_iou, max_iou) constraint up to `max_trial` random windows
    are sampled; the first window whose IoU with every box lies inside the
    constraint becomes a candidate. One candidate that keeps at least one box
    is then chosen at random.

    Args:
        bbox: array of shape [N, 4+], corner format.
        size: (width, height) of the image.
        min_scale, max_scale: range of the crop scale relative to the image.
        max_aspect_ratio: bound on the crop's aspect-ratio distortion.
        constraints: iterable of (min_iou, max_iou) pairs; None in a pair
            means unbounded. Defaults to the SSD-paper settings.
        max_trial: sampling attempts per constraint.

    Returns:
        (new_bbox, crop) where `crop` is (left, top, width, height). If there
        are no boxes, the first sampled window is returned with `bbox`
        unchanged. If no candidate keeps a box, the full image is returned.
    """
    # default params in paper
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    w, h = size

    candidates = [(0, 0, w, h)]
    for min_iou, max_iou in constraints:
        min_iou = -np.inf if min_iou is None else min_iou
        max_iou = np.inf if max_iou is None else max_iou

        for _ in range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            # Never let the window exceed the image.
            crop_h = min(int(h * scale / np.sqrt(aspect_ratio)), h)
            crop_w = min(int(w * scale * np.sqrt(aspect_ratio)), w)

            # randrange(0) raises ValueError, which used to happen whenever
            # the window was as tall/wide as the image. There is only one
            # valid offset in that case.
            crop_t = random.randrange(h - crop_h) if h > crop_h else 0
            crop_l = random.randrange(w - crop_w) if w > crop_w else 0
            crop = (crop_l, crop_t, crop_w, crop_h)

            if len(bbox) == 0:
                # Nothing to constrain against: accept the first sample.
                return bbox, crop

            crop_bb = np.array((crop_l, crop_t, crop_l + crop_w, crop_t + crop_h))
            iou = bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                candidates.append(crop)
                break

    # random select one
    while candidates:
        crop = candidates.pop(np.random.randint(0, len(candidates)))
        new_bbox = bbox_crop(bbox, crop, allow_outside_center=False)
        if new_bbox.size < 1:
            # This window would drop every box; try another candidate.
            continue
        return new_bbox, crop
    return bbox, (0, 0, w, h)
+ 
+ 
def random_color_distort(img, brightness_delta=32, hue_vari=18, sat_vari=0.5, val_vari=0.5):
    """Randomly jitter brightness, hue, saturation and value of a BGR image.

    Each of the four adjustments is applied independently when a uniform
    draw exceeds 0.5. The order of the three HSV adjustments is itself chosen
    at random between two fixed orderings.

    Args:
        img: BGR image array (converted to uint8 before the HSV step).
        brightness_delta: maximum absolute brightness shift.
        hue_vari: maximum absolute hue shift (hue is wrapped modulo 180).
        sat_vari: maximum relative change of the saturation channel.
        val_vari: maximum relative change of the value channel.

    Returns:
        The distorted image as a uint8 BGR array.
    """
    def fires(p=0.5):
        # One uniform draw decides whether an adjustment is applied.
        return np.random.uniform(0, 1) > p

    def scale_channel(hsv, channel, vari):
        # Multiply one HSV channel by a random factor in [1 - vari, 1 + vari).
        if fires():
            factor = 1 + np.random.uniform(-vari, vari)
            hsv[:, :, channel] *= factor
        return hsv

    def shift_hue(hsv):
        if fires():
            offset = np.random.randint(-hue_vari, hue_vari)
            hsv[:, :, 0] = (hsv[:, :, 0] + offset) % 180
        return hsv

    def adjust_saturation(hsv):
        return scale_channel(hsv, 1, sat_vari)

    def adjust_value(hsv):
        return scale_channel(hsv, 2, val_vari)

    # brightness
    if fires():
        shift = int(np.random.uniform(-brightness_delta, brightness_delta))
        img = img.astype(np.float32) + shift
    img = np.clip(img, 0, 255).astype(np.uint8)

    # color jitter
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)

    if np.random.randint(0, 2):
        pipeline = (adjust_value, adjust_saturation, shift_hue)
    else:
        pipeline = (adjust_saturation, shift_hue, adjust_value)
    for step in pipeline:
        img_hsv = step(img_hsv)

    img_hsv = np.clip(img_hsv, 0, 255)
    return cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
+ 
+ 
def letterbox_resize(img, new_width, new_height, interp=0):
    """Resize an image keeping its aspect ratio and pad it to the target size.

    The image is scaled by the largest factor that fits inside
    (new_width, new_height) and centered on a canvas filled with value 128.

    Args:
        img: image array of shape [H, W, 3].
        new_width, new_height: size of the output canvas.
        interp: interpolation flag forwarded to cv2.resize.

    Returns:
        (image_padded, resize_ratio, dw, dh): the padded uint8 image, the
        scale factor applied, and the left/top padding in pixels.
    """
    src_h, src_w = img.shape[:2]

    scale = min(new_width / src_w, new_height / src_h)
    scaled_w, scaled_h = int(scale * src_w), int(scale * src_h)

    resized = cv2.resize(img, (scaled_w, scaled_h), interpolation=interp)

    # Split the leftover space evenly; any odd pixel goes to the right/bottom.
    pad_x = int((new_width - scaled_w) / 2)
    pad_y = int((new_height - scaled_h) / 2)

    canvas = np.full((new_height, new_width, 3), 128, np.uint8)
    canvas[pad_y: pad_y + scaled_h, pad_x: pad_x + scaled_w, :] = resized

    return canvas, scale, pad_x, pad_y
+ 
+ 
def resize_with_bbox(img, bbox, new_width, new_height, interp=0, letterbox=False):
    """Resize an image and transform its boxes accordingly.

    Args:
        img: image array of shape [H, W, 3].
        bbox: array of shape [N, 4+], corner format. Updated in place.
        new_width, new_height: target size.
        interp: interpolation flag forwarded to cv2.resize.
        letterbox: keep the aspect ratio and pad (True) or stretch (False).

    Returns:
        (image, bbox): the resized image and the same `bbox` array with
        columns 0..3 mapped into the resized image.
    """
    if not letterbox:
        src_h, src_w = img.shape[:2]
        stretched = cv2.resize(img, (new_width, new_height), interpolation=interp)

        # Plain stretch: each axis scales independently.
        bbox[:, [0, 2]] = bbox[:, [0, 2]] / src_w * new_width
        bbox[:, [1, 3]] = bbox[:, [1, 3]] / src_h * new_height
        return stretched, bbox

    padded, ratio, pad_x, pad_y = letterbox_resize(img, new_width, new_height, interp)

    # Uniform scale followed by the padding offset.
    bbox[:, [0, 2]] = bbox[:, [0, 2]] * ratio + pad_x
    bbox[:, [1, 3]] = bbox[:, [1, 3]] * ratio + pad_y
    return padded, bbox
+ 
+ 
def random_flip(img, bbox, px=0, py=0):
    """Randomly mirror an image and its boxes horizontally and/or vertically.

    Args:
        img: image array of shape [H, W, ...].
        bbox: array of shape [N, 4+], corner format. Updated in place.
        px: probability of a horizontal flip.
        py: probability of a vertical flip.

    Returns:
        (img, bbox) after the flips that were drawn.
    """
    img_h, img_w = img.shape[:2]

    if np.random.uniform(0, 1) < px:
        img = cv2.flip(img, 1)
        # Mirroring swaps the roles of x_min and x_max.
        bbox[:, [0, 2]] = img_w - bbox[:, [2, 0]]

    if np.random.uniform(0, 1) < py:
        img = cv2.flip(img, 0)
        # Mirroring swaps the roles of y_min and y_max.
        bbox[:, [1, 3]] = img_h - bbox[:, [3, 1]]

    return img, bbox
+ 
+ 
def random_expand(img, bbox, max_ratio=4, fill=0, keep_ratio=True):
    """Place the image at a random position on a larger, filled canvas.

    Args:
        img: image array of shape [H, W, C].
        bbox: array of shape [N, 4+], corner format. Shifted in place.
        max_ratio: upper bound of the expansion ratio (lower bound is 1).
        fill: value used for the new border area.
        keep_ratio: use the same ratio for both axes when True.

    Returns:
        (dst, bbox): the expanded image and the shifted boxes.
    """
    src_h, src_w, channels = img.shape

    ratio_x = random.uniform(1, max_ratio)
    ratio_y = ratio_x if keep_ratio else random.uniform(1, max_ratio)

    out_h = int(src_h * ratio_y)
    out_w = int(src_w * ratio_x)
    # Top-left corner of the original image inside the canvas.
    off_y = random.randint(0, out_h - src_h)
    off_x = random.randint(0, out_w - src_w)

    dst = np.full(shape=(out_h, out_w, channels), fill_value=fill, dtype=img.dtype)
    dst[off_y:off_y + src_h, off_x:off_x + src_w, :] = img

    # correct bbox
    shift = (off_x, off_y)
    bbox[:, 2:4] += shift
    bbox[:, :2] += shift

    return dst, bbox
+ 
def process_box(boxes, labels, img_size, class_num, anchors):
    """Build the three YOLOv3 ground-truth tensors for one image.

    Every box is assigned to the anchor (out of 9) whose shape has the
    highest IoU with it, and written into the grid cell containing the box
    center on the feature map that anchor belongs to.

    Args:
        boxes: array of shape [N, 5]: (x_min, y_min, x_max, y_max, weight),
            in pixels of the resized image. The last column is stored as the
            mix-up weight.
        labels: sequence of N class indices.
        img_size: [width, height] of the network input.
        class_num: number of classes.
        anchors: array of shape [9, 2] holding (width, height), ordered so
            that anchors 0-2 go with stride 8, 3-5 with stride 16 and 6-8
            with stride 32.

    Returns:
        (y_true_13, y_true_26, y_true_52): float32 arrays of shape
        [grid_h, grid_w, 3, 6 + class_num] for strides 32, 16 and 8. The last
        axis holds center (2), size (2), objectness (1), one-hot class
        (class_num) and mix-up weight (1, defaulting to 1).
    """
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Stride of each feature map, indexed like `anchors_mask` / `y_true`.
    strides = [32, 16, 8]
    anchors = np.asarray(anchors)

    # convert boxes form:
    # shape: [N, 2]
    # (x_center, y_center)
    box_centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
    # (width, height)
    box_sizes = boxes[:, 2:4] - boxes[:, 0:2]

    # [grid_h, grid_w, 3, 5+num_class+1] `5` means coords and labels.
    # `1` means mix up weight, which defaults to 1.
    y_true = []
    for stride in strides:
        grid = np.zeros((img_size[1] // stride, img_size[0] // stride, 3, 6 + class_num), np.float32)
        grid[..., -1] = 1.
        y_true.append(grid)

    # Shape-only IoU: box and anchor are compared as if they shared a center,
    # so the intersection is simply the element-wise minimum of the sizes.
    # [N, 1, 2] & [9, 2] ==> [N, 9, 2]
    inter_wh = np.minimum(box_sizes[:, np.newaxis, :], anchors)
    inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1]
    box_area = (box_sizes[:, 0] * box_sizes[:, 1])[:, np.newaxis]
    anchor_area = anchors[:, 0] * anchors[:, 1]
    # [N, 9]
    iou = inter_area / (box_area + anchor_area - inter_area + 1e-10)
    # [N]
    best_match_idx = np.argmax(iou, axis=1)

    for i, idx in enumerate(best_match_idx):
        # idx: 0,1,2 ==> 2; 3,4,5 ==> 1; 6,7,8 ==> 0
        feature_map_group = 2 - idx // 3
        target = y_true[feature_map_group]
        stride = strides[feature_map_group]
        grid_h, grid_w = target.shape[:2]

        # Clamp to the grid: a center on or beyond the right/bottom edge used
        # to raise IndexError, and a negative one silently wrapped around to
        # the opposite side of the feature map.
        x = min(max(int(np.floor(box_centers[i, 0] / stride)), 0), grid_w - 1)
        y = min(max(int(np.floor(box_centers[i, 1] / stride)), 0), grid_h - 1)
        k = anchors_mask[feature_map_group].index(idx)
        c = int(labels[i])

        target[y, x, k, :2] = box_centers[i]
        target[y, x, k, 2:4] = box_sizes[i]
        target[y, x, k, 4] = 1.
        target[y, x, k, 5 + c] = 1.
        target[y, x, k, -1] = boxes[i, -1]

    return y_true[0], y_true[1], y_true[2]
+ 
+ 
def parse_data(data, class_num, img_size, anchors, is_training, letterbox_resize):
    """Turn one serialized record into a network input and its YOLO targets.

    Args:
        data: one serialized example, as accepted by parse_tfrecord().
        class_num: number of classes.
        img_size: [width, height] the image is resized to.
        anchors: anchor array forwarded to process_box().
        is_training: apply random augmentation when True.
        letterbox_resize: keep the aspect ratio when resizing.

    Returns:
        (img_idx, img, y_true_13, y_true_26, y_true_52) where `img` is a
        float32 RGB image scaled to [0, 1].

    Raises:
        ValueError: if the encoded image cannot be decoded.
    """
    img_idx, encoded_img, boxes, labels, _, _ = parse_tfrecord(data)
    img = cv2.imdecode(encoded_img, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError('Failed to decode the image of record {}.'.format(img_idx))

    # Carry the label and the mix-up weight (default 1) as extra columns:
    # [x_min, y_min, x_max, y_max, label, weight]. The augmentations below only
    # touch columns 0..3, but random cropping may DROP rows; keeping the label
    # in the same row is what keeps boxes and labels aligned afterwards.
    boxes = np.asarray(boxes, np.float32).reshape(-1, 4)
    label_col = np.asarray(labels, np.float32).reshape(-1, 1)
    weight_col = np.ones((boxes.shape[0], 1), np.float32)
    boxes = np.concatenate((boxes, label_col, weight_col), axis=-1)

    ## I erased mix-up method here

    if is_training:
        # random color distortion
        img = random_color_distort(img)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, boxes = random_expand(img, boxes, 4)

        # random cropping
        h, w, _ = img.shape
        boxes, crop = random_crop_with_constraints(boxes, (w, h))
        x0, y0, w, h = crop
        img = img[y0: y0+h, x0: x0+w]

        # resize with random interpolation
        interp = np.random.randint(0, 5)
        img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=interp, letterbox=letterbox_resize)

        # random horizontal flip
        img, boxes = random_flip(img, boxes, px=0.5)
    else:
        img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=1, letterbox=letterbox_resize)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

    # the input of yolo_v3 should be in range 0~1
    img = img / 255.

    # Split the label column back out; process_box expects
    # [x_min, y_min, x_max, y_max, weight] plus a separate label vector.
    labels = boxes[:, 4].astype(np.int64)
    boxes = np.delete(boxes, 4, axis=1)

    y_true_13, y_true_26, y_true_52 = process_box(boxes, labels, img_size, class_num, anchors)

    return img_idx, img, y_true_13, y_true_26, y_true_52
+ 
+ 
def get_batch_data(records, class_num, img_size, anchors, is_training, multi_scale=False, mix_up=False, letterbox_resize=True, interval=10):
    """Parse a batch of serialized records into stacked NumPy arrays.

    Args:
        records: iterable of serialized examples.
        class_num: number of classes.
        img_size: [width, height] of the network input; replaced by a random
            size when multi-scale training is active.
        anchors: anchor array forwarded to parse_data().
        is_training: apply random augmentation when True.
        multi_scale: during training, pick a random square input size (a
            multiple of 32 from 320 to 608) that changes every `interval`
            calls.
        mix_up: accepted for interface compatibility; the mix-up strategy
            was removed, so the value is ignored.
        letterbox_resize: keep the aspect ratio when resizing.
        interval: number of consecutive calls sharing one multi-scale size.

    Returns:
        (img_idx_batch, img_batch, y_true_13_batch, y_true_26_batch,
        y_true_52_batch) as NumPy arrays stacked along a new first axis.
    """
    global iter_cnt

    # multi_scale training
    if multi_scale and is_training:
        # Use a private generator seeded by the interval number. The size is
        # then still a pure function of `iter_cnt // interval`, but the global
        # `random` state is left alone - reseeding it on every call made the
        # `random`-based augmentations repeat the same draws batch after batch.
        size_rng = random.Random(iter_cnt // interval)
        random_img_size = [[x * 32, x * 32] for x in range(10, 20)]
        img_size = size_rng.sample(random_img_size, 1)[0]
    iter_cnt += 1

    # deleted mix up strategy

    parsed = [
        parse_data(data, class_num, img_size, anchors, is_training, letterbox_resize)
        for data in records
    ]

    img_idx_batch = np.asarray([sample[0] for sample in parsed], np.int64)
    img_batch = np.asarray([sample[1] for sample in parsed])
    y_true_13_batch = np.asarray([sample[2] for sample in parsed])
    y_true_26_batch = np.asarray([sample[3] for sample in parsed])
    y_true_52_batch = np.asarray([sample[4] for sample in parsed])

    return img_idx_batch, img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch
\ No newline at end of file
--- a/code/yolov3/eval.py 0 → 100644
View file @3168483
+++ b/code/yolov3/eval.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import argparse
+ from tqdm import trange
+ import os
+ 
+ from data_utils import get_batch_data
+ from misc_utils import parse_anchors, read_class_names, AverageMeter
+ from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
+ from nms_utils import gpu_nms
+ 
+ from model import yolov3
+ 
### ArgumentParser
def _str2bool(value):
    """Interpret a command-line value as a boolean: only 'true' (any case) is True."""
    return str(value).lower() == 'true'


parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")

# paths
parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt",
                    help="The path of the validation or test tfrecord file (GZIP compressed).")

parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                    help="The path of the weights to restore.")

parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")

parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
                    help="The path of the class names.")

# some numbers
parser.add_argument("--img_size", nargs='*', type=int, default=[416, 416],
                    help="Resize the input image to `img_size`, size format: [width, height]")

parser.add_argument("--letterbox_resize", type=_str2bool, default=False,
                    help="Whether to use the letterbox resize, i.e., keep the original image aspect ratio.")

parser.add_argument("--num_threads", type=int, default=10,
                    help="Number of threads for image processing used in tf.data pipeline.")

parser.add_argument("--prefetech_buffer", type=int, default=5,
                    help="Prefetech_buffer used in tf.data pipeline.")

parser.add_argument("--nms_threshold", type=float, default=0.45,
                    help="IOU threshold in nms operation.")

parser.add_argument("--score_threshold", type=float, default=0.01,
                    help="Threshold of the probability of the classes in nms operation.")

parser.add_argument("--nms_topk", type=int, default=400,
                    help="Keep at most nms_topk outputs after nms.")

parser.add_argument("--use_voc_07_metric", type=_str2bool, default=False,
                    help="Whether to use the voc 2007 mAP metrics.")

args = parser.parse_args()

# args params
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.class_num = len(args.classes)

# The eval file is read below as a GZIP-compressed TFRecord, so the number of
# images is the number of records. Counting text lines of that binary file
# gave a meaningless value and left the file handle open. args.py counts its
# datasets the same way.
from tfrecord_utils import TFRecordIterator
args.img_cnt = TFRecordIterator(args.eval_file, 'GZIP').count()
+ 
# setting placeholders
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
# Boxes and scores of a single image are fed back in to run NMS on the GPU.
pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)

### tf.data pipeline
val_dataset = tf.data.TFRecordDataset(filenames=args.eval_file, compression_type='GZIP')
# One image per step: the evaluation loop runs once per record.
val_dataset = val_dataset.batch(1)
val_dataset = val_dataset.map(
    lambda x: tf.py_func(get_batch_data, [x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize], [tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads
)
# Dataset transformations return a NEW dataset; the result has to be kept,
# otherwise prefetching is never applied.
val_dataset = val_dataset.prefetch(args.prefetech_buffer)
iterator = val_dataset.make_one_shot_iterator()

image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
# tf.py_func outputs carry no static shape information; restore what is known.
image_ids.set_shape([None])
y_true = [y_true_13, y_true_26, y_true_52]
image.set_shape([None, args.img_size[1], args.img_size[0], 3])
for y in y_true:
    y.set_shape([None, None, None, None, None])
+ 
### Model definition
yolo_model = yolov3(args.class_num, args.anchors)
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image, is_training=is_training)
loss = yolo_model.compute_loss(pred_feature_maps, y_true)
y_pred = yolo_model.predict(pred_feature_maps)

saver_to_restore = tf.train.Saver()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer()])
    # `restore_path` is a checkpoint PREFIX. With the default (V2) checkpoint
    # format no file of exactly that name exists - only `<prefix>.index` and
    # `<prefix>.data-*` - so testing the bare prefix alone skipped the restore
    # and silently evaluated randomly initialised weights.
    if os.path.exists(args.restore_path) or os.path.exists(args.restore_path + '.index'):
        saver_to_restore.restore(sess, args.restore_path)
    else:
        print('WARNING: checkpoint {} not found, evaluating freshly initialised weights.'.format(args.restore_path))

    print('\nStart evaluation...\n')

    val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    val_preds = []

    # One session step per image (the dataset is batched with size 1).
    for j in trange(args.img_cnt):
        __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss], feed_dict={is_training: False})
        pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)

        val_preds.extend(pred_content)
        # `loss` is indexed as: total, xy, wh, conf, class.
        val_loss_total.update(__loss[0])
        val_loss_xy.update(__loss[1])
        val_loss_wh.update(__loss[2])
        val_loss_conf.update(__loss[3])
        val_loss_class.update(__loss[4])

    rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
    gt_dict = parse_gt_rec(args.eval_file, 'GZIP', args.img_size, args.letterbox_resize)
    print('mAP eval:')
    for ii in range(args.class_num):
        npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=0.5, use_07_metric=args.use_voc_07_metric)
        # Recall is weighted by the number of positives, precision by the
        # number of detections, AP equally per class.
        rec_total.update(rec, npos)
        prec_total.update(prec, nd)
        ap_total.update(ap, 1)
        print('Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}'.format(ii, rec, prec, ap))

    mAP = ap_total.average
    print('final mAP: {:.4f}'.format(mAP))
    print("recall: {:.3f}, precision: {:.3f}".format(rec_total.average, prec_total.average))
    print("total_loss: {:.3f}, loss_xy: {:.3f}, loss_wh: {:.3f}, loss_conf: {:.3f}, loss_class: {:.3f}".format(
        val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average
    ))
\ No newline at end of file
--- a/code/yolov3/eval_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/eval_utils.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import cv2
+ from collections import Counter
+ 
+ from data_utils import parse_record
+ from nms_utils import cpu_nms, gpu_nms
+ from tfrecord_utils import TFRecordIterator
+ 
+ 
def calc_iou(pred_boxes, true_boxes):
    """Pairwise IoU between predicted and ground-truth corner-format boxes.

    Args:
        pred_boxes: array of shape [N, 4] as (x_min, y_min, x_max, y_max).
        true_boxes: array of shape [V, 4] in the same layout.

    Returns:
        Array of shape [N, V]; entry (n, v) is the IoU of prediction n with
        ground truth v. A small epsilon in the denominator avoids division
        by zero.
    """
    # [N, 1, 4] against [1, V, 4] broadcasts to every (prediction, truth) pair.
    pred = np.expand_dims(pred_boxes, -2)
    truth = np.expand_dims(true_boxes, 0)

    overlap_wh = np.maximum(
        np.minimum(pred[..., 2:], truth[..., 2:]) - np.maximum(pred[..., :2], truth[..., :2]),
        0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    pred_wh = pred[..., 2:] - pred[..., :2]
    truth_wh = truth[..., 2:] - truth[..., :2]
    pred_area = pred_wh[..., 0] * pred_wh[..., 1]
    truth_area = truth_wh[..., 0] * truth_wh[..., 1]

    return overlap_area / (pred_area + truth_area - overlap_area + 1e-10)
+ 
+ 
def evaluate_on_cpu(y_pred, y_true, num_classes, calc_now=True, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
    """Count true positives, ground truths and predictions using CPU NMS.

    Args:
        y_pred: (boxes, confs, probs) network outputs, each indexed by image
            along the first axis.
        y_true: list of the three ground-truth tensors; the last axis holds
            center (2), size (2), objectness (1), class one-hot and a trailing
            mix-up weight.
        num_classes: number of classes.
        calc_now: return (recall, precision) when True, otherwise the raw
            per-class counters.
        max_boxes, score_thresh, iou_thresh: forwarded to cpu_nms();
            `iou_thresh` is also the IoU a prediction must exceed to match a
            ground-truth box.

    Returns:
        (recall, precision) if `calc_now`, else
        (true_positive_dict, true_labels_dict, pred_labels_dict).
    """
    num_images = y_true[0].shape[0]
    true_labels_dict = {i: 0 for i in range(num_classes)}
    pred_labels_dict = {i: 0 for i in range(num_classes)}
    true_positive_dict = {i: 0 for i in range(num_classes)}

    for i in range(num_images):
        # Collect the ground truth of image i from all three scales.
        true_labels_list, true_boxes_list = [], []
        for j in range(3):
            true_probs_temp = y_true[j][i][..., 5:-1]
            true_boxes_temp = y_true[j][i][..., 0:4]

            # Cells with any class probability set contain an object.
            object_mask = true_probs_temp.sum(axis=-1) > 0

            true_labels_list += np.argmax(true_probs_temp[object_mask], axis=-1).tolist()
            true_boxes_list += true_boxes_temp[object_mask].tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # (center, size) -> (min corner, max corner). The explicit reshape
        # keeps the array two-dimensional for an image without any ground
        # truth, which used to raise IndexError on the column slicing.
        true_boxes = np.array(true_boxes_list, dtype=np.float64).reshape(len(true_boxes_list), 4)
        box_mins = true_boxes[:, 0:2] - true_boxes[:, 2:4] / 2.
        box_maxs = box_mins + true_boxes[:, 2:4]
        true_boxes = np.concatenate([box_mins, box_maxs], axis=-1)

        pred_boxes = y_pred[0][i:i + 1]
        pred_confs = y_pred[1][i:i + 1]
        pred_probs = y_pred[2][i:i + 1]

        pred_boxes, pred_confs, pred_labels = cpu_nms(pred_boxes, pred_confs * pred_probs, num_classes, max_boxes=max_boxes, score_thresh=score_thresh, iou_thresh=iou_thresh)

        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if pred_labels_list == []:
            continue

        for label in pred_labels_list:
            pred_labels_dict[label] += 1

        if not true_labels_list:
            # Nothing to match against: every prediction is a false positive.
            continue

        # calc iou
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # A ground-truth box counts as detected once, however many
        # predictions land on it, so a set of matched indices is sufficient.
        matched = set()
        for k, match_idx in enumerate(max_iou_idx):
            if iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]:
                matched.add(int(match_idx))

        for t in matched:
            true_positive_dict[true_labels_list[t]] += 1

    if calc_now:
        # avoid divided by 0
        true_positives = sum(true_positive_dict.values())
        recall = true_positives / (sum(true_labels_dict.values()) + 1e-6)
        precision = true_positives / (sum(pred_labels_dict.values()) + 1e-6)

        return recall, precision
    else:
        return true_positive_dict, true_labels_dict, pred_labels_dict
+ 
+ 
def evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, y_pred, y_true, num_classes, iou_thresh=0.5, calc_now=True):
    """Match NMS-filtered predictions against ground truth for one batch.

    Args:
        sess: object whose ``run(op, feed_dict=...)`` executes ``gpu_nms_op``.
        gpu_nms_op: op returning ``(boxes, scores, labels)`` for one image.
        pred_boxes_flag: feed key receiving the raw boxes of one image.
        pred_scores_flag: feed key receiving ``confidence * class_prob``.
        y_pred: sequence ``[boxes, confs, probs]`` indexed by image on axis 0.
        y_true: three label arrays (one per detection scale), image on axis 0.
            The last axis is read as ``[0:4]`` = box (center x, center y,
            width, height) and ``[5:-1]`` = per-class indicator.
        num_classes: number of classes (keys of the returned dicts).
        iou_thresh: minimum IoU for a prediction to match a ground-truth box.
        calc_now: when true return ``(recall, precision)``; otherwise return
            ``(true_positive_dict, true_labels_dict, pred_labels_dict)``.

    Images without any ground-truth box no longer crash: their predictions
    are simply counted as (unmatched) predictions.
    """
    num_images = y_true[0].shape[0]
    true_labels_dict = {i: 0 for i in range(num_classes)}
    pred_labels_dict = {i: 0 for i in range(num_classes)}
    true_positive_dict = {i: 0 for i in range(num_classes)}

    for i in range(num_images):
        # Gather the ground-truth labels/boxes of image i from the 3 scales.
        true_labels_list, true_boxes_list = [], []
        for j in range(3):
            true_probs_temp = y_true[j][i][..., 5:-1]
            true_boxes_temp = y_true[j][i][..., 0:4]

            # A cell/anchor holds an object when any class indicator is set.
            object_mask = true_probs_temp.sum(axis=-1) > 0

            true_probs_temp = true_probs_temp[object_mask]
            true_boxes_temp = true_boxes_temp[object_mask]

            true_labels_list += np.argmax(true_probs_temp, axis=-1).tolist()
            true_boxes_list += true_boxes_temp.tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # reshape keeps the array 2-D even when the image has no objects.
        true_boxes = np.array(true_boxes_list).reshape(-1, 4)
        # Convert (center, size) to (x_min, y_min, x_max, y_max) in place.
        box_centers, box_sizes = true_boxes[:, 0:2], true_boxes[:, 2:4]
        true_boxes[:, 0:2] = box_centers - box_sizes / 2.
        true_boxes[:, 2:4] = true_boxes[:, 0:2] + box_sizes

        pred_boxes = y_pred[0][i:i + 1]
        pred_confs = y_pred[1][i:i + 1]
        pred_probs = y_pred[2][i:i + 1]

        pred_boxes, pred_confs, pred_labels = sess.run(gpu_nms_op, feed_dict={pred_boxes_flag: pred_boxes, pred_scores_flag: pred_confs * pred_probs})

        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if pred_labels_list == []:
            continue

        if true_boxes.shape[0] == 0:
            # Nothing to match against: every prediction is a false positive.
            for label in pred_labels_list:
                pred_labels_dict[label] += 1
            continue

        # calc iou
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # Ground-truth index -> best confidence among predictions matching it.
        # Each ground-truth box can contribute at most one true positive.
        matched_conf = {}
        for k in range(max_iou_idx.shape[0]):
            pred_labels_dict[pred_labels_list[k]] += 1
            match_idx = max_iou_idx[k]  # V level
            if iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]:
                if match_idx not in matched_conf or pred_confs[k] > matched_conf[match_idx]:
                    matched_conf[match_idx] = pred_confs[k]

        for t in matched_conf:
            true_positive_dict[true_labels_list[t]] += 1

    if calc_now:
        # avoid divided by 0
        recall = sum(true_positive_dict.values()) / (sum(true_labels_dict.values()) + 1e-6)
        precision = sum(true_positive_dict.values()) / (sum(pred_labels_dict.values()) + 1e-6)

        return recall, precision
    else:
        return true_positive_dict, true_labels_dict, pred_labels_dict
+ 
+ 
def get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, image_ids, y_pred):
    """Run the NMS op on the first image of the batch and list its detections.

    Only index 0 of ``image_ids`` and of each ``y_pred`` component is used.

    Returns:
        A list with one row per detection:
        ``[image_id, x_min, y_min, x_max, y_max, score, label]``.
    """
    image_id = image_ids[0]

    feed = {
        pred_boxes_flag: y_pred[0][0:1],
        # Per-class score fed to NMS = objectness confidence * class probability.
        pred_scores_flag: y_pred[1][0:1] * y_pred[2][0:1],
    }
    boxes, scores, labels = sess.run(gpu_nms_op, feed_dict=feed)

    detections = []
    for idx, label in enumerate(labels):
        x_min, y_min, x_max, y_max = boxes[idx]
        detections.append([image_id, x_min, y_min, x_max, y_max, scores[idx], label])
    return detections
+ 
gt_dict = {}  # key: img_id, value: gt object list
def parse_gt_rec(gt_filename, compression_type, target_img_size, letterbox_resize=True):
    """Load ground-truth boxes from a record file, rescaled to the network input size.

    The result is cached in the module-level ``gt_dict``: once it is
    non-empty, later calls return it unchanged regardless of their arguments.

    Args:
        gt_filename: path handed to ``TFRecordIterator``.
        compression_type: compression argument handed to ``TFRecordIterator``.
        target_img_size: ``(width, height)`` the boxes are rescaled to.
        letterbox_resize: when true, scale with a single aspect-preserving
            ratio and shift by the centering padding; otherwise scale each
            axis independently.

    Returns:
        ``gt_dict`` mapping image id to a list of
        ``[x_min, y_min, x_max, y_max, label]`` entries.
    """
    global gt_dict

    if gt_dict:
        return gt_dict

    new_width, new_height = target_img_size

    with TFRecordIterator(gt_filename, compression_type) as reader:
        for data in reader:
            img_id, image, boxes, labels, ori_width, ori_height = parse_record(data)

            objects = []
            for idx, label in enumerate(labels):
                x_min, y_min, x_max, y_max = boxes[idx]

                if letterbox_resize:
                    # One ratio for both axes keeps the aspect ratio.
                    scale = min(new_width / ori_width, new_height / ori_height)

                    scaled_w = int(scale * ori_width)
                    scaled_h = int(scale * ori_height)

                    # Padding that centers the resized image in the target.
                    pad_x = int((new_width - scaled_w) / 2)
                    pad_y = int((new_height - scaled_h) / 2)

                    rescaled = [x_min * scale + pad_x,
                                y_min * scale + pad_y,
                                x_max * scale + pad_x,
                                y_max * scale + pad_y,
                                label]
                else:
                    rescaled = [x_min * new_width / ori_width,
                                y_min * new_height / ori_height,
                                x_max * new_width / ori_width,
                                y_max * new_height / ori_height,
                                label]
                objects.append(rescaled)
            gt_dict[img_id] = objects
    return gt_dict
+ 
+ 
+ # The following two functions are modified from FAIR's Detectron repo to calculate mAP:
+ # https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/voc_eval.py
def voc_ap(rec, prec, use_07_metric=False):
    """Compute average precision from cumulative recall/precision arrays.

    Args:
        rec: NumPy array of recall values, one per ranked detection.
        prec: NumPy array of precision values, same length as ``rec``.
        use_07_metric: when true use the 11-point interpolation
            (recall thresholds 0.0, 0.1, ..., 1.0); otherwise integrate the
            area under the monotonically-decreasing precision envelope.

    Returns:
        The average precision as a float.
    """
    if use_07_metric:
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            # Best precision among points whose recall reaches the threshold.
            p = np.max(prec[reached]) if np.any(reached) else 0
            ap = ap + p / 11.
        return ap

    # Sentinels so the curve spans recall 0 to 1.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: running maximum taken from the right.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Sum rectangle areas at the points where recall changes.
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])


def voc_eval(gt_dict, val_preds, classidx, iou_thres=0.5, use_07_metric=False):
    """Evaluate the detections of one class in PASCAL-VOC style.

    Args:
        gt_dict: mapping image id -> list of ``[x_min, y_min, x_max, y_max, label]``.
        val_preds: list of ``[image_id, x_min, y_min, x_max, y_max, score, label]``.
        classidx: class label to evaluate.
        iou_thres: a detection is a true positive when its best IoU with a
            not-yet-matched ground-truth box exceeds this value.
        use_07_metric: forwarded to ``voc_ap``.

    Returns:
        ``(npos, nd, recall, precision, ap)`` where ``npos`` is the number of
        ground-truth boxes and ``nd`` the number of detections of the class.
        When the class has no detections at all the historical placeholder
        ``(1e-6, 1e-6, 0, 0, 0)`` is returned.
    """
    # 1.obtain gt: extract all gt objects for this class
    class_recs = {}
    npos = 0
    for img_id in gt_dict:
        R = [obj for obj in gt_dict[img_id] if obj[-1] == classidx]
        bbox = np.array([x[:4] for x in R])
        det = [False] * len(R)
        npos += len(R)
        class_recs[img_id] = {'bbox': bbox, 'det': det}

    # 2. obtain pred results
    pred = [x for x in val_preds if x[-1] == classidx]
    if not pred:
        # Explicit check instead of relying on an indexing error.
        print('no box, ignore')
        return 1e-6, 1e-6, 0, 0, 0
    img_ids = [x[0] for x in pred]
    confidence = np.array([x[-2] for x in pred])
    BB = np.array([[x[1], x[2], x[3], x[4]] for x in pred])

    # 3. sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    img_ids = [img_ids[x] for x in sorted_ind]

    # 4. mark TPs and FPs
    nd = len(img_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)

    for d in range(nd):
        # A detection on an image without a ground-truth entry is a false positive.
        R = class_recs.get(img_ids[d])
        bb = BB[d, :]
        ovmax = -np.inf
        jmax = -1

        if R is not None and R['bbox'].size > 0:
            BBGT = R['bbox']
            # IoU with the "+1" inclusive-pixel convention of the VOC devkit.
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.)
                   + (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.)
                   - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > iou_thres and not R['det'][jmax]:
            # gt not matched yet
            tp[d] = 1.
            R['det'][jmax] = True
        else:
            # Low overlap, or the ground-truth box was already claimed.
            fp[d] = 1.

    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    eps = np.finfo(np.float64).eps
    # Guard both ratios so a class without ground truth yields 0 instead of NaN.
    rec = tp / np.maximum(float(npos), eps)
    prec = tp / np.maximum(tp + fp, eps)
    ap = voc_ap(rec, prec, use_07_metric)

    # return rec, prec, ap
    return npos, nd, rec[-1], tp[-1] / float(nd), ap
\ No newline at end of file
--- a/code/yolov3/misc_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/misc_utils.py 0 → 100644
View file @3168483
+ import numpy as np
+ import tensorflow as tf
+ import random
+ 
class AverageMeter(object):
    """Track the latest value and the weighted running mean of a metric.

    Attributes are ``val`` (last value), ``sum`` (weighted total),
    ``count`` (total weight) and ``average`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.average = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        total_weight = float(self.count)
        self.average = self.sum / total_weight
+ 
+ 
def parse_anchors(anchor_path):
    """Read comma-separated anchor values into an ``[N, 2]`` float32 array.

    The file is closed deterministically, and blank tokens (for example the
    one produced by a trailing comma or newline) are ignored.

    Args:
        anchor_path: path of the text file holding the anchor values.

    Returns:
        ``np.float32`` array of shape ``[-1, 2]``, one row per value pair.
    """
    with open(anchor_path, 'r') as anchor_file:
        tokens = anchor_file.read().split(',')
    values = [float(token) for token in tokens if token.strip()]
    return np.reshape(np.asarray(values, np.float32), [-1, 2])
+ 
+ 
def read_class_names(class_name_path):
    """Map zero-based line numbers of a text file to its class names.

    Only newline characters are stripped from each line.
    """
    with open(class_name_path, 'r') as class_file:
        return {index: line.strip('\n') for index, line in enumerate(class_file)}
+ 
+ 
def shuffle_and_overwrite(file_name):
    """Shuffle the lines of a text file in place.

    The file is read completely, its lines are shuffled with the module-level
    ``random`` generator, and the file is rewritten. Both handles are closed
    through context managers (the read handle used to be leaked).
    """
    with open(file_name, 'r') as source:
        content = source.readlines()
    random.shuffle(content)
    with open(file_name, 'w') as target:
        target.writelines(content)
+ 
+ 
def update_dict(ori_dict, new_dict):
    """Accumulate ``new_dict`` into ``ori_dict`` key by key.

    When ``ori_dict`` is empty (or otherwise falsy) ``new_dict`` itself is
    returned. Otherwise every key of ``ori_dict`` is incremented in place by
    the matching ``new_dict`` value and ``ori_dict`` is returned.
    """
    if ori_dict:
        for key in ori_dict:
            ori_dict[key] += new_dict[key]
        return ori_dict
    return new_dict
+ 
+ 
def list_add(ori_list, new_list):
    """Add ``new_list`` element-wise onto ``ori_list`` in place and return it."""
    for position, _ in enumerate(ori_list):
        ori_list[position] += new_list[position]
    return ori_list
+ 
+ 
def load_weights(var_list, weights_file):
    """Build assign ops that load a binary Darknet weights file into variables.

    The file is read as five int32 header values followed by a flat float32
    array. Variables are visited in order; a variable whose parent scope name
    contains ``'Conv'`` is treated as a convolution kernel, preceded in the
    file either by its four batch-norm tensors or by its bias.

    Variables that are not under a ``'Conv'`` scope are skipped. Previously
    such a variable left the loop index unchanged and the loop never ended.

    Args:
        var_list: ordered variables exposing ``name`` and ``shape.as_list()``.
        weights_file: path of the binary weights file.

    Returns:
        List of ``tf.assign`` ops, in file order.
    """
    with open(weights_file, "rb") as fp:
        np.fromfile(fp, dtype=np.int32, count=5)  # skip the header values
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0  # read position inside the flat weights array
    i = 0
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]
        if 'Conv' not in var1.name.split('/')[-2]:
            # Not a convolution kernel: nothing to load, move on.
            i += 1
            continue

        if 'BatchNorm' in var2.name.split('/')[-2]:
            gamma, beta, mean, variance = var_list[i + 1:i + 5]
            # The file stores beta before gamma.
            for bn_var in (beta, gamma, mean, variance):
                shape = bn_var.shape.as_list()
                num_params = np.prod(shape)
                bn_weights = weights[ptr:ptr + num_params].reshape(shape)
                ptr += num_params
                assign_ops.append(tf.assign(bn_var, bn_weights, validate_shape=True))
            i += 4
        elif 'Conv' in var2.name.split('/')[-2]:
            # load biases
            bias = var2
            bias_shape = bias.shape.as_list()
            bias_params = np.prod(bias_shape)
            bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
            ptr += bias_params
            assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
            i += 1

        shape = var1.shape.as_list()
        num_params = np.prod(shape)

        # Kernel values are reshaped as (shape[3], shape[2], shape[0], shape[1])
        # and then transposed back to the variable's own axis order.
        var_weights = weights[ptr:ptr + num_params].reshape(
            (shape[3], shape[2], shape[0], shape[1]))
        var_weights = np.transpose(var_weights, (2, 3, 1, 0))
        ptr += num_params
        assign_ops.append(
            tf.assign(var1, var_weights, validate_shape=True))
        i += 1

    return assign_ops
+ 
+ 
def config_learning_rate(args, global_step):
    """Create the learning-rate tensor selected by ``args.lr_type``.

    Supported values: ``'exponential'``, ``'cosine_decay'``,
    ``'cosine_decay_restart'``, ``'fixed'`` and ``'piecewise'``.

    Raises:
        ValueError: for any other ``args.lr_type``.
    """
    schedule = args.lr_type

    if schedule == 'exponential':
        decayed = tf.train.exponential_decay(args.learning_rate_init, global_step, args.lr_decay_freq,
                                             args.lr_decay_factor, staircase=True, name='exponential_learning_rate')
        # Never decay below the configured floor.
        return tf.maximum(decayed, args.lr_lower_bound)

    if schedule == 'cosine_decay':
        # Warm-up epochs (when enabled) are excluded from the cosine period.
        train_steps = (args.total_epoches - float(args.use_warm_up) * args.warm_up_epoch) * args.train_batch_num
        half_range = 0.5 * (args.learning_rate_init - args.lr_lower_bound)
        return args.lr_lower_bound + half_range * (1 + tf.cos(global_step / train_steps * np.pi))

    if schedule == 'cosine_decay_restart':
        return tf.train.cosine_decay_restarts(args.learning_rate_init, global_step,
                                              args.lr_decay_freq, t_mul=2.0, m_mul=1.0,
                                              name='cosine_decay_learning_rate_restart')

    if schedule == 'fixed':
        return tf.convert_to_tensor(args.learning_rate_init, name='fixed_learning_rate')

    if schedule == 'piecewise':
        return tf.train.piecewise_constant(global_step, boundaries=args.pw_boundaries, values=args.pw_values,
                                           name='piecewise_learning_rate')

    raise ValueError('Unsupported learning rate type!')
+ 
+ 
def config_optimizer(optimizer_name, learning_rate, decay=0.9, momentum=0.9):
    """Instantiate the TensorFlow optimizer named by ``optimizer_name``.

    Supported names: ``'momentum'``, ``'rmsprop'``, ``'adam'`` and ``'sgd'``.
    ``decay`` is used by RMSProp only; ``momentum`` by momentum and RMSProp.

    Raises:
        ValueError: for any other ``optimizer_name``.
    """
    # Factories are lazy so only the selected optimizer is constructed.
    factories = (
        ('momentum', lambda: tf.train.MomentumOptimizer(learning_rate, momentum=momentum)),
        ('rmsprop', lambda: tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum)),
        ('adam', lambda: tf.train.AdamOptimizer(learning_rate)),
        ('sgd', lambda: tf.train.GradientDescentOptimizer(learning_rate)),
    )
    for known_name, build in factories:
        if optimizer_name == known_name:
            return build()
    raise ValueError('Unsupported optimizer type!')
\ No newline at end of file
--- a/code/yolov3/model.py 0 → 100644
View file @3168483
+++ b/code/yolov3/model.py 0 → 100644
View file @3168483
+ ##### layer utils
+ from __future__ import division, print_function
+ 
+ import numpy as np
+ import tensorflow as tf
+ slim = tf.contrib.slim
+ 
def conv2d(inputs, filters, kernel_size, strides=1):
    """Apply ``slim.conv2d``, padding explicitly when ``strides > 1``.

    For ``strides > 1`` the two spatial axes are zero-padded by
    ``kernel_size - 1`` in total (the extra pixel, if any, goes at the end)
    before the convolution. The convolution uses ``'SAME'`` padding when
    ``strides == 1`` and ``'VALID'`` otherwise.
    """
    if strides > 1:
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        spatial_pad = [pad_beg, pad_end]
        # Axis 0 and axis 3 are left unpadded.
        inputs = tf.pad(inputs, [[0, 0], spatial_pad, spatial_pad, [0, 0]], mode='CONSTANT')

    padding = 'SAME' if strides == 1 else 'VALID'
    return slim.conv2d(inputs, filters, kernel_size, stride=strides, padding=padding)
+ 
def darknet53_body(inputs):
    """Build the Darknet-53 backbone and return its last three stage outputs.

    After an initial 32-filter convolution the network runs five stages.
    Each stage is one stride-2 3x3 convolution followed by a number of
    residual blocks (1, 2, 8, 8 and 4 respectively).

    Returns:
        ``(route_1, route_2, route_3)``: outputs of the third, fourth and
        fifth stages.
    """
    def _residual(block_input, filters):
        # 1x1 conv with `filters`, 3x3 conv with 2 * `filters`, plus skip.
        squeezed = conv2d(block_input, filters * 1, 1)
        expanded = conv2d(squeezed, filters * 2, 3)
        return expanded + block_input

    # (downsample conv filters, residual block filters, number of blocks)
    stages = (
        (64, 32, 1),
        (128, 64, 2),
        (256, 128, 8),
        (512, 256, 8),
        (1024, 512, 4),
    )

    net = conv2d(inputs, 32, 3, strides=1)

    stage_outputs = []
    for downsample_filters, block_filters, num_blocks in stages:
        net = conv2d(net, downsample_filters, 3, strides=2)
        for _ in range(num_blocks):
            net = _residual(net, block_filters)
        stage_outputs.append(net)

    route_1, route_2, route_3 = stage_outputs[-3:]
    return route_1, route_2, route_3
+ 
+ 
def yolo_block(inputs, filters):
    """Stack six alternating 1x1 / 3x3 convolutions of a detection branch.

    Returns:
        ``(route, net)``: ``route`` is the output of the fifth convolution
        (1x1, ``filters`` channels) and ``net`` the output of the sixth
        (3x3, ``2 * filters`` channels).
    """
    net = inputs
    for _ in range(2):
        net = conv2d(net, filters * 1, 1)
        net = conv2d(net, filters * 2, 3)
    route = conv2d(net, filters * 1, 1)
    net = conv2d(route, filters * 2, 3)
    return route, net
+ 
+ 
def upsample_layer(inputs, out_shape):
    """Nearest-neighbour resize ``inputs`` to ``(out_shape[1], out_shape[2])``."""
    # NOTE: the resize op expects (height, width), i.e. height comes first.
    target_size = (out_shape[1], out_shape[2])
    return tf.image.resize_nearest_neighbor(inputs, target_size, name='upsampled')
+ 
class yolov3(object):
    """YOLOv3 network: backbone + three-scale head, box decoding and loss.

    Typical use is ``forward`` to obtain the three raw feature maps, then
    ``predict`` for inference or ``compute_loss`` for training.
    ``forward`` must run first because it records ``self.img_size``.
    """

    def __init__(self, class_num, anchors, use_label_smooth=False, use_focal_loss=False, batch_norm_decay=0.999, weight_decay=5e-4, use_static_shape=True):
        """Store the configuration; no graph is built here.

        Args:
            class_num: number of object classes.
            anchors: anchor sizes in (width, height) order; slices ``[6:9]``,
                ``[3:6]`` and ``[0:3]`` are used for feature maps 1, 2 and 3.
            use_label_smooth: smooth the class targets in the loss.
            use_focal_loss: apply a focal weighting to the confidence loss.
            batch_norm_decay: decay of the batch-norm moving averages.
            weight_decay: L2 regularisation factor of the conv kernels.
            use_static_shape: read grid sizes from static shapes instead of
                ``tf.shape``.
        """
        self.class_num = class_num
        self.anchors = anchors
        self.batch_norm_decay = batch_norm_decay
        self.use_label_smooth = use_label_smooth
        self.use_focal_loss = use_focal_loss
        self.weight_decay = weight_decay
        self.use_static_shape = use_static_shape

    def forward(self, inputs, is_training=False, reuse=False):
        """Build the network and return the three raw feature maps.

        Each feature map has ``3 * (5 + class_num)`` channels. Also records
        the input spatial size in ``self.img_size``.
        """
        # the input size: [height, weight] format
        self.img_size = tf.shape(inputs)[1:3]
        # Prints the symbolic tensor when the graph is built.
        print("Img size:", self.img_size)

        batch_norm_params = {
            'decay': self.batch_norm_decay,
            'epsilon': 1e-05,
            'scale': True,
            'is_training': is_training,
            'fused': None,
        }

        with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params,
                                biases_initializer=None,
                                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                                weights_regularizer=slim.l2_regularizer(self.weight_decay)):

                with tf.variable_scope('darknet53_body'):
                    route_1, route_2, route_3 = darknet53_body(inputs)

                with tf.variable_scope('yolov3_head'):
                    inter1, net = yolo_block(route_3, 512)
                    # Detection convs: no batch norm, no activation, with bias.
                    feature_map_1 = slim.conv2d(net, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')

                    inter1 = conv2d(inter1, 256, 1)
                    inter1 = upsample_layer(inter1, route_2.get_shape().as_list() if self.use_static_shape else tf.shape(route_2))
                    concat1 = tf.concat([inter1, route_2], axis=3)

                    inter2, net = yolo_block(concat1, 256)
                    feature_map_2 = slim.conv2d(net, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')

                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2, route_1.get_shape().as_list() if self.use_static_shape else tf.shape(route_1))
                    concat2 = tf.concat([inter2, route_1], axis=3)

                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(feature_map_3, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')

            return feature_map_1, feature_map_2, feature_map_3

    def reorganize_layer(self, feature_map, anchors):
        """Decode one raw feature map into boxes and logits.

        Returns:
            ``(x_y_offset, boxes, conf_logits, prob_logits)`` where ``boxes``
            holds (center_x, center_y, width, height) scaled back by the
            image/grid ratio, and ``x_y_offset`` is the grid of cell indices.
        """
        # size : [h, w] format
        grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3]  # [13, 13]
        ratio = tf.cast(self.img_size / grid_size, tf.float32)

        # anchor : [w, h] format
        rescaled_anchors = [(anchor[0] / ratio[1], anchor[1] / ratio[0]) for anchor in anchors]

        feature_map = tf.reshape(feature_map, [-1, grid_size[0], grid_size[1], 3, 5 + self.class_num])

        box_centers, box_sizes, conf_logits, prob_logits = tf.split(feature_map, [2, 2, 1, self.class_num], axis=-1)
        box_centers = tf.nn.sigmoid(box_centers)

        # Grid of (x, y) cell indices, shaped to broadcast over the anchors.
        grid_x = tf.range(grid_size[1], dtype=tf.int32)
        grid_y = tf.range(grid_size[0], dtype=tf.int32)
        grid_x, grid_y = tf.meshgrid(grid_x, grid_y)
        x_offset = tf.reshape(grid_x, (-1, 1))
        y_offset = tf.reshape(grid_y, (-1, 1))
        x_y_offset = tf.concat([x_offset, y_offset], axis=-1)

        x_y_offset = tf.cast(tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2]), tf.float32)

        box_centers = box_centers + x_y_offset
        # ratio is [h, w]; reverse it to match the (x, y) order of the boxes.
        box_centers = box_centers * ratio[::-1]

        box_sizes = tf.exp(box_sizes) * rescaled_anchors
        box_sizes = box_sizes * ratio[::-1]

        boxes = tf.concat([box_centers, box_sizes], axis=-1)

        return x_y_offset, boxes, conf_logits, prob_logits

    def _reshape_logit(self, result):
        """Flatten one ``reorganize_layer`` result to ``[batch, cells * 3, ...]``."""
        x_y_offset, boxes, conf_logits, prob_logits = result
        grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
        boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
        conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
        prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
        return boxes, conf_logits, prob_logits

    def predict(self, feature_maps):
        """Turn the three feature maps into final boxes, confidences and class probabilities.

        Returns:
            ``(boxes, confs, probs)`` concatenated over the three scales;
            ``boxes`` are (x_min, y_min, x_max, y_max).
        """
        feature_map_1, feature_map_2, feature_map_3 = feature_maps

        feature_map_anchors = [(feature_map_1, self.anchors[6:9]),
                               (feature_map_2, self.anchors[3:6]),
                               (feature_map_3, self.anchors[0:3])]
        reorg_results = [self.reorganize_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]

        boxes_list, confs_list, probs_list = [], [], []

        for result in reorg_results:
            boxes, conf_logits, prob_logits = self._reshape_logit(result)
            confs = tf.sigmoid(conf_logits)
            probs = tf.sigmoid(prob_logits)
            boxes_list.append(boxes)
            confs_list.append(confs)
            probs_list.append(probs)

        boxes = tf.concat(boxes_list, axis=1)
        confs = tf.concat(confs_list, axis=1)
        probs = tf.concat(probs_list, axis=1)

        # (center, size) -> corner coordinates.
        center_x, center_y, width, height = tf.split(boxes, [1, 1, 1, 1], axis=-1)
        x_min = center_x - width / 2
        y_min = center_y - height / 2
        x_max = center_x + width / 2
        y_max = center_y + height / 2

        boxes = tf.concat([x_min, y_min, x_max, y_max], axis=-1)

        return boxes, confs, probs

    def loss_layer(self, feature_map_i, y_true, anchors):
        """Compute the four loss terms for one detection scale.

        ``y_true``'s last axis is read as ``[0:4]`` box, ``[4:5]`` object
        mask, ``[5:-1]`` class targets and ``[-1:]`` a per-box weight.

        Returns:
            ``(xy_loss, wh_loss, conf_loss, class_loss)``, each divided by the
            batch size.
        """
        grid_size = tf.shape(feature_map_i)[1:3]
        ratio = tf.cast(self.img_size / grid_size, tf.float32)
        # N: batch_size
        N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

        x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self.reorganize_layer(feature_map_i, anchors)

        ### mask
        object_mask = y_true[..., 4:5]
        ignore_mask = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

        def loop_cond(idx, ignore_mask):
            return tf.less(idx, tf.cast(N, tf.int32))

        def loop_body(idx, ignore_mask):
            valid_true_boxes = tf.boolean_mask(y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], 'bool'))

            iou = self.box_iou(pred_boxes[idx], valid_true_boxes)
            best_iou = tf.reduce_max(iou, axis=-1)

            # 1 where the prediction overlaps no ground-truth box by >= 0.5.
            ignore_mask_tmp = tf.cast(best_iou < 0.5, tf.float32)

            ignore_mask = ignore_mask.write(idx, ignore_mask_tmp)
            return idx + 1, ignore_mask

        _, ignore_mask = tf.while_loop(cond=loop_cond, body=loop_body, loop_vars=[0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = tf.expand_dims(ignore_mask, -1)

        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # Offsets of the box centers inside their grid cell.
        true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
        pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

        true_tw_th = y_true[..., 2:4] / anchors
        pred_tw_th = pred_box_wh / anchors

        # Replace zeros by ones so the log below yields 0 instead of -inf.
        true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                              x=tf.ones_like(true_tw_th), y=true_tw_th)
        pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                              x=tf.ones_like(pred_tw_th), y=pred_tw_th)
        true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
        pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

        # Weight in (1, 2]: smaller boxes get a larger weight.
        box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

        ### loss

        mix_w = y_true[..., -1:]

        xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w) / N
        wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale * mix_w) / N

        conf_pos_mask = object_mask
        conf_neg_mask = (1 - object_mask) * ignore_mask
        conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)
        conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)

        conf_loss = conf_loss_pos + conf_loss_neg

        if self.use_focal_loss:
            alpha = 1.0
            gamma = 2.0
            focal_mask = alpha * tf.pow(tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)
            conf_loss *= focal_mask
        conf_loss = tf.reduce_sum(conf_loss * mix_w) / N

        if self.use_label_smooth:
            delta = 0.01
            label_target = (1 - delta) * y_true[..., 5:-1] + delta * 1. / self.class_num
        else:
            label_target = y_true[..., 5:-1]

        class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_target, logits=pred_prob_logits) * mix_w
        class_loss = tf.reduce_sum(class_loss) / N

        return xy_loss, wh_loss, conf_loss, class_loss

    def box_iou(self, pred_boxes, valid_true_boxes):
        """IoU between every predicted box and every valid ground-truth box.

        Both inputs hold (center_x, center_y, width, height) on their last
        axis; the result gains a trailing axis over the ground-truth boxes.
        """
        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # Extra axis so predictions broadcast against the ground-truth boxes.
        pred_box_xy = tf.expand_dims(pred_box_xy, -2)
        pred_box_wh = tf.expand_dims(pred_box_wh, -2)

        true_box_xy = valid_true_boxes[:, 0:2]
        true_box_wh = valid_true_boxes[:, 2:4]

        intersect_mins = tf.maximum(pred_box_xy - pred_box_wh / 2.,
                                    true_box_xy - true_box_wh / 2.)
        intersect_maxs = tf.minimum(pred_box_xy + pred_box_wh / 2.,
                                    true_box_xy + true_box_wh / 2.)
        intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)

        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1]
        true_box_area = true_box_wh[..., 0] * true_box_wh[..., 1]
        true_box_area = tf.expand_dims(true_box_area, axis=0)

        # Small epsilon avoids a division by zero for degenerate boxes.
        iou = intersect_area / (pred_box_area + true_box_area - intersect_area + 1e-10)

        return iou

    def compute_loss(self, y_pred, y_true):
        """Sum the per-scale losses.

        Returns:
            ``[total_loss, loss_xy, loss_wh, loss_conf, loss_class]``.
        """
        loss_xy, loss_wh, loss_conf, loss_class = 0., 0., 0., 0.
        anchor_group = [self.anchors[6:9], self.anchors[3:6], self.anchors[0:3]]

        for i in range(len(y_pred)):
            result = self.loss_layer(y_pred[i], y_true[i], anchor_group[i])
            loss_xy += result[0]
            loss_wh += result[1]
            loss_conf += result[2]
            loss_class += result[3]
        total_loss = loss_xy + loss_wh + loss_conf + loss_class
        return [total_loss, loss_xy, loss_wh, loss_conf, loss_class]
\ No newline at end of file
--- a/code/yolov3/nms_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/nms_utils.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import numpy as np
+ import tensorflow as tf
+ 
def gpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, nms_thresh=0.5):
    """Build per-class non-max-suppression ops with TensorFlow.

    Args:
        boxes: tensor reshaped to ``[-1, 4]``.
        scores: tensor reshaped to ``[-1, num_classes]``.
        num_classes: number of classes; NMS runs once per class.
        max_boxes: maximum number of boxes kept for each class.
        score_thresh: boxes scoring below this value are dropped before NMS.
        nms_thresh: IoU threshold handed to ``tf.image.non_max_suppression``.

    Returns:
        ``(boxes, score, label)`` tensors concatenated over all classes.
    """
    kept_boxes, kept_labels, kept_scores = [], [], []
    max_boxes = tf.constant(max_boxes, dtype='int32')

    # '-1' means the exact number of boxes is not known in advance
    flat_boxes = tf.reshape(boxes, [-1, 4])
    flat_scores = tf.reshape(scores, [-1, num_classes])

    # Step 1: mask of (box, class) pairs whose score reaches the threshold.
    above_thresh = tf.greater_equal(flat_scores, tf.constant(score_thresh))
    # Step 2: run non_max_suppression independently for each class.
    for cls in range(num_classes):
        # Step 3: keep only the candidates of this class.
        cls_boxes = tf.boolean_mask(flat_boxes, above_thresh[:, cls])
        cls_scores = tf.boolean_mask(flat_scores[:, cls], above_thresh[:, cls])
        selected = tf.image.non_max_suppression(boxes=cls_boxes,
                                                scores=cls_scores,
                                                max_output_size=max_boxes,
                                                iou_threshold=nms_thresh, name='nms_indices')
        # One int32 label equal to `cls` per selected box.
        kept_labels.append(tf.ones_like(tf.gather(cls_scores, selected), 'int32') * cls)
        kept_boxes.append(tf.gather(cls_boxes, selected))
        kept_scores.append(tf.gather(cls_scores, selected))

    boxes = tf.concat(kept_boxes, axis=0)
    score = tf.concat(kept_scores, axis=0)
    label = tf.concat(kept_labels, axis=0)

    return boxes, score, label
+ 
+ 
def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):
    """Pure NumPy non-max suppression for a single class.

    Box areas and intersections now use the same continuous-coordinate
    convention (no ``+1``). Previously only the intersection added one pixel,
    so the ratio could exceed 1 or turn negative and identical small boxes
    were not suppressed.

    Args:
        boxes: array of shape ``[N, 4]`` holding (x_min, y_min, x_max, y_max).
        scores: array of shape ``[N]``.
        max_boxes: maximum number of indices returned.
        iou_thresh: a box is dropped when its IoU with an already kept,
            higher-scoring box exceeds this value.

    Returns:
        List of kept indices into ``boxes``, highest score first.
    """
    assert boxes.shape[1] == 4 and len(scores.shape) == 1

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        # Epsilon keeps zero-area boxes from producing NaN.
        ovr = inter / (areas[i] + areas[rest] - inter + 1e-10)

        # Keep only the candidates that do not overlap the chosen box too much.
        order = rest[ovr <= iou_thresh]

    return keep[:max_boxes]
+ 
+ 
+ def cpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
+     boxes = boxes.reshape(-1, 4)
+     scores = scores.reshape(-1, num_classes)
+     picked_boxes, picked_score, picked_label = [], [], []
+ 
+     for i in range(num_classes):
+         indices = np.where(scores[:,i] >= score_thresh)
+         filter_boxes = boxes[indices]
+         filter_scores = scores[:,i][indices]
+         if len(filter_boxes) == 0: 
+             continue
+ 
+         indices = py_nms(filter_boxes, filter_scores,
+                          max_boxes=max_boxes, iou_thresh=iou_thresh)
+         picked_boxes.append(filter_boxes[indices])
+         picked_score.append(filter_scores[indices])
+         picked_label.append(np.ones(len(indices), dtype='int32')*i)
+     if len(picked_boxes) == 0: 
+         return None, None, None
+ 
+     boxes = np.concatenate(picked_boxes, axis=0)
+     score = np.concatenate(picked_score, axis=0)
+     label = np.concatenate(picked_label, axis=0)
+ 
+     return boxes, score, label
\ No newline at end of file
--- a/code/yolov3/plot_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/plot_utils.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import cv2
+ import random
+ 
+ 
+ def get_color_table(class_num, seed=2):
+     random.seed(seed)
+     color_table = {}
+     for i in range(class_num):
+         color_table[i] = [random.randint(0, 255) for _ in range(3)]
+     return color_table
+ 
+ 
+ def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
+     tl = line_thickness or int(round(0.002 * max(img.shape[0:2])))  # line thickness
+     color = color or [random.randint(0, 255) for _ in range(3)]
+     c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))
+     cv2.rectangle(img, c1, c2, color, thickness=tl)
+     if label:
+         tf = max(tl - 1, 1)  # font thickness
+         t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=tf)[0]
+         c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+         cv2.rectangle(img, c1, c2, color, -1)  # filled
+         cv2.putText(img, label, (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0], thickness=tf, lineType=cv2.LINE_AA)
\ No newline at end of file
--- a/code/yolov3/test_single_image.py 0 → 100644
View file @3168483
+++ b/code/yolov3/test_single_image.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import argparse
+ import cv2
+ 
+ from misc_utils import parse_anchors, read_class_names
+ from nms_utils import gpu_nms
+ from plot_utils import get_color_table, plot_one_box
+ from data_utils import letterbox_resize
+ 
+ from model import yolov3
+ 
+ parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
+ parser.add_argument("input_image", type=str,
+                     help="The path of the input image.")
+ parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+                     help="The path of the anchor txt file.")
+ parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
+                     help="Resize the input image with `new_size`, size format: [width, height]")
+ parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
+                     help="Whether to use the letterbox resize.")
+ parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
+                     help="The path of the class names.")
+ parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+                     help="The path of the weights to restore.")
+ args = parser.parse_args()
+ 
+ args.anchors = parse_anchors(args.anchor_path)
+ args.classes = read_class_names(args.class_name_path)
+ args.num_class = len(args.classes)
+ 
+ color_table = get_color_table(args.num_class)
+ 
+ img_ori = cv2.imread(args.input_image)
+ if args.letterbox_resize:
+     img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
+ else:
+     height_ori, width_ori = img_ori.shape[:2]
+     img = cv2.resize(img_ori, tuple(args.new_size))
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = np.asarray(img, np.float32)
+ img = img[np.newaxis, :] / 255.
+ 
+ with tf.Session() as sess:
+     input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
+     yolo_model = yolov3(args.num_class, args.anchors)
+     with tf.variable_scope('yolov3'):
+         pred_feature_maps = yolo_model.forward(input_data, False)
+     pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
+ 
+     pred_scores = pred_confs * pred_probs
+ 
+     boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)
+ 
+     saver = tf.train.Saver()
+     saver.restore(sess, args.restore_path)
+ 
+     boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
+ 
+     if args.letterbox_resize:
+         boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
+         boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
+     else:
+         boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
+         boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))
+ 
+     print("box coords:")
+     print(boxes_)
+     print('*' * 30)
+     print("scores:")
+     print(scores_)
+     print('*' * 30)
+     print("labels:")
+     print(labels_)
+ 
+     for i in range(len(boxes_)):
+         x0, y0, x1, y1 = boxes_[i]
+         plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])
+     cv2.imshow('Detection result', img_ori)
+     cv2.imwrite('detection_result.jpg', img_ori)
+     cv2.waitKey(0)
\ No newline at end of file
--- a/code/yolov3/tfrecord_utils.py 0 → 100644
View file @3168483
+++ b/code/yolov3/tfrecord_utils.py 0 → 100644
View file @3168483
+ import tensorflow as tf
+ from itertools import tee
+ 
+ class TFRecordIterator:
+     def __init__(self, path, compression=None):
+         self._core = tf.python_io.tf_record_iterator(path, tf.python_io.TFRecordOptions(compression))
+         self._iterator = iter(self._core)
+         self._iterator, self._iterator_temp = tee(self._iterator)
+         self._total_cnt = sum(1 for _ in self._iterator_temp)
+ 
+     def _read_value(self, feature):
+         if len(feature.int64_list.value) > 0:
+             return feature.int64_list.value
+ 
+         if len(feature.bytes_list.value) > 0:
+             return feature.bytes_list.value
+ 
+         if len(feature.float_list.value) > 0:
+             return feature.float_list.value
+ 
+         return None
+ 
+     def _read_features(self, features):
+         d = dict()
+         for data in features:
+             d[data] = self._read_value(features[data])
+         return d
+ 
+     def __enter__(self):
+         return self
+ 
+     def __exit__(self, exception_type, exception_value, traceback):
+         pass
+ 
+     def __iter__(self):
+         return self
+ 
+     def __next__(self):
+         record = next(self._iterator)
+         example = tf.train.Example()
+         example.ParseFromString(record)
+         return self._read_features(example.features.feature)
+ 
+     def count(self):
+         return self._total_cnt
+ 
+     
\ No newline at end of file
--- a/code/yolov3/train.py 0 → 100644
View file @3168483
+++ b/code/yolov3/train.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import os
+ from tqdm import trange
+ 
+ import args
+ 
+ from misc_utils import shuffle_and_overwrite, config_learning_rate, config_optimizer, AverageMeter
+ from data_utils import get_batch_data
+ from eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
+ from nms_utils import gpu_nms
+ 
+ from model import yolov3
+ 
+ train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP')
+ train_dataset = train_dataset.shuffle(train_img_cnt)
+ train_dataset = train_dataset.batch(batch_size)
+ train_dataset = train_dataset.map(
+     lambda x: tf.py_func(get_batch_data,
+                          inp=[x, args.class_num, args.img_size, args.anchors, True, args.multi_scale_train, args.use_mix_up, args.letterbox_resize],
+                          Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
+     num_parallel_calls=args.num_threads
+ )
+ train_dataset = train_dataset.prefetch(prefetech_buffer)
+ 
+ val_dataset = tf.data.TFRecordDataset(filenames=val_file, compression_type='GZIP')
+ val_dataset = val_dataset.batch(1)
+ val_dataset = val_dataset.map(
+     lambda x: tf.py_func(get_batch_data,
+                          inp=[x, args.class_num, args.img_size, args.anchors, False, False, False, args.letterbox_resize],
+                          Tout=[tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
+     num_parallel_calls=args.num_threads
+ )
+ val_dataset.prefetch(prefetech_buffer)
+ 
+ iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
+ train_init_op = iterator.make_initializer(train_dataset)
+ val_init_op = iterator.make_initializer(val_dataset)
+ 
+ image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
+ y_true = [y_true_13, y_true_26, y_true_52]
+ 
+ image_ids.set_shape([None])
+ image.set_shape([None, None, None, 3])
+ for y in y_true:
+     y.set_shape([None, None, None, None, None])
+ 
+ 
+ ### Model definition
+ yolo_model = yolov3(class_num, anchors, use_label_smooth, use_focal_loss, batch_norm_decay, weight_decay, use_static_shape=False)
+ 
+ with tf.variable_scope('yolov3'):
+     pred_feature_maps = yolo_model.forward(image, is_training=is_training)
+ 
+ loss = yolo_model.compute_loss(pred_feature_maps, y_true)
+ y_pred = yolo_model.predict(pred_feature_maps)
+ 
+ l2_loss = tf.losses.get_regularization_loss()
+ 
+ saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to_restore(include=restore_include, exclude=restore_exclude))
+ update_vars = tf.contrib.framework.get_variables_to_restore(include=update_part)
+ 
+ 
+ global_step = tf.Variable(float(global_step), trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ if use_warm_up:
+     learning_rate = tf.cond(tf.less(global_step, train_batch_num * warm_up_epoch), 
+                             lambda: learning_rate_init * global_step / (train_batch_num * warm_up_epoch),
+                             lambda: config_learning_rate(global_step - args.train_batch_num * args.warm_up_epoch))
+ else:
+     learning_rate = config_learning_rate(global_step)
+ 
+ optimizer = config_optimizer(args.optimizer_name, learning_rate)
+ 
+ update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+ 
+ with tf.control_dependencies(update_ops):
+     gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)
+     clip_grad_var = [gv if gv[0] is None else [
+           tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]
+     train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step)
+ 
+ if args.save_optimizer:
+     print('Saving optimizer parameters: ON')
+     saver_to_save = tf.train.Saver()
+     saver_best = tf.train.Saver()
+ else:
+     print('Saving optimizer parameters: OFF')
+ 
+ 
+ ##### Start training
+ 
+ with tf.Session() as sess:
+     sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
+ 
+     if os.path.exists(args.restore_path):
+         saver_to_restore.restore(sess, args.restore_path)
+ 
+     print('\nStart training...\n')
+ 
+     best_mAP = -np.Inf
+ 
+     for epoch in range(args.total_epoches):
+         sess.run(train_init_op)
+         loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+ 
+         ### train part
+         for i in trange(args.train_batch_num):
+             _, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
+                 [train_op, y_pred, y_true, loss, global_step, learning_rate],
+                 feed_dict={is_training: True})
+ 
+             loss_total.update(__loss[0], len(__y_pred[0]))
+             loss_xy.update(__loss[1], len(__y_pred[0]))
+             loss_wh.update(__loss[2], len(__y_pred[0]))
+             loss_conf.update(__loss[3], len(__y_pred[0]))
+             loss_class.update(__loss[4], len(__y_pred[0]))
+ 
+             if __global_step % args.train_evaluation_step == 0 and __global_step > 0:
+                 recall, precision = evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __y_pred, __y_true, args.class_num, args.nms_threshold)
+ 
+                 info = "Epoch: {}, global_step: {} | loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f} | ".format(
+                         epoch, int(__global_step), loss_total.average, loss_xy.average, loss_wh.average, loss_conf.average, loss_class.average)
+                 info += 'Last batch: rec: {:.3f}, prec: {:.3f} | lr: {:.5g}'.format(recall, precision, __lr)
+                 print(info)
+ 				
+                 if np.isnan(loss_total.average):
+                     print('****' * 10)
+                     raise ArithmeticError('Gradient exploded!')
+ 
+         ## train end (saving parameters)
+         if args.save_optimizer and epoch % args.save_epoch == 0 and epoch > 0:
+             if loss_total.average <= 2.:
+                 saver_to_save.save(sess, args.save_dir + 'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format(epoch, int(__global_step), loss_total.average, __lr))
+ 
+         ### validation part
+         if epoch % args.val_evaluation_epoch == 0 and epoch >= args.warm_up_epoch:
+             sess.run(val_init_op)
+ 
+             val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
+ 
+             val_preds = []
+ 
+             for j in trange(args.val_img_cnt):
+                 __image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss],
+                                                          feed_dict={is_training: False})
+                 pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)
+                 val_preds.extend(pred_content)
+                 val_loss_total.update(__loss[0])
+                 val_loss_xy.update(__loss[1])
+                 val_loss_wh.update(__loss[2])
+                 val_loss_conf.update(__loss[3])
+                 val_loss_class.update(__loss[4])
+ 
+             # calc mAP
+             rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
+             gt_dict = parse_gt_rec(args.val_file, args.img_size, args.letterbox_resize)
+ 
+             info = '======> Epoch: {}, global_step: {}, lr: {:.6g} <======\n'.format(epoch, __global_step, __lr)
+ 
+             for ii in range(args.class_num):
+                 npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=args.eval_threshold, use_07_metric=args.use_voc_07_metric)
+                 info += 'EVAL: Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}\n'.format(ii, rec, prec, ap)
+                 rec_total.update(rec, npos)
+                 prec_total.update(prec, nd)
+                 ap_total.update(ap, 1)
+ 
+             mAP = ap_total.average
+             info += 'EVAL: Recall: {:.4f}, Precison: {:.4f}, mAP: {:.4f}\n'.format(rec_total.average, prec_total.average, mAP)
+             info += 'EVAL: loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f}\n'.format(
+                 val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average)
+             print(info)
+ 
+             if args.save_optimizer and mAP > best_mAP:
+                 best_mAP = mAP
+                 saver_best.save(sess, args.save_dir + 'best_model_Epoch_{}_step_{}_mAP_{:.4f}_loss_{:.4f}_lr_{:.7g}'.format(
+                                    epoch, int(__global_step), best_mAP, val_loss_total.average, __lr))
\ No newline at end of file
--- a/code/yolov3/video_test.py 0 → 100644
View file @3168483
+++ b/code/yolov3/video_test.py 0 → 100644
View file @3168483
+ from __future__ import division, print_function
+ 
+ import tensorflow as tf
+ import numpy as np
+ import argparse
+ import cv2
+ import time
+ 
+ from misc_utils import parse_anchors, read_class_names
+ from nms_utils import gpu_nms
+ from plot_utils import get_color_table, plot_one_box
+ from data_utils import letterbox_resize
+ 
+ from model import yolov3
+ 
+ parser = argparse.ArgumentParser(description="YOLO-V3 video test procedure.")
+ parser.add_argument("input_video", type=str,
+                     help="The path of the input video.")
+ parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+                     help="The path of the anchor txt file.")
+ parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
+                     help="Resize the input image with `new_size`, size format: [width, height]")
+ parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
+                     help="Whether to use the letterbox resize.")
+ parser.add_argument("--class_name_path", type=str, default="./data/classes.txt",
+                     help="The path of the class names.")
+ parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+                     help="The path of the weights to restore.")
+ parser.add_argument("--save_video", type=lambda x: (str(x).lower() == 'true'), default=False,
+                     help="Whether to save the video detection results.")
+ args = parser.parse_args()
+ 
+ args.anchors = parse_anchors(args.anchor_path)
+ args.classes = read_class_names(args.class_name_path)
+ args.num_class = len(args.classes)
+ 
+ color_table = get_color_table(args.num_class)
+ 
+ vid = cv2.VideoCapture(args.input_video)
+ video_frame_cnt = int(vid.get(7))
+ video_width = int(vid.get(3))
+ video_height = int(vid.get(4))
+ video_fps = int(vid.get(5))
+ 
+ if args.save_video:
+     fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+     videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
+ 
+ with tf.Session() as sess:
+     input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
+     yolo_model = yolov3(args.num_class, args.anchors)
+     with tf.variable_scope('yolov3'):
+         pred_feature_maps = yolo_model.forward(input_data, False)
+     pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
+ 
+     pred_scores = pred_confs * pred_probs
+ 
+     boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)
+ 
+     saver = tf.train.Saver()
+     saver.restore(sess, args.restore_path)
+ 
+     for i in range(video_frame_cnt):
+         ret, img_ori = vid.read()
+         if args.letterbox_resize:
+             img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
+         else:
+             height_ori, width_ori = img_ori.shape[:2]
+             img = cv2.resize(img_ori, tuple(args.new_size))
+         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+         img = np.asarray(img, np.float32)
+         img = img[np.newaxis, :] / 255.
+ 
+         start_time = time.time()
+         boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
+         end_time = time.time()
+ 
+         # rescale the coordinates to the original image
+         if args.letterbox_resize:
+             boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
+             boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
+         else:
+             boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
+             boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))
+ 
+ 
+         for i in range(len(boxes_)):
+             x0, y0, x1, y1 = boxes_[i]
+             plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])
+         cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000), (40, 40), 0,
+                     fontScale=1, color=(0, 255, 0), thickness=2)
+         cv2.imshow('image', img_ori)
+         if args.save_video:
+             videoWriter.write(img_ori)
+         if cv2.waitKey(1) & 0xFF == ord('q'):
+             break
+ 
+     vid.release()
+     if args.save_video:
+         videoWriter.release()
\ No newline at end of file