Showing 5 changed files with 184 additions and 17 deletions
code/yolov3/changes.txt (new file, mode 100644)
+changes from https://github.com/wizyoung/YOLOv3_TensorFlow
+
+by Seongju Kim, kareus1@khu.ac.kr
+
+1] changed TextLineDataset to TFRecordDataset (also changed the data parsing in the data utils and eval utils)
+2] fixed the restore-does-not-exist problem in train/eval mode
+3] fixed the saver to save the parameters only when the save-optimizer option is true
+4] changed the parameter 'mode' to the bool value 'is_training' in the data util functions (the string value 'mode' is passed as a byte string, so the functions do not evaluate if-clauses as expected, e.g. 'train' != b'train')
+5] wrote a TFRecord binary iterator that runs without a tf session (reference: https://github.com/pgmmpk/tfrecord), sketched below
\ No newline at end of file
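For reference, the session-free iterator from item 5 can be sketched as follows. This is a minimal illustration of the TFRecord on-disk framing, not the exact code in this commit, and it skips CRC validation for brevity:

    import struct

    def tfrecord_iterator(path):
        # TFRecord framing per record: an 8-byte little-endian payload length,
        # a 4-byte masked CRC of that length, the payload itself,
        # and a 4-byte masked CRC of the payload.
        with open(path, 'rb') as f:
            while True:
                header = f.read(8)
                if len(header) < 8:
                    break                    # clean end of file
                length, = struct.unpack('<Q', header)
                f.read(4)                    # skip the length CRC
                yield f.read(length)         # serialized tf.train.Example bytes
                f.read(4)                    # skip the payload CRC

For GZIP-compressed files, such as those the train pipeline below reads with compression_type='GZIP', the whole file is one gzip stream, so opening with gzip.open(path, 'rb') instead is enough; the framing inside is identical.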
@@ -17,16 +17,16 @@ from model import yolov3
 parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")
 
 # paths
-parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt",
+parser.add_argument("--eval_file", type=str, default="../../data/test.tfrecord",
                     help="The path of the validation or test txt file.")
 
-parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
+parser.add_argument("--restore_path", type=str, default="../../data/darknet_weights/yolov3.ckpt",
                     help="The path of the weights to restore.")
 
-parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
+parser.add_argument("--anchor_path", type=str, default="../../data/yolo_anchors.txt",
                     help="The path of the anchor txt file.")
 
-parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
+parser.add_argument("--class_name_path", type=str, default="../../data/classes.txt",
                     help="The path of the class names.")
 
 # some numbers
code/yolov3/get_kmeans.py (new file, mode 100644)
+from __future__ import division, print_function
+
+import numpy as np
+
+def iou(box, clusters):
+    """
+    Calculates the Intersection over Union (IoU) between a box and k clusters.
+    param:
+        box: tuple or array, shifted to the origin (i.e. width and height)
+        clusters: numpy array of shape (k, 2) where k is the number of clusters
+    return:
+        numpy array of shape (k,) where k is the number of clusters
+    """
+    x = np.minimum(clusters[:, 0], box[0])
+    y = np.minimum(clusters[:, 1], box[1])
+    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
+        raise ValueError("Box has no area")
+
+    intersection = x * y
+    box_area = box[0] * box[1]
+    cluster_area = clusters[:, 0] * clusters[:, 1]
+
+    # the small epsilon guards against division by zero
+    iou_ = np.true_divide(intersection, box_area + cluster_area - intersection + 1e-10)
+
+    return iou_
+
+
+def avg_iou(boxes, clusters):
+    """
+    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
+    param:
+        boxes: numpy array of shape (r, 2), where r is the number of rows
+        clusters: numpy array of shape (k, 2) where k is the number of clusters
+    return:
+        average IoU as a single float
+    """
+    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])
+
+
+def translate_boxes(boxes):
+    """
+    Translates all the boxes to the origin.
+    param:
+        boxes: numpy array of shape (r, 4)
+    return:
+        numpy array of shape (r, 2)
+    """
+    new_boxes = boxes.copy()
+    for row in range(new_boxes.shape[0]):
+        new_boxes[row][2] = np.abs(new_boxes[row][2] - new_boxes[row][0])
+        new_boxes[row][3] = np.abs(new_boxes[row][3] - new_boxes[row][1])
+    return np.delete(new_boxes, [0, 1], axis=1)
+
+
+def kmeans(boxes, k, dist=np.median):
+    """
+    Calculates k-means clustering with the Intersection over Union (IoU) metric.
+    param:
+        boxes: numpy array of shape (r, 2), where r is the number of rows
+        k: number of clusters
+        dist: distance function
+    return:
+        numpy array of shape (k, 2)
+    """
+    rows = boxes.shape[0]
+
+    distances = np.empty((rows, k))
+    last_clusters = np.zeros((rows,))
+
+    np.random.seed()
+
+    # the Forgy method will fail if the whole array contains the same rows
+    clusters = boxes[np.random.choice(rows, k, replace=False)]
+
+    while True:
+        for row in range(rows):
+            distances[row] = 1 - iou(boxes[row], clusters)
+
+        nearest_clusters = np.argmin(distances, axis=1)
+
+        # converged: no box changed its assigned cluster
+        if (last_clusters == nearest_clusters).all():
+            break
+
+        for cluster in range(k):
+            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)
+
+        last_clusters = nearest_clusters
+
+    return clusters
+
+
+def parse_anno(annotation_path, target_size=None):
+    # expected line format: two leading fields (unused here, e.g. an image
+    # index and the image path), img_width, img_height, then
+    # 'label x_min y_min x_max y_max' repeated once per box
+    anno = open(annotation_path, 'r')
+    result = []
+    for line in anno:
+        s = line.strip().split(' ')
+        img_w = int(s[2])
+        img_h = int(s[3])
+        s = s[4:]
+        box_cnt = len(s) // 5
+        for i in range(box_cnt):
+            x_min, y_min, x_max, y_max = float(s[i*5+1]), float(s[i*5+2]), float(s[i*5+3]), float(s[i*5+4])
+            width = x_max - x_min
+            height = y_max - y_min
+            assert width > 0
+            assert height > 0
+            # use letterbox resize, i.e. keep the original aspect ratio,
+            # and get k-means anchors on the resized target image size
+            if target_size is not None:
+                resize_ratio = min(target_size[0] / img_w, target_size[1] / img_h)
+                width *= resize_ratio
+                height *= resize_ratio
+                result.append([width, height])
+            # otherwise get k-means anchors on the original image size
+            else:
+                result.append([width, height])
+    result = np.asarray(result)
+    return result
+
+
+def get_kmeans(anno, cluster_num=9):
+    anchors = kmeans(anno, cluster_num)
+    ave_iou = avg_iou(anno, anchors)
+
+    anchors = anchors.astype('int').tolist()
+
+    # sort the anchors from smallest to largest area
+    anchors = sorted(anchors, key=lambda x: x[0] * x[1])
+
+    return anchors, ave_iou
+
+
+if __name__ == '__main__':
+    # target resize format: [width, height]
+    # if target_size is specified, the anchors are computed on the resized image scale;
+    # if target_size is set to None, the anchors are computed on the original image scale
+    target_size = [416, 416]
+    annotation_path = "../train.txt"
+    anno_result = parse_anno(annotation_path, target_size=target_size)
+    anchors, ave_iou = get_kmeans(anno_result, 9)
+
+    # format the anchors as 'w,h, w,h, ...' for the anchor text file
+    anchor_string = ''
+    for anchor in anchors:
+        anchor_string += '{},{}, '.format(anchor[0], anchor[1])
+    anchor_string = anchor_string[:-2]
+
+    print('### Save the anchors to text file ###')
+    print('anchors are:')
+    print(anchor_string)
+    print('the average iou is:')
+    print(ave_iou)
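As a usage note, a hypothetical annotation line in the format parse_anno expects (the path and numbers below are made up for illustration) would be

    0 ./imgs/0001.jpg 1920 1080 0 100 200 300 400

i.e. index, image path, image width, image height, then label x_min y_min x_max y_max for each box. With target_size = [416, 416], the 200x200 box above is scaled by min(416/1920, 416/1080) ≈ 0.217 to roughly 43x43 before clustering.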
@@ -98,7 +98,6 @@ class yolov3(object):
     def forward(self, inputs, is_training=False, reuse=False):
         # the input size: [height, width] format
         self.img_size = tf.shape(inputs)[1:3]
-        print("Img size:", self.img_size)
 
         batch_norm_params = {
             'decay': self.batch_norm_decay,
@@ -148,7 +147,7 @@ class yolov3(object):
 
         return feature_map_1, feature_map_2, feature_map_3
 
-    def reorganize_layer(self, feature_map, anchors):
+    def reorg_layer(self, feature_map, anchors):
         # size : [h, w] format
         grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3]  # [13, 13]
         ratio = tf.cast(self.img_size / grid_size, tf.float32)
@@ -179,15 +178,6 @@ class yolov3(object):
         boxes = tf.concat([box_centers, box_sizes], axis=-1)
 
         return x_y_offset, boxes, conf_logits, prob_logits
-
-
-    def _reshape_logit(result):
-        x_y_offset, boxes, conf_logits, prob_logits = result
-        grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
-        boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
-        conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
-        prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
-        return boxes, conf_logits, prob_logits
 
     def predict(self, feature_maps):
         feature_map_1, feature_map_2, feature_map_3 = feature_maps
@@ -195,8 +185,16 @@ class yolov3(object):
         feature_map_anchors = [(feature_map_1, self.anchors[6:9]),
                                (feature_map_2, self.anchors[3:6]),
                                (feature_map_3, self.anchors[0:3])]
-        reorg_results = [self.reorganize_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]
-
+        reorg_results = [self.reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]
+
+        def _reshape_logit(result):
+            x_y_offset, boxes, conf_logits, prob_logits = result
+            grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
+            boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
+            conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
+            prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
+            return boxes, conf_logits, prob_logits
+
         boxes_list, confs_list, probs_list = [], [], []
 
         for result in reorg_results:
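(Note: before this change, _reshape_logit was defined at class scope with result as its only parameter, so the self references in its body could never resolve; nesting it inside predict lets it close over self.use_static_shape and self.class_num.)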
@@ -14,6 +14,14 @@ from nms_utils import gpu_nms
 
 from model import yolov3
 
+is_training = tf.placeholder(tf.bool, name="phase_train")
+handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
+
+pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
+pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
+gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
+
+### tf.data pipeline
 train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP')
 train_dataset = train_dataset.shuffle(train_img_cnt)
 train_dataset = train_dataset.batch(batch_size)
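The handle_flag placeholder is the standard TF 1.x feedable-iterator pattern. A minimal sketch of how the rest of the pipeline presumably consumes it follows; the iterator and session code below is an assumption for illustration, not part of this commit:

    # assumed continuation: one iterator that switches datasets via the handle
    iterator = tf.data.Iterator.from_string_handle(
        handle_flag, train_dataset.output_types, train_dataset.output_shapes)
    batch = iterator.get_next()

    train_iterator = train_dataset.make_one_shot_iterator()
    with tf.Session() as sess:
        # feeding a concrete handle picks which dataset `batch` pulls from
        train_handle = sess.run(train_iterator.string_handle())
        sess.run(batch, feed_dict={handle_flag: train_handle})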