김성주

fixed some errors

1 +changes from https://github.com/wizyoung/YOLOv3_TensorFlow
2 +
3 +by Seongju Kim, kareus1@khu.ac.kr
4 +
5 +1] changed TextLineDataset to TFRecordDataset. (also changed data parsing in data utils and eval utils)
6 +2] fixed restore-does-not-exist problem in train/eval mode
7 +3] fixed saver to save the parameter only when save-optimizer option is true
8 +4] changed the string parameter 'mode' to the boolean 'is_training' in the data util functions (a string 'mode' is passed in as a byte string, so the if-clauses did not evaluate as expected, e.g. 'train' != b'train')
9 +5] wrote TFRecord binary iterator, which runs without tf session (references: https://github.com/pgmmpk/tfrecord )
...\ No newline at end of file ...\ No newline at end of file
...@@ -17,16 +17,16 @@ from model import yolov3 ...@@ -17,16 +17,16 @@ from model import yolov3
17 parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.") 17 parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")
18 18
19 # paths 19 # paths
20 -parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt", 20 +parser.add_argument("--eval_file", type=str, default="../../data/test.tfrecord",
21 help="The path of the validation or test txt file.") 21 help="The path of the validation or test txt file.")
22 22
23 -parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt", 23 +parser.add_argument("--restore_path", type=str, default="../../data/darknet_weights/yolov3.ckpt",
24 help="The path of the weights to restore.") 24 help="The path of the weights to restore.")
25 25
26 -parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt", 26 +parser.add_argument("--anchor_path", type=str, default="../../data/yolo_anchors.txt",
27 help="The path of the anchor txt file.") 27 help="The path of the anchor txt file.")
28 28
29 -parser.add_argument("--class_name_path", type=str, default="./data/coco.names", 29 +parser.add_argument("--class_name_path", type=str, default="../../data/classes.txt",
30 help="The path of the class names.") 30 help="The path of the class names.")
31 31
32 # some numbers 32 # some numbers
......
1 +from __future__ import division, print_function
2 +
3 +import numpy as np
4 +
def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    param:
        box: tuple or array, shifted to the origin (i. e. width and height)
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        numpy array of shape (k,) where k is the number of clusters
    raise:
        ValueError: if the overlap with any cluster has zero width or height,
            i.e. the box or one of the clusters has a zero side
    """
    # Box and clusters share the origin as a corner, so the overlap extent is
    # the element-wise minimum of the widths and of the heights.
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # the small epsilon keeps the division defined even for a vanishing union
    return np.true_divide(intersection, box_area + cluster_area - intersection + 1e-10)
27 +
28 +
def avg_iou(boxes, clusters):
    """
    Averages, over all boxes, the best IoU each box reaches with any cluster.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    best_overlaps = []
    for idx in range(boxes.shape[0]):
        # IoU of this box against every cluster; keep only the best match
        overlaps = iou(boxes[idx], clusters)
        best_overlaps.append(np.max(overlaps))
    return np.mean(best_overlaps)
39 +
40 +
def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.
    param:
        boxes: numpy array of shape (r, 4); columns 0/1 and 2/3 are treated as
            two opposite corners (x, y) of each box
    return:
        numpy array of shape (r, 2) holding the absolute width and height
    """
    # work on a copy so the caller's array is left untouched
    new_boxes = boxes.copy()
    # |corner2 - corner1| for every row at once instead of a Python loop
    new_boxes[:, 2:4] = np.abs(new_boxes[:, 2:4] - new_boxes[:, 0:2])
    # drop the first corner, leaving only the sizes
    return np.delete(new_boxes, [0, 1], axis=1)
54 +
55 +
def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters
        dist: callable invoked as dist(members, axis=0) to reduce the boxes
            assigned to a cluster into that cluster's new centre
    return:
        numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the very first assignment can not
    # be mistaken for convergence (an all-zero start would stop immediately
    # whenever every box is nearest to cluster 0, e.g. for k == 1)
    last_clusters = np.full((rows,), -1)

    # NOTE: re-seeds numpy's global random state on every call
    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    # (fancy indexing copies, so updating clusters never modifies boxes)
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        # distance of every box to every cluster centre: 1 - IoU
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        # converged once no box changes its cluster
        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # an empty cluster keeps its previous centre; reducing an empty
            # selection would produce NaN and corrupt later IoU computations
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters
91 +
92 +
def parse_anno(annotation_path, target_size=None):
    """
    Collects the [width, height] of every bounding box in an annotation file.

    Each non-blank line is split on single spaces and read as
        f0 f1 img_w img_h  (f x_min y_min x_max y_max) ...
    Fields f0, f1 and the leading field of every 5-value group are not used
    here (presumably line index, image path and class label -- verify against
    the code that writes the file).
    param:
        annotation_path: path of the annotation text file
        target_size: optional [width, height]; when given, box sizes are scaled
            by the letterbox ratio min(target_w / img_w, target_h / img_h)
    return:
        numpy array with one [width, height] row per box
    raise:
        AssertionError: if a box has a non-positive width or height
    """
    result = []
    # the context manager guarantees the file is closed, even on a parse error
    with open(annotation_path, 'r') as anno:
        for line in anno:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing with an IndexError
                continue
            s = line.split(' ')
            img_w = int(s[2])
            img_h = int(s[3])
            s = s[4:]
            box_cnt = len(s) // 5
            for i in range(box_cnt):
                x_min, y_min, x_max, y_max = (float(v) for v in s[i * 5 + 1:i * 5 + 5])
                width = x_max - x_min
                height = y_max - y_min
                assert width > 0
                assert height > 0
                # use letterbox resize, i.e. keep the original aspect ratio
                # get k-means anchors on the resized target image size;
                # otherwise the anchors stay on the original image size
                if target_size is not None:
                    # float() keeps this a true division on every interpreter
                    resize_ratio = min(float(target_size[0]) / img_w,
                                       float(target_size[1]) / img_h)
                    width *= resize_ratio
                    height *= resize_ratio
                result.append([width, height])
    return np.asarray(result)
120 +
121 +
def get_kmeans(anno, cluster_num=9):
    """
    Clusters the box sizes in anno and reports the resulting anchors.
    param:
        anno: array of box sizes, handed unchanged to kmeans and avg_iou
        cluster_num: number of anchors to produce
    return:
        (anchors, ave_iou): the cluster centres truncated to int, as a list
        of [w, h] lists sorted by ascending area, and the average IoU
        computed with the untruncated centres
    """
    centroids = kmeans(anno, cluster_num)
    # measured before the centres are truncated to integers
    mean_overlap = avg_iou(anno, centroids)

    def _area(wh):
        return wh[0] * wh[1]

    int_anchors = centroids.astype('int').tolist()
    int_anchors.sort(key=_area)

    return int_anchors, mean_overlap
132 +
133 +
if __name__ == '__main__':
    # target resize format: [width, height]
    # if target_size is specified, the anchors are on the resized image scale
    # if target_size is set to None, the anchors are on the original image scale
    target_size = [416, 416]
    annotation_path = "../train.txt"
    anno_result = parse_anno(annotation_path, target_size=target_size)
    anchors, ave_iou = get_kmeans(anno_result, 9)

    # "w,h" pairs separated by ", "
    anchor_string = ', '.join('{},{}'.format(anchor[0], anchor[1]) for anchor in anchors)

    print('### Save the anchors to text file ###')
    print('anchors are:')
    print(anchor_string)
    print('the average iou is:')
    print(ave_iou)
...@@ -98,7 +98,6 @@ class yolov3(object): ...@@ -98,7 +98,6 @@ class yolov3(object):
98 def forward(self, inputs, is_training=False, reuse=False): 98 def forward(self, inputs, is_training=False, reuse=False):
99 # the input size: [height, width] format 99 # the input size: [height, width] format
100 self.img_size = tf.shape(inputs)[1:3] 100 self.img_size = tf.shape(inputs)[1:3]
101 - print("Img size:", self.img_size)
102 101
103 batch_norm_params = { 102 batch_norm_params = {
104 'decay': self.batch_norm_decay, 103 'decay': self.batch_norm_decay,
...@@ -148,7 +147,7 @@ class yolov3(object): ...@@ -148,7 +147,7 @@ class yolov3(object):
148 147
149 return feature_map_1, feature_map_2, feature_map_3 148 return feature_map_1, feature_map_2, feature_map_3
150 149
151 - def reorganize_layer(self, feature_map, anchors): 150 + def reorg_layer(self, feature_map, anchors):
152 # size : [h, w] format 151 # size : [h, w] format
153 grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3] # [13, 13] 152 grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[1:3] # [13, 13]
154 ratio = tf.cast(self.img_size / grid_size, tf.float32) 153 ratio = tf.cast(self.img_size / grid_size, tf.float32)
...@@ -179,15 +178,6 @@ class yolov3(object): ...@@ -179,15 +178,6 @@ class yolov3(object):
179 boxes = tf.concat([box_centers, box_sizes], axis=-1) 178 boxes = tf.concat([box_centers, box_sizes], axis=-1)
180 179
181 return x_y_offset, boxes, conf_logits, prob_logits 180 return x_y_offset, boxes, conf_logits, prob_logits
182 -
183 -
184 - def _reshape_logit(result):
185 - x_y_offset, boxes, conf_logits, prob_logits = result
186 - grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
187 - boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
188 - conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
189 - prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
190 - return boxes, conf_logits, prob_logits
191 181
192 def predict(self, feature_maps): 182 def predict(self, feature_maps):
193 feature_map_1, feature_map_2, feature_map_3 = feature_maps 183 feature_map_1, feature_map_2, feature_map_3 = feature_maps
...@@ -195,8 +185,16 @@ class yolov3(object): ...@@ -195,8 +185,16 @@ class yolov3(object):
195 feature_map_anchors = [(feature_map_1, self.anchors[6:9]), 185 feature_map_anchors = [(feature_map_1, self.anchors[6:9]),
196 (feature_map_2, self.anchors[3:6]), 186 (feature_map_2, self.anchors[3:6]),
197 (feature_map_3, self.anchors[0:3])] 187 (feature_map_3, self.anchors[0:3])]
198 - reorg_results = [self.reorganize_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors] 188 + reorg_results = [self.reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]
199 - 189 +
190 + def _reshape_logit(result):
191 + x_y_offset, boxes, conf_logits, prob_logits = result
192 + grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
193 + boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
194 + conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
195 + prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
196 + return boxes, conf_logits, prob_logits
197 +
200 boxes_list, confs_list, probs_list = [], [], [] 198 boxes_list, confs_list, probs_list = [], [], []
201 199
202 for result in reorg_results: 200 for result in reorg_results:
......
...@@ -14,6 +14,14 @@ from nms_utils import gpu_nms ...@@ -14,6 +14,14 @@ from nms_utils import gpu_nms
14 14
15 from model import yolov3 15 from model import yolov3
16 16
17 +is_training = tf.placeholder(tf.bool, name="phase_train")
18 +handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
19 +
20 +pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
21 +pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
22 +gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
23 +
24 +### tf.data pipeline
17 train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP') 25 train_dataset = tf.data.TFRecordDataset(filenames=train_file, compression_type='GZIP')
18 train_dataset = train_dataset.shuffle(train_img_cnt) 26 train_dataset = train_dataset.shuffle(train_img_cnt)
19 train_dataset = train_dataset.batch(batch_size) 27 train_dataset = train_dataset.batch(batch_size)
......