Make inference_pb.py, Complete making ML recommend module

이현규
Commit a2447ee7d69f6cf0d4b2e1d696e747fa82ac6114 a2447ee7 1 parent 793c6b89
Showing 5 changed files with 122 additions and 29 deletions
web/backend/yt8m/esot3ria/features.pb
web/backend/yt8m/esot3ria/inference_pb.py
web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
web/backend/yt8m/esot3ria/video_recommender.py
web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
--- a/web/backend/yt8m/esot3ria/features.pb
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/features.pb
View file @a2447ee
--- a/web/backend/yt8m/esot3ria/inference_pb.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/inference_pb.py
View file @a2447ee
@@ -2,7 +2,22 @@ import numpy as np
 import tensorflow as tf
 from tensorflow import logging
 from tensorflow import gfile
- import esot3ria.pbutil as pbutil
+ import operator
+ import esot3ria.pb_util as pbutil
+ import esot3ria.video_recommender as recommender
+ import esot3ria.video_util as videoutil
+ 
+ # Define file paths.
+ MODEL_PATH = "/Users/esot3ria/PycharmProjects/yt8m/models/frame/" \
+              "refined_model/inference_model/segment_inference_model"
+ VOCAB_PATH = "../vocabulary.csv"
+ VIDEO_TAGS_PATH = "./kaggle_solution_40k.csv"
+ TAG_VECTOR_MODEL_PATH = "./tag_vectors.model"
+ VIDEO_VECTOR_MODEL_PATH = "./video_vectors.model"
+ 
+ # Define parameters.
+ TAG_TOP_K = 5
+ VIDEO_TOP_K = 10
 
 
 def get_segments(batch_video_mtx, batch_num_frames, segment_size):
@@ -42,7 +57,7 @@ def get_segments(batch_video_mtx, batch_num_frames, segment_size):
     }
 
 
- def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
+ def format_predictions(video_ids, predictions, top_k, whitelisted_cls_mask=None):
     batch_size = len(video_ids)
     for video_index in range(batch_size):
         video_prediction = predictions[video_index]
@@ -53,15 +68,26 @@ def format_prediction(video_ids, predictions, top_k, whitelisted_cls_mask=None):
         line = [(class_index, predictions[video_index][class_index])
                 for class_index in top_indices]
         line = sorted(line, key=lambda p: -p[1])
-         return (video_ids[video_index] + "," +
+         yield (video_ids[video_index] + "," +
                " ".join("%i %g" % (label, score) for (label, score) in line) +
                "\n").encode("utf8")
 
 
- def inference_pb(file_path, model_path):
+ def normalize_tag(tag):
+     """Normalize a tag name into a lowercase, hyphen-separated token.
+ 
+     The tag is lowercased, everything from the first " (" onwards is
+     dropped, and the remaining spaces become hyphens, e.g.
+     "Mobile phone" -> "mobile-phone" and "Foo (bar)" -> "foo".
+ 
+     Args:
+         tag: Tag name. Values that are not ``str`` are returned unchanged.
+ 
+     Returns:
+         The normalized tag string, or ``tag`` itself when it is not a string.
+     """
+     if not isinstance(tag, str):
+         return tag
+ 
+     # NOTE(review): non-letter characters are kept as-is. A literal
+     # str.replace('[^a-zA-Z]', ' ') used to sit here, but str.replace does not
+     # interpret regular expressions and that literal can never occur in a
+     # lowercased string, so it had no effect. Confirm how the tag-vector
+     # vocabulary was normalized before adding any regex-based cleanup.
+     lowered = tag.lower()
+ 
+     # Keep only the text before the first " (" (the whole string if absent).
+     base_name = lowered.partition(" (")[0]
+     return base_name.replace(" ", "-")
+ 
+ 
+ def inference_pb(file_path):
+     inference_result = {}
     with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
 
-         # 200527 Esot3riA
         # 0. Import SequenceExample type target from pb.
         target_video = pbutil.convert_pb(file_path)
 
@@ -80,18 +106,17 @@ def inference_pb(file_path, model_path):
             video_batch_val[i] = np.concatenate([video_batch_rgb, video_batch_audio], axis=0)
         video_batch_val = np.array([video_batch_val])
         num_frames_batch_val = np.array([n_frames])
-         # 200527 Esot3riA End
 
-         # Restore checkpoint and meta-graph file
-         if not gfile.Exists(model_path + ".meta"):
-           raise IOError("Cannot find %s. Did you run eval.py?" % model_path)
-         meta_graph_location = model_path + ".meta"
+         # Restore checkpoint and meta-graph file.
+         if not gfile.Exists(MODEL_PATH + ".meta"):
+             raise IOError("Cannot find %s. Did you run eval.py?" % MODEL_PATH)
+         meta_graph_location = MODEL_PATH + ".meta"
         logging.info("loading meta-graph: " + meta_graph_location)
 
         with tf.device("/cpu:0"):
             saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True)
-         logging.info("restoring variables from " + model_path)
-         saver.restore(sess, model_path)
+         logging.info("restoring variables from " + MODEL_PATH)
+         saver.restore(sess, MODEL_PATH)
         input_tensor = tf.get_collection("input_batch_raw")[0]
         num_frames_tensor = tf.get_collection("num_frames")[0]
         predictions_tensor = tf.get_collection("predictions")[0]
@@ -109,8 +134,6 @@ def inference_pb(file_path, model_path):
         sess.run(
             set_up_init_ops(tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES)))
 
-         coord = tf.train.Coordinator()
-         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
         whitelisted_cls_mask = np.zeros((predictions_tensor.get_shape()[-1],),
                                         dtype=np.float32)
         segment_label_ids_file = '../segment_label_ids.csv'
@@ -123,7 +146,6 @@ def inference_pb(file_path, model_path):
                     # Simply skip the non-integer line.
                     continue
 
-         # 200527 Esot3riA
         # 2. Make segment features.
         results = get_segments(video_batch_val, num_frames_batch_val, 5)
         video_segment_ids = results["video_segment_ids"]
@@ -143,22 +165,59 @@ def inference_pb(file_path, model_path):
                                         input_tensor: video_batch_val,
                                         num_frames_tensor: num_frames_batch_val
                                     })
-         logging.info(predictions_val)
-         logging.info("profit :D")
 
-         # result = format_prediction(video_id_batch_val, predictions_val, 10, whitelisted_cls_mask)
-         # 결과값
-         # 1. Tag 목록들(5개) + 각 Tag의 유사도(dict format)
-         # 2. 연관된 영상들의 링크 => 모델에서 연관영상 찾아서, 유저 인풋(Threshold) 받아서 (20%~80%) 연관영상 + 연관도 5개 출력.
+         # 3. Make vocabularies.
+         voca_dict = {}
+         vocabs = open(VOCAB_PATH, 'r')
+         while True:
+             line = vocabs.readline()
+             if not line: break
+             vocab_dict_item = line.split(",")
+             if vocab_dict_item[0] != "Index":
+                 voca_dict[vocab_dict_item[0]] = vocab_dict_item[3]
+         vocabs.close()
+ 
+         # 4. Make combined scores.
+         combined_scores = {}
+         for line in format_predictions(video_id_batch_val, predictions_val, TAG_TOP_K, whitelisted_cls_mask):
+             segment_id, preds = line.decode("utf8").split(",")
+             preds = preds.split(" ")
+             pred_cls_ids = [int(preds[idx]) for idx in range(0, len(preds), 2)]
+             pred_cls_scores = [float(preds[idx]) for idx in range(1, len(preds), 2)]
+             for i in range(len(pred_cls_ids)):
+                 if pred_cls_ids[i] in combined_scores:
+                     combined_scores[pred_cls_ids[i]] += pred_cls_scores[i]
+                 else:
+                     combined_scores[pred_cls_ids[i]] = pred_cls_scores[i]
+ 
+         combined_scores = sorted(combined_scores.items(), key=operator.itemgetter(1), reverse=True)
+         demoninator = float(combined_scores[0][1] + combined_scores[1][1]
+                             + combined_scores[2][1] + combined_scores[3][1] + combined_scores[4][1])
+ 
+         tag_result = []
+         for itemIndex in range(TAG_TOP_K):
+             segment_tag = str(voca_dict[str(combined_scores[itemIndex][0])])
+             normalized_tag = normalize_tag(segment_tag)
+             tag_percentage = format(combined_scores[itemIndex][1] / demoninator, ".3f")
+             tag_result.append((normalized_tag, tag_percentage))
+ 
+         # 5. Create recommend videos info, Combine results.
+         recommend_video_ids = recommender.recommend_videos(tag_result, TAG_VECTOR_MODEL_PATH,
+                                                            VIDEO_VECTOR_MODEL_PATH, VIDEO_TOP_K)
+         video_result = [videoutil.getVideoInfo(ids, VIDEO_TAGS_PATH, TAG_TOP_K) for ids in recommend_video_ids]
+ 
+         inference_result = {
+             "tag_result": tag_result,
+             "video_result": video_result
+         }
 
+         # 6. Dispose instances.
+         sess.close()
 
+     return inference_result
 
 
 if __name__ == '__main__':
-     logging.set_verbosity(tf.logging.INFO)
- 
-     file_path = '/tmp/mediapipe/features.pb'
-     model_path = '/Users/esot3ria/PycharmProjects/yt8m/models/frame' \
-                  '/sample_model/inference_model/segment_inference_model'
- 
-     inference_pb(file_path, model_path)
+     filepath = "features.pb"
+     result = inference_pb(filepath)
+     print(result)
--- a/web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/pbutil.py → web/backend/yt8m/esot3ria/pb_util.py
View file @a2447ee
--- a/web/backend/yt8m/esot3ria/video_recommender.py 0 → 100644
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/video_recommender.py 0 → 100644
View file @a2447ee
+ from gensim.models import Word2Vec
+ import numpy as np
+ 
+ def recommend_videos(tags, tag_model_path, video_model_path, top_k):
+     """Return the entries of the video model most similar to weighted tags.
+ 
+     The vectors of all known tags are summed, each scaled by its weight, and
+     the resulting vector is used to query the video vectors.
+ 
+     Args:
+         tags: Iterable of ``(tag, weight)`` pairs. ``weight`` is passed
+             through ``float()``, so numeric strings are accepted.
+         tag_model_path: Path handed to ``Word2Vec.load`` for the tag model.
+         video_model_path: Path of the saved video vectors.
+         top_k: Number of similar entries requested from the video vectors.
+ 
+     Returns:
+         List holding the first element of every ``similar_by_vector`` result
+         (presumably video ids -- confirm against the video model).
+     """
+     tag_vectors = Word2Vec.load(tag_model_path).wv
+     # NOTE(review): ``load`` is reached through the ``wv`` of a throwaway,
+     # empty Word2Vec model; presumably the file holds saved keyed vectors --
+     # confirm before simplifying this call.
+     video_vectors = Word2Vec().wv.load(video_model_path)
+ 
+     # Size the accumulator from the tag model instead of assuming 100 dims.
+     video_vector = np.zeros(tag_vectors.vector_size)
+     for (tag, weight) in tags:
+         # Tags that are missing from the tag model's vocabulary are skipped.
+         if tag in tag_vectors.vocab:
+             video_vector = video_vector + (tag_vectors[tag] * float(weight))
+ 
+     similar_ids = [x[0] for x in video_vectors.similar_by_vector(video_vector, top_k)]
+     return similar_ids
--- a/web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
View file @a2447ee
+++ b/web/backend/yt8m/esot3ria/url_generator.py → web/backend/yt8m/esot3ria/video_util.py
View file @a2447ee
 import requests
+ import pandas as pd
 
 base_URL = 'https://data.yt8m.org/2/j/i/'
 youtube_url = 'https://www.youtube.com/watch?v='
 
+ 
 def getURL(vid_id):
     URL = base_URL + vid_id[:-2] + '/' + vid_id + '.js'
     response = requests.get(URL, verify = False)
@@ -10,4 +12,17 @@ def getURL(vid_id):
         return youtube_url + response.text[10:-3]
 
 
- # example usage : getURL('nXSc');
\ No newline at end of file
+ def getVideoInfo(vid_id, video_tags_path, top_k):
+     """Return the URL and the first ``top_k`` tag names recorded for a video.
+ 
+     Args:
+         vid_id: Video id; passed to ``getURL`` and matched against the
+             ``vid_id`` column of the CSV.
+         video_tags_path: Path of a CSV file with a ``vid_id`` column and
+             ``segment1`` .. ``segment<top_k>`` columns.
+         top_k: Number of ``segment<i>`` columns to read, starting at 1.
+ 
+     Returns:
+         Dict with ``"video_url"`` (whatever ``getURL`` returns) and
+         ``"video_tags"`` (list of tag names). The tag list is empty when the
+         CSV has no row for ``vid_id``.
+     """
+     video_url = getURL(vid_id)
+ 
+     # NOTE(review): the CSV is parsed again on every call; consider loading it
+     # once in the caller if this runs for many videos.
+     entire_video_tags = pd.read_csv(video_tags_path)
+     video_tags_info = entire_video_tags.loc[entire_video_tags["vid_id"] == vid_id]
+ 
+     video_tags = []
+     # Without a matching row, ``.values[0]`` would raise IndexError.
+     if not video_tags_info.empty:
+         for i in range(1, top_k + 1):
+             video_tag_tuple = video_tags_info["segment" + str(i)].values[0]     # ex: "mobile-phone:0.361"
+             # Keep only the tag name in front of the ":" separator.
+             video_tags.append(video_tag_tuple.split(":")[0])
+ 
+     return {
+         "video_url": video_url,
+         "video_tags": video_tags
+     }