이현규

Rebase static files and models

......@@ -45,7 +45,7 @@ if __name__ == "__main__":
"that you have already run eval.py onto this, such that "
"inference_model.* files already exist.")
flags.DEFINE_string(
"input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/test/test*.tfrecord",
"input_data_pattern", "/Volumes/HDD/develop/yt8m/3/frame/eval/eval*.tfrecord",
"File glob defining the evaluation dataset in tensorflow.SequenceExample "
"format. The SequenceExamples are expected to have an 'rgb' byte array "
"sequence feature as well as a 'labels' int64 context feature.")
......
......@@ -7,14 +7,20 @@ import src.pb_util as pbutil
import src.video_recommender as recommender
import src.video_util as videoutil
# Define model paths.
# VIDEO_TAGS_PATH lives in this group because the tag CSV has a
# model-specific variant; all four values must be switched together.
# Old model (currently active).
MODEL_PATH = "./model/inference_model/segment_inference_model"
TAG_VECTOR_MODEL_PATH = "./model/tag_vectors.model"
VIDEO_VECTOR_MODEL_PATH = "./model/video_vectors.model"
VIDEO_TAGS_PATH = "./statics/kaggle_solution_40k.csv"
# New model: to switch, comment out the four assignments above and
# uncomment the four below.
# MODEL_PATH = "./new_model/inference_model/segment_inference_model"
# TAG_VECTOR_MODEL_PATH = "./new_model/tag_vectors.model"
# VIDEO_VECTOR_MODEL_PATH = "./new_model/video_vectors.model"
# VIDEO_TAGS_PATH = "./statics/new_kaggle_solution_40k.csv"
# Define static file paths (shared by both model variants).
# VIDEO_TAGS_PATH is deliberately not repeated here: a second assignment
# would silently override the model-specific value selected above.
SEGMENT_LABEL_PATH = "./statics/segment_label_ids.csv"
VOCAB_PATH = "./statics/vocabulary.csv"
# Define parameters.
......
This file is too large to display.
This file is too large to display.
......@@ -4,7 +4,7 @@ import pandas as pd
# Load files.
nltk.download('stopwords')
vocab = pd.read_csv('../vocabulary.csv')
vocab = pd.read_csv('./new_statics/vocabulary.csv')
# Lowercase the descriptions and replace every character that is not an
# ASCII letter or digit (parentheses, punctuation, ...) with a space.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave the text
# unchanged because the literal "[^a-zA-Z0-9]" never occurs in it.
vocab['WikiDescription'] = (
    vocab['WikiDescription']
    .str.lower()
    .str.replace('[^a-zA-Z0-9]', ' ', regex=True)
)
......
......@@ -8,7 +8,7 @@ def recommend_videos(tags, tag_model_path, video_model_path, top_k):
video_vector = np.zeros(100)
for (tag, weight) in tags:
if tag in tag_vectors.index_to_key:
if tag in tag_vectors.vocab:
video_vector = video_vector + (tag_vectors[tag] * float(weight))
else:
# Pass if tag is unknown
......
......@@ -5,16 +5,12 @@ from gensim.models import Word2Vec
BATCH_SIZE = 1000
def vectorization_video():
print('[0.1 0.2]')
if __name__ == '__main__':
tag_vectors = Word2Vec.load("tag_vectors.model").wv
tag_vectors = Word2Vec.load("new_model/tag_vectors.model").wv
video_vectors = Word2Vec().wv # Empty model
# Load video recommendation tags.
video_tags = pd.read_csv('statics/kaggle_solution_40k.csv')
video_tags = pd.read_csv('statics/new_kaggle_solution_40k.csv')
# Define batch variables.
batch_video_ids = []
......