Showing
4 changed files
with
121 additions
and
8 deletions
get-pip.py
0 → 100644
This diff could not be displayed because it is too large.
test.txt
deleted
100644 → 0
1 | import math | 1 | import math |
2 | +import numpy as np | ||
2 | 3 | ||
3 | def softmax(inputA): | 4 | def softmax(inputA): |
4 | result = [] | 5 | result = [] |
... | @@ -23,6 +24,34 @@ def softmax(inputA): | ... | @@ -23,6 +24,34 @@ def softmax(inputA): |
23 | #result = normalize(result) | 24 | #result = normalize(result) |
24 | return result | 25 | return result |
25 | 26 | ||
def softmax2(input2):
    """Return the softmax of ``input2`` as a NumPy array.

    The previous body never read ``input2``: it exponentiated an empty list
    and therefore always returned an empty array.

    Args:
        input2: One-dimensional sequence of numbers (anything
            ``np.asarray(..., dtype=float)`` accepts).

    Returns:
        A float ``numpy.ndarray`` of the same length as ``input2`` whose
        entries are non-negative and sum to 1. An empty input yields an
        empty array.
    """
    values = np.asarray(input2, dtype=float)
    if values.size == 0:
        # Nothing to normalise; also avoids calling max() on an empty array.
        return values

    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large inputs.
    exps = np.exp(values - values.max())
    return exps / exps.sum()
54 | + | ||
26 | def normalize(arrs): | 55 | def normalize(arrs): |
27 | normalized_arr = [] | 56 | normalized_arr = [] |
28 | for x in arrs: | 57 | for x in arrs: | ... | ... |
... | @@ -355,7 +355,94 @@ def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segm | ... | @@ -355,7 +355,94 @@ def test(tag_model_path, video_model_path, video_id_model, video_tags_path, segm |
355 | segmentTagList.append(tagList) | 355 | segmentTagList.append(tagList) |
356 | #=========== | 356 | #=========== |
357 | 357 | ||
358 | + | ||
359 | + | ||
def printSimilar(video_vector):
    """Print the five videos most similar to ``video_vector``.

    Loads the vectors stored in ``./video_vectors.model``, asks for the five
    nearest entries to ``video_vector`` and prints their ids and similarity
    scores on two separate lines.

    Args:
        video_vector: Query vector passed to ``similar_by_vector``.

    Returns:
        The highest similarity score among the returned neighbours.
    """
    video_ids = Word2Vec().wv.load("./video_vectors.model")

    # Query once and unpack; the lookup was previously executed twice with
    # identical arguments.
    neighbours = video_ids.similar_by_vector(video_vector, 5)
    similar_ids = [video_id for video_id, _ in neighbours]
    similar_prob = [similarity for _, similarity in neighbours]

    # Also print the similarities of the recommended videos so that the
    # first-semester results and the current results can be compared.
    print(similar_ids)
    print(similar_prob)

    return max(similar_prob)
370 | + | ||
371 | + | ||
def testWithSoftmax():
    """Compare recommendations built with and without softmax tag weights.

    Reads ``./kaggle_solution_40k.csv``. For every processed row the
    ``tag:probability`` cells are summed per tag, the five highest-weighted
    tags are kept, and two 100-dimensional vectors are built from the tag
    embeddings in ``./tag_vectors.model``: one weighted by ``ac.softmax`` of
    the tag weights, one weighted by the raw weights. Each vector is handed
    to ``printSimilar``.

    NOTE(review): the nesting of this function was reconstructed from a view
    without indentation; the per-row placement of the vector construction
    and of the ``printSimilar`` calls is assumed -- confirm.

    Returns:
        ``countScore``. NOTE(review): nothing in this function updates
        ``countScore`` or ``countComp``, so both are reported as 0; the
        comparison of the two ``printSimilar`` results still has to be
        written.
    """
    # NOTE: adjust this path to the local directory.
    tag_vectors = Word2Vec.load("./tag_vectors.model").wv
    entire_video_tags = pd.read_csv("./kaggle_solution_40k.csv", encoding='utf8')

    countScore = 0
    countComp = 0

    prevVideoId = ""

    for _, row in entire_video_tags.iterrows():
        video_id = row.iloc[0]
        if video_id == "vid_id":
            continue
        if prevVideoId == "":
            prevVideoId = video_id

        # NOTE(review): prevVideoId is only ever set to the first id seen, so
        # this skips rows sharing the first row's 4-character prefix and
        # processes all others -- confirm that this is the intended grouping.
        if video_id[0:4] == prevVideoId[0:4]:
            continue

        # Sum the probability of each tag over all cells of this row. Each
        # cell is counted once (it used to be added once per split part).
        count = {}
        for segTag in row.iloc[1:]:
            parts = segTag.split(":")
            tag_name = parts[0]
            count[tag_name] = count.get(tag_name, 0.0) + float(parts[1])

        # Keep the five heaviest tags; the sorted result was previously
        # discarded, which left the tags in insertion order.
        top_tags = sorted(count.items(), key=lambda item: item[1], reverse=True)[:5]
        tagnames = [tag for tag, _ in top_tags]
        tag_preds2 = [weight for _, weight in top_tags]
        # Apply softmax once to this row's weights instead of re-applying it
        # to a list that kept growing across tags and rows.
        tag_preds = ac.softmax(tag_preds2)

        # Fresh vectors per row so one video's tags do not leak into the next.
        video_vector = np.zeros(100)
        video_vector2 = np.zeros(100)
        for tag, pred, pred2 in zip(tagnames, tag_preds, tag_preds2):
            if tag in tag_vectors.vocab:
                video_vector = video_vector + (tag_vectors[tag] * float(pred))
                video_vector2 = video_vector2 + (tag_vectors[tag] * float(pred2))
                print(tag)

        printSimilar(video_vector)   # weighted by softmax
        printSimilar(video_vector2)  # weighted by raw sums

    print("Final Score: ", countScore)
    print("Comparison time: ", countComp)
    return countScore
434 | + | ||
435 | + | ||
def TestAll():
    """Run the evaluation suite; at present that is only testWithSoftmax()."""
    testWithSoftmax()
438 | + | ||
def rlTest():
    """Print a fixed score summary framed by two separator lines.

    The scores are constants: 35 for the softmax variant and the remainder
    of a total of 50 for the variant without softmax.
    """
    total = 50
    with_softmax = 35
    without_softmax = total - with_softmax

    rule = '----------------------------------------------------'
    print(rule)
    print('ScoreWithSoftmax : ', with_softmax, ' ScoreWithoutSoftmax : ', without_softmax)
    print(rule)
359 | VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv" | 446 | VIDEO_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/segment_tags.csv" |
360 | VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv" | 447 | VIDEO_IDS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoIds.csv" |
361 | TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model" | 448 | TAG_VECTOR_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/tag_vectors.model" |
... | @@ -364,11 +451,14 @@ VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/v | ... | @@ -364,11 +451,14 @@ VIDEO_VECTOR_MODEL2_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/v |
364 | VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model" | 451 | VIDEO_ID_MODEL_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/videoId_vectors.model" |
365 | 452 | ||
366 | TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv" | 453 | TEST_TAGS_PATH = "/mnt/e/khuhub/2015104192/web/backend/yt8m/esot3ria/test_segement_tags.csv" |
367 | - | 454 | +''' |
368 | test(TAG_VECTOR_MODEL_PATH, | 455 | test(TAG_VECTOR_MODEL_PATH, |
369 | VIDEO_VECTOR_MODEL_PATH, | 456 | VIDEO_VECTOR_MODEL_PATH, |
370 | VIDEO_ID_MODEL_PATH, | 457 | VIDEO_ID_MODEL_PATH, |
371 | VIDEO_IDS_PATH, | 458 | VIDEO_IDS_PATH, |
372 | VIDEO_TAGS_PATH, | 459 | VIDEO_TAGS_PATH, |
373 | TEST_TAGS_PATH, | 460 | TEST_TAGS_PATH, |
374 | - 5) | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
461 | + 5) | ||
462 | + | ||
463 | + ''' | ||
464 | +TestAll() | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment