Showing
4 changed files
with
119 additions
and
6 deletions
JPype1-0.7.0-cp38-cp38-win_amd64.whl
0 → 100644
No preview for this file type
1 | import downloader | 1 | import downloader |
2 | from time import sleep | 2 | from time import sleep |
3 | +from konlpy.tag import Twitter | ||
4 | +from collections import Counter | ||
5 | +import pytagcloud | ||
6 | +import operator | ||
def get_tags(Comment_List):
    """Extract the most frequent nouns from comments and render a tag cloud.

    Each comment's ``'text'`` is POS-tagged with KoNLPy's ``Twitter`` tagger.
    Words tagged ``'Noun'`` that are at least two characters long are counted,
    the ten most common are printed, and a tag-cloud image is written to
    ``wordcloud.jpg`` via ``pytagcloud``.

    NOTE(review): KoNLPy documents ``Twitter`` as renamed to ``Okt`` in newer
    releases -- confirm the installed version before switching.

    Args:
        Comment_List: Iterable of comment mappings, each with a ``'text'`` key.

    Returns:
        The list of ``(word, count)`` pairs for the ten most common nouns
        (the original returned nothing; callers that ignore the result are
        unaffected).
    """
    twitter = Twitter()

    # Tag every comment in a single pass; the per-sentence dump below is the
    # diagnostic output the original produced.
    sentence_tag = []
    for comment in Comment_List:
        morph = twitter.pos(comment['text'])
        sentence_tag.append(morph)
        print(morph)
        print('-' * 30)
    print(sentence_tag)
    print(len(sentence_tag))
    print('\n' * 3)

    # Only nouns of length >= 2 are kept; single-character nouns are dropped.
    nouns = [
        word
        for tagged_sentence in sentence_tag
        for word, tag in tagged_sentence
        if len(word) >= 2 and tag == 'Noun'
    ]
    top_keywords = Counter(nouns).most_common(10)

    print(' 가장 많이 등장한 10개의 키워드. \n')
    print(top_keywords)

    taglist = pytagcloud.make_tags(top_keywords, maxsize=80)
    pytagcloud.create_tag_image(
        taglist,
        'wordcloud.jpg',
        size=(900, 600),
        fontname='Nanum Gothic',
        rectangular=False,
    )
    return top_keywords
3 | 34 | ||
4 | def print_result(Comment_List) : | 35 | def print_result(Comment_List) : |
5 | for var in Comment_List : | 36 | for var in Comment_List : |
... | @@ -23,6 +54,66 @@ def search_by_keyword(Comment_List,keyword) : | ... | @@ -23,6 +54,66 @@ def search_by_keyword(Comment_List,keyword) : |
23 | result_List.append(var) | 54 | result_List.append(var) |
24 | 55 | ||
25 | return result_List | 56 | return result_List |
def search_by_time(Comment_List, Time_input):
    """Return the comments whose ``'time'`` value equals ``Time_input``.

    Args:
        Comment_List: Iterable of comment mappings, each with a ``'time'`` key.
        Time_input: Value compared for equality against each comment's
            ``'time'`` entry.

    Returns:
        A new list holding the matching comments in their original order.
    """
    return [
        comment for comment in Comment_List if comment['time'] == Time_input
    ]
63 | + | ||
def _relative_time_sort_key(item):
    """Order ``(label, count)`` pairs by the label's leading number, then label."""
    label = item[0]
    digits = ''
    for ch in label.lstrip():
        if not ch.isdecimal():
            break
        digits += ch
    # Labels without a leading number sort first; ties fall back to the text.
    return (int(digits) if digits else 0, label)


def make_time_chart(Comment_List):
    """Count comments per time label and group the counts by time unit.

    Every comment's ``'time'`` string is tallied, then each distinct label is
    placed in the bucket of the first unit marker it contains (month, day,
    year, hour, week, minute -- checked in that order). Labels containing no
    marker are ignored. Each bucket is sorted by the number at the start of
    the label, so that e.g. ``'2일 전'`` precedes ``'10일 전'`` (a plain string
    sort would put ``'10...'`` first).

    No chart is drawn here; the month, week and day results are printed.

    Args:
        Comment_List: Iterable of comment mappings, each with a ``'time'``
            key holding a string (presumably a relative time such as
            ``'3일 전'`` -- verify against the downloader).

    Returns:
        A dict with keys ``'year'``, ``'month'``, ``'week'``, ``'day'``,
        ``'hour'`` and ``'minute'``; each value is a sorted list of
        ``(label, count)`` tuples. (The original returned nothing.)
    """
    # Marker substrings in the precedence of the original if/elif chain;
    # '개월' must be tested before '일' and the first match wins.
    unit_markers = (
        ('개월', 'month'),
        ('일', 'day'),
        ('년', 'year'),
        ('시간', 'hour'),
        ('주', 'week'),
        ('분', 'minute'),
    )

    result_List = [comment['time'] for comment in Comment_List]

    # Diagnostic dump of every label, retained from the original.
    for label in result_List:
        print(label + ' ')
    print('\n\n\n\n')

    # One O(n) pass instead of calling list.count() once per distinct label.
    counts = Counter(result_List)

    # Diagnostic dump of the distinct labels, retained from the original.
    for label in counts:
        print(label + ' ')
    print('\n\n\n\n')

    buckets = {unit: {} for _, unit in unit_markers}
    for label, count in counts.items():
        for marker, unit in unit_markers:
            if marker in label:
                buckets[unit][label] = count
                break

    chart = {
        unit: sorted(bucket.items(), key=_relative_time_sort_key)
        for unit, bucket in buckets.items()
    }

    print(chart['month'])
    print(chart['week'])
    print(chart['day'])
    return chart
116 | + | ||
26 | def call_main (): | 117 | def call_main (): |
27 | print(' Comment Thread 생성중 \n') | 118 | print(' Comment Thread 생성중 \n') |
28 | 119 | ||
... | @@ -35,10 +126,14 @@ def call_main (): | ... | @@ -35,10 +126,14 @@ def call_main (): |
35 | print(' **************************************************************') | 126 | print(' **************************************************************') |
36 | print(' **************************************************************') | 127 | print(' **************************************************************') |
37 | a = downloader.main() | 128 | a = downloader.main() |
38 | - author_results = search_by_author(a,'광고제거기') | 129 | + |
39 | - text_resutls = search_by_keyword(a,'지현') | ||
40 | - print_result(author_results) | ||
41 | - print_result(text_resutls) | ||
42 | return a | 130 | return a |
43 | 131 | ||
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # trigger a download.
    CommentList = call_main()
    make_time_chart(CommentList)

    # Other analyses defined in this module; uncomment the ones to run.
    # author_results = search_by_author(CommentList, '광고제거기')
    # text_results = search_by_keyword(CommentList, '지현')
    # get_tags(CommentList)
    # print_result(author_results)
    # print_result(text_results)
... | @@ -34,3 +34,15 @@ Youtube 3차 수정 사항 | ... | @@ -34,3 +34,15 @@ Youtube 3차 수정 사항 |
34 | 34 | ||
35 | 1. konlpy (http://konlpy.org/ko/latest/)를 통하여 명사 추출 후 keyword 분석하기 | 35 | 1. konlpy (http://konlpy.org/ko/latest/)를 통하여 명사 추출 후 keyword 분석하기 |
36 | 2. 시간대를 추출하여 시간대 별로 Comment 정리하기 | 36 | 2. 시간대를 추출하여 시간대 별로 Comment 정리하기 |
37 | +----------------------------------------------------- | ||
38 | +4차 개발사항 | ||
39 | + | ||
40 | +1. konlpy를 이용하여 keyword 분석 후 가장 많이 등장한 키워드 리스트 출력 | ||
41 | +2. 1번 기능을 사용하여 wordcloud 구성 | ||
42 | +3. 시간대를 이용하여 검색할 수 있는 기능 구현 | ||
43 | +4. 시간대 별로 sort된 리스트를 가질 수 있도록 구현 | ||
44 | +----------------------------------------------------- | ||
45 | +추가 구현 사항 | ||
46 | + | ||
47 | +1. 시간대별로 sort된 리스트를 Python의 matplotlib을 이용하여 차트화하기 | ||
48 | +2. 기능 별로 접근할 수 있도록 정리할 것 | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment