김건

4차 구현사항 upload

No preview for this file type
1 import downloader 1 import downloader
2 from time import sleep 2 from time import sleep
3 +from konlpy.tag import Twitter
4 +from collections import Counter
5 +import pytagcloud
6 +import operator
def get_tags(Comment_List):
    """Print the ten most frequent nouns in the comments and draw a word cloud.

    Every comment's ``'text'`` value is tagged with konlpy's ``Twitter``
    tagger. Words tagged ``'Noun'`` that are at least two characters long are
    counted, the ten most common are printed, and the same ten are rendered
    to ``wordcloud.jpg`` through pytagcloud.

    Args:
        Comment_List: iterable of mappings, each providing a ``'text'`` entry.
    """
    # Collect all texts first so a malformed comment fails before the tagger
    # is instantiated.
    texts = [comment['text'] for comment in Comment_List]

    twitter = Twitter()
    sentence_tag = []
    for sentence in texts:
        morph = twitter.pos(sentence)
        sentence_tag.append(morph)
        print(morph)
        print('-' * 30)
    print(sentence_tag)
    print(len(sentence_tag))
    print('\n' * 3)

    # Single-character words are dropped; only nouns are kept as keywords.
    nouns = []
    for tagged_sentence in sentence_tag:
        for word, tag in tagged_sentence:
            if len(word) >= 2 and tag == 'Noun':
                nouns.append(word)

    # Computed once and reused for both the printout and the word cloud.
    top_keywords = Counter(nouns).most_common(10)
    print(' 가장 많이 등장한 10개의 키워드. \n')
    print(top_keywords)

    taglist = pytagcloud.make_tags(top_keywords, maxsize=80)
    pytagcloud.create_tag_image(
        taglist,
        'wordcloud.jpg',
        size=(900, 600),
        fontname='Nanum Gothic',
        rectangular=False,
    )
3 34
4 def print_result(Comment_List) : 35 def print_result(Comment_List) :
5 for var in Comment_List : 36 for var in Comment_List :
...@@ -23,6 +54,66 @@ def search_by_keyword(Comment_List,keyword) : ...@@ -23,6 +54,66 @@ def search_by_keyword(Comment_List,keyword) :
23 result_List.append(var) 54 result_List.append(var)
24 55
25 return result_List 56 return result_List
def search_by_time(Comment_List, Time_input):
    """Return the comments whose ``'time'`` value equals *Time_input*.

    Args:
        Comment_List: iterable of mappings, each providing a ``'time'`` entry.
        Time_input: value compared for equality against each ``'time'`` entry.

    Returns:
        A new list with the matching comments, in their original order.

    Raises:
        KeyError: if a comment has no ``'time'`` entry.
    """
    return [comment for comment in Comment_List if comment['time'] == Time_input]
63 +
def make_time_chart(Comment_List):
    """Print how often each time label occurs, grouped by time unit.

    The function prints, in this order:
      1. every comment's ``'time'`` label (one per line, with a trailing space),
      2. each distinct label once, in first-seen order,
      3. the month, week and day groups as lists of ``(label, count)`` pairs.

    Labels are assigned to a unit by the first marker they contain (checked
    in the order month, day, year, hour, week, minute). Within a group the
    pairs are sorted by the first integer in the label, so ``'2일 전'`` comes
    before ``'10일 전'``; labels without a number sort first.

    Args:
        Comment_List: iterable of mappings, each providing a string ``'time'``
            entry.

    Returns:
        None. All results are written to standard output.
    """
    # Function-scope import keeps this block independent of the file's
    # top-level import list.
    import re

    def leading_number(label):
        # First run of digits in the label, or -1 when there is none.
        found = re.search(r'\d+', label)
        return int(found.group()) if found else -1

    def sorted_by_amount(bucket):
        # Numeric order first; the label itself breaks ties deterministically.
        return sorted(
            bucket.items(),
            key=lambda item: (leading_number(item[0]), item[0]),
        )

    times = [comment['time'] for comment in Comment_List]
    for label in times:
        print(label + ' ')
    print('\n\n\n\n')

    # One pass instead of calling list.count() for every distinct label.
    counts = Counter(times)
    for label in counts:
        print(label + ' ')
    print('\n\n\n\n')

    # Marker order is significant: the first marker found in a label wins.
    unit_markers = (
        ('개월', 'month'),
        ('일', 'day'),
        ('년', 'year'),
        ('시간', 'hour'),
        ('주', 'week'),
        ('분', 'minute'),
    )
    buckets = {unit: {} for _, unit in unit_markers}
    for label, count in counts.items():
        for marker, unit in unit_markers:
            if marker in label:
                buckets[unit][label] = count
                break

    # Only these three groups are printed; year, hour and minute labels are
    # still classified so they never fall through into another group.
    print(sorted_by_amount(buckets['month']))
    print(sorted_by_amount(buckets['week']))
    print(sorted_by_amount(buckets['day']))
116 +
26 def call_main (): 117 def call_main ():
27 print(' Comment Thread 생성중 \n') 118 print(' Comment Thread 생성중 \n')
28 119
...@@ -35,10 +126,14 @@ def call_main (): ...@@ -35,10 +126,14 @@ def call_main ():
35 print(' **************************************************************') 126 print(' **************************************************************')
36 print(' **************************************************************') 127 print(' **************************************************************')
37 a = downloader.main() 128 a = downloader.main()
38 - author_results = search_by_author(a,'광고제거기') 129 +
39 - text_resutls = search_by_keyword(a,'지현')
40 - print_result(author_results)
41 - print_result(text_resutls)
42 return a 130 return a
43 131
if __name__ == "__main__":
    # Guarded so the download and analysis run only when this file is
    # executed as a script, not when it is imported.
    CommentList = call_main()
    make_time_chart(CommentList)
    # Other analyses, currently disabled:
    # author_results = search_by_author(CommentList,'광고제거기')
    # text_resutls = search_by_keyword(CommentList,'지현')
    # get_tags(CommentList)
    # print_result(author_results)
    # print_result(text_resutls)
......
...@@ -2,3 +2,9 @@ requests ...@@ -2,3 +2,9 @@ requests
2 beautifulsoup4 2 beautifulsoup4
3 lxml 3 lxml
4 cssselect 4 cssselect
5 +### 크롤링
6 +pygame
7 +pytagcloud
8 +### wordcloud
9 +JPype1
10 +### 키워드 분석
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -34,3 +34,15 @@ Youtube 3차 수정 사항 ...@@ -34,3 +34,15 @@ Youtube 3차 수정 사항
34 34
35 1. konlpy (http://konlpy.org/ko/latest/)를 통하여 명사 추출 후 keyword 분석하기 35 1. konlpy (http://konlpy.org/ko/latest/)를 통하여 명사 추출 후 keyword 분석하기
36 2. 시간대를 추출하여 시간대 별로 Comment 정리하기 36 2. 시간대를 추출하여 시간대 별로 Comment 정리하기
37 +-----------------------------------------------------
38 +4차 개발사항
39 +
40 +1. konlpy를 이용하여 keyword 분석 후 가장 많이 등장한 키워드 리스트 출력
41 +2. 1번 기능을 사용하여 wordcloud 구성
42 +3. 시간대를 이용하여 검색할 수 있는 기능 구현
43 +4. 시간대 별로 sort된 리스트를 가질 수 있도록 구현
44 +-----------------------------------------------------
45 +추가 구현 사항
46 +
47 +1. 시간대 별로 sort된 리스트를 matplotlib python을 이용하여 차트화 시키기
48 +2. 기능 별로 접근할 수 있도록 정리할 것
...\ No newline at end of file ...\ No newline at end of file
......