Merge branch 'Youtube' into 'master'
Youtube Crawl: YouTube crawl results and feature implementation. See merge request !1
Showing 9 changed files with 573 additions and 24 deletions
JPype1-0.7.0-cp38-cp38-win_amd64.whl
0 → 100644
No preview for this file type
Youtube/.gitignore
0 → 100644
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
Youtube/LICENSE
0 → 100644
+The MIT License (MIT)
+
+Copyright (c) 2015 Egbert Bouman
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
Youtube/README.md
0 → 100644
+# youtube-comment-downloader
+Simple script for downloading YouTube comments without using the YouTube API. The output is in line-delimited JSON.
+
+### Dependencies
+* Python 3 (the script imports urllib.parse)
+* requests
+* lxml
+* cssselect
+
+The Python packages can be installed with
+
+    pip install requests lxml cssselect
+
+### Usage
+```
+usage: downloader.py [--help] [--youtubeid YOUTUBEID] [--output OUTPUT]
+
+Download YouTube comments without using the YouTube API
+
+optional arguments:
+  --help, -h            Show this help message and exit
+  --youtubeid YOUTUBEID, -y YOUTUBEID
+                        ID of YouTube video for which to download the comments
+  --output OUTPUT, -o OUTPUT
+                        Output filename (output format is line-delimited JSON)
+```
+
+Note: in this merge request the script prompts for the video URL and output settings interactively (see downloader.py below) instead of using the flags above.
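Each output line is a standalone JSON object with the keys `cid`, `text`, `time` and `author` (see `extract_comments` in downloader.py). A minimal sketch for reading the results back, assuming the output file was named `comments.json` (a hypothetical name):

```python
import json

# Read the line-delimited JSON written by downloader.py; each line is
# one comment with the keys 'cid', 'text', 'time' and 'author'.
with open('comments.json', encoding='utf8') as fp:
    comments = [json.loads(line) for line in fp]

print(len(comments), 'comments loaded')
```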
Youtube/downloader.py
0 → 100644
+#!/usr/bin/env python
+
+from __future__ import print_function
+import sys
+import os
+import time
+import json
+import requests
+import argparse
+import lxml.html
+import io
+from urllib.parse import urlparse, parse_qs
+from lxml.cssselect import CSSSelector
+
+YOUTUBE_COMMENTS_URL = 'https://www.youtube.com/all_comments?v={youtube_id}'
+YOUTUBE_COMMENTS_AJAX_URL = 'https://www.youtube.com/comment_ajax'
+
+USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
+
+
+def find_value(html, key, num_chars=2):
+    pos_begin = html.find(key) + len(key) + num_chars
+    pos_end = html.find('"', pos_begin)
+    return html[pos_begin: pos_end]
+
+
+def extract_comments(html):
+    tree = lxml.html.fromstring(html)
+    item_sel = CSSSelector('.comment-item')
+    text_sel = CSSSelector('.comment-text-content')
+    time_sel = CSSSelector('.time')
+    author_sel = CSSSelector('.user-name')
+
+    for item in item_sel(tree):
+        yield {'cid': item.get('data-cid'),
+               'text': text_sel(item)[0].text_content(),
+               'time': time_sel(item)[0].text_content().strip(),
+               'author': author_sel(item)[0].text_content()}
+
+
+def extract_reply_cids(html):
+    tree = lxml.html.fromstring(html)
+    sel = CSSSelector('.comment-replies-header > .load-comments')
+    return [i.get('data-cid') for i in sel(tree)]
+
+
+def ajax_request(session, url, params, data, retries=10, sleep=20):
+    for _ in range(retries):
+        response = session.post(url, params=params, data=data)
+        if response.status_code == 200:
+            response_dict = json.loads(response.text)
+            return response_dict.get('page_token', None), response_dict['html_content']
+        else:
+            time.sleep(sleep)
+
+
+def download_comments(youtube_id, sleep=1):
+    session = requests.Session()
+    session.headers['User-Agent'] = USER_AGENT
+    # Get Youtube page with initial comments
+    response = session.get(YOUTUBE_COMMENTS_URL.format(youtube_id=youtube_id))
+    html = response.text
+    reply_cids = extract_reply_cids(html)
+
+    ret_cids = []
+    for comment in extract_comments(html):
+        ret_cids.append(comment['cid'])
+        yield comment
+    page_token = find_value(html, 'data-token')
+    session_token = find_value(html, 'XSRF_TOKEN', 4)
+    first_iteration = True
+
+    # Get remaining comments (the same as pressing the 'Show more' button)
+    while page_token:
+        data = {'video_id': youtube_id,
+                'session_token': session_token}
+
+        params = {'action_load_comments': 1,
+                  'order_by_time': True,
+                  'filter': youtube_id}
+
+        if first_iteration:
+            params['order_menu'] = True
+        else:
+            data['page_token'] = page_token
+
+        response = ajax_request(session, YOUTUBE_COMMENTS_AJAX_URL, params, data)
+        if not response:
+            break
+
+        page_token, html = response
+
+        reply_cids += extract_reply_cids(html)
+        for comment in extract_comments(html):
+            if comment['cid'] not in ret_cids:
+                ret_cids.append(comment['cid'])
+                yield comment
+
+        first_iteration = False
+        time.sleep(sleep)
+
+    # Get replies (the same as pressing the 'View all X replies' link)
+    for cid in reply_cids:
+        data = {'comment_id': cid,
+                'video_id': youtube_id,
+                'can_reply': 1,
+                'session_token': session_token}
+        params = {'action_load_replies': 1,
+                  'order_by_time': True,
+                  'filter': youtube_id,
+                  'tab': 'inbox'}
+        response = ajax_request(session, YOUTUBE_COMMENTS_AJAX_URL, params, data)
+        if not response:
+            break
+
+        _, html = response
+
+        for comment in extract_comments(html):
+            if comment['cid'] not in ret_cids:
+                ret_cids.append(comment['cid'])
+                yield comment
+        time.sleep(sleep)
+
+## Parse the video id out of the link the user entered
+def video_id(value):
+    query = urlparse(value)
+    if query.hostname == 'youtu.be':
+        return query.path[1:]
+    if query.hostname in ('www.youtube.com', 'youtube.com'):
+        if query.path == '/watch':
+            p = parse_qs(query.query)
+            return p['v'][0]
+        if query.path[:7] == '/embed/':
+            return query.path.split('/')[2]
+        if query.path[:3] == '/v/':
+            return query.path.split('/')[2]
+    # fail?
+    return None
+
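+# Illustrative examples (not part of the original source; VIDEOID is a placeholder):
+#   video_id('https://youtu.be/VIDEOID')                -> 'VIDEOID'
+#   video_id('https://www.youtube.com/watch?v=VIDEOID') -> 'VIDEOID'
+#   video_id('not a url')                               -> None
+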
+
+def main():
+
+    # The original argparse-based CLI, kept for reference:
+    #parser = argparse.ArgumentParser(add_help=False, description=('Download Youtube comments without using the Youtube API'))
+    #parser.add_argument('--help', '-h', action='help', default=argparse.SUPPRESS, help='Show this help message and exit')
+    #parser.add_argument('--youtubeid', '-y', help='ID of Youtube video for which to download the comments')
+    #parser.add_argument('--output', '-o', help='Output filename (output format is line delimited JSON)')
+    #parser.add_argument('--limit', '-l', type=int, help='Limit the number of comments')
+
+    # Take the video via input() instead of command-line arguments.
+    raw_link = input('Enter the YouTube video URL or ID: ')
+    # Accept a full link and cut out just the id; fall back to the raw
+    # input so that a bare video id also works.
+    youtube_id = video_id(raw_link) or raw_link
+    try:
+        result_List = []
+
+        if not youtube_id:
+            raise ValueError('Please enter a valid YouTube URL or ID')
+
+        print('Downloading Youtube comments for video:', youtube_id)
+        number = input(' Save to a file - 0, do not save - 1 : ')
+        if number == '0':
+            output = input('Enter the output filename: ')
+            # An empty limit falls back to a default of 100 comments.
+            limit_input = input('Enter the comment limit: ')
+            limit = int(limit_input) if limit_input else 100
+
+            count = 0  # was missing before the loop, causing an UnboundLocalError
+            with io.open(output, 'w', encoding='utf8') as fp:
+                for comment in download_comments(youtube_id):
+                    # json.dumps returns str on Python 3, so it can be written directly
+                    print(json.dumps(comment, ensure_ascii=False), file=fp)
+                    count += 1
+                    sys.stdout.flush()
+                    if limit and count >= limit:
+                        print('Downloaded {} comment(s)\r'.format(count))
+                        print('\nDone!')
+                        break
+
+        else:
+            count = 0
+            limit = 40
+            for comment in download_comments(youtube_id):
+                result_List.append({'cid': comment['cid'],
+                                    'text': comment['text'],
+                                    'time': comment['time'],
+                                    'author': comment['author']})
+                count += 1
+                if limit == count:
+                    print(' Comment thread created')
+                    print('\n\n\n\n\n\n\n')
+                    break
+            return result_List
+            #goto_Menu(result_List)
+
+    except Exception as e:
+        print('Error:', str(e))
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
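`download_comments()` is a generator that yields one comment dict at a time, so it can also be consumed directly from other code (main.py below drives it indirectly through `downloader.main()`). A minimal sketch, assuming `downloader.py` is importable and using a placeholder video id:

```python
# Hypothetical direct use of the generator; VIDEO_ID is a placeholder,
# not an id from this repository.
from downloader import download_comments

VIDEO_ID = 'dQw4w9WgXcQ'
for n, comment in enumerate(download_comments(VIDEO_ID), start=1):
    print(comment['time'], comment['author'], comment['text'][:60])
    if n >= 10:  # look at the first 10 comments only
        break
```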
Youtube/main.py
0 → 100644
+import downloader
+from time import sleep
+from konlpy.tag import Twitter
+from collections import Counter
+from matplotlib import rc
+import matplotlib.pyplot as plt
+from matplotlib import font_manager as fm
+import pytagcloud
+import operator
+
+
+def get_tags(Comment_List):
+    okja = []
+    for temp in Comment_List:
+        okja.append(temp['text'])
+    twitter = Twitter()
+    sentence_tag = []
+    for sentence in okja:
+        morph = twitter.pos(sentence)
+        sentence_tag.append(morph)
+        print(morph)
+        print('-' * 30)
+    print(sentence_tag)
+    print(len(sentence_tag))
+    print('\n' * 3)
+
+    # Keep only nouns of two or more characters
+    noun_adj_list = []
+    for sentence1 in sentence_tag:
+        for word, tag in sentence1:
+            if len(word) >= 2 and tag == 'Noun':
+                noun_adj_list.append(word)
+    counts = Counter(noun_adj_list)
+    print(' The 10 most frequent keywords.\n')
+    print(counts.most_common(10))
+    tags2 = counts.most_common(10)
+    taglist = pytagcloud.make_tags(tags2, maxsize=80)
+    pytagcloud.create_tag_image(taglist, 'wordcloud.jpg', size=(900, 600), fontname='Nanum Gothic', rectangular=False)
+
+
+def print_result(Comment_List):
+    for var in Comment_List:
+        print(var)
+    print('******* Search complete *******')
+    print('\n\n\n')
+
+
+def search_by_author(Comment_List, author_name):
+    result_List = []
+    for var in Comment_List:
+        if var['author'] == author_name:
+            result_List.append(var)
+    return result_List
+
+
+def search_by_keyword(Comment_List, keyword):
+    result_List = []
+    for var in Comment_List:
+        print(var['text'])
+        if keyword in var['text']:
+            result_List.append(var)
+    return result_List
+
+
+def search_by_time(Comment_List, Time_input):
+    result_List = []
+    for var in Comment_List:
+        if var['time'] == Time_input:
+            result_List.append(var)
+    return result_List
+
+
+def make_time_chart(Comment_List):
+    result_List = []
+    save_List = []
+    day_dict = {}
+    month_dict = {}
+    year_dict = {}
+    hour_dict = {}
+    minute_dict = {}
+    week_dict = {}
+    for var in Comment_List:
+        result_List.append(var['time'])
+    for i in range(len(result_List)):
+        print(result_List[i] + ' ')
+    print('\n\n\n\n')
+    temp_List = list(set(result_List))
+    for i in range(len(temp_List)):
+        print(temp_List[i] + ' ')
+    print('\n\n\n\n')
+    # Count how often each distinct timestamp string occurs
+    for i in range(len(temp_List)):
+        result_dict = {}
+        result_dict[temp_List[i]] = result_List.count(temp_List[i])
+        save_List.append(result_dict)
+
+    # Bucket the Korean relative timestamps by unit: '개월' months, '일' days,
+    # '년' years, '시간' hours, '주' weeks, '분' minutes
+    for entry in save_List:
+        for num, k in entry.items():  # each entry holds one {timestamp: count} pair
+            if num.find('개월') >= 0:
+                month_dict[num] = k
+            elif num.find('일') >= 0:
+                day_dict[num] = k
+            elif num.find('년') >= 0:
+                year_dict[num] = k
+            elif num.find('시간') >= 0:
+                hour_dict[num] = k
+            elif num.find('주') >= 0:
+                week_dict[num] = k
+            elif num.find('분') >= 0:
+                minute_dict[num] = k
+    year_data = sorted(year_dict.items(), key=operator.itemgetter(0))
+    month_data = sorted(month_dict.items(), key=operator.itemgetter(0))
+    week_data = sorted(week_dict.items(), key=operator.itemgetter(0))
+    day_data = sorted(day_dict.items(), key=operator.itemgetter(0))
+    hour_data = sorted(hour_dict.items(), key=operator.itemgetter(0))
+    minute_data = sorted(minute_dict.items(), key=operator.itemgetter(0))
+    #print(month_data)
+    #print(week_data)
+    #print(day_data)
+    make_chart(year_data, month_data, week_data, day_data, hour_data, minute_data)
+
+def make_chart(year_data, month_data, week_data, day_data, hour_data, minute_data):
+    temp_list = [year_data, month_data, week_data, day_data, hour_data, minute_data]
+    # Map each Korean time unit to its English label for the chart axis
+    units = [('년', 'years'), ('개월', 'months'), ('주', 'weeks'),
+             ('일', 'days'), ('시간', 'hours'), ('분', 'minutes')]
+    x_list = []
+    y_list = []
+    print(temp_list)
+    for var1 in temp_list:
+        for var2 in var1:
+            for korean, english in units:
+                pos = var2[0].find(korean)
+                if pos >= 0:
+                    # Keep the whole number (e.g. '10개월' -> '10months'),
+                    # not just its first digit as before
+                    x_list.append(var2[0][:pos] + english)
+                    y_list.append(int(var2[1]))
+                    break
+    print(x_list)
+    plt.bar(x_list, y_list, width=0.5, color="blue")
+    # plt.show()  # display the chart interactively instead of saving it
+    plt.savefig('chart.png', dpi=300)
+
+def call_main():
+    print(' Creating the comment thread \n')
+
+    sleep(1)
+    print(' **************************************************************')
+    print(' **************************************************************')
+    print(' **************************************************************')
+    print(' *********** Done. Please enter the details below. ************')
+    print(' **************************************************************')
+    print(' **************************************************************')
+    print(' **************************************************************')
+    a = downloader.main()
+
+    return a
+
+
+if __name__ == "__main__":
+    CommentList = call_main()
+    make_time_chart(CommentList)
+    ##author_results = search_by_author(CommentList, '광고제거기')
+    ##text_results = search_by_keyword(CommentList, '지현')
+    ##get_tags(CommentList)
+    ##print_result(author_results)
+    ##print_result(text_results)
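A note on the konlpy dependency used above: in newer konlpy releases the `Twitter` tagger was renamed `Okt`, and the old name emits a deprecation warning. A minimal keyword-count sketch in the spirit of `get_tags`, assuming konlpy and its Java backend (JPype, hence the bundled wheel) are installed; `top_keywords` is a hypothetical helper, not part of this repository:

```python
from collections import Counter
from konlpy.tag import Okt  # Twitter was renamed Okt in newer konlpy releases

def top_keywords(texts, n=10):
    # Count nouns of two or more characters, as get_tags does.
    okt = Okt()
    nouns = [word for text in texts
             for word, tag in okt.pos(text)
             if tag == 'Noun' and len(word) >= 2]
    return Counter(nouns).most_common(n)

# Example: top_keywords([c['text'] for c in downloader.main()])
```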
Youtube/requirements.txt
0 → 100644
readme.md
deleted
100644 → 0
-Features to develop
-- Find comments by your own nickname
-- Find comments by someone else's nickname
-- Find comments by keyword
-- Find the comments with the most likes
-
-2019.11.01 ~ 2019.11.08
-Phase 1
-- Decide what to analyze and how to implement it
-
-2019.11.09 ~ 2019.11.16
-Phase 2
-- Actual implementation
-
-2019.11.17 ~ 2019.11.23
-Phase 3
-- Merge the analyses and exchange feedback on each other's work
-
-2019.11.24 ~ 2019.12.01
-Phase 4
-- Implement a web server with node js
-
-2019.12.02 ~ 2019.12.05
-Final review and presentation preparation
\ No newline at end of file
youtube.md
0 → 100644
+Youtube revisions, round 3
+-----------------------------------------------------
+Items to implement on top of round 1
+
+1. A function that takes the command-line parameters via input
+2. A function that reads the list back in from a csv file
+3. Functions that process the fetched data
+   * a function that finds the most frequent keywords
+   * a function that searches by author
+   * a function that finds the comments I wrote
+   * a function that finds the most active commenter
+-----------------------------------------------------
+Round 2 updates
+
+1. Changed the command-line parameters to be taken via input
+2. The script now asks whether to save to a csv file; if not, the comments are stored in a list as dictionaries
+3. Checked, as a test, that the values stored in the list print correctly
+-----------------------------------------------------
+Further work
+
+1. Split into modules (a list-returning module and the main part); if they are not split, extra functions have to be implemented instead
+2. Additional features are needed for how the data set should be split up and served
+
+-----------------------------------------------------
+
+1. Fixed the bugs from round 2
+2. Implemented some functions for processing the fetched comments:
+   (1) a function to search by keyword
+   (2) a function to search by author name
+
+-----------------------------------------------------
+Further work
+
+1. Extract nouns with konlpy (http://konlpy.org/ko/latest/) and analyze keywords
+2. Extract the timestamps and organize the comments by time period
+-----------------------------------------------------
+Round 4 updates
+
+1. Analyzed keywords with konlpy and printed a list of the most frequent keywords
+2. Built a wordcloud from feature 1
+3. Implemented search by time period
+4. Implemented a list sorted by time period
+-----------------------------------------------------
+Further work
+
+1. Chart the time-sorted list with matplotlib
+2. Organize the code so each feature is accessible on its own
+-----------------------------------------------------
+Round 5 updates
+
+1. Charted the time-sorted list with matplotlib
\ No newline at end of file