Kim Gun (김건)

Schema and Dataset

Naver news comment crawler (Selenium + BeautifulSoup)

from selenium import webdriver
from selenium.common import exceptions
from bs4 import BeautifulSoup
import time
import pymysql

conn = pymysql.connect(host='database-1.cg0acc768it6.us-east-1.rds.amazonaws.com', user='admin', password='41545737!', db='os_db', charset='utf8')
curs = conn.cursor()

def getData(url):
    ## Chrome options
    options = webdriver.ChromeOptions()
    #options.add_argument('headless')      # uncomment to run without a visible browser
    #options.add_argument("disable-gpu")
    _url = url  # URL to crawl
    webDriver = "C:\\Users\\KimGun\\Desktop\\chromedriver_win32\\chromedriver.exe"  # local chromedriver path
    driver = webdriver.Chrome(webDriver, options=options)
    driver.get(_url)
    pageCnt = 0
    driver.implicitly_wait(3)  # wait until the page finishes loading
    try:
        while True:  # loop until the comment pages run out
            # find the 'more' button on the Naver News comment pane with
            # find_element_by_css_selector and keep clicking it to the end
            time.sleep(0.5)
            driver.find_element_by_css_selector(".u_cbox_btn_more").click()
            pageCnt = pageCnt + 1

    except (exceptions.ElementNotVisibleException, exceptions.NoSuchElementException):  # no more pages
        pass

    except Exception as e:  # report any other error
        print(e)

    pageSource = driver.page_source  # grab the final page source
    result = BeautifulSoup(pageSource, "lxml")  # lxml parser for speed

    # pull the raw nickname, text, and time nodes
    comments_raw = result.find_all("span", {"class": "u_cbox_contents"})
    nicknames_raw = result.find_all("span", {"class": "u_cbox_nick"})
    times_raw = result.find_all("span", {"class": "u_cbox_date"})

    # keep only the text of each node, as plain lists
    comments = [comment.text for comment in comments_raw]
    nicknames = [nickname.text for nickname in nicknames_raw]
    times = [t.text for t in times_raw]  # 't', not 'time', to avoid shadowing the time module

    naverNewsList = []

    for i in range(len(comments)):
        info_dic = {'userID': nicknames[i], 'comment': comments[i], 'time': times[i]}
        naverNewsList.append(info_dic)

    print(naverNewsList)
    driver.quit()
    return naverNewsList

_url = input('Enter the URL to crawl: ')
print('Fetching comment_list.....')
cList = getData(_url)
i = 194  # starting primary-key value, continuing after rows already in the table
for row in cList:  # name, text, time
    temp = row['comment'].replace("'", '')  # drop single quotes so the formatted SQL stays valid
    sql = "insert into naver (Naver_ID,Naver_Name,Naver_Text,Naver_Date,Naver_link) values({},'{}','{}','{}','{}')".format(i, row['userID'], temp, row['time'], _url)
    print(sql)
    i = i + 1
    curs.execute(sql)
conn.commit()
conn.close()
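
The loop above keeps the formatted SQL valid by deleting single quotes from each comment. A minimal sketch of the same inserts with pymysql placeholders (same naver table, cursor, and cList assumed) avoids both the quote-stripping and the injection risk of building SQL with str.format:

# Hedged alternative to the formatted-string insert; pymysql escapes each value itself.
insert_sql = ("insert into naver (Naver_ID,Naver_Name,Naver_Text,Naver_Date,Naver_link) "
              "values (%s, %s, %s, %s, %s)")
for offset, row in enumerate(cList):
    curs.execute(insert_sql, (194 + offset, row['userID'], row['comment'], row['time'], _url))
conn.commit()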

Twitter crawler (GetOldTweets3, run as a Jupyter notebook)

import GetOldTweets3 as got
import pymysql
import datetime
import time
from random import uniform
from tqdm import tqdm_notebook

conn = pymysql.connect(host='database-1.cg0acc768it6.us-east-1.rds.amazonaws.com', user='admin', password='41545737!', db='os_db', charset='utf8')
curs = conn.cursor()

def get_tweets(criteria):
    tweet = got.manager.TweetManager.getTweets(criteria)
    tweet_list = []

    for index in tqdm_notebook(tweet):

        # metadata for each tweet
        username = index.username
        link = index.permalink
        content = index.text
        tweet_date = index.date.strftime("%Y-%m-%d")
        retweets = index.retweets
        favorites = index.favorites

        # merge the fields we keep
        info_list = {'username': username, 'text': content, 'time': tweet_date, 'link': link}
        tweet_list.append(info_list)
        # pause between tweets
        time.sleep(uniform(1, 2))
    return tweet_list

days_range = []

start = datetime.datetime.strptime("2019-12-08", "%Y-%m-%d")
end = datetime.datetime.strptime("2019-12-11", "%Y-%m-%d")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days)]

for date in date_generated:
    days_range.append(date.strftime("%Y-%m-%d"))
print("=== Default tweet collection period: {} to {} ===".format(days_range[0], days_range[-1]))
print("=== Collecting {} day(s) of data ===".format(len(days_range)))

# align the collection window
start_date = days_range[0]
end_date = (datetime.datetime.strptime(days_range[-1], "%Y-%m-%d")
            + datetime.timedelta(days=1)).strftime("%Y-%m-%d")  # setUntil excludes the end date, so add one day

my_key = input("Enter a keyword to search: ")

tweetCriteria = got.manager.TweetCriteria().setQuerySearch(my_key)\
                                           .setSince(start_date)\
                                           .setUntil(end_date)\
                                           .setMaxTweets(10)
result_list = get_tweets(tweetCriteria)

i = 0
for row in result_list:  # name, text, date, link
    sql = "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values({},'{}','{}','{}','{}')".format(i, row['username'], row['text'], row['time'], row['link'])
    print(sql)
    i = i + 1
    curs.execute(sql)
conn.commit()
conn.close()

Sample run output (keyword 이동찬, 10 tweets):

=== Default tweet collection period: 2019-12-08 to 2019-12-10 ===
=== Collecting 3 day(s) of data ===
Enter a keyword to search: 이동찬
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(0,'r9OWHkiDE9EG3W9','I reached alamo for take my money back several times. They only keep showing me wrong information. Maybe ai works for it.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195691775451136')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(1,'r9OWHkiDE9EG3W9','Nope. I gave up 431 dollars already. I just want ppl not to lose their money in a pleasant place.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195233975504896')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(2,'optimum0524','IMF 환란이전 96년쯤 예금금리가 12%였던걸로 기억합니다. 80년대 코오롱그룹 이동찬 회장은 장영자에게 어음수표깡을 받으면서 50%이상의 금리를 적용받았다고하니 정말 요즘기준으로는 이해하기 힘든 시대였지요.','2019-12-10','https://twitter.com/optimum0524/status/1204190641866956801')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(3,'Naerum10','@이동찬','2019-12-09','https://twitter.com/Naerum10/status/1203920823725121537')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(4,'4bur0','이동찬 달려오는 폼이 너무 웃겨','2019-12-08','https://twitter.com/4bur0/status/1203736355584393216')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(5,'r9OWHkiDE9EG3W9','And i tried to contact to the headquarters. They only says “contact the branch”. So irresponsible and irritating reaction to customers.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517328811417600')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(6,'r9OWHkiDE9EG3W9','They told me deposit had been refunded at that time, but now i know they gave me a bullshit.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517327506993152')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(7,'r9OWHkiDE9EG3W9','If u use cash, there wouldn’t remain any record or deposit back at all.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517326278053888')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(8,'r9OWHkiDE9EG3W9','Ppl!! Warning!! At #1778 ala moana blvd, DO NOT use cash!!!','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517325023928320')
insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(9,'r9OWHkiDE9EG3W9','@Alamo hello. Alamo and ppl who rent a car at beautiful hawaii~','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517323283324928')
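
Since setUntil excludes its end date, the notebook adds one day so the final collection day is included. A quick standalone check of that window arithmetic (plain Python, no Twitter access needed):

import datetime

start = datetime.datetime.strptime("2019-12-08", "%Y-%m-%d")
end = datetime.datetime.strptime("2019-12-11", "%Y-%m-%d")
days = [(start + datetime.timedelta(days=x)).strftime("%Y-%m-%d")
        for x in range((end - start).days)]
assert days == ['2019-12-08', '2019-12-09', '2019-12-10']
# so setUntil("2019-12-11") still only returns tweets posted through 2019-12-10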

YouTube comment downloader (downloader.py, no YouTube API)

#!/usr/bin/env python
from __future__ import print_function
import sys
import time
import json
import requests
import io
import lxml.html
from urllib.parse import urlparse, parse_qs
from lxml.cssselect import CSSSelector

YOUTUBE_COMMENTS_URL = 'https://www.youtube.com/all_comments?v={youtube_id}'
YOUTUBE_COMMENTS_AJAX_URL = 'https://www.youtube.com/comment_ajax'
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'

def find_value(html, key, num_chars=2):
    # pull the quoted value that follows `key` in the raw HTML
    pos_begin = html.find(key) + len(key) + num_chars
    pos_end = html.find('"', pos_begin)
    return html[pos_begin:pos_end]

def extract_comments(html):
    tree = lxml.html.fromstring(html)
    item_sel = CSSSelector('.comment-item')
    text_sel = CSSSelector('.comment-text-content')
    time_sel = CSSSelector('.time')
    author_sel = CSSSelector('.user-name')
    for item in item_sel(tree):
        yield {'cid': item.get('data-cid'),
               'text': text_sel(item)[0].text_content(),
               'time': time_sel(item)[0].text_content().strip(),
               'author': author_sel(item)[0].text_content()}

def extract_reply_cids(html):
    tree = lxml.html.fromstring(html)
    sel = CSSSelector('.comment-replies-header > .load-comments')
    return [i.get('data-cid') for i in sel(tree)]

def ajax_request(session, url, params, data, retries=10, sleep=20):
    for _ in range(retries):
        response = session.post(url, params=params, data=data)
        if response.status_code == 200:
            response_dict = json.loads(response.text)
            return response_dict.get('page_token', None), response_dict['html_content']
        else:
            time.sleep(sleep)

def download_comments(youtube_id, sleep=1):
    session = requests.Session()
    session.headers['User-Agent'] = USER_AGENT

    # Get the YouTube page with the initial comments
    response = session.get(YOUTUBE_COMMENTS_URL.format(youtube_id=youtube_id))
    html = response.text
    reply_cids = extract_reply_cids(html)
    ret_cids = []
    for comment in extract_comments(html):
        ret_cids.append(comment['cid'])
        yield comment
    page_token = find_value(html, 'data-token')
    session_token = find_value(html, 'XSRF_TOKEN', 4)
    first_iteration = True

    # Get remaining comments (the same as pressing the 'Show more' button)
    while page_token:
        data = {'video_id': youtube_id,
                'session_token': session_token}
        params = {'action_load_comments': 1,
                  'order_by_time': True,
                  'filter': youtube_id}
        if first_iteration:
            params['order_menu'] = True
        else:
            data['page_token'] = page_token
        response = ajax_request(session, YOUTUBE_COMMENTS_AJAX_URL, params, data)
        if not response:
            break
        page_token, html = response
        reply_cids += extract_reply_cids(html)
        for comment in extract_comments(html):
            if comment['cid'] not in ret_cids:
                ret_cids.append(comment['cid'])
                yield comment
        first_iteration = False
        time.sleep(sleep)

    # Get replies (the same as pressing the 'View all X replies' link)
    for cid in reply_cids:
        data = {'comment_id': cid,
                'video_id': youtube_id,
                'can_reply': 1,
                'session_token': session_token}
        params = {'action_load_replies': 1,
                  'order_by_time': True,
                  'filter': youtube_id,
                  'tab': 'inbox'}
        response = ajax_request(session, YOUTUBE_COMMENTS_AJAX_URL, params, data)
        if not response:
            break
        _, html = response
        for comment in extract_comments(html):
            if comment['cid'] not in ret_cids:
                ret_cids.append(comment['cid'])
                yield comment
        time.sleep(sleep)

## parse the bare video ID out of the input link
def video_id(value):
    query = urlparse(value)
    if query.hostname == 'youtu.be':
        return query.path[1:]
    if query.hostname in ('www.youtube.com', 'youtube.com'):
        if query.path == '/watch':
            p = parse_qs(query.query)
            return p['v'][0]
        if query.path[:7] == '/embed/':
            return query.path.split('/')[2]
        if query.path[:3] == '/v/':
            return query.path.split('/')[2]
    # unrecognized URL shape
    return None

def main():
    # the original argparse CLI (--youtubeid, --output, --limit) was replaced with interactive input
    Youtube_id1 = input('Enter YouTube URL or ID: ')
    ## keep the full link, and cut the video ID out of it
    Youtube_id2 = Youtube_id1
    Youtube_id1 = video_id(Youtube_id1)
    youtube_id = Youtube_id1
    try:
        result_List = []
        if not youtube_id:
            raise ValueError('Please enter a valid URL or video ID')
        print('Downloading Youtube comments for video:', youtube_id)
        Number = 1  # 0 = write line-delimited JSON to a file; anything else = return an in-memory list
        if Number == 0:
            Output1 = input('Output filename: ')
            Limit1 = input('Comment limit: ')
            if Limit1 == '':
                Limit1 = 100  # default to 100 when left blank
            limit = int(Limit1)
            output = Output1
            count = 0
            with io.open(output, 'w', encoding='utf8') as fp:
                for comment in download_comments(youtube_id):
                    print(json.dumps(comment, ensure_ascii=False), file=fp)
                    count += 1
                    sys.stdout.flush()
                    if limit and count >= limit:
                        print('Downloaded {} comment(s)\r'.format(count))
                        print('\nDone!')
                        break
        else:
            count = 0
            limit = 100
            for comment in download_comments(youtube_id):
                dic = {'cid': comment['cid'],
                       'text': str(comment['text']),
                       'time': comment['time'],
                       'author': comment['author'],
                       'link': Youtube_id2}
                result_List.append(dic)
                count += 1
                if count == limit:
                    print(' Comment thread built')
                    print('\n\n\n\n\n\n\n')
                    break
        return result_List
    except Exception as e:
        print('Error:', str(e))
        sys.exit(1)

if __name__ == "__main__":
    main()
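
A quick sanity check of video_id() against the URL shapes it recognizes (the ID below is purely illustrative):

# Illustrative inputs; any youtu.be / watch / embed / v URL with a valid ID works the same way.
assert video_id('https://youtu.be/dQw4w9WgXcQ') == 'dQw4w9WgXcQ'
assert video_id('https://www.youtube.com/watch?v=dQw4w9WgXcQ') == 'dQw4w9WgXcQ'
assert video_id('https://www.youtube.com/embed/dQw4w9WgXcQ') == 'dQw4w9WgXcQ'
assert video_id('not a url') is None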

MySQL schema (os_db)

-- MySQL Workbench Forward Engineering

SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';

-- -----------------------------------------------------
-- Schema os_db
-- -----------------------------------------------------
CREATE SCHEMA IF NOT EXISTS `os_db` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci ;
USE `os_db` ;

-- -----------------------------------------------------
-- Table `os_db`.`naver`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`naver` (
  `Naver_ID` INT(11) NOT NULL,
  `Naver_Name` VARCHAR(6000) NULL DEFAULT NULL,
  `Naver_Text` VARCHAR(6000) NULL DEFAULT NULL,
  `Naver_Date` VARCHAR(100) NULL DEFAULT NULL,
  `Naver_Link` VARCHAR(1000) NULL DEFAULT NULL,
  PRIMARY KEY (`Naver_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8;


-- -----------------------------------------------------
-- Table `os_db`.`twitter`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`twitter` (
  `Twitter_ID` INT(11) NOT NULL,
  `Twitter_Name` VARCHAR(6000) NULL DEFAULT NULL,
  `Twitter_Link` VARCHAR(6000) NULL DEFAULT NULL,
  `Twitter_Date` VARCHAR(100) NULL DEFAULT NULL,
  `Twitter_Text` VARCHAR(6000) NULL DEFAULT NULL,
  PRIMARY KEY (`Twitter_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8;


-- -----------------------------------------------------
-- Table `os_db`.`user`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`user` (
  `User_ID` INT(11) NOT NULL,
  PRIMARY KEY (`User_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci;


-- -----------------------------------------------------
-- Table `os_db`.`user_naver`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`user_naver` (
  `User_ID` INT(11) NOT NULL,
  `Naver_ID` INT(11) NOT NULL,
  PRIMARY KEY (`User_ID`, `Naver_ID`),
  INDEX `UN_Naver_idx` (`Naver_ID` ASC),
  CONSTRAINT `UN_Naver`
    FOREIGN KEY (`Naver_ID`)
    REFERENCES `os_db`.`naver` (`Naver_ID`),
  CONSTRAINT `UN_user`
    FOREIGN KEY (`User_ID`)
    REFERENCES `os_db`.`user` (`User_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci;


-- -----------------------------------------------------
-- Table `os_db`.`user_twitter`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`user_twitter` (
  `User_ID` INT(11) NOT NULL,
  `Twitter_ID` INT(11) NOT NULL,
  PRIMARY KEY (`User_ID`, `Twitter_ID`),
  INDEX `UT_twitter_idx` (`Twitter_ID` ASC),
  CONSTRAINT `UT_twitter`
    FOREIGN KEY (`Twitter_ID`)
    REFERENCES `os_db`.`twitter` (`Twitter_ID`),
  CONSTRAINT `UT_user`
    FOREIGN KEY (`User_ID`)
    REFERENCES `os_db`.`user` (`User_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci;


-- -----------------------------------------------------
-- Table `os_db`.`youtube`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`youtube` (
  `Youtube_ID` INT(11) NOT NULL,
  `Youtube_Text` VARCHAR(12000) NULL DEFAULT NULL,
  `Youtube_Name` VARCHAR(6000) NULL DEFAULT NULL,
  `Youtube_Date` VARCHAR(100) NULL DEFAULT NULL,
  `Youtube_Link` VARCHAR(1000) NULL DEFAULT NULL,
  PRIMARY KEY (`Youtube_ID`))
ENGINE = InnoDB  -- InnoDB rather than MyISAM so the foreign key in `user_youtube` is enforced
DEFAULT CHARACTER SET = utf8;


-- -----------------------------------------------------
-- Table `os_db`.`user_youtube`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `os_db`.`user_youtube` (
  `User_ID` INT(11) NOT NULL,
  `Youtube_ID` INT(11) NOT NULL,
  PRIMARY KEY (`User_ID`, `Youtube_ID`),
  INDEX `UY_youtube_idx` (`Youtube_ID` ASC),
  CONSTRAINT `UY_user`
    FOREIGN KEY (`User_ID`)
    REFERENCES `os_db`.`user` (`User_ID`),
  CONSTRAINT `UY_youtube`
    FOREIGN KEY (`Youtube_ID`)
    REFERENCES `os_db`.`youtube` (`Youtube_ID`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci;


SET SQL_MODE=@OLD_SQL_MODE;
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
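
The user_* tables are junction tables tying a row in user to collected rows in each source table. For illustration, a query in this shape (assuming both sides are populated) lists each user's Naver comments:

-- Illustrative query over the junction design; no sample data is implied.
SELECT u.User_ID, n.Naver_Name, n.Naver_Text, n.Naver_Date
FROM `user` AS u
JOIN `user_naver` AS un ON un.User_ID = u.User_ID
JOIN `naver` AS n ON n.Naver_ID = un.Naver_ID
ORDER BY u.User_ID, n.Naver_Date;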

YouTube collector entry point (calls downloader.main() and loads MySQL)

import downloader
import pymysql

conn = pymysql.connect(host='database-1.cg0acc768it6.us-east-1.rds.amazonaws.com', user='admin', password='41545737!', db='os_db', charset='utf8')
curs = conn.cursor()

def call_main():
    print(' Generating comment thread \n')
    print(' **************************************************************')
    print(' **************************************************************')
    print(' **************************************************************')
    print(' ********** Enter the information to complete generation. **********')
    print(' **************************************************************')
    print(' **************************************************************')
    print(' **************************************************************')
    a = downloader.main()
    return a

CommentList = call_main()  ## list of dicts with cid, text, time, author, link
i = 0
for row in CommentList:
    temp = row['text'].replace("'", '')  # drop single quotes so the formatted SQL stays valid
    sql = "insert into youtube (Youtube_ID,Youtube_Text,Youtube_Date,Youtube_Name,Youtube_Link) values({},'{}','{}','{}','{}')".format(i, temp, row['time'], row['author'], row['link'])
    print(sql)
    i = i + 1
    curs.execute(sql)
conn.commit()
conn.close()
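
Like the other two loaders, this loop builds each INSERT by string formatting and strips quotes to keep the statement valid. A batched sketch with executemany and placeholders (same CommentList and connection assumed) would perform the same inserts in one call and let pymysql escape the values:

# Hedged alternative to the formatted-string loop above.
rows = [(idx, row['text'], row['time'], row['author'], row['link'])
        for idx, row in enumerate(CommentList)]
curs.executemany(
    "insert into youtube (Youtube_ID,Youtube_Text,Youtube_Date,Youtube_Name,Youtube_Link) "
    "values (%s, %s, %s, %s, %s)",
    rows)
conn.commit()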