Twitter_Input.ipynb
7.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=== 기본으로 설정된 트윗 수집 기간은 2019-12-08 에서 2019-12-10 까지 입니다 ===\n",
"=== 총 3일 간의 데이터 수집 ===\n",
"검색할 키워드를 입력해주세요: 이동찬\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "55c5a56d9ba7478f80d07518e22a3177",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(0,'r9OWHkiDE9EG3W9','I reached alamo for take my money back several times. They only keep showing me wrong information. Maybe ai works for it.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195691775451136')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(1,'r9OWHkiDE9EG3W9','Nope. I gave up 431 dollars already. I just want ppl not to lose their money in a pleasant place.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195233975504896')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(2,'optimum0524','IMF 환란이전 96년쯤 예금금리가 12%였던걸로 기억합니다. 80년대 코오롱그룹 이동찬 회장은 장영자에게 어음수표깡을 받으면서 50%이상의 금리를 적용받았다고하니 정말 요즘기준으로는 이해하기 힘든 시대였지요.','2019-12-10','https://twitter.com/optimum0524/status/1204190641866956801')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(3,'Naerum10','@이동찬','2019-12-09','https://twitter.com/Naerum10/status/1203920823725121537')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(4,'4bur0','이동찬 달려오는 폼이 너무 웃겨','2019-12-08','https://twitter.com/4bur0/status/1203736355584393216')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(5,'r9OWHkiDE9EG3W9','And i tried to contact to the headquarters. They only says “contact the branch”. So irresponsible and irritating reaction to customers.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517328811417600')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(6,'r9OWHkiDE9EG3W9','They told me deposit had been refunded at that time, but now i know they gave me a bullshit.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517327506993152')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(7,'r9OWHkiDE9EG3W9','If u use cash, there wouldn’t remain any record or deposit back at all.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517326278053888')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(8,'r9OWHkiDE9EG3W9','Ppl!! Warning!! At #1778 ala moana blvd, DO NOT use cash!!!','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517325023928320')\n",
"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(9,'r9OWHkiDE9EG3W9','@Alamo hello. Alamo and ppl who rent a car at beautiful hawaii~','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517323283324928')\n"
]
}
],
"source": [
"import GetOldTweets3 as got\n",
"from bs4 import BeautifulSoup\n",
"import pymysql\n",
"import datetime\n",
"import time\n",
"from random import uniform\n",
"from tqdm import tqdm_notebook\n",
"conn = pymysql.connect(host = 'database-1.cg0acc768it6.us-east-1.rds.amazonaws.com', user = 'admin', password ='41545737!',db= 'os_db',charset = 'utf8')\n",
"curs = conn.cursor()\n",
"\n",
"def get_tweets(criteria):\n",
" tweet = got.manager.TweetManager.getTweets(criteria)\n",
" tweet_list = []\n",
"\n",
" for index in tqdm_notebook(tweet):\n",
"\n",
" # 메타데이터 목록\n",
" username = index.username\n",
" link = index.permalink\n",
" content = index.text\n",
" tweet_date = index.date.strftime(\"%Y-%m-%d\")\n",
" retweets = index.retweets\n",
" favorites = index.favorites\n",
"\n",
" # 결과 합치기\n",
" info_list = {'username' : username, 'text': content, 'time': tweet_date, 'link': link}\n",
" tweet_list.append(info_list)\n",
" # 휴식\n",
" time.sleep(uniform(1,2))\n",
" return tweet_list\n",
"days_range = []\n",
"\n",
"start = datetime.datetime.strptime(\"2019-12-08\", \"%Y-%m-%d\")\n",
"end = datetime.datetime.strptime(\"2019-12-11\", \"%Y-%m-%d\")\n",
"date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end-start).days)]\n",
"\n",
"for date in date_generated:\n",
" days_range.append(date.strftime(\"%Y-%m-%d\"))\n",
"print(\"=== 기본으로 설정된 트윗 수집 기간은 {} 에서 {} 까지 입니다 ===\".format(days_range[0], days_range[-1]))\n",
"print(\"=== 총 {}일 간의 데이터 수집 ===\".format(len(days_range)))\n",
"\n",
"# 수집 기간 맞추기\n",
"start_date = days_range[0]\n",
"end_date = (datetime.datetime.strptime(days_range[-1], \"%Y-%m-%d\")\n",
" + datetime.timedelta(days=1)).strftime(\"%Y-%m-%d\") # setUntil이 끝을 포함하지 않으므로, day + 1\n",
"\n",
"my_key = input(\"검색할 키워드를 입력해주세요: \")\n",
"\n",
"tweetCriteria = got.manager.TweetCriteria().setQuerySearch(my_key)\\\n",
" .setSince(\"2019-12-08\")\\\n",
" .setUntil(\"2019-12-11\")\\\n",
" .setMaxTweets(10)\n",
"result_list = get_tweets(tweetCriteria)\n",
"\n",
"i = 0\n",
"for row in result_list : # 이름 내용 날짜 링크\n",
" sql = \"insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values({},'{}','{}','{}','{}')\".format(i,row['username'],row['text'],row['time'],row['link'])\n",
" print(sql)\n",
" i = i + 1\n",
" curs.execute(sql)\n",
"conn.commit()\n",
"conn.close()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}