Twitter_Input.ipynb 7.26 KB
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=== 기본으로 설정된 트윗 수집 기간은 2019-12-08 에서 2019-12-10 까지 입니다 ===\n",
      "=== 총 3일 간의 데이터 수집 ===\n",
      "검색할 키워드를 입력해주세요: 이동찬\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "55c5a56d9ba7478f80d07518e22a3177",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(0,'r9OWHkiDE9EG3W9','I reached alamo for take my money back several times. They only keep showing me wrong information. Maybe ai works for it.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195691775451136')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(1,'r9OWHkiDE9EG3W9','Nope. I gave up 431 dollars already. I just want ppl not to lose their money in a pleasant place.','2019-12-10','https://twitter.com/r9OWHkiDE9EG3W9/status/1204195233975504896')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(2,'optimum0524','IMF 환란이전 96년쯤 예금금리가 12%였던걸로 기억합니다. 80년대 코오롱그룹 이동찬 회장은 장영자에게 어음수표깡을 받으면서 50%이상의 금리를 적용받았다고하니 정말 요즘기준으로는 이해하기 힘든 시대였지요.','2019-12-10','https://twitter.com/optimum0524/status/1204190641866956801')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(3,'Naerum10','@이동찬','2019-12-09','https://twitter.com/Naerum10/status/1203920823725121537')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(4,'4bur0','이동찬 달려오는 폼이 너무 웃겨','2019-12-08','https://twitter.com/4bur0/status/1203736355584393216')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(5,'r9OWHkiDE9EG3W9','And i tried to contact to the headquarters. They only says “contact the branch”. So irresponsible and irritating reaction to customers.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517328811417600')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(6,'r9OWHkiDE9EG3W9','They told me deposit had been refunded at that time, but now i know they gave me a bullshit.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517327506993152')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(7,'r9OWHkiDE9EG3W9','If u use cash, there wouldn’t remain any record or deposit back at all.','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517326278053888')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(8,'r9OWHkiDE9EG3W9','Ppl!! Warning!! At #1778 ala moana blvd, DO NOT use cash!!!','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517325023928320')\n",
      "insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) values(9,'r9OWHkiDE9EG3W9','@Alamo hello. Alamo and ppl who rent a car at beautiful hawaii~','2019-12-08','https://twitter.com/r9OWHkiDE9EG3W9/status/1203517323283324928')\n"
     ]
    }
   ],
   "source": [
    "import datetime\n",
    "import getpass\n",
    "import os\n",
    "import time\n",
    "from random import uniform\n",
    "\n",
    "import GetOldTweets3 as got\n",
    "import pymysql\n",
    "from bs4 import BeautifulSoup\n",
    "from tqdm import tqdm_notebook\n",
    "# Connection settings are read from environment variables so that credentials are\n",
    "# not stored in the notebook; the password is prompted for when its variable is unset.\n",
    "# NOTE(review): any database password previously committed in this notebook should be rotated.\n",
    "conn = pymysql.connect(\n",
    "    host=os.environ.get('TWITTER_DB_HOST', 'database-1.cg0acc768it6.us-east-1.rds.amazonaws.com'),\n",
    "    user=os.environ.get('TWITTER_DB_USER', 'admin'),\n",
    "    password=os.environ.get('TWITTER_DB_PASSWORD') or getpass.getpass('DB password: '),\n",
    "    db=os.environ.get('TWITTER_DB_NAME', 'os_db'),\n",
    "    charset='utf8',\n",
    ")\n",
    "# Cursor used by the insert statements at the end of this cell.\n",
    "curs = conn.cursor()\n",
    "\n",
    "def get_tweets(criteria):\n",
    "    \"\"\"Fetch the tweets matching ``criteria`` and return them as a list of dicts.\n",
    "\n",
    "    Args:\n",
    "        criteria: the search criteria object handed to\n",
    "            ``got.manager.TweetManager.getTweets``.\n",
    "\n",
    "    Returns:\n",
    "        A list with one dict per tweet, holding the keys 'username', 'text',\n",
    "        'time' (the tweet date formatted as YYYY-MM-DD) and 'link' (the permalink).\n",
    "    \"\"\"\n",
    "    tweets = got.manager.TweetManager.getTweets(criteria)\n",
    "    tweet_list = []\n",
    "\n",
    "    for tweet in tqdm_notebook(tweets):\n",
    "        # Keep only the metadata that is stored in the database.\n",
    "        tweet_list.append({\n",
    "            'username': tweet.username,\n",
    "            'text': tweet.text,\n",
    "            'time': tweet.date.strftime(\"%Y-%m-%d\"),\n",
    "            'link': tweet.permalink,\n",
    "        })\n",
    "        # Pause for 1-2 seconds after each tweet.\n",
    "        # NOTE(review): getTweets has already returned by this point, so this delay\n",
    "        # may not throttle any request - confirm whether it is still needed.\n",
    "        time.sleep(uniform(1, 2))\n",
    "    return tweet_list\n",
    "# Collection window: `start` is the first day collected, `end` is exclusive.\n",
    "start = datetime.datetime.strptime(\"2019-12-08\", \"%Y-%m-%d\")\n",
    "end = datetime.datetime.strptime(\"2019-12-11\", \"%Y-%m-%d\")\n",
    "\n",
    "# One datetime per day of the window, then the same days as YYYY-MM-DD strings.\n",
    "date_generated = [start + datetime.timedelta(days=offset) for offset in range((end - start).days)]\n",
    "days_range = []\n",
    "for date in date_generated:\n",
    "    days_range.append(date.strftime(\"%Y-%m-%d\"))\n",
    "\n",
    "print(\"=== 기본으로 설정된 트윗 수집 기간은 {} 에서 {} 까지 입니다 ===\".format(days_range[0], days_range[-1]))\n",
    "print(\"=== 총 {}일 간의 데이터 수집 ===\".format(len(days_range)))\n",
    "\n",
    "# Align the collection period with the search bounds.\n",
    "start_date = days_range[0]\n",
    "# setUntil does not include the end date, so use the last collected day + 1.\n",
    "end_date = (date_generated[-1] + datetime.timedelta(days=1)).strftime(\"%Y-%m-%d\")\n",
    "\n",
    "# Ask for the search keyword interactively.\n",
    "my_key = input(\"검색할 키워드를 입력해주세요: \")\n",
    "\n",
    "# Build the search from start_date / end_date (computed earlier in this cell) instead of\n",
    "# repeating the date literals, so the announced period and the query cannot drift apart.\n",
    "tweetCriteria = (\n",
    "    got.manager.TweetCriteria()\n",
    "    .setQuerySearch(my_key)\n",
    "    .setSince(start_date)\n",
    "    .setUntil(end_date)\n",
    "    .setMaxTweets(10)  # fetch at most 10 tweets\n",
    ")\n",
    "result_list = get_tweets(tweetCriteria)\n",
    "\n",
    "# Parameterized insert: the driver escapes every value, so tweet text containing\n",
    "# quotes can neither break the statement nor inject SQL.\n",
    "insert_sql = (\n",
    "    'insert into twitter (Twitter_ID,Twitter_Name,Twitter_Text,Twitter_Date,Twitter_Link) '\n",
    "    'values(%s,%s,%s,%s,%s)'\n",
    ")\n",
    "try:\n",
    "    # Twitter_ID is the row's position in this run's result list, starting at 0.\n",
    "    # NOTE(review): ids restart at 0 on every run; presumably this collides with rows\n",
    "    # from earlier runs if Twitter_ID is a key - confirm against the table schema.\n",
    "    for i, row in enumerate(result_list):  # name, text, date, link\n",
    "        params = (i, row['username'], row['text'], row['time'], row['link'])\n",
    "        # Show the statement exactly as it will be sent to the server.\n",
    "        print(curs.mogrify(insert_sql, params))\n",
    "        curs.execute(insert_sql, params)\n",
    "    conn.commit()\n",
    "except Exception:\n",
    "    # Leave the table untouched when any insert fails, then surface the error.\n",
    "    conn.rollback()\n",
    "    raise\n",
    "finally:\n",
    "    # Always release the connection, even when an insert failed.\n",
    "    conn.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}