Showing
2 changed files
with
76 additions
and
0 deletions
twitter/readme.txt
0 → 100644
twitter/twitter.py
0 → 100644
#!/usr/bin/env python
# coding: utf-8
"""Collect tweets matching a search query over a date range with GetOldTweets3.

The script builds the list of days to cover, fetches the matching tweets via
``got.manager.TweetManager.getTweets`` and flattens each one into a plain dict
(``username``, ``text``, ``time``, ``link``) stored in ``tweet_list``.
"""

# In[ ]:


import datetime
import time
from random import uniform

import GetOldTweets3 as got
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook

DATE_FORMAT = "%Y-%m-%d"

# Collection window: `start` is the first day collected, `end` is exclusive.
start = datetime.datetime.strptime("2019-11-14", DATE_FORMAT)
end = datetime.datetime.strptime("2019-11-15", DATE_FORMAT)

# One "YYYY-MM-DD" string per day in [start, end).
days_range = [
    (start + datetime.timedelta(days=offset)).strftime(DATE_FORMAT)
    for offset in range((end - start).days)
]
if not days_range:
    # Fail early with a clear message instead of an IndexError on
    # days_range[0] further down.
    raise ValueError("end must be at least one day after start")

print("=== 설정된 트윗 수집 기간은 {} 에서 {} 까지 입니다 ===".format(days_range[0], days_range[-1]))
print("=== 총 {}일 간의 데이터 수집 ===".format(len(days_range)))

# Align the query bounds with the collection window.
start_date = days_range[0]
# setUntil does not include its bound, so pass the day after the last
# collected day.
end_date = (
    datetime.datetime.strptime(days_range[-1], DATE_FORMAT)
    + datetime.timedelta(days=1)
).strftime(DATE_FORMAT)

# Define the tweet search criteria.
# NOTE(review): setMaxTweets(-1) is presumably "no limit" -- confirm against
# the GetOldTweets3 documentation.
tweetCriteria = (
    got.manager.TweetCriteria()
    .setQuerySearch('한글')
    .setSince(start_date)
    .setUntil(end_date)
    .setMaxTweets(-1)
)

# Collect with GetOldTweets3 and report how long it took.
print("Collecting data start.. from {} to {}".format(days_range[0], days_range[-1]))
start_time = time.time()

tweet = got.manager.TweetManager.getTweets(tweetCriteria)

print("Collecting data end.. {0:0.2f} Minutes".format((time.time() - start_time)/60))
print("=== Total num of tweets is {} ===".format(len(tweet)))

# Flatten every collected tweet into a dict of the metadata we keep.
# No per-item pause is needed here: the loop only reads attributes of tweets
# that getTweets has already returned.
tweet_list = []

for item in tqdm_notebook(tweet):
    tweet_list.append({
        'username': item.username,
        'text': item.text,
        'time': item.date.strftime(DATE_FORMAT),
        'link': item.permalink,
    })

# Print the result once, after the loop, rather than re-printing the whole
# growing list on every iteration.
print(tweet_list)


# In[ ]:
-
Please register or login to post a comment