Toggle navigation
Toggle navigation
This project
Loading...
Sign in
김건
/
Comment_Analysis
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
HyoJoon
2019-11-19 16:56:31 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
b4e76fffe63ad40ea3e32c9de233c72b82161b7b
b4e76fff
1 parent
4d4fba8a
코드 다시 수정
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
50 deletions
twitter/modify.md
twitter/twitter.py
twitter/modify.md
0 → 100644
View file @
b4e76ff
߸
twitter/twitter.py
View file @
b4e76ff
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import
GetOldTweets3
as
got
from
bs4
import
BeautifulSoup
import
datetime
import
time
from
random
import
uniform
from
tqdm
import
tqdm_notebook
def get_tweets(criteria):
    """Collect the tweets matching *criteria* and return their key metadata.

    Args:
        criteria: The search criteria object handed to
            ``got.manager.TweetManager.getTweets``.

    Returns:
        A list with one dict per tweet, holding the keys ``'username'``,
        ``'text'``, ``'time'`` (the tweet date formatted as ``YYYY-MM-DD``)
        and ``'link'`` (the tweet permalink).  The list is also printed.
    """
    tweets = got.manager.TweetManager.getTweets(criteria)

    # Keep only the metadata fields that are reported; the retweet and
    # favorite counts were read by the previous version but never used.
    tweet_list = [
        {
            'username': item.username,
            'text': item.text,
            'time': item.date.strftime("%Y-%m-%d"),
            'link': item.permalink,
        }
        for item in tqdm_notebook(tweets)
    ]

    # Print the finished list once rather than the growing list per tweet.
    print(tweet_list)

    # Rest briefly before the caller issues its next search.
    # NOTE(review): all tweets are already downloaded at this point, so one
    # pause per call is kept instead of one pause per tweet -- confirm.
    time.sleep(uniform(1, 2))

    return tweet_list
# Default collection window: `start` is included, `end` is excluded.
# The superseded 2019-11-14 / 2019-11-15 assignments and the duplicate
# period banner (leftovers of the previous revision) are dropped; only the
# values that were actually in effect remain.
start = datetime.datetime.strptime("2019-11-17", "%Y-%m-%d")
end = datetime.datetime.strptime("2019-11-18", "%Y-%m-%d")

# One datetime per day in [start, end).
date_generated = [
    start + datetime.timedelta(days=offset)
    for offset in range((end - start).days)
]

# The same days as "YYYY-MM-DD" strings, the format used for the search dates.
days_range = [day.strftime("%Y-%m-%d") for day in date_generated]

print("=== 기본으로 설정된 트윗 수집 기간은 {} 에서 {} 까지 입니다 ===".format(days_range[0], days_range[-1]))
print("=== 총 {}일 간의 데이터 수집 ===".format(len(days_range)))
import
time
# Align the collection period with the search criteria.
start_date = days_range[0]

# setUntil does not include its end date, so use the day after the last
# collected day.
end_date = (
    datetime.datetime.strptime(days_range[-1], "%Y-%m-%d")
    + datetime.timedelta(days=1)
).strftime("%Y-%m-%d")
# Interactive search menu.
#
# This replaces the earlier inline collection code (a hard-coded search
# followed by a per-tweet loop); every search now goes through get_tweets().

# Date limits taken from the original integer checks (20170000 / 20191200).
# NOTE(review): the messages talk about "the last 3 years" and "the future",
# so these were presumably meant to follow the current date -- confirm before
# deriving them from datetime.date.today().
_EARLIEST_DAY = datetime.date(2017, 1, 1)
_LATEST_DAY = datetime.date(2019, 11, 30)

# The original check (end - 8 > start) tolerated a difference of up to 8.
_MAX_SPAN_DAYS = 8


def _parse_yyyymmdd(text):
    """Return the date written as ``yyyymmdd`` in *text*, or None if invalid."""
    try:
        return datetime.datetime.strptime(text.strip(), "%Y%m%d").date()
    except ValueError:
        return None


my_key = input("검색할 키워드를 입력해주세요: ")

while True:
    print("현재 검색어는 " + my_key + "입니다. ")
    print("기간은 기본적으로 최근 1일입니다.")
    print("1. 닉네임을 통한 검색")
    print("2. 키워드를 통한 검색")
    print("3. 시간을 통한 검색")
    print("4. 종료")

    # A non-numeric choice used to crash the script; treat it as invalid.
    try:
        userNum = int(input("무엇을 하시겠습니까?: "))
    except ValueError:
        print("잘못된 입력입니다.")
        continue

    if userNum == 1:
        # Search the current keyword within one user's tweets.
        nick = input("검색할 닉네임을 입력해주세요: ")
        tweetCriteria = (
            got.manager.TweetCriteria()
            .setUsername(nick)
            .setQuerySearch(my_key)
            .setSince(start_date)
            .setUntil(end_date)
            .setMaxTweets(-1)
        )
        get_tweets(tweetCriteria)

    elif userNum == 2:
        # Replace the current keyword and search the default period.
        my_key = input("검색할 키워드를 입력해주세요: ")
        tweetCriteria = (
            got.manager.TweetCriteria()
            .setQuerySearch(my_key)
            .setSince(start_date)
            .setUntil(end_date)
            .setMaxTweets(-1)
        )
        get_tweets(tweetCriteria)

    elif userNum == 3:
        # Search the current keyword over a user-supplied period.  The dates
        # are parsed into real dates so that impossible days are rejected and
        # the span is counted correctly across month and year boundaries.
        user_start = _parse_yyyymmdd(input("시작일을 입력해주세요(yyyymmdd형태): "))
        if user_start is None:
            print("잘못된 입력입니다.")
            continue
        if user_start < _EARLIEST_DAY or user_start > _LATEST_DAY:
            print("최근 3년 이내만 검색가능합니다.")
            continue

        user_end = _parse_yyyymmdd(input("종료일을 입력해주세요(yyyymmdd형태): "))
        if user_end is None:
            print("잘못된 입력입니다.")
            continue
        if user_end > _LATEST_DAY:
            print("미래로 갈 수는 없습니다.")
            continue
        if user_end < user_start:
            print("시작일보다 작을 수 없습니다.")
            continue

        span_days = (user_end - user_start).days
        if span_days > _MAX_SPAN_DAYS:
            print("최대 1주일까지 검색이 가능합니다.")
            continue

        # Zero-padded "YYYY-MM-DD" strings for the search criteria.
        d1 = user_start.isoformat()
        d2 = user_end.isoformat()
        print("=== 현재 설정된 트윗 수집 기간은 {} 에서 {} 까지 입니다 ===".format(d1, d2))
        print("=== 총 {}일 간의 데이터 수집 ===".format(span_days))

        # NOTE(review): d2 is passed to setUntil unchanged, as before.  If
        # setUntil excludes its end date, the end day itself is not
        # collected, which is what the day count above assumes -- confirm.
        tweetCriteria = (
            got.manager.TweetCriteria()
            .setQuerySearch(my_key)
            .setSince(d1)
            .setUntil(d2)
            .setMaxTweets(-1)
        )
        get_tweets(tweetCriteria)

    elif userNum == 4:
        break

    else:
        print("잘못된 입력입니다.")
        continue
...
...
Please
register
or
login
to post a comment