Toggle navigation
Toggle navigation
This project
Loading...
Sign in
김건
/
Comment_Analysis
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김건
2019-11-21 01:38:57 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
5f685fbb431f29f653e447c4caaf5cf985156edc
5f685fbb
1 parent
5a4687b2
4차 구현사항 upload
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
119 additions
and
6 deletions
JPype1-0.7.0-cp38-cp38-win_amd64.whl
Youtube/main.py
Youtube/requirements.txt
youtube.md
JPype1-0.7.0-cp38-cp38-win_amd64.whl
0 → 100644
View file @
5f685fb
No preview for this file type
Youtube/main.py
View file @
5f685fb
import
downloader
from
time
import
sleep
from
konlpy.tag
import
Twitter
from
collections
import
Counter
import
pytagcloud
import
operator
def get_tags(Comment_List):
    """Print the ten most frequent nouns in the comments and draw a tag cloud.

    Args:
        Comment_List: iterable of comment records; each record is indexed
            with the key ``'text'`` to obtain the comment body.

    Side effects:
        Prints the part-of-speech tagging of every comment, the collected
        taggings, their count, and the top-10 keyword list to stdout, then
        hands the top-10 list to ``pytagcloud.create_tag_image`` with the
        output name ``'wordcloud.jpg'``.
    """
    # Collect every comment body before the tagger is constructed.
    texts = [comment['text'] for comment in Comment_List]
    tagger = Twitter()

    # Tag one comment at a time, echoing each result followed by a divider.
    tagged_sentences = []
    for text in texts:
        tokens = tagger.pos(text)
        tagged_sentences.append(tokens)
        print(tokens)
        print('-' * 30)

    print(tagged_sentences)
    print(len(tagged_sentences))
    print('\n' * 3)

    # Keep only tokens tagged 'Noun' that are at least two characters long.
    keywords = [
        word
        for tokens in tagged_sentences
        for word, tag in tokens
        if len(word) >= 2 and tag == 'Noun'
    ]

    frequency = Counter(keywords)
    print(' 가장 많이 등장한 10개의 키워드.\n')
    top_keywords = frequency.most_common(10)
    print(top_keywords)

    cloud_tags = pytagcloud.make_tags(top_keywords, maxsize=80)
    pytagcloud.create_tag_image(
        cloud_tags,
        'wordcloud.jpg',
        size=(900, 600),
        fontname='Nanum Gothic',
        rectangular=False,
    )
def
print_result
(
Comment_List
)
:
for
var
in
Comment_List
:
...
...
@@ -23,6 +54,66 @@ def search_by_keyword(Comment_List,keyword) :
result_List
.
append
(
var
)
return
result_List
def search_by_time(Comment_List, Time_input):
    """Return the comments whose ``'time'`` value equals ``Time_input``.

    Args:
        Comment_List: iterable of comment records indexed with ``'time'``.
        Time_input: the time value to match exactly (compared with ``==``).

    Returns:
        A new list holding the matching records in their original order;
        empty when nothing matches.
    """
    return [comment for comment in Comment_List if comment['time'] == Time_input]
# Substring markers that identify the unit of a relative-time label, paired
# with the bucket each one feeds. They are tested in this order and the first
# match wins, mirroring the original if/elif chain.
_TIME_UNIT_MARKERS = (
    ('개월', 'month'),
    ('일', 'day'),
    ('년', 'year'),
    ('시간', 'hour'),
    ('주', 'week'),
    ('분', 'minute'),
)


def _leading_number(label):
    """Return the integer formed by the leading ASCII digits of *label* (0 if none)."""
    digits = ''
    for ch in label.lstrip():
        if ch not in '0123456789':
            break
        digits += ch
    return int(digits) if digits else 0


def make_time_chart(Comment_List):
    """Count comments per time label and print the counts grouped by unit.

    Args:
        Comment_List: iterable of comment records; each record is indexed
            with the key ``'time'`` and the value is treated as a string
            label (it is concatenated with ``' '`` and searched for unit
            markers such as ``'개월'`` or ``'일'``).

    Side effects:
        Prints every label, then every distinct label, then three lists of
        ``(label, count)`` pairs for the month, week and day buckets, in
        that order. Nothing is returned.

    Within each bucket the pairs are ordered by the number at the start of
    the label (ties broken by the label text), so ``'2개월 전'`` comes before
    ``'10개월 전'``. A plain string sort would place ``'10...'`` first.
    """
    time_labels = [comment['time'] for comment in Comment_List]

    for label in time_labels:
        print(label + ' ')
    print('\n\n\n\n')

    # One pass over the labels instead of list.count() per distinct label.
    # Counter preserves first-seen order, so this listing is deterministic.
    label_counts = Counter(time_labels)
    for label in label_counts:
        print(label + ' ')
    print('\n\n\n\n')

    # Route each distinct label to the bucket of the first marker it contains;
    # labels containing no marker are dropped, as before.
    buckets = {unit: {} for _, unit in _TIME_UNIT_MARKERS}
    for label, count in label_counts.items():
        for marker, unit in _TIME_UNIT_MARKERS:
            if marker in label:
                buckets[unit][label] = count
                break

    def sorted_counts(unit):
        return sorted(
            buckets[unit].items(),
            key=lambda item: (_leading_number(item[0]), item[0]),
        )

    print(sorted_counts('month'))
    print(sorted_counts('week'))
    print(sorted_counts('day'))
def
call_main
():
print
(
' Comment Thread 생성중
\n
'
)
...
...
@@ -35,10 +126,14 @@ def call_main ():
print
(
' **************************************************************'
)
print
(
' **************************************************************'
)
a
=
downloader
.
main
()
author_results
=
search_by_author
(
a
,
'광고제거기'
)
text_resutls
=
search_by_keyword
(
a
,
'지현'
)
print_result
(
author_results
)
print_result
(
text_resutls
)
return
a
CommentList
=
call_main
()
if
__name__
==
"__main__"
:
CommentList
=
call_main
()
make_time_chart
(
CommentList
)
##author_results = search_by_author(CommentList,'광고제거기')
##text_resutls = search_by_keyword(CommentList,'지현')
##get_tags(CommentList)
##print_result(author_results)
##print_result(text_resutls)
...
...
Youtube/requirements.txt
View file @
5f685fb
requests
beautifulsoup4
lxml
cssselect
\ No newline at end of file
cssselect
### 크롤링
pygame
pytagcloud
### wordcloud
JPype1
### 키워드 분석
\ No newline at end of file
...
...
youtube.md
View file @
5f685fb
...
...
@@ -34,3 +34,15 @@ Youtube 3차 수정 사항
1.
konlpy (http://konlpy.org/ko/latest/)를 통하여 명사 추출 후 keyword 분석하기
2. 시간대를 추출하여 시간대 별로 Comment 정리하기
-----------------------------------------------------
4차 개발사항
1.
konlpy를 이용하여 keyword 분석 후 가장 많이 등장한 키워드 리스트 출력
2.
1번 기능을 사용하여 wordcloud 구성
3.
시간대를 이용하여 검색할 수 있는 기능 구현
4. 시간대 별로 sort된 리스트를 가질 수 있도록 구현
-----------------------------------------------------
추가 구현 사항
1.
시간대 별로 sort된 리스트를 matplotlib python을 이용하여 차트화 시키기
2.
기능 별로 접근할 수 있도록 정리할 것
\ No newline at end of file
...
...
Please
register
or
login
to post a comment