Toggle navigation
Toggle navigation
This project
Loading...
Sign in
ShinSeungMin
/
Multiplex_Ticketing_Platform
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
임승현
2022-05-21 21:10:17 +0900
Browse Files
Options
Browse Files
Download
Plain Diff
Commit
9a6070b74d03a37353ef5d26eec01a2ec0fd8fbf
9a6070b7
2 parents
7ddc20ed
56cc6203
Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
코드 정리 See merge request !12
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
48 deletions
Megabox_crawling/megaboxCrawling.py
Megabox_crawling/megaboxCrawling.py
View file @
9a6070b
from
urllib
import
response
import
requests
from
bs4
import
BeautifulSoup
from
selenium
import
webdriver
from
webdriver_manager.chrome
import
ChromeDriverManager
from
selenium.webdriver.common.keys
import
Keys
import
chromedriver_autoinstaller
chromedriver_autoinstaller
.
install
()
booking_url
=
"https://megabox.co.kr/booking"
rate_url
=
"https://www.megabox.co.kr/movie"
options
=
webdriver
.
ChromeOptions
()
options
.
add_argument
(
"headless"
)
#창 안 띄움
options
.
add_experimental_option
(
"excludeSwitches"
,
[
"enable-logging"
])
driver
=
webdriver
.
Chrome
(
options
=
options
)
driver2
=
webdriver
.
Chrome
(
options
=
options
)
url
=
"https://megabox.co.kr/booking"
rate_url
=
"https://www.megabox.co.kr/movie"
driver
=
webdriver
.
Chrome
(
options
=
options
)
driver
.
maximize_window
()
driver2
.
maximize_window
()
driver
.
implicitly_wait
(
2
)
driver
.
get
(
url
)
# driver.implicitly_wait(2)
driver
.
get
(
booking_url
)
driver2
.
implicitly_wait
(
2
)
driver2
=
webdriver
.
Chrome
(
options
=
options
)
driver2
.
maximize_window
()
# driver2.implicitly_wait(2)
driver2
.
get
(
rate_url
)
r2
=
driver2
.
page_source
soup
=
BeautifulSoup
(
r2
,
"html.parser"
)
ticketing_rate
=
soup
.
select
(
'.rate'
)
movie_name
=
soup
.
select
(
'.tit-area > p.tit'
)
iframes
=
driver
.
find_elements_by_css_selector
(
'iframe'
)
theater_location
=
dict
(
)
# iframes = driver.find_elements_by_css_selector('iframe')
driver
.
switch_to
.
frame
(
'frameBokdMBooking'
)
r
=
driver
.
page_source
soup
=
BeautifulSoup
(
r
,
"html.parser"
)
seoul
=
soup
.
select
(
"#mCSB_4_container>ul>li>button"
)
Gyeonggi
=
soup
.
select
(
"#mCSB_5_container>ul>li>button"
)
Incheon
=
soup
.
select
(
"#mCSB_6_container>ul>li>button"
)
DCS
=
soup
.
select
(
"#mCSB_7_container>ul>li>button"
)
#Daejeon Chungcheong Sejong
BDG
=
soup
.
select
(
"#mCSB_8_container>ul>li>button"
)
#Busan Daegu Gyeongsang
GJ
=
soup
.
select
(
"#mCSB_9_container>ul>li>button"
)
#gwangju_jeonla
Gangwon
=
soup
.
select
(
"#mCSB_10_container>ul>li>button"
)
dict1
=
dict
()
for
brch
in
seoul
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
Gyeonggi
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
Incheon
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
DCS
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
BDG
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
GJ
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
for
brch
in
Gangwon
:
dict1
[
brch
[
'brch-nm'
]]
=
brch
[
'brch-no'
]
attr1
=
soup
.
select
(
"#mCSB_1_container>ul>li>button"
)
page1
=
driver
.
page_source
soup
1
=
BeautifulSoup
(
page1
,
"html.parser"
)
seoul
=
soup
1
.
select
(
"#mCSB_4_container>ul>li>button"
)
Gyeonggi
=
soup
1
.
select
(
"#mCSB_5_container>ul>li>button"
)
Incheon
=
soup
1
.
select
(
"#mCSB_6_container>ul>li>button"
)
DCS
=
soup
1
.
select
(
"#mCSB_7_container>ul>li>button"
)
#Daejeon Chungcheong Sejong
BDG
=
soup
1
.
select
(
"#mCSB_8_container>ul>li>button"
)
#Busan Daegu Gyeongsang
GJ
=
soup
1
.
select
(
"#mCSB_9_container>ul>li>button"
)
#gwangju_jeonla
Gangwon
=
soup
1
.
select
(
"#mCSB_10_container>ul>li>button"
)
loc
=
[
seoul
,
Gyeonggi
,
Incheon
,
DCS
,
BDG
,
GJ
,
Gangwon
]
def get_location_code(location, mapping=None):
    """Record the branch-name -> branch-number pairs found in *location*.

    Args:
        location: Iterable of items that support ``item['brch-nm']`` and
            ``item['brch-no']`` lookups (the callers in this file pass the
            lists of ``<button>`` tags selected for one region).
        mapping: Optional dict to fill. When omitted, the module-level
            ``theater_location`` dict is updated, which is what the
            existing single-argument call sites rely on.

    Returns:
        The dict that was updated (``mapping`` or ``theater_location``).

    Raises:
        Whatever the item lookup raises when 'brch-nm' or 'brch-no' is
        missing (``KeyError`` for plain dicts).
    """
    if mapping is None:
        # Backward-compatible default: keep filling the shared global dict.
        mapping = theater_location
    for brch in location:
        # A later item with the same branch name overwrites the earlier one.
        mapping[brch['brch-nm']] = brch['brch-no']
    return mapping
for
parameter
in
loc
:
get_location_code
(
parameter
)
page2
=
driver2
.
page_source
soup2
=
BeautifulSoup
(
page2
,
"html.parser"
)
ticketing_rate
=
soup2
.
select
(
'.rate'
)
movie_name
=
soup2
.
select
(
'.tit-area > p.tit'
)
get_movie_info
=
soup1
.
select
(
"#mCSB_1_container>ul>li>button"
)
movie_dict
=
dict
()
for
movie
in
attr1
:
rank
=
1
for
movie
in
get_movie_info
:
movie_dict
[
movie
[
'movie-nm'
]]
=
[
movie
[
'movie-no'
],
movie
[
'form-at'
]]
for
r
,
m
in
zip
(
ticketing_rate
,
movie_name
):
movie_dict
[
m
[
'title'
]]
.
append
(
r
.
string
)
rank
=
1
for
value
in
movie_dict
.
values
():
if
(
len
(
value
)
==
2
):
value
.
append
(
"예메율 0.0
%
"
)
if
(
rank
<=
10
):
value
.
append
({
'rank'
:
rank
})
rank
+=
1
print
(
movie_dict
)
#form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이)
#brch-no로 쿼리 주고 나서 form-at확인 필요
\ No newline at end of file
...
...
Please
register
or
login
to post a comment