임승현

Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'

코드 정리



See merge request !12
from urllib import response

import chromedriver_autoinstaller
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
chromedriver_autoinstaller.install()
# Pages to scrape: the booking page (theater and movie pickers) and the
# movie page (read below for the '.rate' and title elements).
booking_url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"
url = booking_url  # legacy alias; same value the script used to assign to `url`

options = webdriver.ChromeOptions()
options.add_argument("headless")  # do not open a visible browser window
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Exactly one browser per page.  Each driver used to be instantiated twice,
# which left the first Chrome process of each pair running with no handle
# to it, and the booking page was loaded twice in a row.
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.implicitly_wait(2)
driver.get(booking_url)

# No implicit wait here: the only call that set one targeted a driver2
# instance that was replaced immediately afterwards.
driver2 = webdriver.Chrome(options=options)
driver2.maximize_window()
driver2.get(rate_url)
# Snapshot the page loaded in driver2 and select the '.rate' elements and
# the title paragraphs from it.
r2 = driver2.page_source
soup = BeautifulSoup(markup=r2, features="html.parser")
ticketing_rate, movie_name = (
    soup.select('.rate'),
    soup.select('.tit-area > p.tit'),
)
# find_elements_by_css_selector() no longer exists in Selenium >= 4.3;
# find_elements(By.CSS_SELECTOR, ...) is the supported equivalent.
iframes = driver.find_elements(By.CSS_SELECTOR, 'iframe')

# Filled in later with {branch name: branch number}.
theater_location = dict()

# Switch into the 'frameBokdMBooking' frame before taking the page source.
driver.switch_to.frame('frameBokdMBooking')
r = driver.page_source
soup = BeautifulSoup(r, "html.parser")

# One button list per region; the selectors differ only in the number
# inside "#mCSB_<n>_container" (4 through 10).
seoul = soup.select("#mCSB_4_container>ul>li>button")
Gyeonggi = soup.select("#mCSB_5_container>ul>li>button")
Incheon = soup.select("#mCSB_6_container>ul>li>button")
DCS = soup.select("#mCSB_7_container>ul>li>button")  # Daejeon / Chungcheong / Sejong
BDG = soup.select("#mCSB_8_container>ul>li>button")  # Busan / Daegu / Gyeongsang
GJ = soup.select("#mCSB_9_container>ul>li>button")  # Gwangju / Jeolla
Gangwon = soup.select("#mCSB_10_container>ul>li>button")
# Map every branch name ('brch-nm') to its branch number ('brch-no') across
# all region button lists, walking the regions in a fixed order so that a
# later region overwrites an earlier one on a duplicate name.
dict1 = {}
for region_buttons in (seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon):
    for brch in region_buttons:
        dict1[brch['brch-nm']] = brch['brch-no']
# Movie buttons, selected from the soup parsed earlier in the script.
attr1 = soup.select("#mCSB_1_container>ul>li>button")

# Take a fresh snapshot of the booking driver's current page source.
page1 = driver.page_source
soup1 = BeautifulSoup(page1, "html.parser")

# Region button lists come from containers 4..10, in this order:
# Seoul, Gyeonggi, Incheon, DCS (Daejeon/Chungcheong/Sejong),
# BDG (Busan/Daegu/Gyeongsang), GJ (Gwangju/Jeolla), Gangwon.
loc = [
    soup1.select(f"#mCSB_{container_no}_container>ul>li>button")
    for container_no in range(4, 11)
]
seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon = loc
def get_location_code(location):
    """Record each branch of *location* in the global ``theater_location``.

    *location* is an iterable of elements that support ``['brch-nm']`` and
    ``['brch-no']`` lookups; each one adds (or overwrites) the entry
    ``theater_location[brch-nm] = brch-no``.  Returns ``None``.
    """
    theater_location.update(
        (button['brch-nm'], button['brch-no']) for button in location
    )
# Feed every region's button list through get_location_code so that
# theater_location ends up covering all regions in `loc`.
for parameter in iter(loc):
    get_location_code(location=parameter)
# Re-read the page held by driver2 and select the '.rate' elements and the
# title paragraphs from it.
page2 = driver2.page_source
soup2 = BeautifulSoup(markup=page2, features="html.parser")
ticketing_rate, movie_name = (
    soup2.select('.rate'),
    soup2.select('.tit-area > p.tit'),
)

# Movie buttons from the booking-page snapshot (soup1).
get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")
# Build one record per movie, keyed by title ('movie-nm'):
#     [movie-no, form-at, rate string, {'rank': n}]
# The trailing {'rank': n} is only present for the first ten entries.
movie_dict = dict()

# Both button lists use the same selector; a later entry replaces an
# earlier one with the same title.
for movie in (*attr1, *get_movie_info):
    movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]

# Attach each rate string exactly once.  It used to be appended twice,
# which made the length check below and the record shape inconsistent.
for rate_tag, title_tag in zip(ticketing_rate, movie_name):
    record = movie_dict.get(title_tag['title'])
    if record is None:
        # This title has no button in the booking list, so there is no
        # record to extend; skipping it avoids a KeyError.
        continue
    record.append(rate_tag.string)

# Rank follows the dict's insertion order, i.e. the order of the booking
# page's movie buttons.
for rank, value in enumerate(movie_dict.values(), start=1):
    if len(value) == 2:
        # No rate was attached above, so store a zero-rate placeholder.
        # NOTE(review): the spelling "예메율" may be a typo for "예매율";
        # confirm against the strings the '.rate' elements actually hold.
        value.append("예메율 0.0%")
    if rank <= 10:
        value.append({'rank': rank})

print(movie_dict)
# form-at: on the initial fetch, this shows whether the movie is playing on
# the given date (regardless of location).
# After querying with a brch-no, form-at needs to be checked again.
\ No newline at end of file
......