Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
코드 정리 See merge request !12
Showing 1 changed file with 48 additions and 51 deletions
1 | -from urllib import response | ||
2 | -import requests | ||
3 | from bs4 import BeautifulSoup | 1 | from bs4 import BeautifulSoup |
4 | from selenium import webdriver | 2 | from selenium import webdriver |
5 | -from webdriver_manager.chrome import ChromeDriverManager | 3 | +import chromedriver_autoinstaller |
6 | -from selenium.webdriver.common.keys import Keys | 4 | + |
5 | +chromedriver_autoinstaller.install() | ||
6 | + | ||
7 | +booking_url = "https://megabox.co.kr/booking" | ||
8 | +rate_url = "https://www.megabox.co.kr/movie" | ||
7 | 9 | ||
8 | options = webdriver.ChromeOptions() | 10 | options = webdriver.ChromeOptions() |
9 | options.add_argument("headless") #창 안 띄움 | 11 | options.add_argument("headless") #창 안 띄움 |
10 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) | 12 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) |
11 | -driver = webdriver.Chrome(options = options) | ||
12 | -driver2=webdriver.Chrome(options = options) | ||
13 | 13 | ||
14 | -url = "https://megabox.co.kr/booking" | 14 | +driver = webdriver.Chrome(options = options) |
15 | -rate_url = "https://www.megabox.co.kr/movie" | ||
16 | driver.maximize_window() | 15 | driver.maximize_window() |
17 | -driver2.maximize_window() | 16 | +# driver.implicitly_wait(2) |
18 | - | 17 | +driver.get(booking_url) |
19 | -driver.implicitly_wait(2) | ||
20 | -driver.get(url) | ||
21 | 18 | ||
22 | -driver2.implicitly_wait(2) | 19 | +driver2=webdriver.Chrome(options = options) |
20 | +driver2.maximize_window() | ||
21 | +# driver2.implicitly_wait(2) | ||
23 | driver2.get(rate_url) | 22 | driver2.get(rate_url) |
24 | 23 | ||
25 | -r2 = driver2.page_source | ||
26 | -soup = BeautifulSoup(r2, "html.parser") | ||
27 | -ticketing_rate = soup.select('.rate') | ||
28 | -movie_name = soup.select('.tit-area > p.tit') | ||
29 | 24 | ||
30 | -iframes = driver.find_elements_by_css_selector('iframe') | 25 | +theater_location = dict() |
31 | 26 | ||
27 | +# iframes = driver.find_elements_by_css_selector('iframe') | ||
32 | driver.switch_to.frame('frameBokdMBooking') | 28 | driver.switch_to.frame('frameBokdMBooking') |
33 | -r = driver.page_source | 29 | +page1 = driver.page_source |
34 | -soup = BeautifulSoup(r, "html.parser") | 30 | +soup1 = BeautifulSoup(page1, "html.parser") |
35 | - | 31 | + |
36 | -seoul = soup.select("#mCSB_4_container>ul>li>button") | 32 | +seoul = soup1.select("#mCSB_4_container>ul>li>button") |
37 | -Gyeonggi = soup.select("#mCSB_5_container>ul>li>button") | 33 | +Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button") |
38 | -Incheon = soup.select("#mCSB_6_container>ul>li>button") | 34 | +Incheon = soup1.select("#mCSB_6_container>ul>li>button") |
39 | -DCS = soup.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong | 35 | +DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong |
40 | -BDG = soup.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang | 36 | +BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang |
41 | -GJ= soup.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla | 37 | +GJ= soup1.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla |
42 | -Gangwon = soup.select("#mCSB_10_container>ul>li>button") | 38 | +Gangwon = soup1.select("#mCSB_10_container>ul>li>button") |
43 | -dict1 = dict() | 39 | + |
44 | -for brch in seoul: | 40 | +loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon] |
45 | - dict1[brch['brch-nm']] = brch['brch-no'] | 41 | + |
46 | -for brch in Gyeonggi: | 42 | +def get_location_code(location): |
47 | - dict1[brch['brch-nm']] = brch['brch-no'] | 43 | + for brch in location: |
48 | -for brch in Incheon: | 44 | + theater_location[brch['brch-nm']] = brch['brch-no'] |
49 | - dict1[brch['brch-nm']] = brch['brch-no'] | 45 | + |
50 | -for brch in DCS: | 46 | + |
51 | - dict1[brch['brch-nm']] = brch['brch-no'] | 47 | +for parameter in loc: |
52 | -for brch in BDG: | 48 | + get_location_code(parameter) |
53 | - dict1[brch['brch-nm']] = brch['brch-no'] | 49 | + |
54 | -for brch in GJ: | 50 | + |
55 | - dict1[brch['brch-nm']] = brch['brch-no'] | 51 | +page2 = driver2.page_source |
56 | -for brch in Gangwon: | 52 | +soup2 = BeautifulSoup(page2, "html.parser") |
57 | - dict1[brch['brch-nm']] = brch['brch-no'] | 53 | +ticketing_rate = soup2.select('.rate') |
58 | - | 54 | +movie_name = soup2.select('.tit-area > p.tit') |
59 | -attr1 = soup.select("#mCSB_1_container>ul>li>button") | 55 | +get_movie_info = soup1.select("#mCSB_1_container>ul>li>button") |
60 | 56 | ||
61 | movie_dict = dict() | 57 | movie_dict = dict() |
62 | -for movie in attr1: | 58 | +rank = 1 |
63 | - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] | 59 | + |
60 | +for movie in get_movie_info: | ||
61 | + movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] | ||
64 | 62 | ||
65 | for r, m in zip(ticketing_rate, movie_name): | 63 | for r, m in zip(ticketing_rate, movie_name): |
66 | - movie_dict[m['title']].append(r.string) | 64 | + movie_dict[m['title']].append(r.string) |
65 | + | ||
67 | 66 | ||
68 | -rank = 1 | ||
69 | for value in movie_dict.values(): | 67 | for value in movie_dict.values(): |
70 | if(len(value) == 2): | 68 | if(len(value) == 2): |
71 | value.append("예메율 0.0%") | 69 | value.append("예메율 0.0%") |
72 | if(rank<=10): | 70 | if(rank<=10): |
73 | value.append({'rank' : rank}) | 71 | value.append({'rank' : rank}) |
74 | rank += 1 | 72 | rank += 1 |
75 | -print(movie_dict) | ||
76 | 73 | ||
77 | #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이) | 74 | #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이) |
78 | -#brch-no로 쿼리 주고 나서 form-at확인 필요 | 75 | +#brch-no로 쿼리 주고 나서 form-at확인 필요 |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment