임승현

Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'

코드 정리



See merge request !12
1 -from urllib import response
2 -import requests
3 from bs4 import BeautifulSoup 1 from bs4 import BeautifulSoup
4 from selenium import webdriver 2 from selenium import webdriver
5 -from webdriver_manager.chrome import ChromeDriverManager 3 +import chromedriver_autoinstaller
6 -from selenium.webdriver.common.keys import Keys 4 +
5 +chromedriver_autoinstaller.install()
6 +
7 +booking_url = "https://megabox.co.kr/booking"
8 +rate_url = "https://www.megabox.co.kr/movie"
7 9
8 options = webdriver.ChromeOptions() 10 options = webdriver.ChromeOptions()
9 options.add_argument("headless") #창 안 띄움 11 options.add_argument("headless") #창 안 띄움
10 options.add_experimental_option("excludeSwitches", ["enable-logging"]) 12 options.add_experimental_option("excludeSwitches", ["enable-logging"])
11 -driver = webdriver.Chrome(options = options)
12 -driver2=webdriver.Chrome(options = options)
13 13
14 -url = "https://megabox.co.kr/booking" 14 +driver = webdriver.Chrome(options = options)
15 -rate_url = "https://www.megabox.co.kr/movie"
16 driver.maximize_window() 15 driver.maximize_window()
17 -driver2.maximize_window() 16 +# driver.implicitly_wait(2)
18 - 17 +driver.get(booking_url)
19 -driver.implicitly_wait(2)
20 -driver.get(url)
21 18
22 -driver2.implicitly_wait(2) 19 +driver2=webdriver.Chrome(options = options)
20 +driver2.maximize_window()
21 +# driver2.implicitly_wait(2)
23 driver2.get(rate_url) 22 driver2.get(rate_url)
24 23
25 -r2 = driver2.page_source
26 -soup = BeautifulSoup(r2, "html.parser")
27 -ticketing_rate = soup.select('.rate')
28 -movie_name = soup.select('.tit-area > p.tit')
29 24
30 -iframes = driver.find_elements_by_css_selector('iframe') 25 +theater_location = dict()
31 26
27 +# iframes = driver.find_elements_by_css_selector('iframe')
32 driver.switch_to.frame('frameBokdMBooking') 28 driver.switch_to.frame('frameBokdMBooking')
33 -r = driver.page_source 29 +page1 = driver.page_source
34 -soup = BeautifulSoup(r, "html.parser") 30 +soup1 = BeautifulSoup(page1, "html.parser")
35 - 31 +
36 -seoul = soup.select("#mCSB_4_container>ul>li>button") 32 +seoul = soup1.select("#mCSB_4_container>ul>li>button")
37 -Gyeonggi = soup.select("#mCSB_5_container>ul>li>button") 33 +Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button")
38 -Incheon = soup.select("#mCSB_6_container>ul>li>button") 34 +Incheon = soup1.select("#mCSB_6_container>ul>li>button")
39 -DCS = soup.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong 35 +DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong
40 -BDG = soup.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang 36 +BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang
41 -GJ= soup.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla 37 +GJ= soup1.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla
42 -Gangwon = soup.select("#mCSB_10_container>ul>li>button") 38 +Gangwon = soup1.select("#mCSB_10_container>ul>li>button")
43 -dict1 = dict() 39 +
44 -for brch in seoul: 40 +loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]
45 - dict1[brch['brch-nm']] = brch['brch-no'] 41 +
46 -for brch in Gyeonggi: 42 +def get_location_code(location):
47 - dict1[brch['brch-nm']] = brch['brch-no'] 43 + for brch in location:
48 -for brch in Incheon: 44 + theater_location[brch['brch-nm']] = brch['brch-no']
49 - dict1[brch['brch-nm']] = brch['brch-no'] 45 +
50 -for brch in DCS: 46 +
51 - dict1[brch['brch-nm']] = brch['brch-no'] 47 +for parameter in loc:
52 -for brch in BDG: 48 + get_location_code(parameter)
53 - dict1[brch['brch-nm']] = brch['brch-no'] 49 +
54 -for brch in GJ: 50 +
55 - dict1[brch['brch-nm']] = brch['brch-no'] 51 +page2 = driver2.page_source
56 -for brch in Gangwon: 52 +soup2 = BeautifulSoup(page2, "html.parser")
57 - dict1[brch['brch-nm']] = brch['brch-no'] 53 +ticketing_rate = soup2.select('.rate')
58 - 54 +movie_name = soup2.select('.tit-area > p.tit')
59 -attr1 = soup.select("#mCSB_1_container>ul>li>button") 55 +get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")
60 56
61 movie_dict = dict() 57 movie_dict = dict()
62 -for movie in attr1: 58 +rank = 1
63 - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] 59 +
60 +for movie in get_movie_info:
61 + movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]
64 62
65 for r, m in zip(ticketing_rate, movie_name): 63 for r, m in zip(ticketing_rate, movie_name):
66 - movie_dict[m['title']].append(r.string) 64 + movie_dict[m['title']].append(r.string)
65 +
67 66
68 -rank = 1
69 for value in movie_dict.values(): 67 for value in movie_dict.values():
70 if(len(value) == 2): 68 if(len(value) == 2):
71 value.append("예메율 0.0%") 69 value.append("예메율 0.0%")
72 if(rank<=10): 70 if(rank<=10):
73 value.append({'rank' : rank}) 71 value.append({'rank' : rank})
74 rank += 1 72 rank += 1
75 -print(movie_dict)
76 73
77 #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이) 74 #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이)
78 -#brch-no로 쿼리 주고 나서 form-at확인 필요 75 +#brch-no로 쿼리 주고 나서 form-at확인 필요
...\ No newline at end of file ...\ No newline at end of file
......