Showing
3 changed files
with
65 additions
and
75 deletions
Megabox_crawling/app.js
0 → 100644
1 | +const request = require('request'); | ||
2 | +const cheerio = require('cheerio'); | ||
3 | +const puppeteer = require('puppeteer'); | ||
4 | +const {Builder,Key,until} = require('selenium-webdriver'); //load the selenium-webdriver helpers | ||
5 | +var webdriver = require('selenium-webdriver'); | ||
6 | +var By = webdriver.By; | ||
7 | +const chrome = require('selenium-webdriver/chrome');//needed when driving Chrome | ||
8 | + | ||
9 | + | ||
10 | +let booking_url = "https://megabox.co.kr/booking"; | ||
11 | +const rate_url = "https://www.megabox.co.kr/movie"; | ||
12 | +// var booking_options = { encoding: "utf-8", method: "GET", uri: booking_url}; | ||
13 | + | ||
14 | +let r =0; | ||
15 | +let movie_data = []; | ||
16 | + | ||
17 | + | ||
/**
 * Entry point: load the page at `rate_url` in headless Chromium and collect
 * one record per `ol.list > li` element found in the rendered HTML.
 *
 * For every list item the `title` attribute of `div.tit-area > p.tit` and the
 * text of `div.rate-date > span.rate` are read and appended to the
 * module-level `movie_data` array as `{ name, rate }`. Each collected record
 * is then written to stdout with `console.log`.
 *
 * The browser is always closed, even when navigation or parsing throws, and
 * any failure is reported on stderr with a non-zero process exit code.
 */
(async () => {
    // Reset the module-level counter; nothing else in this block reads it.
    r = 0;

    const browser = await puppeteer.launch({
        headless: true
    });

    try {
        const page = await browser.newPage();
        await page.goto(rate_url);
        const content = await page.content();

        const $ = cheerio.load(content);
        $("ol.list>li").each((index, list) => {
            const name = $(list).find('div.tit-area > p.tit').attr('title');
            const rate = $(list).find('div.rate-date > span.rate').text();

            // Record the pair so the logging loop below has data to print.
            movie_data.push({ name, rate });
        });

        // `const` keeps the loop variable local instead of leaking a global.
        for (const movie of movie_data) {
            console.log(movie);
        }
    } finally {
        // Await the shutdown so the Chromium process is not left running.
        await browser.close();
    }
})().catch((err) => {
    // Surface failures instead of leaving an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
});
42 | + | ||
43 | + | ||
44 | + | ||
45 | + | ||
46 | + |
Megabox_crawling/megaboxCrawling.py
deleted
100644 → 0
1 | -from bs4 import BeautifulSoup | ||
2 | -from selenium import webdriver | ||
3 | -import chromedriver_autoinstaller | ||
4 | - | ||
5 | -chromedriver_autoinstaller.install() | ||
6 | - | ||
7 | -booking_url = "https://megabox.co.kr/booking" | ||
8 | -rate_url = "https://www.megabox.co.kr/movie" | ||
9 | - | ||
10 | -options = webdriver.ChromeOptions() | ||
11 | -options.add_argument("headless") #run without opening a browser window | ||
12 | -options.add_experimental_option("excludeSwitches", ["enable-logging"]) | ||
13 | - | ||
14 | -driver = webdriver.Chrome(options = options) | ||
15 | -driver.maximize_window() | ||
16 | -# driver.implicitly_wait(2) | ||
17 | -driver.get(booking_url) | ||
18 | - | ||
19 | -driver2=webdriver.Chrome(options = options) | ||
20 | -driver2.maximize_window() | ||
21 | -# driver2.implicitly_wait(2) | ||
22 | -driver2.get(rate_url) | ||
23 | - | ||
24 | - | ||
25 | -theater_location = dict() | ||
26 | - | ||
27 | -# iframes = driver.find_elements_by_css_selector('iframe') | ||
28 | -driver.switch_to.frame('frameBokdMBooking') | ||
29 | -page1 = driver.page_source | ||
30 | -soup1 = BeautifulSoup(page1, "html.parser") | ||
31 | - | ||
32 | -seoul = soup1.select("#mCSB_4_container>ul>li>button") | ||
33 | -Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button") | ||
34 | -Incheon = soup1.select("#mCSB_6_container>ul>li>button") | ||
35 | -DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong | ||
36 | -BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang | ||
37 | -GJ= soup1.select("#mCSB_9_container>ul>li>button")#Gwangju Jeolla | ||
38 | -Gangwon = soup1.select("#mCSB_10_container>ul>li>button") | ||
39 | - | ||
40 | -loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon] | ||
41 | - | ||
42 | -def get_location_code(location): | ||
43 | - for brch in location: | ||
44 | - theater_location[brch['brch-nm']] = brch['brch-no'] | ||
45 | - | ||
46 | - | ||
47 | -for parameter in loc: | ||
48 | - get_location_code(parameter) | ||
49 | - | ||
50 | - | ||
51 | -page2 = driver2.page_source | ||
52 | -soup2 = BeautifulSoup(page2, "html.parser") | ||
53 | -ticketing_rate = soup2.select('.rate') | ||
54 | -movie_name = soup2.select('.tit-area > p.tit') | ||
55 | -get_movie_info = soup1.select("#mCSB_1_container>ul>li>button") | ||
56 | - | ||
57 | -movie_dict = dict() | ||
58 | -rank = 1 | ||
59 | - | ||
60 | -for movie in get_movie_info: | ||
61 | - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] | ||
62 | - | ||
63 | -for r, m in zip(ticketing_rate, movie_name): | ||
64 | - movie_dict[m['title']].append(r.string) | ||
65 | - | ||
66 | - | ||
67 | -for value in movie_dict.values(): | ||
68 | - if(len(value) == 2): | ||
69 | - value.append("예메율 0.0%") | ||
70 | - if(rank<=10): | ||
71 | - value.append({'rank' : rank}) | ||
72 | - rank += 1 | ||
73 | - | ||
74 | -#form-at: on the initial fetch, check whether the movie is showing on that date (regardless of location) | ||
75 | -#form-at needs to be re-checked after querying with brch-no | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
Megabox_crawling/package.json
0 → 100644
1 | +{ | ||
2 | + "name": "me", | ||
3 | + "version": "1.0.0", | ||
4 | + "description": "", | ||
5 | + "main": "app.js", | ||
6 | + "scripts": { | ||
7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
8 | + }, | ||
9 | + "keywords": [], | ||
10 | + "author": "", | ||
11 | + "license": "ISC", | ||
12 | + "dependencies": { | ||
13 | + "body-parser": "^1.20.0", | ||
14 | + "cheerio": "^1.0.0-rc.11", | ||
15 | + "express": "^4.18.1", | ||
16 | + "puppeteer": "^14.1.1", | ||
17 | + "selenium-webdriver": "^4.1.2" | ||
18 | + } | ||
19 | +} |
-
Please register or login to post a comment