Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
Feature/megabox crawling 크롤링 언어 변경 및 위치와 일자 받아서 상영 여부 받아오기 진행 See merge request !15
Showing
3 changed files
with
165 additions
and
75 deletions
Megabox_crawling/app.js
0 → 100644
1 | +const request = require('request'); | ||
2 | +const cheerio = require('cheerio'); | ||
3 | +const puppeteer = require('puppeteer'); | ||
4 | + | ||
5 | +const {Builder,until} = require('selenium-webdriver'); //모듈 불러오기 | ||
6 | +var webdriver = require('selenium-webdriver'); | ||
7 | +var By = webdriver.By; | ||
8 | +const chrome = require('selenium-webdriver/chrome');//크롬 사용시 | ||
9 | + | ||
10 | +const async = require('async') | ||
11 | +let express = require('express'); | ||
12 | +let app = express(); | ||
13 | +let bodyParser = require('body-parser'); | ||
14 | +const { timeout } = require('async'); | ||
// Register the request-body parsers: URL-encoded forms first, then JSON.
app.use(
  bodyParser.urlencoded({ extended: false }),
  bodyParser.json()
);

// Megabox pages this service scrapes.
const booking_url = 'https://megabox.co.kr/booking';
const rate_url = 'https://www.megabox.co.kr/movie';

// Module-level crawl state shared by the startup crawl and the route handlers.
let movie_data = [];     // one record per movie found on the booking page
let location_data = [];  // one record per theater found on the booking page
let r = 0;               // running index into movie_data
let index = 0;           // running index into location_data
26 | + | ||
27 | + | ||
// Upper bound (ms) for locating booking-page elements, so that a markup
// change makes the crawl fail instead of waiting forever.
const LOCATE_TIMEOUT_MS = 30000;

// Theater buttons of each region list, in the order they are stored.
const REGION_BUTTON_SELECTORS = [
  '#mCSB_4_container>ul>li>#btn',  // Seoul
  '#mCSB_5_container>ul>li>#btn',  // Gyeonggi
  '#mCSB_6_container>ul>li>#btn',  // Incheon
  '#mCSB_7_container>ul>li>#btn',  // Daejeon / Chungcheong / Sejong
  '#mCSB_8_container>ul>li>#btn',  // Busan / Daegu / Gyeongsang
  '#mCSB_9_container>ul>li>#btn',  // Gwangju / Jeolla
  '#mCSB_10_container>ul>li>#btn', // Gangwon
];

/**
 * Step 1: opens the booking page in headless Chrome and fills
 * `location_data` ({LocationName, LocationNUm}) and `movie_data`
 * ({rank, title, movie_num}) from the button attributes.
 *
 * Both arrays are replaced only after the whole page was read, and the
 * browser session is always terminated, even when a lookup fails.
 *
 * @returns {Promise<void>}
 */
async function crawlBookingPage() {
  const driver = new webdriver.Builder()
    .forBrowser('chrome')
    .setChromeOptions(new chrome.Options().headless())
    .build();

  try {
    // Each command must be awaited; otherwise the element lookups can be
    // issued before navigation and the frame switch have completed.
    await driver.get(booking_url);
    await driver.switchTo().frame(0); // the booking UI lives in the first iframe (frameBokdMBooking)

    const locations = [];
    for (const selector of REGION_BUTTON_SELECTORS) {
      const buttons = await driver.wait(
        until.elementsLocated(By.css(selector)),
        LOCATE_TIMEOUT_MS
      );
      for (const button of buttons) {
        locations.push({
          'LocationName': await button.getAttribute('brch-nm'),
          'LocationNUm': await button.getAttribute('brch-no'), // key spelling is relied on by the POST handler
        });
      }
    }

    const movieButtons = await driver.wait(
      until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')),
      LOCATE_TIMEOUT_MS
    );
    const movies = [];
    for (const button of movieButtons) {
      movies.push({
        'rank': movies.length + 1, // 1-based position in the booking list
        'title': await button.getAttribute('movie-nm'),
        'movie_num': await button.getAttribute('movie-no'),
      });
    }

    // Mutate the shared arrays in place so every holder of a reference sees the data.
    location_data.splice(0, location_data.length, ...locations);
    movie_data.splice(0, movie_data.length, ...movies);
  } finally {
    // quit() ends the whole session (browser + driver process); close() only closes the window.
    await driver.quit();
  }
}

/**
 * Step 2: loads the movie overview page with puppeteer, parses it with
 * cheerio and stores the booking rate of every entry of `movie_data` in its
 * `rate` field. Movies that are not listed on the page get '예매율 0%'.
 *
 * Rates are matched by title, so extra or reordered entries on the overview
 * page can neither shift the rates nor index past the end of `movie_data`.
 *
 * @returns {Promise<void>}
 */
async function crawlBookingRates() {
  const browser = await puppeteer.launch({
    headless: true
  });

  let content;
  try {
    const page = await browser.newPage();
    await page.goto(rate_url);
    content = await page.content();
  } finally {
    await browser.close();
  }

  const $ = cheerio.load(content);
  const rateByTitle = new Map();
  $('ol.list>li').each((_, list) => {
    const name = $(list).find('div.tit-area > p.tit').attr('title');
    const rate = $(list).find('div.rate-date > span.rate').text();
    // The first occurrence of a title wins.
    if (name !== undefined && !rateByTitle.has(name)) {
      rateByTitle.set(name, rate);
    }
  });

  for (const movie of movie_data) {
    movie['rate'] = rateByTitle.has(movie.title)
      ? rateByTitle.get(movie.title)
      : '예매율 0%';
  }
}

// Run both crawl steps in order (the rates step needs the movie list) and
// report a failure instead of leaving it as an unhandled rejection.
async.waterfall([crawlBookingPage, crawlBookingRates], (err) => {
  if (err) {
    console.error('Initial Megabox crawl failed:', err);
  }
});
102 | + | ||
103 | + | ||
// Holds the date and theater most recently submitted through POST /Megabox.
let userData = {
  Date: '',
  location: '',
};

/**
 * GET /Megabox
 * Responds with the current contents of `movie_data`.
 */
app.get('/Megabox', function sendMovieData(_req, res) {
  res.send(movie_data);
});
114 | + | ||
// Long-lived headless Chrome session shared by the request handlers below.
const appdriver = new webdriver.Builder()
  .forBrowser('chrome')
  .setChromeOptions(new chrome.Options().headless())
  .build();

/**
 * POST /Megabox
 * Body: `Date` (play date) and `location` (theater name as stored in
 * `location_data[].LocationName`).
 *
 * Resolves the theater name to its branch number, navigates the shared
 * driver to the booking page for that theater and date, and responds with
 * `movie_data` once the page and its booking iframe are loaded.
 *
 * Responses:
 *   200 - `movie_data`
 *   400 - `Date` or `location` is missing
 *   404 - `location` is not a known theater name
 *   502 - the booking page could not be loaded
 */
app.post('/Megabox', async (req, res) => {
  const { Date: playDate, location: locationName } = req.body || {};
  if (!playDate || !locationName) {
    res.status(400).send({ error: 'Both "Date" and "location" are required.' });
    return;
  }

  const theater = location_data.find(
    (entry) => entry['LocationName'] === String(locationName)
  );
  if (theater === undefined) {
    // Do not fall back to the previously selected theater: that would
    // silently answer for the wrong location.
    res.status(404).send({ error: `Unknown theater location: ${locationName}` });
    return;
  }

  userData['Date'] = playDate;
  userData['location'] = theater['LocationNUm'];

  // Let URL/searchParams do the encoding instead of concatenating user input.
  const playingMovieUrl = new URL(booking_url);
  playingMovieUrl.searchParams.set('brchNo1', userData['location']);
  playingMovieUrl.searchParams.set('playDe', userData['Date']);

  try {
    // Wait for navigation and the frame switch so that a follow-up
    // GET /Megabox/GetPlayingMovie reads the page requested here.
    await appdriver.get(playingMovieUrl.href);
    await appdriver.switchTo().frame(0); // the booking UI lives in the first iframe (frameBokdMBooking)
  } catch (err) {
    console.error('Failed to load the Megabox booking page:', err);
    res.status(502).send({ error: 'Failed to load the Megabox booking page.' });
    return;
  }

  res.send(movie_data);
});
133 | + | ||
// Upper bound (ms) for locating the movie buttons, so the request fails
// instead of hanging when the driver is not on a booking page.
const PLAYING_LIST_TIMEOUT_MS = 30000;

/**
 * GET /Megabox/GetPlayingMovie
 * Reads the movie buttons of the page currently loaded in `appdriver`
 * (expected to be the page selected through POST /Megabox) and copies each
 * button's `form-at` attribute into the `running` field of the matching
 * `movie_data` entry, then responds with `movie_data`.
 *
 * Buttons are matched to entries by `movie-no`, not by position, so a list
 * that is longer, shorter or differently ordered than `movie_data` cannot
 * write to the wrong movie or index past the end of the array. Buttons with
 * no matching entry are ignored. Failures are passed to Express via `next`.
 */
app.get('/Megabox/GetPlayingMovie', async (req, res, next) => {
  try {
    const buttons = await appdriver.wait(
      until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')),
      PLAYING_LIST_TIMEOUT_MS
    );

    const moviesByNumber = new Map(
      movie_data.map((movie) => [movie['movie_num'], movie])
    );

    for (const button of buttons) {
      const movie = moviesByNumber.get(await button.getAttribute('movie-no'));
      if (movie !== undefined) {
        // NOTE(review): `form-at` appears to flag whether the movie is
        // showing for the selected theater/date - confirm against the page.
        movie['running'] = await button.getAttribute('form-at');
      }
    }

    res.send(movie_data);
  } catch (err) {
    // Without this, a rejected wait would leave the request hanging.
    next(err);
  }
});

app.listen(23023);
... | \ No newline at end of file | ... | \ No newline at end of file |
Megabox_crawling/megaboxCrawling.py
deleted
100644 → 0
1 | -from bs4 import BeautifulSoup | ||
2 | -from selenium import webdriver | ||
3 | -import chromedriver_autoinstaller | ||
4 | - | ||
# Install a chromedriver for the local Chrome (chromedriver_autoinstaller).
chromedriver_autoinstaller.install()

booking_url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"

options = webdriver.ChromeOptions()
options.add_argument("headless")  # do not open a browser window
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Fetch both pages, then always shut the browsers down: without quit() every
# run leaves two headless Chrome processes behind.
driver = webdriver.Chrome(options = options)
driver2 = None
try:
    driver.maximize_window()
    driver.get(booking_url)

    driver2 = webdriver.Chrome(options = options)
    driver2.maximize_window()
    driver2.get(rate_url)

    # The booking UI (theater and movie lists) lives inside this iframe.
    driver.switch_to.frame('frameBokdMBooking')
    page1 = driver.page_source
    page2 = driver2.page_source
finally:
    try:
        driver.quit()
    finally:
        if driver2 is not None:
            driver2.quit()

# Theater name (brch-nm) -> branch number (brch-no).
theater_location = dict()

soup1 = BeautifulSoup(page1, "html.parser")

seoul = soup1.select("#mCSB_4_container>ul>li>button")
Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button")
Incheon = soup1.select("#mCSB_6_container>ul>li>button")
DCS = soup1.select("#mCSB_7_container>ul>li>button")  # Daejeon / Chungcheong / Sejong
BDG = soup1.select("#mCSB_8_container>ul>li>button")  # Busan / Daegu / Gyeongsang
GJ = soup1.select("#mCSB_9_container>ul>li>button")  # Gwangju / Jeolla
Gangwon = soup1.select("#mCSB_10_container>ul>li>button")

loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]


def get_location_code(location):
    """Record every theater button of one region in ``theater_location``."""
    for brch in location:
        theater_location[brch['brch-nm']] = brch['brch-no']


for parameter in loc:
    get_location_code(parameter)


soup2 = BeautifulSoup(page2, "html.parser")
ticketing_rate = soup2.select('.rate')
movie_name = soup2.select('.tit-area > p.tit')
get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")

# Movie title -> [movie-no, form-at, booking rate, {'rank': n} (first ten only)].
movie_dict = dict()
rank = 1

for movie in get_movie_info:
    movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]

# Attach the booking rate by title. Titles that appear on the overview page
# but not in the booking list are skipped instead of raising KeyError.
for rate_tag, title_tag in zip(ticketing_rate, movie_name):
    info = movie_dict.get(title_tag.get('title'))
    if info is not None:
        info.append(rate_tag.string)


for value in movie_dict.values():
    if len(value) == 2:
        # No rate was found for this movie on the overview page.
        value.append("예메율 0.0%")
    if rank <= 10:
        value.append({'rank' : rank})
        rank += 1

# form-at: on the initial, unfiltered load it tells whether the movie is
# showing on that date (regardless of theater).
# form-at still has to be re-checked after querying with a brch-no.
... | \ No newline at end of file | ... | \ No newline at end of file |
Megabox_crawling/package.json
0 → 100644
1 | +{ | ||
2 | + "name": "me", | ||
3 | + "version": "1.0.0", | ||
4 | + "description": "", | ||
5 | + "main": "app.js", | ||
6 | + "scripts": { | ||
7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
8 | + }, | ||
9 | + "keywords": [], | ||
10 | + "author": "", | ||
11 | + "license": "ISC", | ||
12 | + "dependencies": { | ||
13 | + "async": "^3.2.3", | ||
14 | + "body-parser": "^1.20.0", | ||
15 | + "cheerio": "^1.0.0-rc.11", | ||
16 | + "express": "^4.18.1", | ||
17 | + "puppeteer": "^14.1.1", | ||
18 | + "selenium-webdriver": "^4.1.2" | ||
19 | + } | ||
20 | +} |
-
Please register or login to post a comment