임승현

Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'

Feature/megabox crawling

크롤링 언어 변경 및 위치와 일자 받아서 상영 여부 받아오기 진행

See merge request !15
1 +const request = require('request'); // NOTE(review): never referenced in the visible code and not among the package.json dependencies added in this merge request — confirm it is needed/installed
2 +const cheerio = require('cheerio');
3 +const puppeteer = require('puppeteer');
4 +
5 +const {Builder,until} = require('selenium-webdriver'); // load the selenium-webdriver module (the visible code calls webdriver.Builder, not this Builder binding)
6 +var webdriver = require('selenium-webdriver');
7 +var By = webdriver.By;
8 +const chrome = require('selenium-webdriver/chrome');// needed when driving Chrome
9 +
10 +const async = require('async') // used below for async.waterfall
11 +let express = require('express');
12 +let app = express();
13 +let bodyParser = require('body-parser');
14 +const { timeout } = require('async'); // NOTE(review): timeout is not referenced in the visible code
15 +app.use(bodyParser.urlencoded({ extended: false }));
16 +app.use(bodyParser.json());
17 +
18 +
19 +const booking_url = "https://megabox.co.kr/booking"; // page the theater and movie lists are read from
20 +const rate_url = "https://www.megabox.co.kr/movie"; // page the booking rates are read from
21 +
22 +let r =0; // shared cursor into movie_data; reset by each crawl step
23 +let movie_data = []; // one object per movie: rank, title, movie_num, later also rate and running
24 +let location_data = []; // one object per theater branch: LocationName, LocationNUm
25 +let index = 0; // next free slot in location_data
26 +
27 +
28 +async.waterfall([// for sequential (synchronous-style) processing: step 1 fills location_data and movie_data, step 2 adds the booking rate
29 + async () => { // step 1: read theater branches and the movie list from the booking page
30 + const driver = new webdriver.Builder().forBrowser('chrome').setChromeOptions(new chrome.Options().headless()).build();// headless Chrome session used only inside this step
31 + driver.get(booking_url); // NOTE(review): the result is not awaited — confirm navigation is guaranteed to finish before the lookups below
32 + driver.switchTo().frame(0)// switch into the frameBokdMBooking frame (addressed here as frame index 0); NOTE(review): also not awaited
33 +
34 + let seoul = await driver.wait(until.elementsLocated(By.css('#mCSB_4_container>ul>li>#btn'))); // each of these waits until the elements matching one container's selector are located (presumably one container per region — confirm against the page)
35 + let Gyeonggi = await driver.wait(until.elementsLocated(By.css('#mCSB_5_container>ul>li>#btn')));
36 + const Incheon = await driver.wait(until.elementsLocated(By.css('#mCSB_6_container>ul>li>#btn')));
37 + const DCS = await driver.wait(until.elementsLocated(By.css('#mCSB_7_container>ul>li>#btn')));//Daejeon Chungcheong Sejong
38 + const BDG = await driver.wait(until.elementsLocated(By.css('#mCSB_8_container>ul>li>#btn')));//Busan Daegu Gyeongsang
39 + const GJ= await driver.wait(until.elementsLocated(By.css('#mCSB_9_container>ul>li>#btn')));//gwangju_jeonla
40 + const Gangwon = await driver.wait(until.elementsLocated(By.css('#mCSB_10_container>ul>li>#btn')));
41 + const location_list = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]// all region lists; flattened into location_data below
42 + for(let i = 0; i < location_list.length; i++){
43 + for (item of location_list[i]) { // NOTE(review): item is assigned without let/const, so it becomes an implicit global
44 + location_data[index++] = {
45 + 'LocationName':await item.getAttribute("brch-nm"),
46 + 'LocationNUm' : await item.getAttribute("brch-no") // key is spelled 'LocationNUm'; the POST /Megabox handler reads it with the same spelling
47 + }
48 + // let location_name = await item.getAttribute("brch-nm");
49 + // let location_num = await item.getAttribute("brch-no");
50 + // let obj = {};
51 + // obj[location_name]= location_num
52 + // location_data[index++] = obj;
53 +
54 + }
55 + }
56 +
57 + let movie_list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn'))); // movie buttons, in page order
58 + r = 0;
59 + for (item of movie_list) {
60 + //Using getAttribute to get the data
61 + movie_data[r++] = {
62 + 'rank' : r, // r was already incremented by the index expression above, so rank starts at 1
63 + 'title' : await item.getAttribute("movie-nm"),
64 + 'movie_num':await item.getAttribute("movie-no"),
65 + }
66 + }
67 +
68 + driver.close(); // NOTE(review): not awaited, and close() rather than quit() — confirm the browser session is fully released
69 +
70 + },
71 +
72 + async () => { // step 2: read booking rates from the movie page and attach them to movie_data
73 +
74 + r = 0;
75 + const browser = await puppeteer.launch({
76 + headless: true
77 + });
78 + const page = await browser.newPage();
79 + await page.goto(rate_url);
80 + const content = await page.content(); // page HTML, parsed with cheerio below
81 +
82 + const $ = cheerio.load(content);
83 + const $rate_lists = $("ol.list>li");
84 + $rate_lists.each((index, list) => { // this index parameter shadows the module-level index and is not used
85 + const name = $(list).find('div.tit-area > p.tit').attr('title');
86 + const rate = $(list).find('div.rate-date > span.rate').text();
87 +
88 + if(movie_data[r].title === name){ // a rate is attached only while this list stays in step with movie_data; NOTE(review): movie_data[r] is undefined once r reaches movie_data.length, which would throw here
89 + movie_data[r++]['rate'] = rate;
90 + }
91 + });
92 + for(i of movie_data){ // NOTE(review): i is assigned without let/const, so it becomes an implicit global
93 + if(Object.keys(i).length==3){ // exactly three keys (rank, title, movie_num) means no rate was attached above
94 + movie_data[r++]['rate'] = '예매율 0%'; // NOTE(review): writes to movie_data[r], not to the entry i being inspected — confirm this is intended
95 + }
96 + }
97 +
98 + browser.close(); // NOTE(review): not awaited
99 + },
100 +
101 +]) // NOTE(review): no completion callback is passed, so nothing visible here handles a failure of either step
102 +
103 +
104 +let userData = { // values taken from the most recent POST /Megabox request; one shared object for all clients
105 + 'Date': '', // set from req.body.Date
106 + 'location':'' // set to the matching branch number (LocationNUm), not the branch name
107 +};
108 +// const _sleep = (delay) => new Promise((resolve) => setTimeout(resolve, delay));
109 +app.get('/Megabox', (req, res) => { // GET /Megabox: respond with movie_data in whatever state the crawl has left it
110 +
111 + res.send(movie_data);
112 +
113 +})
114 +
115 +const appdriver = new webdriver.Builder().forBrowser('chrome').setChromeOptions(new chrome.Options().headless()).build();// one headless Chrome session, created at module load and shared by the POST /Megabox and GET /Megabox/GetPlayingMovie handlers
116 +
117 +app.post('/Megabox', (req, res) => {// receives Date and location (theater name) from the user
118 + userData['Date'] = req.body.Date;
119 + for(i of location_data){ // NOTE(review): i is assigned without let/const, so it becomes an implicit global
120 + if(i['LocationName'] == req.body.location){
121 + userData['location']=i['LocationNUm']; // translate the theater name into its branch number
122 + break;
123 + }
124 + } // NOTE(review): when no name matches, userData['location'] keeps its previous value
125 +
126 + let PlayingMovieURL = booking_url + '?brchNo1='+userData['location']+'&playDe='+userData['Date'];// booking URL for the user's theater and date, used to check which movies are showing there on that day; NOTE(review): the values are concatenated without URL encoding
127 +
128 + appdriver.get(PlayingMovieURL); // NOTE(review): not awaited — the response below is sent without waiting for this navigation
129 + appdriver.switchTo().frame(0)// switch into the frameBokdMBooking frame (addressed here as frame index 0); NOTE(review): also not awaited
130 + res.send(movie_data);
131 +
132 +})
133 +
134 +app.get('/Megabox/GetPlayingMovie', async(req, res, next) => {// stores each movie's showing status in its movie_data object; reads the page appdriver was last pointed at by POST /Megabox
135 +
136 + let movie_list = await appdriver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
137 + let n = 0;
138 + for (item of movie_list) { // NOTE(review): item is assigned without let/const, so it becomes an implicit global
139 + movie_data[n++]['running'] = await item.getAttribute('form-at') // matched by position only; NOTE(review): throws if this page lists more movies than movie_data holds, and nothing here catches it or calls next
140 + }
141 +
142 + res.send(movie_data);
143 +})
144 +
145 +app.listen(23023); // start the HTTP server on port 23023
...\ No newline at end of file ...\ No newline at end of file
1 -from bs4 import BeautifulSoup
2 -from selenium import webdriver
3 -import chromedriver_autoinstaller
4 -
5 -chromedriver_autoinstaller.install()
6 -
7 -booking_url = "https://megabox.co.kr/booking"
8 -rate_url = "https://www.megabox.co.kr/movie"
9 -
10 -options = webdriver.ChromeOptions()
11 -options.add_argument("headless") # do not open a browser window
12 -options.add_experimental_option("excludeSwitches", ["enable-logging"])
13 -
14 -driver = webdriver.Chrome(options = options) # first browser: booking page
15 -driver.maximize_window()
16 -# driver.implicitly_wait(2)
17 -driver.get(booking_url)
18 -
19 -driver2=webdriver.Chrome(options = options) # second browser: movie page used for the rates
20 -driver2.maximize_window()
21 -# driver2.implicitly_wait(2)
22 -driver2.get(rate_url)
23 -
24 -
25 -theater_location = dict() # maps branch name (brch-nm) to branch number (brch-no)
26 -
27 -# iframes = driver.find_elements_by_css_selector('iframe')
28 -driver.switch_to.frame('frameBokdMBooking')
29 -page1 = driver.page_source
30 -soup1 = BeautifulSoup(page1, "html.parser")
31 -
32 -seoul = soup1.select("#mCSB_4_container>ul>li>button")
33 -Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button")
34 -Incheon = soup1.select("#mCSB_6_container>ul>li>button")
35 -DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong
36 -BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang
37 -GJ= soup1.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla
38 -Gangwon = soup1.select("#mCSB_10_container>ul>li>button")
39 -
40 -loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon] # every region's list of branch buttons
41 -
42 -def get_location_code(location): # record each branch button of one region in theater_location
43 - for brch in location:
44 - theater_location[brch['brch-nm']] = brch['brch-no']
45 -
46 -
47 -for parameter in loc:
48 - get_location_code(parameter)
49 -
50 -
51 -page2 = driver2.page_source
52 -soup2 = BeautifulSoup(page2, "html.parser")
53 -ticketing_rate = soup2.select('.rate')
54 -movie_name = soup2.select('.tit-area > p.tit')
55 -get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")
56 -
57 -movie_dict = dict() # maps movie name to a list: [movie-no, form-at], then the rate, then an optional {'rank': n}
58 -rank = 1
59 -
60 -for movie in get_movie_info:
61 - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]
62 -
63 -for r, m in zip(ticketing_rate, movie_name): # NOTE(review): raises KeyError when a title on the rate page is missing from movie_dict
64 - movie_dict[m['title']].append(r.string)
65 -
66 -
67 -for value in movie_dict.values():
68 - if(len(value) == 2): # still only [movie-no, form-at], i.e. no rate was appended above
69 - value.append("예메율 0.0%")
70 - if(rank<=10): # only the first 10 entries receive a rank
71 - value.append({'rank' : rank})
72 - rank += 1
73 -
74 -# form-at: on the initial plain fetch, tells whether the movie is showing on that date (regardless of location)
75 -# form-at needs to be checked again after querying with brch-no
...\ No newline at end of file ...\ No newline at end of file
1 +{
2 + "name": "me",
3 + "version": "1.0.0",
4 + "description": "",
5 + "main": "app.js",
6 + "scripts": {
7 + "test": "echo \"Error: no test specified\" && exit 1"
8 + },
9 + "keywords": [],
10 + "author": "",
11 + "license": "ISC",
12 + "dependencies": {
13 + "async": "^3.2.3",
14 + "body-parser": "^1.20.0",
15 + "cheerio": "^1.0.0-rc.11",
16 + "express": "^4.18.1",
17 + "puppeteer": "^14.1.1",
18 + "selenium-webdriver": "^4.1.2"
19 + }
20 +}