임승현

Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'

Feature/megabox crawling

크롤링 언어 변경 및 위치와 일자 받아서 상영 여부 받아오기 진행

See merge request !15
// Module setup for the Megabox crawler/API server.
// NOTE(review): `request` is not used in the visible code and is not listed in
// the package.json dependencies shown alongside this file — confirm it is
// installed or still needed.
const request = require('request');
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
const {Builder,until} = require('selenium-webdriver'); // load the selenium-webdriver module
var webdriver = require('selenium-webdriver');
var By = webdriver.By;
const chrome = require('selenium-webdriver/chrome');// needed when driving Chrome
const async = require('async')
let express = require('express');
let app = express();
let bodyParser = require('body-parser');
// NOTE(review): `timeout` does not appear to be used in the visible code.
const { timeout } = require('async');
// Parse URL-encoded and JSON request bodies so the route handlers can read req.body.
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
// Booking page: the branch buttons and movie buttons are read from its first frame.
const booking_url = "https://megabox.co.kr/booking";
// Movie listing page: titles and booking rates are read from it.
const rate_url = "https://www.megabox.co.kr/movie";
// Cursor into movie_data used by the startup crawl.
let r =0;
// Movie records built by the startup crawl and returned by the routes.
let movie_data = [];
// Branch records ({LocationName, LocationNUm}) built by the startup crawl.
let location_data = [];
// Next free slot in location_data.
let index = 0;
// Startup crawl. The two steps run in series (async.waterfall) because the
// second step annotates the movie_data records created by the first.
async.waterfall([
  // Step 1: read the branch list and the movie list from the booking page.
  async () => {
    const driver = new webdriver.Builder()
      .forBrowser('chrome')
      .setChromeOptions(new chrome.Options().headless())
      .build();
    try {
      // Every WebDriver command returns a promise. Navigation and the frame
      // switch must finish before elements are looked up inside the frame.
      await driver.get(booking_url);
      await driver.switchTo().frame(0); // the frameBokdMBooking frame

      // One scroll container per region on the booking page.
      const regionSelectors = [
        '#mCSB_4_container>ul>li>#btn', // Seoul
        '#mCSB_5_container>ul>li>#btn', // Gyeonggi
        '#mCSB_6_container>ul>li>#btn', // Incheon
        '#mCSB_7_container>ul>li>#btn', // Daejeon / Chungcheong / Sejong
        '#mCSB_8_container>ul>li>#btn', // Busan / Daegu / Gyeongsang
        '#mCSB_9_container>ul>li>#btn', // Gwangju / Jeolla
        '#mCSB_10_container>ul>li>#btn', // Gangwon
      ];
      const location_list = [];
      for (const selector of regionSelectors) {
        location_list.push(await driver.wait(until.elementsLocated(By.css(selector))));
      }

      for (const branchButtons of location_list) {
        for (const button of branchButtons) {
          const branchName = await button.getAttribute('brch-nm');
          const branchNumber = await button.getAttribute('brch-no');
          // The 'LocationNUm' spelling is kept: the POST /Megabox handler reads this key.
          location_data[index++] = {
            'LocationName': branchName,
            'LocationNUm': branchNumber,
          };
        }
      }

      const movie_list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
      r = 0;
      for (const button of movie_list) {
        const title = await button.getAttribute('movie-nm');
        const movieNumber = await button.getAttribute('movie-no');
        // rank is the 1-based position of the movie in the booking list.
        movie_data[r] = {
          'rank': r + 1,
          'title': title,
          'movie_num': movieNumber,
        };
        r += 1;
      }
    } finally {
      // quit() ends the whole session (browser and driver process) even when
      // the crawl throws; close() would only close the current window.
      await driver.quit();
    }
  },
  // Step 2: attach the booking rate from the movie listing page to each record.
  async () => {
    r = 0;
    const browser = await puppeteer.launch({
      headless: true
    });
    try {
      const page = await browser.newPage();
      await page.goto(rate_url);
      const $ = cheerio.load(await page.content());
      // Walk the rate list in order; the cursor r advances only when the
      // current movie_data title matches the list entry.
      $('ol.list>li').each((_position, entry) => {
        if (r >= movie_data.length) {
          return false; // every movie is matched; returning false stops .each()
        }
        const name = $(entry).find('div.tit-area > p.tit').attr('title');
        const rate = $(entry).find('div.rate-date > span.rate').text();
        if (movie_data[r].title === name) {
          movie_data[r]['rate'] = rate;
          r += 1;
        }
        return undefined;
      });
      // Movies that never matched an entry on the rate page get a default rate.
      for (const movie of movie_data) {
        if (movie['rate'] === undefined) {
          movie['rate'] = '예매율 0%';
        }
      }
    } finally {
      await browser.close();
    }
  },
], (err) => {
  // Without a final callback a failed crawl surfaces as an unhandled rejection.
  if (err) {
    console.error('Megabox startup crawl failed:', err);
  }
});
// Holds the values derived from the most recent POST /Megabox request:
// 'Date' is copied from the request body, 'location' is the branch number
// looked up from location_data.
let userData = {
'Date': '',
'location':''
};
// const _sleep = (delay) => new Promise((resolve) => setTimeout(resolve, delay));
// GET /Megabox — responds with the current contents of movie_data.
app.get('/Megabox', function (req, res) {
  res.send(movie_data);
});
// Headless Chrome session used by the POST /Megabox and
// GET /Megabox/GetPlayingMovie handlers.
const appdriver = new webdriver.Builder()
  .forBrowser('chrome')
  .setChromeOptions(new chrome.Options().headless())
  .build();
// POST /Megabox — receives Date and location (branch name) from the user,
// points the shared appdriver at the booking page for that branch and date,
// and responds with movie_data.
//
// Responds 400 when Date is missing or the branch name is not in
// location_data, and 500 when the browser navigation fails.
app.post('/Megabox', async (req, res) => {
  const playDate = req.body.Date;
  const branchName = req.body.location;

  if (playDate === undefined || playDate === null || playDate === '') {
    res.status(400).send({ error: 'Date is required' });
    return;
  }

  const branch = location_data.find((entry) => entry['LocationName'] === branchName);
  if (branch === undefined) {
    // Do not fall back to the branch number left over from an earlier request.
    res.status(400).send({ error: 'Unknown location' });
    return;
  }

  userData['Date'] = playDate;
  userData['location'] = branch['LocationNUm'];

  // URLSearchParams percent-encodes the user-supplied values.
  const query = new URLSearchParams({
    brchNo1: String(userData['location']),
    playDe: String(userData['Date']),
  });
  const PlayingMovieURL = `${booking_url}?${query.toString()}`;

  try {
    // Wait for the navigation and the frame switch so a later
    // GET /Megabox/GetPlayingMovie reads the page for this branch and date.
    await appdriver.get(PlayingMovieURL);
    await appdriver.switchTo().frame(0); // the frameBokdMBooking frame
  } catch (err) {
    console.error('Failed to open the Megabox booking page:', err);
    res.status(500).send({ error: 'Failed to open the booking page' });
    return;
  }

  res.send(movie_data);
});
// GET /Megabox/GetPlayingMovie — reads the 'form-at' attribute of every movie
// button on the page currently open in appdriver, stores it as 'running' on
// the matching movie_data record, and responds with movie_data.
// Errors (including the lookup timing out) are passed to Express via next().
app.get('/Megabox/GetPlayingMovie', async (req, res, next) => {
  // Upper bound so the request fails instead of hanging when the movie list
  // never appears (for example when no booking page has been opened yet).
  const locateTimeoutMs = 15000;
  try {
    const movie_list = await appdriver.wait(
      until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')),
      locateTimeoutMs
    );
    // Pair buttons with records by movie number rather than list position, so
    // an extra or reordered button cannot write to a missing or wrong record.
    const moviesByNumber = new Map(movie_data.map((movie) => [movie['movie_num'], movie]));
    for (const button of movie_list) {
      const movie = moviesByNumber.get(await button.getAttribute('movie-no'));
      if (movie !== undefined) {
        movie['running'] = await button.getAttribute('form-at');
      }
    }
    res.send(movie_data);
  } catch (err) {
    next(err);
  }
});
// Start the HTTP server on port 23023.
app.listen(23023);
\ No newline at end of file
from bs4 import BeautifulSoup
from selenium import webdriver
import chromedriver_autoinstaller
# Crawl Megabox: collect branch codes and the movie list from the booking page,
# then attach booking rates read from the movie listing page.
chromedriver_autoinstaller.install()
booking_url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"
options = webdriver.ChromeOptions()
options.add_argument("headless")  # do not open a browser window
options.add_experimental_option("excludeSwitches", ["enable-logging"])

driver = None
driver2 = None
try:
    driver = webdriver.Chrome(options=options)
    driver.maximize_window()
    driver.get(booking_url)
    driver2 = webdriver.Chrome(options=options)
    driver2.maximize_window()
    driver2.get(rate_url)
    # The branch and movie buttons live inside the booking iframe.
    driver.switch_to.frame('frameBokdMBooking')
    page1 = driver.page_source
    page2 = driver2.page_source
finally:
    # Only the captured HTML is used below, so always release both browsers,
    # including when navigation fails part-way through.
    for _browser in (driver, driver2):
        if _browser is not None:
            _browser.quit()

theater_location = dict()
soup1 = BeautifulSoup(page1, "html.parser")
seoul = soup1.select("#mCSB_4_container>ul>li>button")
Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button")
Incheon = soup1.select("#mCSB_6_container>ul>li>button")
DCS = soup1.select("#mCSB_7_container>ul>li>button")  # Daejeon Chungcheong Sejong
BDG = soup1.select("#mCSB_8_container>ul>li>button")  # Busan Daegu Gyeongsang
GJ = soup1.select("#mCSB_9_container>ul>li>button")  # Gwangju Jeolla
Gangwon = soup1.select("#mCSB_10_container>ul>li>button")
loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]


def get_location_code(location):
    """Store each branch button's 'brch-nm' -> 'brch-no' in theater_location.

    location: iterable of button elements carrying both attributes.
    """
    for brch in location:
        theater_location[brch['brch-nm']] = brch['brch-no']


for parameter in loc:
    get_location_code(parameter)

soup2 = BeautifulSoup(page2, "html.parser")
ticketing_rate = soup2.select('.rate')
movie_name = soup2.select('.tit-area > p.tit')
get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")

# movie_dict maps title -> [movie-no, form-at, booking rate, optional {'rank': n}].
movie_dict = dict()
rank = 1
for movie in get_movie_info:
    movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]

# Rates and titles are paired by position on the listing page.
for r, m in zip(ticketing_rate, movie_name):
    title = m.get('title')
    # Skip titles that appear only on the rate page; indexing movie_dict
    # directly would raise KeyError for them.
    if title in movie_dict:
        movie_dict[title].append(r.string)

for value in movie_dict.values():
    if len(value) == 2:
        # No rate was found for this movie. The literal previously misspelled
        # "예매율" (booking rate) as "예메율".
        value.append("예매율 0.0%")
    if rank <= 10:
        value.append({'rank': rank})
    rank += 1

# form-at: on the initial fetch it indicates whether the movie is showing on
# that date, regardless of location.
# form-at needs to be checked again after querying with a brch-no.
\ No newline at end of file
{
"name": "me",
"version": "1.0.0",
"description": "",
"main": "app.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"async": "^3.2.3",
"body-parser": "^1.20.0",
"cheerio": "^1.0.0-rc.11",
"express": "^4.18.1",
"puppeteer": "^14.1.1",
"selenium-webdriver": "^4.1.2"
}
}