이혜인

Crawling all info, including locations and movies

// Module setup: third-party dependencies and the module-level state shared by the crawl steps below.
const request = require('request');
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
// NOTE(review): the next two lines both declare `Builder` and `until` with `const`, which is a
// redeclaration error; they look like the old and new version of the same line — keep only one.
const {Builder,Key,until} = require('selenium-webdriver'); // load the module
const {Builder,until} = require('selenium-webdriver'); // load the module
var webdriver = require('selenium-webdriver');
var By = webdriver.By;
const chrome = require('selenium-webdriver/chrome');// for use with Chrome
const async = require('async')
// Page opened with Selenium; the movie and branch buttons are read from its first frame.
let booking_url = "https://megabox.co.kr/booking";
// Page opened with Puppeteer; the per-movie rate text is read from its `ol.list>li` items.
const rate_url = "https://www.megabox.co.kr/movie";
// Leftover options object for the `request` module (currently unused).
// var booking_options = { encoding: "utf-8", method: "GET", uri: booking_url};
// Shared cursor into movie_data; each step resets it to 0 before using it.
let r =0;
// One record per movie: { rank, title, movie_num, running }, plus 'rate' once it has been filled in.
let movie_data = [];
// One single-key object per branch, mapping the `brch-nm` attribute to the `brch-no` attribute.
let location_data = [];
// Next free slot in location_data.
let index = 0;
// Runs the crawl as a sequence of async steps via async.waterfall.
// NOTE(review): this first step appears to interleave two revisions of the same logic: the
// movie-list loop occurs twice (inside the inner IIFE and again after it), and the rate scraping
// inside the IIFE is repeated by the next waterfall step. As written the braces do not balance
// and the span does not parse — reconcile against the intended revision before relying on it.
async.waterfall([
// Step 1: collect the movie list (and, in the interleaved part, the branch list) from the booking page.
async () => {
(async () => {
const driver = new webdriver.Builder().forBrowser('chrome').build();
// NOTE(review): neither get() nor switchTo().frame() is awaited here.
driver.get(booking_url);
driver.switchTo().frame(0)// switch into the frameBokdMBooking frame
// Wait until the movie buttons exist, then read their attributes.
let list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
r = 0;
// NOTE(review): `item` is never declared, so this loop assigns to an implicit global.
for (item of list) {
//Using getAttribute to get the data
// The index expression `r++` is evaluated before the object literal, so 'rank' is 1-based.
movie_data[r++] = {
'rank' : r,
'title' : await item.getAttribute("movie-nm"),
'movie_num':await item.getAttribute("movie-no"),
'running':await item.getAttribute("form-at"),
}
}
driver.close();
r = 0;
// Render the movie page headlessly and parse the resulting HTML with cheerio.
const browser = await puppeteer.launch({
headless: true
});
const page = await browser.newPage();
await page.goto(rate_url);
const content = await page.content();
const $ = cheerio.load(content);
const $rate_lists = $("ol.list>li");
// NOTE(review): this callback is not async, yet `await` is used inside it below, and its
// `index` parameter shadows the module-level `index` used for location_data.
$rate_lists.each((index, list) => {
const name = $(list).find('div.tit-area > p.tit').attr('title');
const rate = $(list).find('div.rate-date > span.rate').text();
// NOTE(review): a new Chrome driver is built for every list item and never closed in this callback.
const driver = new webdriver.Builder().forBrowser('chrome').build();//.setChromeOptions(new chrome.Options().headless())
driver.get(booking_url);
driver.switchTo().frame(0)// switch into the frameBokdMBooking frame
// NOTE(review): `r` is not advanced in this callback, so every item is compared with movie_data[0].
if(movie_data[r].title === name){
movie_data[r]['rate'] = rate;
}else{
// NOTE(review): this `else {` has no matching `}` before the callback's closing `});`.
movie_data[r]['rate'] = '예매율 0.0%';
// One element list per region container; each element carries brch-nm / brch-no attributes.
let seoul = await driver.wait(until.elementsLocated(By.css('#mCSB_4_container>ul>li>#btn')));
let Gyeonggi = await driver.wait(until.elementsLocated(By.css('#mCSB_5_container>ul>li>#btn')));
const Incheon = await driver.wait(until.elementsLocated(By.css('#mCSB_6_container>ul>li>#btn')));
const DCS = await driver.wait(until.elementsLocated(By.css('#mCSB_7_container>ul>li>#btn')));//Daejeon Chungcheong Sejong
const BDG = await driver.wait(until.elementsLocated(By.css('#mCSB_8_container>ul>li>#btn')));//Busan Daegu Gyeongsang
const GJ= await driver.wait(until.elementsLocated(By.css('#mCSB_9_container>ul>li>#btn')));//gwangju_jeonla
const Gangwon = await driver.wait(until.elementsLocated(By.css('#mCSB_10_container>ul>li>#btn')));
const location_list = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]//
// Flatten every region's branches into location_data as { <brch-nm>: <brch-no> } objects.
for(let i = 0; i < location_list.length; i++){
for (item of location_list[i]) {
let location_name = await item.getAttribute("brch-nm");
let location_num = await item.getAttribute("brch-no");
let obj = {};
obj[location_name]= location_num
console.log(obj)
location_data[index++] = obj;
}
}
});
r = 0;
// Debug output of the collected movie records (`i` is an undeclared loop variable).
for(i of movie_data){
console.log(i);
}
browser.close();
})();
// (async () => {
// })();
// NOTE(review): `driver` below is only declared inside the IIFE above, so it is presumably out
// of scope here; this repeats the movie-list loop from the start of the step.
let movie_list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
r = 0;
for (item of movie_list) {
//Using getAttribute to get the data
movie_data[r++] = {
'rank' : r,
'title' : await item.getAttribute("movie-nm"),
'movie_num':await item.getAttribute("movie-no"),
'running':await item.getAttribute("form-at"),
}
}
driver.close();
},
/**
 * Waterfall step: attach a 'rate' string to every record in `movie_data`.
 *
 * Renders `rate_url` in headless Puppeteer, parses the HTML with cheerio and walks the
 * `ol.list>li` items in page order. `r` is a cursor into `movie_data`: when the current
 * record's title equals an item's title, that item's rate text is stored on the record and
 * the cursor advances. Records that never received a rate get the default '예매율 0%'
 * (presumably "booking rate 0%" — confirm wording with the consumer of this data).
 *
 * Side effects: mutates the shared `movie_data` records, resets and advances the shared
 * cursor `r`, and logs the '동탄' entry of every `location_data` object for debugging.
 *
 * @returns {Promise<void>} Resolves once the rates are filled in and the browser is closed;
 *   rejects if launching, navigating or reading the page fails.
 */
async () => {
  r = 0;
  const browser = await puppeteer.launch({
    headless: true
  });
  try {
    const page = await browser.newPage();
    await page.goto(rate_url);
    const content = await page.content();
    const $ = cheerio.load(content);

    $("ol.list>li").each((_, element) => {
      const movie = movie_data[r];
      if (movie === undefined) {
        // Every record already has a rate (or there are none); returning false stops cheerio's
        // iteration instead of dereferencing an out-of-range entry.
        return false;
      }
      const name = $(element).find('div.tit-area > p.tit').attr('title');
      const rate = $(element).find('div.rate-date > span.rate').text();
      if (movie.title === name) {
        movie['rate'] = rate;
        r++;
      }
      return undefined;
    });
  } finally {
    // Always release the Chromium process, even when navigation or parsing throws.
    await browser.close();
  }

  // Default every record the page did not provide a rate for; checking for the 'rate' key
  // directly avoids depending on the records having exactly four other keys.
  for (const movie of movie_data) {
    if (!('rate' in movie)) {
      movie['rate'] = '예매율 0%';
      r++;
    }
  }

  // Debug output: the '동탄' branch entry of each location object (undefined for all others).
  for (const location of location_data) {
    console.log(location['동탄']);
  }
}
])
......
......@@ -10,6 +10,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"async": "^3.2.3",
"body-parser": "^1.20.0",
"cheerio": "^1.0.0-rc.11",
"express": "^4.18.1",
......