이혜인

Crawling all info, including location and movie

1 const request = require('request'); 1 const request = require('request');
2 const cheerio = require('cheerio'); 2 const cheerio = require('cheerio');
3 const puppeteer = require('puppeteer'); 3 const puppeteer = require('puppeteer');
4 -const {Builder,Key,until} = require('selenium-webdriver'); //모듈 불러오기 4 +
5 +const {Builder,until} = require('selenium-webdriver'); //모듈 불러오기
5 var webdriver = require('selenium-webdriver'); 6 var webdriver = require('selenium-webdriver');
6 var By = webdriver.By; 7 var By = webdriver.By;
7 const chrome = require('selenium-webdriver/chrome');//크롬 사용시 8 const chrome = require('selenium-webdriver/chrome');//크롬 사용시
8 9
10 +const async = require('async')
9 11
10 let booking_url = "https://megabox.co.kr/booking"; 12 let booking_url = "https://megabox.co.kr/booking";
11 const rate_url = "https://www.megabox.co.kr/movie"; 13 const rate_url = "https://www.megabox.co.kr/movie";
12 -// var booking_options = { encoding: "utf-8", method: "GET", uri: booking_url};
13 14
14 let r =0; 15 let r =0;
15 let movie_data = []; 16 let movie_data = [];
17 +let location_data = [];
18 +let index = 0;
16 19
20 +async.waterfall([
21 + async () => {
17 22
18 -(async () => { 23 + const driver = new webdriver.Builder().forBrowser('chrome').build();//.setChromeOptions(new chrome.Options().headless())
19 -
20 - const driver = new webdriver.Builder().forBrowser('chrome').build();
21 driver.get(booking_url); 24 driver.get(booking_url);
22 driver.switchTo().frame(0)//frameBokdMBooking 프레임 가져옴 25 driver.switchTo().frame(0)//frameBokdMBooking 프레임 가져옴
23 26
24 - let list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn'))); 27 + let seoul = await driver.wait(until.elementsLocated(By.css('#mCSB_4_container>ul>li>#btn')));
28 + let Gyeonggi = await driver.wait(until.elementsLocated(By.css('#mCSB_5_container>ul>li>#btn')));
29 + const Incheon = await driver.wait(until.elementsLocated(By.css('#mCSB_6_container>ul>li>#btn')));
30 + const DCS = await driver.wait(until.elementsLocated(By.css('#mCSB_7_container>ul>li>#btn')));//Daejeon Chungcheong Sejong
31 + const BDG = await driver.wait(until.elementsLocated(By.css('#mCSB_8_container>ul>li>#btn')));//Busan Daegu Gyeongsang
32 + const GJ= await driver.wait(until.elementsLocated(By.css('#mCSB_9_container>ul>li>#btn')));//gwangju_jeonla
33 + const Gangwon = await driver.wait(until.elementsLocated(By.css('#mCSB_10_container>ul>li>#btn')));
34 + const location_list = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]//
35 + for(let i = 0; i < location_list.length; i++){
36 + for (item of location_list[i]) {
37 + let location_name = await item.getAttribute("brch-nm");
38 + let location_num = await item.getAttribute("brch-no");
39 + let obj = {};
40 + obj[location_name]= location_num
41 + console.log(obj)
42 + location_data[index++] = obj;
43 +
44 + }
45 + }
46 +
47 + let movie_list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
25 r = 0; 48 r = 0;
26 - for (item of list) { 49 + for (item of movie_list) {
27 //Using getAttribute to get the data 50 //Using getAttribute to get the data
28 movie_data[r++] = { 51 movie_data[r++] = {
29 'rank' : r, 52 'rank' : r,
...@@ -32,8 +55,13 @@ let movie_data = []; ...@@ -32,8 +55,13 @@ let movie_data = [];
32 'running':await item.getAttribute("form-at"), 55 'running':await item.getAttribute("form-at"),
33 } 56 }
34 } 57 }
58 +
35 driver.close(); 59 driver.close();
36 60
61 + },
62 +
63 + async () => {
64 +
37 r = 0; 65 r = 0;
38 const browser = await puppeteer.launch({ 66 const browser = await puppeteer.launch({
39 headless: true 67 headless: true
...@@ -49,25 +77,17 @@ let movie_data = []; ...@@ -49,25 +77,17 @@ let movie_data = [];
49 const rate = $(list).find('div.rate-date > span.rate').text(); 77 const rate = $(list).find('div.rate-date > span.rate').text();
50 78
51 if(movie_data[r].title === name){ 79 if(movie_data[r].title === name){
52 - movie_data[r]['rate'] = rate; 80 + movie_data[r++]['rate'] = rate;
53 - }else{
54 - movie_data[r]['rate'] = '예매율 0.0%';
55 } 81 }
56 }); 82 });
57 - r = 0;
58 for(i of movie_data){ 83 for(i of movie_data){
59 - console.log(i); 84 + if(Object.keys(i).length==4){
85 + movie_data[r++]['rate'] = '예매율 0%';
86 + }
87 + }
88 + for(i of location_data){
89 + console.log(i['동탄'])
60 } 90 }
61 -
62 browser.close(); 91 browser.close();
63 -})(); 92 + }
64 - 93 +])
65 -// (async () => {
66 -
67 -
68 -// })();
69 -
70 -
71 -
72 -
73 -
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
10 "author": "", 10 "author": "",
11 "license": "ISC", 11 "license": "ISC",
12 "dependencies": { 12 "dependencies": {
13 + "async": "^3.2.3",
13 "body-parser": "^1.20.0", 14 "body-parser": "^1.20.0",
14 "cheerio": "^1.0.0-rc.11", 15 "cheerio": "^1.0.0-rc.11",
15 "express": "^4.18.1", 16 "express": "^4.18.1",
......