이혜인

Rewrite the crawler in JavaScript (booking-rate crawling)

'use strict';

// Crawls the Megabox movie listing page with a headless browser and prints
// the title and booking rate of every movie found in the ranking list.

// NOTE(review): `request` is not used below and is not listed in the
// package.json dependencies added in this change — confirm it is installed
// or still needed.
const request = require('request');
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
const { Builder, Key, until } = require('selenium-webdriver'); // load the selenium modules
const webdriver = require('selenium-webdriver');
const By = webdriver.By;
const chrome = require('selenium-webdriver/chrome'); // needed when driving Chrome

let booking_url = "https://megabox.co.kr/booking";
const rate_url = "https://www.megabox.co.kr/movie";

// Counter kept from the original script; nothing in this file reads it yet.
let r = 0;
// Collected `{ name, rate }` records, one per movie list item.
let movie_data = [];

(async () => {
  const browser = await puppeteer.launch({
    headless: true,
  });

  try {
    const page = await browser.newPage();
    await page.goto(rate_url);
    const content = await page.content();

    // Parse the rendered HTML with cheerio rather than querying the live page.
    const $ = cheerio.load(content);
    $("ol.list>li").each((index, list) => {
      const name = $(list).find('div.tit-area > p.tit').attr('title');
      const rate = $(list).find('div.rate-date > span.rate').text().trim();
      // The original extracted these values but never stored them, so the
      // print loop below had nothing to show.
      movie_data.push({ name, rate });
    });

    for (const movie of movie_data) {
      console.log(movie);
    }
  } finally {
    // Always shut the browser down, even when navigation or parsing throws,
    // so no Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
42 +
43 +
44 +
45 +
46 +
1 -from bs4 import BeautifulSoup  # script: scrape Megabox branch codes, movie ids and booking rates with Selenium + BeautifulSoup
2 -from selenium import webdriver
3 -import chromedriver_autoinstaller
4 -
5 -chromedriver_autoinstaller.install()
6 -
7 -booking_url = "https://megabox.co.kr/booking"
8 -rate_url = "https://www.megabox.co.kr/movie"
9 -
10 -options = webdriver.ChromeOptions()
11 -options.add_argument("headless") # do not open a browser window
12 -options.add_experimental_option("excludeSwitches", ["enable-logging"])
13 -
14 -driver = webdriver.Chrome(options = options)  # first browser: loads the booking page
15 -driver.maximize_window()
16 -# driver.implicitly_wait(2)
17 -driver.get(booking_url)
18 -
19 -driver2=webdriver.Chrome(options = options)  # second browser: loads the movie/rate page
20 -driver2.maximize_window()
21 -# driver2.implicitly_wait(2)
22 -driver2.get(rate_url)  # NOTE(review): neither driver is quit anywhere in this script
23 -
24 -
25 -theater_location = dict()  # branch name -> branch number, filled by get_location_code
26 -
27 -# iframes = driver.find_elements_by_css_selector('iframe')
28 -driver.switch_to.frame('frameBokdMBooking')  # switch into this frame before page_source is read on the next line
29 -page1 = driver.page_source
30 -soup1 = BeautifulSoup(page1, "html.parser")
31 -
32 -seoul = soup1.select("#mCSB_4_container>ul>li>button")  # one button list per region container (ids 4..10)
33 -Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button")
34 -Incheon = soup1.select("#mCSB_6_container>ul>li>button")
35 -DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong
36 -BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang
37 -GJ= soup1.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla
38 -Gangwon = soup1.select("#mCSB_10_container>ul>li>button")
39 -
40 -loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]
41 -
42 -def get_location_code(location):  # record every branch button of one region in theater_location
43 - for brch in location:
44 - theater_location[brch['brch-nm']] = brch['brch-no']  # key: 'brch-nm' attribute, value: 'brch-no' attribute
45 -
46 -
47 -for parameter in loc:
48 - get_location_code(parameter)
49 -
50 -
51 -page2 = driver2.page_source
52 -soup2 = BeautifulSoup(page2, "html.parser")
53 -ticketing_rate = soup2.select('.rate')
54 -movie_name = soup2.select('.tit-area > p.tit')
55 -get_movie_info = soup1.select("#mCSB_1_container>ul>li>button")  # movie buttons come from the booking page (soup1), not the rate page
56 -
57 -movie_dict = dict()  # movie name -> [movie-no, form-at, rate text, optional {'rank': n}]
58 -rank = 1
59 -
60 -for movie in get_movie_info:
61 - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']]
62 -
63 -for r, m in zip(ticketing_rate, movie_name):  # pairs the n-th '.rate' element with the n-th title element
64 - movie_dict[m['title']].append(r.string)  # NOTE(review): raises KeyError when a rate-page title is not a key of movie_dict
65 -
66 -
67 -for value in movie_dict.values():
68 - if(len(value) == 2):  # still only [movie-no, form-at]: no rate was appended above
69 - value.append("예메율 0.0%")
70 - if(rank<=10):  # only while rank <= 10 is a {'rank': n} entry attached
71 - value.append({'rank' : rank})
72 - rank += 1
73 -
74 -#form-at: on the initial plain fetch, check whether the movie is showing on that date (regardless of location)
75 -#need to re-check form-at after querying with brch-no
...\ No newline at end of file ...\ No newline at end of file
1 +{
2 + "name": "me",
3 + "version": "1.0.0",
4 + "description": "",
5 + "main": "app.js",
6 + "scripts": {
7 + "test": "echo \"Error: no test specified\" && exit 1"
8 + },
9 + "keywords": [],
10 + "author": "",
11 + "license": "ISC",
12 + "dependencies": {
13 + "body-parser": "^1.20.0",
14 + "cheerio": "^1.0.0-rc.11",
15 + "express": "^4.18.1",
16 + "puppeteer": "^14.1.1",
17 + "selenium-webdriver": "^4.1.2"
18 + }
19 +}