임승현

Merge branch 'feature/LotteCinema_Crawling' into 'master'

Feature/lotte cinema crawling

First Merge from feature/LotteCinema_Crawling into master

See merge request !17
let express = require('express');
let app = express();
let request = require('request');
const asyncHandler = require('express-async-handler')
const { response } = require('express');
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const { textContent } = require('domutils');
//https://www.lottecinema.co.kr/NLCHS/Ticketing?movieCd=18632&movieName=범죄도시%202&screenCd=1|1|1009&screenName=김포공항&releaseDate=2022-05-18
// Module-level cache of movie records ({rank, url, title, rate, star});
// filled by the startup crawl and returned by the /LotteCinema route.
let movieData = [];
// Module-level cache of theater records ({LocateUrl, LocateName, LocateQuery});
// filled by the startup crawl and returned by the /LotteCinema/theater route.
let theaterData = [];
/**
 * Formats the current local date as "YYYY-MM-DD".
 *
 * @returns {string} Today's date with zero-padded month and day.
 */
function getToday(){
  const now = new Date();
  // Month is 0-indexed in Date, so shift it by one before padding.
  const twoDigits = (value) => String(value).padStart(2, "0");
  return [
    now.getFullYear(),
    twoDigits(now.getMonth() + 1),
    twoDigits(now.getDate()),
  ].join("-");
}
//console.log(getToday());
/*
 * Startup crawl: runs once when this module is loaded.
 *
 * 1. Loads the Lotte Cinema movie list page and appends one record per
 *    non-"AD" list item to the module-level `movieData` array.
 * 2. Loads the Lotte Cinema home page and appends one record per theater
 *    link found in the navigation menu to the module-level `theaterData` array.
 *
 * The browser is always closed, even when a navigation or parse step fails,
 * and a failure is logged instead of being left as an unhandled rejection.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // URL of the page to crawl: the movie list.
    await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1');
    const $movies = cheerio.load(await page.content(), {decodeEntities: true});
    $movies("#contents > div > ul.movie_list.type2").children("li").each(function () {
      const $item = $movies(this);
      // Items labelled "AD" are skipped (presumably advertisement tiles — confirm).
      if ($item.find('a > em').text() !== "AD") {
        movieData.push({
          // 1-based position among the kept (non-"AD") items.
          rank: movieData.length + 1,
          url: $item.find('div.top_info > div > div > a').attr('href'),
          title: $item.find(' div.btm_info > strong').text(),
          rate: $item.find('div.btm_info > span > span.rate_info > em').text(),
          star: $item.find('div.btm_info > span > span.star_info').text(),
        });
      }
    });

    // Theater links live in the third entry of the home page navigation menu.
    await page.goto('https://www.lottecinema.co.kr/NLCHS/');
    const $home = cheerio.load(await page.content(), {decodeEntities: true});
    $home("#nav > ul > li:nth-child(3) > div > ul")
      .children("li")
      .find("div > ul")
      .children("li")
      .each(function (index) {
        // The first matched item is never recorded (same as the original flag logic).
        if (index === 0) {
          return;
        }
        const $link = $home(this).find('a');
        const href = $link.attr('href');
        // An item without a link has no usable query; skip it rather than
        // letting `.replace` throw on undefined and abort the whole crawl.
        if (typeof href !== 'string') {
          return;
        }
        theaterData.push({
          LocateUrl: href,
          LocateName: $link.text(),
          // Strip the page prefix and turn the remaining query parameters
          // into a "|"-separated value (e.g. "1|1|1009").
          LocateQuery: href
            .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=", "")
            .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=", "")
            .replace("&detailDivisionCode=", "|")
            .replace("&cinemaID=", "|")
            .replace("&screendivcd=", "|"),
        });
      });
  } finally {
    // Release the Chromium process whether or not the crawl succeeded.
    await browser.close();
  }
  console.log("Completed!");
})().catch((err) => {
  // Keep the server running; the routes simply serve whatever was collected.
  console.error("LotteCinema startup crawl failed:", err);
});
// GET /LotteCinema — responds with the movie records currently held in `movieData`.
app.get('/LotteCinema', asyncHandler(async (_req, res) => {
  res.send(movieData);
}));
// GET /LotteCinema/theater — responds with the theater records currently held in `theaterData`.
app.get('/LotteCinema/theater', asyncHandler(async (_req, res) => {
  res.send(theaterData);
}));
/*
 * GET /LotteCinema/GetPlayingMovie
 *
 * Builds a ticketing URL from the first crawled movie and the theater whose
 * name contains the (currently hard-coded) test theater name, loads that page
 * and responds with the list entries that are not marked "disabled", as
 * [{rank, title}, ...].
 *
 * If the startup crawl has not produced a usable movie/theater yet, responds
 * with a plain "please wait" message instead (exactly one response is sent).
 */
app.get('/LotteCinema/GetPlayingMovie', asyncHandler(async (req, res, next) => {
  // Fixed test inputs, kept local so they no longer leak as implicit globals.
  const testTheaterName = "판교";
  const testDate = "2022-05-30";

  let PlayingMovieURL;
  const firstMovie = movieData[0];
  if (firstMovie && firstMovie.url) {
    // When several theaters match, the last one wins (same as the original loop).
    for (const theater of theaterData) {
      if (theater.LocateName.includes(testTheaterName)) {
        PlayingMovieURL = `${firstMovie.url}&screenCd=${theater.LocateQuery}` +
          `&screenName=${theater.LocateName}` +
          `&releaseDate=${testDate}`;
      }
    }
  }

  if (!PlayingMovieURL) {
    // Return right away: sending a second response would raise ERR_HTTP_HEADERS_SENT.
    console.log("Please wait until get Movie and Theater information!");
    res.send("Please wait until get Movie and Theater information!");
    return;
  }

  const playingMovieData = [];
  // Launch the browser only once there is a page to visit.
  const browser1 = await puppeteer.launch();
  try {
    const page1 = await browser1.newPage();
    await page1.goto(PlayingMovieURL);
    const $page = cheerio.load(await page1.content(), {decodeEntities: true});
    $page("#mCSB_9_container > ul").children("li").each(function () {
      const $entry = $page(this);
      if ($entry.attr("class") !== "disabled") {
        playingMovieData.push({
          // 1-based position among the entries that are not disabled.
          rank: playingMovieData.length + 1,
          title: $entry.find("a > div.group_infor > div > strong").text(),
        });
      }
    });
  } finally {
    // Always release the Chromium process, even if navigation or parsing throws.
    await browser1.close();
  }
  res.send(playingMovieData);
}));
// Start the HTTP server for the routes registered on `app`, listening on port 80.
let server = app.listen(80);
{
"name": "Crawling LotteCinema Site",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"body-parser": "^1.17.1",
"express": "^4.15.2",
"cheerio": "^0.22.0",
"puppeteer": "^14.1.0",
"express-async-handler": "^1.2.0"
}
}
\ No newline at end of file
This diff could not be displayed because it is too large.
{
"dependencies": {
"axios": "^0.27.2",
"body-parser": "^1.20.0",
"cheerio": "^1.0.0-rc.10",
"dom-parser": "^0.1.6",
"ejs": "^3.1.7",
"express": "^4.17.3",
"express-async-handler": "^1.2.0",
"express-session": "^1.17.2",
"puppeteer": "^14.1.0",
"request": "^2.88.2",
"sanitize-html": "^2.7.0"
}
}