임승현

Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'

Add Crawling LotteCinema Theater Code



See merge request !10
...@@ -6,6 +6,7 @@ const { response } = require('express'); ...@@ -6,6 +6,7 @@ const { response } = require('express');
6 6
7 const puppeteer = require('puppeteer'); 7 const puppeteer = require('puppeteer');
8 const cheerio = require('cheerio'); 8 const cheerio = require('cheerio');
9 +const { textContent } = require('domutils');
9 10
10 11
11 app.get('/LotteCinema', asyncHandler(async (req, res, next) => { 12 app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
...@@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { ...@@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
28 if($(this).find('a > em').text() != "AD"){ 29 if($(this).find('a > em').text() != "AD"){
29 data[i++]={ 30 data[i++]={
30 rank : i, 31 rank : i,
31 - url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), 32 + url: $(this).find('div.top_info > div > div > a').attr('href'),
32 title : $(this).find(' div.btm_info > strong').text(), 33 title : $(this).find(' div.btm_info > strong').text(),
33 rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), 34 rate : $(this).find('div.btm_info > span > span.rate_info > em').text(),
34 star : $(this).find('div.btm_info > span > span.star_info').text(), 35 star : $(this).find('div.btm_info > span > span.star_info').text(),
...@@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { ...@@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
41 //console.log(data); 42 //console.log(data);
42 })) 43 }))
43 44
/**
 * Reduces a Lotte Cinema theater link to a compact, pipe-separated query string.
 *
 * The known URL prefixes (regular and special cinema detail pages) are removed
 * and the remaining parameter names are replaced with "|". Each `replace` uses
 * a string pattern, so only the first occurrence of each marker is affected.
 *
 * @param {string} href - The link target taken from a theater anchor.
 * @returns {string} The link with prefixes stripped and parameter names replaced by "|".
 */
function buildLocateQuery(href) {
    return href
        .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=", "")
        .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=", "")
        .replace("&detailDivisionCode=", "|")
        .replace("&cinemaID=", "|")
        .replace("&screendivcd=", "|");
}

/**
 * GET /LotteCinema/theater
 *
 * Loads the Lotte Cinema main page in a headless browser, reads the nested
 * list under the third navigation entry and responds with one object per list
 * item: { LocateUrl, LocateName, LocateQuery }.
 */
app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => {
    const browser = await puppeteer.launch();

    let content;
    try {
        const page = await browser.newPage();

        // URL of the page to collect data from.
        await page.goto('https://www.lottecinema.co.kr/NLCHS/');
        content = await page.content();
    } finally {
        // Always release the browser process, even when navigation fails.
        await browser.close();
    }

    const $ = cheerio.load(content, {decodeEntities: true});
    const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li");

    const theaterData = [];
    $TypeList.each(function (index) {
        // The first matched item is skipped, as in the original flag-based logic.
        // NOTE(review): presumably it is not a real theater entry - confirm against the page markup.
        if (index === 0) {
            return;
        }

        const $link = $(this).find('a');
        const href = $link.attr('href');

        theaterData.push({
            LocateUrl : href,
            LocateName : $link.text(),
            // An anchor without href yields an empty query instead of throwing on undefined.
            LocateQuery : buildLocateQuery(typeof href === 'string' ? href : ''),
        });
    });

    res.send(theaterData);
}))

let server = app.listen(80);
......