Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'
Add crawling of LotteCinema theater codes. See merge request !10.
Showing 1 changed file with 32 additions and 27 deletions.
... | @@ -6,6 +6,7 @@ const { response } = require('express'); | ... | @@ -6,6 +6,7 @@ const { response } = require('express'); |
6 | 6 | ||
7 | const puppeteer = require('puppeteer'); | 7 | const puppeteer = require('puppeteer'); |
8 | const cheerio = require('cheerio'); | 8 | const cheerio = require('cheerio'); |
9 | +const { textContent } = require('domutils'); | ||
9 | 10 | ||
10 | 11 | ||
11 | app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | 12 | app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
... | @@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
28 | if($(this).find('a > em').text() != "AD"){ | 29 | if($(this).find('a > em').text() != "AD"){ |
29 | data[i++]={ | 30 | data[i++]={ |
30 | rank : i, | 31 | rank : i, |
31 | - url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), | 32 | + url: $(this).find('div.top_info > div > div > a').attr('href'), |
32 | title : $(this).find(' div.btm_info > strong').text(), | 33 | title : $(this).find(' div.btm_info > strong').text(), |
33 | rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), | 34 | rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), |
34 | star : $(this).find('div.btm_info > span > span.star_info').text(), | 35 | star : $(this).find('div.btm_info > span > span.star_info').text(), |
... | @@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
41 | //console.log(data); | 42 | //console.log(data); |
42 | })) | 43 | })) |
43 | 44 | ||
44 | -let server = app.listen(80); | 45 | +app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { |
45 | - | ||
46 | -// (async () => { | ||
47 | -// const browser = await puppeteer.launch(); | ||
48 | 46 | ||
49 | -// const page = await browser.newPage(); | 47 | + const browser = await puppeteer.launch(); |
50 | 48 | ||
51 | -// // 수집하고자 하는 URL을 입력 | 49 | + const page = await browser.newPage(); |
52 | -// await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1'); | ||
53 | 50 | ||
51 | + // 수집하고자 하는 URL을 입력 | ||
52 | + await page.goto('https://www.lottecinema.co.kr/NLCHS/'); | ||
54 | 53 | ||
55 | -// let content = await page.content(); | 54 | + let content = await page.content(); |
56 | -// let $ = cheerio.load(content, {decodeEntities: true}); | 55 | + let $ = cheerio.load(content, {decodeEntities: true}); |
57 | -// let data = []; | 56 | + let theaterData = []; |
58 | -// const $bodyList = $("#contents > div > ul.movie_list.type2").children("li"); | 57 | + const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li"); |
58 | + let i =0; | ||
59 | + let flag = 0; | ||
60 | + $TypeList.each(function(temp_Type){ | ||
61 | + if(flag==1){ | ||
62 | + theaterData[i++]={ | ||
63 | + LocateUrl : $(this).find('a').attr('href'), | ||
64 | + LocateName : $(this).find('a').text(), | ||
65 | + LocateQuery : $(this).find('a').attr('href') | ||
66 | + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=","") | ||
67 | + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=","") | ||
68 | + .replace("&detailDivisionCode=","|").replace("&cinemaID=","|").replace("&screendivcd=","|"), | ||
69 | + }; | ||
70 | + }else{ | ||
71 | + flag++; | ||
72 | + } | ||
73 | + }); | ||
74 | + await browser.close(); | ||
59 | 75 | ||
60 | -// let i =0; | 76 | + res.send(theaterData); |
61 | -// $bodyList.each(function(elem){ | 77 | + //console.log(theaterData); |
62 | -// if($(this).find('a > em').text() != "AD"){ | 78 | +})) |
63 | -// data[i++]={ | ||
64 | -// rank : i, | ||
65 | -// url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), | ||
66 | -// title : $(this).find(' div.btm_info > strong').text(), | ||
67 | -// rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), | ||
68 | -// star : $(this).find('div.btm_info > span > span.star_info').text(), | ||
69 | -// }; | ||
70 | -// } | ||
71 | -// }); | ||
72 | -// await browser.close(); | ||
73 | 79 | ||
74 | -// console.log(data); | 80 | +let server = app.listen(80); |
75 | -// })(); | ... | ... |
-
Please register or login to post a comment