임승현

Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'

Add Crawling LotteCinema Movie Chart and Get Ticket URL



See merge request !9
1 +let express = require('express');
2 +let app = express();
3 +let request = require('request');
4 +const asyncHandler = require('express-async-handler')
5 +const { response } = require('express');
6 +
7 +const puppeteer = require('puppeteer');
8 +const cheerio = require('cheerio');
9 +
10 +
/**
 * GET /LotteCinema
 *
 * Opens the Lotte Cinema movie-list page in a puppeteer-controlled browser,
 * takes the page HTML, parses it with cheerio and responds with an array
 * of chart entries, one per list item that is not marked "AD":
 *
 *   {
 *     rank:  1-based position among the kept items,
 *     url:   href of the item's link under `div.top_info` ('' if none found),
 *     title: text of `div.btm_info > strong`,
 *     rate:  text of `span.rate_info > em`,
 *     star:  text of `span.star_info`
 *   }
 *
 * Errors thrown while scraping are forwarded to Express by `asyncHandler`.
 */
app.get('/LotteCinema', asyncHandler(async (req, res) => {
  const browser = await puppeteer.launch();

  let content;
  try {
    const page = await browser.newPage();

    // URL of the page to scrape.
    await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1');

    content = await page.content();
  } finally {
    // Always shut the browser down, even when navigation fails, so a failed
    // request does not leave a browser process behind.
    await browser.close();
  }

  const $ = cheerio.load(content, { decodeEntities: true });
  const data = [];

  $('#contents > div > ul.movie_list.type2').children('li').each(function () {
    const $item = $(this);

    // Items whose `a > em` label reads "AD" are skipped (presumably
    // advertisement tiles) and do not consume a rank.
    if ($item.find('a > em').text() === 'AD') {
      return;
    }

    // The href is used as-is. The previous `.replace("")` call prepended the
    // literal text "undefined" to every URL, and threw when no link matched.
    const href = $item.find('div.top_info > div > div > a').attr('href');

    data.push({
      rank: data.length + 1,
      url: href || '',
      title: $item.find(' div.btm_info > strong').text(),
      rate: $item.find('div.btm_info > span > span.rate_info > em').text(),
      star: $item.find('div.btm_info > span > span.star_info').text(),
    });
  });

  res.send(data);
}));

// Start the HTTP server on port 80.
let server = app.listen(80);
45 +
46 +// (async () => {
47 +// const browser = await puppeteer.launch();
48 +
49 +// const page = await browser.newPage();
50 +
51 +// // URL of the page to scrape
52 +// await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1');
53 +
54 +
55 +// let content = await page.content();
56 +// let $ = cheerio.load(content, {decodeEntities: true});
57 +// let data = [];
58 +// const $bodyList = $("#contents > div > ul.movie_list.type2").children("li");
59 +
60 +// let i =0;
61 +// $bodyList.each(function(elem){
62 +// if($(this).find('a > em').text() != "AD"){
63 +// data[i++]={
64 +// rank : i,
65 +// url: $(this).find('div.top_info > div > div > a').attr('href').replace(""),
66 +// title : $(this).find(' div.btm_info > strong').text(),
67 +// rate : $(this).find('div.btm_info > span > span.rate_info > em').text(),
68 +// star : $(this).find('div.btm_info > span > span.star_info').text(),
69 +// };
70 +// }
71 +// });
72 +// await browser.close();
73 +
74 +// console.log(data);
75 +// })();
1 +{
2 + "name": "crawling-lottecinema-site",
3 + "version": "1.0.0",
4 + "description": "",
5 + "main": "index.js",
6 + "scripts": {
7 + "test": "echo \"Error: no test specified\" && exit 1"
8 + },
9 + "author": "",
10 + "license": "ISC",
11 + "dependencies": {
12 + "body-parser": "^1.17.1",
13 + "express": "^4.15.2",
14 + "cheerio": "^0.22.0",
15 + "puppeteer": "^14.1.0",
16 + "express-async-handler": "^1.2.0"
17 + }
18 +}
...\ No newline at end of file ...\ No newline at end of file