Toggle navigation
Toggle navigation
This project
Loading...
Sign in
ShinSeungMin
/
Multiplex_Ticketing_Platform
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
이혜인
2022-05-28 01:23:16 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
02fb17156cc61b66bf996a3aef19e177e47c927c
02fb1715
1 parent
a5d4af65
crawling all info including location, movie
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
74 additions
and
53 deletions
Megabox_crawling/app.js
Megabox_crawling/package.json
Megabox_crawling/app.js
View file @
02fb171
// Third-party modules used by the crawler.
const request = require('request');
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');

// Load selenium-webdriver once and take every helper from that single binding.
// Destructuring `Builder`/`until` in two separate `const` statements is a
// redeclaration and makes the whole file fail to parse.
const webdriver = require('selenium-webdriver');
const { Builder, By, Key, until } = webdriver;

// Chrome-specific bindings (used when driving Chrome).
const chrome = require('selenium-webdriver/chrome');

const async = require('async');
// Pages the crawler visits: the booking page (movie and branch lists) and the
// movie page (reservation rates).
let booking_url = 'https://megabox.co.kr/booking';
const rate_url = 'https://www.megabox.co.kr/movie';

// Results shared between the crawl steps.
let movie_data = [];
let location_data = [];

// Cursors into the arrays above: `r` indexes movie_data, `index` is the next
// slot to fill in location_data.
let r = 0;
let index = 0;
async
.
waterfall
([
async
()
=>
{
(
async
()
=>
{
const
driver
=
new
webdriver
.
Builder
().
forBrowser
(
'chrome'
).
build
();
driver
.
get
(
booking_url
);
driver
.
switchTo
().
frame
(
0
)
//frameBokdMBooking 프레임 가져옴
let
list
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_1_container>ul>li>.btn'
)));
r
=
0
;
for
(
item
of
list
)
{
//Using getAttribute to get the data
movie_data
[
r
++
]
=
{
'rank'
:
r
,
'title'
:
await
item
.
getAttribute
(
"movie-nm"
),
'movie_num'
:
await
item
.
getAttribute
(
"movie-no"
),
'running'
:
await
item
.
getAttribute
(
"form-at"
),
}
}
driver
.
close
();
r
=
0
;
const
browser
=
await
puppeteer
.
launch
({
headless
:
true
});
const
page
=
await
browser
.
newPage
();
await
page
.
goto
(
rate_url
);
const
content
=
await
page
.
content
();
const
$
=
cheerio
.
load
(
content
);
const
$rate_lists
=
$
(
"ol.list>li"
);
$rate_lists
.
each
((
index
,
list
)
=>
{
const
name
=
$
(
list
).
find
(
'div.tit-area > p.tit'
).
attr
(
'title'
);
const
rate
=
$
(
list
).
find
(
'div.rate-date > span.rate'
).
text
();
const
driver
=
new
webdriver
.
Builder
().
forBrowser
(
'chrome'
).
build
();
//.setChromeOptions(new chrome.Options().headless())
driver
.
get
(
booking_url
);
driver
.
switchTo
().
frame
(
0
)
//frameBokdMBooking 프레임 가져옴
if
(
movie_data
[
r
].
title
===
name
){
movie_data
[
r
][
'rate'
]
=
rate
;
}
else
{
movie_data
[
r
][
'rate'
]
=
'예매율 0.0%'
;
let
seoul
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_4_container>ul>li>#btn'
)));
let
Gyeonggi
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_5_container>ul>li>#btn'
)));
const
Incheon
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_6_container>ul>li>#btn'
)));
const
DCS
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_7_container>ul>li>#btn'
)));
//Daejeon Chungcheong Sejong
const
BDG
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_8_container>ul>li>#btn'
)));
//Busan Daegu Gyeongsang
const
GJ
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_9_container>ul>li>#btn'
)));
//gwangju_jeonla
const
Gangwon
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_10_container>ul>li>#btn'
)));
const
location_list
=
[
seoul
,
Gyeonggi
,
Incheon
,
DCS
,
BDG
,
GJ
,
Gangwon
]
//
for
(
let
i
=
0
;
i
<
location_list
.
length
;
i
++
){
for
(
item
of
location_list
[
i
])
{
let
location_name
=
await
item
.
getAttribute
(
"brch-nm"
);
let
location_num
=
await
item
.
getAttribute
(
"brch-no"
);
let
obj
=
{};
obj
[
location_name
]
=
location_num
console
.
log
(
obj
)
location_data
[
index
++
]
=
obj
;
}
}
});
r
=
0
;
for
(
i
of
movie_data
){
console
.
log
(
i
);
}
browser
.
close
();
})();
// (async () => {
// })();
let
movie_list
=
await
driver
.
wait
(
until
.
elementsLocated
(
By
.
css
(
'#mCSB_1_container>ul>li>.btn'
)));
r
=
0
;
for
(
item
of
movie_list
)
{
//Using getAttribute to get the data
movie_data
[
r
++
]
=
{
'rank'
:
r
,
'title'
:
await
item
.
getAttribute
(
"movie-nm"
),
'movie_num'
:
await
item
.
getAttribute
(
"movie-no"
),
'running'
:
await
item
.
getAttribute
(
"form-at"
),
}
}
driver
.
close
();
},
/**
 * Waterfall step 2: load the movie page with Puppeteer, read each listed
 * movie's reservation rate, and store it on the matching `movie_data` entry
 * under the `rate` key. Entries with no match (and no rate yet) get the
 * default '예매율 0%'.
 *
 * Matching is done by title, not by list position, so a rate-list entry that
 * is missing from `movie_data` (or a different ordering) no longer stops all
 * later movies from being matched, and an empty or shorter `movie_data` no
 * longer throws. The browser is always closed, even if scraping fails.
 */
async () => {
  const DEFAULT_RATE = '예매율 0%';
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(rate_url);
    const $ = cheerio.load(await page.content());

    // title -> rate text; the first occurrence of a title wins.
    const rateByTitle = new Map();
    $("ol.list>li").each((_position, entry) => {
      const title = $(entry).find('div.tit-area > p.tit').attr('title');
      const rate = $(entry).find('div.rate-date > span.rate').text();
      if (title !== undefined && !rateByTitle.has(title)) {
        rateByTitle.set(title, rate);
      }
    });

    for (const movie of movie_data) {
      if (rateByTitle.has(movie.title)) {
        movie['rate'] = rateByTitle.get(movie.title);
      } else if (movie['rate'] === undefined) {
        movie['rate'] = DEFAULT_RATE;
      }
    }
    // Leave the shared cursor where the original loops left it: past the end.
    r = movie_data.length;

    // Debug output kept from the original: prints the '동탄' value of each
    // location object (undefined for objects that lack that key).
    for (const branch of location_data) {
      console.log(branch['동탄']);
    }
  } finally {
    await browser.close();
  }
}
])
...
...
Megabox_crawling/package.json
View file @
02fb171
...
...
@@ -10,6 +10,7 @@
"author"
:
""
,
"license"
:
"ISC"
,
"dependencies"
:
{
"async"
:
"^3.2.3"
,
"body-parser"
:
"^1.20.0"
,
"cheerio"
:
"^1.0.0-rc.11"
,
"express"
:
"^4.18.1"
,
...
...
Please
register
or
login
to post a comment