Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Chat_Bot
/
KaKao_ChatBot
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Authored by
윤준현
2018-12-05 21:06:31 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
c9b14db20b3a35cd0acc49d6f6b617a1b77cfc72
c9b14db2
1 parent
b19d0a6a
add_Mot_Guckka
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
117 additions
and
0 deletions
crawling/Guckkasten.js
crawling/Mot.js
crawling/Guckkasten.js
0 → 100644
View file @
c9b14db
var
request
=
require
(
'request'
);
var
cheerio
=
require
(
'cheerio'
);
/**
 * Gathers the Guckkasten discography through `crawl`, orders it by release
 * date (ascending) and delivers it to `callback` exactly once.
 *
 * `crawl` reports back once per album page it requests, each time passing the
 * cumulative album / year / track lists. Only the final report contains every
 * album, so earlier reports are counted and otherwise ignored. If `crawl`
 * reports fewer times than expected, `callback` is never invoked.
 *
 * @param {function(Array<Array<string>>): void} callback - Receives a
 *   three-element array `[albums, years, tracks]`. The three inner arrays are
 *   index-aligned; every album title is prefixed with its 1-based position
 *   (`"1. <title>"`).
 */
function crawl_guckkasten(callback) {
  // Number of reports `crawl` makes: it requests pages 1-9 and 13.
  const EXPECTED_REPORTS = 10;
  let reports = 0;

  crawl(function (album, year, track) {
    reports++;
    if (reports !== EXPECTED_REPORTS) {
      return;
    }

    // Zip the parallel lists so a single sort keeps title, date and track
    // list of each album together.
    const entries = year.map(function (released, i) {
      return { album: album[i], year: released, track: track[i] };
    });

    // Plain string comparison, exactly as the release text is delivered by
    // `crawl`. Array.prototype.sort is stable, so albums sharing a release
    // date keep the order in which they were crawled.
    entries.sort(function (a, b) {
      if (a.year > b.year) {
        return 1;
      }
      if (a.year < b.year) {
        return -1;
      }
      return 0;
    });

    const albums = entries.map(function (entry, i) {
      return `${i + 1}. ${entry.album}`;
    });
    const years = entries.map(function (entry) {
      return entry.year;
    });
    const tracks = entries.map(function (entry) {
      return entry.track;
    });

    callback([albums, years, tracks]);
  });
}
/**
 * Requests the Guckkasten album pages and accumulates, per page, the album
 * title, its release text and its newline-separated track list.
 *
 * Pages 1-9 and 13 are requested; pages 10-12 are skipped.
 * NOTE(review): presumably 10-12 are not regular album pages - confirm.
 *
 * `callback` is invoked once for every requested page, in the order the
 * responses arrive, always with the same three cumulative arrays. A failed
 * request is logged and contributes no entry, but still triggers `callback`
 * so that callers counting reports are not left waiting.
 *
 * @param {function(Array<string>, Array<string>, Array<string>): void} callback
 *   Receives `(albums, years, tracks)`; the arrays are index-aligned.
 */
function crawl(callback) {
  const ALBUM_URL = 'http://www.interparkenter.com/guckkasten/Album/';
  const TRACK_LIST = '#divDetail > div.disco_left_box > div.track_area > div > ol';
  const RELEASE = '#divDetail > div.disco_right_box > div.Introduction > p.txt_release';
  const TITLE = '#divDetail > div.disco_right_box > div.Introduction > p.txt_title';

  const json_album = [];
  const json_year = [];
  const json_track = [];

  for (let page = 1; page <= 13; page++) {
    if (page > 9 && page < 13) {
      continue;
    }

    const url = ALBUM_URL + page;
    request(url, function (err, res, body) {
      if (err) {
        // Without a body there is nothing to parse; report anyway so the
        // number of callback invocations stays equal to the request count.
        console.error(`crawl: request for ${url} failed: ${err.message}`);
        callback(json_album, json_year, json_track);
        return;
      }

      const $ = cheerio.load(body);

      // Read each <li> directly instead of re-querying the document with an
      // nth-child selector, which mixes entries up when several lists match.
      let albumTrack = '';
      $(TRACK_LIST).find('li').each(function (index, li) {
        albumTrack += $(li).text() + '\n';
      });
      json_track.push(albumTrack);

      // Drop the first three characters of the release text.
      // NOTE(review): presumably a fixed label in front of the date - confirm.
      json_year.push($(RELEASE).text().slice(3));

      json_album.push($(TITLE).text());

      callback(json_album, json_year, json_track);
    });
  }
}
// Expose crawl_guckkasten on the CommonJS exports object; the `crawl` helper
// is not exported.
exports
.
crawl_guckkasten
=
crawl_guckkasten
;
\ No newline at end of file
crawling/Mot.js
0 → 100644
View file @
c9b14db
var
request
=
require
(
'request'
);
var
cheerio
=
require
(
'cheerio'
);
/**
 * Fetches the namu.wiki page for the band Mot and extracts, for three album
 * tables, the album title and a numbered, newline-separated track list.
 * Release dates are not scraped; they are the fixed values listed below.
 *
 * `callback` is invoked exactly once. If the request fails, the error is
 * logged and the album and track lists are delivered empty.
 *
 * @param {function(Array<Object>): void} callback - Receives a one-element
 *   array `[{ json_album, json_year, json_track }]`. Album titles are
 *   prefixed `"<n>. "`, and each track line is prefixed `"<n>. "` as well.
 */
function crawl_mot(callback) {
  const url = 'https://namu.wiki/w/%EB%AA%BB(%EB%B0%B4%EB%93%9C)';
  const CONTENT = 'body > div.content-wrapper > article > div.wiki-content.clearfix > div';
  // nth-child positions, inside the content container, of the three <div>s
  // whose tables are read.
  const ALBUM_DIV_POSITIONS = [14, 16, 24];

  const json_album = [];
  const json_year = ['2004. 06. 18', '2007. 05. 22', '2016. 02. 19'];
  const json_track = [];
  const Mot = [];

  request(url, function (err, res, body) {
    if (err) {
      // No body to parse: deliver the result shape callers expect, with the
      // scraped lists left empty.
      console.error(`crawl_mot: request for ${url} failed: ${err.message}`);
      Mot.push({ json_album, json_year, json_track });
      callback(Mot);
      return;
    }

    const $ = cheerio.load(body);
    let album_num = 0;

    ALBUM_DIV_POSITIONS.forEach(function (position) {
      const tableBody = `${CONTENT} > div:nth-child(${position}) > div > table > tbody`;
      // Declared per table so a table without rows yields an empty title
      // instead of leaking or reusing a value from elsewhere.
      let album = '';
      let album_track = '';
      let rowNumber = 0;
      let trackCount = 0;

      $(tableBody).find('tr').each(function () {
        rowNumber++;
        if (rowNumber === 1) {
          // First row: its whole text is used as the album title.
          album_num++;
          const title = $(`${tableBody} > tr:nth-child(1)`).text();
          album = `${album_num}. ${title}`;
        } else if (rowNumber >= 3) {
          // Row 2 is skipped (NOTE(review): presumably a header row -
          // confirm). From row 3 on, the second cell is the track title.
          trackCount++;
          const trackTitle = $(`${tableBody} > tr:nth-child(${rowNumber}) > td:nth-child(2)`).text();
          album_track += `${trackCount}. ${trackTitle}\n`;
        }
      });

      json_album.push(album);
      json_track.push(album_track);
    });

    Mot.push({ json_album, json_year, json_track });
    callback(Mot);
  });
}
// Expose crawl_mot on the CommonJS exports object.
exports
.
crawl_mot
=
crawl_mot
;
Please
register
or
login
to post a comment