Showing
2 changed files
with
117 additions
and
0 deletions
crawling/Guckkasten.js
0 → 100644
1 | +var request = require('request'); | ||
2 | +var cheerio = require('cheerio'); | ||
3 | + | ||
/**
 * Crawl the Guckkasten album pages and deliver the discography ordered by
 * release date (oldest first).
 *
 * The callback is invoked exactly once, after every page request has
 * settled. Pages that could not be fetched are left out, so the result may
 * hold fewer albums than were requested (all three lists are empty when
 * every request failed).
 *
 * @param {function(Array<Array<string>>)} callback - Receives
 *   `[albums, years, tracks]`: three parallel arrays. Album titles are
 *   prefixed with their 1-based position ("1. Title"); each `tracks` entry
 *   is the album's track texts joined with a trailing '\n' per track.
 */
function crawl_guckkasten(callback) {
  crawl((album, year, track) => {
    // Sort an index permutation instead of the data itself so the three
    // parallel arrays stay aligned without hand-written element shuffling.
    const order = year.map((_, position) => position);
    order.sort((a, b) => {
      if (year[a] < year[b]) {
        return -1;
      }
      if (year[a] > year[b]) {
        return 1;
      }
      // Equal release strings: keep crawl order so the output is stable.
      return a - b;
    });

    const sorted_album = order.map((source, rank) => (rank + 1) + '. ' + album[source]);
    const sorted_year = order.map((source) => year[source]);
    const sorted_track = order.map((source) => track[source]);

    callback([sorted_album, sorted_year, sorted_track]);
  });
}

/**
 * Fetch every album page and report the scraped fields once all requests
 * have settled.
 *
 * Album ids 1-9 and 13 are requested; ids 10-12 are deliberately skipped.
 * Results are stored per request slot, so the output order follows the id
 * order rather than the order in which responses happen to arrive.
 *
 * @param {function(Array<string>, Array<string>, Array<string>)} callback -
 *   Called once with parallel `albums`, `years` and `tracks` arrays holding
 *   only the pages that were fetched successfully.
 */
function crawl(callback) {
  const base_url = 'http://www.interparkenter.com/guckkasten/Album/';

  const album_ids = [];
  for (let id = 1; id <= 13; id++) {
    if (!(id > 9 && id < 13)) {
      album_ids.push(id);
    }
  }

  // One slot per request; a slot stays empty when its request fails.
  const pages = new Array(album_ids.length);
  let pending = album_ids.length;

  album_ids.forEach((id, slot) => {
    const url = base_url + id;
    request(url, (err, res, body) => {
      if (err || !body) {
        // Skip this page instead of letting cheerio.load(undefined) throw
        // inside the callback and abort the whole crawl.
        console.error('crawl: could not fetch ' + url, err);
      } else {
        pages[slot] = parse_guckkasten_album(cheerio.load(body));
      }

      pending -= 1;
      if (pending === 0) {
        const fetched = pages.filter(Boolean);
        callback(
          fetched.map((page) => page.album),
          fetched.map((page) => page.year),
          fetched.map((page) => page.track)
        );
      }
    });
  });
}

/**
 * Extract title, release string and track list from one loaded album page.
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load`.
 * @returns {{album: string, year: string, track: string}}
 */
function parse_guckkasten_album($) {
  let track = '';
  $('#divDetail > div.disco_left_box > div.track_area > div > ol > li').each((position, item) => {
    track += $(item).text() + '\n';
  });

  // The first three characters of the release text are dropped, as before;
  // presumably a label in front of the date - confirm against the live page.
  const year = $('#divDetail > div.disco_right_box > div.Introduction > p.txt_release')
    .text()
    .slice(3);

  const album = $('#divDetail > div.disco_right_box > div.Introduction > p.txt_title').text();

  return { album, year, track };
}
68 | + | ||
69 | + | ||
70 | +exports.crawl_guckkasten = crawl_guckkasten; | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
crawling/Mot.js
0 → 100644
1 | +var request = require('request'); | ||
2 | +var cheerio = require('cheerio'); | ||
3 | + | ||
/**
 * Scrape the discography of the band Mot from its namu.wiki article.
 *
 * Three album tables are read from fixed positions in the article body.
 * For each table the first row supplies the album title, the second row is
 * skipped, and every later row contributes the text of its second cell as
 * a track. Release dates are not scraped; they are fixed strings matched
 * to the tables by position.
 *
 * @param {function(Array<{json_album: string[], json_year: string[], json_track: string[]}>)} callback -
 *   Called exactly once with a one-element array. `json_album` entries are
 *   numbered ("1. Title"), `json_track` entries hold numbered tracks joined
 *   with a trailing '\n' each. A table that is not found yields '' for both
 *   its album and its tracks so the arrays stay aligned with `json_year`.
 *   If the page cannot be fetched, all three arrays are empty.
 */
function crawl_mot(callback) {
  const url = 'https://namu.wiki/w/%EB%AA%BB(%EB%B0%B4%EB%93%9C)';
  const json_year = ['2004. 06. 18', '2007. 05. 22', '2016. 02. 19'];
  // nth-child positions of the three album containers inside the article
  // (the values the original index arithmetic produced: 14, 16, then 24).
  const table_positions = [14, 16, 24];

  request(url, (err, res, body) => {
    if (err || !body) {
      // Report an empty discography instead of crashing in cheerio.load.
      console.error('crawl_mot: could not fetch ' + url, err);
      callback([{ json_album: [], json_year: [], json_track: [] }]);
      return;
    }

    const $ = cheerio.load(body);
    const json_album = [];
    const json_track = [];

    table_positions.forEach((position, album_index) => {
      const rows = $('body > div.content-wrapper > article > div.wiki-content.clearfix > div > div:nth-child(' + position + ') > div > table > tbody > tr');

      // Declared per table: a missing table must not reuse the previous
      // album's title (and must not rely on an undeclared global).
      let album = '';
      let album_track = '';
      let track_cnt = 0;

      rows.each((row_index, row) => {
        if (row_index === 0) {
          album = (album_index + 1) + '. ' + $(row).text();
        } else if (row_index >= 2) {
          track_cnt += 1;
          const track = $(row).children('td:nth-child(2)').text();
          album_track += track_cnt + '. ' + track + '\n';
        }
      });

      json_album.push(album);
      json_track.push(album_track);
    });

    callback([{ json_album, json_year, json_track }]);
  });
}
46 | + | ||
47 | +exports.crawl_mot = crawl_mot; |
-
Please register or login to post a comment