Showing
2 changed files
with
117 additions
and
0 deletions
crawling/Guckkasten.js
0 → 100644
| 1 | +var request = require('request'); | ||
| 2 | +var cheerio = require('cheerio'); | ||
| 3 | + | ||
/**
 * Collect the Guckkasten discography via crawl() and hand it to `callback`
 * ordered by release date (ascending), with each album title numbered.
 *
 * crawl() reports once per album page it requests (ids 1-9 and 13, i.e. ten
 * pages), each time passing the arrays accumulated so far. Nothing is
 * delivered until the tenth report; if crawl() reports fewer than ten times,
 * `callback` is never invoked.
 *
 * @param {function(Array)} callback - invoked exactly once with
 *     `[albums, years, tracks]`: three parallel arrays sorted by the release
 *     string, where every album title is prefixed with its 1-based rank
 *     (e.g. `'1. Title'`).
 */
function crawl_guckkasten(callback) {
    // Number of reports crawl() makes: one per requested album page.
    const EXPECTED_REPORTS = 10;
    let reports = 0;

    crawl(function (album, year, track) {
        reports += 1;
        if (reports !== EXPECTED_REPORTS) {
            return;
        }

        // Zip the parallel arrays so the three fields of an album move together.
        const records = year.map(function (released, position) {
            return {
                released: released,
                title: album[position],
                tracks: track[position],
                position: position,
            };
        });

        // Ascending by release string; ties keep arrival order (the explicit
        // position tie-break keeps the sort stable on any engine).
        records.sort(function (left, right) {
            if (left.released > right.released) {
                return 1;
            }
            if (left.released < right.released) {
                return -1;
            }
            return left.position - right.position;
        });

        // Build fresh arrays instead of rewriting the ones owned by crawl().
        const guckkasten = [
            records.map(function (record, rank) {
                return (rank + 1) + '. ' + record.title;
            }),
            records.map(function (record) {
                return record.released;
            }),
            records.map(function (record) {
                return record.tracks;
            }),
        ];
        callback(guckkasten);
    });
}
| 35 | + | ||
/**
 * Request the Guckkasten album pages and accumulate, per page, the album
 * title, the release text and a newline-separated track listing.
 *
 * One request is issued for each album id 1-9 and 13 (ten requests). The
 * requests are started from a plain loop without waiting for each other, so
 * entries are appended in the order the responses are handled.
 *
 * @param {function(string[], string[], string[])} callback - invoked once per
 *     requested page with the album titles, release strings and track
 *     listings accumulated so far (the same three arrays every time). When a
 *     request fails the error is logged, nothing is appended for that page,
 *     and the callback is still invoked so that callers counting invocations
 *     are not left waiting.
 */
function crawl(callback) {
    const TRACK_LIST = '#divDetail > div.disco_left_box > div.track_area > div > ol';
    const INTRODUCTION = '#divDetail > div.disco_right_box > div.Introduction';

    const json_album = [];
    const json_year = [];
    const json_track = [];

    for (let id = 1; id <= 13; id++) {
        // Ids 10-12 are never requested (presumably not album pages - TODO confirm).
        if (id > 9 && id < 13) {
            continue;
        }

        const url = 'http://www.interparkenter.com/guckkasten/Album/' + id;
        request(url, function (err, res, body) {
            if (err) {
                // Without a body there is nothing to parse; report and move on.
                console.error('Failed to fetch ' + url + ':', err);
                callback(json_album, json_year, json_track);
                return;
            }

            const $ = cheerio.load(body);

            // One line per <li> of the track list, looked up by position.
            let album_track = '';
            let track_num = 0;
            $(TRACK_LIST).find('li').each(function () {
                track_num++;
                album_track += $(TRACK_LIST + ' > li:nth-child(' + track_num + ')').text() + '\n';
            });
            json_track.push(album_track);

            // Drop the first three characters of the release text
            // (presumably a label prefix - TODO confirm against the page).
            const release = $(INTRODUCTION + ' > p.txt_release').text();
            json_year.push(release.slice(3));

            json_album.push($(INTRODUCTION + ' > p.txt_title').text());

            callback(json_album, json_year, json_track);
        });
    }
}
| 68 | + | ||
| 69 | + | ||
| 70 | +exports.crawl_guckkasten = crawl_guckkasten; | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
crawling/Mot.js
0 → 100644
| 1 | +var request = require('request'); | ||
| 2 | +var cheerio = require('cheerio'); | ||
| 3 | + | ||
/**
 * Scrape the namu.wiki article for the band Mot and hand its discography to
 * `callback`.
 *
 * Three album tables are read, located by the nth-child position (14, 16, 24)
 * of their wrapper div inside the article content. In each table the first
 * row supplies the album title, the second row is skipped, and from the third
 * row on the second cell of every row supplies a track title. Release dates
 * are hard-coded rather than scraped.
 *
 * @param {function(Array)} callback - invoked once with a one-element array
 *     `[{ json_album, json_year, json_track }]`. Album titles are prefixed
 *     with a running number (`'1. Title'`); each `json_track` entry is a
 *     newline-terminated list of numbered tracks for one album. When the
 *     request fails the error is logged and `json_album` / `json_track` are
 *     delivered empty.
 */
function crawl_mot(callback) {
    const url = 'https://namu.wiki/w/%EB%AA%BB(%EB%B0%B4%EB%93%9C)';
    // nth-child positions of the divs wrapping the three album tables.
    const TABLE_POSITIONS = [14, 16, 24];

    const json_album = [];
    const json_year = ['2004. 06. 18', '2007. 05. 22', '2016. 02. 19'];
    const json_track = [];

    request(url, function (err, res, body) {
        if (err) {
            // No page to parse: report the failure and deliver what is known.
            console.error('Failed to fetch ' + url + ':', err);
            callback([{ json_album, json_year, json_track }]);
            return;
        }

        const $ = cheerio.load(body);
        // Only tables that actually have a title row consume a number.
        let album_num = 0;

        TABLE_POSITIONS.forEach(function (position) {
            const tbody = 'body > div.content-wrapper > article > div.wiki-content.clearfix > div > div:nth-child(' + position + ') > div > table > tbody';
            // Declared per table so a table without rows cannot inherit the
            // previous album's title (and so nothing leaks into global scope).
            let album = '';
            let album_track = '';
            let row_index = 0;
            let track_cnt = 0;

            $(tbody).find('tr').each(function () {
                row_index++;
                if (row_index === 1) {
                    album_num++;
                    album = album_num + '. ' + $(tbody + ' > tr:nth-child(1)').text();
                } else if (row_index >= 3) {
                    track_cnt++;
                    const track = $(tbody + ' > tr:nth-child(' + row_index + ') > td:nth-child(2)').text();
                    album_track += track_cnt + '. ' + track + '\n';
                }
            });

            json_album.push(album);
            json_track.push(album_track);
        });

        callback([{ json_album, json_year, json_track }]);
    });
}
| 46 | + | ||
| 47 | +exports.crawl_mot = crawl_mot; |
-
Please register or login to post a comment