윤준현

add_Mot_Guckka

1 +var request = require('request');
2 +var cheerio = require('cheerio');
3 +
/**
 * Collects the Guckkasten discography scraped by `crawl`, orders it by
 * release string and hands it to `callback` once all album pages have
 * reported in.
 *
 * @param {function(Array): void} callback - Invoked exactly once with
 *   `[albums, years, tracks]`: three parallel arrays ordered by ascending
 *   release string. Each album title is prefixed with its 1-based position
 *   (e.g. `"1. Title"`).
 */
function crawl_guckkasten(callback) {
  // `crawl` reports once per album page it requests (pages 1-9 and 13),
  // passing the cumulative lists each time, so the data is complete on the
  // 10th report. Keep this in sync with the page loop inside `crawl`.
  const EXPECTED_REPORTS = 10;
  let reports = 0;

  crawl(function (album, year, track) {
    reports += 1;
    if (reports !== EXPECTED_REPORTS) {
      return;
    }

    // The pages are requested in a loop without waiting for each other, so the
    // lists are not guaranteed to be in release order. Sort a permutation of
    // positions instead of the lists themselves: the three lists stay aligned
    // and the arrays owned by `crawl` are left untouched.
    // Array.prototype.sort is stable (Node >= 11), so equal release strings
    // keep their arrival order.
    const order = year.map(function (_, position) {
      return position;
    });
    order.sort(function (left, right) {
      if (year[left] > year[right]) {
        return 1;
      }
      if (year[left] < year[right]) {
        return -1;
      }
      return 0;
    });

    const albums = order.map(function (source, position) {
      return `${position + 1}. ${album[source]}`;
    });
    const years = order.map(function (source) {
      return year[source];
    });
    const tracks = order.map(function (source) {
      return track[source];
    });

    callback([albums, years, tracks]);
  });
}
35 +
/**
 * Requests the Guckkasten album pages (`.../Album/1` to `.../Album/9` and
 * `.../Album/13`) and scrapes title, release text and track list from each.
 *
 * `callback` is invoked once per requested page, in whatever order the
 * responses arrive, and always receives the same three cumulative arrays.
 * A page whose request fails is logged and contributes no entry, but is still
 * reported so that a consumer counting reports is not left waiting.
 *
 * @param {function(string[], string[], string[]): void} callback - Receives
 *   `(albums, years, tracks)`; the arrays are parallel and grow as pages
 *   arrive. Each track entry is the page's track texts joined with `'\n'`
 *   (with a trailing newline).
 */
function crawl(callback) {
  const BASE_URL = 'http://www.interparkenter.com/guckkasten/Album/';
  const TRACK_LIST = '#divDetail > div.disco_left_box > div.track_area > div > ol';
  const RELEASE = '#divDetail > div.disco_right_box > div.Introduction > p.txt_release';
  const TITLE = '#divDetail > div.disco_right_box > div.Introduction > p.txt_title';

  const json_album = [];
  const json_year = [];
  const json_track = [];

  for (let page = 1; page <= 13; page++) {
    // Pages 10-12 are deliberately not requested.
    // NOTE(review): presumably those ids are not album pages - confirm.
    if (page > 9 && page < 13) {
      continue;
    }

    const url = BASE_URL + page;
    request(url, function (err, res, body) {
      if (err || body == null) {
        console.error(`crawl: request for ${url} failed`, err);
        callback(json_album, json_year, json_track);
        return;
      }

      const $ = cheerio.load(body);

      // Read each <li> directly instead of re-querying it by position.
      let album_track = '';
      $(TRACK_LIST).find('li').each(function (index, item) {
        album_track += $(item).text() + '\n';
      });
      json_track.push(album_track);

      // Drop the first three characters of the release text.
      // NOTE(review): presumably a label prefix before the date - confirm.
      json_year.push($(RELEASE).text().slice(3));

      json_album.push($(TITLE).text());

      callback(json_album, json_year, json_track);
    });
  }
}
68 +
69 +
70 +exports.crawl_guckkasten = crawl_guckkasten;
...\ No newline at end of file ...\ No newline at end of file
1 +var request = require('request');
2 +var cheerio = require('cheerio');
3 +
/**
 * Scrapes the discography tables of the band Mot from its namu.wiki page.
 *
 * @param {function(Array<Object>): void} callback - Invoked once with a
 *   one-element array `[{ json_album, json_year, json_track }]`.
 *   `json_album` holds numbered album titles (`"1. Title"`), `json_year` the
 *   hard-coded release dates, and `json_track` one string per album with
 *   numbered tracks joined by `'\n'`. If the request fails the error is
 *   logged and all three lists are empty.
 */
function crawl_mot(callback) {
  const url = 'https://namu.wiki/w/%EB%AA%BB(%EB%B0%B4%EB%93%9C)';
  const CONTENT = 'body > div.content-wrapper > article > div.wiki-content.clearfix > div';
  // nth-child positions, inside the content div, of the wrappers holding the
  // three album tables. They depend on the current page layout.
  const ALBUM_POSITIONS = [14, 16, 24];
  // Release dates are not scraped; they are fixed, one per album table.
  const RELEASE_DATES = ['2004. 06. 18', '2007. 05. 22', '2016. 02. 19'];

  request(url, function (err, res, body) {
    if (err || body == null) {
      console.error(`crawl_mot: request for ${url} failed`, err);
      callback([{ json_album: [], json_year: [], json_track: [] }]);
      return;
    }

    const $ = cheerio.load(body);
    const json_album = [];
    const json_year = RELEASE_DATES.slice();
    const json_track = [];
    let album_num = 0;

    ALBUM_POSITIONS.forEach(function (position) {
      // Declared per table, so a table without rows yields empty strings
      // instead of reusing a title left over from the previous table.
      let album = '';
      let album_track = '';
      let track_cnt = 0;

      const rows = $(`${CONTENT} > div:nth-child(${position}) > div > table > tbody`).find('tr');
      rows.each(function (rowIndex, row) {
        if (rowIndex === 0) {
          // First row carries the album title.
          album_num += 1;
          album = `${album_num}. ${$(row).text()}`;
        } else if (rowIndex >= 2) {
          // The second row is skipped; from the third row on, the second
          // cell is taken as the track title.
          track_cnt += 1;
          const track = $(row).children('td:nth-child(2)').text();
          album_track += `${track_cnt}. ${track}\n`;
        }
      });

      json_album.push(album);
      json_track.push(album_track);
    });

    callback([{ json_album, json_year, json_track }]);
  });
}
46 +
47 +exports.crawl_mot = crawl_mot;