김서영

add search.js

const rp = require("request-promise");
const cheerio = require("cheerio");
const Entities = require('html-entities').XmlEntities;
const machineRead = require('./machineRead');
const entities = new Entities();
/**
 * Reports whether the text of an element contains at least one search word.
 *
 * The keyword string is split on single spaces and each non-empty word is
 * looked up as a plain substring of the element's text.
 *
 * @param {string} keywordText - Space-separated search words.
 * @param {Function} $ - Selector function; `$( elem ).text()` must return the element's text.
 * @param {*} elem - Element handle passed straight to `$`.
 * @returns {boolean} `true` when any non-empty word occurs in the text, otherwise `false`.
 */
const keywordChecking = ( keywordText, $, elem ) => {
    // Read the element text once instead of once per word.
    const elementText = $( elem ).text();
    return keywordText
        .split( ' ' )
        // Leading, trailing or doubled spaces (and an empty query) produce ''
        // tokens; an empty string is a substring of everything, so keeping
        // them would make every element count as a match.
        .filter( ( word ) => word.length > 0 )
        .some( ( word ) => elementText.includes( word ) );
};
/**
 * Fills `searchResult` from the markup surrounding a matched element.
 *
 * - `title`   : the `title` attribute of the element's parent (may be `undefined`).
 * - `passage` : entity-decoded, trimmed text of the element's great-grandparent.
 * - `url`     : the parent's `href` attribute, or `defaultURL` when it has none.
 *
 * @param {Object} searchResult - Object that receives `title`, `passage` and `url`; mutated in place.
 * @param {Function} $ - Selector function whose wrappers expose `parent()`, `attr()` and `text()`.
 * @param {*} elem - Matched element handle.
 * @param {string} defaultURL - URL to use when the parent carries no `href`.
 * @returns {void}
 */
const naver = ( searchResult, $, elem, defaultURL ) => {
    const parent = $( elem ).parent();
    searchResult.title = parent.attr( "title" );
    // Each assignment is its own statement; the original chained the next one
    // through a stray comma operator.
    searchResult.passage = entities.decode( parent.parent().parent().text() ).trim();
    const href = parent.attr( "href" );
    searchResult.url = href === undefined ? defaultURL : href;
};
/**
 * Cleans up a scraped result and, when eligible, appends it to `result`.
 *
 * Clean-up steps:
 *  1. URL-like tokens are removed from the passage and runs of whitespace
 *     are collapsed to a single space.
 *  2. When the result has no title, one is built from the first three words
 *     of the passage followed by "..".
 *  3. Otherwise the title is cleaned the same way and its first occurrence
 *     is removed from the passage.
 *
 * @param {{title: (string|undefined), passage: string}} searchResult - Result to clean; mutated in place.
 * @param {Array<Object>} result - Accumulator; receives `searchResult` when it is accepted.
 * @param {boolean} keywordCheck - Whether the source element matched the keyword.
 * @returns {void}
 */
const searchToResult = ( searchResult, result, keywordCheck ) => {
    // Matches the same strings as the previous `([a-z0-9\w]+\.*)+` form, but
    // without nested quantifiers, which backtracked exponentially on input
    // such as "http://" followed by a long run of underscores.
    const urlPattern = /https?:\/\/\w[\w.]*[a-z0-9]{2,4}/gi;
    const tidy = ( text ) => text.replace( urlPattern, ' ' ).replace( /\s+/g, ' ' ).trim();

    searchResult.passage = tidy( searchResult.passage );
    if( searchResult.title === undefined || !searchResult.title.length ) {
        // join(' ') keeps commas that belong to the text; the former
        // toString()/replace(',') round trip turned them into spaces.
        searchResult.title = searchResult.passage.split( ' ' ).slice( 0, 3 ).join( ' ' ) + "..";
    } else {
        searchResult.title = tidy( searchResult.title );
        searchResult.passage = searchResult.passage.replace( searchResult.title, '' );
    }

    // NOTE(review): only the first matching result is ever stored. The
    // original had an empty `else if( keywordCheck )` branch for the
    // non-empty case, which looks unfinished - confirm the intended handling
    // of further matches (append? de-duplicate?) before changing this.
    if( keywordCheck && !result.length ) {
        result.push( searchResult );
    }
};
/**
 * Parses an HTML document and collects search results from the elements
 * selected by `main`.
 *
 * Every selected element is tested with `keywordChecking`; for each match a
 * fresh result object is populated by `findSearchResult` and then handed to
 * `searchToResult`, which decides whether it ends up in the returned array.
 *
 * @param {string} main - Selector for the candidate elements.
 * @param {string} keywordText - Space-separated search words.
 * @param {string} html - HTML source to parse.
 * @param {string} defaultURL - Fallback URL forwarded to `findSearchResult`.
 * @param {Function} findSearchResult - Extractor called as `( searchResult, $, elem, defaultURL )`.
 * @returns {Array<Object>} The collected results.
 */
const getHtmlMain = ( main, keywordText, html, defaultURL, findSearchResult ) => {
    const $ = cheerio.load( html );
    const collected = [];
    $( main ).each( ( index, node ) => {
        const matched = keywordChecking( keywordText, $, node );
        if( !matched ) {
            // Nothing to extract from elements that do not mention the keyword.
            return;
        }
        const entry = {};
        findSearchResult( entry, $, node, defaultURL );
        searchToResult( entry, collected, matched );
    } );
    return collected;
};
const search = {};
/**
 * Searches Naver for `keywordText` and scrapes the matching passages.
 *
 * Downloads the Naver result page with `rp` and passes the HTML to
 * `getHtmlMain` together with the "#main_pack strong" selector and the
 * `naver` extractor.
 *
 * @param {string} keywordText - Search words, separated by spaces.
 * @returns {Promise<Array<Object>>} Resolves with the array produced by
 *   `getHtmlMain`; rejects when the request or the parsing fails (the
 *   previous hand-built promise never settled in that case).
 */
search.naver = async ( keywordText ) => {
    const naverMain = "#main_pack strong";
    // encodeURIComponent, not encodeURI: characters such as `&`, `=`, `#`
    // and `+` must be escaped so they stay inside the `query` value.
    const naverURL = "https://search.naver.com/search.naver?query=" + encodeURIComponent( keywordText );
    const html = await rp( { "uri" : naverURL } );
    return getHtmlMain( naverMain, keywordText, html, naverURL, naver );
};
// Public API of this module: the `search` object and its provider methods.
module.exports = search
\ No newline at end of file