윤준석

Merge branch 'feature/220515_joongna_selenium_to_rod' into 'main'

Feature/220515 joongna selenium to rod

# 중고나라 API 크롤링 패키지 변경 및 병렬 처리

1. 기존에 사용하던 go-selenium이 headless 모드를 지원하지 않음

2. go-rod로 변경 및 goroutine으로 병렬 처리

See merge request !10
# Build image for the Joongna API server: Go toolchain plus Google Chrome and
# the shared libraries installed below.
FROM golang:1.17.3
# Legacy "ENV key value" form: defines a variable named Xvfb with the value ":99".
# NOTE(review): this only sets a variable; no visible line starts an Xvfb process — confirm intent.
ENV Xvfb :99
# X display number exported to processes in the container.
ENV DISPLAY=:99
# Force Go module mode for the build.
ENV GO111MODULE=on
RUN apt-get -y update
# Download tool, virtual framebuffer package, and GnuPG (used by apt-key below).
RUN apt-get install -y wget xvfb gnupg
# Register Google's package signing key and the Chrome stable apt repository.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
# Refresh the package index so the newly added repository is visible.
RUN apt-get -y update
RUN apt-get install -y google-chrome-stable
RUN apt-get install -yqq unzip
# Fetch the chromedriver build named by LATEST_RELEASE and unpack it into /usr/local/bin.
# NOTE(review): curl is not installed by any visible line — presumably provided by the base image; confirm.
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
RUN unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
# Copy the project sources into the image and build from there.
WORKDIR /src
COPY . /src
RUN apt-get update
# Shared libraries; presumably the runtime dependencies Chrome needs when driven headlessly — TODO confirm.
RUN apt-get install -y libgconf-2-4 libatk1.0-0 libatk-bridge2.0-0 libgdk-pixbuf2.0-0 libgtk-3-0 libgbm-dev libnss3-dev libxss-dev libasound2
# Compile the server binary into /src/Joongna_api_server.
RUN go build -o Joongna_api_server
# Documents port 8080 as the port the container listens on.
EXPOSE 8080
......
......@@ -9,6 +9,7 @@ require (
github.com/bunsenapp/go-selenium v0.1.0 // indirect
github.com/caarlos0/env/v6 v6.9.1 // indirect
github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef // indirect
github.com/go-rod/rod v0.106.6 // indirect
github.com/joho/godotenv v1.4.0 // indirect
github.com/labstack/echo/v4 v4.7.2 // indirect
github.com/labstack/gommon v0.3.1 // indirect
......@@ -17,6 +18,9 @@ require (
github.com/tebeka/selenium v0.9.9 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.1 // indirect
github.com/ysmood/goob v0.4.0 // indirect
github.com/ysmood/gson v0.7.1 // indirect
github.com/ysmood/leakless v0.7.0 // indirect
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect
golang.org/x/sys v0.0.0-20211103235746-7861aae1554b // indirect
......
......@@ -21,6 +21,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef h1:0z8rB8nAGEso7PhKS21wBjjxTp2uGPyZ6STzRc7mnBY=
github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef/go.mod h1:RUn/EmpfFIGHvmeXmh+hk1UaCbjOXa6vl7/kx1b6wxw=
github.com/go-rod/rod v0.106.6 h1:zJorVPG7s8Xgbh7PkSySP4FNoo0OiougKaMb3j6zT6w=
github.com/go-rod/rod v0.106.6/go.mod h1:xkZOchuKqTOkMOBkrzb7uJpbKZRab1haPCWDvuZkS2U=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
......@@ -60,6 +62,14 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
github.com/ysmood/got v0.29.1/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
github.com/ysmood/gson v0.7.1 h1:zKL2MTGtynxdBdlZjyGsvEOZ7dkxaY5TH6QhAbTgz0Q=
github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw=
github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
......
......@@ -8,7 +8,7 @@ import (
// Route paths served by the API.
const (
	// API is the versioned prefix shared by every route.
	API = "/api/v2"
	// APIJoongNa is the base path of the JoongNa endpoints. It is declared
	// once, with the lowercase path segment.
	APIJoongNa = API + "/joongna"
	// APIKeyword is the JoongNa search route; the ":keyword" segment is the
	// path placeholder for the search term.
	APIKeyword = APIJoongNa + "/:keyword"
)
......
......@@ -12,47 +12,49 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/fedesog/webdriver"
"github.com/go-rod/rod"
)
func GetItemByKeyword(keyword string) ([]model.Item, error) {
var items []model.Item
wg := sync.WaitGroup{}
itemsInfo := getItemsInfoByKeyword(keyword)
for _, itemInfo := range itemsInfo {
if itemInfo.CafeName != "중고나라" {
continue
itemsInfo, err := getItemsInfoByKeyword(keyword)
if err != nil {
return nil, err
}
itemUrl := itemInfo.Link
sold, price, thumbnailUrl, extraInfo := crawlingNaverCafe(itemUrl)
if sold == "판매 완료" {
for _, itemInfo := range itemsInfo {
itemUrl := itemInfo.Link
if itemInfo.CafeName != "중고나라" {
continue
}
item := model.Item{
Platform: "중고나라",
Name: itemInfo.Title,
Price: price,
ThumbnailUrl: thumbnailUrl,
ItemUrl: itemUrl,
ExtraInfo: extraInfo,
wg.Add(1)
go func(itemUrl string) {
defer wg.Done()
item, err := crawlingNaverCafe(itemUrl)
if err != nil {
log.Fatal(err)
}
items = append(items, item)
items = append(items, *item)
}(itemUrl)
}
wg.Wait()
return items, nil
}
func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem {
func getItemsInfoByKeyword(keyword string) ([]model.ApiResponseItem, error) {
encText := url.QueryEscape("중고나라 " + keyword + " 판매중")
apiUrl := "https://openapi.naver.com/v1/search/cafearticle.json?query=" + encText + "&sort=sim"
req, err := http.NewRequest("GET", apiUrl, nil)
if err != nil {
log.Fatal(err)
return nil, err
}
req.Header.Add("X-Naver-Client-Id", config.Cfg.Secret.CLIENTID)
req.Header.Add("X-Naver-Client-Secret", config.Cfg.Secret.CLIENTSECRET)
......@@ -60,7 +62,7 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem {
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
log.Fatal(err)
return nil, err
}
defer func(Body io.ReadCloser) {
err := Body.Close()
......@@ -75,52 +77,48 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem {
if err != nil {
log.Fatal(err)
}
return apiResponse.Items
return apiResponse.Items, nil
}
func crawlingNaverCafe(cafeUrl string) (string, int, string, string) {
driver := webdriver.NewChromeDriver("./chromedriver")
err := driver.Start()
func crawlingNaverCafe(cafeUrl string) (*model.Item, error) {
frame := rod.New().MustConnect().MustPage(cafeUrl).MustElement("iframe#cafe_main")
time.Sleep(time.Second * 2)
source := frame.MustFrame().MustHTML()
html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(source)))
if err != nil {
log.Println(err)
}
desired := webdriver.Capabilities{"Platform": "Linux"}
required := webdriver.Capabilities{}
session, err := driver.NewSession(desired, required)
if err != nil {
log.Println(err)
}
err = session.Url(cafeUrl)
if err != nil {
log.Println(err)
}
time.Sleep(time.Second * 1)
err = session.FocusOnFrame("cafe_main")
if err != nil {
log.Fatal(err)
}
resp, err := session.Source()
html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp)))
if err != nil {
log.Fatal(err)
return nil, err
}
title := html.Find("h3.title_text").Text()
sold := html.Find("div.sold_area").Text()
price := priceStringToInt(html.Find(".ProductPrice").Text())
thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src")
extraInfo := html.Find(".se-module-text").Text()
title = strings.TrimSpace(title)
sold = strings.TrimSpace(sold)
thumbnailUrl = strings.TrimSpace(thumbnailUrl)
extraInfo = strings.TrimSpace(extraInfo)
return sold, price, thumbnailUrl, extraInfo
item := model.Item{
Platform: "중고나라",
Name: title,
Price: price,
ThumbnailUrl: thumbnailUrl,
ItemUrl: cafeUrl,
ExtraInfo: extraInfo,
}
return &item, nil
}
func priceStringToInt(priceString string) int {
strings.TrimSpace(priceString)
if priceString == "" {
return 0
}
priceString = strings.ReplaceAll(priceString, "원", "")
priceString = strings.ReplaceAll(priceString, ",", "")
......