윤준석

Merge branch 'feature/220515_joongna_selenium_to_rod' into 'main'

Feature/220515 joongna selenium to rod

# 중고나라 API 크롤링 패키지 변경 및 병렬 처리

1. 기존에 사용하던 go-selenium이 headless 모드를 지원하지 않음

2. go-rod로 변경 및 goroutine으로 병렬 처리

See merge request !10
1 FROM golang:1.17.3 1 FROM golang:1.17.3
2 2
3 -ENV Xvfb :99
4 -ENV DISPLAY=:99
5 -ENV GO111MODULE=on
6 -
7 -RUN apt-get -y update
8 -RUN apt-get install -y wget xvfb gnupg
9 -
10 -RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
11 -RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
12 -RUN apt-get -y update
13 -RUN apt-get install -y google-chrome-stable
14 -
15 -RUN apt-get install -yqq unzip
16 -RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
17 -RUN unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
18 -
19 WORKDIR /src 3 WORKDIR /src
20 COPY . /src 4 COPY . /src
21 5
6 +RUN apt-get update
7 +RUN apt-get install -y libgconf-2-4 libatk1.0-0 libatk-bridge2.0-0 libgdk-pixbuf2.0-0 libgtk-3-0 libgbm-dev libnss3-dev libxss-dev libasound2
22 RUN go build -o Joongna_api_server 8 RUN go build -o Joongna_api_server
23 9
24 EXPOSE 8080 10 EXPOSE 8080
......
...@@ -9,6 +9,7 @@ require ( ...@@ -9,6 +9,7 @@ require (
9 github.com/bunsenapp/go-selenium v0.1.0 // indirect 9 github.com/bunsenapp/go-selenium v0.1.0 // indirect
10 github.com/caarlos0/env/v6 v6.9.1 // indirect 10 github.com/caarlos0/env/v6 v6.9.1 // indirect
11 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef // indirect 11 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef // indirect
12 + github.com/go-rod/rod v0.106.6 // indirect
12 github.com/joho/godotenv v1.4.0 // indirect 13 github.com/joho/godotenv v1.4.0 // indirect
13 github.com/labstack/echo/v4 v4.7.2 // indirect 14 github.com/labstack/echo/v4 v4.7.2 // indirect
14 github.com/labstack/gommon v0.3.1 // indirect 15 github.com/labstack/gommon v0.3.1 // indirect
...@@ -17,6 +18,9 @@ require ( ...@@ -17,6 +18,9 @@ require (
17 github.com/tebeka/selenium v0.9.9 // indirect 18 github.com/tebeka/selenium v0.9.9 // indirect
18 github.com/valyala/bytebufferpool v1.0.0 // indirect 19 github.com/valyala/bytebufferpool v1.0.0 // indirect
19 github.com/valyala/fasttemplate v1.2.1 // indirect 20 github.com/valyala/fasttemplate v1.2.1 // indirect
21 + github.com/ysmood/goob v0.4.0 // indirect
22 + github.com/ysmood/gson v0.7.1 // indirect
23 + github.com/ysmood/leakless v0.7.0 // indirect
20 golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect 24 golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect
21 golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect 25 golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect
22 golang.org/x/sys v0.0.0-20211103235746-7861aae1554b // indirect 26 golang.org/x/sys v0.0.0-20211103235746-7861aae1554b // indirect
......
...@@ -21,6 +21,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs ...@@ -21,6 +21,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
21 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 21 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
22 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef h1:0z8rB8nAGEso7PhKS21wBjjxTp2uGPyZ6STzRc7mnBY= 22 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef h1:0z8rB8nAGEso7PhKS21wBjjxTp2uGPyZ6STzRc7mnBY=
23 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef/go.mod h1:RUn/EmpfFIGHvmeXmh+hk1UaCbjOXa6vl7/kx1b6wxw= 23 github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef/go.mod h1:RUn/EmpfFIGHvmeXmh+hk1UaCbjOXa6vl7/kx1b6wxw=
24 +github.com/go-rod/rod v0.106.6 h1:zJorVPG7s8Xgbh7PkSySP4FNoo0OiougKaMb3j6zT6w=
25 +github.com/go-rod/rod v0.106.6/go.mod h1:xkZOchuKqTOkMOBkrzb7uJpbKZRab1haPCWDvuZkS2U=
24 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 26 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
25 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 27 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
26 github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 28 github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
...@@ -60,6 +62,14 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw ...@@ -60,6 +62,14 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
60 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 62 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
61 github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4= 63 github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
62 github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= 64 github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
65 +github.com/ysmood/goob v0.4.0 h1:HsxXhyLBeGzWXnqVKtmT9qM7EuVs/XOgkX7T6r1o1AQ=
66 +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18=
67 +github.com/ysmood/got v0.29.1/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY=
68 +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
69 +github.com/ysmood/gson v0.7.1 h1:zKL2MTGtynxdBdlZjyGsvEOZ7dkxaY5TH6QhAbTgz0Q=
70 +github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
71 +github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw=
72 +github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
63 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 73 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
64 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= 74 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
65 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 75 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
......
...@@ -8,7 +8,7 @@ import ( ...@@ -8,7 +8,7 @@ import (
8 8
9 const ( 9 const (
10 API = "/api/v2" 10 API = "/api/v2"
11 - APIJoongNa = API + "/JoongNa" 11 + APIJoongNa = API + "/joongna"
12 APIKeyword = APIJoongNa + "/:keyword" 12 APIKeyword = APIJoongNa + "/:keyword"
13 ) 13 )
14 14
......
...@@ -12,47 +12,49 @@ import ( ...@@ -12,47 +12,49 @@ import (
12 "net/url" 12 "net/url"
13 "strconv" 13 "strconv"
14 "strings" 14 "strings"
15 + "sync"
15 "time" 16 "time"
16 17
17 "github.com/PuerkitoBio/goquery" 18 "github.com/PuerkitoBio/goquery"
18 - "github.com/fedesog/webdriver" 19 + "github.com/go-rod/rod"
19 ) 20 )
20 21
21 func GetItemByKeyword(keyword string) ([]model.Item, error) { 22 func GetItemByKeyword(keyword string) ([]model.Item, error) {
22 var items []model.Item 23 var items []model.Item
24 + wg := sync.WaitGroup{}
25 +
26 + itemsInfo, err := getItemsInfoByKeyword(keyword)
27 + if err != nil {
28 + return nil, err
29 + }
23 30
24 - itemsInfo := getItemsInfoByKeyword(keyword)
25 for _, itemInfo := range itemsInfo { 31 for _, itemInfo := range itemsInfo {
26 - if itemInfo.CafeName != "중고나라" {
27 - continue
28 - }
29 itemUrl := itemInfo.Link 32 itemUrl := itemInfo.Link
30 - sold, price, thumbnailUrl, extraInfo := crawlingNaverCafe(itemUrl) 33 + if itemInfo.CafeName != "중고나라" {
31 -
32 - if sold == "판매 완료" {
33 continue 34 continue
34 } 35 }
35 - 36 + wg.Add(1)
36 - item := model.Item{ 37 + go func(itemUrl string) {
37 - Platform: "중고나라", 38 + defer wg.Done()
38 - Name: itemInfo.Title, 39 + item, err := crawlingNaverCafe(itemUrl)
39 - Price: price, 40 + if err != nil {
40 - ThumbnailUrl: thumbnailUrl, 41 + log.Fatal(err)
41 - ItemUrl: itemUrl, 42 + }
42 - ExtraInfo: extraInfo, 43 + items = append(items, *item)
43 - } 44 + }(itemUrl)
44 - items = append(items, item)
45 } 45 }
46 + wg.Wait()
47 +
46 return items, nil 48 return items, nil
47 } 49 }
48 50
49 -func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { 51 +func getItemsInfoByKeyword(keyword string) ([]model.ApiResponseItem, error) {
50 encText := url.QueryEscape("중고나라 " + keyword + " 판매중") 52 encText := url.QueryEscape("중고나라 " + keyword + " 판매중")
51 apiUrl := "https://openapi.naver.com/v1/search/cafearticle.json?query=" + encText + "&sort=sim" 53 apiUrl := "https://openapi.naver.com/v1/search/cafearticle.json?query=" + encText + "&sort=sim"
52 54
53 req, err := http.NewRequest("GET", apiUrl, nil) 55 req, err := http.NewRequest("GET", apiUrl, nil)
54 if err != nil { 56 if err != nil {
55 - log.Fatal(err) 57 + return nil, err
56 } 58 }
57 req.Header.Add("X-Naver-Client-Id", config.Cfg.Secret.CLIENTID) 59 req.Header.Add("X-Naver-Client-Id", config.Cfg.Secret.CLIENTID)
58 req.Header.Add("X-Naver-Client-Secret", config.Cfg.Secret.CLIENTSECRET) 60 req.Header.Add("X-Naver-Client-Secret", config.Cfg.Secret.CLIENTSECRET)
...@@ -60,7 +62,7 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { ...@@ -60,7 +62,7 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem {
60 client := &http.Client{} 62 client := &http.Client{}
61 resp, err := client.Do(req) 63 resp, err := client.Do(req)
62 if err != nil { 64 if err != nil {
63 - log.Fatal(err) 65 + return nil, err
64 } 66 }
65 defer func(Body io.ReadCloser) { 67 defer func(Body io.ReadCloser) {
66 err := Body.Close() 68 err := Body.Close()
...@@ -75,52 +77,48 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { ...@@ -75,52 +77,48 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem {
75 if err != nil { 77 if err != nil {
76 log.Fatal(err) 78 log.Fatal(err)
77 } 79 }
78 - return apiResponse.Items 80 + return apiResponse.Items, nil
79 } 81 }
80 82
81 -func crawlingNaverCafe(cafeUrl string) (string, int, string, string) { 83 +func crawlingNaverCafe(cafeUrl string) (*model.Item, error) {
82 - driver := webdriver.NewChromeDriver("./chromedriver") 84 + frame := rod.New().MustConnect().MustPage(cafeUrl).MustElement("iframe#cafe_main")
83 - err := driver.Start() 85 + time.Sleep(time.Second * 2)
84 - if err != nil { 86 + source := frame.MustFrame().MustHTML()
85 - log.Println(err) 87 + html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(source)))
86 - }
87 - desired := webdriver.Capabilities{"Platform": "Linux"}
88 - required := webdriver.Capabilities{}
89 - session, err := driver.NewSession(desired, required)
90 - if err != nil {
91 - log.Println(err)
92 - }
93 - err = session.Url(cafeUrl)
94 - if err != nil {
95 - log.Println(err)
96 - }
97 - time.Sleep(time.Second * 1)
98 - err = session.FocusOnFrame("cafe_main")
99 - if err != nil {
100 - log.Fatal(err)
101 - }
102 - resp, err := session.Source()
103 -
104 - html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp)))
105 if err != nil { 88 if err != nil {
106 - log.Fatal(err) 89 + return nil, err
107 } 90 }
108 91
92 + title := html.Find("h3.title_text").Text()
109 sold := html.Find("div.sold_area").Text() 93 sold := html.Find("div.sold_area").Text()
110 price := priceStringToInt(html.Find(".ProductPrice").Text()) 94 price := priceStringToInt(html.Find(".ProductPrice").Text())
111 thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src") 95 thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src")
112 extraInfo := html.Find(".se-module-text").Text() 96 extraInfo := html.Find(".se-module-text").Text()
113 97
98 + title = strings.TrimSpace(title)
114 sold = strings.TrimSpace(sold) 99 sold = strings.TrimSpace(sold)
115 thumbnailUrl = strings.TrimSpace(thumbnailUrl) 100 thumbnailUrl = strings.TrimSpace(thumbnailUrl)
116 extraInfo = strings.TrimSpace(extraInfo) 101 extraInfo = strings.TrimSpace(extraInfo)
117 102
118 - return sold, price, thumbnailUrl, extraInfo 103 + item := model.Item{
104 + Platform: "중고나라",
105 + Name: title,
106 + Price: price,
107 + ThumbnailUrl: thumbnailUrl,
108 + ItemUrl: cafeUrl,
109 + ExtraInfo: extraInfo,
110 + }
111 +
112 + return &item, nil
119 } 113 }
120 114
121 func priceStringToInt(priceString string) int { 115 func priceStringToInt(priceString string) int {
122 strings.TrimSpace(priceString) 116 strings.TrimSpace(priceString)
123 117
118 + if priceString == "" {
119 + return 0
120 + }
121 +
124 priceString = strings.ReplaceAll(priceString, "원", "") 122 priceString = strings.ReplaceAll(priceString, "원", "")
125 priceString = strings.ReplaceAll(priceString, ",", "") 123 priceString = strings.ReplaceAll(priceString, ",", "")
126 124
......