ADD: Crawling item with selenium
Get sold_info, price, thumbnailUrl, and extraInfo, and process those string or integer values.
Showing
5 changed files
with
91 additions
and
3 deletions
... | @@ -3,16 +3,23 @@ module joongna | ... | @@ -3,16 +3,23 @@ module joongna |
3 | go 1.17 | 3 | go 1.17 |
4 | 4 | ||
5 | require ( | 5 | require ( |
6 | + github.com/PuerkitoBio/goquery v1.8.0 // indirect | ||
7 | + github.com/andybalholm/cascadia v1.3.1 // indirect | ||
8 | + github.com/blang/semver v3.5.1+incompatible // indirect | ||
9 | + github.com/bunsenapp/go-selenium v0.1.0 // indirect | ||
6 | github.com/caarlos0/env/v6 v6.9.1 // indirect | 10 | github.com/caarlos0/env/v6 v6.9.1 // indirect |
11 | + github.com/fedesog/webdriver v0.0.0-20180606182539-99f36c92eaef // indirect | ||
7 | github.com/joho/godotenv v1.4.0 // indirect | 12 | github.com/joho/godotenv v1.4.0 // indirect |
8 | github.com/labstack/echo/v4 v4.7.2 // indirect | 13 | github.com/labstack/echo/v4 v4.7.2 // indirect |
9 | github.com/labstack/gommon v0.3.1 // indirect | 14 | github.com/labstack/gommon v0.3.1 // indirect |
10 | github.com/mattn/go-colorable v0.1.11 // indirect | 15 | github.com/mattn/go-colorable v0.1.11 // indirect |
11 | github.com/mattn/go-isatty v0.0.14 // indirect | 16 | github.com/mattn/go-isatty v0.0.14 // indirect |
17 | + github.com/tebeka/selenium v0.9.9 // indirect | ||
12 | github.com/valyala/bytebufferpool v1.0.0 // indirect | 18 | github.com/valyala/bytebufferpool v1.0.0 // indirect |
13 | github.com/valyala/fasttemplate v1.2.1 // indirect | 19 | github.com/valyala/fasttemplate v1.2.1 // indirect |
14 | golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect | 20 | golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect |
15 | golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect | 21 | golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect |
16 | golang.org/x/sys v0.0.0-20211103235746-7861aae1554b // indirect | 22 | golang.org/x/sys v0.0.0-20211103235746-7861aae1554b // indirect |
17 | golang.org/x/text v0.3.7 // indirect | 23 | golang.org/x/text v0.3.7 // indirect |
24 | + sourcegraph.com/sourcegraph/go-selenium v0.0.0-20170113155244-3da7d00aac9c // indirect | ||
18 | ) | 25 | ) | ... | ... |
This diff is collapsed. Click to expand it.
... | @@ -13,5 +13,4 @@ type ApiResponseItem struct { | ... | @@ -13,5 +13,4 @@ type ApiResponseItem struct { |
13 | Link string `json:"link"` | 13 | Link string `json:"link"` |
14 | Description string `json:"description"` | 14 | Description string `json:"description"` |
15 | CafeName string `json:"cafename"` | 15 | CafeName string `json:"cafename"` |
16 | - CafeUrl string `json:"cafeurl"` | ||
17 | } | 16 | } | ... | ... |
... | @@ -3,7 +3,7 @@ package model | ... | @@ -3,7 +3,7 @@ package model |
3 | type Item struct { | 3 | type Item struct { |
4 | Platform string `json:"platform"` | 4 | Platform string `json:"platform"` |
5 | Name string `json:"name"` | 5 | Name string `json:"name"` |
6 | - Price uint `json:"price"` | 6 | + Price int `json:"price"` |
7 | ThumbnailUrl string `json:"thumbnailUrl"` | 7 | ThumbnailUrl string `json:"thumbnailUrl"` |
8 | ItemUrl string `json:"itemUrl"` | 8 | ItemUrl string `json:"itemUrl"` |
9 | ExtraInfo string `json:"extraInfo"` | 9 | ExtraInfo string `json:"extraInfo"` | ... | ... |
1 | package service | 1 | package service |
2 | 2 | ||
3 | import ( | 3 | import ( |
4 | + "bytes" | ||
4 | "encoding/json" | 5 | "encoding/json" |
6 | + "fmt" | ||
5 | "io" | 7 | "io" |
6 | "io/ioutil" | 8 | "io/ioutil" |
7 | "joongna/config" | 9 | "joongna/config" |
... | @@ -9,15 +11,45 @@ import ( | ... | @@ -9,15 +11,45 @@ import ( |
9 | "log" | 11 | "log" |
10 | "net/http" | 12 | "net/http" |
11 | "net/url" | 13 | "net/url" |
14 | + "strconv" | ||
15 | + "strings" | ||
16 | + "time" | ||
17 | + | ||
18 | + "github.com/PuerkitoBio/goquery" | ||
19 | + "github.com/fedesog/webdriver" | ||
12 | ) | 20 | ) |
13 | 21 | ||
14 | func GetItemByKeyword(keyword string) ([]model.Item, error) { | 22 | func GetItemByKeyword(keyword string) ([]model.Item, error) { |
15 | var items []model.Item | 23 | var items []model.Item |
24 | + | ||
25 | + itemsInfo := getItemsInfoByKeyword(keyword) | ||
26 | + for _, itemInfo := range itemsInfo { | ||
27 | + if itemInfo.CafeName != "중고나라" { | ||
28 | + continue | ||
29 | + } | ||
30 | + itemUrl := itemInfo.Link | ||
31 | + sold, price, thumbnailUrl, extraInfo := crawlingNaverCafe(itemUrl) | ||
32 | + | ||
33 | + if sold == "판매 완료" { | ||
34 | + continue | ||
35 | + } | ||
36 | + | ||
37 | + item := model.Item{ | ||
38 | + Platform: "중고나라", | ||
39 | + Name: itemInfo.Title, | ||
40 | + Price: price, | ||
41 | + ThumbnailUrl: thumbnailUrl, | ||
42 | + ItemUrl: itemUrl, | ||
43 | + ExtraInfo: extraInfo, | ||
44 | + } | ||
45 | + fmt.Println(item) | ||
46 | + items = append(items, item) | ||
47 | + } | ||
16 | return items, nil | 48 | return items, nil |
17 | } | 49 | } |
18 | 50 | ||
19 | func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { | 51 | func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { |
20 | - encText := url.QueryEscape("중고나라" + keyword) | 52 | + encText := url.QueryEscape("중고나라 " + keyword + " 판매중") |
21 | apiUrl := "https://openapi.naver.com/v1/search/cafearticle.json?query=" + encText + "&sort=sim" | 53 | apiUrl := "https://openapi.naver.com/v1/search/cafearticle.json?query=" + encText + "&sort=sim" |
22 | 54 | ||
23 | req, err := http.NewRequest("GET", apiUrl, nil) | 55 | req, err := http.NewRequest("GET", apiUrl, nil) |
... | @@ -47,3 +79,53 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { | ... | @@ -47,3 +79,53 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { |
47 | } | 79 | } |
48 | return apiResponse.Items | 80 | return apiResponse.Items |
49 | } | 81 | } |
82 | + | ||
83 | +func crawlingNaverCafe(cafeUrl string) (string, int, string, string) { | ||
84 | + driver := webdriver.NewChromeDriver("./chromedriver") | ||
85 | + err := driver.Start() | ||
86 | + if err != nil { | ||
87 | + log.Println(err) | ||
88 | + } | ||
89 | + desired := webdriver.Capabilities{"Platform": "MacOS"} | ||
90 | + required := webdriver.Capabilities{} | ||
91 | + session, err := driver.NewSession(desired, required) | ||
92 | + if err != nil { | ||
93 | + log.Println(err) | ||
94 | + } | ||
95 | + err = session.Url(cafeUrl) | ||
96 | + if err != nil { | ||
97 | + log.Println(err) | ||
98 | + } | ||
99 | + time.Sleep(time.Second * 1) | ||
100 | + session.FocusOnFrame("cafe_main") | ||
101 | + resp, err := session.Source() | ||
102 | + | ||
103 | + html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp))) | ||
104 | + if err != nil { | ||
105 | + log.Fatal(err) | ||
106 | + } | ||
107 | + | ||
108 | + sold := html.Find("div.sold_area").Text() | ||
109 | + price := priceStringToInt(html.Find(".ProductPrice").Text()) | ||
110 | + thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src") | ||
111 | + extraInfo := html.Find(".se-module-text").Text() | ||
112 | + | ||
113 | + sold = strings.TrimSpace(sold) | ||
114 | + thumbnailUrl = strings.TrimSpace(thumbnailUrl) | ||
115 | + extraInfo = strings.TrimSpace(extraInfo) | ||
116 | + | ||
117 | + return sold, price, thumbnailUrl, extraInfo | ||
118 | +} | ||
119 | + | ||
// priceStringToInt converts a displayed price such as " 12,000원 " into the
// integer 12000.
//
// It removes every "원" and "," from priceString, trims surrounding
// whitespace, and parses what remains as a base-10 integer. If the remainder
// is not a valid integer (for example an empty string or non-numeric text),
// the parse error is logged and 0 is returned.
func priceStringToInt(priceString string) int {
	priceString = strings.ReplaceAll(priceString, "원", "")
	priceString = strings.ReplaceAll(priceString, ",", "")
	// Trim after stripping the unit so whitespace that sat between the digits
	// and "원" is removed as well; the trimmed value must be assigned back
	// because strings.TrimSpace returns a new string.
	priceString = strings.TrimSpace(priceString)

	price, err := strconv.Atoi(priceString)
	if err != nil {
		// One unparsable price should not terminate the whole process.
		log.Println(err)
		return 0
	}
	return price
}
-
Please register or login to post a comment