ADD: goroutines in GetItemByKeyword to run the crawling concurrently
Wait for all goroutines to finish using a sync.WaitGroup
Showing
1 changed file
with
37 additions
and
23 deletions
... | @@ -3,6 +3,7 @@ package service | ... | @@ -3,6 +3,7 @@ package service |
3 | import ( | 3 | import ( |
4 | "bytes" | 4 | "bytes" |
5 | "encoding/json" | 5 | "encoding/json" |
6 | + "fmt" | ||
6 | "io" | 7 | "io" |
7 | "io/ioutil" | 8 | "io/ioutil" |
8 | "joongna/config" | 9 | "joongna/config" |
... | @@ -12,6 +13,7 @@ import ( | ... | @@ -12,6 +13,7 @@ import ( |
12 | "net/url" | 13 | "net/url" |
13 | "strconv" | 14 | "strconv" |
14 | "strings" | 15 | "strings" |
16 | + "sync" | ||
15 | "time" | 17 | "time" |
16 | 18 | ||
17 | "github.com/PuerkitoBio/goquery" | 19 | "github.com/PuerkitoBio/goquery" |
... | @@ -20,29 +22,27 @@ import ( | ... | @@ -20,29 +22,27 @@ import ( |
20 | 22 | ||
21 | func GetItemByKeyword(keyword string) ([]model.Item, error) { | 23 | func GetItemByKeyword(keyword string) ([]model.Item, error) { |
22 | var items []model.Item | 24 | var items []model.Item |
25 | + wg := sync.WaitGroup{} | ||
23 | 26 | ||
24 | itemsInfo := getItemsInfoByKeyword(keyword) | 27 | itemsInfo := getItemsInfoByKeyword(keyword) |
28 | + | ||
25 | for _, itemInfo := range itemsInfo { | 29 | for _, itemInfo := range itemsInfo { |
26 | - if itemInfo.CafeName != "중고나라" { | ||
27 | - continue | ||
28 | - } | ||
29 | itemUrl := itemInfo.Link | 30 | itemUrl := itemInfo.Link |
30 | - sold, price, thumbnailUrl, extraInfo := crawlingNaverCafe(itemUrl) | 31 | + if itemInfo.CafeName != "중고나라" { |
31 | - | ||
32 | - if sold == "판매 완료" { | ||
33 | continue | 32 | continue |
34 | } | 33 | } |
35 | - | 34 | + wg.Add(1) |
36 | - item := model.Item{ | 35 | + go func(itemUrl string) { |
37 | - Platform: "중고나라", | 36 | + defer wg.Done() |
38 | - Name: itemInfo.Title, | 37 | + err, item := crawlingNaverCafe(itemUrl) |
39 | - Price: price, | 38 | + if err != nil { |
40 | - ThumbnailUrl: thumbnailUrl, | 39 | + log.Fatal(err) |
41 | - ItemUrl: itemUrl, | ||
42 | - ExtraInfo: extraInfo, | ||
43 | } | 40 | } |
44 | - items = append(items, item) | 41 | + items = append(items, *item) |
42 | + }(itemUrl) | ||
45 | } | 43 | } |
44 | + wg.Wait() | ||
45 | + | ||
46 | return items, nil | 46 | return items, nil |
47 | } | 47 | } |
48 | 48 | ||
... | @@ -78,32 +78,46 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { | ... | @@ -78,32 +78,46 @@ func getItemsInfoByKeyword(keyword string) []model.ApiResponseItem { |
78 | return apiResponse.Items | 78 | return apiResponse.Items |
79 | } | 79 | } |
80 | 80 | ||
81 | -func crawlingNaverCafe(cafeUrl string) (string, int, string, string) { | 81 | +func crawlingNaverCafe(cafeUrl string) (error, *model.Item) { |
82 | - page := rod.New().MustConnect().MustPage(cafeUrl) | 82 | + frame := rod.New().MustConnect().MustPage(cafeUrl).MustElement("iframe#cafe_main") |
83 | - | 83 | + time.Sleep(time.Second * 2) |
84 | - time.Sleep(time.Second * 1) | 84 | + source := frame.MustFrame().MustHTML() |
85 | - | ||
86 | - source := page.MustElement("iframe#cafe_main").MustFrame().MustHTML() | ||
87 | html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(source))) | 85 | html, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(source))) |
88 | if err != nil { | 86 | if err != nil { |
89 | - log.Fatal(err) | 87 | + return err, nil |
90 | } | 88 | } |
91 | 89 | ||
90 | + title := html.Find("h3.title_text").Text() | ||
92 | sold := html.Find("div.sold_area").Text() | 91 | sold := html.Find("div.sold_area").Text() |
93 | price := priceStringToInt(html.Find(".ProductPrice").Text()) | 92 | price := priceStringToInt(html.Find(".ProductPrice").Text()) |
94 | thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src") | 93 | thumbnailUrl, _ := html.Find("div.product_thumb img").Attr("src") |
95 | extraInfo := html.Find(".se-module-text").Text() | 94 | extraInfo := html.Find(".se-module-text").Text() |
96 | 95 | ||
96 | + title = strings.TrimSpace(title) | ||
97 | sold = strings.TrimSpace(sold) | 97 | sold = strings.TrimSpace(sold) |
98 | thumbnailUrl = strings.TrimSpace(thumbnailUrl) | 98 | thumbnailUrl = strings.TrimSpace(thumbnailUrl) |
99 | extraInfo = strings.TrimSpace(extraInfo) | 99 | extraInfo = strings.TrimSpace(extraInfo) |
100 | 100 | ||
101 | - return sold, price, thumbnailUrl, extraInfo | 101 | + item := model.Item{ |
102 | + Platform: "중고나라", | ||
103 | + Name: title, | ||
104 | + Price: price, | ||
105 | + ThumbnailUrl: thumbnailUrl, | ||
106 | + ItemUrl: cafeUrl, | ||
107 | + ExtraInfo: extraInfo, | ||
108 | + } | ||
109 | + fmt.Println("crawling " + cafeUrl + " title: " + title) | ||
110 | + | ||
111 | + return nil, &item | ||
102 | } | 112 | } |
103 | 113 | ||
104 | func priceStringToInt(priceString string) int { | 114 | func priceStringToInt(priceString string) int { |
105 | strings.TrimSpace(priceString) | 115 | strings.TrimSpace(priceString) |
106 | 116 | ||
117 | + if priceString == "" { | ||
118 | + return 0 | ||
119 | + } | ||
120 | + | ||
107 | priceString = strings.ReplaceAll(priceString, "원", "") | 121 | priceString = strings.ReplaceAll(priceString, "원", "") |
108 | priceString = strings.ReplaceAll(priceString, ",", "") | 122 | priceString = strings.ReplaceAll(priceString, ",", "") |
109 | 123 | ... | ... |
-
Please register or login to post a comment