Showing 3 changed files with 30 additions and 62 deletions
... | @@ -3,10 +3,6 @@ from bs4 import BeautifulSoup | ... | @@ -3,10 +3,6 @@ from bs4 import BeautifulSoup |
3 | import requests | 3 | import requests |
4 | import json | 4 | import json |
5 | 5 | ||
6 | -# TODO: 크롤링한 데이터가 이미 서버에 있는지 확인하는 method 필요 | ||
7 | -def check_duplication_item(item): | ||
8 | - return | ||
9 | - | ||
10 | def save_json(parsed_items): | 6 | def save_json(parsed_items): |
11 | with open('./sample.json', 'w') as f: | 7 | with open('./sample.json', 'w') as f: |
12 | json.dump(parsed_items, f, indent=2, ensure_ascii=False) | 8 | json.dump(parsed_items, f, indent=2, ensure_ascii=False) |
... | @@ -14,18 +10,28 @@ def save_json(parsed_items): | ... | @@ -14,18 +10,28 @@ def save_json(parsed_items): |
14 | def convert_item_to_dict(item): | 10 | def convert_item_to_dict(item): |
15 | dict_item = {} | 11 | dict_item = {} |
16 | dict_item["platform"] = "daangn" | 12 | dict_item["platform"] = "daangn" |
17 | - dict_item["itemUrl"] = "https://www.daangn.com" + item["href"] | ||
18 | - dict_item["thumbnailUrl"] = item.find("img")["src"] | ||
19 | dict_item["name"] = item.find("span", class_="article-title").text.strip() | 13 | dict_item["name"] = item.find("span", class_="article-title").text.strip() |
14 | + unparsed_price = item.find("p", class_="article-price").text.strip(" \n ") | ||
15 | + unparsed_price = unparsed_price.replace("원", "")\ | ||
16 | + .replace("만", "0000")\ | ||
17 | + .replace(",", "") | ||
18 | + parsed_price = int(unparsed_price) | ||
19 | + print(parsed_price) | ||
20 | + dict_item["price"] = parsed_price | ||
21 | + # dict_item["price"] = item.find("p", class_="article-price").text.strip() | ||
22 | + dict_item["thumbnailUrl"] = item.find("img")["src"] | ||
23 | + dict_item["itemUrl"] = "https://www.daangn.com" + item["href"] | ||
20 | dict_item["extraInfo"] = item.find("span", class_="article-content").text.strip() | 24 | dict_item["extraInfo"] = item.find("span", class_="article-content").text.strip() |
21 | - # dict_item["item-region"] = item.find("p", class_="article-region-name").text.strip() | 25 | + # dict_item["item-region"] = item.find("p", class_="article-region-name").text.strip() # 거래 지역 |
22 | - dict_item["price"] = item.find("p", class_="article-price").text.strip() | 26 | + |
27 | + # 좋아요 개수 | ||
23 | # if(item.find("span", class_="article-watch") == None): | 28 | # if(item.find("span", class_="article-watch") == None): |
24 | # dict_item["item-watch-count"] = '0' | 29 | # dict_item["item-watch-count"] = '0' |
25 | # else: | 30 | # else: |
26 | # dict_item["item-watch-count"] = item.find("span", class_="article-watch").text.strip() | 31 | # dict_item["item-watch-count"] = item.find("span", class_="article-watch").text.strip() |
27 | return dict_item | 32 | return dict_item |
28 | 33 | ||
34 | + | ||
29 | def crawl(keyword): | 35 | def crawl(keyword): |
30 | if type(keyword) != type("test string"): | 36 | if type(keyword) != type("test string"): |
31 | return "Error: Invalid Keyword" | 37 | return "Error: Invalid Keyword" |
... | @@ -45,9 +51,9 @@ def crawl(keyword): | ... | @@ -45,9 +51,9 @@ def crawl(keyword): |
45 | json_item = convert_item_to_dict(item) | 51 | json_item = convert_item_to_dict(item) |
46 | parsed_items["items"].append(json_item) | 52 | parsed_items["items"].append(json_item) |
47 | 53 | ||
54 | + # sample 저장 | ||
48 | save_json(parsed_items) | 55 | save_json(parsed_items) |
49 | - json_items = json.dumps(parsed_items, ensure_ascii=False, indent=2) | 56 | + return parsed_items |
50 | - return json_items | ||
51 | 57 | ||
52 | if __name__ == "__main__": | 58 | if __name__ == "__main__": |
53 | - print(crawl("RTX 3080")) | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
59 | + print(crawl("아메리카노")) | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
daangn/main.py
0 → 100644
1 | +from typing import Optional | ||
2 | +from fastapi import FastAPI | ||
3 | +import daangn_crawl | ||
4 | + | ||
5 | +# uvicorn main:app --reload | ||
6 | +app = FastAPI() | ||
7 | + | ||
8 | +@app.get("/daangn/{item}") | ||
9 | +def read_item(item: str, q: Optional[str] = None): | ||
10 | + crawl_json = daangn_crawl.crawl(item) | ||
11 | + return crawl_json | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | { | 1 | { |
2 | - "length": "6", | 2 | + "length": "0", |
3 | - "items": [ | 3 | + "items": [] |
4 | - { | ||
5 | - "platform": "daangn", | ||
6 | - "itemUrl": "https://www.daangn.com/articles/402161213", | ||
7 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/origin/article/202205/4d97771a56f839845b0e662710df28586c5b2c36fc814e2445b037b15e2af50b.webp?q=82&s=300x300&t=crop", | ||
8 | - "name": "RTX3080 터보젯 10GB / RTX3080ti 교환", | ||
9 | - "extraInfo": "22.4.29 구매 했으며 영수증 보관중\n\n3080 ti 로 가고싶어서 교환 올려봅니다 !\n\n실사용시간 얼마 안되며 처음 받은 구성품 모두 있습니다 !\n\n모델명이랑 원하시는 추금 알려주시면 감사하겠습니다 !\n\nMSI emTek ASUS 쪽 선호 합니다 !", | ||
10 | - "price": "119만원" | ||
11 | - }, | ||
12 | - { | ||
13 | - "platform": "daangn", | ||
14 | - "itemUrl": "https://www.daangn.com/articles/401605077", | ||
15 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/origin/article/202205/340e595ee8ccbf65fdd6b03bc54c093b21f4d8a7ffd1a67e3b472c5af4b6ca9c.webp?q=82&s=300x300&t=crop", | ||
16 | - "name": "rtx3080ti슈프림 150만 팝니다 3080ti모델중 고클럭상위제품입니다", | ||
17 | - "extraInfo": "컴터 업그레이드하려고 삿으나 사정상 판매합니다", | ||
18 | - "price": "150만원" | ||
19 | - }, | ||
20 | - { | ||
21 | - "platform": "daangn", | ||
22 | - "itemUrl": "https://www.daangn.com/articles/378414761", | ||
23 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/origin/article/202203/dfd07d54a1b7cca948e58118e6b64f8a17978d534e16c8b8e48df0dbb29a5900.webp?q=82&s=300x300&t=crop", | ||
24 | - "name": "레노버7 16ACHg R9 3080 PRO/R9-5900HX/RTX3080 게이밍노트북 판매합니다", | ||
25 | - "extraInfo": "네고 문의 안받습니다\n\n램 32기가 ssd 1테라 이며\n\n보증 기간 짱짱합니다\n\n2026.1.25일까지에요 \n\n배터리 보증 35개월\n\n출장보증 4년 \n\n우발적손상 4년으로 늘려졋습니다\n\n박스랑 갖고 잇으며 직거래 일곡동입니다\n\n정품 key랑 같이 드려요\n\n택배거래 가능합니다", | ||
26 | - "price": "330만원" | ||
27 | - }, | ||
28 | - { | ||
29 | - "platform": "daangn", | ||
30 | - "itemUrl": "https://www.daangn.com/articles/398175307", | ||
31 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/origin/article/202205/B42DADA9DCAC867B1D86871592153570FAE307E8D2D28FCDF72709952344FAD6.jpg?q=82&s=300x300&t=crop", | ||
32 | - "name": "RTX 3080 기가바이트 3080 어로스 익스트림 10G 그래픽카드", | ||
33 | - "extraInfo": "마지막 가격다운 오늘까지 안팔리면 그냥 쓸려구여~\nA/s 4년\n신품수준\n가끔 베그만했고 거의 관상용으로만 썼습니다\n풀박스 구성품 모두있습니다\n기스. 하자. 먼지. 없습니다\n결벽증 환자\n이제 낚시하러 갑니다~~~\n그래픽카드. 글카. 기가바이트. 컴퓨터. 게이밍pc. 어로스. 익스트림. 배그. 배틀그라운드. 3080", | ||
34 | - "price": "115만원" | ||
35 | - }, | ||
36 | - { | ||
37 | - "platform": "daangn", | ||
38 | - "itemUrl": "https://www.daangn.com/articles/259000809", | ||
39 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/hoian/category/thumbnails/v2/img_thumb_digital.png", | ||
40 | - "name": "ASUS 3080Ti STRIX\n.RTX 3080 Ti vulcan 삽니다", | ||
41 | - "extraInfo": "둘중하나 삽니다", | ||
42 | - "price": "130만원" | ||
43 | - }, | ||
44 | - { | ||
45 | - "platform": "daangn", | ||
46 | - "itemUrl": "https://www.daangn.com/articles/402321234", | ||
47 | - "thumbnailUrl": "https://dnvefa72aowie.cloudfront.net/origin/article/202205/bbd9a7ac59464c4def96142a8c8d58b3a3d9d7cddc9ffc687c157b66b3a76953.webp?q=82&s=300x300&t=crop", | ||
48 | - "name": "컬러풀 RTX 3080TI advanced OC (미개봉)", | ||
49 | - "extraInfo": "colorful igame 지포스 rtx 3080 ti advanced oc d6x 12gb 입니다\n미개봉 상품입니다.\n\n방배역에서 직거래합니다", | ||
50 | - "price": "122만원" | ||
51 | - } | ||
52 | - ] | ||
53 | } | 4 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment