Toggle navigation
Toggle navigation
This project
Loading...
Sign in
윤준석
/
mamuri-bot
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
유명현
2022-05-11 01:24:26 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
9743dfc8b8d9a3226848c6a059b0a42fb3c86563
9743dfc8
1 parent
87dd0bc7
Implement FastAPI
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
62 deletions
daangn/daangn_crawl.py
daangn/main.py
daangn/sample.json
daangn/daangn_crawl.py
View file @
9743dfc
...
...
@@ -3,10 +3,6 @@ from bs4 import BeautifulSoup
import
requests
import
json
# TODO: need a method that checks whether crawled data already exists on the server.
def check_duplication_item(item):
    """Placeholder for a duplicate check on a crawled item.

    Not implemented yet: *item* is ignored and None is always returned.
    """
    return None
def save_json(parsed_items):
    """Dump *parsed_items* to ``./sample.json`` as indented JSON.

    The path is relative to the current working directory and any existing
    file is overwritten.

    Args:
        parsed_items: A JSON-serializable object (the crawler passes a dict
            holding an ``"items"`` list).
    """
    # ensure_ascii=False writes non-ASCII text (e.g. Korean titles) verbatim,
    # so the encoding is pinned to UTF-8 instead of the platform default,
    # which may be unable to encode it.
    with open('./sample.json', 'w', encoding='utf-8') as f:
        json.dump(parsed_items, f, indent=2, ensure_ascii=False)
...
...
@@ -14,18 +10,28 @@ def save_json(parsed_items):
def _parse_price(price_text):
    """Convert a listing price label to an integer amount of won.

    Handles labels such as "119만원", "1,500원" and "1만 5,000원" by adding the
    "억" (10**8) and "만" (10**4) parts to any trailing plain number.

    Returns:
        The price as an int, or None when the label holds no parseable
        number (a bare ``int()`` call would raise ValueError there).
    """
    remaining = price_text.replace(",", "").replace("원", "").strip()
    total = 0
    parsed_any = False
    for unit, factor in (("억", 100000000), ("만", 10000)):
        head, found, tail = remaining.partition(unit)
        if not found:
            continue
        head = head.strip()
        if not head.isdecimal():
            return None
        total += int(head) * factor
        parsed_any = True
        remaining = tail.strip()
    if remaining:
        if not remaining.isdecimal():
            return None
        total += int(remaining)
        parsed_any = True
    return total if parsed_any else None


def convert_item_to_dict(item):
    """Build the API dictionary for one search-result element.

    Args:
        item: An element exposing ``item["href"]`` and
            ``item.find(tag, class_=...)`` (presumably a BeautifulSoup tag
            for one listing link; confirm against the caller).

    Returns:
        A dict with the keys ``platform``, ``name``, ``price``,
        ``thumbnailUrl``, ``itemUrl`` and ``extraInfo``. ``price`` is an int
        in won, or None when the price element is missing or non-numeric.
    """
    price_tag = item.find("p", class_="article-price")
    # NOTE: the trade region (p.article-region-name) and the like count
    # (span.article-watch) are available on the element but not collected yet.
    return {
        "platform": "daangn",
        "name": item.find("span", class_="article-title").text.strip(),
        "price": _parse_price(price_tag.text) if price_tag is not None else None,
        "thumbnailUrl": item.find("img")["src"],
        "itemUrl": "https://www.daangn.com" + item["href"],
        "extraInfo": item.find("span", class_="article-content").text.strip(),
    }
def
crawl
(
keyword
):
if
type
(
keyword
)
!=
type
(
"test string"
):
return
"Error: Invalid Keyword"
...
...
@@ -45,9 +51,9 @@ def crawl(keyword):
json_item
=
convert_item_to_dict
(
item
)
parsed_items
[
"items"
]
.
append
(
json_item
)
# sample 저장
save_json
(
parsed_items
)
json_items
=
json
.
dumps
(
parsed_items
,
ensure_ascii
=
False
,
indent
=
2
)
return
json_items
return
parsed_items
if
__name__
==
"__main__"
:
print
(
crawl
(
"RTX 3080"
))
\ No newline at end of file
print
(
crawl
(
"아메리카노"
))
\ No newline at end of file
...
...
daangn/main.py
0 → 100644
View file @
9743dfc
from
typing
import
Optional
from
fastapi
import
FastAPI
import
daangn_crawl
# uvicorn main:app --reload
app
=
FastAPI
()
@app.get("/daangn/{item}")
def read_item(item: str, q: Optional[str] = None):
    """Handle ``GET /daangn/{item}`` by crawling with *item* as the keyword.

    Args:
        item: Path segment passed unchanged to ``daangn_crawl.crawl``.
        q: Optional query parameter; accepted but currently unused.

    Returns:
        Whatever ``daangn_crawl.crawl(item)`` returns.
    """
    return daangn_crawl.crawl(item)
\ No newline at end of file
daangn/sample.json
View file @
9743dfc
{
"length"
:
"6"
,
"items"
:
[
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/402161213"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/origin/article/202205/4d97771a56f839845b0e662710df28586c5b2c36fc814e2445b037b15e2af50b.webp?q=82&s=300x300&t=crop"
,
"name"
:
"RTX3080 터보젯 10GB / RTX3080ti 교환"
,
"extraInfo"
:
"22.4.29 구매 했으며 영수증 보관중
\n\n
3080 ti 로 가고싶어서 교환 올려봅니다 !
\n\n
실사용시간 얼마 안되며 처음 받은 구성품 모두 있습니다 !
\n\n
모델명이랑 원하시는 추금 알려주시면 감사하겠습니다 !
\n\n
MSI emTek ASUS 쪽 선호 합니다 !"
,
"price"
:
"119만원"
},
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/401605077"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/origin/article/202205/340e595ee8ccbf65fdd6b03bc54c093b21f4d8a7ffd1a67e3b472c5af4b6ca9c.webp?q=82&s=300x300&t=crop"
,
"name"
:
"rtx3080ti슈프림 150만 팝니다 3080ti모델중 고클럭상위제품입니다"
,
"extraInfo"
:
"컴터 업그레이드하려고 삿으나 사정상 판매합니다"
,
"price"
:
"150만원"
},
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/378414761"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/origin/article/202203/dfd07d54a1b7cca948e58118e6b64f8a17978d534e16c8b8e48df0dbb29a5900.webp?q=82&s=300x300&t=crop"
,
"name"
:
"레노버7 16ACHg R9 3080 PRO/R9-5900HX/RTX3080 게이밍노트북 판매합니다"
,
"extraInfo"
:
"네고 문의 안받습니다
\n\n
램 32기가 ssd 1테라 이며
\n\n
보증 기간 짱짱합니다
\n\n
2026.1.25일까지에요
\n\n
배터리 보증 35개월
\n\n
출장보증 4년
\n\n
우발적손상 4년으로 늘려졋습니다
\n\n
박스랑 갖고 잇으며 직거래 일곡동입니다
\n\n
정품 key랑 같이 드려요
\n\n
택배거래 가능합니다"
,
"price"
:
"330만원"
},
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/398175307"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/origin/article/202205/B42DADA9DCAC867B1D86871592153570FAE307E8D2D28FCDF72709952344FAD6.jpg?q=82&s=300x300&t=crop"
,
"name"
:
"RTX 3080 기가바이트 3080 어로스 익스트림 10G 그래픽카드"
,
"extraInfo"
:
"마지막 가격다운 오늘까지 안팔리면 그냥 쓸려구여~
\n
A/s 4년
\n
신품수준
\n
가끔 베그만했고 거의 관상용으로만 썼습니다
\n
풀박스 구성품 모두있습니다
\n
기스. 하자. 먼지. 없습니다
\n
결벽증 환자
\n
이제 낚시하러 갑니다~~~
\n
그래픽카드. 글카. 기가바이트. 컴퓨터. 게이밍pc. 어로스. 익스트림. 배그. 배틀그라운드. 3080"
,
"price"
:
"115만원"
},
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/259000809"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/hoian/category/thumbnails/v2/img_thumb_digital.png"
,
"name"
:
"ASUS 3080Ti STRIX
\n
.RTX 3080 Ti vulcan 삽니다"
,
"extraInfo"
:
"둘중하나 삽니다"
,
"price"
:
"130만원"
},
{
"platform"
:
"daangn"
,
"itemUrl"
:
"https://www.daangn.com/articles/402321234"
,
"thumbnailUrl"
:
"https://dnvefa72aowie.cloudfront.net/origin/article/202205/bbd9a7ac59464c4def96142a8c8d58b3a3d9d7cddc9ffc687c157b66b3a76953.webp?q=82&s=300x300&t=crop"
,
"name"
:
"컬러풀 RTX 3080TI advanced OC (미개봉)"
,
"extraInfo"
:
"colorful igame 지포스 rtx 3080 ti advanced oc d6x 12gb 입니다
\n
미개봉 상품입니다.
\n\n
방배역에서 직거래합니다"
,
"price"
:
"122만원"
}
]
"length"
:
"0"
,
"items"
:
[]
}
\ No newline at end of file
...
...
Please
register
or
login
to post a comment