Showing 4 changed files with 30 additions and 16 deletions
app/public/images/food/구구당.jpg
deleted
100644 → 0
13.1 KB
app/public/images/food/랑데자뷰 강남점.jpg
deleted
100644 → 0
10.7 KB
8.27 KB
| ... | @@ -5,72 +5,86 @@ import time | ... | @@ -5,72 +5,86 @@ import time |
| 5 | from selenium.webdriver.common.keys import Keys | 5 | from selenium.webdriver.common.keys import Keys |
| 6 | import urllib.request | 6 | import urllib.request |
| 7 | import sys | 7 | import sys |
| 8 | +from bs4 import BeautifulSoup | ||
| 8 | 9 | ||
| 9 | options = webdriver.ChromeOptions() | 10 | options = webdriver.ChromeOptions() |
| 10 | options.add_argument('window-size=1000,800') | 11 | options.add_argument('window-size=1000,800') |
| 11 | -options.add_argument("headless") | 12 | +# options.add_argument("headless") |
| 12 | 13 | ||
| 13 | driver = webdriver.Chrome(ChromeDriverManager().install(), options=options) | 14 | driver = webdriver.Chrome(ChromeDriverManager().install(), options=options) |
| 14 | 15 | ||
| 15 | list1 = [] | 16 | list1 = [] |
| 16 | list2 = [] | 17 | list2 = [] |
| 17 | list3 = [] | 18 | list3 = [] |
| 19 | +cnt = 1 | ||
| 20 | + | ||
| 18 | 21 | ||
| 19 | # 사용자가 input 한 place 가져옴 | 22 | # 사용자가 input 한 place 가져옴 |
| 20 | keyword = sys.argv[1] | 23 | keyword = sys.argv[1] |
| 21 | 24 | ||
| 22 | kakao_map_search_url = f"https://map.kakao.com/?q={keyword}" | 25 | kakao_map_search_url = f"https://map.kakao.com/?q={keyword}" |
| 23 | driver.get(kakao_map_search_url) | 26 | driver.get(kakao_map_search_url) |
| 24 | -time.sleep(1) | 27 | +time.sleep(0.2) |
| 25 | 28 | ||
| 26 | button = driver.find_element_by_xpath( | 29 | button = driver.find_element_by_xpath( |
| 27 | "/html/body/div[5]/div[2]/div[1]/div[7]/div[5]/div[1]/ol/li[2]/a") | 30 | "/html/body/div[5]/div[2]/div[1]/div[7]/div[5]/div[1]/ol/li[2]/a") |
| 28 | driver.execute_script("arguments[0].click();", button) | 31 | driver.execute_script("arguments[0].click();", button) |
| 29 | -time.sleep(1) | 32 | +time.sleep(0.6) |
| 33 | + | ||
| 34 | +while True: | ||
| 35 | + resName = driver.find_element_by_css_selector( | ||
| 36 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text | ||
| 37 | + if(resName.startswith('광고')): | ||
| 38 | + cnt += 1 | ||
| 39 | + else: | ||
| 40 | + break | ||
| 30 | 41 | ||
| 31 | resName = driver.find_element_by_css_selector( | 42 | resName = driver.find_element_by_css_selector( |
| 32 | - "#info\.search\.place\.list > li:nth-child(4) > div.head_item.clickArea > strong > a.link_name").text | 43 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text |
| 33 | reviewNum = driver.find_element_by_css_selector( | 44 | reviewNum = driver.find_element_by_css_selector( |
| 34 | - "#info\.search\.place\.list > li:nth-child(4) > div.rating.clickArea > a > em").text | 45 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text |
| 35 | rate = driver.find_element_by_css_selector( | 46 | rate = driver.find_element_by_css_selector( |
| 36 | - "#info\.search\.place\.list > li:nth-child(4) > div.rating.clickArea > span.score > em").text | 47 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text |
| 37 | address = driver.find_element_by_css_selector( | 48 | address = driver.find_element_by_css_selector( |
| 38 | - "#info\.search\.place\.list > li:nth-child(4) > div.info_item > div.addr > p:nth-child(1)").text | 49 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text |
| 39 | 50 | ||
| 40 | 51 | ||
| 41 | list1.append(resName) | 52 | list1.append(resName) |
| 42 | list1.append(reviewNum) | 53 | list1.append(reviewNum) |
| 43 | list1.append(rate) | 54 | list1.append(rate) |
| 44 | list1.append(address) | 55 | list1.append(address) |
| 56 | +cnt += 1 | ||
| 45 | 57 | ||
| 46 | 58 | ||
| 47 | resName = driver.find_element_by_css_selector( | 59 | resName = driver.find_element_by_css_selector( |
| 48 | - "#info\.search\.place\.list > li:nth-child(5) > div.head_item.clickArea > strong > a.link_name").text | 60 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text |
| 49 | reviewNum = driver.find_element_by_css_selector( | 61 | reviewNum = driver.find_element_by_css_selector( |
| 50 | - "#info\.search\.place\.list > li:nth-child(5) > div.rating.clickArea > a > em").text | 62 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text |
| 51 | rate = driver.find_element_by_css_selector( | 63 | rate = driver.find_element_by_css_selector( |
| 52 | - "#info\.search\.place\.list > li:nth-child(5) > div.rating.clickArea > span.score > em").text | 64 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text |
| 53 | address = driver.find_element_by_css_selector( | 65 | address = driver.find_element_by_css_selector( |
| 54 | - "#info\.search\.place\.list > li:nth-child(5) > div.info_item > div.addr > p:nth-child(1)").text | 66 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text |
| 55 | 67 | ||
| 56 | list2.append(resName) | 68 | list2.append(resName) |
| 57 | list2.append(reviewNum) | 69 | list2.append(reviewNum) |
| 58 | list2.append(rate) | 70 | list2.append(rate) |
| 59 | list2.append(address) | 71 | list2.append(address) |
| 72 | +cnt += 1 | ||
| 60 | 73 | ||
| 61 | resName = driver.find_element_by_css_selector( | 74 | resName = driver.find_element_by_css_selector( |
| 62 | - "#info\.search\.place\.list > li:nth-child(6) > div.head_item.clickArea > strong > a.link_name").text | 75 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text |
| 63 | reviewNum = driver.find_element_by_css_selector( | 76 | reviewNum = driver.find_element_by_css_selector( |
| 64 | - "#info\.search\.place\.list > li:nth-child(6) > div.rating.clickArea > a > em").text | 77 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text |
| 65 | rate = driver.find_element_by_css_selector( | 78 | rate = driver.find_element_by_css_selector( |
| 66 | - "#info\.search\.place\.list > li:nth-child(6) > div.rating.clickArea > span.score > em").text | 79 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text |
| 67 | address = driver.find_element_by_css_selector( | 80 | address = driver.find_element_by_css_selector( |
| 68 | - "#info\.search\.place\.list > li:nth-child(6) > div.info_item > div.addr > p:nth-child(1)").text | 81 | + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text |
| 69 | 82 | ||
| 70 | list3.append(resName) | 83 | list3.append(resName) |
| 71 | list3.append(reviewNum) | 84 | list3.append(reviewNum) |
| 72 | list3.append(rate) | 85 | list3.append(rate) |
| 73 | list3.append(address) | 86 | list3.append(address) |
| 87 | +cnt+=1 | ||
| 74 | 88 | ||
| 75 | frame = pd.DataFrame([list1, list2, list3]) | 89 | frame = pd.DataFrame([list1, list2, list3]) |
| 76 | frame.columns = ['가게 이름', '리뷰수', '평점', '주소'] | 90 | frame.columns = ['가게 이름', '리뷰수', '평점', '주소'] |
| ... | @@ -82,7 +96,7 @@ for i, shopName in enumerate(frame['가게 이름'].tolist()): | ... | @@ -82,7 +96,7 @@ for i, shopName in enumerate(frame['가게 이름'].tolist()): |
| 82 | elem.send_keys(shopName) | 96 | elem.send_keys(shopName) |
| 83 | elem.send_keys(Keys.RETURN) | 97 | elem.send_keys(Keys.RETURN) |
| 84 | driver.find_elements_by_css_selector(".rg_i.Q4LuWd")[1].click() | 98 | driver.find_elements_by_css_selector(".rg_i.Q4LuWd")[1].click() |
| 85 | - time.sleep(3) | 99 | + time.sleep(0.2) |
| 86 | imgUrl = driver.find_element_by_css_selector( | 100 | imgUrl = driver.find_element_by_css_selector( |
| 87 | ".n3VNCb").get_attribute("src") | 101 | ".n3VNCb").get_attribute("src") |
| 88 | urllib.request.urlretrieve( | 102 | urllib.request.urlretrieve( | ... | ... |
-
Please register or log in to post a comment