김재웅

Skip sponsored ("광고") listings in Kakao Map search results

...@@ -5,72 +5,86 @@ import time ...@@ -5,72 +5,86 @@ import time
5 from selenium.webdriver.common.keys import Keys 5 from selenium.webdriver.common.keys import Keys
6 import urllib.request 6 import urllib.request
7 import sys 7 import sys
8 +from bs4 import BeautifulSoup
8 9
9 options = webdriver.ChromeOptions() 10 options = webdriver.ChromeOptions()
10 options.add_argument('window-size=1000,800') 11 options.add_argument('window-size=1000,800')
11 -options.add_argument("headless") 12 +# options.add_argument("headless")
12 13
13 driver = webdriver.Chrome(ChromeDriverManager().install(), options=options) 14 driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
14 15
15 list1 = [] 16 list1 = []
16 list2 = [] 17 list2 = []
17 list3 = [] 18 list3 = []
19 +cnt = 1
20 +
18 21
19 # 사용자가 input 한 place 가져옴 22 # 사용자가 input 한 place 가져옴
20 keyword = sys.argv[1] 23 keyword = sys.argv[1]
21 24
22 kakao_map_search_url = f"https://map.kakao.com/?q={keyword}" 25 kakao_map_search_url = f"https://map.kakao.com/?q={keyword}"
23 driver.get(kakao_map_search_url) 26 driver.get(kakao_map_search_url)
24 -time.sleep(1) 27 +time.sleep(0.2)
25 28
26 button = driver.find_element_by_xpath( 29 button = driver.find_element_by_xpath(
27 "/html/body/div[5]/div[2]/div[1]/div[7]/div[5]/div[1]/ol/li[2]/a") 30 "/html/body/div[5]/div[2]/div[1]/div[7]/div[5]/div[1]/ol/li[2]/a")
28 driver.execute_script("arguments[0].click();", button) 31 driver.execute_script("arguments[0].click();", button)
29 -time.sleep(1) 32 +time.sleep(0.6)
33 +
34 +while True:
35 + resName = driver.find_element_by_css_selector(
36 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text
37 + if(resName.startswith('광고')):
38 + cnt += 1
39 + else:
40 + break
30 41
31 resName = driver.find_element_by_css_selector( 42 resName = driver.find_element_by_css_selector(
32 - "#info\.search\.place\.list > li:nth-child(4) > div.head_item.clickArea > strong > a.link_name").text 43 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text
33 reviewNum = driver.find_element_by_css_selector( 44 reviewNum = driver.find_element_by_css_selector(
34 - "#info\.search\.place\.list > li:nth-child(4) > div.rating.clickArea > a > em").text 45 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text
35 rate = driver.find_element_by_css_selector( 46 rate = driver.find_element_by_css_selector(
36 - "#info\.search\.place\.list > li:nth-child(4) > div.rating.clickArea > span.score > em").text 47 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text
37 address = driver.find_element_by_css_selector( 48 address = driver.find_element_by_css_selector(
38 - "#info\.search\.place\.list > li:nth-child(4) > div.info_item > div.addr > p:nth-child(1)").text 49 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text
39 50
40 51
41 list1.append(resName) 52 list1.append(resName)
42 list1.append(reviewNum) 53 list1.append(reviewNum)
43 list1.append(rate) 54 list1.append(rate)
44 list1.append(address) 55 list1.append(address)
56 +cnt += 1
45 57
46 58
47 resName = driver.find_element_by_css_selector( 59 resName = driver.find_element_by_css_selector(
48 - "#info\.search\.place\.list > li:nth-child(5) > div.head_item.clickArea > strong > a.link_name").text 60 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text
49 reviewNum = driver.find_element_by_css_selector( 61 reviewNum = driver.find_element_by_css_selector(
50 - "#info\.search\.place\.list > li:nth-child(5) > div.rating.clickArea > a > em").text 62 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text
51 rate = driver.find_element_by_css_selector( 63 rate = driver.find_element_by_css_selector(
52 - "#info\.search\.place\.list > li:nth-child(5) > div.rating.clickArea > span.score > em").text 64 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text
53 address = driver.find_element_by_css_selector( 65 address = driver.find_element_by_css_selector(
54 - "#info\.search\.place\.list > li:nth-child(5) > div.info_item > div.addr > p:nth-child(1)").text 66 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text
55 67
56 list2.append(resName) 68 list2.append(resName)
57 list2.append(reviewNum) 69 list2.append(reviewNum)
58 list2.append(rate) 70 list2.append(rate)
59 list2.append(address) 71 list2.append(address)
72 +cnt += 1
60 73
61 resName = driver.find_element_by_css_selector( 74 resName = driver.find_element_by_css_selector(
62 - "#info\.search\.place\.list > li:nth-child(6) > div.head_item.clickArea > strong > a.link_name").text 75 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.head_item.clickArea > strong > a.link_name").text
63 reviewNum = driver.find_element_by_css_selector( 76 reviewNum = driver.find_element_by_css_selector(
64 - "#info\.search\.place\.list > li:nth-child(6) > div.rating.clickArea > a > em").text 77 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > a > em").text
65 rate = driver.find_element_by_css_selector( 78 rate = driver.find_element_by_css_selector(
66 - "#info\.search\.place\.list > li:nth-child(6) > div.rating.clickArea > span.score > em").text 79 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.rating.clickArea > span.score > em").text
67 address = driver.find_element_by_css_selector( 80 address = driver.find_element_by_css_selector(
68 - "#info\.search\.place\.list > li:nth-child(6) > div.info_item > div.addr > p:nth-child(1)").text 81 + f"#info\.search\.place\.list > li:nth-child({cnt}) > div.info_item > div.addr > p:nth-child(1)").text
69 82
70 list3.append(resName) 83 list3.append(resName)
71 list3.append(reviewNum) 84 list3.append(reviewNum)
72 list3.append(rate) 85 list3.append(rate)
73 list3.append(address) 86 list3.append(address)
87 +cnt+=1
74 88
75 frame = pd.DataFrame([list1, list2, list3]) 89 frame = pd.DataFrame([list1, list2, list3])
76 frame.columns = ['가게 이름', '리뷰수', '평점', '주소'] 90 frame.columns = ['가게 이름', '리뷰수', '평점', '주소']
...@@ -82,7 +96,7 @@ for i, shopName in enumerate(frame['가게 이름'].tolist()): ...@@ -82,7 +96,7 @@ for i, shopName in enumerate(frame['가게 이름'].tolist()):
82 elem.send_keys(shopName) 96 elem.send_keys(shopName)
83 elem.send_keys(Keys.RETURN) 97 elem.send_keys(Keys.RETURN)
84 driver.find_elements_by_css_selector(".rg_i.Q4LuWd")[1].click() 98 driver.find_elements_by_css_selector(".rg_i.Q4LuWd")[1].click()
85 - time.sleep(3) 99 + time.sleep(0.2)
86 imgUrl = driver.find_element_by_css_selector( 100 imgUrl = driver.find_element_by_css_selector(
87 ".n3VNCb").get_attribute("src") 101 ".n3VNCb").get_attribute("src")
88 urllib.request.urlretrieve( 102 urllib.request.urlretrieve(
......