Showing
1 changed file
with
123 additions
and
0 deletions
| 1 | +import os | ||
| 2 | +from time import sleep | ||
| 3 | + | ||
| 4 | +from selenium import webdriver | ||
| 5 | +from selenium.webdriver.common.keys import Keys | ||
| 6 | +from selenium.common.exceptions import ElementNotInteractableException | ||
| 7 | +from bs4 import BeautifulSoup | ||
| 8 | +from selenium.webdriver.common.by import By | ||
| 9 | + | ||
| 10 | +# terminal UTF-8 encoding: chcp 65001 | ||
| 11 | + | ||
# ---------------------------------------------------------------------------
# Selenium-related module state
# ---------------------------------------------------------------------------
options = webdriver.ChromeOptions()
options.add_argument('headless')    # run Chrome without a visible window
options.add_argument('lang=ko_KR')  # pass the Korean locale flag to Chrome
chromedriver_path = "chromedriver"
# The chromedriver binary is resolved relative to the current working directory.
driver = webdriver.Chrome(
    os.path.join(os.getcwd(), chromedriver_path),
    options=options,
)  # start chromedriver
| 20 | + | ||
| 21 | + | ||
def main():
    """Open Kakao Map, crawl the menus for one fixed query, then shut down.

    Uses the module-level ``driver``. The browser is always closed via
    ``driver.quit()``, even when loading the page or crawling raises, so no
    chromedriver process is left behind after a failure.
    """
    driver.implicitly_wait(4)  # wait up to 4 seconds for elements to render
    try:
        driver.get('https://map.kakao.com/')  # load the map page
        search("영통역맛집")
    finally:
        driver.quit()
    print("finish")
| 32 | + | ||
| 33 | + | ||
def _parse_place_items(page_driver):
    """Return the place entries of the result list currently rendered."""
    soup = BeautifulSoup(page_driver.page_source, 'html.parser')
    return soup.select('.placelist > .PlaceItem')


def search(place):
    """Search Kakao Map for ``place`` and crawl result pages 1 through 5.

    The first result page is crawled right after submitting the query. The
    "more" button is then activated and pages 2-5 are visited in order. If a
    pagination control is missing from the page or cannot be interacted
    with, ``'not found'`` is printed and pagination stops. The search box is
    cleared before returning in every case.

    Args:
        place: Query text typed into the search box.
    """
    # Imported here so this function is self-contained with respect to the
    # file's import section.
    from selenium.common.exceptions import NoSuchElementException

    search_area = driver.find_element(By.XPATH, '//*[@id="search.keyword.query"]')  # search box
    search_area.send_keys(place)  # type the query
    driver.find_element(By.XPATH, '//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)  # submit with Enter
    sleep(1)

    # Crawl the places listed on the first result page.
    crawling(_parse_place_items(driver))
    search_area.clear()

    try:
        # Activating "more" expands the list and exposes the page links.
        driver.find_element(By.XPATH, '//*[@id="info.search.place.more"]').send_keys(Keys.ENTER)
        sleep(1)

        # Read pages 2 through 5.
        for page_no in range(2, 6):
            page_xpath = '//*[@id="info.search.page.no' + str(page_no) + '"]'
            driver.find_element(By.XPATH, page_xpath).send_keys(Keys.ENTER)
            sleep(1)

            crawling(_parse_place_items(driver))

    except (ElementNotInteractableException, NoSuchElementException):
        # A control that is absent from the DOM is treated the same way as
        # one that is present but not interactable.
        print('not found')
    finally:
        search_area.clear()
| 75 | + | ||
def crawling(placeLists):
    """Print the menu information of every place in ``placeLists``.

    Only the position of each entry is used: it is passed to ``getMenuInfo``
    together with the module-level ``driver``, and the result is printed.
    """
    for position, _ in enumerate(placeLists):
        print(getMenuInfo(position, driver))
| 80 | + | ||
def getMenuInfo(i, driver):
    """Open the detail page of the ``i``-th listed place and read its menu.

    Args:
        i: Zero-based position of the place in the result list (the XPath
            uses ``i + 1``).
        driver: Selenium WebDriver currently showing the search results.

    Returns:
        A list of ``[name, price]`` pairs, one per menu entry; empty when the
        detail page lists no menu.

    The detail tab is closed and focus returns to the window that was active
    on entry, even if reading the page raises. If activating the link did not
    open a new tab, nothing is closed, so the search window is kept.
    """
    search_handle = driver.current_window_handle

    # Go to the detail page to look for the menu.
    detail_page_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']/div[5]/div[4]/a[1]'
    driver.find_element(By.XPATH, detail_page_xpath).send_keys(Keys.ENTER)
    driver.switch_to.window(driver.window_handles[-1])  # switch to the detail tab

    try:
        sleep(1)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # The menu is rendered in one of three layouts; use the first one
        # that has entries, in this order of precedence.
        menus = []
        for selector in (
            '.cont_menu > .list_menu > .menuonly_type',
            '.cont_menu > .list_menu > .nophoto_type',
            '.cont_menu > .list_menu > .photo_type',
        ):
            menus = soup.select(selector)
            if menus:
                break

        return [_getMenuInfo(menu) for menu in menus]
    finally:
        # Close only a tab that is not the search window itself.
        if driver.current_window_handle != search_handle:
            driver.close()
        driver.switch_to.window(search_handle)  # back to the search tab
| 111 | + | ||
def _getMenuInfo(menu):
    """Extract ``[name, price]`` from one parsed menu entry.

    Args:
        menu: Parsed element exposing ``select(css)``, which returns a list
            of elements that each have a ``text`` attribute.

    Returns:
        A two-item list ``[name, price]``. ``name`` is the text of the first
        ``.loss_word`` element, or ``''`` when there is none. ``price`` is the
        second space-separated token of the first ``.price_menu`` text, or
        ``''`` when the element is missing or its text has no second token.
    """
    name_tags = menu.select('.info_menu > .loss_word')
    menu_name = name_tags[0].text if name_tags else ''

    menu_price = ''
    price_tags = menu.select('.info_menu > .price_menu')
    if price_tags:
        # Keep the token after the leading label; tolerate text without one
        # instead of raising IndexError.
        tokens = price_tags[0].text.split(' ')
        if len(tokens) > 1:
            menu_price = tokens[1]

    return [menu_name, menu_price]
| 121 | + | ||
# Run the crawler only when this file is executed as a script.
if __name__ == "__main__":
    main()
| ... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment