장준영

함수 세부구현(시간대별)

...@@ -28,5 +28,14 @@ ...@@ -28,5 +28,14 @@
28 28
29 * 4차 수정사항 29 * 4차 수정사항
30 30
31 - 기존파일의 분 관리 시, import관련 오류 문제 해결 완료(하나의 파일로 관리) 31 + 기존파일의 분 관리 시, import관련 오류 문제 해결 완료(하나의 파일로 관리)
32 사용자 UI의 틀을 구축해놓았고, 곧바로 함수별 추가 세부 구현 예정 32 사용자 UI의 틀을 구축해놓았고, 곧바로 함수별 추가 세부 구현 예정
33 +
34 +* 5차 수정사항
35 +
36 + 1) 네이버 댓글공간에서 받아온 날짜 정보를 YYYY-MM-DD 형식으로 바꿈. ('방금 전, 몇 분 전, 몇 시간 전, 며칠 전'의 경우를 처리하기 위해 datetime 모듈의 datetime과 timedelta를 활용하여
37 + 현재 날짜를 기준으로 계산하여 YYYY-MM-DD로 저장될 수 있도록
38 + 코드 추가)
39 + 2) 시간대별로 (시작일, 마지막 일)을 YYYY-MM-DD 형식으로 입력하여 그 기간에 작성된 댓글을 출력해주는 함수 구현
40 +
41 + 가장 많이 나온 단어 검색과 Matplotlib을 활용한 시각적 표현 구현 예정
...\ No newline at end of file ...\ No newline at end of file
......
1 -{ 1 +from selenium import webdriver
2 - "cells": [ 2 +from selenium.common import exceptions
3 - { 3 +from bs4 import BeautifulSoup
4 - "cell_type": "code", 4 +from datetime import datetime, timedelta
5 - "execution_count": 3, 5 +import time
6 - "metadata": {}, 6 +
7 - "outputs": [ 7 +
8 - { 8 +def getData(url):
9 - "name": "stdout", 9 + ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)
10 - "output_type": "stream", 10 + options = webdriver.ChromeOptions()
11 - "text": [ 11 + #options.add_argument('headless')
12 - "******************************\n", 12 + #options.add_argument("disable-gpu")
13 - "\n", 13 + #_url = "https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175" # 크롤링할 URL
14 - "\n", 14 + _url = url # 크롤링할 URL
15 - "***< Naver News Crawling >****\n", 15 + webDriver = "C:\\Users\\user\\Desktop\\chromedriver_win32\\chromedriver.exe" # 내 웹드라이버 위치
16 - "\n", 16 + driver = webdriver.Chrome(webDriver,chrome_options=options)
17 - "\n", 17 + #driver = webdriver.Chrome(webDriver)
18 - "******************************\n", 18 + driver.get(_url)
19 - "검색하고자 하는 url을 입력해주세요: \n", 19 + pageCnt = 0
20 - "comment_list를 가져오는 중.....\n" 20 + driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함
21 - ] 21 + try:
22 - }, 22 + while True: # 댓글 페이지 끝날때까지 돌림
23 - { 23 + #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)
24 - "name": "stderr", 24 + driver.find_element_by_css_selector(".u_cbox_btn_more").click()
25 - "output_type": "stream", 25 + pageCnt = pageCnt+1
26 - "text": [ 26 +
27 - "C:\\Users\\user\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:14: DeprecationWarning: use options instead of chrome_options\n", 27 + except exceptions.ElementNotVisibleException as e: # 페이지가 끝남
28 - " \n" 28 + pass
29 - ] 29 +
30 - }, 30 + except Exception as e: # 다른 예외 발생시 확인
31 - { 31 + print(e)
32 - "ename": "InvalidArgumentException", 32 +
33 - "evalue": "Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n", 33 + pageSource = driver.page_source # 페이지 소스를 따와서
34 - "output_type": "error", 34 + result = BeautifulSoup(pageSource, "lxml") # 빠르게 뽑아오기 위해 lxml 사용
35 - "traceback": [ 35 +
36 - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 36 + # nickname, text, time을 raw하게 뽑아온다
37 - "\u001b[1;31mInvalidArgumentException\u001b[0m Traceback (most recent call last)", 37 + comments_raw = result.find_all("span", {"class" : "u_cbox_contents"})
38 - "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 115\u001b[1;33m \u001b[0mmain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 38 + nicknames_raw = result.find_all("span", {"class" : "u_cbox_nick"})
39 - "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mmain\u001b[1;34m()\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[0m_url\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'검색하고자 하는 url을 입력해주세요: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 가져오는 중.....'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0mcList\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetData\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 다 가져왔습니다!'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 39 + times_raw = result.find_all("span", {"class" : "u_cbox_date"})
40 - "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mgetData\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mdriver\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwebDriver\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mchrome_options\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;31m#driver = webdriver.Chrome(webDriver)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[0mpageCnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimplicitly_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# 페이지가 다 로드 될때까지 기다리게함\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 40 +
41 - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \"\"\"\n\u001b[1;32m--> 333\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m'url'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 334\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 335\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 41 + # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다
42 - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[0;32m 323\u001b[0m response.get('value', None))\n", 42 + comments = [comment.text for comment in comments_raw]
43 - "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 43 + nicknames = [nickname.text for nickname in nicknames_raw]
44 - "\u001b[1;31mInvalidArgumentException\u001b[0m: Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n" 44 + times = [time.text for time in times_raw]
45 - ] 45 +
46 - } 46 + naverNewsList = []
47 - ], 47 +
48 - "source": [ 48 + for i in range(len(comments)):
49 - "from selenium import webdriver\n", 49 + info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}
50 - "from selenium.common import exceptions\n", 50 + naverNewsList.append(info_dic)
51 - "from bs4 import BeautifulSoup\n", 51 +
52 - "import time\n", 52 + return naverNewsList
53 - "\n", 53 + #driver.quit()
54 - "def getData(url):\n", 54 +
55 - " ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)\n", 55 +from time import sleep
56 - " options = webdriver.ChromeOptions()\n", 56 +
57 - " #options.add_argument('headless')\n", 57 +def print_cList(c_List) :
58 - " #options.add_argument(\"disable-gpu\")\n", 58 + for item in c_List :
59 - " #_url = \"https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\" # 크롤링할 URL\n", 59 + print(item)
60 - " _url = url # 크롤링할 URL\n", 60 +
61 - " webDriver = \"C:\\\\Users\\\\user\\\\Desktop\\\\chromedriver_win32\\\\chromedriver.exe\" # 내 웹드라이버 위치\n", 61 +def search_by_author(c_List,user_ID) :
62 - " driver = webdriver.Chrome(webDriver,chrome_options=options)\n", 62 + result_List = []
63 - " #driver = webdriver.Chrome(webDriver)\n", 63 + for item in c_List :
64 - " driver.get(_url)\n", 64 + #print(item['userID'])
65 - " pageCnt = 0\n", 65 + if ( user_ID in item['userID']) :
66 - " driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함\n", 66 + result_List.append(item)
67 - " try:\n", 67 + return result_List
68 - " while True: # 댓글 페이지 끝날때까지 돌림\n", 68 +
69 - " #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)\n", 69 +def search_by_keyword(c_List,keyword) :
70 - " driver.find_element_by_css_selector(\".u_cbox_btn_more\").click() \n", 70 + result_List = []
71 - " pageCnt = pageCnt+1\n", 71 + for item in c_List :
72 - " \n", 72 + #print(item['comment'])
73 - " except exceptions.ElementNotVisibleException as e: # 페이지가 끝남\n", 73 + if ( keyword in item['comment']) :
74 - " pass\n", 74 + result_List.append(item)
75 - " \n", 75 + return result_List
76 - " except Exception as e: # 다른 예외 발생시 확인\n", 76 +
77 - " print(e)\n", 77 +def refine_time(c_List): # 시간에서 몇일 전, 몇 분 전, 방금 전 등의 형태를 YYYY.MM.DD로 바꿔준다
78 - " \n", 78 + now = datetime.now()
79 - " pageSource = driver.page_source # 페이지 소스를 따와서\n", 79 +
80 - " result = BeautifulSoup(pageSource, \"lxml\") # 빠르게 뽑아오기 위해 lxml 사용\n", 80 + for item in c_List:
81 - "\n", 81 + if (item['time'].find('전') != -1): # ~~전이 있으면
82 - " # nickname, text, time을 raw하게 뽑아온다\n", 82 + if (item['time'].find('일 전') != -1): # ~일 전이라면
83 - " comments_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_contents\"})\n", 83 + _day = -(int)(item['time'][0]) # 몇 일전인지에 대한 정수형 변수
84 - " nicknames_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_nick\"})\n", 84 + tempTime = now + timedelta(days=_day)
85 - " times_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_date\"})\n", 85 + item['time'] = str(tempTime)
86 - "\n", 86 + item['time'] = item['time'][0:10]
87 - " # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다\n", 87 + continue
88 - " comments = [comment.text for comment in comments_raw]\n", 88 + elif (item['time'].find('시간 전') != -1):
89 - " nicknames = [nickname.text for nickname in nicknames_raw]\n", 89 + _index = item['time'].index('시')
90 - " times = [time.text for time in times_raw]\n", 90 + _time = -(int)(item['time'][0:_index]) # 몇 시간 전인지에 대한 정수형 변수
91 - " \n", 91 + tempTime = now + timedelta(hours = _time)
92 - " naverNewsList = []\n", 92 + item['time'] = str(tempTime)
93 - " \n", 93 + item['time'] = item['time'][0:10]
94 - " for i in range(len(comments)):\n", 94 + continue
95 - " info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}\n", 95 + elif (item['time'].find('분 전') != -1):
96 - " naverNewsList.append(info_dic)\n", 96 + _index = item['time'].index('분')
97 - " \n", 97 + _minute = -(int)(item['time'][0:_index]) # 몇 분 전인지에 대한 정수형 변수
98 - " return naverNewsList\n", 98 + tempTime = now + timedelta(minutes = _minute)
99 - " #driver.quit()\n", 99 + item['time'] = str(tempTime)
100 - " \n", 100 + item['time'] = item['time'][0:10]
101 - "from time import sleep\n", 101 + continue
102 - "\n", 102 + elif (item['time'].find('방금 전') != -1):
103 - "def print_cList(c_List) :\n", 103 + tempTime = now
104 - " for item in c_List :\n", 104 + item['time'] = str(tempTime)
105 - " print(item)\n", 105 + item['time'] = item['time'][0:10]
106 - "\n", 106 + continue
107 - "def search_by_author(c_List,user_ID) :\n", 107 + else:
108 - " result_List = []\n", 108 + item['time'] = item['time'][0:10]
109 - " for item in c_List :\n", 109 + continue
110 - " print(item['userID'])\n", 110 +
111 - " if ( user_ID in item['userID']) :\n", 111 +
112 - " result_List.append(item)\n", 112 +
113 - " return result_List\n", 113 +
114 - "\n", 114 +
115 - "def search_by_keyword(c_List,keyword) :\n", 115 +def search_by_time(c_List,startTime, endTime) :
116 - " result_List = []\n", 116 + result_List = []
117 - " for item in c_List :\n", 117 +
118 - " print(item['comment'])\n", 118 + startYear = int(startTime[0:4])
119 - " if ( keyword in item['comment']) :\n", 119 +
120 - " result_List.append(item)\n", 120 + if (int(startTime[5]) == 0): # 한자리의 월일 때
121 - " return result_List\n", 121 + startMonth = int(startTime[6])
122 - " \n", 122 + else:
123 - "\n", 123 + startMonth = int(startTime[5:7])
124 - "def search_by_time(c_List,_time) :\n", 124 +
125 - " result_List = []\n", 125 + if (int(startTime[8]) == 0): # 한자리의 일일 때
126 - " for item in c_List :\n", 126 + startDay = int(startTime[9])
127 - " print(item['time'])\n", 127 + else:
128 - " if ( keyword in item['comment']) :\n", 128 + startDay = int(startTime[8:10])
129 - " result_List.append(item)\n", 129 +
130 - " return result_List\n", 130 +
131 - " \n", 131 +
132 - "def main ():\n", 132 + endYear = int(endTime[0:4])
133 - " ## 시작화면\n", 133 +
134 - " \n", 134 + if (int(endTime[5]) == 0): # 한자리의 월일 때
135 - " _star = '*'\n", 135 + endMonth = int(endTime[6])
136 - " print(_star.center(30,'*'))\n", 136 + else:
137 - " print('\\n')\n", 137 + endMonth = int(endTime[5:7])
138 - " headString = '< Naver News Crawling >'\n", 138 +
139 - " print(headString.center(30,'*'))\n", 139 + if (int(endTime[8]) == 0): # 한자리의 일일 때
140 - " print('\\n')\n", 140 + endDay = int(endTime[9])
141 - " print(_star.center(30,'*'))\n", 141 + else:
142 - " \n", 142 + endDay = int(endTime[8:10])
143 - " \n", 143 +
144 - " # 검색하고자 하는 url을 입력받는다\n", 144 + for item in c_List:
145 - " _url = input('검색하고자 하는 url을 입력해주세요: ')\n", 145 + itemYear = int(item['time'][0:4])
146 - " print('comment_list를 가져오는 중.....')\n", 146 +
147 - " cList = getData(_url)\n", 147 + if (int(item['time'][5]) == 0): # 한자리의 월일 때
148 - " print('\\n')\n", 148 + itemMonth = int(item['time'][6])
149 - " print('comment_list를 다 가져왔습니다!')\n", 149 + else:
150 - " \n", 150 + itemMonth = int(item['time'][5:7])
151 - " while(True):\n", 151 +
152 - " print('***********************************')\n", 152 + if (int(item['time'][8]) == 0): # 한자리의 일일 때
153 - " print('1.닉네임 기반 검색')\n", 153 + itemDay = int(item['time'][9])
154 - " print('2.키워드 기반 검색')\n", 154 + else:
155 - " print('3.작성시간 기반 검색')\n", 155 + itemDay = int(item['time'][8:10])
156 - " menu = input('메뉴를 입력해주세요: ')\n", 156 +
157 - " \n", 157 + if (itemYear >= startYear and itemYear <= endYear):
158 - " if(menu == 1):\n", 158 + if (itemMonth >= startMonth and itemMonth <= endMonth):
159 - " print('***********************************')\n", 159 + if(itemDay >= startDay and itemDay <= endDay):
160 - " inputID = input('검색할 닉네임 앞 4자리를 입력해주세요: ')\n", 160 + result_List.append(item)
161 - " search_by_author(cList,inputID)\n", 161 +
162 - " elif(menu == 2):\n", 162 + return result_List
163 - " print('***********************************')\n", 163 +
164 - " inputKW = input('검색할 키워드를 입력해주세요: ')\n", 164 +def printResult(c_List):
165 - " search_by_keyword(cList,inputKW)\n", 165 + for i in range(0,len(c_List)):
166 - " else:\n", 166 + print(c_List[i])
167 - " print('***********************************')\n", 167 +
168 - " inputTime = input('검색할 시간대를 입력해주세요: ')\n", 168 +def main ():
169 - " search_by_time(cList,inputTime)\n", 169 + ## 시작화면
170 - "\n", 170 +
171 - " \n", 171 + _star = '*'
172 - "main()" 172 + print(_star.center(30,'*'))
173 - ] 173 + print('\n')
174 - }, 174 + headString = '< Naver News Crawling >'
175 - { 175 + print(headString.center(30,'*'))
176 - "cell_type": "code", 176 + print('\n')
177 - "execution_count": null, 177 + print(_star.center(30,'*'))
178 - "metadata": {}, 178 +
179 - "outputs": [], 179 +
180 - "source": [] 180 + # 검색하고자 하는 url을 입력받는다
181 - } 181 + _url = input('검색하고자 하는 url을 입력해주세요: ')
182 - ], 182 + print('comment_list를 가져오는 중.....')
183 - "metadata": { 183 + cList = getData(_url)
184 - "kernelspec": { 184 + refine_time(cList)
185 - "display_name": "Python 3", 185 + print('\n')
186 - "language": "python", 186 + print('comment_list를 다 가져왔습니다!')
187 - "name": "python3" 187 +
188 - }, 188 + while(True):
189 - "language_info": { 189 + print('***********************************')
190 - "codemirror_mode": { 190 + print('1.닉네임 기반 검색')
191 - "name": "ipython", 191 + print('2.키워드 기반 검색')
192 - "version": 3 192 + print('3.작성시간 기반 검색')
193 - }, 193 + menu = input('메뉴를 입력해주세요: ')
194 - "file_extension": ".py", 194 +
195 - "mimetype": "text/x-python", 195 + if(menu == str(1)):
196 - "name": "python", 196 + print('***********************************')
197 - "nbconvert_exporter": "python", 197 + inputID = input('검색할 닉네임 앞 4자리를 입력해주세요(전 단계로 가시려면 -1을 입력해주세요): ')
198 - "pygments_lexer": "ipython3", 198 + if(inputID == str(-1)):
199 - "version": "3.7.3" 199 + continue
200 - } 200 + _result = search_by_author(cList,inputID)
201 - }, 201 + printResult(_result)
202 - "nbformat": 4, 202 + print(_result)
203 - "nbformat_minor": 2 203 + elif(menu == str(2)):
204 -} 204 + print('***********************************')
205 + inputKW = input('검색할 키워드를 입력해주세요(전 단계로 가시려면 -1을 입력해주세요): ')
206 + if(inputKW == str(-1)):
207 + continue
208 + _result = search_by_keyword(cList,inputKW)
209 + printResult(_result)
210 + elif(menu == str(3)):
211 + print('***********************************')
212 + print('전 단계로 돌아가시려면 -1을 입력해주세요')
213 + startTime = input('검색할 시간대의 시작일을 입력해주세요(YYYY-MM-DD): ')
214 + endTime = input('검색할 시간대의 마지막 일을 입력해주세요(YYYY-MM-DD): ')
215 +
216 + if(startTime == str(-1) or endTime == str(-1)):
217 + continue
218 +
219 + _result = search_by_time(cList,startTime,endTime)
220 + printResult(_result)
221 + else:
222 + print('잘못된 입력입니다')
223 + continue
224 +
225 +
226 +
227 +main()
......