함수 세부구현(시간대별)

장준영
Commit 06b9bee0e3dfcf6444a3f983f00017c7c1e50edb 06b9bee0 1 parent de28eb9d
Showing 2 changed files with 237 additions and 205 deletions
naverNews/naverNews.md
naverNews/naverNews_crawling.py
--- a/naverNews/naverNews.md
View file @06b9bee
+++ b/naverNews/naverNews.md
View file @06b9bee
@@ -28,5 +28,14 @@
 * 4차 수정사항
-    기존파일의 분라 관리 시, import관련 오류 문제 해결 완료(하나의 파일로 관리) 
+    기존파일의 분리 관리 시, import관련 오류 문제 해결 완료(하나의 파일로 관리) 
     사용자 UI의 틀을 구축해놓았고, 곧바로 함수별 추가 세부 구현 예정
+    
+* 5차 수정사항
+
+    1) 네이버 댓글 공간에서 받아온 날짜 정보를 YYYY-MM-DD 형식으로 바꿈. ('방금 전, 몇 분 전, 몇 시간 전, 며칠 전'의 경우를 처리하기 위해 datetime 모듈의 datetime과 timedelta를 활용하여 
+    현재 날짜를 기준으로 계산하여 YYYY-MM-DD로 저장될 수 있도록
+    코드 추가)
+    2) 기간별로 (시작일, 종료일)을 YYYY-MM-DD 형식으로 입력하여 그 기간에 작성된 댓글을 출력해주는 함수 구현
+    
+    가장 자주 나온 단어 검색과 Matplotlib을 활용한 시각적 표현 구현 예정
\ No newline at end of file
--- a/naverNews/naverNews_crawling.py
View file @06b9bee
+++ b/naverNews/naverNews_crawling.py
View file @06b9bee
-{
+from selenium import webdriver
- "cells": [
+from selenium.common import exceptions
-  {
+from bs4 import BeautifulSoup
-   "cell_type": "code",
+from datetime import datetime, timedelta
-   "execution_count": 3,
+import time
-   "metadata": {},
+
-   "outputs": [
+
-    {
+def getData(url):
-     "name": "stdout",
+    ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)
-     "output_type": "stream",
+    options = webdriver.ChromeOptions()
-     "text": [
+    #options.add_argument('headless')
-      "******************************\n",
+    #options.add_argument("disable-gpu")
-      "\n",
+    #_url = "https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175" # 크롤링할 URL
-      "\n",
+    _url = url # 크롤링할 URL
-      "***< Naver News Crawling >****\n",
+    webDriver = "C:\\Users\\user\\Desktop\\chromedriver_win32\\chromedriver.exe"  # 내 웹드라이버 위치
-      "\n",
+    driver = webdriver.Chrome(webDriver,chrome_options=options)
-      "\n",
+    #driver = webdriver.Chrome(webDriver)
-      "******************************\n",
+    driver.get(_url)
-      "검색하고자 하는 url을 입력해주세요: \n",
+    pageCnt = 0
-      "comment_list를 가져오는 중.....\n"
+    driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함
-     ]
+    try:
-    },
+        while True: # 댓글 페이지 끝날때까지 돌림
-    {
+            #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)
-     "name": "stderr",
+            driver.find_element_by_css_selector(".u_cbox_btn_more").click() 
-     "output_type": "stream",
+            pageCnt = pageCnt+1
-     "text": [
+        
-      "C:\\Users\\user\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:14: DeprecationWarning: use options instead of chrome_options\n",
+    except exceptions.ElementNotVisibleException as e: # 페이지가 끝남
-      "  \n"
+        pass
-     ]
+        
-    },
+    except Exception as e: # 다른 예외 발생시 확인
-    {
+        print(e)
-     "ename": "InvalidArgumentException",
+    
-     "evalue": "Message: invalid argument\n  (Session info: chrome=78.0.3904.108)\n",
+    pageSource = driver.page_source # 페이지 소스를 따와서
-     "output_type": "error",
+    result = BeautifulSoup(pageSource, "lxml") # 빠르게 뽑아오기 위해 lxml 사용
-     "traceback": [
+
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+    # nickname, text, time을 raw하게 뽑아온다
-      "\u001b[1;31mInvalidArgumentException\u001b[0m                  Traceback (most recent call last)",
+    comments_raw = result.find_all("span", {"class" : "u_cbox_contents"})
-      "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m    113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 115\u001b[1;33m \u001b[0mmain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+    nicknames_raw = result.find_all("span", {"class" : "u_cbox_nick"})
-      "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mmain\u001b[1;34m()\u001b[0m\n\u001b[0;32m     97\u001b[0m     \u001b[0m_url\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'검색하고자 하는 url을 입력해주세요: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     98\u001b[0m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 가져오는 중.....'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m     \u001b[0mcList\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetData\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    100\u001b[0m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    101\u001b[0m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 다 가져왔습니다!'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+    times_raw = result.find_all("span", {"class" : "u_cbox_date"})
-      "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mgetData\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m     14\u001b[0m     \u001b[0mdriver\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwebDriver\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mchrome_options\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m     \u001b[1;31m#driver = webdriver.Chrome(webDriver)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m     \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     17\u001b[0m     \u001b[0mpageCnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     18\u001b[0m     \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimplicitly_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# 페이지가 다 로드 될때까지 기다리게함\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m    331\u001b[0m         \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    332\u001b[0m         \"\"\"\n\u001b[1;32m--> 333\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m'url'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    334\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    335\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+    # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m             response['value'] = self._unwrap_value(\n\u001b[0;32m    323\u001b[0m                 response.get('value', None))\n",
+    comments = [comment.text for comment in comments_raw]
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m    240\u001b[0m                 \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    241\u001b[0m             \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    244\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+    nicknames = [nickname.text for nickname in nicknames_raw]
-      "\u001b[1;31mInvalidArgumentException\u001b[0m: Message: invalid argument\n  (Session info: chrome=78.0.3904.108)\n"
+    times = [time.text for time in times_raw]
-     ]
+    
-    }
+    naverNewsList = []
-   ],
+    
-   "source": [
+    for i in range(len(comments)):
-    "from selenium import webdriver\n",
+        info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}
-    "from selenium.common import exceptions\n",
+        naverNewsList.append(info_dic)
-    "from bs4 import BeautifulSoup\n",
+        
-    "import time\n",
+    return naverNewsList
-    "\n",
+    #driver.quit()
-    "def getData(url):\n",
+    
-    "    ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)\n",
+from time import sleep
-    "    options = webdriver.ChromeOptions()\n",
+
-    "    #options.add_argument('headless')\n",
+def print_cList(c_List) :
-    "    #options.add_argument(\"disable-gpu\")\n",
+    for item in c_List :
-    "    #_url = \"https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\" # 크롤링할 URL\n",
+        print(item)
-    "    _url = url # 크롤링할 URL\n",
+
-    "    webDriver = \"C:\\\\Users\\\\user\\\\Desktop\\\\chromedriver_win32\\\\chromedriver.exe\"  # 내 웹드라이버 위치\n",
+def search_by_author(c_List,user_ID) :
-    "    driver = webdriver.Chrome(webDriver,chrome_options=options)\n",
+        result_List = []
-    "    #driver = webdriver.Chrome(webDriver)\n",
+        for item in c_List :
-    "    driver.get(_url)\n",
+           #print(item['userID'])
-    "    pageCnt = 0\n",
+            if ( user_ID in item['userID']) :
-    "    driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함\n",
+                result_List.append(item)
-    "    try:\n",
+        return result_List
-    "        while True: # 댓글 페이지 끝날때까지 돌림\n",
+
-    "            #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)\n",
+def search_by_keyword(c_List,keyword) :
-    "            driver.find_element_by_css_selector(\".u_cbox_btn_more\").click() \n",
+        result_List = []
-    "            pageCnt = pageCnt+1\n",
+        for item in c_List :
-    "        \n",
+            #print(item['comment'])
-    "    except exceptions.ElementNotVisibleException as e: # 페이지가 끝남\n",
+            if ( keyword in item['comment']) :
-    "        pass\n",
+                result_List.append(item)
-    "        \n",
+        return result_List
-    "    except Exception as e: # 다른 예외 발생시 확인\n",
+
-    "        print(e)\n",
+def refine_time(c_List): # 시간에서 몇일 전, 몇 분 전, 방금 전 등의 형태를 YYYY.MM.DD로 바꿔준다
-    "    \n",
+    now = datetime.now()
-    "    pageSource = driver.page_source # 페이지 소스를 따와서\n",
+    
-    "    result = BeautifulSoup(pageSource, \"lxml\") # 빠르게 뽑아오기 위해 lxml 사용\n",
+    for item in c_List:
-    "\n",
+        if (item['time'].find('전') != -1): # ~~전이 있으면
-    "    # nickname, text, time을 raw하게 뽑아온다\n",
+            if (item['time'].find('일 전') != -1): # ~일 전이라면
-    "    comments_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_contents\"})\n",
+                _day = -(int)(item['time'][0]) # 몇 일전인지에 대한 정수형 변수
-    "    nicknames_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_nick\"})\n",
+                tempTime = now + timedelta(days=_day)
-    "    times_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_date\"})\n",
+                item['time'] = str(tempTime)
-    "\n",
+                item['time'] = item['time'][0:10]
-    "    # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다\n",
+                continue
-    "    comments = [comment.text for comment in comments_raw]\n",
+            elif (item['time'].find('시간 전') != -1):
-    "    nicknames = [nickname.text for nickname in nicknames_raw]\n",
+                _index = item['time'].index('시')
-    "    times = [time.text for time in times_raw]\n",
+                _time = -(int)(item['time'][0:_index]) # 몇 시간 전인지에 대한 정수형 변수
-    "    \n",
+                tempTime = now + timedelta(hours = _time)
-    "    naverNewsList = []\n",
+                item['time'] = str(tempTime)
-    "    \n",
+                item['time'] = item['time'][0:10]
-    "    for i in range(len(comments)):\n",
+                continue
-    "        info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}\n",
+            elif (item['time'].find('분 전') != -1):
-    "        naverNewsList.append(info_dic)\n",
+                _index = item['time'].index('분')
-    "        \n",
+                _minute = -(int)(item['time'][0:_index]) # 몇 분 전인지에 대한 정수형 변수
-    "    return naverNewsList\n",
+                tempTime = now + timedelta(minutes = _minute)
-    "    #driver.quit()\n",
+                item['time'] = str(tempTime)
-    "    \n",
+                item['time'] = item['time'][0:10]
-    "from time import sleep\n",
+                continue
-    "\n",
+            elif (item['time'].find('방금 전') != -1):
-    "def print_cList(c_List) :\n",
+                tempTime = now
-    "    for item in c_List :\n",
+                item['time'] = str(tempTime)
-    "        print(item)\n",
+                item['time'] = item['time'][0:10]
-    "\n",
+                continue
-    "def search_by_author(c_List,user_ID) :\n",
+            else:
-    "        result_List = []\n",
+                item['time'] = item['time'][0:10]
-    "        for item in c_List :\n",
+                continue
-    "            print(item['userID'])\n",
+        
-    "            if ( user_ID in item['userID']) :\n",
+        
-    "                result_List.append(item)\n",
+                
-    "        return result_List\n",
+            
-    "\n",
+
-    "def search_by_keyword(c_List,keyword) :\n",
+def search_by_time(c_List,startTime, endTime) : 
-    "        result_List = []\n",
+    result_List = []
-    "        for item in c_List :\n",
+    
-    "            print(item['comment'])\n",
+    startYear = int(startTime[0:4])
-    "            if ( keyword in item['comment']) :\n",
+    
-    "                result_List.append(item)\n",
+    if (int(startTime[5]) == 0): # 한자리의 월일 때
-    "        return result_List\n",
+        startMonth = int(startTime[6])
-    "    \n",
+    else:
-    "\n",
+        startMonth = int(startTime[5:7])
-    "def search_by_time(c_List,_time) :\n",
+        
-    "        result_List = []\n",
+    if (int(startTime[8]) == 0): # 한자리의 일일 때
-    "        for item in c_List :\n",
+        startDay = int(startTime[9])
-    "            print(item['time'])\n",
+    else:
-    "            if ( keyword in item['comment']) :\n",
+        startDay = int(startTime[8:10])
-    "                result_List.append(item)\n",
+    
-    "        return result_List\n",
+    
-    "                \n",
+    
-    "def main ():\n",
+    endYear = int(endTime[0:4])
-    "    ## 시작화면\n",
+    
-    "    \n",
+    if (int(endTime[5]) == 0): # 한자리의 월일 때
-    "    _star = '*'\n",
+        endMonth = int(endTime[6])
-    "    print(_star.center(30,'*'))\n",
+    else:
-    "    print('\\n')\n",
+        endMonth = int(endTime[5:7])
-    "    headString = '< Naver News Crawling >'\n",
+        
-    "    print(headString.center(30,'*'))\n",
+    if (int(endTime[8]) == 0): # 한자리의 일일 때
-    "    print('\\n')\n",
+        endDay = int(endTime[9])
-    "    print(_star.center(30,'*'))\n",
+    else:
-    "    \n",
+        endDay = int(endTime[8:10])
-    "    \n",
+    
-    "    # 검색하고자 하는 url을 입력받는다\n",
+    for item in c_List:
-    "    _url = input('검색하고자 하는 url을 입력해주세요: ')\n",
+        itemYear = int(item['time'][0:4])
-    "    print('comment_list를 가져오는 중.....')\n",
+        
-    "    cList = getData(_url)\n",
+        if (int(item['time'][5]) == 0): # 한자리의 월일 때
-    "    print('\\n')\n",
+            itemMonth = int(item['time'][6])
-    "    print('comment_list를 다 가져왔습니다!')\n",
+        else:
-    "    \n",
+            itemMonth = int(item['time'][5:7])
-    "    while(True):\n",
+        
-    "        print('***********************************')\n",
+        if (int(item['time'][8]) == 0): # 한자리의 일일 때
-    "        print('1.닉네임 기반 검색')\n",
+            itemDay = int(item['time'][9])
-    "        print('2.키워드 기반 검색')\n",
+        else:
-    "        print('3.작성시간 기반 검색')\n",
+            itemDay = int(item['time'][8:10])
-    "        menu = input('메뉴를 입력해주세요: ')\n",
+        
-    "        \n",
+        if (itemYear >= startYear and itemYear <= endYear):
-    "        if(menu == 1):\n",
+            if (itemMonth >= startMonth and itemMonth <= endMonth):
-    "            print('***********************************')\n",
+                if(itemDay >= startDay and itemDay <= endDay):
-    "            inputID = input('검색할 닉네임 앞 4자리를 입력해주세요: ')\n",
+                    result_List.append(item)
-    "            search_by_author(cList,inputID)\n",
+    
-    "        elif(menu == 2):\n",
+    return result_List
-    "            print('***********************************')\n",
+            
-    "            inputKW = input('검색할 키워드를 입력해주세요: ')\n",
+def printResult(c_List):
-    "            search_by_keyword(cList,inputKW)\n",
+    for i in range(0,len(c_List)):
-    "        else:\n",
+        print(c_List[i])
-    "            print('***********************************')\n",
+
-    "            inputTime = input('검색할 시간대를 입력해주세요: ')\n",
+def main ():
-    "            search_by_time(cList,inputTime)\n",
+    ## 시작화면
-    "\n",
+    
-    "    \n",
+    _star = '*'
-    "main()"
+    print(_star.center(30,'*'))
-   ]
+    print('\n')
-  },
+    headString = '< Naver News Crawling >'
-  {
+    print(headString.center(30,'*'))
-   "cell_type": "code",
+    print('\n')
-   "execution_count": null,
+    print(_star.center(30,'*'))
-   "metadata": {},
+    
-   "outputs": [],
+    
-   "source": []
+    # 검색하고자 하는 url을 입력받는다
-  }
+    _url = input('검색하고자 하는 url을 입력해주세요: ')
- ],
+    print('comment_list를 가져오는 중.....')
- "metadata": {
+    cList = getData(_url)
-  "kernelspec": {
+    refine_time(cList)
-   "display_name": "Python 3",
+    print('\n')
-   "language": "python",
+    print('comment_list를 다 가져왔습니다!')
-   "name": "python3"
+    
-  },
+    while(True):
-  "language_info": {
+        print('***********************************')
-   "codemirror_mode": {
+        print('1.닉네임 기반 검색')
-    "name": "ipython",
+        print('2.키워드 기반 검색')
-    "version": 3
+        print('3.작성시간 기반 검색')
-   },
+        menu = input('메뉴를 입력해주세요: ')
-   "file_extension": ".py",
+        
-   "mimetype": "text/x-python",
+        if(menu == str(1)):
-   "name": "python",
+            print('***********************************')
-   "nbconvert_exporter": "python",
+            inputID = input('검색할 닉네임 앞 4자리를 입력해주세요(전 단계로 가시려면 -1을 입력해주세요): ')
-   "pygments_lexer": "ipython3",
+            if(inputID == str(-1)):
-   "version": "3.7.3"
+                continue
-  }
+            _result = search_by_author(cList,inputID)
- },
+            printResult(_result)
- "nbformat": 4,
+            print(_result)
- "nbformat_minor": 2
+        elif(menu == str(2)):
-}
+            print('***********************************')
+            inputKW = input('검색할 키워드를 입력해주세요(전 단계로 가시려면 -1을 입력해주세요): ')
+            if(inputKW == str(-1)):
+                continue
+            _result = search_by_keyword(cList,inputKW)
+            printResult(_result)
+        elif(menu == str(3)):
+            print('***********************************')
+            print('전 단계로 돌아가시려면 -1을 입력해주세요')
+            startTime = input('검색할 시간대의 시작일을 입력해주세요(YYYY-MM-DD): ')
+            endTime = input('검색할 시간대의 마지막 일을 입력해주세요(YYYY-MM-DD): ')
+            
+            if(startTime == str(-1) or endTime == str(-1)):
+                continue
+                
+            _result = search_by_time(cList,startTime,endTime)
+            printResult(_result)
+        else:
+            print('잘못된 입력입니다')
+            continue
+            
+
+    
+main()