장준영

파일 통합 및 UI 틀 작성

1 -{
2 - "cells": [
3 - {
4 - "cell_type": "code",
5 - "execution_count": 2,
6 - "metadata": {},
7 - "outputs": [
8 - {
9 - "name": "stdout",
10 - "output_type": "stream",
11 - "text": [
12 - "******************************\n",
13 - "\n",
14 - "\n",
15 - "***< Naver News Crawling >****\n",
16 - "\n",
17 - "\n",
18 - "******************************\n",
19 - "검색하고자 하는 url을 입력해주세요: https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\n",
20 - "comment_list를 가져오는 중.....\n",
21 - "Message: element not interactable\n",
22 - " (Session info: chrome=78.0.3904.97)\n",
23 - "\n",
24 - "[{'userID': 'ydja****', 'comment': '옹벤져스 너무웃겨', 'time': '6일 전'}, {'userID': 'kims****', 'comment': '사랑해요 옹벤져스! 준기엄마 다리 찢을 때 웃겨죽는 줄 진짜 츤데레언니들', 'time': '6일 전'}, {'userID': 'hoho****', 'comment': '옹벤져스가 다른 마을 살인마 잡는 이야기로 시즌 2. 갑시다', 'time': '6일 전'}]\n",
25 - "comment_list를 다 가져왔습니다!\n"
26 - ]
27 - }
28 - ],
29 - "source": [
30 - "import naverNews_crawling \n",
31 - "from time import sleep\n",
32 - "\n",
33 - "def print_cList(c_List) :\n",
34 - " for item in c_List :\n",
35 - " print(item)\n",
36 - "\n",
37 - "def search_by_author(c_List,user_ID) :\n",
38 - " result_List = []\n",
39 - " for item in c_List :\n",
40 - " print(item['userID'])\n",
41 - " if ( user_ID in item['userID']) :\n",
42 - " result_List.append(item)\n",
43 - " return result_List\n",
44 - "\n",
45 - "def search_by_keyword(c_List,keyword) :\n",
46 - " result_List = []\n",
47 - " for item in c_List :\n",
48 - " print(item['comment'])\n",
49 - " if ( keyword in item['comment']) :\n",
50 - " result_List.append(item)\n",
51 - " return result_List\n",
52 - " \n",
53 - "'''\n",
54 - "def search_by_time(c_List,_time) :\n",
55 - " result_List = []\n",
56 - " for item in c_List :\n",
57 - " print(item['time'])\n",
58 - " if ( keyword in item['comment']) :\n",
59 - " result_List.append(item)\n",
60 - " return result_List \n",
61 - " \n",
62 - "''' \n",
63 - "\n",
64 - "def main ():\n",
65 - " ## 시작화면\n",
66 - " \n",
67 - " _star = '*'\n",
68 - " print(_star.center(30,'*'))\n",
69 - " print('\\n')\n",
70 - " headString = '< Naver News Crawling >'\n",
71 - " print(headString.center(30,'*'))\n",
72 - " print('\\n')\n",
73 - " print(_star.center(30,'*'))\n",
74 - " \n",
75 - " \n",
76 - " # 검색하고자 하는 url을 입력받는다\n",
77 - " _url = input('검색하고자 하는 url을 입력해주세요: ')\n",
78 - " print('comment_list를 가져오는 중.....')\n",
79 - " cList = naverNews_crawling.getData(_url)\n",
80 - " print('comment_list를 다 가져왔습니다!')\n",
81 - "\n",
82 - "main()"
83 - ]
84 - }
85 - ],
86 - "metadata": {
87 - "kernelspec": {
88 - "display_name": "Python 3",
89 - "language": "python",
90 - "name": "python3"
91 - },
92 - "language_info": {
93 - "codemirror_mode": {
94 - "name": "ipython",
95 - "version": 3
96 - },
97 - "file_extension": ".py",
98 - "mimetype": "text/x-python",
99 - "name": "python",
100 - "nbconvert_exporter": "python",
101 - "pygments_lexer": "ipython3",
102 - "version": "3.7.3"
103 - }
104 - },
105 - "nbformat": 4,
106 - "nbformat_minor": 2
107 -}
...@@ -25,3 +25,8 @@ ...@@ -25,3 +25,8 @@
25 main 에서 사용할 수 있도록 한다. 이 후 main에서 리스트를 받아와 url을 입력받아 데이터를 25 main 에서 사용할 수 있도록 한다. 이 후 main에서 리스트를 받아와 url을 입력받아 데이터를
26 받아오는 방식으로 사용한다. 이 후, keyword기반, id기반, 시간대 기반 검색 함수를 구현하였고 26 받아오는 방식으로 사용한다. 이 후, keyword기반, id기반, 시간대 기반 검색 함수를 구현하였고
27 시간대별 검색 함수의 기능 보강과 가장 자주 나온 단어 검색 기능을 추가 구현할 예정이다. 27 시간대별 검색 함수의 기능 보강과 가장 자주 나온 단어 검색 기능을 추가 구현할 예정이다.
28 +
29 +* 4차 수정사항
30 +
31 + 기존 파일의 분리 관리 시, import관련 오류 문제 해결 완료(하나의 파일로 관리)
32 + 사용자 UI의 틀을 구축해놓았고, 곧바로 함수별 추가 세부 구현 예정
...\ No newline at end of file ...\ No newline at end of file
......
1 -from selenium import webdriver
2 -from selenium.common import exceptions
3 -from bs4 import BeautifulSoup
4 -import time
5 -
6 -def getData(url):
7 - ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)
8 - options = webdriver.ChromeOptions()
9 - #options.add_argument('headless')
10 - #options.add_argument("disable-gpu")
11 - #_url = "https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175" # 크롤링할 URL
12 - _url = url # 크롤링할 URL
13 - webDriver = "C:\\Users\\user\\Desktop\\chromedriver_win32\\chromedriver.exe" # 내 웹드라이버 위치
14 - driver = webdriver.Chrome(webDriver,chrome_options=options)
15 - #driver = webdriver.Chrome(webDriver)
16 - driver.get(_url)
17 - pageCnt = 0
18 - driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함
19 - try:
20 - while True: # 댓글 페이지 끝날때까지 돌림
21 - #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)
22 - driver.find_element_by_css_selector(".u_cbox_btn_more").click()
23 - pageCnt = pageCnt+1
24 -
25 - except exceptions.ElementNotVisibleException as e: # 페이지가 끝남
26 - pass
27 -
28 - except Exception as e: # 다른 예외 발생시 확인
29 - print(e)
30 -
31 - pageSource = driver.page_source # 페이지 소스를 따와서
32 - result = BeautifulSoup(pageSource, "lxml") # 빠르게 뽑아오기 위해 lxml 사용
33 -
34 - # nickname, text, time을 raw하게 뽑아온다
35 - comments_raw = result.find_all("span", {"class" : "u_cbox_contents"})
36 - nicknames_raw = result.find_all("span", {"class" : "u_cbox_nick"})
37 - times_raw = result.find_all("span", {"class" : "u_cbox_date"})
38 -
39 - # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다
40 - comments = [comment.text for comment in comments_raw]
41 - nicknames = [nickname.text for nickname in nicknames_raw]
42 - times = [time.text for time in times_raw]
43 -
44 - naverNewsList = []
45 -
46 - for i in range(len(comments)):
47 - info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}
48 - naverNewsList.append(info_dic)
49 -
50 - print(naverNewsList[:3])
51 -
52 - return naverNewsList
53 - #driver.quit()
...\ No newline at end of file ...\ No newline at end of file
1 +{
2 + "cells": [
3 + {
4 + "cell_type": "code",
5 + "execution_count": 3,
6 + "metadata": {},
7 + "outputs": [
8 + {
9 + "name": "stdout",
10 + "output_type": "stream",
11 + "text": [
12 + "******************************\n",
13 + "\n",
14 + "\n",
15 + "***< Naver News Crawling >****\n",
16 + "\n",
17 + "\n",
18 + "******************************\n",
19 + "검색하고자 하는 url을 입력해주세요: \n",
20 + "comment_list를 가져오는 중.....\n"
21 + ]
22 + },
23 + {
24 + "name": "stderr",
25 + "output_type": "stream",
26 + "text": [
27 + "C:\\Users\\user\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:14: DeprecationWarning: use options instead of chrome_options\n",
28 + " \n"
29 + ]
30 + },
31 + {
32 + "ename": "InvalidArgumentException",
33 + "evalue": "Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n",
34 + "output_type": "error",
35 + "traceback": [
36 + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
37 + "\u001b[1;31mInvalidArgumentException\u001b[0m Traceback (most recent call last)",
38 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 115\u001b[1;33m \u001b[0mmain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
39 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mmain\u001b[1;34m()\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[0m_url\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'검색하고자 하는 url을 입력해주세요: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 가져오는 중.....'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0mcList\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetData\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 다 가져왔습니다!'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
40 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mgetData\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mdriver\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwebDriver\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mchrome_options\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;31m#driver = webdriver.Chrome(webDriver)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[0mpageCnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimplicitly_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# 페이지가 다 로드 될때까지 기다리게함\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
41 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \"\"\"\n\u001b[1;32m--> 333\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m'url'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 334\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 335\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
42 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[0;32m 323\u001b[0m response.get('value', None))\n",
43 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
44 + "\u001b[1;31mInvalidArgumentException\u001b[0m: Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n"
45 + ]
46 + }
47 + ],
48 + "source": [
49 + "from selenium import webdriver\n",
50 + "from selenium.common import exceptions\n",
51 + "from bs4 import BeautifulSoup\n",
52 + "import time\n",
53 + "\n",
54 + "def getData(url):\n",
55 + " ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)\n",
56 + " options = webdriver.ChromeOptions()\n",
57 + " #options.add_argument('headless')\n",
58 + " #options.add_argument(\"disable-gpu\")\n",
59 + " #_url = \"https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\" # 크롤링할 URL\n",
60 + " _url = url # 크롤링할 URL\n",
61 + " webDriver = \"C:\\\\Users\\\\user\\\\Desktop\\\\chromedriver_win32\\\\chromedriver.exe\" # 내 웹드라이버 위치\n",
62 + " driver = webdriver.Chrome(webDriver,chrome_options=options)\n",
63 + " #driver = webdriver.Chrome(webDriver)\n",
64 + " driver.get(_url)\n",
65 + " pageCnt = 0\n",
66 + " driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함\n",
67 + " try:\n",
68 + " while True: # 댓글 페이지 끝날때까지 돌림\n",
69 + " #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)\n",
70 + " driver.find_element_by_css_selector(\".u_cbox_btn_more\").click() \n",
71 + " pageCnt = pageCnt+1\n",
72 + " \n",
73 + " except exceptions.ElementNotVisibleException as e: # 페이지가 끝남\n",
74 + " pass\n",
75 + " \n",
76 + " except Exception as e: # 다른 예외 발생시 확인\n",
77 + " print(e)\n",
78 + " \n",
79 + " pageSource = driver.page_source # 페이지 소스를 따와서\n",
80 + " result = BeautifulSoup(pageSource, \"lxml\") # 빠르게 뽑아오기 위해 lxml 사용\n",
81 + "\n",
82 + " # nickname, text, time을 raw하게 뽑아온다\n",
83 + " comments_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_contents\"})\n",
84 + " nicknames_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_nick\"})\n",
85 + " times_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_date\"})\n",
86 + "\n",
87 + " # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다\n",
88 + " comments = [comment.text for comment in comments_raw]\n",
89 + " nicknames = [nickname.text for nickname in nicknames_raw]\n",
90 + " times = [time.text for time in times_raw]\n",
91 + " \n",
92 + " naverNewsList = []\n",
93 + " \n",
94 + " for i in range(len(comments)):\n",
95 + " info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}\n",
96 + " naverNewsList.append(info_dic)\n",
97 + " \n",
98 + " return naverNewsList\n",
99 + " #driver.quit()\n",
100 + " \n",
101 + "from time import sleep\n",
102 + "\n",
103 + "def print_cList(c_List) :\n",
104 + " for item in c_List :\n",
105 + " print(item)\n",
106 + "\n",
107 + "def search_by_author(c_List,user_ID) :\n",
108 + " result_List = []\n",
109 + " for item in c_List :\n",
110 + " print(item['userID'])\n",
111 + " if ( user_ID in item['userID']) :\n",
112 + " result_List.append(item)\n",
113 + " return result_List\n",
114 + "\n",
115 + "def search_by_keyword(c_List,keyword) :\n",
116 + " result_List = []\n",
117 + " for item in c_List :\n",
118 + " print(item['comment'])\n",
119 + " if ( keyword in item['comment']) :\n",
120 + " result_List.append(item)\n",
121 + " return result_List\n",
122 + " \n",
123 + "\n",
124 + "def search_by_time(c_List,_time) :\n",
125 + " result_List = []\n",
126 + " for item in c_List :\n",
127 + " print(item['time'])\n",
128 + " if ( keyword in item['comment']) :\n",
129 + " result_List.append(item)\n",
130 + " return result_List\n",
131 + " \n",
132 + "def main ():\n",
133 + " ## 시작화면\n",
134 + " \n",
135 + " _star = '*'\n",
136 + " print(_star.center(30,'*'))\n",
137 + " print('\\n')\n",
138 + " headString = '< Naver News Crawling >'\n",
139 + " print(headString.center(30,'*'))\n",
140 + " print('\\n')\n",
141 + " print(_star.center(30,'*'))\n",
142 + " \n",
143 + " \n",
144 + " # 검색하고자 하는 url을 입력받는다\n",
145 + " _url = input('검색하고자 하는 url을 입력해주세요: ')\n",
146 + " print('comment_list를 가져오는 중.....')\n",
147 + " cList = getData(_url)\n",
148 + " print('\\n')\n",
149 + " print('comment_list를 다 가져왔습니다!')\n",
150 + " \n",
151 + " while(True):\n",
152 + " print('***********************************')\n",
153 + " print('1.닉네임 기반 검색')\n",
154 + " print('2.키워드 기반 검색')\n",
155 + " print('3.작성시간 기반 검색')\n",
156 + " menu = input('메뉴를 입력해주세요: ')\n",
157 + " \n",
158 + " if(menu == 1):\n",
159 + " print('***********************************')\n",
160 + " inputID = input('검색할 닉네임 앞 4자리를 입력해주세요: ')\n",
161 + " search_by_author(cList,inputID)\n",
162 + " elif(menu == 2):\n",
163 + " print('***********************************')\n",
164 + " inputKW = input('검색할 키워드를 입력해주세요: ')\n",
165 + " search_by_keyword(cList,inputKW)\n",
166 + " else:\n",
167 + " print('***********************************')\n",
168 + " inputTime = input('검색할 시간대를 입력해주세요: ')\n",
169 + " search_by_time(cList,inputTime)\n",
170 + "\n",
171 + " \n",
172 + "main()"
173 + ]
174 + },
175 + {
176 + "cell_type": "code",
177 + "execution_count": null,
178 + "metadata": {},
179 + "outputs": [],
180 + "source": []
181 + }
182 + ],
183 + "metadata": {
184 + "kernelspec": {
185 + "display_name": "Python 3",
186 + "language": "python",
187 + "name": "python3"
188 + },
189 + "language_info": {
190 + "codemirror_mode": {
191 + "name": "ipython",
192 + "version": 3
193 + },
194 + "file_extension": ".py",
195 + "mimetype": "text/x-python",
196 + "name": "python",
197 + "nbconvert_exporter": "python",
198 + "pygments_lexer": "ipython3",
199 + "version": "3.7.3"
200 + }
201 + },
202 + "nbformat": 4,
203 + "nbformat_minor": 2
204 +}
......