장준영

파일 통합 및 UI 틀 작성

1 -{
2 - "cells": [
3 - {
4 - "cell_type": "code",
5 - "execution_count": 2,
6 - "metadata": {},
7 - "outputs": [
8 - {
9 - "name": "stdout",
10 - "output_type": "stream",
11 - "text": [
12 - "******************************\n",
13 - "\n",
14 - "\n",
15 - "***< Naver News Crawling >****\n",
16 - "\n",
17 - "\n",
18 - "******************************\n",
19 - "검색하고자 하는 url을 입력해주세요: https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\n",
20 - "comment_list를 가져오는 중.....\n",
21 - "Message: element not interactable\n",
22 - " (Session info: chrome=78.0.3904.97)\n",
23 - "\n",
24 - "[{'userID': 'ydja****', 'comment': '옹벤져스 너무웃겨', 'time': '6일 전'}, {'userID': 'kims****', 'comment': '사랑해요 옹벤져스! 준기엄마 다리 찢을 때 웃겨죽는 줄 진짜 츤데레언니들', 'time': '6일 전'}, {'userID': 'hoho****', 'comment': '옹벤져스가 다른 마을 살인마 잡는 이야기로 시즌 2. 갑시다', 'time': '6일 전'}]\n",
25 - "comment_list를 다 가져왔습니다!\n"
26 - ]
27 - }
28 - ],
29 - "source": [
30 - "import naverNews_crawling \n",
31 - "from time import sleep\n",
32 - "\n",
33 - "def print_cList(c_List) :\n",
34 - " for item in c_List :\n",
35 - " print(item)\n",
36 - "\n",
37 - "def search_by_author(c_List,user_ID) :\n",
38 - " result_List = []\n",
39 - " for item in c_List :\n",
40 - " print(item['userID'])\n",
41 - " if ( user_ID in item['userID']) :\n",
42 - " result_List.append(item)\n",
43 - " return result_List\n",
44 - "\n",
45 - "def search_by_keyword(c_List,keyword) :\n",
46 - " result_List = []\n",
47 - " for item in c_List :\n",
48 - " print(item['comment'])\n",
49 - " if ( keyword in item['comment']) :\n",
50 - " result_List.append(item)\n",
51 - " return result_List\n",
52 - " \n",
53 - "'''\n",
54 - "def search_by_time(c_List,_time) :\n",
55 - " result_List = []\n",
56 - " for item in c_List :\n",
57 - " print(item['time'])\n",
58 - " if ( keyword in item['comment']) :\n",
59 - " result_List.append(item)\n",
60 - " return result_List \n",
61 - " \n",
62 - "''' \n",
63 - "\n",
64 - "def main ():\n",
65 - " ## 시작화면\n",
66 - " \n",
67 - " _star = '*'\n",
68 - " print(_star.center(30,'*'))\n",
69 - " print('\\n')\n",
70 - " headString = '< Naver News Crawling >'\n",
71 - " print(headString.center(30,'*'))\n",
72 - " print('\\n')\n",
73 - " print(_star.center(30,'*'))\n",
74 - " \n",
75 - " \n",
76 - " # 검색하고자 하는 url을 입력받는다\n",
77 - " _url = input('검색하고자 하는 url을 입력해주세요: ')\n",
78 - " print('comment_list를 가져오는 중.....')\n",
79 - " cList = naverNews_crawling.getData(_url)\n",
80 - " print('comment_list를 다 가져왔습니다!')\n",
81 - "\n",
82 - "main()"
83 - ]
84 - }
85 - ],
86 - "metadata": {
87 - "kernelspec": {
88 - "display_name": "Python 3",
89 - "language": "python",
90 - "name": "python3"
91 - },
92 - "language_info": {
93 - "codemirror_mode": {
94 - "name": "ipython",
95 - "version": 3
96 - },
97 - "file_extension": ".py",
98 - "mimetype": "text/x-python",
99 - "name": "python",
100 - "nbconvert_exporter": "python",
101 - "pygments_lexer": "ipython3",
102 - "version": "3.7.3"
103 - }
104 - },
105 - "nbformat": 4,
106 - "nbformat_minor": 2
107 -}
...@@ -24,4 +24,9 @@ ...@@ -24,4 +24,9 @@
24 data를 get하여 정제하는 파일을 모듈로 분리해 내어 list형태로 저장된 데이터셋을 반환하여 24 data를 get하여 정제하는 파일을 모듈로 분리해 내어 list형태로 저장된 데이터셋을 반환하여
25 main 에서 사용할 수 있도록 한다. 이 후 main에서 리스트를 받아와 url을 입력받아 데이터를 25 main 에서 사용할 수 있도록 한다. 이 후 main에서 리스트를 받아와 url을 입력받아 데이터를
26 받아오는 방식으로 사용한다. 이 후, keyword기반, id기반, 시간대 기반 검색 함수를 구현하였고 26 받아오는 방식으로 사용한다. 이 후, keyword기반, id기반, 시간대 기반 검색 함수를 구현하였고
27 - 시간대별 검색 함수의 기능 보강과 가장 자주 나온 단어 검색 기능을 추가 구현할 예정이다.
...\ No newline at end of file ...\ No newline at end of file
27 + 시간대별 검색 함수의 기능 보강과 가장 자주 나온 단어 검색 기능을 추가 구현할 예정이다.
28 +
29 +* 4차 수정사항
30 +
31 + 기존 파일의 분리 관리 시, import 관련 오류 문제 해결 완료(하나의 파일로 관리)
32 + 사용자 UI의 틀을 구축해놓았고, 곧바로 함수별 추가 세부 구현 예정
...\ No newline at end of file ...\ No newline at end of file
......
1 -from selenium import webdriver
2 -from selenium.common import exceptions
3 -from bs4 import BeautifulSoup
4 -import time
5 -
6 -def getData(url):
7 - ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)
8 - options = webdriver.ChromeOptions()
9 - #options.add_argument('headless')
10 - #options.add_argument("disable-gpu")
11 - #_url = "https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175" # 크롤링할 URL
12 - _url = url # 크롤링할 URL
13 - webDriver = "C:\\Users\\user\\Desktop\\chromedriver_win32\\chromedriver.exe" # 내 웹드라이버 위치
14 - driver = webdriver.Chrome(webDriver,chrome_options=options)
15 - #driver = webdriver.Chrome(webDriver)
16 - driver.get(_url)
17 - pageCnt = 0
18 - driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함
19 - try:
20 - while True: # 댓글 페이지 끝날때까지 돌림
21 - #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)
22 - driver.find_element_by_css_selector(".u_cbox_btn_more").click()
23 - pageCnt = pageCnt+1
24 -
25 - except exceptions.ElementNotVisibleException as e: # 페이지가 끝남
26 - pass
27 -
28 - except Exception as e: # 다른 예외 발생시 확인
29 - print(e)
30 -
31 - pageSource = driver.page_source # 페이지 소스를 따와서
32 - result = BeautifulSoup(pageSource, "lxml") # 빠르게 뽑아오기 위해 lxml 사용
33 -
34 - # nickname, text, time을 raw하게 뽑아온다
35 - comments_raw = result.find_all("span", {"class" : "u_cbox_contents"})
36 - nicknames_raw = result.find_all("span", {"class" : "u_cbox_nick"})
37 - times_raw = result.find_all("span", {"class" : "u_cbox_date"})
38 -
39 - # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다
40 - comments = [comment.text for comment in comments_raw]
41 - nicknames = [nickname.text for nickname in nicknames_raw]
42 - times = [time.text for time in times_raw]
43 -
44 - naverNewsList = []
45 -
46 - for i in range(len(comments)):
47 - info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}
48 - naverNewsList.append(info_dic)
49 -
50 - print(naverNewsList[:3])
51 -
52 - return naverNewsList
53 - #driver.quit()
...\ No newline at end of file ...\ No newline at end of file
1 +{
2 + "cells": [
3 + {
4 + "cell_type": "code",
5 + "execution_count": 3,
6 + "metadata": {},
7 + "outputs": [
8 + {
9 + "name": "stdout",
10 + "output_type": "stream",
11 + "text": [
12 + "******************************\n",
13 + "\n",
14 + "\n",
15 + "***< Naver News Crawling >****\n",
16 + "\n",
17 + "\n",
18 + "******************************\n",
19 + "검색하고자 하는 url을 입력해주세요: \n",
20 + "comment_list를 가져오는 중.....\n"
21 + ]
22 + },
23 + {
24 + "name": "stderr",
25 + "output_type": "stream",
26 + "text": [
27 + "C:\\Users\\user\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:14: DeprecationWarning: use options instead of chrome_options\n",
28 + " \n"
29 + ]
30 + },
31 + {
32 + "ename": "InvalidArgumentException",
33 + "evalue": "Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n",
34 + "output_type": "error",
35 + "traceback": [
36 + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
37 + "\u001b[1;31mInvalidArgumentException\u001b[0m Traceback (most recent call last)",
38 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 114\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 115\u001b[1;33m \u001b[0mmain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
39 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mmain\u001b[1;34m()\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[0m_url\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'검색하고자 하는 url을 입력해주세요: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 가져오는 중.....'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0mcList\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetData\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'comment_list를 다 가져왔습니다!'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
40 + "\u001b[1;32m<ipython-input-3-aa9195667f4b>\u001b[0m in \u001b[0;36mgetData\u001b[1;34m(url)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mdriver\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwebDriver\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mchrome_options\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;31m#driver = webdriver.Chrome(webDriver)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_url\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[0mpageCnt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[0mdriver\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimplicitly_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# 페이지가 다 로드 될때까지 기다리게함\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
41 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \"\"\"\n\u001b[1;32m--> 333\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m'url'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 334\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 335\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
42 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, driver_command, params)\u001b[0m\n\u001b[0;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[0;32m 323\u001b[0m response.get('value', None))\n",
43 + "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[1;34m(self, response)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'alert'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 242\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
44 + "\u001b[1;31mInvalidArgumentException\u001b[0m: Message: invalid argument\n (Session info: chrome=78.0.3904.108)\n"
45 + ]
46 + }
47 + ],
48 + "source": [
49 + "from selenium import webdriver\n",
50 + "from selenium.common import exceptions\n",
51 + "from bs4 import BeautifulSoup\n",
52 + "import time\n",
53 + "\n",
54 + "def getData(url):\n",
55 + " ## chrome option걸기 (headless하게 웹 크롤링 수행하기 위해<웹페이지 안보이게 하기>)\n",
56 + " options = webdriver.ChromeOptions()\n",
57 + " #options.add_argument('headless')\n",
58 + " #options.add_argument(\"disable-gpu\")\n",
59 + " #_url = \"https://entertain.naver.com/ranking/comment/list?oid=144&aid=0000642175\" # 크롤링할 URL\n",
60 + " _url = url # 크롤링할 URL\n",
61 + " webDriver = \"C:\\\\Users\\\\user\\\\Desktop\\\\chromedriver_win32\\\\chromedriver.exe\" # 내 웹드라이버 위치\n",
62 + " driver = webdriver.Chrome(webDriver,chrome_options=options)\n",
63 + " #driver = webdriver.Chrome(webDriver)\n",
64 + " driver.get(_url)\n",
65 + " pageCnt = 0\n",
66 + " driver.implicitly_wait(3) # 페이지가 다 로드 될때까지 기다리게함\n",
67 + " try:\n",
68 + " while True: # 댓글 페이지 끝날때까지 돌림\n",
69 + " #driver의 find_element_by_css_selector함수로 '네이버 뉴스'의 댓글 '더보기' 버튼을 찾아서 계속 클릭해준다(끝까지)\n",
70 + " driver.find_element_by_css_selector(\".u_cbox_btn_more\").click() \n",
71 + " pageCnt = pageCnt+1\n",
72 + " \n",
73 + " except exceptions.ElementNotVisibleException as e: # 페이지가 끝남\n",
74 + " pass\n",
75 + " \n",
76 + " except Exception as e: # 다른 예외 발생시 확인\n",
77 + " print(e)\n",
78 + " \n",
79 + " pageSource = driver.page_source # 페이지 소스를 따와서\n",
80 + " result = BeautifulSoup(pageSource, \"lxml\") # 빠르게 뽑아오기 위해 lxml 사용\n",
81 + "\n",
82 + " # nickname, text, time을 raw하게 뽑아온다\n",
83 + " comments_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_contents\"})\n",
84 + " nicknames_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_nick\"})\n",
85 + " times_raw = result.find_all(\"span\", {\"class\" : \"u_cbox_date\"})\n",
86 + "\n",
87 + " # nickname, text, time 값 만을 뽑아내어 리스트로 정리한다\n",
88 + " comments = [comment.text for comment in comments_raw]\n",
89 + " nicknames = [nickname.text for nickname in nicknames_raw]\n",
90 + " times = [time.text for time in times_raw]\n",
91 + " \n",
92 + " naverNewsList = []\n",
93 + " \n",
94 + " for i in range(len(comments)):\n",
95 + " info_dic = {'userID' : nicknames[i], 'comment' : comments[i], 'time' : times[i]}\n",
96 + " naverNewsList.append(info_dic)\n",
97 + " \n",
98 + " return naverNewsList\n",
99 + " #driver.quit()\n",
100 + " \n",
101 + "from time import sleep\n",
102 + "\n",
103 + "def print_cList(c_List) :\n",
104 + " for item in c_List :\n",
105 + " print(item)\n",
106 + "\n",
107 + "def search_by_author(c_List,user_ID) :\n",
108 + " result_List = []\n",
109 + " for item in c_List :\n",
110 + " print(item['userID'])\n",
111 + " if ( user_ID in item['userID']) :\n",
112 + " result_List.append(item)\n",
113 + " return result_List\n",
114 + "\n",
115 + "def search_by_keyword(c_List,keyword) :\n",
116 + " result_List = []\n",
117 + " for item in c_List :\n",
118 + " print(item['comment'])\n",
119 + " if ( keyword in item['comment']) :\n",
120 + " result_List.append(item)\n",
121 + " return result_List\n",
122 + " \n",
123 + "\n",
124 + "def search_by_time(c_List,_time) :\n",
125 + "    result_List = []\n",
126 + "    for item in c_List :\n",
127 + "        print(item['time'])\n",
128 + "        if ( _time in item['time']) :\n",
129 + "            result_List.append(item)\n",
130 + "    return result_List\n",
131 + " \n",
132 + "def main ():\n",
133 + "    ## start screen banner\n",
134 + "    \n",
135 + "    _star = '*'\n",
136 + "    print(_star.center(30,'*'))\n",
137 + "    print('\\n')\n",
138 + "    headString = '< Naver News Crawling >'\n",
139 + "    print(headString.center(30,'*'))\n",
140 + "    print('\\n')\n",
141 + "    print(_star.center(30,'*'))\n",
142 + "    \n",
143 + "    \n",
144 + "    # read the target url to crawl from the user\n",
145 + "    _url = input('검색하고자 하는 url을 입력해주세요: ')\n",
146 + "    print('comment_list를 가져오는 중.....')\n",
147 + "    cList = getData(_url)\n",
148 + "    print('\\n')\n",
149 + "    print('comment_list를 다 가져왔습니다!')\n",
150 + "    \n",
151 + "    while(True):\n",
152 + "        print('***********************************')\n",
153 + "        print('1.닉네임 기반 검색')\n",
154 + "        print('2.키워드 기반 검색')\n",
155 + "        print('3.작성시간 기반 검색')\n",
156 + "        menu = input('메뉴를 입력해주세요: ')\n",
157 + "        \n",
158 + "        if(menu == '1'):\n",
159 + "            print('***********************************')\n",
160 + "            inputID = input('검색할 닉네임 앞 4자리를 입력해주세요: ')\n",
161 + "            search_by_author(cList,inputID)\n",
162 + "        elif(menu == '2'):\n",
163 + "            print('***********************************')\n",
164 + "            inputKW = input('검색할 키워드를 입력해주세요: ')\n",
165 + "            search_by_keyword(cList,inputKW)\n",
166 + "        else:\n",
167 + "            print('***********************************')\n",
168 + "            inputTime = input('검색할 시간대를 입력해주세요: ')\n",
169 + "            search_by_time(cList,inputTime)\n",
170 + "\n",
171 + "    \n",
172 + "main()"
173 + ]
174 + },
175 + {
176 + "cell_type": "code",
177 + "execution_count": null,
178 + "metadata": {},
179 + "outputs": [],
180 + "source": []
181 + }
182 + ],
183 + "metadata": {
184 + "kernelspec": {
185 + "display_name": "Python 3",
186 + "language": "python",
187 + "name": "python3"
188 + },
189 + "language_info": {
190 + "codemirror_mode": {
191 + "name": "ipython",
192 + "version": 3
193 + },
194 + "file_extension": ".py",
195 + "mimetype": "text/x-python",
196 + "name": "python",
197 + "nbconvert_exporter": "python",
198 + "pygments_lexer": "ipython3",
199 + "version": "3.7.3"
200 + }
201 + },
202 + "nbformat": 4,
203 + "nbformat_minor": 2
204 +}
......