임승현

Merge branch 'feature/CGV_Crawling' into 'master'

Feature/cgv crawling

First Merge from feature/CGV_Crawling to master

See merge request !16
1 +# Default ignored files
2 +/shelf/
3 +/workspace.xml
4 +# Editor-based HTTP Client requests
5 +/httpRequests/
6 +# Datasource local storage ignored files
7 +/dataSources/
8 +/dataSources.local.xml
1 +<component name="libraryTable">
2 + <library name="jsoup-1.15.1">
3 + <CLASSES>
4 + <root url="jar://$PROJECT_DIR$/jsoup-1.15.1.jar!/" />
5 + </CLASSES>
6 + <JAVADOC />
7 + <SOURCES />
8 + </library>
9 +</component>
...\ No newline at end of file ...\ No newline at end of file
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<project version="4">
3 + <component name="ProjectRootManager" version="2" languageLevel="JDK_15" default="true" project-jdk-name="15" project-jdk-type="JavaSDK">
4 + <output url="file://$PROJECT_DIR$/out" />
5 + </component>
6 +</project>
...\ No newline at end of file ...\ No newline at end of file
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<project version="4">
3 + <component name="ProjectModuleManager">
4 + <modules>
5 + <module fileurl="file://$PROJECT_DIR$/WebCrawling.iml" filepath="$PROJECT_DIR$/WebCrawling.iml" />
6 + </modules>
7 + </component>
8 +</project>
...\ No newline at end of file ...\ No newline at end of file
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<project version="4">
3 + <component name="VcsDirectoryMappings">
4 + <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5 + </component>
6 +</project>
...\ No newline at end of file ...\ No newline at end of file
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<module type="JAVA_MODULE" version="4">
3 + <component name="NewModuleRootManager" inherit-compiler-output="true">
4 + <exclude-output />
5 + <content url="file://$MODULE_DIR$">
6 + <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
7 + </content>
8 + <orderEntry type="inheritedJdk" />
9 + <orderEntry type="sourceFolder" forTests="false" />
10 + <orderEntry type="library" name="jsoup-1.15.1" level="project" />
11 + </component>
12 +</module>
...\ No newline at end of file ...\ No newline at end of file
This file is too large to display.
No preview for this file type
1 +import org.jsoup.Jsoup;
2 +import org.jsoup.nodes.Document;
3 +import org.jsoup.nodes.Element;
4 +import org.jsoup.select.Elements;
5 +
6 +import java.awt.*;
7 +import java.io.*;
8 +import java.net.URI;
9 +import java.net.URISyntaxException;
10 +import java.util.*;
11 +import java.util.List;
12 +
13 +import org.openqa.selenium.By;
14 +import org.openqa.selenium.WebDriver;
15 +import org.openqa.selenium.WebElement;
16 +import org.openqa.selenium.chrome.ChromeDriver;
17 +import org.openqa.selenium.chrome.ChromeOptions;
18 +
/**
 * Mutable value holder for one movie scraped from the CGV chart.
 *
 * <p>Every field has a getter and a setter. {@link #getLink()} builds the ticketing URL from the
 * movie code, {@link #toString()} renders the movie as a framed multi-line description, and
 * {@link #printMovieInfo()} writes that description to standard output.
 */
class CGVMovieInfo {
    private static final String RULE = "-------------------------------------------------------";

    private String title;     // movie title
    private int rank;         // rank by reservation rate within CGV
    private float score;      // reservation rate (printed with a trailing "%")
    private String goldenEgg; // "Golden Egg" index, stored as text
    private String movieCode; // CGV movie code, used to build the ticketing link

    /**
     * Creates a movie record.
     *
     * @param title     movie title
     * @param rank      rank by reservation rate
     * @param score     reservation rate
     * @param goldenEgg Golden Egg index text
     * @param movieCode CGV movie code
     */
    public CGVMovieInfo(String title, int rank, float score, String goldenEgg, String movieCode) {
        this.title = title;
        this.rank = rank;
        this.score = score;
        this.goldenEgg = goldenEgg;
        this.movieCode = movieCode;
    }

    /** @return the movie title */
    public String getTitle() {
        return title;
    }

    /** @param title the new movie title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the rank by reservation rate */
    public int getRank() {
        return rank;
    }

    /** @param rank the new rank */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return the reservation rate */
    public float getScore() {
        return score;
    }

    /** @param score the new reservation rate */
    public void setScore(float score) {
        this.score = score;
    }

    /** @return the Golden Egg index text */
    public String getGoldenEgg() {
        return goldenEgg;
    }

    /** @param goldenEgg the new Golden Egg index text */
    public void setGoldenEgg(String goldenEgg) {
        this.goldenEgg = goldenEgg;
    }

    /** @return the CGV movie code */
    public String getMovieCode() {
        return movieCode;
    }

    /** @param movieCode the new CGV movie code */
    public void setMovieCode(String movieCode) {
        this.movieCode = movieCode;
    }

    /**
     * Builds the ticketing URL for this movie.
     *
     * @return the ticket page URL with the movie code used for both {@code MOVIE_CD} and
     *         {@code MOVIE_CD_GROUP}
     */
    public String getLink() {
        return String.format("https://www.cgv.co.kr/ticket/?MOVIE_CD=%s&MOVIE_CD_GROUP=%s", this.movieCode, this.movieCode);
    }

    /**
     * Renders the movie as six lines: a rule, "rank : title", reservation rate, Golden Egg index,
     * movie code, and a closing rule, joined with the platform line separator.
     *
     * @return the framed description, without a trailing line separator
     */
    @Override
    public String toString() {
        return String.join(System.lineSeparator(),
                RULE,
                this.rank + " : " + this.title,
                "예매율 : " + this.score + "%",
                "골든에그지수 : " + this.goldenEgg,
                "영화코드 : " + this.movieCode,
                RULE);
    }

    /** Prints the description produced by {@link #toString()} to standard output. */
    public void printMovieInfo() {
        System.out.println(this);
    }

}
88 +
89 +public class CGVExample {
90 +
91 + public static final String WEB_DRIVER_ID = "webdriver.chrome.driver"; //드라이버 ID
92 + public static final String WEB_DRIVER_PATH = "WebCrawling/chromedriver"; //드라이버 경로
93 +
94 + public static void main(String[] args) {
95 +
96 + Scanner scanner = new Scanner(System.in);
97 + String url_movies = "https://www.cgv.co.kr/movies/?lt=1&ft=1"; //끝의 쿼리 0은 개봉 전 영화도 포함하는 것. 예매율 순위 가져오기
98 + String url_theaters = "https://www.cgv.co.kr/theaters"; //영화관 정보 가져오는 링크.
99 + String url_ticketing = "https://www.cgv.co.kr/ticket/"; //상영중인 영화 정보 가져오는 링크.
100 +
101 + ArrayList<LinkedHashMap<String, String>> theaters = new ArrayList<>(); //지역별 영화관 HashMap(Key: 영화관, value:영화관별 고유코드)으로 이루어진 Arraylist
102 + ArrayList<CGVMovieInfo> Movies = new ArrayList<>(); //CGVMovieInfo 클래스의 인스턴스들을 원소로 가지는 Arraylist
103 +
104 + // 여기부터 영화관 및 영화관별 고유코드 가져오는 부분.
105 + try{ //드라이버 설정
106 + System.setProperty(WEB_DRIVER_ID,WEB_DRIVER_PATH);
107 + }catch (Exception e){
108 + e.printStackTrace();
109 + }
110 +
111 + ChromeOptions options = new ChromeOptions(); //크롬 설정을 담은 객체 생성
112 + options.addArguments("headless"); //브라우저가 눈에 보이지 않고 컴파일러 내부에서 작동됨.
113 +
114 + WebDriver driver_theaters = new ChromeDriver(options); //위에서 설정한 옵션을 파라미터로 넘겨주고, 드라이버 객체 생성.
115 + driver_theaters.get(url_theaters); //WebDriver 객체를 해당 URL로 이동시킨다.
116 +
117 + //브라우저 이동시 생기는 로드시간을 기다린다.
118 + //HTTP 응답속도 보다 자바의 컴파일 속도가 더 빠르기 때문에 임의적으로 1초를 대기한다.
119 + try {Thread.sleep(1000);} catch (InterruptedException e) {e.printStackTrace();}
120 +
121 + //영화관 및 영화관에 대응되는 영화관별 고유 코드 가져오기.
122 + List<WebElement> area = driver_theaters.findElements(By.className("area"));
123 + for (WebElement elem : area) {
124 + LinkedHashMap<String, String> theaters_info = new LinkedHashMap<>();
125 + List<WebElement> theaters_by_area = elem.findElements(By.tagName("a"));
126 + for (WebElement theater : theaters_by_area) {
127 + String theater_name = theater.getAttribute("title").replace("CGV", "");
128 + String theater_code = theater.getAttribute("href").replaceAll("(.+(?<=theaterCode=))|(.+(?<=theatercode=))", "").substring(0,4);
129 + theaters_info.put(theater_name, theater_code);
130 + }
131 + theaters.add(theaters_info);
132 + }
133 +
134 + try {
135 + driver_theaters.close(); //드라이버 연결 해제
136 + driver_theaters.quit(); //프로세스 종료
137 + } catch (Exception e) {
138 + throw new RuntimeException(e.getMessage());
139 + }
140 +
141 + //여기부터 예매율 순위 가져오는 부분
142 + Document doc_movies;
143 + try {
144 + doc_movies = Jsoup.connect(url_movies).get();
145 + //예매율 Top19까지의 영화의 정보를 가져옴.
146 + Elements elements1 = doc_movies.select("div.sect-movie-chart");
147 + Iterator<Element> rank = elements1.select("strong.rank").iterator();
148 + Iterator<Element> title = elements1.select("strong.title").iterator();
149 + Iterator<Element> score = elements1.select("strong.percent").iterator();
150 + Iterator<Element> GoldenEgg = elements1.select("span.percent").iterator();
151 + Iterator<Element> link = elements1.select("a.link-reservation").iterator();
152 +
153 + //영화 제목, 순위, 예매율, 영화 코드, 골든에그 지수를 가져와 CGVMovieInfo 객체 생성자에 파라미터로 넘겨주고, 인스턴스를 받아옴.
154 + while(title.hasNext()){
155 + String newTitle = title.next().text();
156 + int newRank = Integer.parseInt(rank.next().text().replace("No.",""));
157 + float newScore = Float.parseFloat(score.next().text().replace("예매율", "").replace("%", ""));
158 + String newCode = link.next().attr("href").replaceAll("[^0-9]", "").substring(0,8);
159 +
160 + CGVMovieInfo newMovie = new CGVMovieInfo(newTitle, newRank, newScore, GoldenEgg.next().text(), newCode);
161 + Movies.add(newMovie);
162 + }
163 + }catch(IOException e){
164 + e.printStackTrace();
165 + }
166 +
167 + for (CGVMovieInfo elem : Movies) {
168 + //elem.printMovieInfo();
169 + System.out.println(elem.getRank() + " : " + elem.getTitle());
170 + }
171 +
172 + //영화 이름(Integer 선택지), 영화관 지역 코드, 영화관 이름, 관람 일자 입력 시, (시간 선택 가능한) 예매 사이트로 이동.
173 + System.out.print("예매하고 싶은 영화의 순위를 입력하세요 : ");
174 + int inputRank = scanner.nextInt();
175 +
176 + System.out.print("지역 코드를 입력하세요 : ");
177 + int regionCode = scanner.nextInt();
178 +
179 + System.out.print("영화관명을 입력하세요 : ");
180 + String theaterName = scanner.next();
181 + String theaterCode = theaters.get(regionCode).get(theaterName);
182 +
183 + System.out.print("관람 일자를 입력하세요 : ");
184 + int date = scanner.nextInt();
185 +
186 + String otherFormat = String.format("THEATER_CD=%s&PLAY_YMD=%s", theaterCode, date);
187 + url_ticketing += ("?" + otherFormat);
188 +
189 + //예매 가능한 영화 리스트를 얻기 위해 빠른 예매 사이트로 이동.
190 + WebDriver driver_ticketing = new ChromeDriver();
191 + driver_ticketing.get(url_ticketing);
192 + try {Thread.sleep(1000);} catch (InterruptedException e) {e.printStackTrace();}
193 +
194 + //Frame 전환
195 + WebElement selecting_area = driver_ticketing.switchTo().frame("ticket_iframe").findElement(By.className("theater-area-list"));
196 + List<WebElement> selected_areas_list = selecting_area.findElements(By.cssSelector("ul > li > a > span.name"));
197 +
198 + //지역 코드에 맞게 list element click
199 + selected_areas_list.get(regionCode).click();
200 + try {Thread.sleep(1000);} catch (InterruptedException e) {e.printStackTrace();}
201 +
202 + //선택한 지역에 대응되는 영화관 정보 가져오기
203 + WebElement selecting_theaters = selecting_area.findElement(By.cssSelector("ul > li.selected > div > ul"));
204 + List<WebElement> selected_theaters_list = selecting_theaters.findElements(By.tagName("li"));
205 +
206 + //프로그램 내부에서 가지고 있는 영화관코드와 웹에서 받아온 영화관코드가 일치하는 경우, selected_theaters_list element 클릭
207 + for(WebElement theater_element : selected_theaters_list) {
208 + if(theater_element.getAttribute("theater_cd").equals(theaterCode)){
209 + theater_element.click();
210 + try {Thread.sleep(1000);} catch (InterruptedException e) {e.printStackTrace();}
211 + break;
212 + }
213 + }
214 +
215 + //선택한 영화관에서, 선택한 일자에 상영하는 영화 목록 들고오기
216 + WebElement selecting_movies = driver_ticketing.findElement(By.className("movie-select"));
217 + List<WebElement> selected_movies_list = selecting_movies.findElements(By.cssSelector("#movie_list > ul > li"));
218 + LinkedHashMap<String, String> accessible_movies = new LinkedHashMap<>();
219 +
220 + //선택불가를 제외한 영화 제목 출력
221 + for(WebElement movie_element : selected_movies_list){
222 + String movie_enabled = movie_element.getAttribute("class");
223 + if(movie_enabled.endsWith("dimmed"))
224 + break;
225 + else{
226 + String title = movie_element.findElement(By.cssSelector("span.text")).getText();
227 + String code = movie_element.getAttribute("movie_cd_group");
228 + accessible_movies.put(title, code);
229 + System.out.println(title + " : " + code);
230 + }
231 + }
232 +
233 + try{
234 + driver_ticketing.close(); //드라이버 연결 해제
235 + driver_ticketing.quit(); //프로세스 종료
236 + } catch (Exception e) {
237 + throw new RuntimeException(e.getMessage());
238 + }
239 +
240 + try{
241 + Desktop.getDesktop().browse(new URI(Movies.get(inputRank - 1).getLink() + "&" + otherFormat));
242 + }
243 + catch(IndexOutOfBoundsException | URISyntaxException | IOException e){
244 + System.out.println(e.getClass());
245 + }
246 + }
247 +}