신은섭(Shin Eun Seop)

add build script and update README.md

Now you can use `aggregation.jar` and `decisionTree.jar` after running `mvn package`.

Java-Cesco/Detecting_fraud_clicks/#10
...@@ -16,5 +16,15 @@ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo ...@@ -16,5 +16,15 @@ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
16 sudo yum install -y apache-maven 16 sudo yum install -y apache-maven
17 mvn --version 17 mvn --version
18 18
19 +# clone repo
20 +git clone https://github.com/Java-Cesco/Detecting_fraud_clicks.git
21 +cd Detecting_fraud_clicks
22 +
23 +# maven build
24 +mvn package
25 +
26 +# run
27 +java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data
28 +java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data
19 29
20 ``` 30 ```
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -37,8 +37,59 @@ ...@@ -37,8 +37,59 @@
37 <plugins> 37 <plugins>
38 <plugin> 38 <plugin>
39 <groupId>org.apache.maven.plugins</groupId> 39 <groupId>org.apache.maven.plugins</groupId>
40 - <artifactId>maven-compiler-plugin</artifactId> 40 + <artifactId>maven-shade-plugin</artifactId>
41 - <version>3.7.0</version> 41 + <executions>
42 + <execution>
43 + <id>aggregation</id>
44 + <goals>
45 + <goal>shade</goal>
46 + </goals>
47 + <configuration>
48 + <outputFile>target/assembly/${project.artifactId}-aggregation.jar</outputFile>
49 + <shadedArtifactAttached>true</shadedArtifactAttached>
50 + <transformers>
51 + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
52 + <mainClass>detact.Aggregation</mainClass>
53 + </transformer>
54 + </transformers>
55 + <filters>
56 + <filter>
57 + <artifact>*:*</artifact>
58 + <excludes>
59 + <exclude>META-INF/*.SF</exclude>
60 + <exclude>META-INF/*.DSA</exclude>
61 + <exclude>META-INF/*.RSA</exclude>
62 + </excludes>
63 + </filter>
64 + </filters>
65 + </configuration>
66 + </execution>
67 + <execution>
68 + <id>decisionTree</id>
69 + <goals>
70 + <goal>shade</goal>
71 + </goals>
72 + <configuration>
73 + <outputFile>target/assembly/${project.artifactId}-decisionTree.jar</outputFile>
74 + <shadedArtifactAttached>true</shadedArtifactAttached>
75 + <transformers>
76 + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
77 + <mainClass>detact.ML.DecisionTree</mainClass>
78 + </transformer>
79 + </transformers>
80 + <filters>
81 + <filter>
82 + <artifact>*:*</artifact>
83 + <excludes>
84 + <exclude>META-INF/*.SF</exclude>
85 + <exclude>META-INF/*.DSA</exclude>
86 + <exclude>META-INF/*.RSA</exclude>
87 + </excludes>
88 + </filter>
89 + </filters>
90 + </configuration>
91 + </execution>
92 + </executions>
42 </plugin> 93 </plugin>
43 </plugins> 94 </plugins>
44 </build> 95 </build>
......
...@@ -9,12 +9,17 @@ import org.apache.spark.sql.expressions.WindowSpec; ...@@ -9,12 +9,17 @@ import org.apache.spark.sql.expressions.WindowSpec;
9 import static org.apache.spark.sql.functions.*; 9 import static org.apache.spark.sql.functions.*;
10 10
11 public class Aggregation { 11 public class Aggregation {
12 -
13 - public static String AGGREGATED_PATH = "agg_data";
14 - public static String ORIGINAL_DATA_PATH = "train_sample.csv";
15 12
16 public static void main(String[] args) { 13 public static void main(String[] args) {
17 - 14 +
15 + if (args.length != 2) {
16 + System.out.println("Usage: java -jar aggregation.jar <data_path> <result_path>");
17 + System.exit(0);
18 + }
19 +
20 + String data_path = args[0];
21 + String result_path = args[1];
22 +
18 //Create Session 23 //Create Session
19 SparkSession spark = SparkSession 24 SparkSession spark = SparkSession
20 .builder() 25 .builder()
...@@ -25,7 +30,7 @@ public class Aggregation { ...@@ -25,7 +30,7 @@ public class Aggregation {
25 // detact.Aggregation 30 // detact.Aggregation
26 Aggregation agg = new Aggregation(); 31 Aggregation agg = new Aggregation();
27 32
28 - Dataset<Row> dataset = Utill.loadCSVDataSet(Aggregation.ORIGINAL_DATA_PATH, spark); 33 + Dataset<Row> dataset = Utill.loadCSVDataSet(data_path, spark);
29 dataset = agg.changeTimestempToLong(dataset); 34 dataset = agg.changeTimestempToLong(dataset);
30 dataset = agg.averageValidClickCount(dataset); 35 dataset = agg.averageValidClickCount(dataset);
31 dataset = agg.clickTimeDelta(dataset); 36 dataset = agg.clickTimeDelta(dataset);
...@@ -35,7 +40,7 @@ public class Aggregation { ...@@ -35,7 +40,7 @@ public class Aggregation {
35 dataset.where("ip == '5348' and app == '19'").show(10); 40 dataset.where("ip == '5348' and app == '19'").show(10);
36 41
37 // Save to csv 42 // Save to csv
38 - Utill.saveCSVDataSet(dataset, Aggregation.AGGREGATED_PATH); 43 + Utill.saveCSVDataSet(dataset, result_path);
39 } 44 }
40 45
41 private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ 46 private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){
......
...@@ -21,7 +21,14 @@ import org.apache.spark.sql.SparkSession; ...@@ -21,7 +21,14 @@ import org.apache.spark.sql.SparkSession;
21 public class DecisionTree { 21 public class DecisionTree {
22 22
23 public static void main(String[] args) throws Exception { 23 public static void main(String[] args) throws Exception {
24 - 24 +
25 + if (args.length != 1) {
26 + System.out.println("Usage: java -jar decisionTree.jar <agg_path>");
27 + System.exit(0);
28 + }
29 +
30 + String agg_path = args[0];
31 +
25 //Create Session 32 //Create Session
26 SparkSession spark = SparkSession 33 SparkSession spark = SparkSession
27 .builder() 34 .builder()
...@@ -30,7 +37,7 @@ public class DecisionTree { ...@@ -30,7 +37,7 @@ public class DecisionTree {
30 .getOrCreate(); 37 .getOrCreate();
31 38
32 // load aggregated dataset 39 // load aggregated dataset
33 - Dataset<Row> resultds = Utill.loadCSVDataSet(Aggregation.AGGREGATED_PATH, spark); 40 + Dataset<Row> resultds = Utill.loadCSVDataSet(agg_path, spark);
34 41
35 // show Dataset schema 42 // show Dataset schema
36 // System.out.println("schema start"); 43 // System.out.println("schema start");
......