add build script and update README.md
Now you can use `aggregation.jar` and `decisionTree.jar` after running `mvn package`. Java-Cesco/Detecting_fraud_clicks/#10
Showing
4 changed files
with
83 additions
and
10 deletions
... | @@ -16,5 +16,15 @@ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo | ... | @@ -16,5 +16,15 @@ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo |
16 | sudo yum install -y apache-maven | 16 | sudo yum install -y apache-maven |
17 | mvn --version | 17 | mvn --version |
18 | 18 | ||
19 | +# clone repo | ||
20 | +git clone https://github.com/Java-Cesco/Detecting_fraud_clicks.git | ||
21 | +cd Detecting_fraud_clicks | ||
22 | + | ||
23 | +# maven build | ||
24 | +mvn package | ||
25 | + | ||
26 | +# run | ||
27 | +java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data | ||
28 | +java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data | ||
19 | 29 | ||
20 | ``` | 30 | ``` |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -37,8 +37,59 @@ | ... | @@ -37,8 +37,59 @@ |
37 | <plugins> | 37 | <plugins> |
38 | <plugin> | 38 | <plugin> |
39 | <groupId>org.apache.maven.plugins</groupId> | 39 | <groupId>org.apache.maven.plugins</groupId> |
40 | - <artifactId>maven-compiler-plugin</artifactId> | 40 | + <artifactId>maven-shade-plugin</artifactId> |
41 | - <version>3.7.0</version> | 41 | + <executions> |
42 | + <execution> | ||
43 | + <id>aggregation</id> | ||
44 | + <goals> | ||
45 | + <goal>shade</goal> | ||
46 | + </goals> | ||
47 | + <configuration> | ||
48 | + <outputFile>target/assembly/${project.artifactId}-aggregation.jar</outputFile> | ||
49 | + <shadedArtifactAttached>true</shadedArtifactAttached> | ||
50 | + <transformers> | ||
51 | + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||
52 | + <mainClass>detact.Aggregation</mainClass> | ||
53 | + </transformer> | ||
54 | + </transformers> | ||
55 | + <filters> | ||
56 | + <filter> | ||
57 | + <artifact>*:*</artifact> | ||
58 | + <excludes> | ||
59 | + <exclude>META-INF/*.SF</exclude> | ||
60 | + <exclude>META-INF/*.DSA</exclude> | ||
61 | + <exclude>META-INF/*.RSA</exclude> | ||
62 | + </excludes> | ||
63 | + </filter> | ||
64 | + </filters> | ||
65 | + </configuration> | ||
66 | + </execution> | ||
67 | + <execution> | ||
68 | + <id>decisionTree</id> | ||
69 | + <goals> | ||
70 | + <goal>shade</goal> | ||
71 | + </goals> | ||
72 | + <configuration> | ||
73 | + <outputFile>target/assembly/${project.artifactId}-decisionTree.jar</outputFile> | ||
74 | + <shadedArtifactAttached>true</shadedArtifactAttached> | ||
75 | + <transformers> | ||
76 | + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||
77 | + <mainClass>detact.ML.DecisionTree</mainClass> | ||
78 | + </transformer> | ||
79 | + </transformers> | ||
80 | + <filters> | ||
81 | + <filter> | ||
82 | + <artifact>*:*</artifact> | ||
83 | + <excludes> | ||
84 | + <exclude>META-INF/*.SF</exclude> | ||
85 | + <exclude>META-INF/*.DSA</exclude> | ||
86 | + <exclude>META-INF/*.RSA</exclude> | ||
87 | + </excludes> | ||
88 | + </filter> | ||
89 | + </filters> | ||
90 | + </configuration> | ||
91 | + </execution> | ||
92 | + </executions> | ||
42 | </plugin> | 93 | </plugin> |
43 | </plugins> | 94 | </plugins> |
44 | </build> | 95 | </build> | ... | ... |
... | @@ -9,12 +9,17 @@ import org.apache.spark.sql.expressions.WindowSpec; | ... | @@ -9,12 +9,17 @@ import org.apache.spark.sql.expressions.WindowSpec; |
9 | import static org.apache.spark.sql.functions.*; | 9 | import static org.apache.spark.sql.functions.*; |
10 | 10 | ||
11 | public class Aggregation { | 11 | public class Aggregation { |
12 | - | ||
13 | - public static String AGGREGATED_PATH = "agg_data"; | ||
14 | - public static String ORIGINAL_DATA_PATH = "train_sample.csv"; | ||
15 | 12 | ||
16 | public static void main(String[] args) { | 13 | public static void main(String[] args) { |
17 | - | 14 | + |
15 | + if (args.length != 2) { | ||
16 | + System.out.println("Usage: java -jar aggregation.jar <data_path> <result_path>"); | ||
17 | + System.exit(0); | ||
18 | + } | ||
19 | + | ||
20 | + String data_path = args[0]; | ||
21 | + String result_path = args[1]; | ||
22 | + | ||
18 | //Create Session | 23 | //Create Session |
19 | SparkSession spark = SparkSession | 24 | SparkSession spark = SparkSession |
20 | .builder() | 25 | .builder() |
... | @@ -25,7 +30,7 @@ public class Aggregation { | ... | @@ -25,7 +30,7 @@ public class Aggregation { |
25 | // detact.Aggregation | 30 | // detact.Aggregation |
26 | Aggregation agg = new Aggregation(); | 31 | Aggregation agg = new Aggregation(); |
27 | 32 | ||
28 | - Dataset<Row> dataset = Utill.loadCSVDataSet(Aggregation.ORIGINAL_DATA_PATH, spark); | 33 | + Dataset<Row> dataset = Utill.loadCSVDataSet(data_path, spark); |
29 | dataset = agg.changeTimestempToLong(dataset); | 34 | dataset = agg.changeTimestempToLong(dataset); |
30 | dataset = agg.averageValidClickCount(dataset); | 35 | dataset = agg.averageValidClickCount(dataset); |
31 | dataset = agg.clickTimeDelta(dataset); | 36 | dataset = agg.clickTimeDelta(dataset); |
... | @@ -35,7 +40,7 @@ public class Aggregation { | ... | @@ -35,7 +40,7 @@ public class Aggregation { |
35 | dataset.where("ip == '5348' and app == '19'").show(10); | 40 | dataset.where("ip == '5348' and app == '19'").show(10); |
36 | 41 | ||
37 | // Save to scv | 42 | // Save to scv |
38 | - Utill.saveCSVDataSet(dataset, Aggregation.AGGREGATED_PATH); | 43 | + Utill.saveCSVDataSet(dataset, result_path); |
39 | } | 44 | } |
40 | 45 | ||
41 | private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ | 46 | private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){ | ... | ... |
... | @@ -21,7 +21,14 @@ import org.apache.spark.sql.SparkSession; | ... | @@ -21,7 +21,14 @@ import org.apache.spark.sql.SparkSession; |
21 | public class DecisionTree { | 21 | public class DecisionTree { |
22 | 22 | ||
23 | public static void main(String[] args) throws Exception { | 23 | public static void main(String[] args) throws Exception { |
24 | - | 24 | + |
25 | + if (args.length != 1) { | ||
26 | + System.out.println("Usage: java -jar decisionTree.jar <agg_path>"); | ||
27 | + System.exit(0); | ||
28 | + } | ||
29 | + | ||
30 | + String agg_path = args[0]; | ||
31 | + | ||
25 | //Create Session | 32 | //Create Session |
26 | SparkSession spark = SparkSession | 33 | SparkSession spark = SparkSession |
27 | .builder() | 34 | .builder() |
... | @@ -30,7 +37,7 @@ public class DecisionTree { | ... | @@ -30,7 +37,7 @@ public class DecisionTree { |
30 | .getOrCreate(); | 37 | .getOrCreate(); |
31 | 38 | ||
32 | // load aggregated dataset | 39 | // load aggregated dataset |
33 | - Dataset<Row> resultds = Utill.loadCSVDataSet(Aggregation.AGGREGATED_PATH, spark); | 40 | + Dataset<Row> resultds = Utill.loadCSVDataSet(agg_path, spark); |
34 | 41 | ||
35 | // show Dataset schema | 42 | // show Dataset schema |
36 | // System.out.println("schema start"); | 43 | // System.out.println("schema start"); | ... | ... |
-
Please register or login to post a comment