신은섭(Shin Eun Seop)

add build script and update README.md

Now you can use `aggregation.jar` and `decisionTree.jar` after running `mvn package`.

Java-Cesco/Detecting_fraud_clicks/#10
......@@ -16,5 +16,15 @@ sudo sed -i s/\$releasever/6/g /etc/yum.repos.d/epel-apache-maven.repo
sudo yum install -y apache-maven
mvn --version
# clone repo
git clone https://github.com/Java-Cesco/Detecting_fraud_clicks.git
cd Detecting_fraud_clicks
# maven build
mvn package
# run
java -jar target/assembly/Detecting_fraud_clicks-aggregation.jar train_sample.csv agg_data
java -jar target/assembly/Detecting_fraud_clicks-decisionTree.jar agg_data
```
\ No newline at end of file
......
......@@ -37,8 +37,59 @@
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<artifactId>maven-shade-plugin</artifactId>
<executions>
<execution>
<id>aggregation</id>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<outputFile>target/assembly/${project.artifactId}-aggregation.jar</outputFile>
<shadedArtifactAttached>true</shadedArtifactAttached>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>detact.Aggregation</mainClass>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
<execution>
<id>decisionTree</id>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<outputFile>target/assembly/${project.artifactId}-decisionTree.jar</outputFile>
<shadedArtifactAttached>true</shadedArtifactAttached>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>detact.ML.DecisionTree</mainClass>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
......
......@@ -9,12 +9,17 @@ import org.apache.spark.sql.expressions.WindowSpec;
import static org.apache.spark.sql.functions.*;
public class Aggregation {
public static String AGGREGATED_PATH = "agg_data";
public static String ORIGINAL_DATA_PATH = "train_sample.csv";
public static void main(String[] args) {
if (args.length != 2) {
System.out.println("Usage: java -jar aggregation.jar <data_path> <result_path>");
System.exit(0);
}
String data_path = args[0];
String result_path = args[1];
//Create Session
SparkSession spark = SparkSession
.builder()
......@@ -25,7 +30,7 @@ public class Aggregation {
// detact.Aggregation
Aggregation agg = new Aggregation();
Dataset<Row> dataset = Utill.loadCSVDataSet(Aggregation.ORIGINAL_DATA_PATH, spark);
Dataset<Row> dataset = Utill.loadCSVDataSet(data_path, spark);
dataset = agg.changeTimestempToLong(dataset);
dataset = agg.averageValidClickCount(dataset);
dataset = agg.clickTimeDelta(dataset);
......@@ -35,7 +40,7 @@ public class Aggregation {
dataset.where("ip == '5348' and app == '19'").show(10);
// Save to CSV
Utill.saveCSVDataSet(dataset, Aggregation.AGGREGATED_PATH);
Utill.saveCSVDataSet(dataset, result_path);
}
private Dataset<Row> changeTimestempToLong(Dataset<Row> dataset){
......
......@@ -21,7 +21,14 @@ import org.apache.spark.sql.SparkSession;
public class DecisionTree {
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.out.println("Usage: java -jar decisionTree.jar <agg_path>");
System.exit(0);
}
String agg_path = args[0];
//Create Session
SparkSession spark = SparkSession
.builder()
......@@ -30,7 +37,7 @@ public class DecisionTree {
.getOrCreate();
// load aggregated dataset
Dataset<Row> resultds = Utill.loadCSVDataSet(Aggregation.AGGREGATED_PATH, spark);
Dataset<Row> resultds = Utill.loadCSVDataSet(agg_path, spark);
// show Dataset schema
// System.out.println("schema start");
......