Toggle navigation
Toggle navigation
This project
Loading...
Sign in
신은섭(Shin Eun Seop)
/
Detecting_fraud_clicks
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
2
Merge Requests
0
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
hyungyun.Moon
2018-05-28 04:14:01 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
efbc91aa3118038a001d6a5ab1b101c771612745
efbc91aa
1 parent
bf4b71aa
change to window function
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
2 deletions
pom.xml
src/main/java/CountTen.java
src/main/java/MapExample.java
pom.xml
View file @
efbc91a
...
...
@@ -21,7 +21,17 @@
<artifactId>
spark-sql_2.11
</artifactId>
<version>
2.2.0
</version>
</dependency>
<dependency>
<groupId>
org.apache.spark
</groupId>
<artifactId>
spark-sql_2.11
</artifactId>
<version>
2.3.0
</version>
</dependency>
<dependency>
<groupId>
com.databricks
</groupId>
<artifactId>
spark-csv_2.11
</artifactId>
<version>
1.5.0
</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
...
...
src/main/java/CountTen.java
0 → 100644
View file @
efbc91a
import
org.apache.spark.sql.Column
;
import
org.apache.spark.sql.Dataset
;
import
org.apache.spark.sql.Row
;
import
org.apache.spark.sql.SparkSession
;
import
org.apache.spark.sql.expressions.Window
;
import
org.apache.spark.sql.expressions.WindowSpec
;
import
static
org
.
apache
.
spark
.
sql
.
functions
.*;
/**
 * Local Spark job that marks, for every click in {@code ./data/train.csv}, whether the
 * next click from the same {@code ip} happened less than ten minutes later.
 *
 * <p>The result is stored in the column {@code is_clicked_in_ten_mins} (1 = yes, 0 = no).
 * Spark's {@code lead} yields {@code null} when a row has no following row in its window
 * partition, so the last click of each ip gets {@code null} rather than 0.
 */
public class CountTen {

    /** Maximum gap between two consecutive clicks of one ip, in the unit of utc_click_time. */
    private static final long TEN_MINUTES_IN_SECONDS = 600L;

    /**
     * Loads the training CSV, derives the {@code is_clicked_in_ten_mins} column and prints
     * the rows of one sample ip to stdout.
     *
     * @param args unused
     * @throws Exception kept for signature compatibility; Spark failures propagate to the caller
     */
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession
                .builder()
                .master("local")
                .appName("Java Spark SQL basic example")
                .getOrCreate();

        // Always release the session (and its underlying SparkContext), even when the
        // pipeline below fails; the original code never stopped it.
        try {
            Dataset<Row> df = spark.read().format("csv")
                    .option("inferSchema", "true")
                    .option("header", "true")
                    .load("./data/train.csv");

            // Cast the time columns to long and replace the originals.
            // NOTE(review): the 600 threshold assumes the cast yields seconds, i.e. that
            // inferSchema reads click_time/attributed_time as timestamps - confirm.
            Dataset<Row> newdf = df
                    .withColumn("utc_click_time", df.col("click_time").cast("long"));
            newdf = newdf
                    .withColumn("utc_attributed_time", df.col("attributed_time").cast("long"));
            newdf = newdf.drop("click_time").drop("attributed_time");

            // One window per ip, clicks ordered by time, so lead(..., 1) is the next click
            // of the same ip.
            // An explicit frame rowsBetween(Window.currentRow(), Window.unboundedPreceding())
            // was tried here and rejected by Spark with
            // "Boundary end is not a valid integer: -9223372036854775808".
            WindowSpec w = Window.partitionBy("ip")
                    .orderBy("utc_click_time");

            // (next click time - this click time) < 10 minutes, stored as a long flag.
            Column secondsUntilNextClick =
                    lead(col("utc_click_time"), 1).over(w).minus(col("utc_click_time"));
            newdf = newdf.withColumn(
                    "is_clicked_in_ten_mins",
                    secondsUntilNextClick.lt(TEN_MINUTES_IN_SECONDS).cast("long"));

            // Spot-check a single ip; show(false) prints the values without truncation.
            newdf.where("ip == '117898'").show(false);
        } finally {
            spark.stop();
        }
    }
}
\ No newline at end of file
src/main/java/MapExample.java
View file @
efbc91a
...
...
@@ -70,7 +70,7 @@ public class MapExample {
// Shared Spark configuration: master URL "local[*]", application name "Cesco".
static
SparkConf
conf
=
new
SparkConf
().
setMaster
(
"local[*]"
).
setAppName
(
"Cesco"
);
// Java Spark context built from conf in a static initializer; main() uses it to read the input file.
static
JavaSparkContext
sc
=
new
JavaSparkContext
(
conf
);
// SQLContext created on top of sc.
static
SQLContext
sqlContext
=
new
SQLContext
(
sc
);
public
static
void
main
(
String
[]
args
)
throws
Exception
{
JavaRDD
<
String
>
file
=
sc
.
textFile
(
"/Users/hyeongyunmun/Dropbox/DetectFraudClick/data/train.csv"
,
1
);
...
...
Please
register
or
login
to post a comment