신은섭(Shin Eun Seop)

Merge branch 'feature/#3' into calcDelta

This diff is collapsed. Click to expand it.
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<project version="4">
3 + <component name="MarkdownExportedFiles">
4 + <htmlFiles />
5 + <imageFiles />
6 + <otherFiles />
7 + </component>
8 +</project>
...\ No newline at end of file ...\ No newline at end of file
1 +<?xml version="1.0" encoding="UTF-8"?>
2 +<project version="4">
3 + <component name="MarkdownProjectSettings">
4 + <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true">
5 + <PanelProvider>
6 + <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
7 + </PanelProvider>
8 + </PreviewSettings>
9 + <ParserSettings gitHubSyntaxChange="false">
10 + <PegdownExtensions>
11 + <option name="ABBREVIATIONS" value="false" />
12 + <option name="ANCHORLINKS" value="true" />
13 + <option name="ASIDE" value="false" />
14 + <option name="ATXHEADERSPACE" value="true" />
15 + <option name="AUTOLINKS" value="true" />
16 + <option name="DEFINITIONS" value="false" />
17 + <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
18 + <option name="FENCED_CODE_BLOCKS" value="true" />
19 + <option name="FOOTNOTES" value="false" />
20 + <option name="HARDWRAPS" value="false" />
21 + <option name="HTML_DEEP_PARSER" value="false" />
22 + <option name="INSERTED" value="false" />
23 + <option name="QUOTES" value="false" />
24 + <option name="RELAXEDHRULES" value="true" />
25 + <option name="SMARTS" value="false" />
26 + <option name="STRIKETHROUGH" value="true" />
27 + <option name="SUBSCRIPT" value="false" />
28 + <option name="SUPERSCRIPT" value="false" />
29 + <option name="SUPPRESS_HTML_BLOCKS" value="false" />
30 + <option name="SUPPRESS_INLINE_HTML" value="false" />
31 + <option name="TABLES" value="true" />
32 + <option name="TASKLISTITEMS" value="true" />
33 + <option name="TOC" value="false" />
34 + <option name="WIKILINKS" value="true" />
35 + </PegdownExtensions>
36 + <ParserOptions>
37 + <option name="COMMONMARK_LISTS" value="true" />
38 + <option name="DUMMY" value="false" />
39 + <option name="EMOJI_SHORTCUTS" value="true" />
40 + <option name="FLEXMARK_FRONT_MATTER" value="false" />
41 + <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
42 + <option name="GFM_TABLE_RENDERING" value="true" />
43 + <option name="GITBOOK_URL_ENCODING" value="false" />
44 + <option name="GITHUB_EMOJI_URL" value="false" />
45 + <option name="GITHUB_LISTS" value="false" />
46 + <option name="GITHUB_WIKI_LINKS" value="true" />
47 + <option name="JEKYLL_FRONT_MATTER" value="false" />
48 + <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
49 + </ParserOptions>
50 + </ParserSettings>
51 + <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true">
52 + <GeneratorProvider>
53 + <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
54 + </GeneratorProvider>
55 + <headerTop />
56 + <headerBottom />
57 + <bodyTop />
58 + <bodyBottom />
59 + </HtmlSettings>
60 + <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
61 + <StylesheetProvider>
62 + <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
63 + </StylesheetProvider>
64 + <ScriptProviders />
65 + <cssText />
66 + </CssSettings>
67 + <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" />
68 + <LinkMapSettings>
69 + <textMaps />
70 + </LinkMapSettings>
71 + </component>
72 +</project>
...\ No newline at end of file ...\ No newline at end of file
1 +<component name="MarkdownNavigator.ProfileManager">
2 + <settings default="" pdf-export="" />
3 +</component>
...\ No newline at end of file ...\ No newline at end of file
...@@ -11,4 +11,14 @@ ...@@ -11,4 +11,14 @@
11 <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK"> 11 <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
12 <output url="file://$PROJECT_DIR$/out" /> 12 <output url="file://$PROJECT_DIR$/out" />
13 </component> 13 </component>
14 + <component name="MavenProjectsManager">
15 + <option name="originalFiles">
16 + <list>
17 + <option value="$PROJECT_DIR$/pom.xml" />
18 + </list>
19 + </option>
20 + </component>
21 + <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
22 + <output url="file:///tmp" />
23 + </component>
14 </project> 24 </project>
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
2 <project xmlns="http://maven.apache.org/POM/4.0.0" 2 <project xmlns="http://maven.apache.org/POM/4.0.0"
3 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 - <modelVersion>4.0.0</modelVersion> 5 + <modelVersion>1.0.0</modelVersion>
6 6
7 <groupId>cesco</groupId> 7 <groupId>cesco</groupId>
8 <artifactId>Detecting_fraud_clicks</artifactId> 8 <artifactId>Detecting_fraud_clicks</artifactId>
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
16 <artifactId>spark-core_2.11</artifactId> 16 <artifactId>spark-core_2.11</artifactId>
17 <version>2.3.0</version> 17 <version>2.3.0</version>
18 </dependency> 18 </dependency>
19 - <!-- https://mavnrepository.com/artifact/org.apache.spark/spark-sql --> 19 +
20 <dependency> 20 <dependency>
21 <groupId>org.apache.spark</groupId> 21 <groupId>org.apache.spark</groupId>
22 <artifactId>spark-sql_2.11</artifactId> 22 <artifactId>spark-sql_2.11</artifactId>
...@@ -28,21 +28,22 @@ ...@@ -28,21 +28,22 @@
28 <artifactId>spark-csv_2.11</artifactId> 28 <artifactId>spark-csv_2.11</artifactId>
29 <version>1.5.0</version> 29 <version>1.5.0</version>
30 </dependency> 30 </dependency>
31 - </dependencies>
32 31
32 + </dependencies>
33 33
34 - <!--maven-compiler-plugin-->
35 <build> 34 <build>
36 - <plugins> 35 + <plugins>
37 - <plugin> 36 + <plugin>
38 - <groupId>org.apache.maven.plugins</groupId> 37 + <groupId>org.apache.maven.plugins</groupId>
39 - <artifactId>maven-compiler-plugin</artifactId> 38 + <artifactId>maven-compiler-plugin</artifactId>
40 - <version>3.1</version> 39 + <version>3.6.1</version>
41 - <configuration> 40 + <configuration>
42 - <source>1.8</source> 41 + <source>1.8</source>
43 - <target>1.8</target> 42 + <target>1.8</target>
44 - </configuration> 43 + </configuration>
45 - </plugin> 44 + </plugin>
46 - </plugins> 45 + </plugins>
47 - </build> 46 + </build>
48 -</project> 47 +
48 +
49 +</project>
...\ No newline at end of file ...\ No newline at end of file
......
1 +import org.apache.spark.sql.Dataset;
2 +import org.apache.spark.sql.Row;
3 +import org.apache.spark.sql.SparkSession;
4 +import org.apache.spark.sql.expressions.Window;
5 +import org.apache.spark.sql.expressions.WindowSpec;
6 +
7 +import static org.apache.spark.sql.functions.col;
8 +import static org.apache.spark.sql.functions.count;
9 +import static org.apache.spark.sql.functions.sum;
10 +
11 +
12 +public class AvgAdvTime {
13 +
14 + public static void main(String[] args) throws Exception {
15 +
16 + // Get or create a SparkSession that runs against the "local" master
17 + SparkSession spark = SparkSession
18 + .builder()
19 + .master("local")
20 + .appName("Java Spark SQL basic example")
21 + .getOrCreate();
22 +
23 + // Read the CSV file into a Dataset, using the first row as the header and inferring column types
24 + Dataset<Row> df = spark.read().format("csv")
25 + .option("inferSchema", "true")
26 + .option("header", "true")
27 + .load("train_sample.csv");
28 +
29 + // Cast click_time and attributed_time to long in new utc_* columns, then drop the original columns
30 + Dataset<Row> newdf = df.withColumn("utc_click_time", df.col("click_time").cast("long"));
31 + newdf = newdf.withColumn("utc_attributed_time", df.col("attributed_time").cast("long"));
32 + newdf = newdf.drop("click_time").drop("attributed_time");
33 +
34 + // Window partitioned by 'ip' and 'app', ordered by 'utc_click_time', covering the partition's first row through the current row
35 + WindowSpec w = Window.partitionBy("ip", "app")
36 + .orderBy("utc_click_time")
37 + .rowsBetween(Window.unboundedPreceding(), Window.currentRow());
38 +
39 + // Running aggregates over the window: click count, sum of is_attributed, and their ratio (sum / count)
40 + newdf = newdf.withColumn("cum_count_click", count("utc_click_time").over(w));
41 + newdf = newdf.withColumn("cum_sum_attributed", sum("is_attributed").over(w));
42 + newdf = newdf.withColumn("avg_efficient", col("cum_sum_attributed").divide(col("cum_count_click")));
43 +
44 + // Show the rows for ip 5348 and app 19 as an example, then print the resulting schema
45 + newdf.where("ip == '5348' and app == '19'").show();
46 + newdf.printSchema();
47 +
48 + }
49 +}
...\ No newline at end of file ...\ No newline at end of file
This diff could not be displayed because it is too large.