Calculating Backward/Forward DELTA Prob2

tnt-ooo-tnt
Commit b40afe35dd78e83cbc08cafd96d84bd072d94af6 b40afe35 1 parent 76cfbe89
Showing 14 changed files with 102 additions and 146 deletions
.gitignore
.idea/Detecting_fraud_clicks.iml
.idea/markdown-navigator.xml
.idea/markdown-navigator/profiles_settings.xml
.idea/misc.xml
.idea/modules.xml
.idea/vcs.xml
2018-1-java.iml
README.md
pom.xml
src/main/java/MapExample.java
src/main/java/calForwardTimeDelta.java
src/main/java/valid.java
src/test/java/testValid.java
--- a/.gitignore 100644 → 100755
View file @b40afe3
+++ b/.gitignore 100644 → 100755
View file @b40afe3
--- a/.idea/Detecting_fraud_clicks.iml deleted 100644 → 0
View file @76cfbe8
+++ b/.idea/Detecting_fraud_clicks.iml deleted 100644 → 0
View file @76cfbe8
- <?xml version="1.0" encoding="UTF-8"?>
- <module type="JAVA_MODULE" version="4">
-   <component name="NewModuleRootManager" inherit-compiler-output="true">
-     <exclude-output />
-     <content url="file://$MODULE_DIR$" />
-     <orderEntry type="inheritedJdk" />
-     <orderEntry type="sourceFolder" forTests="false" />
-   </component>
- </module>
\ No newline at end of file
--- a/.idea/markdown-navigator.xml deleted 100644 → 0
View file @76cfbe8
+++ b/.idea/markdown-navigator.xml deleted 100644 → 0
View file @76cfbe8
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="MarkdownProjectSettings">
-     <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true">
-       <PanelProvider>
-         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
-       </PanelProvider>
-     </PreviewSettings>
-     <ParserSettings gitHubSyntaxChange="false">
-       <PegdownExtensions>
-         <option name="ABBREVIATIONS" value="false" />
-         <option name="ANCHORLINKS" value="true" />
-         <option name="ASIDE" value="false" />
-         <option name="ATXHEADERSPACE" value="true" />
-         <option name="AUTOLINKS" value="true" />
-         <option name="DEFINITIONS" value="false" />
-         <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
-         <option name="FENCED_CODE_BLOCKS" value="true" />
-         <option name="FOOTNOTES" value="false" />
-         <option name="HARDWRAPS" value="false" />
-         <option name="HTML_DEEP_PARSER" value="false" />
-         <option name="INSERTED" value="false" />
-         <option name="QUOTES" value="false" />
-         <option name="RELAXEDHRULES" value="true" />
-         <option name="SMARTS" value="false" />
-         <option name="STRIKETHROUGH" value="true" />
-         <option name="SUBSCRIPT" value="false" />
-         <option name="SUPERSCRIPT" value="false" />
-         <option name="SUPPRESS_HTML_BLOCKS" value="false" />
-         <option name="SUPPRESS_INLINE_HTML" value="false" />
-         <option name="TABLES" value="true" />
-         <option name="TASKLISTITEMS" value="true" />
-         <option name="TOC" value="false" />
-         <option name="WIKILINKS" value="true" />
-       </PegdownExtensions>
-       <ParserOptions>
-         <option name="COMMONMARK_LISTS" value="true" />
-         <option name="DUMMY" value="false" />
-         <option name="EMOJI_SHORTCUTS" value="true" />
-         <option name="FLEXMARK_FRONT_MATTER" value="false" />
-         <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
-         <option name="GFM_TABLE_RENDERING" value="true" />
-         <option name="GITBOOK_URL_ENCODING" value="false" />
-         <option name="GITHUB_EMOJI_URL" value="false" />
-         <option name="GITHUB_LISTS" value="false" />
-         <option name="GITHUB_WIKI_LINKS" value="true" />
-         <option name="JEKYLL_FRONT_MATTER" value="false" />
-         <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
-       </ParserOptions>
-     </ParserSettings>
-     <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true">
-       <GeneratorProvider>
-         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
-       </GeneratorProvider>
-       <headerTop />
-       <headerBottom />
-       <bodyTop />
-       <bodyBottom />
-     </HtmlSettings>
-     <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
-       <StylesheetProvider>
-         <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
-       </StylesheetProvider>
-       <ScriptProviders />
-       <cssText />
-     </CssSettings>
-     <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" />
-     <LinkMapSettings>
-       <textMaps />
-     </LinkMapSettings>
-   </component>
- </project>
\ No newline at end of file
--- a/.idea/markdown-navigator/profiles_settings.xml deleted 100644 → 0
View file @76cfbe8
+++ b/.idea/markdown-navigator/profiles_settings.xml deleted 100644 → 0
View file @76cfbe8
- <component name="MarkdownNavigator.ProfileManager">
-   <settings default="" pdf-export="" />
- </component>
\ No newline at end of file
--- a/.idea/misc.xml
View file @b40afe3
+++ b/.idea/misc.xml
View file @b40afe3
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-   <component name="JavaScriptSettings">
-     <option name="languageLevel" value="ES6" />
+   <component name="ExternalStorageConfigurationManager" enabled="true" />
+   <component name="MavenProjectsManager">
+     <option name="originalFiles">
+       <list>
+         <option value="$PROJECT_DIR$/pom.xml" />
+       </list>
+     </option>
+   </component>
+   <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
+     <output url="file://$PROJECT_DIR$/out" />
   </component>
 </project>
\ No newline at end of file
--- a/.idea/modules.xml deleted 100644 → 0
View file @76cfbe8
+++ b/.idea/modules.xml deleted 100644 → 0
View file @76cfbe8
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="ProjectModuleManager">
-     <modules>
-       <module fileurl="file://$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" filepath="$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" />
-     </modules>
-   </component>
- </project>
\ No newline at end of file
--- a/.idea/vcs.xml
View file @b40afe3
+++ b/.idea/vcs.xml
View file @b40afe3
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
-     <mapping directory="" vcs="Git" />
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
   </component>
 </project>
\ No newline at end of file
--- a/2018-1-java.iml 100644 → 100755
View file @b40afe3
+++ b/2018-1-java.iml 100644 → 100755
View file @b40afe3
--- a/README.md 100644 → 100755
View file @b40afe3
+++ b/README.md 100644 → 100755
View file @b40afe3
--- a/pom.xml 100644 → 100755
View file @b40afe3
+++ b/pom.xml 100644 → 100755
View file @b40afe3
@@ -2,7 +2,7 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-     <modelVersion>1.0.0</modelVersion>
+     <modelVersion>4.0.0</modelVersion>
 
     <groupId>cesco</groupId>
     <artifactId>Detecting_fraud_clicks</artifactId>
@@ -16,7 +16,33 @@
             <artifactId>spark-core_2.11</artifactId>
             <version>2.3.0</version>
         </dependency>
+ 	<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
+         <dependency>
+             <groupId>org.apache.spark</groupId>
+             <artifactId>spark-sql_2.11</artifactId>
+             <version>2.3.0</version>
+         </dependency>
 
+         <dependency>
+             <groupId>com.databricks</groupId>
+             <artifactId>spark-csv_2.11</artifactId>
+             <version>1.5.0</version>
+         </dependency>
     </dependencies>
-     
- </project>
\ No newline at end of file
+ 
+ 
+     <!--maven-compiler-plugin-->
+     <build>
+ 	<plugins>
+ 	    <plugin>
+ 		<groupId>org.apache.maven.plugins</groupId>
+ 		<artifactId>maven-compiler-plugin</artifactId>
+ 		<version>3.1</version>
+ 		<configuration>
+ 			<source>1.8</source>
+ 			<target>1.8</target>
+ 		</configuration>
+ 	    </plugin>
+ 	</plugins>
+     </build>    
+ </project>
--- a/src/main/java/MapExample.java deleted 100644 → 0
View file @76cfbe8
+++ b/src/main/java/MapExample.java deleted 100644 → 0
View file @76cfbe8
- import org.apache.spark.SparkConf;
- import org.apache.spark.api.java.JavaRDD;
- import org.apache.spark.api.java.JavaSparkContext;
- import scala.Tuple2;
- 
- import java.util.Arrays;
- import java.util.List;
- 
- public class MapExample {
-     // Small Spark RDD demo: maps a fixed list of words to (word, value) tuples and prints one of the results.
-     static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco");
-     static JavaSparkContext sc = new JavaSparkContext(conf);
-     // Entry point; args is not read.
-     public static void main(String[] args) throws Exception {
-         
-         // Distribute five sample words across 2 partitions
-         JavaRDD<String> x = sc.parallelize(
-                 Arrays.asList("spark", "rdd", "example", "sample", "example"),
-                 2);
- 
-         // Pair every word with the constant 1 (the mapping step of a word count) and collect the tuples
-         JavaRDD<Tuple2<String, Integer>> y1 = x.map(e -> new Tuple2<>(e, 1));
-         List<Tuple2<String, Integer>> list1 = y1.collect();
- 
-         // Second example: pair every word with its length and collect the tuples
-         JavaRDD<Tuple2<String, Integer>> y2 = x.map(e -> new Tuple2<>(e, e.length()));
-         List<Tuple2<String, Integer>> list2 = y2.collect();
-         // Only list1 is printed; list2 is collected but never used afterwards.
-         System.out.println(list1);
-     }
- }
--- a/src/main/java/calForwardTimeDelta.java 0 → 100644
View file @b40afe3
+++ b/src/main/java/calForwardTimeDelta.java 0 → 100644
View file @b40afe3
+ import org.apache.spark.SparkConf;
+ import org.apache.spark.api.java.JavaSparkContext;
+ import org.apache.spark.sql.Dataset;
+ import org.apache.spark.sql.Row;
+ import org.apache.spark.sql.SparkSession;
+ import org.apache.spark.sql.expressions.Window;
+ import org.apache.spark.sql.expressions.WindowSpec;
+ 
+ import javax.xml.crypto.Data;
+ 
+ import static org.apache.spark.sql.functions.*;
+ 
+ public class calForwardTimeDelta { // Reads a click-log CSV with Spark and shows, per ip, the gap between consecutive click times.
+  static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); // local-mode configuration, app name "Cesco"
+  static JavaSparkContext sc = new JavaSparkContext(conf); // created at class initialization; never referenced by name in this class. NOTE(review): presumably the session built in main reuses this context - confirm
+  // Entry point: obtains a SparkSession and runs calcDelta; args is not read.
+  public static void main(String[] args) throws Exception{
+      // Obtain (or create) the SparkSession used for the Dataset API
+      SparkSession spark = SparkSession
+              .builder()
+              .appName("Detecting Fraud Clicks")
+              .getOrCreate();
+ 
+      // Run the delta calculation
+      calcDelta(spark);
+  }
+  // Loads the CSV, converts the time columns to long, adds a per-ip "delta" column and prints the result.
+  private static void calcDelta(SparkSession spark){
+      // Path of the input CSV file; it is hard-coded, so change it to point at your local copy
+      String filepath =  "/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv";
+ 
+      // Load the CSV into a Dataset, using the first line as the header and inferring column types
+      Dataset<Row> logDF = spark.read()
+              .format("csv")
+              .option("inferSchema", "true")
+              .option("header","true")
+              .load(filepath);
+ 
+      // Convert click_time and attributed_time to long values (NOTE(review): presumably epoch seconds, assuming inferSchema read them as timestamps - confirm)
+ 
+      // Add utc_click_time: click_time cast to long
+      Dataset<Row> newDF = logDF.withColumn("utc_click_time", logDF.col("click_time").cast("long"));
+      // Add utc_attributed_time: attributed_time cast to long
+      newDF = newDF.withColumn("utc_attributed_time", logDF.col("attributed_time").cast("long"));
+      // Drop the original click_time and attributed_time columns
+      newDF = newDF.drop("click_time").drop("attributed_time");
+      newDF.createOrReplaceTempView("logs"); // exposes newDF as temp view "logs"; no SQL in this method queries it
+      // Window over the rows that share an ip, ordered by click time
+      WindowSpec w = Window.partitionBy ("ip")
+              .orderBy("utc_click_time");
+      // Attach the previous click time within each ip window. NOTE(review): lag() looks at the preceding row, so this is a backward delta despite the class name - confirm intent
+      newDF = newDF.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w));
+      newDF.where("ip=10").show(); // prints the rows with ip = 10; looks like a debugging aid
+      newDF = newDF.withColumn("delta", when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("lag(utc_click_time)")))); // delta = utc_click_time - previous click time; 0 - 0 = 0 when the lag value is null
+      //newDF = newDF.withColumn("delta", datediff());
+      newDF = newDF.drop("lag(utc_click_time)"); // the helper column is only needed to compute delta
+      newDF = newDF.orderBy("ip");
+      // Print the resulting rows (show() with its default row limit)
+      newDF.show();
+  }
+ 
+ }
--- a/src/main/java/valid.java deleted 100644 → 0
View file @76cfbe8
+++ b/src/main/java/valid.java deleted 100644 → 0
View file @76cfbe8
- public class valid { // Minimal class that stores one int and can print it.
-     private int x;
-     // Initializes x to 0.
-     valid() {
-         x = 0;
-     }
-     // Prints the current value of x to standard output.
-     void printX(){
-         System.out.println(x);
-     }
-     // Creates an instance and prints its x; args is not read.
-     public static void main(String[] args){
-         valid v = new valid();
-         v.printX();
-     }
-     
- }
--- a/src/test/java/testValid.java 100644 → 100755
View file @b40afe3
+++ b/src/test/java/testValid.java 100644 → 100755
View file @b40afe3