Showing
14 changed files
with
102 additions
and
146 deletions
.gitignore
100644 → 100755
File mode changed
.idea/Detecting_fraud_clicks.iml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<module type="JAVA_MODULE" version="4"> | ||
| 3 | - <component name="NewModuleRootManager" inherit-compiler-output="true"> | ||
| 4 | - <exclude-output /> | ||
| 5 | - <content url="file://$MODULE_DIR$" /> | ||
| 6 | - <orderEntry type="inheritedJdk" /> | ||
| 7 | - <orderEntry type="sourceFolder" forTests="false" /> | ||
| 8 | - </component> | ||
| 9 | -</module> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/markdown-navigator.xml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<project version="4"> | ||
| 3 | - <component name="MarkdownProjectSettings"> | ||
| 4 | - <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true"> | ||
| 5 | - <PanelProvider> | ||
| 6 | - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" /> | ||
| 7 | - </PanelProvider> | ||
| 8 | - </PreviewSettings> | ||
| 9 | - <ParserSettings gitHubSyntaxChange="false"> | ||
| 10 | - <PegdownExtensions> | ||
| 11 | - <option name="ABBREVIATIONS" value="false" /> | ||
| 12 | - <option name="ANCHORLINKS" value="true" /> | ||
| 13 | - <option name="ASIDE" value="false" /> | ||
| 14 | - <option name="ATXHEADERSPACE" value="true" /> | ||
| 15 | - <option name="AUTOLINKS" value="true" /> | ||
| 16 | - <option name="DEFINITIONS" value="false" /> | ||
| 17 | - <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" /> | ||
| 18 | - <option name="FENCED_CODE_BLOCKS" value="true" /> | ||
| 19 | - <option name="FOOTNOTES" value="false" /> | ||
| 20 | - <option name="HARDWRAPS" value="false" /> | ||
| 21 | - <option name="HTML_DEEP_PARSER" value="false" /> | ||
| 22 | - <option name="INSERTED" value="false" /> | ||
| 23 | - <option name="QUOTES" value="false" /> | ||
| 24 | - <option name="RELAXEDHRULES" value="true" /> | ||
| 25 | - <option name="SMARTS" value="false" /> | ||
| 26 | - <option name="STRIKETHROUGH" value="true" /> | ||
| 27 | - <option name="SUBSCRIPT" value="false" /> | ||
| 28 | - <option name="SUPERSCRIPT" value="false" /> | ||
| 29 | - <option name="SUPPRESS_HTML_BLOCKS" value="false" /> | ||
| 30 | - <option name="SUPPRESS_INLINE_HTML" value="false" /> | ||
| 31 | - <option name="TABLES" value="true" /> | ||
| 32 | - <option name="TASKLISTITEMS" value="true" /> | ||
| 33 | - <option name="TOC" value="false" /> | ||
| 34 | - <option name="WIKILINKS" value="true" /> | ||
| 35 | - </PegdownExtensions> | ||
| 36 | - <ParserOptions> | ||
| 37 | - <option name="COMMONMARK_LISTS" value="true" /> | ||
| 38 | - <option name="DUMMY" value="false" /> | ||
| 39 | - <option name="EMOJI_SHORTCUTS" value="true" /> | ||
| 40 | - <option name="FLEXMARK_FRONT_MATTER" value="false" /> | ||
| 41 | - <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" /> | ||
| 42 | - <option name="GFM_TABLE_RENDERING" value="true" /> | ||
| 43 | - <option name="GITBOOK_URL_ENCODING" value="false" /> | ||
| 44 | - <option name="GITHUB_EMOJI_URL" value="false" /> | ||
| 45 | - <option name="GITHUB_LISTS" value="false" /> | ||
| 46 | - <option name="GITHUB_WIKI_LINKS" value="true" /> | ||
| 47 | - <option name="JEKYLL_FRONT_MATTER" value="false" /> | ||
| 48 | - <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" /> | ||
| 49 | - </ParserOptions> | ||
| 50 | - </ParserSettings> | ||
| 51 | - <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true"> | ||
| 52 | - <GeneratorProvider> | ||
| 53 | - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" /> | ||
| 54 | - </GeneratorProvider> | ||
| 55 | - <headerTop /> | ||
| 56 | - <headerBottom /> | ||
| 57 | - <bodyTop /> | ||
| 58 | - <bodyBottom /> | ||
| 59 | - </HtmlSettings> | ||
| 60 | - <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true"> | ||
| 61 | - <StylesheetProvider> | ||
| 62 | - <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" /> | ||
| 63 | - </StylesheetProvider> | ||
| 64 | - <ScriptProviders /> | ||
| 65 | - <cssText /> | ||
| 66 | - </CssSettings> | ||
| 67 | - <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" /> | ||
| 68 | - <LinkMapSettings> | ||
| 69 | - <textMaps /> | ||
| 70 | - </LinkMapSettings> | ||
| 71 | - </component> | ||
| 72 | -</project> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 1 | <?xml version="1.0" encoding="UTF-8"?> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
| 2 | <project version="4"> | 2 | <project version="4"> |
| 3 | - <component name="JavaScriptSettings"> | 3 | + <component name="ExternalStorageConfigurationManager" enabled="true" /> |
| 4 | - <option name="languageLevel" value="ES6" /> | 4 | + <component name="MavenProjectsManager"> |
| 5 | + <option name="originalFiles"> | ||
| 6 | + <list> | ||
| 7 | + <option value="$PROJECT_DIR$/pom.xml" /> | ||
| 8 | + </list> | ||
| 9 | + </option> | ||
| 10 | + </component> | ||
| 11 | + <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK"> | ||
| 12 | + <output url="file://$PROJECT_DIR$/out" /> | ||
| 5 | </component> | 13 | </component> |
| 6 | </project> | 14 | </project> |
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
.idea/modules.xml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<project version="4"> | ||
| 3 | - <component name="ProjectModuleManager"> | ||
| 4 | - <modules> | ||
| 5 | - <module fileurl="file://$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" filepath="$PROJECT_DIR$/.idea/Detecting_fraud_clicks.iml" /> | ||
| 6 | - </modules> | ||
| 7 | - </component> | ||
| 8 | -</project> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 1 | <?xml version="1.0" encoding="UTF-8"?> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
| 2 | <project version="4"> | 2 | <project version="4"> |
| 3 | <component name="VcsDirectoryMappings"> | 3 | <component name="VcsDirectoryMappings"> |
| 4 | - <mapping directory="" vcs="Git" /> | 4 | + <mapping directory="$PROJECT_DIR$" vcs="Git" /> |
| 5 | </component> | 5 | </component> |
| 6 | </project> | 6 | </project> |
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
2018-1-java.iml
100644 → 100755
File mode changed
README.md
100644 → 100755
File mode changed
pom.xml
100644 → 100755
| ... | @@ -2,7 +2,7 @@ | ... | @@ -2,7 +2,7 @@ |
| 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" | 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" |
| 3 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | 3 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| 4 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | 4 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
| 5 | - <modelVersion>1.0.0</modelVersion> | 5 | + <modelVersion>4.0.0</modelVersion> |
| 6 | 6 | ||
| 7 | <groupId>cesco</groupId> | 7 | <groupId>cesco</groupId> |
| 8 | <artifactId>Detecting_fraud_clicks</artifactId> | 8 | <artifactId>Detecting_fraud_clicks</artifactId> |
| ... | @@ -16,7 +16,33 @@ | ... | @@ -16,7 +16,33 @@ |
| 16 | <artifactId>spark-core_2.11</artifactId> | 16 | <artifactId>spark-core_2.11</artifactId> |
| 17 | <version>2.3.0</version> | 17 | <version>2.3.0</version> |
| 18 | </dependency> | 18 | </dependency> |
| 19 | + <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql --> | ||
| 20 | + <dependency> | ||
| 21 | + <groupId>org.apache.spark</groupId> | ||
| 22 | + <artifactId>spark-sql_2.11</artifactId> | ||
| 23 | + <version>2.3.0</version> | ||
| 24 | + </dependency> | ||
| 19 | 25 | ||
| 26 | + <dependency> | ||
| 27 | + <groupId>com.databricks</groupId> | ||
| 28 | + <artifactId>spark-csv_2.11</artifactId> | ||
| 29 | + <version>1.5.0</version> | ||
| 30 | + </dependency> | ||
| 20 | </dependencies> | 31 | </dependencies> |
| 21 | - | ||
| 22 | -</project> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 32 | + | ||
| 33 | + | ||
| 34 | + <!--maven-compiler-plugin--> | ||
| 35 | + <build> | ||
| 36 | + <plugins> | ||
| 37 | + <plugin> | ||
| 38 | + <groupId>org.apache.maven.plugins</groupId> | ||
| 39 | + <artifactId>maven-compiler-plugin</artifactId> | ||
| 40 | + <version>3.1</version> | ||
| 41 | + <configuration> | ||
| 42 | + <source>1.8</source> | ||
| 43 | + <target>1.8</target> | ||
| 44 | + </configuration> | ||
| 45 | + </plugin> | ||
| 46 | + </plugins> | ||
| 47 | + </build> | ||
| 48 | +</project> | ... | ... |
src/main/java/MapExample.java
deleted
100644 → 0
| 1 | -import org.apache.spark.SparkConf; | ||
| 2 | -import org.apache.spark.api.java.JavaRDD; | ||
| 3 | -import org.apache.spark.api.java.JavaSparkContext; | ||
| 4 | -import scala.Tuple2; | ||
| 5 | - | ||
| 6 | -import java.util.Arrays; | ||
| 7 | -import java.util.List; | ||
| 8 | - | ||
| 9 | -public class MapExample { | ||
| 10 | - | ||
| 11 | - static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); | ||
| 12 | - static JavaSparkContext sc = new JavaSparkContext(conf); | ||
| 13 | - | ||
| 14 | - public static void main(String[] args) throws Exception { | ||
| 15 | - | ||
| 16 | - // Parallelized with 2 partitions | ||
| 17 | - JavaRDD<String> x = sc.parallelize( | ||
| 18 | - Arrays.asList("spark", "rdd", "example", "sample", "example"), | ||
| 19 | - 2); | ||
| 20 | - | ||
| 21 | - // Word Count Map Example | ||
| 22 | - JavaRDD<Tuple2<String, Integer>> y1 = x.map(e -> new Tuple2<>(e, 1)); | ||
| 23 | - List<Tuple2<String, Integer>> list1 = y1.collect(); | ||
| 24 | - | ||
| 25 | - // Another example of making a tuple with a string and its length | ||
| 26 | - JavaRDD<Tuple2<String, Integer>> y2 = x.map(e -> new Tuple2<>(e, e.length())); | ||
| 27 | - List<Tuple2<String, Integer>> list2 = y2.collect(); | ||
| 28 | - | ||
| 29 | - System.out.println(list1); | ||
| 30 | - } | ||
| 31 | -} |
src/main/java/calForwardTimeDelta.java
0 → 100644
| 1 | +import org.apache.spark.SparkConf; | ||
| 2 | +import org.apache.spark.api.java.JavaSparkContext; | ||
| 3 | +import org.apache.spark.sql.Dataset; | ||
| 4 | +import org.apache.spark.sql.Row; | ||
| 5 | +import org.apache.spark.sql.SparkSession; | ||
| 6 | +import org.apache.spark.sql.expressions.Window; | ||
| 7 | +import org.apache.spark.sql.expressions.WindowSpec; | ||
| 8 | + | ||
| 9 | +import javax.xml.crypto.Data; | ||
| 10 | + | ||
| 11 | +import static org.apache.spark.sql.functions.*; | ||
| 12 | + | ||
| 13 | +public class calForwardTimeDelta { | ||
| 14 | + static SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("Cesco"); | ||
| 15 | + static JavaSparkContext sc = new JavaSparkContext(conf); | ||
| 16 | + | ||
| 17 | + public static void main(String[] args) throws Exception{ | ||
| 18 | + //Create Session | ||
| 19 | + SparkSession spark = SparkSession | ||
| 20 | + .builder() | ||
| 21 | + .appName("Detecting Fraud Clicks") | ||
| 22 | + .getOrCreate(); | ||
| 23 | + | ||
| 24 | + //run methods here | ||
| 25 | + calcDelta(spark); | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | + private static void calcDelta(SparkSession spark){ | ||
| 29 | + // set this to the path of the input file you want to process | ||
| 30 | + String filepath = "/home/chris/.kaggle/competitions/talkingdata-adtracking-fraud-detection/mnt/ssd/kaggle-talkingdata2/competition_files/train_sample.csv"; | ||
| 31 | + | ||
| 32 | + // create Dataset from files | ||
| 33 | + Dataset<Row> logDF = spark.read() | ||
| 34 | + .format("csv") | ||
| 35 | + .option("inferSchema", "true") | ||
| 36 | + .option("header","true") | ||
| 37 | + .load(filepath); | ||
| 38 | + | ||
| 39 | + // cast timestamp(click_time, attributed_time) type to long type | ||
| 40 | + | ||
| 41 | + //add column for long(click_time) | ||
| 42 | + Dataset<Row> newDF = logDF.withColumn("utc_click_time", logDF.col("click_time").cast("long")); | ||
| 43 | + //add column for long(attributed_time) | ||
| 44 | + newDF = newDF.withColumn("utc_attributed_time", logDF.col("attributed_time").cast("long")); | ||
| 45 | + //drop timestamp type columns | ||
| 46 | + newDF = newDF.drop("click_time").drop("attributed_time"); | ||
| 47 | + newDF.createOrReplaceTempView("logs"); | ||
| 48 | + | ||
| 49 | + WindowSpec w = Window.partitionBy ("ip") | ||
| 50 | + .orderBy("utc_click_time"); | ||
| 51 | + | ||
| 52 | + newDF = newDF.withColumn("lag(utc_click_time)", lag("utc_click_time",1).over(w)); | ||
| 53 | + newDF.where("ip=10").show(); | ||
| 54 | + newDF = newDF.withColumn("delta", when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("utc_click_time")).minus(when(col("lag(utc_click_time)").isNull(),lit(0)).otherwise(col("lag(utc_click_time)")))); | ||
| 55 | + //newDF = newDF.withColumn("delta", datediff()); | ||
| 56 | + newDF = newDF.drop("lag(utc_click_time)"); | ||
| 57 | + newDF = newDF.orderBy("ip"); | ||
| 58 | + | ||
| 59 | + newDF.show(); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | +} |
src/main/java/valid.java
deleted
100644 → 0
src/test/java/testValid.java
100644 → 100755
File mode changed
-
Please register or login to post a comment