Showing
1 changed file
with
0 additions
and
67 deletions
1차전처리Code.R
deleted
100644 → 0
# First-pass preprocessing of the raw air-pollution CSV files.
#
# For every file found in `src_dir` this script:
#   1. reads it without treating the first line as a header,
#   2. assigns the eleven column names below,
#   3. replaces NA cells with 0,
#   4. drops the first row and columns 2, 10 and 11
#      (측정소코드, PM25, 주소),
#   5. rewrites 측정일시 from "YYYYMMDDHH" to "YYYY-MM-DD HH:00",
#   6. writes the result to `set_src_dir` as "set_<original file name>".
#
# All helper variables are removed from the workspace at the end.

# Directory holding the raw input files (trailing slash is relied on below).
src_dir <- "2010104050_박재호_졸업논문/데이터/원자료/대기오염데이터/"

src_file <- list.files(src_dir)
src_file
src_file_cnt <- length(src_file)

# Output directory for the preprocessed files (must already exist).
set_src_dir <- "2010104050_박재호_졸업논문/데이터/대기오염데이터/1차전처리(결측치제거 및 시간변수 통일)/"

# Abandoned earlier attempt, kept for reference: stack every raw file into a
# single "2014~2017.csv". Original author's note: it failed because the
# combined file was too large for R to handle.
#
# for (i in 1:src_file_cnt) {
#   rawdata_Set <- read.csv(
#     paste(src_dir, "/", src_file[i], sep = ""),
#     sep = ",", header = F, stringsAsFactors = F)
#
#   write.csv(rawdata_Set,
#             paste(src_dir, "/", "2014~2017.csv", sep = ""),
#             sep = ",",
#             row.names = FALSE,
#             col.names = FALSE,
#             quote = FALSE,
#             append = TRUE)  # append was meant to stack the files
#
#   rm(rawdata_Set)
# }

# seq_along() instead of 1:src_file_cnt: with no input files, 1:0 would
# iterate over c(1, 0) and fail on a missing file name.
for (i in seq_along(src_file)) {

  refactoring_Data <- read.csv(
    paste0(src_dir, src_file[i]),
    header = FALSE,
    stringsAsFactors = FALSE
  )

  colnames(refactoring_Data) <- c(
    "지역", "측정소코드", "측정소명", "측정일시",
    "SO2", "CO", "O3", "NO2", "PM10", "PM25", "주소"
  )

  # Replace missing values with 0.
  # NOTE(review): because the file's own header line is read as a data row,
  # the columns are presumably character, in which case blank fields arrive
  # as "" rather than NA and are not touched here -- confirm against the data.
  refactoring_Data[is.na(refactoring_Data)] <- 0

  # Drop row 1 (the header line read as data) and the three unused columns
  # (측정소코드, PM25, 주소).
  refactoring_Data <- refactoring_Data[-1, -c(2, 10, 11)]

  ## Start of timestamp split / recombine ----

  refactoring_Date <- refactoring_Data$측정일시

  year  <- substr(refactoring_Date, 1, 4)
  month <- substr(refactoring_Date, 5, 6)
  day   <- substr(refactoring_Date, 7, 8)
  hour  <- substr(refactoring_Date, 9, 10)

  # NOTE(review): hour "24" is rewritten to "00" without advancing the day,
  # so such rows are stamped at 00:00 of the same date -- confirm this is the
  # intended convention.
  hour <- sub(pattern = "24", replacement = "00", x = hour)
  hour <- paste0(hour, ":00")

  # sprintf() yields a zero-length result for zero-length input, so a
  # header-only file produces an empty output instead of an assignment error.
  refactoring_Date <- sprintf("%s-%s-%s %s", year, month, day, hour)

  refactoring_Data$측정일시 <- refactoring_Date

  ## End of timestamp split / recombine ----

  # write.csv() ignores `sep`, `col.names` and `append` (with a warning each),
  # so they are omitted; the output is unchanged: comma-separated, header row
  # included, existing file overwritten.
  write.csv(
    refactoring_Data,
    paste0(set_src_dir, "set_", src_file[i]),
    row.names = FALSE,
    quote = FALSE
  )
}

# Remove the helper variables; only names that actually exist are removed, so
# a run with no input files does not warn about objects never created.
cleanup_names <- c(
  "i", "src_dir", "src_file", "src_file_cnt",
  "year", "month", "day", "hour",
  "refactoring_Date", "refactoring_Data", "set_src_dir"
)
rm(list = c(intersect(cleanup_names, ls()), "cleanup_names"))
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment