Add files via upload

pjhmong · GitHub
Commit 6759ef001dc1c0f3366b064dea66ae351359f681 6759ef00 1 parent 3fd5aef5
Showing 9 changed files with 391 additions and 0 deletions
1차전처리Code.R
2010104050_박재호_졸업논문.zip
2차전처리_연도별Code.R
2차전처리_전체Code.R
3차전처리_연도별Code.R
3차전처리_전체Code.R
[박재호]머신러닝을 활용한 대기환경 빅데이터 분석 및 예측.docx
대본.docx
종관관측기상데이터_전처리.R
--- a/1차전처리Code.R 0 → 100644
View file @6759ef0
+++ b/1차전처리Code.R 0 → 100644
View file @6759ef0
+
+# First-pass preprocessing of the raw air-pollution CSV files.
+# For every file in src_dir: label the columns, drop the columns that are not
+# used later, rewrite the timestamp as "YYYY-MM-DD HH:00", and save the result
+# to set_src_dir as "set_<original file name>".
+
+src_dir <- c("2010104050_박재호_졸업논문/데이터/원자료/대기오염데이터/")
+
+src_file <- list.files(src_dir)
+src_file
+
+set_src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/1차전처리(결측치제거 및 시간변수 통일)/")
+
+# Abandoned approach, kept for reference: stack every raw file into one CSV.
+#for(i in 1:src_file_cnt){
+
+#  rawdata_Set <- read.csv(
+#    paste(src_dir,"/",src_file[i],sep=""),sep = ",",header = F,stringsAsFactors = F)
+
+#  write.csv(rawdata_Set,
+#            paste(src_dir,"/","2014~2017.csv",sep = ""),
+#            sep = ",",
+#            row.names = FALSE,
+#            col.names = FALSE,
+#            quote = FALSE,
+#            append = TRUE) # append -> stacks the rows
+
+#  rm(rawdata_Set)
+
+#}
+# Failed because the combined file was too large -- a limitation of R.
+
+# Convert "YYYYMMDDHH" strings into "YYYY-MM-DD HH:00".
+#   stamp: character vector of raw timestamps.
+# Returns a character vector of the same length.
+# NOTE(review): hour "24" is relabelled "00" on the SAME date; it is not
+# moved to the following day. Confirm this convention before joining with
+# data that uses hours 00-23.
+unify_measure_time <- function(stamp) {
+  hour <- sub(pattern = "24", replacement = "00", x = substr(stamp, 9, 10))
+  paste0(
+    substr(stamp, 1, 4), "-", substr(stamp, 5, 6), "-", substr(stamp, 7, 8),
+    " ", hour, ":00"
+  )
+}
+
+# seq_along() makes the loop a no-op when the source directory is empty.
+for (i in seq_along(src_file)) {
+
+  # The header line is read as an ordinary row (header = FALSE) and removed
+  # below, so every column arrives as character.
+  refactoring_Data <- read.csv(paste0(src_dir, src_file[i]),
+                               header = FALSE, stringsAsFactors = FALSE)
+
+  colnames(refactoring_Data) <- c("지역", "측정소코드", "측정소명", "측정일시",
+                                  "SO2", "CO", "O3", "NO2", "PM10", "PM25", "주소")
+
+  # Replace missing values with 0.
+  # NOTE(review): because the columns are character here, blank fields are
+  # presumably read as "" rather than NA and are left untouched -- confirm.
+  refactoring_Data[is.na(refactoring_Data)] <- 0
+
+  # Drop row 1 (the header line) and three columns (측정소코드, PM25, 주소).
+  refactoring_Data <- refactoring_Data[-1, -c(2, 10, 11)]
+
+  refactoring_Data$측정일시 <- unify_measure_time(refactoring_Data$측정일시)
+
+  # write.csv() fixes sep/col.names/append itself (and warns when they are
+  # passed), so only the arguments it honours are given. A header line is
+  # always written.
+  write.csv(refactoring_Data,
+            paste0(set_src_dir, "set_", src_file[i]),
+            row.names = FALSE,
+            quote = FALSE)
+}
+
+# Clean up the workspace; only remove what actually exists so that an empty
+# source directory does not trigger "object not found" warnings.
+rm(list = intersect(
+  c("i", "src_dir", "src_file", "set_src_dir", "refactoring_Data",
+    "unify_measure_time"),
+  ls()
+))
\ No newline at end of file
--- a/2010104050_박재호_졸업논문.zip 0 → 100644
View file @6759ef0
+++ b/2010104050_박재호_졸업논문.zip 0 → 100644
View file @6759ef0
--- a/2차전처리_연도별Code.R 0 → 100644
View file @6759ef0
+++ b/2차전처리_연도별Code.R 0 → 100644
View file @6759ef0
+
+
+# Second-pass preprocessing (per-year variant).
+# Reads the first-pass files, splits them by region (지역), averages the
+# stations of a region for each timestamp, stacks the per-file results and
+# writes one CSV per region.
+
+src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/1차전처리(결측치제거 및 시간변수 통일)/")
+src_file <- list.files(src_dir)
+src_file_cnt <- length(src_file)
+
+set_src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/2차전처리(지역별분할 및 곽측소 시별 통합 및 연도통합)/")
+
+# Files to combine and the sub-folder that receives the result.
+# 1:4 -> 2014 average, 5:8 -> 2015 average, 9:12 -> 2016 average
+# (sub-folder "2014/", "2015/" or "2016/"); every file -> overall ("전체/").
+target_file_idx <- seq_len(src_file_cnt)
+out_subdir <- "전체/"
+
+if (length(target_file_idx) == 0L) {
+  stop("No first-pass files found in ", src_dir, call. = FALSE)
+}
+
+# Region names are taken from the first 2014 file only.
+LocationNameList <- read.csv(paste0(src_dir, "set_2014_1.csv"),
+                             stringsAsFactors = FALSE)
+LocationNameList <- unique(LocationNameList$지역)
+LocationNameList_cnt <- length(LocationNameList)
+
+# Average the measurements of all stations of one region per timestamp.
+#   region_rows: rows of a single region with columns 측정일시, SO2, CO, O3,
+#                NO2, PM10.
+# Returns a data frame with one row per 측정일시 and the five pollutant means.
+average_by_time <- function(region_rows) {
+  region_rows[is.na(region_rows)] <- 0 # missing values are counted as 0
+  aggregate(cbind(SO2, CO, O3, NO2, PM10) ~ 측정일시,
+            data = region_rows, FUN = mean)
+}
+
+# region_parts[[j]][[k]] holds the averages of region j from the k-th file;
+# collecting the pieces and binding once avoids growing a data frame with
+# rbind() inside the loop.
+region_parts <- replicate(LocationNameList_cnt,
+                          vector("list", length(target_file_idx)),
+                          simplify = FALSE)
+
+for (k in seq_along(target_file_idx)) {
+
+  # read.csv() consumes the header line that write.csv() always emits, so no
+  # data row has to be dropped afterwards (dropping row 1 here would discard
+  # a real observation from every file).
+  temp_Data <- read.csv(paste0(src_dir, src_file[target_file_idx[k]]),
+                        stringsAsFactors = FALSE)
+
+  colnames(temp_Data) <- c("지역", "측정소명", "측정일시",
+                           "SO2", "CO", "O3", "NO2", "PM10")
+
+  for (j in seq_len(LocationNameList_cnt)) {
+
+    region_rows <- subset(temp_Data, 지역 == LocationNameList[j])
+
+    # aggregate() fails on zero rows; a region missing from a file is skipped.
+    if (nrow(region_rows) == 0L) {
+      next
+    }
+
+    region_parts[[j]][[k]] <- average_by_time(region_rows)
+  }
+}
+
+# Stack the per-file pieces of every region in file order.
+LocationDataList <- lapply(region_parts, function(parts) {
+  parts <- Filter(Negate(is.null), parts)
+  if (length(parts) == 0L) NULL else do.call(rbind, parts)
+})
+
+for (j in seq_len(LocationNameList_cnt)) {
+
+  if (is.null(LocationDataList[[j]])) {
+    next
+  }
+
+  # write.csv() ignores sep/col.names/append (with a warning), so they are
+  # not passed.
+  write.csv(LocationDataList[[j]],
+            paste0(set_src_dir, out_subdir, LocationNameList[j], ".csv"),
+            row.names = FALSE,
+            quote = FALSE)
+}
+
--- a/2차전처리_전체Code.R 0 → 100644
View file @6759ef0
+++ b/2차전처리_전체Code.R 0 → 100644
View file @6759ef0
+
+
+# Second-pass preprocessing (all-years variant).
+# Same as the per-year script, except that the year is stripped from the
+# timestamp first, so rows are keyed by "MM-DD HH:MM". For every first-pass
+# file the stations of a region are averaged per key; the per-file results
+# are stacked and written as one CSV per region.
+
+src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/1차전처리(결측치제거 및 시간변수 통일)/")
+src_file <- list.files(src_dir)
+src_file_cnt <- length(src_file)
+
+set_src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/2차전처리(지역별분할 및 곽측소 시별 통합 및 연도통합)/")
+
+if (src_file_cnt == 0L) {
+  stop("No first-pass files found in ", src_dir, call. = FALSE)
+}
+
+# Region names are taken from the first 2014 file only.
+LocationNameList <- read.csv(paste0(src_dir, "set_2014_1.csv"),
+                             stringsAsFactors = FALSE)
+LocationNameList <- unique(LocationNameList$지역)
+LocationNameList_cnt <- length(LocationNameList)
+
+# Average the measurements of all stations of one region per time key.
+#   region_rows: rows of a single region with columns 측정일시, SO2, CO, O3,
+#                NO2, PM10.
+# Returns a data frame with one row per 측정일시 and the five pollutant means.
+average_by_time <- function(region_rows) {
+  region_rows[is.na(region_rows)] <- 0 # missing values are counted as 0
+  aggregate(cbind(SO2, CO, O3, NO2, PM10) ~ 측정일시,
+            data = region_rows, FUN = mean)
+}
+
+# region_parts[[j]][[i]] holds the averages of region j from file i; the
+# pieces are bound once at the end instead of calling rbind() in the loop.
+region_parts <- replicate(LocationNameList_cnt,
+                          vector("list", src_file_cnt),
+                          simplify = FALSE)
+
+for (i in seq_len(src_file_cnt)) {
+
+  # read.csv() consumes the header line that write.csv() always emits, so no
+  # data row has to be dropped afterwards (dropping row 1 here would discard
+  # a real observation from every file).
+  temp_Data <- read.csv(paste0(src_dir, src_file[i]), stringsAsFactors = FALSE)
+
+  colnames(temp_Data) <- c("지역", "측정소명", "측정일시",
+                           "SO2", "CO", "O3", "NO2", "PM10")
+
+  # Keep characters 6-16 of "YYYY-MM-DD HH:MM", i.e. "MM-DD HH:MM".
+  temp_Data$측정일시 <- substr(temp_Data$측정일시, 6, 16)
+
+  for (j in seq_len(LocationNameList_cnt)) {
+
+    region_rows <- subset(temp_Data, 지역 == LocationNameList[j])
+
+    # aggregate() fails on zero rows; a region missing from a file is skipped.
+    if (nrow(region_rows) == 0L) {
+      next
+    }
+
+    region_parts[[j]][[i]] <- average_by_time(region_rows)
+  }
+}
+
+# Stack the per-file pieces of every region in file order.
+LocationDataList <- lapply(region_parts, function(parts) {
+  parts <- Filter(Negate(is.null), parts)
+  if (length(parts) == 0L) NULL else do.call(rbind, parts)
+})
+
+for (j in seq_len(LocationNameList_cnt)) {
+
+  if (is.null(LocationDataList[[j]])) {
+    next
+  }
+
+  # write.csv() ignores sep/col.names/append (with a warning), so they are
+  # not passed.
+  write.csv(LocationDataList[[j]],
+            paste0(set_src_dir, "전체/", LocationNameList[j], ".csv"),
+            row.names = FALSE,
+            quote = FALSE)
+}
+
--- a/3차전처리_연도별Code.R 0 → 100644
View file @6759ef0
+++ b/3차전처리_연도별Code.R 0 → 100644
View file @6759ef0
+
+
+# Third-pass preprocessing (per-year variant).
+# Pools the per-region files of one year and averages the five pollutants
+# over all regions at three time resolutions (hour, day, month), writing one
+# CSV per resolution.
+
+# Year to process; it selects both the input and the output sub-folder.
+target_year <- "2016"
+
+src_dir <- paste0("2010104050_박재호_졸업논문/데이터/대기오염데이터/2차전처리(지역별분할 및 곽측소 시별 통합 및 연도통합)/", target_year, "/")
+src_file <- list.files(src_dir)
+src_file_cnt <- length(src_file)
+
+out_dir <- paste0("2010104050_박재호_졸업논문/데이터/대기오염데이터/3차전처리(경기도 전체)/", target_year, "/")
+
+if (src_file_cnt == 0L) {
+  stop("No second-pass files found in ", src_dir, call. = FALSE)
+}
+
+# Output label -> number of leading timestamp characters used as the key.
+# NA keeps the full "YYYY-MM-DD HH:MM" timestamp; 10 keeps "YYYY-MM-DD";
+# 7 keeps "YYYY-MM". Adjust the widths if the timestamp layout differs.
+time_key_width <- c(Hour = NA_integer_, Day = 10L, month = 7L)
+
+pollutant_cols <- c("측정일시", "SO2", "CO", "O3", "NO2", "PM10")
+
+# Read every region file once and pool the rows. (Previously a file was read
+# before the loops via an undefined index `j`, and every file was re-read
+# for each of the three resolutions.)
+all_regions <- do.call(rbind, lapply(src_file, function(file_name) {
+  read.csv(paste0(src_dir, file_name), stringsAsFactors = FALSE)[pollutant_cols]
+}))
+
+for (label in names(time_key_width)) {
+
+  pooled <- all_regions
+  key_width <- time_key_width[[label]]
+
+  if (!is.na(key_width)) {
+    pooled$측정일시 <- substr(pooled$측정일시, 1, key_width)
+  }
+
+  # Aggregate ONCE over the pooled rows so that every row carries the same
+  # weight. Re-aggregating after each rbind() (a running mean of means) gives
+  # later files far more weight than earlier ones.
+  # Rows with a missing pollutant value are dropped by aggregate()'s default
+  # na.action.
+  refactoring_Data <- aggregate(cbind(SO2, CO, O3, NO2, PM10) ~ 측정일시,
+                                data = pooled, FUN = mean)
+
+  # Row names are written as the first column (write.csv default).
+  write.csv(refactoring_Data,
+            paste0(out_dir, "경기도", target_year, "_", label, ".csv"))
+}
+
+
+
+
+
+
--- a/3차전처리_전체Code.R 0 → 100644
View file @6759ef0
+++ b/3차전처리_전체Code.R 0 → 100644
View file @6759ef0
+
+
+# Third-pass preprocessing (all-years variant).
+# Pools the per-region files of the "전체" folder, whose timestamps are keyed
+# as "MM-DD HH:MM", and averages the five pollutants over all regions at
+# three time resolutions (hour, day, month), writing one CSV per resolution.
+
+src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/2차전처리(지역별분할 및 곽측소 시별 통합 및 연도통합)/전체/")
+src_file <- list.files(src_dir)
+src_file_cnt <- length(src_file)
+
+set_src_dir <- c("2010104050_박재호_졸업논문/데이터/대기오염데이터/3차전처리(경기도 전체)/")
+
+if (src_file_cnt == 0L) {
+  stop("No second-pass files found in ", src_dir, call. = FALSE)
+}
+
+# Output label -> number of leading timestamp characters used as the key.
+# NA keeps the full "MM-DD HH:MM" key; 5 keeps "MM-DD"; 2 keeps "MM".
+# Adjust the widths if the timestamp layout differs.
+time_key_width <- c(Hour = NA_integer_, Day = 5L, month = 2L)
+
+pollutant_cols <- c("측정일시", "SO2", "CO", "O3", "NO2", "PM10")
+
+# Read every region file once and pool the rows (previously every file was
+# re-read for each of the three resolutions).
+all_regions <- do.call(rbind, lapply(src_file, function(file_name) {
+  read.csv(paste0(src_dir, file_name), stringsAsFactors = FALSE)[pollutant_cols]
+}))
+
+for (label in names(time_key_width)) {
+
+  pooled <- all_regions
+  key_width <- time_key_width[[label]]
+
+  if (!is.na(key_width)) {
+    pooled$측정일시 <- substr(pooled$측정일시, 1, key_width)
+  }
+
+  # Aggregate ONCE over the pooled rows so that every row carries the same
+  # weight. Re-aggregating after each rbind() (a running mean of means) gives
+  # later files far more weight than earlier ones.
+  # Rows with a missing pollutant value are dropped by aggregate()'s default
+  # na.action.
+  refactoring_Data <- aggregate(cbind(SO2, CO, O3, NO2, PM10) ~ 측정일시,
+                                data = pooled, FUN = mean)
+
+  # Row names are written as the first column (write.csv default).
+  write.csv(refactoring_Data,
+            paste0(set_src_dir, "경기도전체_", label, ".csv"))
+}
+
+
+
+
+
+
--- a/[박재호]머신러닝을 활용한 대기환경 빅데이터 분석 및 예측.docx 0 → 100644
View file @6759ef0
+++ b/[박재호]머신러닝을 활용한 대기환경 빅데이터 분석 및 예측.docx 0 → 100644
View file @6759ef0
--- a/대본.docx 0 → 100644
View file @6759ef0
+++ b/대본.docx 0 → 100644
View file @6759ef0
--- a/종관관측기상데이터_전처리.R 0 → 100644
View file @6759ef0
+++ b/종관관측기상데이터_전처리.R 0 → 100644
View file @6759ef0
+
+# Preprocessing of the raw synoptic weather-observation CSV files.
+# For every file in src_dir: keep columns 2-6, label them, zero-pad
+# single-digit hours in the timestamp, and write the result with an "AOS_"
+# prefix.
+
+src_dir <- c("2010104050_박재호_졸업논문/데이터/원자료/종관관측기상데이터/")
+
+src_file <- list.files(src_dir)
+#src_file
+
+src_file_cnt <- length(src_file)
+#src_file_cnt
+
+# seq_len() makes the loop a no-op when the directory is empty. The former
+# read of src_file[1] before the loop was redundant, since every file is
+# read inside the loop anyway.
+for (i in seq_len(src_file_cnt)) {
+
+  # The header line is read as an ordinary row (header = FALSE) and dropped
+  # together with the unused columns; every column is therefore character.
+  temp_Data <- read.csv(paste0(src_dir, src_file[i]),
+                        header = FALSE, stringsAsFactors = FALSE)
+  temp_Data <- temp_Data[-1, c(2:6)]
+  colnames(temp_Data) <- c("측정일시", "기온", "강수량", "풍속(m/s)", "풍향(16방위)")
+
+  # Zero-pad a single-digit hour: " 7:" -> " 07:" (first match per value).
+  temp_Data$측정일시 <- sub(pattern = " ([0-9]):", replacement = " 0\\1:",
+                        x = temp_Data$측정일시)
+
+  # Replace missing values with 0.
+  # NOTE(review): because the columns are character here, blank fields are
+  # presumably read as "" rather than NA and are left untouched -- confirm.
+  temp_Data[is.na(temp_Data)] <- 0
+
+  # NOTE(review): paste() uses its default separator, so the file name is
+  # "AOS_ <name>" with a space. Kept as is because other scripts may read
+  # the files under that name -- confirm before switching to paste0().
+  write.csv(temp_Data, paste("2010104050_박재호_졸업논문/데이터/종관관측기상데이터/AOS_", src_file[i]))
+
+}
+