양지수

Revert "Merge branch 'master' into 'master'"

This reverts merge request !1
Showing 720 changed files with 0 additions and 843 deletions
import warnings
######### Work in progress as of 5/23 #########
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
####### Builds [date, single news word] rows; reference data for the COUNTIF step used in the chart analysis
# e.g. feed in hmm뉴스키워드날짜뉴스모으고특수삭제.xlsx <- obtained by un-commenting the marked section in the middle of DayNewsMerge.py
Stockfilename = input("키워드파일이름입력:")  # keyword file name
fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/" + Stockfilename + ".xlsx"
Stockfile = openpyxl.load_workbook(fileStock)  # open the keyword workbook
stock_ws = Stockfile.active
Stock_data = []  # rows as lists
date = []
i = 0
for row in stock_ws.rows:
    Stock_data.append([])
    date.append(row[1].value)
    for cell in row:
        if cell.value is not None:
            Stock_data[i].append(cell.value)
    i += 1
del Stock_data[0]  # drop the header row
del date[0]
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the index cell of each row
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the date cell of each row
print(Stock_data)
print(date)
a = []  # [date, word] pairs
print(len(date), len(Stock_data))
for j in range(len(Stock_data)):
    for k in range(len(Stock_data[j])):
        a.append([date[j], Stock_data[j][k]])
print(a)
df_SourTar = pd.DataFrame(a)
df_SourTar.to_excel(Stockfilename + 'countif.xlsx', sheet_name='sheet1')
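# Note (a sketch, not part of the original flow): the per-(date, word) counts that the COUNTIF
# step later computes in Excel could also be obtained here directly from the dataframe built above.
counts = df_SourTar.groupby([0, 1]).size()  # rows are labelled by (date, word); values are frequencies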
import pandas as pd
# Small pandas exercise: drop the non-male rows so that only the '남자' (male) rows remain.
source = {
    '학년': [1, 2, 1, 3, 4],
    '성별': ['남자', '여자', '남자', '여자', '남자'],
    '국어': [98, 88, 92, 63, 120],
    '영어': [88, 90, 70, 60, 50],
    '수학': [64, 62, None, 31, None],
    '과학': [None, 72, None, 70, 88]
}
df = pd.DataFrame(source)
df1 = df[(df['성별'] != '남자')].index  # indices of the rows to discard
df2 = df.drop(df1)                      # remaining rows: 성별 == '남자'
print(df)
print("---------")
print(df2)
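# Illustrative alternative (not in the original): the same selection in one step with a boolean
# mask, instead of collecting the complementary index and dropping it.
df3 = df[df['성별'] == '남자']  # same rows as df2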
import warnings
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
# Feed in the 000_KNU_New_Vdic2.xlsx file
Stockfilefolder = input("종목시세폴더입력: ")  # folder name
Stockfilename = input("시세파일이름입력:")     # file name
fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
Stockfile = openpyxl.load_workbook(fileStock)  # open the workbook
stock_ws = Stockfile.active
Stock_data = []  # rows as lists
i = 0
for row in stock_ws.rows:
    Stock_data.append([])
    for cell in row:
        if cell.value is not None:
            Stock_data[i].append(cell.value)
    i += 1
del Stock_data[0]  # drop the header row
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the index cell of each row
#print(Stock_data)
vert_p = []  # remove vertical duplicates
for i in range(len(Stock_data)):
    vert_p.append([])
    for j in range(len(Stock_data[i])):
        vert_p[i].append(Stock_data[i][j])  # copy the [word, score] pairs
print(vert_p)
vert_p.sort(key=lambda x: x[0])  # sort by word
for i in range(len(vert_p) - 2):  # accumulate scores of identical words, blank out the duplicates
    for j in range(i + 1, len(vert_p)):
        if vert_p[i][0] == vert_p[j][0]:
            vert_p[i][1] += vert_p[j][1]
            vert_p[j] = ['0', 0]
    if not str.isalnum(vert_p[i][0]):  # drop words containing special characters
        vert_p[i] = ['0', 0]
vert_p = [i for i in vert_p if '0' not in i]  # remove the blanked-out rows
df_ver = pd.DataFrame(vert_p)
df_ver.to_excel(Stockfilename + ' Stock_dictionary2.xlsx', sheet_name='sheet1')
#### Dictionary complete ####
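# A possible shortcut for the deduplication above (a sketch, not the original approach): pandas
# can aggregate duplicate words directly, avoiding the O(n^2) pairwise comparison. The column
# names 'word' and 'score' below are assumptions about the two-column layout of the input file.
def build_dictionary_with_pandas(path):
    part = pd.read_excel(path, index_col=0)
    part.columns = ['word', 'score']
    part = part[part['word'].map(lambda w: str(w).isalnum())]   # same special-character filter as above
    return part.groupby('word', as_index=False)['score'].sum()  # sum the scores of duplicate words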
import warnings
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
Stockfilefolder = input("종목시세폴더입력: ")  # folder name
Stockfilename = input("시세파일이름입력:")     # file name
fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
Stockfile = openpyxl.load_workbook(fileStock)  # open the workbook
stock_ws = Stockfile.active
Stock_data = []  # rows as lists
i = 0
for row in stock_ws.rows:
    Stock_data.append([])
    for cell in row:
        if cell.value is not None:
            Stock_data[i].append(cell.value)
    i += 1
del Stock_data[0]  # drop the header row
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the first cell of each row
#print(Stock_data)
vert_p = []  # remove vertical duplicates
for i in range(len(Stock_data)):
    vert_p.append([])
    for j in range(len(Stock_data[i])):
        vert_p[i].append(Stock_data[i][j])  # copy the [word, score] pairs
print(vert_p)
vert_p.sort(key=lambda x: x[0])  # sort by word
for i in range(len(vert_p) - 2):  # accumulate scores of identical words, blank out the duplicates
    for j in range(i + 1, len(vert_p)):
        if vert_p[i][0] == vert_p[j][0]:
            vert_p[i][1] += vert_p[j][1]
            vert_p[j] = ['0', 0]
    if not str.isalnum(vert_p[i][0]):  # drop words containing special characters
        vert_p[i] = ['0', 0]
vert_p = [i for i in vert_p if '0' not in i]  # remove the blanked-out rows
df_ver = pd.DataFrame(vert_p)
df_ver.to_excel(Stockfilename + ' Stock_dictionary2.xlsx', sheet_name='sheet1')
# **뉴스 키워드 노출 빈도수에 따른 기업 주가 영향 분석 (Analysis of the Impact of News-Keyword Exposure Frequency on Corporate Stock Prices)**
---------------------------------------------------
## **Advisor**
* Prof. 한치근
## **Team Members**
* 2017104003 Dept. of Computer Engineering, 양지수
* 2017104039 Dept. of Computer Engineering, 한서흔
---
## **Development Schedule**
* 2021.03.09–2021.03.22 Data collection
* 2021.03.23–2021.04.19 Natural-language processing, interim report
* 2021.04.20–2021.05.04 Sentiment-lexicon analysis and construction of a domain-specific dictionary
* 2021.05.05– Centrality analysis and final results
import warnings
warnings.simplefilter("ignore")
import konlpy
from konlpy.tag import *
import openpyxl
import pandas as pd
from math import log10
import numpy as np
# Morphological-analysis library
#okt = Okt()
hannanum = Hannanum()
#filename = input("분석할 파일이름 입력:")  # file name
filefolder = input("종목폴더입력: ")
filename = input("파일이름입력:")
filepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스크롤링/" + filefolder + "/" + filename + ".xlsx"
kfile = openpyxl.load_workbook(filepos)  # open the workbook
sheet = kfile.worksheets[0]  # take the data on sheet1
#print(sheet)
data = []
for row in sheet.rows:  # store the crawled news headlines in data
    data.append(row[1].value)
#print(data)
#print(type(data[1]))  # str
newData2 = []
#print(newData)
for i in range(len(data) - 1):
    newData2.append(hannanum.nouns(data[i + 1]))  # extract nouns only; Hannanum performed better than Okt here
#print(newData2)
newData3 = []
for i in range(len(newData2)):
    newData3.append([])
    for j in newData2[i]:
        if not any(map(str.isdigit, j)) and len(j) > 1:  # skip tokens that contain digits or are a single character
            newData3[i].append(j)
#print(newData3)
#print(type(newData2))  # newData2 is a list
#df = pd.DataFrame.from_records(newData3)  # convert newData3 to a dataframe
#df.to_excel(filename + '_명사추출_숫자제외' + '.xlsx')  # export to Excel
# TF-IDF functions
def f(t, d):  # raw frequency of term t in document d
    return d.count(t)
def tf(t, d):  # augmented term frequency
    return 0.5 + 0.5 * f(t, d) / max([f(w, d) for w in d])
def idf(t, D):  # inverse document frequency
    numerator = len(D)  # number of documents in the collection
    denominator = 1 + len([True for d in D if t in d])  # +1 avoids division by zero
    return log10(numerator / denominator)
def tfidf(t, d, D):
    return tf(t, d) * idf(t, D)
def tfidfScorer(D):
    result = []
    for d in D:
        result.append([(t, tfidf(t, d, D)) for t in d])
    return result
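# Quick illustrative check of the scorer above (a sketch, not part of the original pipeline):
# because of the +1 smoothing in idf(), a term that appears in every document gets a negative
# idf, log10(N / (N + 1)), while a term unique to one document out of two gets idf = 0.
def _demo_tfidf():
    toy = [['조선', '수주'], ['조선', '합병']]  # two tiny "documents" of extracted nouns
    return tfidfScorer(toy)  # '조선' scores log10(2/3) ≈ -0.18 in each document; the unique words score 0.0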
# newData3 is already split into noun tokens by the extraction above
if __name__ == '__main__':
    corpus = []
    for i in range(len(newData3)):
        corpus.append(newData3[i])
    TfIf = []  # results
    for i, result in enumerate(tfidfScorer(corpus)):
        #print('====== document[%d] ======' % i)
        #print(result)
        TfIf.append(result)
    print(TfIf)  # TfIf holds (word, weight) pairs per document
    for i in range(len(TfIf) - 1):
        TfIf[i].sort(key=lambda x: x[1], reverse=True)  # sort by weight, descending
    print(TfIf)
    onlynouns = []
    for i in range(len(TfIf)):
        onlynouns.append([])
        for j in range(len(TfIf[i])):
            for k in range(len(TfIf[i][j])):
                if k % 2 == 0:
                    onlynouns[i].append(TfIf[i][j][k])  # keep only the word of each (word, weight) pair
    print(onlynouns)
    #df = pd.DataFrame.from_records(TfIf)  # convert TfIf to a dataframe
    #df.to_excel(filename + '_가중치추출_내림정렬' + '.xlsx')
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/knu.iml" filepath="$PROJECT_DIR$/.idea/knu.iml" />
    </modules>
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
    <mapping directory="$PROJECT_DIR$/KnuSentiLex" vcs="Git" />
  </component>
</project>
import warnings
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
####### Prepares the data needed before building the edge file used in Gephi
#### Feed in the keyword file that combines six months of keywords
Stockfilename = input("키워드파일이름입력:")  # keyword file name
fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/" + Stockfilename + ".xlsx"
Stockfile = openpyxl.load_workbook(fileStock)  # open the keyword workbook
stock_ws = Stockfile.active
Stock_data = []  # rows as lists
date = []
i = 0
for row in stock_ws.rows:
    Stock_data.append([])
    date.append(row[1].value)
    for cell in row:
        if cell.value is not None:
            Stock_data[i].append(cell.value)
    i += 1
del Stock_data[0]  # drop the meaningless header row
del date[0]
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the index cell of each row
for i in range(len(Stock_data)):
    del Stock_data[i][0]  # drop the date cell of each row
Tdata = []
for x in range(len(Stock_data)):
    Tdata.append([])
    for y in range(len(Stock_data[x])):
        if str.isalnum(Stock_data[x][y]):  # keep only words without special characters
            Tdata[x].append(Stock_data[x][y])
result = {'날짜': date, '단어': Tdata}
df = pd.DataFrame(result)
#print(df)
list_df = df.values.tolist()  # convert the dataframe to a list
print(list_df)
#print(list_df[0][0])  # date, e.g. 2021.01.01.
new_date = []  # dates with duplicates removed
for v in date:
    if v not in new_date:
        new_date.append(v)
#print(new_date)
Setlist = []  # keywords grouped by date
for v in range(len(new_date)):
    Setlist.append([])
    Setlist[v].append(new_date[v])
    for i in range(len(list_df)):
        for j in range(len(list_df[i][1])):
            if new_date[v] == list_df[i][0]:
                Setlist[v].append(list_df[i][1][j])
print(Setlist)
df_ver = pd.DataFrame(Setlist)
#df_ver.to_excel(Stockfilename+' 날짜뉴스모으고특수삭제.xlsx',sheet_name='sheet1')
SourceTarget = []
for i in range(len(list_df)):
    SourceTarget.append([])
    for j in range(len(list_df[i][1]) - 1):
        SourceTarget.append([list_df[i][0], list_df[i][1][j], list_df[i][1][j + 1], 1])  # [date, word, next word, weight 1]
print(SourceTarget)
SourceTarget = [v for v in SourceTarget if v]  # drop the empty placeholder lists
df_SourTar = pd.DataFrame(SourceTarget)
df_SourTar.to_excel(Stockfilename + 'Edge3.xlsx', sheet_name='sheet1')
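# Note (a sketch, not part of the original script): Gephi's spreadsheet importer looks for
# columns named Source / Target / Weight, so naming the columns before the export above may
# save a manual mapping step, e.g.
#   df_SourTar.columns = ['Date', 'Source', 'Target', 'Weight']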
# KNU Korean Sentiment Lexicon
# Authors: 온병원, 박상민, 나철원
# Affiliation: Data Intelligence Lab, Dept. of Software Convergence Engineering, Kunsan National University
# Homepage: dilab.kunsan.ac.kr
# Date: 2018.05.14
# Definition data source: https://github.com/mrchypark/stdkor
# Neologism data source: https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD%EC%9D%98_%EC%9D%B8%ED%84%B0%EB%84%B7_%EC%8B%A0%EC%A1%B0%EC%96%B4_%EB%AA%A9%EB%A1%9D
# Emoticon data source: https://ko.wikipedia.org/wiki/%EC%9D%B4%EB%AA%A8%ED%8B%B0%EC%BD%98
# SentiWordNet_3.0.0_20130122 data source: http://sentiwordnet.isti.cnr.it/
# SenticNet-5.0 data source: http://sentic.net/
# 감정단어사전0603 (emotion word dictionary) data source: http://datascience.khu.ac.kr/board/bbs/board.php?bo_table=05_01&wr_id=91
# 김은영, "국어 감정동사 연구" (A Study of Korean Emotion Verbs), Feb. 2004, Ph.D. dissertation, Dept. of Korean Language and Literature, Chonnam National University
#-*-coding:utf-8-*-
import collections
import json
import warnings
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
import re
from datetime import datetime
############ Per-stock sentiment scoring, e.g. the step that feeds in the hmm뉴스키워드.xlsx file
class KnuSL():
    def data_list(wordname):
        # Look wordname up in the KNU sentiment lexicon and return its polarity
        # (called on the class itself, so there is no self parameter)
        with open('KnuSentiLex/data/SentiWord_info.json', encoding='utf-8-sig', mode='r') as f:
            data = json.load(f)
        result = [0, 0]
        for i in range(0, len(data)):
            if data[i]['word'] == wordname:
                result.pop()
                result.pop()
                result.append(data[i]['word_root'])
                result.append(int(data[i]['polarity']))
        r_word = result[0]  # word root
        s_word = result[1]  # polarity
        return s_word
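# Possible optimisation (a sketch, not part of the original code): data_list() re-opens and
# re-parses SentiWord_info.json on every lookup, so scoring thousands of keywords is slow.
# Loading the lexicon once into a dict gives O(1) lookups. The names load_senti_dict and
# fast_polarity below are illustrative, not from the original project.
def load_senti_dict(path='KnuSentiLex/data/SentiWord_info.json'):
    with open(path, encoding='utf-8-sig', mode='r') as f:
        return {entry['word']: int(entry['polarity']) for entry in json.load(f)}

def fast_polarity(senti_dict, wordname):
    return senti_dict.get(wordname, 0)  # 0 = neutral/unknown, the same default data_list() falls back to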
if __name__ == "__main__":
    ksl = KnuSL
    print("\nKNU 한국어 감성사전입니다~ :)")
    print("사전에 단어가 없는 경우 결과가 None으로 나타납니다!!!")
    print("종료하시려면 #을 입력해주세요!!!")
    print("-2:매우 부정, -1:부정, 0:중립 or Unknown, 1:긍정, 2:매우 긍정")
    print("\n")
    #########
    Newsfilefolder = input("종목폴더입력: ")
    Newsfilename = input("파일이름입력:")
    Newsfilepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/" + Newsfilefolder + "/" + Newsfilename + ".xlsx"
    Newsfile = openpyxl.load_workbook(Newsfilepos)  # open the keyword workbook
    ws = Newsfile.active
    data = []
    date = []
    i = 0
    for row in ws.rows:
        data.append([])
        date.append(row[1].value)
        for cell in row:
            if cell.value is not None:
                data[i].append(cell.value)
        i += 1
    del data[0]  # drop the meaningless header row
    del date[0]
    for i in range(len(data)):
        del data[i][0]  # drop the index cell of each row
    for i in range(len(data)):
        del data[i][0]  # drop the date cell of each row
    KNUdata = []
    Tdata = []
    for x in range(len(data)):
        KNUdata.append([])
        Tdata.append([])
        for y in range(len(data[x])):
            KNUdata[x].append(ksl.data_list(data[x][y]))
            Tdata[x].append([data[x][y], KNUdata[x][y]])
    result = {'날짜': date, '단어, 극성': Tdata}
    df = pd.DataFrame(result)
    #print(df)
    list_df = df.values.tolist()  # convert the dataframe to a list
    #print(list_df)
    #print(list_df[0][0])  # date, e.g. 2021.01.01.
    new_date = []  # dates with duplicates removed
    for v in date:
        if v not in new_date:
            new_date.append(v)
    #print(new_date)
    Setlist = []  # keywords grouped by date
    for v in range(len(new_date)):
        Setlist.append([])
        Setlist[v].append(new_date[v])
        for i in range(len(list_df)):
            for j in range(len(list_df[i][1])):
                if new_date[v] == list_df[i][0]:
                    Setlist[v].append(list_df[i][1][j])
    print(Setlist)
    print(Setlist[0][0])  # e.g. 2021.01.01
    print(type(Setlist[0][0]))
    print(Setlist[0][0].split('-'))
    print(Setlist[0][1][1])  # polarity, e.g. 0
    print(type(Setlist[0][1][1]))  # polarity values are all int
    #print(list_df[0][1][0])  # keyword and polarity, e.g. ['HMM…"체질개선해', 'X']
    #print(list_df[0][1][0][1])  # polarity x
    #print(list_df[0][0].split('.')[:3])  # ['2021', '01', '01']
    #df.to_excel(Newsfilename+' KNU.xlsx',sheet_name='sheet1')
    Stockfilefolder = input("종목시세폴더입력: ")
    Stockfilename = input("시세파일이름입력:")
    fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
    Stockfile = openpyxl.load_workbook(fileStock)  # open the daily price workbook
    stock_ws = Stockfile.active
    Stock_data = []  # rows as lists
    i = 0
    for row in stock_ws.rows:
        Stock_data.append([])
        for cell in row:
            if cell.value is not None:
                Stock_data[i].append(cell.value)
        i += 1
    del Stock_data[0]  # drop the header row
    for i in range(len(Stock_data)):
        del Stock_data[i][2]  # drop the 대비 (change) column
    for i in range(len(Stock_data)):
        del Stock_data[i][7]  # drop the 거래대금 (trading value) column
    for i in range(len(Stock_data)):
        del Stock_data[i][7]  # drop the 시가총액 (market cap) column
    for i in range(len(Stock_data)):
        del Stock_data[i][7]  # drop the 상장주식수 (listed shares) column; remaining columns: date, close, change rate, open, high, low, volume
    #print(Stock_data)
    def Calpercentage(a, b):  # deviation of the high/low b from the open a, in percent
        return abs(a - b) / a * 100
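    # e.g. Calpercentage(100, 102) == 2.0 and Calpercentage(100, 98) == 2.0: because of the
    # abs(), the function always returns a value >= 0 regardless of direction.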
    #### The section below still needs revision (incomplete) ####
    i = 0
    for k in range(len(Setlist)):
        if Stock_data[i][0].split('/') == Setlist[k][0].split('-'):  # the news date matches the trading date
            if Calpercentage(Stock_data[i][3], Stock_data[i][4]) > 2:  # the day's high is more than 2% above the open
                for j in range(1, len(Setlist[k])):
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = 1
                    else:
                        Setlist[k][j][1] += 1
            elif Calpercentage(Stock_data[i][3], Stock_data[i][5]) > 2:  # the day's low is more than 2% below the open
                for j in range(1, len(Setlist[k])):
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = -1
                    else:
                        Setlist[k][j][1] -= 1
            else:
                if Stock_data[i + 1][2] > 0:  # the next day's change rate is positive,
                    for j in range(1, len(Setlist[k])):  # so treat yesterday's news as good news
                        if Setlist[k][j][1] == 0:
                            Setlist[k][j][1] = 1
                        else:
                            Setlist[k][j][1] += 1
                elif Stock_data[i + 1][2] < 0:
                    for j in range(1, len(Setlist[k])):  # negative, so treat yesterday's news as bad news
                        if Setlist[k][j][1] == 0:
                            Setlist[k][j][1] = -1
                        else:
                            Setlist[k][j][1] -= 1
            i += 1
        else:
            if Calpercentage(Stock_data[i][3], Stock_data[i][4]) > 2:  # the day's high is more than 2% above the open
                for j in range(1, len(Setlist[k])):
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = 1
                    else:
                        Setlist[k][j][1] += 1
            elif Calpercentage(Stock_data[i][3], Stock_data[i][5]) > 2:  # the day's low is more than 2% below the open
                for j in range(1, len(Setlist[k])):
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = -1
                    else:
                        Setlist[k][j][1] -= 1
            else:
                if Stock_data[i + 1][2] > 0:  # the next day's change rate is positive,
                    for j in range(1, len(Setlist[k])):  # so treat yesterday's news as good news
                        if Setlist[k][j][1] == 0:
                            Setlist[k][j][1] = 1
                        else:
                            Setlist[k][j][1] += 1
                elif Stock_data[i + 1][2] < 0:
                    for j in range(1, len(Setlist[k])):  # negative, so treat yesterday's news as bad news
                        if Setlist[k][j][1] == 0:
                            Setlist[k][j][1] = -1
                        else:
                            Setlist[k][j][1] -= 1
            i += 1  # <- try deleting this and running hmm one more time
    print(Setlist)
    #df_Setlist = pd.DataFrame(Setlist)
    #df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1')
    Setlist_w = []
    for i in range(len(Setlist)):
        Setlist_w.append([])
        for j in range(1, len(Setlist[i])):
            Setlist_w[i].append(Setlist[i][j][0])  # keep only the words (drop the polarity)
    counter = {}
    for i in range(len(Setlist_w)):
        counter[i] = collections.Counter(Setlist_w[i])  # occurrence counts per day
    for i in range(len(Setlist_w)):
        Setlist_w[i] = list(zip(counter[i].keys(), counter[i].values()))  # list of (word, count) tuples
    Plist = []
    for i in range(len(Setlist_w)):
        Plist.append([])
        for j in range(len(Setlist_w[i])):
            Plist[i].append(list(Setlist_w[i][j]))  # tuples -> lists, [[word, count]]
    for i in range(len(Plist)):
        for j in range(len(Plist[i])):
            Plist[i][j][1] = 0  # reset the polarity to 0
    for i in range(len(Setlist)):
        for j in range(1, len(Setlist[i])):
            for h in range(len(Plist[i])):
                if Setlist[i][j][0] == Plist[i][h][0]:
                    Plist[i][h][1] += Setlist[i][j][1]  # accumulate the polarity
    vert_p = []  # remove vertical duplicates
    for i in range(len(Plist)):
        for j in range(len(Plist[i])):
            vert_p.append(Plist[i][j])  # flatten to one list of [word, score]
    #print(vert_p)
    vert_p.sort(key=lambda x: x[0])  # sort by word
    for i in range(len(vert_p) - 2):  # accumulate scores of identical words, blank out the duplicates
        for j in range(i + 1, len(vert_p)):
            if vert_p[i][0] == vert_p[j][0]:
                vert_p[i][1] += vert_p[j][1]
                vert_p[j] = ['0', 0]
    print(vert_p)
    vert_p = [i for i in vert_p if '0' not in i]  # remove the blanked-out rows
    df_ver = pd.DataFrame(vert_p)
    df_ver.to_excel(Stockfilename + ' KNU_New_Vdic2.xlsx', sheet_name='sheet1')
    #### Run this for each month of keywords, combine the resulting files, and feed them into Merge_dictionay.py to build the dictionary ####
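# Sketch of the merge step described in the comment above (illustrative only): combine the
# monthly '... KNU_New_Vdic2.xlsx' outputs into one dictionary by summing scores per word.
# The glob pattern, column names and output file name are assumptions, not from the original code.
def merge_monthly_dictionaries(pattern='*KNU_New_Vdic2.xlsx', out='merged_dictionary.xlsx'):
    import glob
    frames = []
    for path in glob.glob(pattern):
        part = pd.read_excel(path, index_col=0)  # the files were written with a default index column
        part.columns = ['word', 'score']
        frames.append(part)
    merged = pd.concat(frames).groupby('word', as_index=False)['score'].sum()
    merged.to_excel(out, sheet_name='sheet1')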
# -*-coding:utf-8-*-
import collections
import json
import warnings
warnings.simplefilter("ignore")
import openpyxl
import pandas as pd
import re
from datetime import datetime
######## KOSPI-level sentiment scoring
class KnuSL():
    def data_list(wordname):
        # Look wordname up in the KNU sentiment lexicon and return its polarity
        with open('KnuSentiLex/data/SentiWord_info.json', encoding='utf-8-sig', mode='r') as f:
            data = json.load(f)
        result = [0, 0]
        for i in range(0, len(data)):
            if data[i]['word'] == wordname:
                result.pop()
                result.pop()
                result.append(data[i]['word_root'])
                result.append(int(data[i]['polarity']))
        r_word = result[0]  # word root
        s_word = result[1]  # polarity
        return s_word
if __name__ == "__main__":
    ksl = KnuSL
    print("\nKNU 한국어 감성사전입니다~ :)")
    print("사전에 단어가 없는 경우 결과가 None으로 나타납니다!!!")
    print("종료하시려면 #을 입력해주세요!!!")
    print("-2:매우 부정, -1:부정, 0:중립 or Unknown, 1:긍정, 2:매우 긍정")
    print("\n")
    #########
    Newsfilefolder = input("종목폴더입력: ")
    Newsfilename = input("파일이름입력:")
    Newsfilepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/" + Newsfilefolder + "/" + Newsfilename + ".xlsx"
    Newsfile = openpyxl.load_workbook(Newsfilepos)  # open the keyword workbook
    ws = Newsfile.active
    data = []
    date = []
    i = 0
    for row in ws.rows:
        data.append([])
        date.append(row[1].value)
        for cell in row:
            if cell.value is not None:
                data[i].append(cell.value)
        i += 1
    del data[0]  # drop the meaningless header row
    del date[0]
    for i in range(len(data)):
        del data[i][0]  # drop the index cell of each row
    for i in range(len(data)):
        del data[i][0]  # drop the date cell of each row
    KNUdata = []
    Tdata = []
    for x in range(len(data)):
        KNUdata.append([])
        Tdata.append([])
        for y in range(len(data[x])):
            KNUdata[x].append(ksl.data_list(data[x][y]))
            Tdata[x].append([data[x][y], KNUdata[x][y]])
    result = {'날짜': date, '단어, 극성': Tdata}
    df = pd.DataFrame(result)
    list_df = df.values.tolist()  # convert the dataframe to a list
    new_date = []  # dates with duplicates removed
    for v in date:
        if v not in new_date:
            new_date.append(v)
    # print(new_date)
    Setlist = []  # keywords grouped by date
    for v in range(len(new_date)):
        Setlist.append([])
        Setlist[v].append(new_date[v])
        for i in range(len(list_df)):
            for j in range(len(list_df[i][1])):
                if new_date[v] == list_df[i][0]:
                    Setlist[v].append(list_df[i][1][j])
    Stockfilefolder = input("종목시세폴더입력: ")
    Stockfilename = input("시세파일이름입력:")
    fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
    Stockfile = openpyxl.load_workbook(fileStock)  # open the index price workbook
    stock_ws = Stockfile.active
    Stock_data = []  # rows as lists
    i = 0
    for row in stock_ws.rows:
        Stock_data.append([])
        for cell in row:
            if cell.value is not None:
                Stock_data[i].append(cell.value)
        i += 1
    del Stock_data[0]  # drop the header row
    for i in range(len(Stock_data)):
        del Stock_data[i][2]  # drop the 대비 (change) column
    for i in range(len(Stock_data)):
        del Stock_data[i][7]  # drop the 거래대금 (trading value) column
    for i in range(len(Stock_data)):
        del Stock_data[i][7]  # drop the 시가총액 (market cap) column
    i = 0
    for k in range(len(Setlist)):
        if Stock_data[i][0].split('/') == Setlist[k][0].split('.')[:3]:  # the news date matches the trading date
            if Stock_data[i][2] > 0:  # the KOSPI change is positive
                for j in range(1, len(Setlist[k])):
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = 1
                    else:
                        Setlist[k][j][1] += 1
            elif Stock_data[i][2] < 0:
                for j in range(1, len(Setlist[k])):  # negative, so treat yesterday's news as bad news
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = -1
                    else:
                        Setlist[k][j][1] -= 1
            i += 1
        else:
            if Stock_data[i + 1][2] > 0:  # the next day's change rate is positive,
                for j in range(1, len(Setlist[k])):  # so treat yesterday's news as good news
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = 1
                    else:
                        Setlist[k][j][1] += 1
            elif Stock_data[i + 1][2] < 0:
                for j in range(1, len(Setlist[k])):  # negative, so treat yesterday's news as bad news
                    if Setlist[k][j][1] == 0:
                        Setlist[k][j][1] = -1
                    else:
                        Setlist[k][j][1] -= 1
    Setlist_w = []
    for i in range(len(Setlist)):
        Setlist_w.append([])
        for j in range(1, len(Setlist[i])):
            Setlist_w[i].append(Setlist[i][j][0])  # keep only the words (drop the polarity)
    counter = {}
    for i in range(len(Setlist_w)):
        counter[i] = collections.Counter(Setlist_w[i])  # occurrence counts per day
    for i in range(len(Setlist_w)):
        Setlist_w[i] = list(zip(counter[i].keys(), counter[i].values()))  # list of (word, count) tuples
    Plist = []
    for i in range(len(Setlist_w)):
        Plist.append([])
        for j in range(len(Setlist_w[i])):
            Plist[i].append(list(Setlist_w[i][j]))  # tuples -> lists, [[word, count]]
    for i in range(len(Plist)):
        for j in range(len(Plist[i])):
            Plist[i][j][1] = 0  # reset the polarity to 0
    for i in range(len(Setlist)):
        for j in range(1, len(Setlist[i])):
            for h in range(len(Plist[i])):
                if Setlist[i][j][0] == Plist[i][h][0]:
                    Plist[i][h][1] += Setlist[i][j][1]  # accumulate the polarity
    vert_p = []  # remove vertical duplicates
    for i in range(len(Plist)):
        for j in range(len(Plist[i])):
            vert_p.append(Plist[i][j])  # flatten to one list of [word, score]
    # print(vert_p)
    vert_p.sort(key=lambda x: x[0])  # sort by word
    for i in range(len(vert_p) - 2):  # accumulate scores of identical words, blank out the duplicates
        for j in range(i + 1, len(vert_p)):
            if vert_p[i][0] == vert_p[j][0]:
                vert_p[i][1] += vert_p[j][1]
                vert_p[j] = ['0', 0]
    vert_p = [i for i in vert_p if '0' not in i]  # remove the blanked-out rows
    df_ver = pd.DataFrame(vert_p)
    df_ver.to_excel(Stockfilename + ' KNU_New_vdic2.xlsx', sheet_name='sheet1')
Metadata-Version: 2.1
Name: mecab-python
Version: 0.996-ko-0.9.2-msvc
Summary: UNKNOWN
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Platform: UNKNOWN
UNKNOWN
MeCab.py,sha256=cuvFTwJk_Z38aY54gIKoBtR46p6tPXOVSKkpoA1PcY4,15733
_MeCab.cp37-win_amd64.pyd,sha256=u-WVy7oAK9dySDKhPh_DNHIWW4x6yHSwu6hjMKqdz5E,116736
__pycache__/MeCab.cpython-37.pyc,,
libmecab.dll,sha256=lRhwboyrXWEXXnDn8soNA05id3UvWfPpP7bvTXj5iH4,1908736
mecab_python-0.996_ko_0.9.2_msvc.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
mecab_python-0.996_ko_0.9.2_msvc.dist-info/METADATA,sha256=Qw4D_1k4TCzF1Ul5k47s6hOnafUJSLyiiGhqffEnszs,190
mecab_python-0.996_ko_0.9.2_msvc.dist-info/RECORD,,
mecab_python-0.996_ko_0.9.2_msvc.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
mecab_python-0.996_ko_0.9.2_msvc.dist-info/WHEEL,sha256=jmFYSwR2oi5DgMGgmnuB-EJxqLMkUojGGdmJ0wz35aI,106
mecab_python-0.996_ko_0.9.2_msvc.dist-info/direct_url.json,sha256=8Wv8YjFpjisUlH--mXnzxpj5B8uexD6AtZjonw2yk5k,142
mecab_python-0.996_ko_0.9.2_msvc.dist-info/top_level.txt,sha256=E6HHbCcV114TjQmzLJGG5aSu2Sb0tjGGxkjler1jxrQ,13
Wheel-Version: 1.0
Generator: bdist_wheel (0.31.1)
Root-Is-Purelib: false
Tag: cp37-cp37m-win_amd64
{"archive_info": {}, "url": "file:///C:/Users/yangj/PycharmProjects/pythonProject1/mecab_python-0.996_ko_0.9.2_msvc-cp37-cp37m-win_amd64.whl"}