양지수

Revert "Merge branch 'master' into 'master'"

This reverts merge request !1
Showing 720 changed files with 0 additions and 843 deletions
No preview for this file type
1 -import warnings
2 -#########5/23일작성중 #########
3 -warnings.simplefilter(("ignore"))
4 -import openpyxl
5 -import pandas as pd
6 -
7 -####### [날짜, 뉴스단어 한개] 구성 만드는 파일 차트 분석 시 count함수 이용 할 때 참조 자료
8 -# ex)hmm뉴스키워드날짜뉴스모으고특수삭제.xlsx 파일 넣음<- DayNewsMerge.py 중간에 주석처리 된 부분 해제하고 결과 얻기
9 -Stockfilename = input("키워드파일이름입력:")
10 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/" + Stockfilename + ".xlsx"
11 -Stockfile = openpyxl.load_workbook(fileStock) # 파일이름입력
12 -stock_ws = Stockfile.active
13 -Stock_data = [] # list 타입
14 -date=[]
15 -i = 0
16 -for row in stock_ws.rows:
17 - Stock_data.append([])
18 - date.append(row[1].value)
19 - for cell in row:
20 - if cell.value != None :
21 - Stock_data[i].append(cell.value)
22 - i += 1
23 -del Stock_data[0]
24 -del date[0]
25 -for i in range(len(Stock_data)):
26 - del Stock_data[i][0] #각 열의 첫번째 행 삭제
27 -for i in range(len(Stock_data)):
28 - del Stock_data[i][0] #각 열의 첫번째 행 삭제
29 -print(Stock_data)
30 -print(date)
31 -a=[] #
32 -print(len(date),len(Stock_data))
33 -for j in range(len(Stock_data)):
34 - for k in range(len(Stock_data[j])):
35 - a.append([date[j],Stock_data[j][k]])
36 -print(a)
37 -df_SourTar = pd.DataFrame(a)
38 -df_SourTar.to_excel(Stockfilename+'countif.xlsx',sheet_name='sheet1')
...\ No newline at end of file ...\ No newline at end of file
1 -import pandas as pd
2 -
# Small pandas demo: build a score table and keep only the rows whose
# '성별' column equals '남자'.
source = {
    '학년': [1, 2, 1, 3, 4],
    '성별': ['남자', '여자', '남자', '여자', '남자'],
    '국어': [98, 88, 92, 63, 120],
    '영어': [88, 90, 70, 60, 50],
    '수학': [64, 62, None, 31, None],
    '과학': [None, 72, None, 70, 88],
}
df = pd.DataFrame(source)

# One boolean mask serves both results, instead of selecting the unwanted
# rows, taking their index and dropping them in a second step.
not_male = df['성별'] != '남자'
df1 = df.index[not_male]   # index labels of the rows that are filtered out
df2 = df[~not_male]        # the remaining rows

print(df)
print("---------")
print(df2)
1 -import warnings
2 -
3 -warnings.simplefilter(("ignore"))
4 -import openpyxl
5 -import pandas as pd
6 -
7 -# 000_KNU_New_Vdic2.xlsx 파일 넣기
8 -
9 -Stockfilefolder = input("종목시세폴더입력: ")
10 -Stockfilename = input("시세파일이름입력:")
11 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
12 -Stockfile = openpyxl.load_workbook(fileStock) # 파일이름입력
13 -stock_ws = Stockfile.active
14 -Stock_data = [] # list 타입
15 -i = 0
16 -for row in stock_ws.rows:
17 - Stock_data.append([])
18 - for cell in row:
19 - if cell.value != None:
20 - Stock_data[i].append(cell.value)
21 - i += 1
22 -del Stock_data[0]
23 -for i in range(len(Stock_data)):
24 - del Stock_data[i][0]
25 -#print(Stock_data)
26 -
27 -
28 -vert_p = [] # 수직 중복 삭제
29 -for i in range(len(Stock_data)):
30 - vert_p.append([])
31 - for j in range(len(Stock_data[i])):
32 - vert_p[i].append(Stock_data[i][j]) # 단어만 넣기
33 -print(vert_p)
34 -
35 -vert_p.sort(key=lambda x: x[0]) # 단어 기준으로 정렬
36 -for i in range(len(vert_p) - 2): # 단어 비교해서 같으면 누적, 다르면 값 바꾸기
37 - for j in range(i + 1, len(vert_p)):
38 - if vert_p[i][0] == vert_p[j][0] :
39 - vert_p[i][1] += vert_p[j][1]
40 - vert_p[j] = ['0', 0]
41 - if str.isalnum(vert_p[i][0]) == False:
42 - vert_p[i] =['0', 0]
43 -
44 -vert_p = [i for i in vert_p if not '0' in i] # '0'들어간 열 제거
45 -df_ver = pd.DataFrame(vert_p)
46 -df_ver.to_excel(Stockfilename + ' Stock_dictionary2.xlsx', sheet_name='sheet1')
47 -####사전 완성####
1 -import warnings
2 -
3 -warnings.simplefilter(("ignore"))
4 -import openpyxl
5 -import pandas as pd
6 -
7 -Stockfilefolder = input("종목시세폴더입력: ")
8 -Stockfilename = input("시세파일이름입력:")
9 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
10 -Stockfile = openpyxl.load_workbook(fileStock) # 파일이름입력
11 -stock_ws = Stockfile.active
12 -Stock_data = [] # list 타입
13 -i = 0
14 -for row in stock_ws.rows:
15 - Stock_data.append([])
16 - for cell in row:
17 - if cell.value != None:
18 - Stock_data[i].append(cell.value)
19 - i += 1
20 -del Stock_data[0]
21 -for i in range(len(Stock_data)):
22 - del Stock_data[i][0] # 대비 삭제
23 -#print(Stock_data)
24 -
25 -
26 -vert_p = [] # 수직 중복 삭제
27 -for i in range(len(Stock_data)):
28 - vert_p.append([])
29 - for j in range(len(Stock_data[i])):
30 - vert_p[i].append(Stock_data[i][j]) # 단어만 넣기
31 -print(vert_p)
32 -
33 -vert_p.sort(key=lambda x: x[0]) # 단어 기준으로 정렬
34 -for i in range(len(vert_p) - 2): # 단어 비교해서 같으면 누적 다르면 값 바꾸기
35 - for j in range(i + 1, len(vert_p)):
36 - if vert_p[i][0] == vert_p[j][0] :
37 - vert_p[i][1] += vert_p[j][1]
38 - vert_p[j] = ['0', 0]
39 - if str.isalnum(vert_p[i][0]) == False:
40 - vert_p[i] =['0', 0]
41 -
42 -vert_p = [i for i in vert_p if not '0' in i] # '0'들어간 열 제거
43 -df_ver = pd.DataFrame(vert_p)
44 -df_ver.to_excel(Stockfilename + ' Stock_dictionary2.xlsx', sheet_name='sheet1')
1 -# **뉴스 키워드 노출 빈도수에 따른 기업 주가 영향 분석**
2 ----------------------------------------------------
3 -----------------------------------------------------
4 -
5 -
6 -## **지도교수님**
7 -* 한치근 교수님
8 -
9 -## **팀원**
10 -* 2017104003 컴퓨터공학과 양지수
11 -* 2017104039 컴퓨터공학과 한서흔
12 -
13 ----
14 -
15 -## **개발일정**
16 -* 2021.03.09~2021.03.22 데이터 수집
17 -* 2021.03.23~2021.04.19 자연어 처리, 중간보고서 작성
18 -* 2021.04.20~2021.05.04 감성사전 분석 및 특화 사전 제작
19 -* 2021.05.05~ 중심성 분석 및 최종 결과 제작
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
1 -import warnings
2 -warnings.simplefilter(("ignore"))
3 -import konlpy
4 -from konlpy.tag import *
5 -import openpyxl
6 -import pandas as pd
7 -from math import log10
8 -import numpy as np
9 -
10 -#형태소분석라이브러리
11 -#okt = Okt()
12 -hannanum = Hannanum()
13 -#filename= input("분석할 파일이름 입력:") #파일명
14 -filefolder = input("종목폴더입력: ")
15 -filename=input("파일이름입력:")
16 -filepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스크롤링/"+filefolder+"/" + filename + ".xlsx"
17 -kfile = openpyxl.load_workbook(filepos)#파일이름입력
18 -sheet=kfile.worksheets[0]#sheet1에 있는 데이터 가죠오기
19 -#print(sheet)
20 -data=[]
21 -for row in sheet.rows: #data에 크롤링한 뉴스 제목들 저장
22 - data.append(
23 - row[1].value
24 - )
25 -#print(data)
26 -#print(type(data[1])) #str
27 -
28 -newData2=[]
29 -
30 -#print(newData)
31 -for i in range(len(data)-1):
32 - newData2.append(hannanum.nouns(data[i+1])) #명사만 추출hannanum가 okt보다 성능좋음
33 -#print(newData2)
34 -
35 -newData3=[]
36 -for i in range(len(newData2)):
37 - newData3.append([])
38 - for j in newData2[i]:
39 - if any(map(str.isdigit,j))==False and len(j)>1: #추출한 결과가 숫자포함이거나 한글자 인것 제외
40 - newData3[i].append(j)
41 -#print(newData3)
42 -
43 -#print(type(newData2))#newData2 데이터 형식은 list
44 -#df= pd.DataFrame.from_records(newData3)#newData3 dataframe으로 변환
45 -#df.to_excel(filename+'_명사추출_숫자제외'+'.xlsx') #파일명의 엑셀로 변환
46 -
47 -#TF-IDF함수 시작
48 -
def f(t, d):
    """Return how many times term ``t`` occurs in document ``d``.

    ``d`` is a list of terms (the documents here are already tokenised).
    """
    return d.count(t)


def tf(t, d):
    """Return the augmented term frequency of ``t`` in document ``d``.

    Computed as ``0.5 + 0.5 * f(t, d) / max_w f(w, d)``. An empty document
    has no most-frequent term, so the base value 0.5 is returned instead of
    letting ``max()`` raise on an empty sequence.
    """
    from collections import Counter

    if not d:
        return 0.5
    # Count every term once instead of calling d.count() for each term.
    counts = Counter(d)
    return 0.5 + 0.5*counts[t]/max(counts.values())


def idf(t, D):
    """Return the inverse document frequency of ``t`` over corpus ``D``.

    Computed as ``log10(len(D) / (1 + number of documents containing t))``;
    the added 1 keeps the denominator from being zero. The value is negative
    when ``t`` occurs in every document. ``log10`` raises ``ValueError`` for
    an empty corpus.
    """
    numerator = len(D)  # number of documents in the corpus
    denominator = 1 + sum(1 for d in D if t in d)
    return log10(numerator/denominator)


def tfidf(t, d, D):
    """Return the TF-IDF weight of term ``t`` in document ``d`` of corpus ``D``."""
    return tf(t, d)*idf(t, D)


def tfidfScorer(D):
    """Score every term of every document in corpus ``D``.

    Returns one list per document, in corpus order, holding a
    ``(term, weight)`` tuple for each term occurrence in document order
    (repeated terms produce repeated tuples). Weights equal
    ``tfidf(term, document, D)``.

    Document frequencies and per-document counts are computed once up front,
    so the work no longer grows with a full document recount and a full
    corpus rescan for each individual term.
    """
    from collections import Counter

    # Number of documents each term appears in (each document counted once).
    doc_freq = Counter()
    for d in D:
        doc_freq.update(set(d))

    n_docs = len(D)
    idf_cache = {}
    result = []
    for d in D:
        counts = Counter(d)
        peak = max(counts.values()) if counts else 0
        scored = []
        for t in d:
            if t not in idf_cache:
                idf_cache[t] = log10(n_docs/(1 + doc_freq[t]))
            scored.append((t, (0.5 + 0.5*counts[t]/peak)*idf_cache[t]))
        result.append(scored)
    return result
68 -
69 -#newData3는 명사추출을 통해 분리되어있음(이미 split상태)
70 -
if __name__ == '__main__':
    # newData3 is already a list of noun lists, one per headline.
    corpus = list(newData3)

    # One list of (word, weight) tuples per document.
    TfIf = tfidfScorer(corpus)
    print(TfIf)

    # Sort every document by weight, highest first. The earlier loop stopped
    # at len(TfIf) - 1 and so left the last document unsorted.
    for scored_terms in TfIf:
        scored_terms.sort(key=lambda pair: pair[1], reverse=True)
    print(TfIf)

    # Keep only the word from each (word, weight) tuple, preserving the order.
    onlynouns = [[word for word, _weight in scored_terms] for scored_terms in TfIf]
    print(onlynouns)
92 -#df= pd.DataFrame.from_records(TfIf)#TfIf dataframe으로 변환
93 -#df.to_excel(filename+'_가중치추출_내림정렬'+'.xlsx')
94 -
1 -<component name="InspectionProjectProfileManager">
2 - <settings>
3 - <option name="USE_PROJECT_PROFILE" value="false" />
4 - <version value="1.0" />
5 - </settings>
6 -</component>
...\ No newline at end of file ...\ No newline at end of file
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<module type="PYTHON_MODULE" version="4">
3 - <component name="NewModuleRootManager">
4 - <content url="file://$MODULE_DIR$" />
5 - <orderEntry type="inheritedJdk" />
6 - <orderEntry type="sourceFolder" forTests="false" />
7 - </component>
8 -</module>
...\ No newline at end of file ...\ No newline at end of file
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<project version="4">
3 - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
4 -</project>
...\ No newline at end of file ...\ No newline at end of file
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<project version="4">
3 - <component name="ProjectModuleManager">
4 - <modules>
5 - <module fileurl="file://$PROJECT_DIR$/.idea/knu.iml" filepath="$PROJECT_DIR$/.idea/knu.iml" />
6 - </modules>
7 - </component>
8 -</project>
...\ No newline at end of file ...\ No newline at end of file
1 -<?xml version="1.0" encoding="UTF-8"?>
2 -<project version="4">
3 - <component name="VcsDirectoryMappings">
4 - <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5 - <mapping directory="$PROJECT_DIR$/KnuSentiLex" vcs="Git" />
6 - </component>
7 -</project>
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.
1 -import warnings
2 -
3 -warnings.simplefilter(("ignore"))
4 -import openpyxl
5 -import pandas as pd
6 -
7 -#######Gephi에 사용할 edge파일 만들기 전에 필요한 자료 만드는 과정
8 -####6개월치 키워드 합친 키워드 파일 넣기
9 -
10 -Stockfilename = input("키워드파일이름입력:")
11 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/" + Stockfilename + ".xlsx"
12 -Stockfile = openpyxl.load_workbook(fileStock) # 파일이름입력
13 -stock_ws = Stockfile.active
14 -Stock_data = [] # list 타입
15 -date=[]
16 -i = 0
17 -for row in stock_ws.rows:
18 - Stock_data.append([])
19 - date.append(row[1].value)
20 - for cell in row:
21 - if cell.value != None:
22 - Stock_data[i].append(cell.value)
23 - i += 1
24 -del Stock_data[0] #첫번째 의미없는 열 삭제
25 -del date[0]
26 -for i in range(len(Stock_data)):
27 - del Stock_data[i][0] #각 열의 첫번째 행 삭제
28 -for i in range(len(Stock_data)):
29 - del Stock_data[i][0] #각 열의 날짜 행 삭제
30 -
31 -
32 -Tdata=[]
33 -
34 -for x in range(len(Stock_data)):
35 - Tdata.append([])
36 - for y in range(len(Stock_data[x])):
37 - if str.isalnum(Stock_data[x][y]) == True:
38 - Tdata[x].append(Stock_data[x][y])
39 -
40 -result = { '날짜':date, '단어':Tdata }
41 -
42 -df = pd.DataFrame(result)
43 -#print(df)
44 -list_df=df.values.tolist() #dataframe list로 변경
45 -print(list_df)
46 -#print(list_df[0][0]) 날짜 2021.01.01.
47 -
48 -new_date = [] # 날짜 중복 삭제
49 -for v in date:
50 - if v not in new_date:
51 - new_date.append(v)
52 -#print(new_date)
53 -
54 -Setlist =[]# 날짜별 키워드 넣기
55 -for v in range(len(new_date)):
56 - Setlist.append([])
57 - Setlist[v].append(new_date[v])
58 - for i in range(len(list_df)):
59 - for j in range(len(list_df[i][1])):
60 - if new_date[v] == list_df[i][0] :
61 - Setlist[v].append(list_df[i][1][j])
62 -print(Setlist)
63 -df_ver= pd.DataFrame(Setlist)
64 -#df_ver.to_excel(Stockfilename+' 날짜뉴스모으고특수삭제.xlsx',sheet_name='sheet1')
65 -SourceTarget=[]
66 -for i in range(len(list_df)):
67 - SourceTarget.append([])
68 - for j in range(len(list_df[i][1])-1):
69 - SourceTarget.append([list_df[i][0],list_df[i][1][j],list_df[i][1][j+1],1])
70 -print(SourceTarget)
71 -SourceTarget = [v for v in SourceTarget if v]
72 -df_SourTar = pd.DataFrame(SourceTarget)
73 -df_SourTar.to_excel(Stockfilename+'Edge3.xlsx',sheet_name='sheet1')
...\ No newline at end of file ...\ No newline at end of file
1 -# KNU 한국어 감성사전
2 -# 작성자 : 온병원, 박상민, 나철원
3 -# 소속 : 군산대학교 소프트웨어융합공학과 Data Intelligence Lab
4 -# 홈페이지 : dilab.kunsan.ac.kr
5 -# 작성일 : 2018.05.14
6 -# 뜻풀이 데이터 출처 : https://github.com/mrchypark/stdkor
7 -# 신조어 데이터 출처 : https://ko.wikipedia.org/wiki/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD%EC%9D%98_%EC%9D%B8%ED%84%B0%EB%84%B7_%EC%8B%A0%EC%A1%B0%EC%96%B4_%EB%AA%A9%EB%A1%9D
8 -# 이모티콘 데이터 출처: https://ko.wikipedia.org/wiki/%EC%9D%B4%EB%AA%A8%ED%8B%B0%EC%BD%98
9 -# SentiWordNet_3.0.0_20130122 데이터 출처 : http://sentiwordnet.isti.cnr.it/
10 -# SenticNet-5.0 데이터 출처 : http://sentic.net/
11 -# 감정단어사전0603 데이터 출처 : http://datascience.khu.ac.kr/board/bbs/board.php?bo_table=05_01&wr_id=91
12 -# 김은영, “국어 감정동사 연구”, 2004.02, 학위논문(박사) - 전남대학교 국어국문학과 대학원
13 -
14 -#-*-coding:utf-8-*-
15 -import collections
16 -import json
17 -
18 -import warnings
19 -warnings.simplefilter(("ignore"))
20 -import openpyxl
21 -import pandas as pd
22 -import re
23 -from datetime import datetime
24 -
25 -############종목 감성 판단 ex)hmm뉴스키워드.xlsx 파일 넣는 과정
class KnuSL():
    """Polarity lookup against the KNU sentiment lexicon JSON file."""

    # Lexicon location, relative to the current working directory.
    LEXICON_PATH = 'KnuSentiLex/data/SentiWord_info.json'
    # word -> raw 'polarity' value; filled on the first lookup so the JSON
    # file is parsed once instead of once per looked-up word.
    _polarity_by_word = None

    @staticmethod
    def data_list(wordname):
        """Return the integer polarity recorded for ``wordname``.

        Returns 0 when the word is not in the lexicon. When the lexicon lists
        a word more than once, the last entry wins. Declared as a static
        method so it can be called on the class or on an instance.

        Raises:
            OSError: if the lexicon file cannot be opened.
        """
        if KnuSL._polarity_by_word is None:
            with open(KnuSL.LEXICON_PATH, encoding='utf-8-sig', mode='r') as f:
                entries = json.load(f)
            # Later entries overwrite earlier ones, which keeps the
            # "last match wins" result of a full front-to-back scan.
            KnuSL._polarity_by_word = {
                entry['word']: entry['polarity'] for entry in entries
            }

        lexicon = KnuSL._polarity_by_word
        if wordname not in lexicon:
            return 0
        return int(lexicon[wordname])
44 -
45 -if __name__ == "__main__":
46 -
47 - ksl = KnuSL
48 -
49 - print("\nKNU 한국어 감성사전입니다~ :)")
50 - print("사전에 단어가 없는 경우 결과가 None으로 나타납니다!!!")
51 - print("종료하시려면 #을 입력해주세요!!!")
52 - print("-2:매우 부정, -1:부정, 0:중립 or Unkwon, 1:긍정, 2:매우 긍정")
53 - print("\n")
54 -#########
55 -Newsfilefolder = input("종목폴더입력: ")
56 -Newsfilename=input("파일이름입력:")
57 -Newsfilepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/"+Newsfilefolder+"/" + Newsfilename + ".xlsx"
58 -Newsfile = openpyxl.load_workbook(Newsfilepos)#파일이름입력
59 -ws=Newsfile.active
60 -data=[]
61 -date=[]
62 -i=0
63 -for row in ws.rows:
64 - data.append([])
65 - date.append(row[1].value)
66 - for cell in row:
67 - if cell.value != None:
68 - data[i].append(cell.value)
69 - i += 1
70 -del data[0] #첫번째 의미없는 열 삭제
71 -del date[0]
72 -for i in range(len(data)):
73 - del data[i][0] #각 열의 첫번째 행 삭제
74 -for i in range(len(data)):
75 - del data[i][0] #각 열의 날짜 행 삭제
76 -
77 -KNUdata=[]
78 -Tdata=[]
79 -
80 -for x in range(len(data)):
81 - KNUdata.append([])
82 - Tdata.append([])
83 - for y in range(len(data[x])):
84 - KNUdata[x].append(ksl.data_list(data[x][y]))
85 - Tdata[x].append([data[x][y], KNUdata[x][y]])
86 -
87 -result = { '날짜':date, '단어, 극성':Tdata }
88 -
89 -df = pd.DataFrame(result)
90 -#print(df)
91 -list_df=df.values.tolist() #dataframe list로 변경
92 -#print(list_df)
93 -#print(list_df[0][0]) 날짜 2021.01.01.
94 -
95 -new_date = [] # 날짜 중복 삭제
96 -for v in date:
97 - if v not in new_date:
98 - new_date.append(v)
99 -#print(new_date)
100 -
101 -Setlist =[]# 날짜별 키워드 넣기
102 -for v in range(len(new_date)):
103 - Setlist.append([])
104 - Setlist[v].append(new_date[v])
105 - for i in range(len(list_df)):
106 - for j in range(len(list_df[i][1])):
107 - if new_date[v] == list_df[i][0]:
108 - Setlist[v].append(list_df[i][1][j])
109 -print(Setlist)
110 -print(Setlist[0][0]) #2021.01.01
111 -print(type(Setlist[0][0]))
112 -print(Setlist[0][0].split('-'))
113 -print(Setlist[0][1][1]) #극성 0
114 -print(type(Setlist[0][1][1])) #극성 모든 타입 int
115 -
116 -#print(list_df[0][1][0]) 키워드와 극성 ['HMM…"체질개선해', 'X']
117 -#print(list_df[0][1][0][1]) 극성 x
118 -#print(list_df[0][0].split('.')[:3]) ['2021', '01', '01']
119 -#df.to_excel(Newsfilename+' KNU.xlsx',sheet_name='sheet1')
120 -
121 -Stockfilefolder = input("종목시세폴더입력: ")
122 -Stockfilename=input("시세파일이름입력:")
123 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/"+Stockfilefolder+"/" + Stockfilename + ".xlsx"
124 -Stockfile = openpyxl.load_workbook(fileStock)#파일이름입력
125 -stock_ws=Stockfile.active
126 -Stock_data=[] #list 타입
127 -i=0
128 -for row in stock_ws.rows:
129 - Stock_data.append([])
130 - for cell in row:
131 - if cell.value != None:
132 - Stock_data[i].append(cell.value)
133 - i += 1
134 -del Stock_data[0]
135 -for i in range(len(Stock_data)):
136 - del Stock_data[i][2] # 대비 삭제
137 -for i in range(len(Stock_data)):
138 - del Stock_data[i][7] #거래대금 삭제
139 -for i in range(len(Stock_data)):
140 - del Stock_data[i][7] #시가 총액 삭제
141 -for i in range(len(Stock_data)):
142 - del Stock_data[i][7] #상장주식 수 삭제 / 결과:'일자', '종가', '등락률', '시가', '고가', '저가', '거래량'
143 -#print(Stock_data)
144 -
def Calpercentage(a, b):
    """Return the signed percentage move from ``a`` to ``b``, relative to ``a``.

    Used with the opening price as ``a`` and the day's high or low as ``b``.
    The result is positive when ``b`` is above ``a`` and negative when it is
    below. The sign matters: callers test ``> 2`` for the high and ``< -2``
    for the low, and an absolute value can never satisfy the second test.

    Raises:
        ZeroDivisionError: if ``a`` is 0.
    """
    return (b - a) / a * 100
147 -####아래로 수정 필요 (미완성)####
148 -
# Walk over Setlist (one entry per date) and raise or lower the polarity stored
# at index 1 of every keyword entry, based on the Stock_data row at index i.
# The author's marker just above says this section still needs changes (unfinished).
# Per the column-deletion comments earlier in this script, a Stock_data row is
# presumably [date, close, change rate, open, high, low, volume] — TODO confirm.
149 -i=0
150 -for k in range(len(Setlist)):
151 - if( Stock_data[i][0].split('/') == Setlist[k][0].split('-')): # compare dates: this branch runs when they match
152 - if Calpercentage(Stock_data[i][3],Stock_data[i][4]) > 2 : # intraday high more than 2% above the open
153 - for j in range(1,len(Setlist[k])):
154 - if Setlist[k][j][1] == 0:
155 - Setlist[k][j][1] = 1
156 - else:
# NOTE(review): every sibling branch updates Setlist[k]; Setlist[0] here looks like a typo — confirm.
157 - Setlist[0][j][1] += 1
# NOTE(review): "< -2" can only be true if Calpercentage returns a signed value — confirm.
158 - elif Calpercentage(Stock_data[i][3],Stock_data[i][5]) < -2 : # intraday low more than 2% below the open
159 - for j in range(1,len(Setlist[k])):
160 - if Setlist[k][j][1] == 0:
161 - Setlist[k][j][1] = -1
162 - else:
# NOTE(review): same Setlist[0] vs Setlist[k] question as above — confirm.
163 - Setlist[0][j][1] -= 1
164 - else:
# NOTE(review): Stock_data[i+1] has no bounds check; it looks like it can run past the last row — confirm.
165 - if Stock_data[i+1][2] > 0: # the next row's change rate is positive
166 - for j in range(1,len(Setlist[k])): # treat the previous day's news as good news
167 - if Setlist[k][j][1] == 0:
168 - Setlist[k][j][1] = 1
169 - else:
170 - Setlist[k][j][1] += 1
171 - elif Stock_data[i+1][2] < 0:
172 - for j in range(1,len(Setlist[k])): # negative: treat the previous day's news as bad news
173 - if Setlist[k][j][1] == 0:
174 - Setlist[k][j][1] = -1
175 - else:
176 - Setlist[k][j][1] -= 1
177 - i+=1
178 - else:
# Dates differ: the same three-way rule is applied to Setlist[k].
179 - if Calpercentage(Stock_data[i][3], Stock_data[i][4]) > 2: # intraday high more than 2% above the open
180 - for j in range(1, len(Setlist[k])):
181 - if Setlist[k][j][1] == 0:
182 - Setlist[k][j][1] = 1
183 - else:
184 - Setlist[k][j][1] += 1
185 - elif Calpercentage(Stock_data[i][3], Stock_data[i][5]) < -2: # intraday low more than 2% below the open
186 - for j in range(1, len(Setlist[k])):
187 - if Setlist[k][j][1] == 0:
188 - Setlist[k][j][1] = -1
189 - else:
190 - Setlist[k][j][1] -= 1
191 - else:
192 - if Stock_data[i + 1][2] > 0: # the next row's change rate is positive
193 - for j in range(1, len(Setlist[k])): # treat the previous day's news as good news
194 - if Setlist[k][j][1] == 0:
195 - Setlist[k][j][1] = 1
196 - else:
197 - Setlist[k][j][1] += 1
198 - elif Stock_data[i + 1][2] < 0:
199 - for j in range(1, len(Setlist[k])): # negative: treat the previous day's news as bad news
200 - if Setlist[k][j][1] == 0:
201 - Setlist[k][j][1] = -1
202 - else:
203 - Setlist[k][j][1] -= 1
204 - i+=1 # <author's note: try deleting this and re-running on hmm
205 -
206 -print(Setlist)
207 -
208 -#df_Setlist = pd.DataFrame(Setlist)
209 -#df_Setlist.to_excel(Stockfilename+' KNU_New.xlsx',sheet_name='sheet1')
210 -
211 -Setlist_w = []
212 -for i in range(len(Setlist)):
213 - Setlist_w.append([])
214 - for j in range(1, len(Setlist[i])):
215 - Setlist_w[i].append(Setlist[i][j][0]) # 극성 제외 단어만 추출
216 -
217 -counter = {}
218 -for i in range(len(Setlist_w)):
219 - counter[i] = collections.Counter(Setlist_w[i]) # 누적치
220 -
221 -for i in range(len(Setlist_w)):
222 - Setlist_w[i] = list(zip(counter[i].keys(), counter[i].values())) # 튜플 리스트화 [(값, 값)]
223 -
224 -Plist = []
225 -for i in range(len(Setlist_w)):
226 - Plist.append([])
227 - for j in range(len(Setlist_w[i])):
228 - Plist[i].append(list(Setlist_w[i][j])) # 튜플 -> 리스트화 [[값, 값]]
229 -
230 -for i in range(len(Plist)):
231 - for j in range(len(Plist[i])):
232 - Plist[i][j][1] = 0 # 극성 0으로 초기화
233 -
234 -for i in range(len(Setlist)):
235 - for j in range(1, len(Setlist[i])):
236 - for h in range(len(Plist[i])):
237 - if Setlist[i][j][0] == Plist[i][h][0]:
238 - Plist[i][h][1] += Setlist[i][j][1] #누적치
# Collapse the per-date [word, score] lists in Plist into one total per word.
# A dict accumulates the totals in a single pass. The earlier pairwise merge
# stopped its outer loop at len - 2, so a duplicate pair sitting in the last
# two sorted positions was never merged.
word_totals = {}
for day_rows in Plist:
    for word, score in day_rows:
        word_totals[word] = word_totals.get(word, 0) + score

# One [word, total] row per distinct word, ordered by word.
vert_p = [[word, word_totals[word]] for word in sorted(word_totals)]
print(vert_p)

# Rows containing the string '0' are excluded from the exported dictionary.
vert_p = [row for row in vert_p if '0' not in row]
df_ver = pd.DataFrame(vert_p)
df_ver.to_excel(Stockfilename+' KNU_New_Vdic2.xlsx', sheet_name='sheet1')
254 -
255 -####키워드파일 월별로 돌려서 그 나온 결과 파일들을 합쳐서 Merge_dictionay.py에 넣어서 사전 만들기 ####
256 -
257 -
This diff is collapsed. Click to expand it.
1 -# -*-coding:utf-8-*-
2 -import collections
3 -import json
4 -
5 -import warnings
6 -
7 -warnings.simplefilter(("ignore"))
8 -import openpyxl
9 -import pandas as pd
10 -import re
11 -from datetime import datetime
12 -
13 -########코스피 감성 판단
class KnuSL():
    """Polarity lookup against the KNU sentiment lexicon JSON file."""

    # Lexicon location, relative to the current working directory.
    LEXICON_PATH = 'KnuSentiLex/data/SentiWord_info.json'
    # word -> raw 'polarity' value; filled on the first lookup so the JSON
    # file is parsed once instead of once per looked-up word.
    _polarity_by_word = None

    @staticmethod
    def data_list(wordname):
        """Return the integer polarity recorded for ``wordname``.

        Returns 0 when the word is not in the lexicon. When the lexicon lists
        a word more than once, the last entry wins. Declared as a static
        method so it can be called on the class or on an instance.

        Raises:
            OSError: if the lexicon file cannot be opened.
        """
        if KnuSL._polarity_by_word is None:
            with open(KnuSL.LEXICON_PATH, encoding='utf-8-sig', mode='r') as f:
                entries = json.load(f)
            # Later entries overwrite earlier ones, which keeps the
            # "last match wins" result of a full front-to-back scan.
            KnuSL._polarity_by_word = {
                entry['word']: entry['polarity'] for entry in entries
            }

        lexicon = KnuSL._polarity_by_word
        if wordname not in lexicon:
            return 0
        return int(lexicon[wordname])
32 -
33 -
34 -if __name__ == "__main__":
35 - ksl = KnuSL
36 -
37 - print("\nKNU 한국어 감성사전입니다~ :)")
38 - print("사전에 단어가 없는 경우 결과가 None으로 나타납니다!!!")
39 - print("종료하시려면 #을 입력해주세요!!!")
40 - print("-2:매우 부정, -1:부정, 0:중립 or Unkwon, 1:긍정, 2:매우 긍정")
41 - print("\n")
42 -#########
43 -Newsfilefolder = input("종목폴더입력: ")
44 -Newsfilename = input("파일이름입력:")
45 -Newsfilepos = "C:/Users/yangj/PycharmProjects/pythonProject1/뉴스키워드/" + Newsfilefolder + "/" + Newsfilename + ".xlsx"
46 -Newsfile = openpyxl.load_workbook(Newsfilepos) # 파일이름입력
47 -ws = Newsfile.active
48 -data = []
49 -date = []
50 -i = 0
51 -for row in ws.rows:
52 - data.append([])
53 - date.append(row[1].value)
54 - for cell in row:
55 - if cell.value != None:
56 - data[i].append(cell.value)
57 - i += 1
58 -del data[0] # 첫번째 의미없는 열 삭제
59 -del date[0]
60 -for i in range(len(data)):
61 - del data[i][0] # 각 열의 첫번째 행 삭제
62 -for i in range(len(data)):
63 - del data[i][0] # 각 열의 날짜 행 삭제
64 -
65 -KNUdata = []
66 -Tdata = []
67 -
68 -for x in range(len(data)):
69 - KNUdata.append([])
70 - Tdata.append([])
71 - for y in range(len(data[x])):
72 - KNUdata[x].append(ksl.data_list(data[x][y]))
73 - Tdata[x].append([data[x][y], KNUdata[x][y]])
74 -
75 -result = {'날짜': date, '단어, 극성': Tdata}
76 -
77 -df = pd.DataFrame(result)
78 -
79 -list_df = df.values.tolist() # dataframe list로 변경
80 -new_date = [] # 날짜 중복 삭제
81 -for v in date:
82 - if v not in new_date:
83 - new_date.append(v)
84 -# print(new_date)
85 -
86 -Setlist = [] # 날짜별 키워드 넣기
87 -for v in range(len(new_date)):
88 - Setlist.append([])
89 - Setlist[v].append(new_date[v])
90 - for i in range(len(list_df)):
91 - for j in range(len(list_df[i][1])):
92 - if new_date[v] == list_df[i][0]:
93 - Setlist[v].append(list_df[i][1][j])
94 -
95 -Stockfilefolder = input("종목시세폴더입력: ")
96 -Stockfilename = input("시세파일이름입력:")
97 -fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/종목별시세/" + Stockfilefolder + "/" + Stockfilename + ".xlsx"
98 -Stockfile = openpyxl.load_workbook(fileStock) # 파일이름입력
99 -stock_ws = Stockfile.active
100 -Stock_data = [] # list 타입
101 -i = 0
102 -for row in stock_ws.rows:
103 - Stock_data.append([])
104 - for cell in row:
105 - if cell.value != None:
106 - Stock_data[i].append(cell.value)
107 - i += 1
108 -del Stock_data[0]
109 -for i in range(len(Stock_data)):
110 - del Stock_data[i][2] # 대비 삭제
111 -for i in range(len(Stock_data)):
112 - del Stock_data[i][7] # 거래대금 삭제
113 -for i in range(len(Stock_data)):
114 - del Stock_data[i][7] # 시가 총액 삭제
115 -
116 -
117 -i = 0
118 -for k in range(len(Setlist)):
119 - if (Stock_data[i][0].split('/') == Setlist[k][0].split('.')[:3]): # 날짜 비교 날짜가 같다면
120 - if Stock_data[i][2] > 0: # 코스피 등락이 양수
121 - for j in range(1, len(Setlist[k])):
122 - if Setlist[k][j][1] == 0:
123 - Setlist[k][j][1] = 1
124 - else:
125 - Setlist[k][j][1] += 1
126 - elif Stock_data[i][2] < 0:
127 - for j in range(1, len(Setlist[k])): # 음수면 어제 뉴스는 악재 취급
128 - if Setlist[k][j][1] == 0:
129 - Setlist[k][j][1] = -1
130 - else:
131 - Setlist[k][j][1] -= 1
132 - i += 1
133 - else:
134 - if Stock_data[i+1][2] > 0: # 다음날 주가 등락률이 양수면
135 - for j in range(1, len(Setlist[k])): # 어제뉴스는 호재 취급
136 - if Setlist[k][j][1] == 0:
137 - Setlist[k][j][1] = 1
138 - else:
139 - Setlist[k][j][1] += 1
140 - elif Stock_data[i+1][2] < 0:
141 - for j in range(1, len(Setlist[k])): # 음수면 어제 뉴스는 악재 취급
142 - if Setlist[k][j][1] == 0:
143 - Setlist[k][j][1] = -1
144 - else:
145 - Setlist[k][j][1] -= 1
146 -
147 -Setlist_w = []
148 -for i in range(len(Setlist)):
149 - Setlist_w.append([])
150 - for j in range(1, len(Setlist[i])):
151 - Setlist_w[i].append(Setlist[i][j][0]) # 극성 제외 단어만 추출
152 -
153 -counter = {}
154 -for i in range(len(Setlist_w)):
155 - counter[i] = collections.Counter(Setlist_w[i]) # 누적치
156 -
157 -for i in range(len(Setlist_w)):
158 - Setlist_w[i] = list(zip(counter[i].keys(), counter[i].values())) # 튜플 리스트화 [(값, 값)]
159 -
160 -Plist = []
161 -for i in range(len(Setlist_w)):
162 - Plist.append([])
163 - for j in range(len(Setlist_w[i])):
164 - Plist[i].append(list(Setlist_w[i][j])) # 튜플 -> 리스트화 [[값, 값]]
165 -
166 -for i in range(len(Plist)):
167 - for j in range(len(Plist[i])):
168 - Plist[i][j][1] = 0 # 극성 0으로 초기화
169 -
170 -for i in range(len(Setlist)):
171 - for j in range(1, len(Setlist[i])):
172 - for h in range(len(Plist[i])):
173 - if Setlist[i][j][0] == Plist[i][h][0]:
174 - Plist[i][h][1] += Setlist[i][j][1] # 누적치
# Collapse the per-date [word, score] lists in Plist into one total per word.
# A dict accumulates the totals in a single pass. The earlier pairwise merge
# stopped its outer loop at len - 2, so a duplicate pair sitting in the last
# two sorted positions was never merged.
word_totals = {}
for day_rows in Plist:
    for word, score in day_rows:
        word_totals[word] = word_totals.get(word, 0) + score

# One [word, total] row per distinct word, ordered by word.
vert_p = [[word, word_totals[word]] for word in sorted(word_totals)]

# Rows containing the string '0' are excluded from the exported dictionary.
vert_p = [row for row in vert_p if '0' not in row]
df_ver = pd.DataFrame(vert_p)
df_ver.to_excel(Stockfilename + ' KNU_New_vdic2.xlsx', sheet_name='sheet1')
This diff is collapsed. Click to expand it.
No preview for this file type
1 -Metadata-Version: 2.1
2 -Name: mecab-python
3 -Version: 0.996-ko-0.9.2-msvc
4 -Summary: UNKNOWN
5 -Home-page: UNKNOWN
6 -Author: UNKNOWN
7 -Author-email: UNKNOWN
8 -License: UNKNOWN
9 -Platform: UNKNOWN
10 -
11 -UNKNOWN
12 -
13 -
1 -MeCab.py,sha256=cuvFTwJk_Z38aY54gIKoBtR46p6tPXOVSKkpoA1PcY4,15733
2 -_MeCab.cp37-win_amd64.pyd,sha256=u-WVy7oAK9dySDKhPh_DNHIWW4x6yHSwu6hjMKqdz5E,116736
3 -__pycache__/MeCab.cpython-37.pyc,,
4 -libmecab.dll,sha256=lRhwboyrXWEXXnDn8soNA05id3UvWfPpP7bvTXj5iH4,1908736
5 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
6 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/METADATA,sha256=Qw4D_1k4TCzF1Ul5k47s6hOnafUJSLyiiGhqffEnszs,190
7 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/RECORD,,
8 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/WHEEL,sha256=jmFYSwR2oi5DgMGgmnuB-EJxqLMkUojGGdmJ0wz35aI,106
10 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/direct_url.json,sha256=8Wv8YjFpjisUlH--mXnzxpj5B8uexD6AtZjonw2yk5k,142
11 -mecab_python-0.996_ko_0.9.2_msvc.dist-info/top_level.txt,sha256=E6HHbCcV114TjQmzLJGG5aSu2Sb0tjGGxkjler1jxrQ,13
1 -Wheel-Version: 1.0
2 -Generator: bdist_wheel (0.31.1)
3 -Root-Is-Purelib: false
4 -Tag: cp37-cp37m-win_amd64
5 -
1 -{"archive_info": {}, "url": "file:///C:/Users/yangj/PycharmProjects/pythonProject1/mecab_python-0.996_ko_0.9.2_msvc-cp37-cp37m-win_amd64.whl"}
...\ No newline at end of file ...\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type