Showing
1 changed file
with
73 additions
and
0 deletions
knu/KnuSentiLex/DayNewsMerge.py
0 → 100644
| 1 | +import warnings | ||
| 2 | + | ||
| 3 | +warnings.simplefilter(("ignore")) | ||
| 4 | +import openpyxl | ||
| 5 | +import pandas as pd | ||
| 6 | + | ||
# Prepares the data needed before building the edge file used in Gephi.
# Input: the workbook that merges six months of per-article keywords.
# Expected sheet layout: first row is a header; in every other row the
# first column is a row index, the second column is the date, and the
# remaining columns hold that row's keywords.

# Directory that holds the input workbooks (same location the script always used).
SAMPLE_DIR = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/"


def read_rows(path):
    """Return every row of the workbook's active sheet as a list of cell values."""
    workbook = openpyxl.load_workbook(path)
    worksheet = workbook.active
    return [[cell.value for cell in row] for row in worksheet.rows]


def split_rows(rows):
    """Split raw sheet rows into parallel ``dates`` and ``keyword_lists``.

    The first row (header) is dropped.  For every remaining row the second
    cell is taken as the date and the cells from the third one onward as
    keywords, with empty (``None``) cells removed.

    Columns are selected by position *before* empty cells are filtered, so a
    row with an empty index/date cell, or with fewer than two cells, neither
    loses a keyword nor raises ``IndexError``.

    Args:
        rows: iterable of sequences of cell values, header row first.

    Returns:
        A ``(dates, keyword_lists)`` tuple of equally long lists.
    """
    dates = []
    keyword_lists = []
    for row in list(rows)[1:]:
        dates.append(row[1] if len(row) > 1 else None)
        keyword_lists.append([value for value in row[2:] if value is not None])
    return dates, keyword_lists


def keep_alnum(words):
    """Return only the purely alphanumeric string entries of ``words``.

    Entries containing special characters are dropped.  Non-string cells
    (e.g. numbers read from the sheet) are skipped instead of raising
    ``TypeError`` from ``str.isalnum``.
    """
    return [word for word in words if isinstance(word, str) and word.isalnum()]


def group_keywords_by_date(dates, keyword_lists):
    """Merge the keywords of all rows that share a date.

    Args:
        dates: date value of each row.
        keyword_lists: keyword list of each row, parallel to ``dates``.

    Returns:
        One list per distinct date, in first-seen order, shaped
        ``[date, keyword, keyword, ...]`` with keywords in row order.
    """
    grouped = {}
    for day, words in zip(dates, keyword_lists):
        grouped.setdefault(day, []).extend(words)
    return [[day] + words for day, words in grouped.items()]


def build_edges(records):
    """Build source/target edges from consecutive keywords of each record.

    Args:
        records: sequence of ``[date, keywords]`` pairs.

    Returns:
        A list of ``[date, source, target, 1]`` rows, one per adjacent
        keyword pair; records with fewer than two keywords yield nothing.
    """
    edges = []
    for day, words in records:
        for source, target in zip(words, words[1:]):
            edges.append([day, source, target, 1])
    return edges


def main():
    """Prompt for a keyword workbook name and export its edge list to Excel."""
    Stockfilename = input("키워드파일이름입력:")
    rows = read_rows(SAMPLE_DIR + Stockfilename + ".xlsx")

    date, stock_data = split_rows(rows)
    tdata = [keep_alnum(words) for words in stock_data]

    # Kept from the original: the records go through a DataFrame round trip
    # so date values reach the export in the same form as before.
    df = pd.DataFrame({'날짜': date, '단어': tdata})
    list_df = df.values.tolist()
    print(list_df)

    # Per-date keyword lists are only printed; the original's Excel export
    # of this table was already disabled.
    print(group_keywords_by_date(date, tdata))

    edges = build_edges(list_df)
    print(edges)
    pd.DataFrame(edges).to_excel(Stockfilename + 'Edge3.xlsx', sheet_name='sheet1')


if __name__ == "__main__":
    main()
| ... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment