Showing
1 changed file
with
73 additions
and
0 deletions
knu/KnuSentiLex/DayNewsMerge.py
0 → 100644
1 | +import warnings | ||
2 | + | ||
3 | +warnings.simplefilter(("ignore")) | ||
4 | +import openpyxl | ||
5 | +import pandas as pd | ||
6 | + | ||
# Preparation step that produces the data needed before the edge file for
# Gephi can be built.
# Input: the workbook holding the merged keywords for six months.

# The user types the workbook name without the ".xlsx" extension.
Stockfilename = input("키워드파일이름입력:")
fileStock = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/" + Stockfilename + ".xlsx"
Stockfile = openpyxl.load_workbook(fileStock)
stock_ws = Stockfile.active

Stock_data = []  # per data row: the non-empty cells from the third column on
date = []        # per data row: the value of the second column (the date)

# min_row=2 skips the first row, which the script treats as meaningless
# (presumably a header). Columns are taken by position: column 1 is ignored,
# column 2 is the date, everything after is keyword data. Filtering out empty
# cells only AFTER slicing keeps a blank first/date cell from shifting the row
# and deleting a keyword, and keeps blank rows from raising IndexError.
for row in stock_ws.iter_rows(min_row=2, values_only=True):
    date.append(row[1])
    Stock_data.append([value for value in row[2:] if value is not None])
30 | + | ||
31 | + | ||
# Keep only keywords made up purely of alphanumeric characters; anything
# containing punctuation, whitespace or other special characters is dropped.
# The check goes through str() so that non-text cells (e.g. numbers read from
# the workbook) are tested instead of raising TypeError in str.isalnum.
Tdata = [
    [word for word in words if str(word).isalnum()]
    for words in Stock_data
]

# Pair each row's date with its filtered keyword list.
result = {'날짜': date, '단어': Tdata}

df = pd.DataFrame(result)
list_df = df.values.tolist()  # one [date, keyword_list] entry per row
print(list_df)
# list_df[i][0] is the date of row i, e.g. 2021.01.01.
47 | + | ||
# Distinct dates in the order they first appear; dict keys are unique and
# keep insertion order, so this removes duplicates without reordering.
new_date = list(dict.fromkeys(date))
53 | + | ||
# Gather the keywords of every row under its date. Each Setlist entry has the
# form [date, keyword, keyword, ...]; entries follow the order of new_date and
# keywords are appended in row order.
_keywords_by_date = {day: [day] for day in new_date}
for row_date, row_words in list_df:
    bucket = _keywords_by_date.get(row_date)
    if bucket is not None:  # a row whose date is not in new_date is ignored
        bucket.extend(row_words)
Setlist = list(_keywords_by_date.values())
print(Setlist)

df_ver = pd.DataFrame(Setlist)
# Optional export of the per-date keyword table (left disabled):
#df_ver.to_excel(Stockfilename+' 날짜뉴스모으고특수삭제.xlsx',sheet_name='sheet1')
# Build the edge rows: for every row, each keyword is linked to the keyword
# that immediately follows it. An edge row is [date, keyword, next keyword, 1];
# the trailing 1 is presumably the edge weight expected by Gephi.
# Rows with fewer than two keywords contribute no edges, so no empty
# placeholder entries are created and nothing has to be filtered out later.
SourceTarget = []
for row_date, row_words in list_df:
    for source, target in zip(row_words, row_words[1:]):
        SourceTarget.append([row_date, source, target, 1])
print(SourceTarget)

# Write the edge table next to the script, named after the input workbook.
df_SourTar = pd.DataFrame(SourceTarget)
df_SourTar.to_excel(Stockfilename+'Edge3.xlsx',sheet_name='sheet1')
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment