양지수

Edge file base file

1 +import warnings
2 +
3 +warnings.simplefilter(("ignore"))
4 +import openpyxl
5 +import pandas as pd
6 +
# Preparation step for the edge file that is later loaded into Gephi.
# Input: a single keyword workbook holding six months of merged keywords.
# NOTE(review): the column handling assumes each data row is laid out as
# (row index, date, keyword, keyword, ...) below one header row -- confirm
# against the real workbook.

KEYWORD_DIR = "C:/Users/yangj/PycharmProjects/pythonProject1/샘플/"


def parse_keyword_rows(raw_rows):
    """Turn raw worksheet rows into ``(date, keywords)`` pairs.

    Args:
        raw_rows: Iterable of per-row cell-value sequences, header row
            already excluded.

    Returns:
        A list with one ``(date, keywords)`` tuple per non-blank row.  ``date``
        is the value of the second column (``None`` when the row is too
        short) and ``keywords`` holds the non-empty values from the third
        column onward, in column order.
    """
    parsed = []
    for values in raw_rows:
        # Rows with no value at all carry no date and no keywords; skip them.
        if all(value is None for value in values):
            continue
        date = values[1] if len(values) > 1 else None
        # Slice by position instead of deleting the first two non-empty cells,
        # so an empty index/date cell can never cause a keyword to be dropped.
        keywords = [value for value in values[2:] if value is not None]
        parsed.append((date, keywords))
    return parsed


def load_keyword_rows(path):
    """Read the active sheet of the workbook at *path*.

    Args:
        path: Location of the ``.xlsx`` keyword workbook.

    Returns:
        The ``(date, keywords)`` pairs produced by ``parse_keyword_rows`` for
        every row after the first (header) row.
    """
    workbook = openpyxl.load_workbook(path)
    try:
        sheet = workbook.active
        # min_row=2 skips the header row; values_only yields plain cell values.
        return parse_keyword_rows(sheet.iter_rows(min_row=2, values_only=True))
    finally:
        workbook.close()


def keep_alnum_keywords(keywords):
    """Return the keywords that are purely alphanumeric text.

    Values that are not ``str`` (numbers, dates, ...) are skipped rather than
    passed to ``str.isalnum``, which would raise ``TypeError`` for them.
    """
    return [word for word in keywords if isinstance(word, str) and word.isalnum()]


def group_keywords_by_date(records):
    """Collect all keywords that share a date into one row.

    Args:
        records: Iterable of ``[date, keywords]`` pairs.

    Returns:
        One ``[date, keyword, keyword, ...]`` list per distinct date, dates in
        first-seen order and keywords in record order.
    """
    grouped = {}
    for date, words in records:
        grouped.setdefault(date, []).extend(words)
    return [[date, *words] for date, words in grouped.items()]


def build_edges(records):
    """Build ``[date, source, target, 1]`` rows from adjacent keywords.

    Every pair of neighbouring keywords inside one record becomes one edge
    with weight 1; records with fewer than two keywords contribute nothing.
    """
    edges = []
    for date, words in records:
        edges.extend([date, source, target, 1] for source, target in zip(words, words[1:]))
    return edges


def main():
    """Prompt for a keyword file name and write the matching edge workbook."""
    Stockfilename = input("키워드파일이름입력:")
    fileStock = KEYWORD_DIR + Stockfilename + ".xlsx"

    records = [
        [date, keep_alnum_keywords(words)]
        for date, words in load_keyword_rows(fileStock)
    ]
    print(records)

    # Per-date keyword rows are only reported on stdout, not written to disk.
    print(group_keywords_by_date(records))

    edges = build_edges(records)
    print(edges)
    pd.DataFrame(edges).to_excel(Stockfilename + 'Edge3.xlsx', sheet_name='sheet1')


if __name__ == "__main__":
    main()
...\ No newline at end of file ...\ No newline at end of file