# utils.py
import numpy as np
import scipy.sparse as sp
import torch, csv
import pandas as pd
from sklearn import preprocessing
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Args:
        sparse_mx: A scipy sparse matrix (anything exposing ``tocoo()``).

    Returns:
        A sparse COO ``torch.Tensor`` with float32 values and the same
        shape as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Row 0 holds the row indices, row 1 the column indices; torch expects
    # int64 indices.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor(...) is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is its supported replacement. The dtype is
    # taken from ``values`` (float32).
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))
def _read_csv_rows(path):
    """Return every row of the CSV file at ``path`` as a list of string lists."""
    # The context manager closes the file even if parsing fails; newline=''
    # is what the csv module documents for files handed to csv.reader.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        return list(csv.reader(handle))


def load_data():
    """Load features, labels and the graph from the CSV files under ``data/``.

    Reads ``data/data_x.csv`` (features), ``data/data_y.csv`` (labels) and
    ``data/data_edge.csv`` (edge list; the first two columns of each row are
    used as the two endpoint node indices).

    Returns:
        A tuple ``(adj, features, labels, idx_train, idx_test)``:

        * ``adj`` -- torch sparse tensor holding the symmetrized adjacency
          matrix, of shape ``(num_nodes, num_nodes)``.
        * ``features`` -- ``torch.FloatTensor`` of the features after
          ``MinMaxScaler`` scaling.
        * ``labels`` -- ``torch.FloatTensor`` of the labels divided by 100.
        * ``idx_train`` -- ``torch.LongTensor`` of the row indices that are
          not multiples of 10.
        * ``idx_test`` -- ``torch.LongTensor`` of the row indices that are
          multiples of 10.
    """
    x = _read_csv_rows('data/data_x.csv')
    y = _read_csv_rows('data/data_y.csv')

    raw_data = np.array(x, dtype=np.float32)
    x_scaled = preprocessing.MinMaxScaler().fit_transform(raw_data)
    # The scaled array is used directly: wrapping it in a DataFrame and in a
    # CSR matrix only to densify it again did not change any value.
    features = torch.FloatTensor(np.asarray(x_scaled, dtype=np.float32))

    labels = np.array(y, dtype=np.float32)
    labels = labels / 100

    edge_list = _read_csv_rows('data/data_edge.csv')
    edges_unordered = np.array(edge_list, dtype=np.int32)
    # Identity mapping over the valid node ids. An id outside
    # [0, len(y)) maps to None, which makes the int32 conversion below fail
    # instead of silently building a wrong graph.
    idx_map = {i: i for i in range(len(y))}
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix: wherever adj.T holds the larger
    # entry it replaces the entry of adj (element-wise maximum of the two).
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    labels = torch.FloatTensor(np.array(labels))

    # Every tenth row (index 0, 10, 20, ...) is held out for testing.
    idx_train = torch.LongTensor([i for i in range(len(y)) if i % 10 != 0])
    idx_test = torch.LongTensor([i for i in range(len(y)) if i % 10 == 0])

    return adj, features, labels, idx_train, idx_test
def accuracy(output, labels):
    """Return the fraction of predictions within 0.1 of their label.

    Each prediction is clamped to the range [0, 1] before it is compared
    with the label at the same position.

    Args:
        output: Tensor of predictions.
        labels: Tensor of target values, paired position by position with
            ``output``.

    Returns:
        Number of positions where ``abs(prediction - label) <= 0.1`` divided
        by ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid for tensors
    # living on another device.
    predictions = output.detach().cpu().numpy()
    targets = labels.detach().cpu().numpy()
    hits = sum(
        abs(min(max(pred, 0), 1) - target) <= 0.1
        for pred, target in zip(predictions, targets)
    )
    return hits / len(labels)
def accuracy_per(output, labels, num):
    """Return the fraction of predictions within ``num / 100`` of their label.

    Each prediction is clamped to the range [0, 1] before it is compared
    with the label at the same position.

    Args:
        output: Tensor of predictions.
        labels: Tensor of target values, paired position by position with
            ``output``.
        num: Tolerance expressed in hundredths; a prediction counts as
            correct when ``abs(prediction - label) <= num / 100``.

    Returns:
        Number of positions within tolerance divided by ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid for tensors
    # living on another device.
    predictions = output.detach().cpu().numpy()
    targets = labels.detach().cpu().numpy()
    tolerance = num / 100
    hits = sum(
        abs(min(max(pred, 0), 1) - target) <= tolerance
        for pred, target in zip(predictions, targets)
    )
    return hits / len(labels)