김지훈

Organizing the federated learning code

CAN_ID_BIT = 29
CAN_DATA_LEN = 8
SYNCAN_DATA_LEN = 4
......
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import Sampler
import const
'''
def int_to_binary(x, bits):
mask = 2 ** torch.arange(bits).to(x.device, x.dtype)
return x.unsqueeze(-1).bitwise_and(mask).ne(0).byte()
'''
def unpack_bits(x, num_bits):
"""
Args:
x (np.ndarray): integer array whose values are unpacked into bits
num_bits (int): number of bits used to represent each value
"""
xshape = list(x.shape)
x = x.reshape([-1, 1])
mask = 2**np.arange(num_bits).reshape([1, num_bits])
return (x & mask).astype(bool).astype(int).reshape(xshape + [num_bits])
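# e.g. unpack_bits(np.array(0b101), 4) -> array([1, 0, 1, 0])
# (LSB-first bit order; a trailing axis of length num_bits is appended)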
# 0, batch * 1, batch * 2 ...
class BatchIntervalSampler(Sampler):
def __init__(self, data_length, batch_size):
# make data_length evenly divisible by batch_size
if data_length % batch_size != 0:
data_length = data_length - (data_length % batch_size)
# def CsvToNumpy(csv_file):
# target_csv = pd.read_csv(csv_file)
# inputs_save_numpy = 'inputs_' + csv_file.split('/')[-1].split('.')[0].split('_')[0] + '.npy'
# labels_save_numpy = 'labels_' + csv_file.split('/')[-1].split('.')[0].split('_')[0] + '.npy'
# print(inputs_save_numpy, labels_save_numpy)
self.indices =[]
# print(data_length)
batch_group_interval = int(data_length / batch_size)
for group_idx in range(batch_group_interval):
for local_idx in range(batch_size):
self.indices.append(group_idx + local_idx * batch_group_interval)
# print('sampler init', self.indices)
# i = 0
# inputs_array = []
# labels_array = []
# print(len(target_csv))
def __iter__(self):
return iter(self.indices)
# while i + const.CAN_ID_BIT - 1 < len(target_csv):
# is_regular = True
# for j in range(const.CAN_ID_BIT):
# l = target_csv.iloc[i + j]
# b = l[2]
# r = (l[b+2+1] == 'R')
# if not r:
# is_regular = False
# break
# inputs = np.zeros((const.CAN_ID_BIT, const.CAN_ID_BIT))
# for idx in range(const.CAN_ID_BIT):
# can_id = int(target_csv.iloc[i + idx, 1], 16)
# inputs[idx] = unpack_bits(np.array(can_id), const.CAN_ID_BIT)
# inputs = np.reshape(inputs, (1, const.CAN_ID_BIT, const.CAN_ID_BIT))
# if is_regular:
# labels = 1
# else:
# labels = 0
# inputs_array.append(inputs)
# labels_array.append(labels)
# i+=1
# if (i % 5000 == 0):
# print(i)
# # break
# inputs_array = np.array(inputs_array)
# labels_array = np.array(labels_array)
# np.save(inputs_save_numpy, arr=inputs_array)
# np.save(labels_save_numpy, arr=labels_array)
# print('done')
def CsvToText(csv_file):
target_csv = pd.read_csv(csv_file)
text_file_name = csv_file.split('/')[-1].split('.')[0] + '.txt'
print(text_file_name)
target_text = open(text_file_name, mode='wt', encoding='utf-8')
i = 0
datum = [ [], [] ]
print(len(target_csv))
while i + const.CAN_ID_BIT - 1 < len(target_csv):
is_regular = True
for j in range(const.CAN_ID_BIT):
l = target_csv.iloc[i + j]
b = l[2]
r = (l[b+2+1] == 'R')
if not r:
is_regular = False
break
if is_regular:
target_text.write("%d R\n" % i)
else:
target_text.write("%d T\n" % i)
i+=1
if (i % 5000 == 0):
print(i)
target_text.close()
print('done')
def __len__(self):
return len(self.indices)
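# e.g. data_length=6, batch_size=2 -> indices [0, 3, 1, 4, 2, 5], i.e. batches
# [0, 3], [1, 4], [2, 5]: each batch slot walks the data in order, so a recurrent
# state carried across batches always sees consecutive packets.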
def record_net_data_stats(label_temp, data_idx_map):
......
return net_class_count, net_data_count
def GetCanDatasetUsingTxtKwarg(total_edge, fold_num, **kwargs):
csv_list = []
total_datum = []
total_label_temp = []
csv_idx = 0
for csv_file, txt_file in kwargs.items():
csv = pd.read_csv(csv_file)
csv_list.append(csv)
txt = open(txt_file, "r")
lines = txt.read().splitlines()
def GetCanDataset(total_edge, fold_num, packet_num, csv_path, txt_path):
csv = pd.read_csv(csv_path)
txt = open(txt_path, "r")
lines = txt.read().splitlines()
idx = 0
local_datum = []
while idx + const.CAN_ID_BIT - 1 < len(csv):
line = lines[idx]
if not line:
break
idx = 0
datum = []
label_temp = []
# [cur_idx ~ cur_idx + packet_num)
while idx + packet_num - 1 < len(csv) // 2:
line = lines[idx + packet_num - 1]
if not line:
break
if line.split(' ')[1] == 'R':
local_datum.append((csv_idx, idx, 1))
total_label_temp.append(1)
else:
local_datum.append((csv_idx, idx, 0))
total_label_temp.append(0)
if line.split(' ')[1] == 'R':
datum.append((idx, 1))
label_temp.append(1)
else:
datum.append((idx, 0))
label_temp.append(0)
idx += 1
if (idx % 1000000 == 0):
print(idx)
idx += 1
if (idx % 1000000 == 0):
print(idx)
csv_idx += 1
total_datum += local_datum
fold_length = int(len(total_label_temp) / 5)
datum = []
label_temp = []
fold_length = int(len(label_temp) / 5)
train_datum = []
train_label_temp = []
for i in range(5):
if i != fold_num:
datum += total_datum[i*fold_length:(i+1)*fold_length]
label_temp += total_label_temp[i*fold_length:(i+1)*fold_length]
train_datum += datum[i*fold_length:(i+1)*fold_length]
train_label_temp += label_temp[i*fold_length:(i+1)*fold_length]
else:
test_datum = total_datum[i*fold_length:(i+1)*fold_length]
test_datum = datum[i*fold_length:(i+1)*fold_length]
min_size = 0
output_class_num = 2
N = len(label_temp)
label_temp = np.array(label_temp)
data_idx_map = {}
while min_size < 512:
idx_batch = [[] for _ in range(total_edge)]
# for each class in the dataset
for k in range(output_class_num):
idx_k = np.where(label_temp == k)[0]
np.random.shuffle(idx_k)
proportions = np.random.dirichlet(np.repeat(1, total_edge))
## Balance
proportions = np.array([p*(len(idx_j)<N/total_edge) for p,idx_j in zip(proportions,idx_batch)])
proportions = proportions/proportions.sum()
proportions = (np.cumsum(proportions)*len(idx_k)).astype(int)[:-1]
idx_batch = [idx_j + idx.tolist() for idx_j,idx in zip(idx_batch,np.split(idx_k,proportions))]
min_size = min([len(idx_j) for idx_j in idx_batch])
N = len(train_label_temp)
train_label_temp = np.array(train_label_temp)
proportions = np.random.dirichlet(np.repeat(1, total_edge))
proportions = np.cumsum(proportions)
idx_batch = [[] for _ in range(total_edge)]
data_idx_map = {}
prev = 0.0
for j in range(total_edge):
np.random.shuffle(idx_batch[j])
idx_batch[j] = [idx for idx in range(int(prev * N), int(proportions[j] * N))]
prev = proportions[j]
data_idx_map[j] = idx_batch[j]
net_class_count, net_data_count = record_net_data_stats(label_temp, data_idx_map)
_, net_data_count = record_net_data_stats(train_label_temp, data_idx_map)
return CanDatasetKwarg(csv_list, datum), data_idx_map, net_class_count, net_data_count, CanDatasetKwarg(csv_list, test_datum, False)
return CanDataset(csv, train_datum, packet_num), data_idx_map, net_data_count, CanDataset(csv, test_datum, packet_num, False)
class CanDatasetKwarg(Dataset):
class CanDataset(Dataset):
def __init__(self, csv_list, datum, is_train=True):
self.csv_list = csv_list
def __init__(self, csv, datum, packet_num, is_train=True):
self.csv = csv
self.datum = datum
self.packet_num = packet_num
if is_train:
self.idx_map = []
else:
self.idx_map = [idx for idx in range(len(self.datum))]
def __len__(self):
return len(self.idx_map)
return len(self.idx_map) - self.packet_num + 1
def set_idx_map(self, data_idx_map):
self.idx_map = data_idx_map
def __getitem__(self, idx):
csv_idx = self.datum[self.idx_map[idx]][0]
start_i = self.datum[self.idx_map[idx]][1]
is_regular = self.datum[self.idx_map[idx]][2]
l = np.zeros((const.CAN_ID_BIT, const.CAN_ID_BIT))
for i in range(const.CAN_ID_BIT):
id_ = int(self.csv_list[csv_idx].iloc[start_i + i, 1], 16)
bits = unpack_bits(np.array(id_), const.CAN_ID_BIT)
l[i] = bits
l = np.reshape(l, (1, const.CAN_ID_BIT, const.CAN_ID_BIT))
return (l, is_regular)
def GetCanDatasetUsingTxt(csv_file, txt_path, length):
csv = pd.read_csv(csv_file)
txt = open(txt_path, "r")
lines = txt.read().splitlines()
idx = 0
datum = [ [], [] ]
while idx + const.CAN_ID_BIT - 1 < len(csv):
if len(datum[0]) >= length//2 and len(datum[1]) >= length//2:
break
line = lines[idx]
if not line:
break
if line.split(' ')[1] == 'R':
if len(datum[0]) < length//2:
datum[0].append((idx, 1))
else:
if len(datum[1]) < length//2:
datum[1].append((idx, 0))
idx += 1
if (idx % 5000 == 0):
print(idx, len(datum[0]), len(datum[1]))
l = int((length // 2) * 0.9)
return CanDataset(csv, datum[0][:l] + datum[1][:l]), \
CanDataset(csv, datum[0][l:] + datum[1][l:])
def GetCanDataset(csv_file, length):
csv = pd.read_csv(csv_file)
i = 0
datum = [ [], [] ]
while i + const.CAN_ID_BIT - 1 < len(csv):
if len(datum[0]) >= length//2 and len(datum[1]) >= length//2:
break
is_regular = True
for j in range(const.CAN_ID_BIT):
l = csv.iloc[i + j]
b = l[2]
r = (l[b+2+1] == 'R')
if not r:
is_regular = False
break
if is_regular:
if len(datum[0]) < length//2:
datum[0].append((i, 1))
else:
if len(datum[1]) < length//2:
datum[1].append((i, 0))
i+=1
if (i % 5000 == 0):
print(i, len(datum[0]), len(datum[1]))
l = int((length // 2) * 0.9)
return CanDataset(csv, datum[0][:l] + datum[1][:l]), \
CanDataset(csv, datum[0][l:] + datum[1][l:])
class CanDataset(Dataset):
def __init__(self, csv, datum):
self.csv = csv
self.datum = datum
def __len__(self):
return len(self.datum)
def __getitem__(self, idx):
start_i = self.datum[idx][0]
is_regular = self.datum[idx][1]
# [cur_idx ~ cur_idx + packet_num)
start_i = self.datum[self.idx_map[idx]][0]
is_regular = self.datum[self.idx_map[idx + self.packet_num - 1]][1]
l = np.zeros((const.CAN_ID_BIT, const.CAN_ID_BIT))
for i in range(const.CAN_ID_BIT):
id = int(self.csv.iloc[start_i + i, 1], 16)
bits = unpack_bits(np.array(id), const.CAN_ID_BIT)
l[i] = bits
l = np.reshape(l, (1, const.CAN_ID_BIT, const.CAN_ID_BIT))
packet = np.zeros((const.CAN_DATA_LEN * self.packet_num))
for next_i in range(self.packet_num):
data_len = self.csv.iloc[start_i + next_i, 1]
for j in range(data_len):
data_value = int(self.csv.iloc[start_i + next_i, 2 + j], 16) / 255.0
packet[j + const.CAN_DATA_LEN * next_i] = data_value
return (l, is_regular)
return torch.from_numpy(packet).float(), is_regular
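# The current __getitem__ above returns packet_num consecutive packets flattened
# into one vector of CAN_DATA_LEN bytes each, every byte scaled to [0, 1], together
# with the R/T label of the last packet in the window.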
if __name__ == "__main__":
kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
test_data_set = dataset.GetCanDatasetUsingTxtKwarg(-1, -1, False, **kwargs)
testloader = torch.utils.data.DataLoader(test_data_set, batch_size=batch_size,
shuffle=False, num_workers=2)
for x, y in testloader:
print(x)
print(y)
break
pass
......
#### utils ####
# for mixed dataset
def CsvToTextCNN(csv_file):
target_csv = pd.read_csv(csv_file)
file_name, extension = os.path.splitext(csv_file)
print(file_name, extension)
target_text = open(file_name + '_CNN8.txt', mode='wt', encoding='utf-8')
idx = 0
print(len(target_csv))
while idx + const.CNN_FRAME_LEN - 1 < len(target_csv):
is_regular = True
for j in range(const.CNN_FRAME_LEN):
l = target_csv.iloc[idx + j]
b = l[1]
r = (l[b+2] == 'R')
if not r:
is_regular = False
break
if is_regular:
target_text.write("%d R\n" % idx)
else:
target_text.write("%d T\n" % idx)
idx += 1
if idx % 300000 == 0:
print(idx)
target_text.close()
print('done')
#### dataset ####
def GetCanDatasetUsingTxtKwarg(total_edge, fold_num, **kwargs):
csv_list = []
total_datum = []
total_label_temp = []
csv_idx = 0
for csv_file, txt_file in kwargs.items():
csv = pd.read_csv(csv_file)
csv_list.append(csv)
txt = open(txt_file, "r")
lines = txt.read().splitlines()
idx = 0
local_datum = []
while idx + const.CAN_ID_BIT - 1 < len(csv):
line = lines[idx]
if not line:
break
if line.split(' ')[1] == 'R':
local_datum.append((csv_idx, idx, 1))
total_label_temp.append(1)
else:
local_datum.append((csv_idx, idx, 0))
total_label_temp.append(0)
idx += 1
if (idx % 1000000 == 0):
print(idx)
csv_idx += 1
total_datum += local_datum
fold_length = int(len(total_label_temp) / 5)
datum = []
label_temp = []
for i in range(5):
if i != fold_num:
datum += total_datum[i*fold_length:(i+1)*fold_length]
label_temp += total_label_temp[i*fold_length:(i+1)*fold_length]
else:
test_datum = total_datum[i*fold_length:(i+1)*fold_length]
min_size = 0
output_class_num = 2
N = len(label_temp)
label_temp = np.array(label_temp)
data_idx_map = {}
while min_size < 512:
idx_batch = [[] for _ in range(total_edge)]
# for each class in the dataset
for k in range(output_class_num):
idx_k = np.where(label_temp == k)[0]
np.random.shuffle(idx_k)
proportions = np.random.dirichlet(np.repeat(1, total_edge))
## Balance
proportions = np.array([p*(len(idx_j)<N/total_edge) for p,idx_j in zip(proportions,idx_batch)])
proportions = proportions/proportions.sum()
proportions = (np.cumsum(proportions)*len(idx_k)).astype(int)[:-1]
idx_batch = [idx_j + idx.tolist() for idx_j,idx in zip(idx_batch,np.split(idx_k,proportions))]
min_size = min([len(idx_j) for idx_j in idx_batch])
for j in range(total_edge):
np.random.shuffle(idx_batch[j])
data_idx_map[j] = idx_batch[j]
net_class_count, net_data_count = record_net_data_stats(label_temp, data_idx_map)
return CanDatasetKwarg(csv_list, datum), data_idx_map, net_class_count, net_data_count, CanDatasetKwarg(csv_list, test_datum, False)
class CanDatasetKwarg(Dataset):
def __init__(self, csv_list, datum, is_train=True):
self.csv_list = csv_list
self.datum = datum
if is_train:
self.idx_map = []
else:
self.idx_map = [idx for idx in range(len(self.datum))]
def __len__(self):
return len(self.idx_map)
def set_idx_map(self, data_idx_map):
self.idx_map = data_idx_map
def __getitem__(self, idx):
csv_idx = self.datum[self.idx_map[idx]][0]
start_i = self.datum[self.idx_map[idx]][1]
is_regular = self.datum[self.idx_map[idx]][2]
l = np.zeros((const.CAN_ID_BIT, const.CAN_ID_BIT))
for i in range(const.CAN_ID_BIT):
id_ = int(self.csv_list[csv_idx].iloc[start_i + i, 1], 16)
bits = unpack_bits(np.array(id_), const.CAN_ID_BIT)
l[i] = bits
l = np.reshape(l, (1, const.CAN_ID_BIT, const.CAN_ID_BIT))
return (l, is_regular)
def GetCanDataset(total_edge, fold_num, csv_path, txt_path):
csv = pd.read_csv(csv_path)
txt = open(txt_path, "r")
lines = txt.read().splitlines()
frame_size = const.CAN_FRAME_LEN
idx = 0
datum = []
label_temp = []
while idx + frame_size - 1 < len(csv) // 2:
# csv_row = csv.iloc[idx + frame_size - 1]
# data_len = csv_row[1]
# is_regular = (csv_row[data_len + 2] == 'R')
# if is_regular:
# datum.append((idx, 1))
# label_temp.append(1)
# else:
# datum.append((idx, 0))
# label_temp.append(0)
line = lines[idx]
if not line:
break
if line.split(' ')[1] == 'R':
datum.append((idx, 1))
label_temp.append(1)
else:
datum.append((idx, 0))
label_temp.append(0)
idx += 1
if (idx % 1000000 == 0):
print(idx)
fold_length = int(len(label_temp) / 5)
train_datum = []
train_label_temp = []
for i in range(5):
if i != fold_num:
train_datum += datum[i*fold_length:(i+1)*fold_length]
train_label_temp += label_temp[i*fold_length:(i+1)*fold_length]
else:
test_datum = datum[i*fold_length:(i+1)*fold_length]
min_size = 0
output_class_num = 2
N = len(train_label_temp)
train_label_temp = np.array(train_label_temp)
data_idx_map = {}
# proportions = np.random.dirichlet(np.repeat(1, total_edge))
# proportions = np.cumsum(proportions)
# idx_batch = [[] for _ in range(total_edge)]
# prev = 0.0
# for j in range(total_edge):
# idx_batch[j] = [idx for idx in range(int(prev * N), int(proportions[j] * N))]
# prev = proportions[j]
# np.random.shuffle(idx_batch[j])
# data_idx_map[j] = idx_batch[j]
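# Non-IID split: for every class, draw per-edge proportions from Dirichlet(alpha=1),
# zero out the share of edges that already hold >= N / total_edge samples (the
# 'Balance' step), turn the cumulative proportions into split points over the
# shuffled class indices, and append each slice to one edge. Repeat until the
# smallest edge holds at least 512 samples.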
while min_size < 512:
idx_batch = [[] for _ in range(total_edge)]
# for each class in the dataset
for k in range(output_class_num):
idx_k = np.where(train_label_temp == k)[0]
np.random.shuffle(idx_k)
proportions = np.random.dirichlet(np.repeat(1, total_edge))
## Balance
proportions = np.array([p*(len(idx_j)<N/total_edge) for p,idx_j in zip(proportions,idx_batch)])
proportions = proportions/proportions.sum()
proportions = (np.cumsum(proportions)*len(idx_k)).astype(int)[:-1]
idx_batch = [idx_j + idx.tolist() for idx_j,idx in zip(idx_batch,np.split(idx_k,proportions))]
min_size = min([len(idx_j) for idx_j in idx_batch])
for j in range(total_edge):
np.random.shuffle(idx_batch[j])
data_idx_map[j] = idx_batch[j]
_, net_data_count = record_net_data_stats(train_label_temp, data_idx_map)
return CanDataset(csv, train_datum), data_idx_map, net_data_count, CanDataset(csv, test_datum, False)
class CanDataset(Dataset):
def __init__(self, csv, datum, is_train=True):
self.csv = csv
self.datum = datum
self.is_train = is_train
if self.is_train:
self.idx_map = []
else:
self.idx_map = [idx for idx in range(len(self.datum))]
def __len__(self):
return len(self.idx_map)
def set_idx_map(self, data_idx_map):
self.idx_map = data_idx_map
def __getitem__(self, idx):
start_i = self.datum[self.idx_map[idx]][0]
if self.is_train:
is_regular = self.datum[self.idx_map[idx]][1]
l = np.zeros((const.CAN_FRAME_LEN, const.CAN_DATA_LEN))
'''
Every byte value is normalized: 0 ~ 255 -> 0.0 ~ 1.0
'''
for i in range(const.CAN_FRAME_LEN):
data_len = self.csv.iloc[start_i + i, 1]
for j in range(data_len):
k = int(self.csv.iloc[start_i + i, 2 + j], 16) / 255.0
l[i][j] = k
l = np.reshape(l, (1, const.CAN_FRAME_LEN, const.CAN_DATA_LEN))
else:
l = np.zeros((const.CAN_DATA_LEN))
data_len = self.csv.iloc[start_i, 1]
is_regular = self.csv.iloc[start_i, data_len + 2] == 'R'
if is_regular:
is_regular = 1
else:
is_regular = 0
for j in range(data_len):
k = int(self.csv.iloc[start_i, 2 + j], 16) / 255.0
l[j] = k
l = np.reshape(l, (1, const.CAN_DATA_LEN))
return (l, is_regular)
def GetCanDatasetCNN(total_edge, fold_num, csv_path, txt_path):
csv = pd.read_csv(csv_path)
txt = open(txt_path, "r")
lines = txt.read().splitlines()
idx = 0
datum = []
label_temp = []
while idx < len(csv) // 2:
line = lines[idx]
if not line:
break
if line.split(' ')[1] == 'R':
datum.append((idx, 1))
label_temp.append(1)
else:
datum.append((idx, 0))
label_temp.append(0)
idx += 1
if (idx % 1000000 == 0):
print(idx)
fold_length = int(len(label_temp) / 5)
train_datum = []
train_label_temp = []
for i in range(5):
if i != fold_num:
train_datum += datum[i*fold_length:(i+1)*fold_length]
train_label_temp += label_temp[i*fold_length:(i+1)*fold_length]
else:
test_datum = datum[i*fold_length:(i+1)*fold_length]
N = len(train_label_temp)
train_label_temp = np.array(train_label_temp)
proportions = np.random.dirichlet(np.repeat(1, total_edge))
proportions = np.cumsum(proportions)
idx_batch = [[] for _ in range(total_edge)]
data_idx_map = {}
prev = 0.0
for j in range(total_edge):
idx_batch[j] = [idx for idx in range(int(prev * N), int(proportions[j] * N))]
prev = proportions[j]
data_idx_map[j] = idx_batch[j]
_, net_data_count = record_net_data_stats(train_label_temp, data_idx_map)
return CanDatasetCNN(csv, train_datum), data_idx_map, net_data_count, CanDatasetCNN(csv, test_datum, False)
class CanDatasetCNN(Dataset):
def __init__(self, csv, datum, is_train=True):
self.csv = csv
self.datum = datum
if is_train:
self.idx_map = []
else:
self.idx_map = [idx for idx in range(len(self.datum))]
def __len__(self):
return len(self.idx_map)
def set_idx_map(self, data_idx_map):
self.idx_map = data_idx_map
def __getitem__(self, idx):
start_i = self.datum[self.idx_map[idx]][0]
is_regular = self.datum[self.idx_map[idx]][1]
packet = np.zeros((const.CNN_FRAME_LEN, const.CNN_FRAME_LEN))
for i in range(const.CNN_FRAME_LEN):
data_len = self.csv.iloc[start_i + i, 1]
for j in range(data_len):
k = int(self.csv.iloc[start_i + i, 2 + j], 16) / 255.0
packet[i][j] = k
packet = np.reshape(packet, (1, const.CNN_FRAME_LEN, const.CNN_FRAME_LEN))
return (packet, is_regular)
def unpack_bits(x, num_bits):
"""
Args:
x (np.ndarray): integer array whose values are unpacked into bits
num_bits (int): number of bits used to represent each value
"""
xshape = list(x.shape)
x = x.reshape([-1, 1])
mask = 2**np.arange(num_bits).reshape([1, num_bits])
return (x & mask).astype(bool).astype(int).reshape(xshape + [num_bits])
import utils
import copy
import argparse
import time
import math
import numpy as np
import os
from collections import OrderedDict
import torch
import torch.optim as optim
import torch.nn as nn
import model
import utils
import dataset
# for google colab reload
import importlib
importlib.reload(utils)
importlib.reload(model)
importlib.reload(utils)
importlib.reload(dataset)
from utils import *
## paramter
# shared
criterion = nn.CrossEntropyLoss()
C = 0.1
#
# prox
mu = 0.001
#
# time weight
twa_exp = 1.1
#
# dynamic weight
H = 0.5
P = 0.1
G = 0.1
R = 0.1
alpha, beta, gamma = 40.0/100.0, 40.0/100.0, 20.0/100.0
#
## end
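# Roles of the parameters above: C is the fraction of edges sampled each round,
# mu is the FedProx proximal coefficient, and twa_exp is the time-decay base for
# temporally weighted aggregation. H/P/G/R and alpha/beta/gamma steer FedDW
# participant selection and dynamic weighting (G is the sampled fraction; the
# exact use of the others is in FedDW code elided below).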
def add_args(parser):
# parser.add_argument('--model', type=str, default='moderate-cnn',
# help='neural network used in training')
parser.add_argument('--dataset', type=str, default='cifar10', metavar='N',
help='dataset used for training')
parser.add_argument('--packet_num', type=int, default=1,
help='packet number used in training, 1 ~ 3')
parser.add_argument('--dataset', type=str, default='can',
help='dataset used for training, can or syncan')
parser.add_argument('--fold_num', type=int, default=0,
help='5-fold, 0 ~ 4')
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
parser.add_argument('--batch_size', type=int, default=128,
help='input batch size for training')
parser.add_argument('--lr', type=float, default=0.002, metavar='LR',
parser.add_argument('--lr', type=float, default=0.001,
help='learning rate')
parser.add_argument('--n_nets', type=int, default=100, metavar='NN',
parser.add_argument('--n_nets', type=int, default=100,
help='number of workers in a distributed cluster')
parser.add_argument('--comm_type', type=str, default='fedtwa',
help='which type of communication strategy is going to be used: layerwise/blockwise')
parser.add_argument('--comm_round', type=int, default=10,
parser.add_argument('--comm_type', type=str, default='fedprox',
help='type of communication, [fedavg, fedprox, fedtwa, feddw, edge]')
parser.add_argument('--comm_round', type=int, default=50,
help='how many rounds of communication we should use')
parser.add_argument('--weight_save_path', type=str, default='./weights',
help='model weight save path')
args = parser.parse_args(args=[])
return args
def test_model(fed_model, args, testloader, device, cr):
fed_model.to(device)
fed_model.eval()
cnt = 0
step_acc = 0.0
with torch.no_grad():
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.to(device), labels.to(device)
outputs, packet_state = fed_model(inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
_, preds = torch.max(outputs, 1)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
if i % 200 == 0:
print('test [%4d/%4d] acc: %.3f' % (i, len(testloader), (step_acc / cnt).item()))
# break
fed_accuracy = (step_acc / cnt).item()
print('test acc', fed_accuracy)
fed_model.to('cpu')
fed_model.train()
torch.save(fed_model.state_dict(), os.path.join(args.weight_save_path, '%s_%d_%.4f.pth' % (args.comm_type, cr, fed_accuracy)))
def start_fedavg(fed_model, args,
train_data_set,
data_idx_map,
......
edges,
device):
print("start fed avg")
criterion = nn.CrossEntropyLoss()
C = 0.1
num_edge = int(max(C * args.n_nets, 1))
total_data_count = 0
for _, data_count in net_data_count.items():
......
for edge_progress, edge_index in enumerate(selected_edge):
train_data_set.set_idx_map(data_idx_map[edge_index])
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size,
shuffle=True, num_workers=2)
sampler = dataset.BatchIntervalSampler(len(train_data_set), args.batch_size)
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
print("[%2d/%2d] edge: %d, data len: %d" % (edge_progress, len(selected_edge), edge_index, len(train_data_set)))
edges[edge_index] = copy.deepcopy(fed_model)
edges[edge_index].to(device)
edges[edge_index].train()
edge_opt = optim.Adam(params=edges[edge_index].parameters(), lr=args.lr)
# train
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for data_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
inputs, labels = inputs.to(device), labels.to(device)
edge_pred, packet_state = edges[edge_index](inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
edge_opt.zero_grad()
edge_pred = edges[edge_index](inputs)
edge_loss = criterion(edge_pred, labels)
edge_loss.backward()
......
local_state = edge.state_dict()
for key in fed_model.state_dict().keys():
if k == 0:
update_state[key] = local_state[key] * net_data_count[k] / total_data_count
update_state[key] = local_state[key] * (net_data_count[k] / total_data_count)
else:
update_state[key] += local_state[key] * net_data_count[k] / total_data_count
update_state[key] += local_state[key] * (net_data_count[k] / total_data_count)
fed_model.load_state_dict(update_state)
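# FedAvg aggregation above: each parameter of the global model is the
# data-count-weighted average of the edge models, w_global = sum_k (n_k / n_total) * w_k.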
if cr % 10 == 0:
fed_model.to(device)
fed_model.eval()
total_loss = 0.0
cnt = 0
step_acc = 0.0
with torch.no_grad():
for i, data in enumerate(testloader):
inputs, labels = data
inputs, labels = inputs.float().to(device), labels.long().to(device)
outputs = fed_model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
running_loss = loss.item() * inputs.shape[0]
total_loss += running_loss
if i % 200 == 0:
print('test [%4d] loss: %.3f' % (i, loss.item()))
# break
print((step_acc / cnt).data)
print(total_loss / cnt)
fed_model.to('cpu')
test_model(fed_model, args, testloader, device, cr)
def start_fedprox(fed_model, args,
......
testloader,
device):
print("start fed prox")
criterion = nn.CrossEntropyLoss()
mu = 0.001
C = 0.1
num_edge = int(max(C * args.n_nets, 1))
fed_model.to(device)
......
selected_edge = np.random.choice(args.n_nets, num_edge, replace=False)
print("selected edge", selected_edge)
total_data_length = 0
edge_data_len = []
for edge_progress, edge_index in enumerate(selected_edge):
train_data_set.set_idx_map(data_idx_map[edge_index])
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size,
shuffle=True, num_workers=2)
sampler = dataset.BatchIntervalSampler(len(train_data_set), args.batch_size)
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
print("[%2d/%2d] edge: %d, data len: %d" % (edge_progress, len(selected_edge), edge_index, len(train_data_set)))
total_data_length += len(train_data_set)
edge_data_len.append(len(train_data_set))
edge_model = copy.deepcopy(fed_model)
edge_model.to(device)
edge_model.train()
edge_opt = optim.Adam(params=edge_model.parameters(),lr=args.lr)
# train
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for data_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
inputs, labels = inputs.to(device), labels.to(device)
edge_pred, packet_state = edge_model(inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
edge_opt.zero_grad()
edge_pred = edge_model(inputs)
edge_loss = criterion(edge_pred, labels)
# prox term
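# The proximal-term code itself is elided here. A typical FedProx penalty
# (a sketch only; the elided code may differ) adds mu/2 * ||w_edge - w_global||^2
# to the task loss, e.g.:
#     prox_term = 0.0
#     for w, w_glob in zip(edge_model.parameters(), fed_model.parameters()):
#         prox_term += (w - w_glob).norm(2) ** 2
#     edge_loss += (mu / 2.0) * prox_term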
......
fed_model.to(device)
if cr % 10 == 0:
fed_model.eval()
total_loss = 0.0
cnt = 0
step_acc = 0.0
with torch.no_grad():
for i, data in enumerate(testloader):
inputs, labels = data
inputs, labels = inputs.float().to(device), labels.long().to(device)
outputs = fed_model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
running_loss = loss.item() * inputs.shape[0]
total_loss += running_loss
if i % 200 == 0:
print('test [%4d] loss: %.3f' % (i, loss.item()))
# break
print((step_acc / cnt).data)
print(total_loss / cnt)
test_model(fed_model, args, testloader, device, cr)
fed_model.to(device)
def start_fedtwa(fed_model, args,
......
device):
# TEFL, without asynchronous model update
print("start fed temporally weighted aggregation")
criterion = nn.CrossEntropyLoss()
time_stamp = [0 for worker in range(args.n_nets)]
twa_exp = math.e / 2.0
C = 0.1
num_edge = int(max(C * args.n_nets, 1))
total_data_count = 0
for _, data_count in net_data_count.items():
......
for edge_progress, edge_index in enumerate(selected_edge):
time_stamp[edge_index] = cr
train_data_set.set_idx_map(data_idx_map[edge_index])
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size,
shuffle=True, num_workers=2)
sampler = dataset.BatchIntervalSampler(len(train_data_set), args.batch_size)
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
print("[%2d/%2d] edge: %d, data len: %d" % (edge_progress, len(selected_edge), edge_index, len(train_data_set)))
edges[edge_index] = copy.deepcopy(fed_model)
edges[edge_index].to(device)
edges[edge_index].train()
edge_opt = optim.Adam(params=edges[edge_index].parameters(), lr=args.lr)
# train
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for data_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
edge_pred, packet_state = edges[edge_index](inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
edge_opt.zero_grad()
edge_pred = edges[edge_index](inputs)
edge_loss = criterion(edge_pred, labels)
edge_loss.backward()
......
edges[edge_index].to('cpu')
# cal weight using time stamp
# in the paper the exponent is (cr - time_stamp[k]), but that gave a high error here
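# As implemented below, edge k is weighted by
# (net_data_count[k] / total_data_count) * twa_exp ** -(cr - 2 - time_stamp[k]),
# so edges whose last participation (time_stamp) is older contribute less.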
update_state = OrderedDict()
for k, edge in enumerate(edges):
local_state = edge.state_dict()
for key in fed_model.state_dict().keys():
if k == 0:
update_state[key] = local_state[key] * (net_data_count[k] / total_data_count) * math.pow(twa_exp, -(cr - time_stamp[k]))
update_state[key] = local_state[key] * (net_data_count[k] / total_data_count) * math.pow(twa_exp, -(cr -2 - time_stamp[k]))
else:
update_state[key] += local_state[key] * (net_data_count[k] / total_data_count) * math.pow(twa_exp, -(cr - time_stamp[k]))
update_state[key] += local_state[key] * (net_data_count[k] / total_data_count) * math.pow(twa_exp, -(cr -2 - time_stamp[k]))
fed_model.load_state_dict(update_state)
if cr % 10 == 0:
fed_model.to(device)
fed_model.eval()
total_loss = 0.0
cnt = 0
step_acc = 0.0
with torch.no_grad():
for i, data in enumerate(testloader):
inputs, labels = data
inputs, labels = inputs.float().to(device), labels.long().to(device)
outputs = fed_model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
running_loss = loss.item() * inputs.shape[0]
total_loss += running_loss
if i % 200 == 0:
print('test [%4d] loss: %.3f' % (i, loss.item()))
# break
print((step_acc / cnt).data)
print(total_loss / cnt)
fed_model.to('cpu')
test_model(fed_model, args, testloader, device, cr)
def start_feddw(fed_model, args,
......
edges,
device):
print("start fed Node-aware Dynamic Weighting")
worker_selected_frequency = [0 for worker in range(args.n_nets)]
criterion = nn.CrossEntropyLoss()
H = 0.5
P = 0.5
G = 0.1
R = 0.1
alpha, beta, gamma = 30.0/100.0, 50.0/100.0, 20.0/100.0
num_edge = int(max(G * args.n_nets, 1))
# cal data weight for selecting participants
......
for edge_progress, edge_index in enumerate(selected_edge):
worker_selected_frequency[edge_index] += 1
train_data_set.set_idx_map(data_idx_map[edge_index])
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size,
shuffle=True, num_workers=2)
sampler = dataset.BatchIntervalSampler(len(train_data_set), args.batch_size)
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
print("[%2d/%2d] edge: %d, data len: %d" % (edge_progress, len(selected_edge), edge_index, len(train_data_set)))
edges[edge_index] = copy.deepcopy(fed_model)
edges[edge_index].to(device)
edges[edge_index].train()
edge_opt = optim.Adam(params=edges[edge_index].parameters(), lr=args.lr)
# train
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for data_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
edge_pred, packet_state = edges[edge_index](inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
edge_opt.zero_grad()
edge_pred = edges[edge_index](inputs)
edge_loss = criterion(edge_pred, labels)
edge_loss.backward()
......
# get edge accuracy using subset of testset
edges[edge_index].eval()
print("[%2d/%2d] edge: %d, cal accuracy" % (edge_progress, len(selected_edge), edge_index))
print("[%2d/%2d] edge: %d, cal local accuracy" % (edge_progress, len(selected_edge), edge_index))
cnt = 0
step_acc = 0.0
with torch.no_grad():
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for inputs, labels in local_test_loader:
inputs, labels = inputs.float().to(device), labels.long().to(device)
outputs = edges[edge_index](inputs)
_, preds = torch.max(outputs, 1)
edge_pred, packet_state = edges[edge_index](inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
_, preds = torch.max(edge_pred, 1)
loss = criterion(outputs, labels)
loss = criterion(edge_pred, labels)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
......
# break
worker_local_accuracy[edge_index] = (step_acc / cnt).item()
print(worker_local_accuracy[edge_index])
print('edge local accuracy', worker_local_accuracy[edge_index])
edges[edge_index].to('cpu')
# cal weight dynamically
......
fed_model.load_state_dict(update_state)
if cr % 10 == 0:
fed_model.to(device)
fed_model.eval()
total_loss = 0.0
cnt = 0
step_acc = 0.0
with torch.no_grad():
for i, data in enumerate(testloader):
inputs, labels = data
inputs, labels = inputs.float().to(device), labels.long().to(device)
test_model(fed_model, args, testloader, device, cr)
outputs = fed_model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
cnt += inputs.shape[0]
def start_only_edge(args,
train_data_set,
data_idx_map,
testloader,
edges,
device):
print("start only edge")
total_epoch = int(args.comm_round * C)
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
running_loss = loss.item() * inputs.shape[0]
total_loss += running_loss
if i % 200 == 0:
print('test [%4d] loss: %.3f' % (i, loss.item()))
for cr in range(1, total_epoch + 1):
print("Edge round : %d" % (cr))
edge_accuracy_list = []
for edge_index, edge_model in enumerate(edges):
train_data_set.set_idx_map(data_idx_map[edge_index])
sampler = dataset.BatchIntervalSampler(len(train_data_set), args.batch_size)
train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
print("edge[%2d/%2d] data len: %d" % (edge_index, len(edges), len(train_data_set)))
edge_model.to(device)
edge_model.train()
edge_opt = optim.Adam(params=edge_model.parameters(),lr=args.lr)
# train
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for data_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
edge_pred, packet_state = edge_model(inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
edge_opt.zero_grad()
edge_loss = criterion(edge_pred, labels)
edge_loss.backward()
edge_opt.step()
edge_loss = edge_loss.item()
if data_idx % 100 == 0:
print('[%4d] loss: %.3f' % (data_idx, edge_loss))
# break
print((step_acc / cnt).data)
print(total_loss / cnt)
fed_model.to('cpu')
# test
# if cr < 4:
# continue
edge_model.eval()
total_loss = 0.0
cnt = 0
step_acc = 0.0
with torch.no_grad():
packet_state = torch.zeros(args.batch_size, model.STATE_DIM).to(device)
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().to(device), labels.long().to(device)
outputs, packet_state = edge_model(inputs, packet_state)
packet_state = torch.autograd.Variable(packet_state, requires_grad=False)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
cnt += inputs.shape[0]
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
running_loss = loss.item() * inputs.shape[0]
total_loss += running_loss
if i % 200 == 0:
print('test [%4d] loss: %.3f' % (i, loss.item()))
# break
edge_accuracy = (step_acc / cnt).item()
edge_accuracy_list.append(edge_accuracy)
print("edge[%2d/%2d] acc: %.4f" % (edge_index, len(edges), edge_accuracy))
edge_model.to('cpu')
# if cr < 4:
# continue
edge_accuracy_avg = sum(edge_accuracy_list) / len(edge_accuracy_list)
torch.save(edges[0].state_dict(), os.path.join(args.weight_save_path, 'edge_%d_%.4f.pth' % (cr, edge_accuracy_avg)))
def start_train():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print('device:', device)
args = add_args(argparse.ArgumentParser())
# make weight folder
os.makedirs(args.weight_save_path, exist_ok=True)
# for reproducibility
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
print("Loading data...")
# kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt',
# "./dataset/Fuzzy_dataset.csv" : './Fuzzy_dataset.txt',
# "./dataset/RPM_dataset.csv" : './RPM_dataset.txt',
# "./dataset/gear_dataset.csv" : './gear_dataset.txt'
# }
kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
train_data_set, data_idx_map, net_class_count, net_data_count, test_data_set = dataset.GetCanDatasetUsingTxtKwarg(args.n_nets, args.fold_num, **kwargs)
testloader = torch.utils.data.DataLoader(test_data_set, batch_size=args.batch_size,
shuffle=False, num_workers=2)
fed_model = model.Net()
args.comm_type = 'feddw'
if args.dataset == 'can':
train_data_set, data_idx_map, net_data_count, test_data_set = dataset.GetCanDataset(args.n_nets, args.fold_num, args.packet_num, "./dataset/Mixed_dataset.csv", "./dataset/Mixed_dataset_1.txt")
elif args.dataset == 'syncan':
train_data_set, data_idx_map, net_data_count, test_data_set = dataset.GetCanDataset(args.n_nets, args.fold_num, args.packet_num, "./dataset/test_mixed.csv", "./dataset/Mixed_dataset_1.txt")
sampler = dataset.BatchIntervalSampler(len(test_data_set), args.batch_size)
testloader = torch.utils.data.DataLoader(test_data_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
if args.dataset == 'can':
fed_model = model.OneNet(args.packet_num)
edges = [model.OneNet(args.packet_num) for _ in range(args.n_nets)]
elif args.dataset == 'syncan':
fed_model = model.OneNet(args.packet_num)
edges = [model.OneNet(args.packet_num) for _ in range(args.n_nets)]
if args.comm_type == "fedavg":
edges, _, _ = init_models(args.n_nets, args)
start_fedavg(fed_model, args,
train_data_set,
data_idx_map,
......
testloader,
device)
elif args.comm_type == "fedtwa":
edges, _, _ = init_models(args.n_nets, args)
start_fedtwa(fed_model, args,
train_data_set,
data_idx_map,
......
device)
elif args.comm_type == "feddw":
local_test_set = copy.deepcopy(test_data_set)
# mnist train 60,000 / test 10,000 / 1,000
# CAN train ~ 13,000,000 / test 2,000,000 / for speed 40,000
local_test_idx = np.random.choice(len(local_test_set), len(local_test_set) // 50, replace=False)
# in paper, mnist train 60,000 / test 10,000 / 1,000 - 10%
# CAN train ~ 1,400,000 / test 300,000 / for speed 15,000 - 5%
local_test_idx = [idx for idx in range(0, len(local_test_set) // 20)]
local_test_set.set_idx_map(local_test_idx)
local_test_loader = torch.utils.data.DataLoader(local_test_set, batch_size=args.batch_size,
shuffle=False, num_workers=2)
sampler = dataset.BatchIntervalSampler(len(local_test_set), args.batch_size)
local_test_loader = torch.utils.data.DataLoader(local_test_set, batch_size=args.batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
edges, _, _ = init_models(args.n_nets, args)
start_feddw(fed_model, args,
train_data_set,
data_idx_map,
......
local_test_loader,
edges,
device)
elif args.comm_type == "edge":
start_only_edge(args,
train_data_set,
data_idx_map,
testloader,
edges,
device)
if __name__ == "__main__":
start_train()
......
import torch.nn as nn
import torch.nn.functional as F
import torch
import const
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.f1 = nn.Sequential(
nn.Conv2d(1, 2, 3),
nn.ReLU(True),
STATE_DIM = 8 * 32
class OneNet(nn.Module):
def __init__(self, packet_num):
super(OneNet, self).__init__()
IN_DIM = 8 * packet_num # byte
FEATURE_DIM = 32
# transform the given packet into a tensor which is in a good feature space
self.feature_layer = nn.Sequential(
nn.Linear(IN_DIM, 32),
nn.ReLU(),
nn.Linear(32, FEATURE_DIM),
nn.ReLU()
)
self.f2 = nn.Sequential(
nn.Conv2d(2, 4, 3),
nn.ReLU(True),
)
self.f3 = nn.Sequential(
nn.Conv2d(4, 8, 3),
nn.ReLU(True),
# generates the current state 's'
self.f = nn.Sequential(
nn.Linear(STATE_DIM + FEATURE_DIM, STATE_DIM),
nn.ReLU(),
nn.Linear(STATE_DIM, STATE_DIM),
nn.ReLU()
)
self.f4 = nn.Sequential(
nn.Linear(8 * 23 * 23, 2),
# check whether the given packet is malicious
self.g = nn.Sequential(
nn.Linear(STATE_DIM + FEATURE_DIM, 64),
nn.ReLU(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, 2),
)
def forward(self, x):
x = self.f1(x)
x = self.f2(x)
x = self.f3(x)
x = torch.flatten(x, 1)
x = self.f4(x)
return x
def forward(self, x, s):
x = self.feature_layer(x)
x = torch.cat((x, s), 1)
s2 = self.f(x)
x2 = self.g(x)
return x2, s2
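# Usage sketch (illustrative, packet_num = 1 so the input is 8 normalized bytes):
#     net = OneNet(packet_num=1)
#     x = torch.randn(4, 8)            # batch of 4 packets
#     s = torch.zeros(4, STATE_DIM)    # initial recurrent state
#     logits, s = net(x, s)            # logits: (4, 2); s is fed to the next packet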
......
import tensorrt as trt
onnx_file_name = 'bert.onnx'
tensorrt_file_name = 'bert.plan'
fp16_mode = True
# int8_mode = True
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(EXPLICIT_BATCH)
parser = trt.OnnxParser(network, TRT_LOGGER)
builder.max_workspace_size = (1 << 30)
builder.fp16_mode = fp16_mode
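# Note: max_workspace_size / fp16_mode on the builder and build_cuda_engine below
# follow the older (<= 7.x) TensorRT API; newer releases move these flags onto an
# IBuilderConfig and build a serialized network instead.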
# builder.int8_mode = int8_mode
with open(onnx_file_name, 'rb') as model:
if not parser.parse(model.read()):
for error in range(parser.num_errors):
print (parser.get_error(error))
# for int8 mode
# print(network.num_layers, network.num_inputs , network.num_outputs)
# for layer_index in range(network.num_layers):
# layer = network[layer_index]
# print(layer.name)
# tensor = layer.get_output(0)
# print(tensor.name)
# tensor.dynamic_range = (0, 255)
# input_tensor = layer.get_input(0)
# print(input_tensor)
# input_tensor.dynamic_range = (0, 255)
engine = builder.build_cuda_engine(network)
buf = engine.serialize()
with open(tensorrt_file_name, 'wb') as f:
f.write(buf)
print('done, trt model')
import tensorrt as trt
import pycuda.driver as cuda
import numpy as np
import torch
import pycuda.autoinit
import dataset
import model
import time
# print(dir(trt))
tensorrt_file_name = 'bert.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
with open(tensorrt_file_name, 'rb') as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
context = engine.create_execution_context()
# class HostDeviceMem(object):
# def __init__(self, host_mem, device_mem):
# self.host = host_mem
# self.device = device_mem
# def __str__(self):
# return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
# def __repr__(self):
# return self.__str__()
# inputs, outputs, bindings, stream = [], [], [], []
# for binding in engine:
# size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
# dtype = trt.nptype(engine.get_binding_dtype(binding))
# host_mem = cuda.pagelocked_empty(size, dtype)
# device_mem = cuda.mem_alloc(host_mem.nbytes)
# bindings.append(int(device_mem))
# if engine.binding_is_input(binding):
# inputs.append( HostDeviceMem(host_mem, device_mem) )
# else:
# outputs.append(HostDeviceMem(host_mem, device_mem))
# input_ids = np.ones([1, 1, 29, 29])
# numpy_array_input = [input_ids]
# hosts = [input.host for input in inputs]
# trt_types = [trt.int32]
# for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types):
# numpy_array = np.asarray(numpy_array).ravel()
# np.copyto(host, numpy_array)
# def do_inference(context, bindings, inputs, outputs, stream):
# [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
# context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
# [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
# stream.synchronize()
# return [out.host for out in outputs]
# trt_outputs = do_inference(
# context=context,
# bindings=bindings,
# inputs=inputs,
# outputs=outputs,
# stream=stream)
def infer(context, input_img, output_size, batch_size):
# Load engine
# engine = context.get_engine()
# assert(engine.get_nb_bindings() == 2)
# Convert input data to float32
input_img = input_img.astype(np.float32)
# Create host buffer to receive data
output = np.empty(output_size, dtype = np.float32)
# Allocate device memory
d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
bindings = [int(d_input), int(d_output)]
stream = cuda.Stream()
# Transfer input data to device
cuda.memcpy_htod_async(d_input, input_img, stream)
# Execute model
context.execute_async(batch_size, bindings, stream.handle, None)
# Transfer predictions back
cuda.memcpy_dtoh_async(output, d_output, stream)
# Synchronize threads
stream.synchronize()
# Return predictions
return output
# kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
# train_data_set, data_idx_map, net_class_count, net_data_count, test_data_set = dataset.GetCanDatasetUsingTxtKwarg(100, 0, **kwargs)
# testloader = torch.utils.data.DataLoader(test_data_set, batch_size=256,
# shuffle=False, num_workers=2)
check_time = time.time()
cnt = 0
temp = np.ones([256, 1, 29, 29])
for idx in range(100):
# for i, (inputs, labels) in enumerate(testloader):
trt_outputs = infer(context, temp, (256, 2), 256)
print(trt_outputs.shape)
# print(trt_outputs)
# print(np.argmax(trt_outputs, axis=0))
# cnt += 1
# if cnt == 100:
# break
print(time.time() - check_time)
tensorrt_file_name = 'bert_int.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
with open(tensorrt_file_name, 'rb') as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
context = engine.create_execution_context()
check_time = time.time()
cnt = 0
temp = np.ones([256, 1, 29, 29])
for idx in range(100):
# for i, (inputs, labels) in enumerate(testloader):
trt_outputs = infer(context, temp, (256, 2), 256)
print(trt_outputs.shape)
# print(trt_outputs)
# print(np.argmax(trt_outputs, axis=0))
# cnt += 1
# if cnt == 100:
# break
print(time.time() - check_time)
test_model = model.Net().cuda()
check_time = time.time()
cnt = 0
temp = torch.randn(256, 1, 29, 29).cuda()
for idx in range(100):
# for i, (inputs, labels) in enumerate(testloader):
# inputs = inputs.float().cuda()
normal_outputs = test_model(temp)
# print(normal_outputs)
print(normal_outputs.shape)
cnt += 1
if cnt == 100:
break
print(time.time() - check_time)
import tensorrt as trt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import time
model_path = "bert.onnx"
input_size = 32
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# def build_engine(model_path):
# with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
# builder.max_workspace_size = 1<<20
# builder.max_batch_size = 1
# with open(model_path, "rb") as f:
# parser.parse(f.read())
# engine = builder.build_cuda_engine(network)
# return engine
def alloc_buf(engine):
# host cpu mem
h_in_size = trt.volume(engine.get_binding_shape(0))
h_out_size = trt.volume(engine.get_binding_shape(1))
h_in_dtype = trt.nptype(engine.get_binding_dtype(0))
h_out_dtype = trt.nptype(engine.get_binding_dtype(1))
in_cpu = cuda.pagelocked_empty(h_in_size, h_in_dtype)
out_cpu = cuda.pagelocked_empty(h_out_size, h_out_dtype)
# allocate gpu mem
in_gpu = cuda.mem_alloc(in_cpu.nbytes)
out_gpu = cuda.mem_alloc(out_cpu.nbytes)
stream = cuda.Stream()
return in_cpu, out_cpu, in_gpu, out_gpu, stream
def inference(engine, context, inputs, out_cpu, in_gpu, out_gpu, stream):
# async version
# with engine.create_execution_context() as context: # cost time to initialize
# cuda.memcpy_htod_async(in_gpu, inputs, stream)
# context.execute_async(1, [int(in_gpu), int(out_gpu)], stream.handle, None)
# cuda.memcpy_dtoh_async(out_cpu, out_gpu, stream)
# stream.synchronize()
# sync version
cuda.memcpy_htod(in_gpu, inputs)
context.execute(1, [int(in_gpu), int(out_gpu)])
cuda.memcpy_dtoh(out_cpu, out_gpu)
return out_cpu
if __name__ == "__main__":
inputs = np.random.random((1, 1, 29, 29)).astype(np.float32)
tensorrt_file_name = '/content/drive/My Drive/capstone1/CAN/bert.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
with open(tensorrt_file_name, 'rb') as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
# engine = build_engine(model_path)
context = engine.create_execution_context()
for _ in range(10):
t1 = time.time()
in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
res = inference(engine, context, inputs.reshape(-1), out_cpu, in_gpu, out_gpu, stream)
print(res)
print("cost time: ", time.time()-t1)
import model
import torch
import importlib
importlib.reload(model)
batch_size = 256
model = model.Net().cuda().eval()
inputs = torch.randn(batch_size, 1, 29, 29, requires_grad=True).cuda()
torch_out = model(inputs)
torch.onnx.export(
model,
inputs,
'bert.onnx',
input_names=['inputs'],
output_names=['outputs'],
export_params=True)
print('done, onnx model')
import pandas as pd
import numpy as np
import csv
import os
import const
from matplotlib import pyplot as plt
def run_benchmark_cnn():
import sys
sys.path.append("/content/drive/My Drive/capstone1/CAN/torch2trt")
from torch2trt import torch2trt
import model
import time
import torch
import dataset
import torch.nn as nn
test_model = model.CnnNet()
test_model.eval().cuda()
batch_size = 1
# _, _, _, test_data_set = dataset.GetCanDataset(100, 0, "./dataset/Mixed_dataset.csv", "./dataset/Mixed_dataset_1.txt")
# sampler = dataset.BatchIntervalSampler(len(test_data_set), batch_size)
# testloader = torch.utils.data.DataLoader(test_data_set, batch_size=batch_size, sampler=sampler,
# shuffle=False, num_workers=2, drop_last=True)
# create model and input data
# for inputs, labels in testloader:
# trt_x = inputs.float().cuda()
# trt_state = torch.zeros((batch_size, 8 * 32)).float().cuda()
# trt_model = model.OneNet()
# trt_model.load_state_dict(torch.load(weight_path))
# trt_model.float().eval().cuda()
# trt_f16_x = inputs.half().cuda()
# trt_f16_state = torch.zeros((batch_size, 8 * 32)).half().cuda()
# trt_f16_model = model.OneNet()
# trt_f16_model.load_state_dict(torch.load(weight_path))
# trt_f16_model.half().eval().cuda()
# trt_int8_strict_x = inputs.float().cuda()
# trt_int8_strict_state = torch.zeros((batch_size, 8 * 32)).float().cuda() # match model weight
# trt_int8_strict_model = model.OneNet()
# trt_int8_strict_model.load_state_dict(torch.load(weight_path))
# trt_int8_strict_model.eval().cuda() # no attribute 'char'
# break
inputs = torch.ones((batch_size, 1, const.CNN_FRAME_LEN, const.CNN_FRAME_LEN))
trt_x = inputs.half().cuda() # ??? densenet error?
trt_model = model.CnnNet()
# trt_model.load_state_dict(torch.load(weight_path))
trt_model.eval().cuda()
trt_f16_x = inputs.half().cuda()
trt_f16_model = model.CnnNet().half()
# trt_f16_model.load_state_dict(torch.load(weight_path))
trt_f16_model.half().eval().cuda()
trt_int8_strict_x = inputs.half().cuda() # match model weight
trt_int8_strict_model = model.CnnNet()
# trt_int8_strict_model.load_state_dict(torch.load(weight_path))
trt_int8_strict_model.eval().cuda() # no attribute 'char'
# convert to TensorRT feeding sample data as input
model_trt = torch2trt(trt_model, [trt_x], max_batch_size=batch_size)
model_trt_f16 = torch2trt(trt_f16_model, [trt_f16_x], fp16_mode=True, max_batch_size=batch_size)
model_trt_int8_strict = torch2trt(trt_int8_strict_model, [trt_int8_strict_x], fp16_mode=False, int8_mode=True, strict_type_constraints=True, max_batch_size=batch_size)
# testloader = torch.utils.data.DataLoader(test_data_set, batch_size=batch_size, sampler=sampler,
# shuffle=False, num_workers=2, drop_last=True)
with torch.no_grad():
### test inference time
dummy_x = torch.ones((batch_size, 1, const.CNN_FRAME_LEN, const.CNN_FRAME_LEN)).half().cuda()
dummy_cnt = 10000
print('ignore data loading time, inference random data')
check_time = time.time()
for i in range(dummy_cnt):
_ = test_model(dummy_x)
print('torch model: %.6f' % ((time.time() - check_time) / dummy_cnt))
check_time = time.time()
for i in range(dummy_cnt):
_ = model_trt(dummy_x)
print('trt model: %.6f' % ((time.time() - check_time) / dummy_cnt))
dummy_x = torch.ones((batch_size, 1, const.CNN_FRAME_LEN, const.CNN_FRAME_LEN)).half().cuda()
check_time = time.time()
for i in range(dummy_cnt):
_ = model_trt_f16(dummy_x)
print('trt float 16 model: %.6f' % ((time.time() - check_time) / dummy_cnt))
dummy_x = torch.ones((batch_size, 1, const.CNN_FRAME_LEN, const.CNN_FRAME_LEN)).char().cuda()
check_time = time.time()
for i in range(dummy_cnt):
_ = model_trt_int8_strict(dummy_x)
print('trt int8 strict model: %.6f' % ((time.time() - check_time) / dummy_cnt))
return
## end
criterion = nn.CrossEntropyLoss()
state_temp = torch.zeros((batch_size, 8 * 32)).cuda()
step_acc = 0.0
step_loss = 0.0
cnt = 0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = test_model(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('torch', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('trt', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).half().cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.half().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt_f16(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('float16', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).float().cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt_int8_strict(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('int8 strict', step_acc.item() / cnt, step_loss / loss_cnt)
def run_benchmark(weight_path):
import sys
sys.path.append("/content/drive/My Drive/capstone1/CAN/torch2trt")
from torch2trt import torch2trt
import model
import time
import torch
import dataset
import torch.nn as nn
test_model = model.OneNet()
test_model.load_state_dict(torch.load(weight_path))
test_model.eval().cuda()
batch_size = 1
_, _, _, test_data_set = dataset.GetCanDataset(100, 0, "./dataset/Mixed_dataset.csv", "./dataset/Mixed_dataset_1.txt")
sampler = dataset.BatchIntervalSampler(len(test_data_set), batch_size)
testloader = torch.utils.data.DataLoader(test_data_set, batch_size=batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
# create model and input data
for inputs, labels in testloader:
# inputs = torch.cat([inputs, inputs, inputs], 1)
trt_x = inputs.float().cuda()
trt_state = torch.zeros((batch_size, 8 * 32)).float().cuda()
trt_model = model.OneNet()
trt_model.load_state_dict(torch.load(weight_path))
trt_model.float().eval().cuda()
trt_f16_x = inputs.half().cuda()
trt_f16_state = torch.zeros((batch_size, 8 * 32)).half().cuda()
trt_f16_model = model.OneNet().half()
trt_f16_model.load_state_dict(torch.load(weight_path))
trt_f16_model.half().eval().cuda()
trt_int8_strict_x = inputs.float().cuda()
trt_int8_strict_state = torch.zeros((batch_size, 8 * 32)).float().cuda() # match model weight
trt_int8_strict_model = model.OneNet()
trt_int8_strict_model.load_state_dict(torch.load(weight_path))
trt_int8_strict_model.eval().cuda() # nn.Module has no .char() method, so the weights stay float32 and int8 conversion is left to torch2trt
break
# convert to TensorRT feeding sample data as input
model_trt = torch2trt(trt_model, [trt_x, trt_state], max_batch_size=batch_size)
model_trt_f16 = torch2trt(trt_f16_model, [trt_f16_x, trt_f16_state], fp16_mode=True, max_batch_size=batch_size)
model_trt_int8_strict = torch2trt(trt_int8_strict_model, [trt_int8_strict_x, trt_int8_strict_state], fp16_mode=False, int8_mode=True, strict_type_constraints=True, max_batch_size=batch_size)
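# The three torch2trt() conversions above build, in order: an FP32 TensorRT engine,
# an FP16 engine (fp16_mode=True), and an INT8 engine with strict type constraints
# (int8_mode=True, strict_type_constraints=True). No separate INT8 calibration data
# is supplied beyond the single example batch captured from the loader, which can
# limit the accuracy of the INT8 engine.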
testloader = torch.utils.data.DataLoader(test_data_set, batch_size=batch_size, sampler=sampler,
shuffle=False, num_workers=2, drop_last=True)
with torch.no_grad():
### test inference time
dummy_x = torch.ones((batch_size, 8)).cuda()
dummy_state = torch.zeros(batch_size, model.STATE_DIM).cuda()
dummy_cnt = 10000
print('ignoring data loading time; running inference on random data')
check_time = time.time()
for i in range(dummy_cnt):
_, _ = test_model(dummy_x, dummy_state)
print('torch model: %.6f' % ((time.time() - check_time) / dummy_cnt))
check_time = time.time()
for i in range(dummy_cnt):
_, _ = model_trt(dummy_x, dummy_state)
print('trt model: %.6f' % ((time.time() - check_time) / dummy_cnt))
dummy_x = torch.ones((batch_size, 8)).half().cuda()
dummy_state = torch.zeros(batch_size, model.STATE_DIM).half().cuda()
check_time = time.time()
for i in range(dummy_cnt):
_, _ = model_trt_f16(dummy_x, dummy_state)
print('trt float 16 model: %.6f' % ((time.time() - check_time) / dummy_cnt))
dummy_x = torch.ones((batch_size, 8)).char().cuda()
dummy_state = torch.zeros(batch_size, model.STATE_DIM).char().cuda()
check_time = time.time()
for i in range(dummy_cnt):
_, _ = model_trt_int8_strict(dummy_x, dummy_state)
print('trt int8 strict model: %.6f' % ((time.time() - check_time) / dummy_cnt))
return
## end of the timing test (the accuracy evaluation below is skipped because of the return above)
criterion = nn.CrossEntropyLoss()
state_temp = torch.zeros((batch_size, 8 * 32)).cuda()
step_acc = 0.0
step_loss = 0.0
cnt = 0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = test_model(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('torch', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('trt', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).half().cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.half().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt_f16(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('float16', step_acc.item() / cnt, step_loss / loss_cnt)
state_temp = torch.zeros((batch_size, 8 * 32)).float().cuda()
step_acc = 0.0
cnt = 0
step_loss = 0.0
loss_cnt = 0
for i, (inputs, labels) in enumerate(testloader):
inputs, labels = inputs.float().cuda(), labels.long().cuda()
normal_outputs, state_temp = model_trt_int8_strict(inputs, state_temp)
_, preds = torch.max(normal_outputs, 1)
edge_loss = criterion(normal_outputs, labels)
step_loss += edge_loss.item()
loss_cnt += 1
corr_sum = torch.sum(preds == labels.data)
step_acc += corr_sum.double()
cnt += batch_size
print('int8 strict', step_acc.item() / cnt, step_loss / loss_cnt)
def drawGraph(x_value, x_label, y_axis, y_label):
pass
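# drawGraph() above is an empty stub. Below is a minimal sketch of what it might do,
# kept as a separate hypothetical helper so the stub is left untouched; matplotlib is
# an assumed dependency that is not imported elsewhere in this file.
def draw_graph_sketch(x_value, x_label, y_axis, y_label, out_path='graph.png'):
    import matplotlib
    matplotlib.use('Agg')  # render without a display, e.g. on a headless board
    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(x_value, y_axis)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.savefig(out_path)
    plt.close()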
def CsvToTextOne(csv_file):
target_csv = pd.read_csv(csv_file)
file_name, extension = os.path.splitext(csv_file)
print(file_name, extension)
target_text = open(file_name + '_1.txt', mode='wt', encoding='utf-8')
idx = 0
print(len(target_csv))
while idx < len(target_csv):
csv_row = target_csv.iloc[idx]
data_len = csv_row[1]
is_regular = (csv_row[data_len + 2] == 'R')
if is_regular:
target_text.write("%d R\n" % idx)
else:
target_text.write("%d T\n" % idx)
idx += 1
if (idx % 1000000 == 0):
print(idx)
target_text.close()
print('done')
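# Example usage (a sketch; the path matches the files used by run_benchmark() above):
#   CsvToTextOne('./dataset/Mixed_dataset.csv')
# writes ./dataset/Mixed_dataset_1.txt with one "<row_idx> R" or "<row_idx> T" line per
# CSV row, which is later paired with the CSV by dataset.GetCanDataset().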
def Mix_Four_CANDataset():
Dos_csv = pd.read_csv('./dataset/DoS_dataset.csv')
Other_csv = [pd.read_csv('./dataset/Fuzzy_dataset.csv'),
pd.read_csv('./dataset/RPM_dataset.csv'),
pd.read_csv('./dataset/gear_dataset.csv')]
Other_csv_idx = [0, 0, 0]
save_csv = open('./dataset/Mixed_dataset.csv', 'w')
save_csv_file = csv.writer(save_csv)
# Change the period of the DoS attack traffic:
# the three DoS slots that follow each DoS frame are replaced with other attack traffic.
# Pattern: DoS / three frames drawn from (Fuzzy, RPM, gear) in random order, random counts / DoS ...
# (a small sanity check for the resulting file is sketched after this function)
dos_idx = 0
dos_period = 3
while dos_idx < len(Dos_csv):
dos_row = Dos_csv.iloc[dos_idx]
number_of_data = dos_row[2]
is_regular = (dos_row[number_of_data + 3] == 'R')
dos_row.dropna(inplace=True)
if is_regular:
save_csv_file.writerow(dos_row[1:])
else:
if dos_period == 3:
save_csv_file.writerow(dos_row[1:])
np.random.seed(dos_idx)
selected_edge = np.random.choice([0, 1, 2], 3, replace=True)
else:
selected_idx = selected_edge[dos_period]
local_csv = Other_csv[selected_idx]
local_idx = Other_csv_idx[selected_idx]
while True:
local_row = local_csv.iloc[local_idx]
local_number_of_data = local_row[2]
is_injected = (local_row[local_number_of_data + 3] == 'T')
local_idx += 1
if is_injected:
local_row.dropna(inplace=True)
save_csv_file.writerow(local_row[1:])
break
Other_csv_idx[selected_idx] = local_idx
dos_period -= 1
if dos_period == -1:
dos_period = 3
dos_idx += 1
if dos_idx % 100000 == 0:
print(dos_idx)
# break
save_csv.close()
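# A minimal sanity check for the mixed file (a sketch; count_mixed_labels() is a
# hypothetical helper, not called elsewhere). It assumes the row layout produced
# above: the DLC sits in column 1 of the written row and the 'R'/'T' flag in column
# DLC + 2, which is exactly how CsvToTextOne() reads the file.
def count_mixed_labels(mixed_csv_path='./dataset/Mixed_dataset.csv'):
    import csv
    counts = {}
    with open(mixed_csv_path, newline='') as f:
        for fields in csv.reader(f):
            if not fields:
                continue
            data_len = int(fields[1])
            flag = fields[data_len + 2]
            counts[flag] = counts.get(flag, 0) + 1
    return counts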
def Mix_Six_SynCANDataset():
normal_csv = pd.read_csv('./dataset/test_normal.csv')
normal_idx = 0
target_len = len(normal_csv)
save_csv = open('./dataset/test_mixed.csv', 'w')
save_csv_file = csv.writer(save_csv)
other_csv = [pd.read_csv('./dataset/test_continuous.csv'),
pd.read_csv('./dataset/test_flooding.csv'),
pd.read_csv('./dataset/test_plateau.csv'),
pd.read_csv('./dataset/test_playback.csv'),
pd.read_csv('./dataset/test_suppress.csv')]
other_csv_idx = [0, 0, 0, 0, 0]
while normal_idx < target_len:
np.random.seed(normal_idx)
selected_csv = np.random.choice([0, 1, 2, 3, 4], 5, replace=True)
all_done = True
for csv_idx in selected_csv:
now_csv = other_csv[csv_idx]
now_idx = other_csv_idx[csv_idx]
start_normal_idx = now_idx
# stop one row early so the look-ahead row below stays inside the csv
while now_idx + 1 < len(now_csv):
csv_row_ahead = now_csv.iloc[now_idx + 1]
label_ahead = csv_row_ahead[0]
csv_row_behind = now_csv.iloc[now_idx]
label_behind = csv_row_behind[0]
# 0 -> 1 transition: an injected (attack) region starts at now_idx + 1
if label_ahead == 1 and label_behind == 0:
print(now_idx, 'attack region start')
add_normal_len = (now_idx - start_normal_idx) // 9
start_abnormal_idx = now_idx + 1
# 1 -> 0 transition: the injected region ends at now_idx
elif label_ahead == 0 and label_behind == 1:
print(now_idx, 'attack region end')
add_abnormal_len = (now_idx - start_abnormal_idx) // 6
for _ in range(6):
# done
if normal_idx + add_normal_len >= target_len:
save_csv.close()
return
# write normal
for idx in range(normal_idx, normal_idx + add_normal_len):
row = normal_csv.iloc[idx]
row = row.fillna(0)
save_csv_file.writerow(row[0:1].append(row[2:]))
normal_idx += add_normal_len
# write abnormal
for idx in range(start_abnormal_idx, start_abnormal_idx + add_abnormal_len):
row = now_csv.iloc[idx]
row = row.fillna(0)
save_csv_file.writerow(row[0:1].append(row[2:]))
start_abnormal_idx += add_abnormal_len
other_csv_idx[csv_idx] = now_idx + 1
# check other csv not end
all_done = False
break
now_idx += 1
if all_done:
break
save_csv.close()
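# Typical order in which the helpers in this file are used (a sketch; the weight
# path is a placeholder, not a file shipped with this repository):
#
#   Mix_Four_CANDataset()                          # -> ./dataset/Mixed_dataset.csv
#   CsvToTextOne('./dataset/Mixed_dataset.csv')    # -> ./dataset/Mixed_dataset_1.txt
#   run_benchmark('<path/to/OneNet_weights.pth>')  # torch vs torch2trt latency
#
#   Mix_Six_SynCANDataset()                        # -> ./dataset/test_mixed.csv (SynCAN)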