
# 연합학습 code 정리중 (federated-learning code, cleanup in progress)

import torch.nn as nn
import torch.nn.functional as F
import torch
import const
# NOTE(review): two class definitions (`Net` and `OneNet`) are interleaved in
# the lines below, all indentation is missing, and none of the
# `nn.Sequential(` calls is closed, so this span is not valid Python as it
# stands. The code is left untouched; the comments only label which model each
# fragment appears to belong to and flag what must be restored by hand.

# --- Net: constructor, first convolution stage ---
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.f1 = nn.Sequential(
nn.Conv2d(1, 2, 3),
# --- OneNet: module constant, class header and feature layer ---
# NOTE(review): presumably the width of the state tensor `s` consumed by
# OneNet.forward — confirm.
STATE_DIM = 8 * 32
class OneNet(nn.Module):
def __init__(self, packet_num):
super(OneNet, self).__init__()
IN_DIM = 8 * packet_num # byte
# transform the given packet into a tensor which is in a good feature space
self.feature_layer = nn.Sequential(
nn.Linear(IN_DIM, 32),
# NOTE(review): FEATURE_DIM is not defined anywhere in the visible source;
# it may be expected from another module (e.g. `const`) — confirm.
nn.Linear(32, FEATURE_DIM),
# --- Net: second and third convolution stages ---
self.f2 = nn.Sequential(
nn.Conv2d(2, 4, 3),
self.f3 = nn.Sequential(
nn.Conv2d(4, 8, 3),
# --- OneNet: state-update head ---
# generates the current state 's'
# NOTE(review): no layers for `self.f` are visible here; its contents appear
# to have been lost.
self.f = nn.Sequential(
# --- Net: final linear classifier ---
# NOTE(review): 8 * 23 * 23 presumably corresponds to 8 channels of 23x23
# after three unpadded 3x3 convolutions on a 29x29 input — confirm.
self.f4 = nn.Sequential(
nn.Linear(8 * 23 * 23, 2),
# --- OneNet: classification head ---
# check whether the given packet is malicious
self.g = nn.Sequential(
nn.Linear(STATE_DIM + FEATURE_DIM, 64),
nn.Linear(64, 64),
nn.Linear(64, 2),
# --- Net.forward: f1 -> f2 -> f3 -> flatten from dim 1 -> f4 ---
def forward(self, x):
x = self.f1(x)
x = self.f2(x)
x = self.f3(x)
x = torch.flatten(x, 1)
x = self.f4(x)
return x
# --- OneNet.forward: embeds x, concatenates it with s along dim 1, then
# returns (g(x), f(x)), i.e. the classification output and the value named s2 ---
def forward(self, x, s):
x = self.feature_layer(x)
x = torch.cat((x, s), 1)
s2 = self.f(x)
x2 = self.g(x)
return x2, s2
import tensorrt as trt
# Convert an ONNX model into a serialized TensorRT engine ("plan") file.
onnx_file_name = 'bert.onnx'
tensorrt_file_name = 'bert.plan'
fp16_mode = True
# int8_mode = True

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(EXPLICIT_BATCH)
parser = trt.OnnxParser(network, TRT_LOGGER)
builder.max_workspace_size = 1 << 30
builder.fp16_mode = fp16_mode
# builder.int8_mode = int8_mode

# Parse the ONNX file into `network`; report every parser error on failure.
with open(onnx_file_name, 'rb') as model:
    if not parser.parse(model.read()):
        for error in range(parser.num_errors):
            print(parser.get_error(error))

# for int8 mode
# print(network.num_layers, network.num_inputs , network.num_outputs)
# for layer_index in range(network.num_layers):
# layer = network[layer_index]
# print(layer.name)
# tensor = layer.get_output(0)
# print(tensor.name)
# tensor.dynamic_range = (0, 255)
# input_tensor = layer.get_input(0)
# print(input_tensor)
# input_tensor.dynamic_range = (0, 255)

engine = builder.build_cuda_engine(network)
if engine is None:
    # Fail with a clear message instead of an AttributeError on `.serialize()`.
    raise RuntimeError('TensorRT engine build failed for ' + onnx_file_name)
buf = engine.serialize()
with open(tensorrt_file_name, 'wb') as f:
    # The original opened the file but never wrote the serialized engine,
    # leaving an empty plan file behind.
    f.write(buf)
print('done, trt model')
import tensorrt as trt
import pycuda.driver as cuda
import numpy as np
import torch
import pycuda.autoinit
import dataset
import model
import time
# print(dir(trt))
# Read the serialized TensorRT plan from disk, deserialize it into an engine
# and create the execution context used by the benchmark code further down.
tensorrt_file_name = 'bert.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)

with open(tensorrt_file_name, 'rb') as f:
    engine_data = f.read()

engine = trt_runtime.deserialize_cuda_engine(engine_data)
context = engine.create_execution_context()
# class HostDeviceMem(object):
# def __init__(self, host_mem, device_mem):
# self.host = host_mem
# self.device = device_mem
# def __str__(self):
# return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
# def __repr__(self):
# return self.__str__()
# inputs, outputs, bindings, stream = [], [], [], []
# for binding in engine:
# size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
# dtype = trt.nptype(engine.get_binding_dtype(binding))
# host_mem = cuda.pagelocked_empty(size, dtype)
# device_mem = cuda.mem_alloc(host_mem.nbytes)
# bindings.append(int(device_mem))
# if engine.binding_is_input(binding):
# inputs.append( HostDeviceMem(host_mem, device_mem) )
# else:
# outputs.append(HostDeviceMem(host_mem, device_mem))
# input_ids = np.ones([1, 1, 29, 29])
# numpy_array_input = [input_ids]
# hosts = [input.host for input in inputs]
# trt_types = [trt.int32]
# for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types):
# numpy_array = np.asarray(numpy_array).ravel()
# np.copyto(host, numpy_array)
# def do_inference(context, bindings, inputs, outputs, stream):
# [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
# context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
# [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
# stream.synchronize()
# return [out.host for out in outputs]
# trt_outputs = do_inference(
# context=context,
# bindings=bindings,
# inputs=inputs,
# outputs=outputs,
# stream=stream)
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass through a TensorRT execution context.

    Args:
        context: Execution context; only ``execute_async`` is called on it.
        input_img: NumPy array with the input data; converted to float32
            before being uploaded.
        output_size: Shape of the host array that receives the predictions.
        batch_size: Batch size handed to ``execute_async``. It is also used
            as a multiplier when sizing both device allocations.

    Returns:
        A float32 NumPy array of shape ``output_size`` holding the predictions.
    """
    # Load engine
    # engine = context.get_engine()
    # assert(engine.get_nb_bindings() == 2)

    # Convert input data to float32
    input_img = input_img.astype(np.float32)

    # Create host buffer to receive data
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.execute_async(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Synchronize threads: every operation above was only enqueued on
    # `stream`, so wait for completion before `output` is read by the caller.
    stream.synchronize()

    # Return predictions
    return output
# kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
# train_data_set, data_idx_map, net_class_count, net_data_count, test_data_set = dataset.GetCanDatasetUsingTxtKwarg(100, 0, **kwargs)
# testloader = torch.utils.data.DataLoader(test_data_set, batch_size=256,
# shuffle=False, num_workers=2)
# Benchmark: wall-clock time for 100 calls to `infer` on a 256-sample batch of
# ones, using whichever `context` is currently bound at module level.
check_time = time.time()
cnt = 0
temp = np.ones([256, 1, 29, 29])

for idx in range(100):
    # for i, (inputs, labels) in enumerate(testloader):
    trt_outputs = infer(context, temp, (256, 2), 256)
    # print(trt_outputs)
    # print(np.argmax(trt_outputs, axis=0))
    # cnt += 1
    # if cnt == 100:
    # break

print(time.time() - check_time)
# Load the engine stored in 'bert_int.plan' and rebind `engine` / `context`.
tensorrt_file_name = 'bert_int.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)

with open(tensorrt_file_name, 'rb') as f:
    engine_data = f.read()

engine = trt_runtime.deserialize_cuda_engine(engine_data)
context = engine.create_execution_context()

# Benchmark: wall-clock time for 100 calls to `infer` on a 256-sample batch
# of ones with the context created just above.
check_time = time.time()
cnt = 0
temp = np.ones([256, 1, 29, 29])

for idx in range(100):
    # for i, (inputs, labels) in enumerate(testloader):
    trt_outputs = infer(context, temp, (256, 2), 256)
    # print(trt_outputs)
    # print(np.argmax(trt_outputs, axis=0))
    # cnt += 1
    # if cnt == 100:
    # break

print(time.time() - check_time)
# Benchmark: wall-clock time for 100 forward passes of the plain PyTorch model
# on a random 256-sample batch, for comparison with the TensorRT timings.
test_model = model.Net().cuda()
check_time = time.time()
cnt = 0
temp = torch.randn(256, 1, 29, 29).cuda()

for idx in range(100):
    # for i, (inputs, labels) in enumerate(testloader):
    # inputs = inputs.float().cuda()
    normal_outputs = test_model(temp)
    # print(normal_outputs)
    cnt += 1
    if cnt == 100:
        # The original `if` had no body; a `break` matches the commented-out
        # pattern used by the TensorRT benchmarks.
        break

# CUDA kernels are launched asynchronously; wait for the queued work to
# finish so the elapsed time covers the computation, not just the launches.
torch.cuda.synchronize()
print(time.time() - check_time)
import tensorrt as trt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import time
# Path of the ONNX model. No active code in the visible source reads it; only
# commented-out `build_engine` lines refer to it.
model_path = "bert.onnx"
# NOTE(review): not referenced by any visible code; its meaning is unclear — confirm or remove.
input_size = 32
# TensorRT logger with WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# def build_engine(model_path):
# with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
# builder.max_workspace_size = 1<<20
# builder.max_batch_size = 1
# with open(model_path, "rb") as f:
# parser.parse(f.read())
# engine = builder.build_cuda_engine(network)
# return engine
def alloc_buf(engine):
    """Allocate host and device buffers for bindings 0 and 1 of ``engine``.

    Binding 0 is used as the input and binding 1 as the output. Each host
    buffer is page-locked and sized from the binding's shape and dtype; each
    device buffer has the same byte size as its host counterpart.

    Returns:
        Tuple ``(in_cpu, out_cpu, in_gpu, out_gpu, stream)`` where ``stream``
        is a newly created CUDA stream.
    """
    # host cpu mem
    host_buffers = []
    for binding_idx in (0, 1):
        n_elements = trt.volume(engine.get_binding_shape(binding_idx))
        np_dtype = trt.nptype(engine.get_binding_dtype(binding_idx))
        host_buffers.append(cuda.pagelocked_empty(n_elements, np_dtype))
    in_cpu, out_cpu = host_buffers

    # allocate gpu mem
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)

    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream
def inference(engine, context, inputs, out_cpu, in_gpu, out_gpu, stream):
    """Run one synchronous inference and return the host output buffer.

    Copies ``inputs`` to ``in_gpu``, executes ``context`` with batch size 1 on
    the two device buffers, then copies ``out_gpu`` back into ``out_cpu``.
    ``engine`` and ``stream`` are accepted but not used by the synchronous
    path.

    Returns:
        ``out_cpu``, filled with the data copied back from ``out_gpu``.
    """
    # async version
    # with engine.create_execution_context() as context: # cost time to initialize
    # cuda.memcpy_htod_async(in_gpu, inputs, stream)
    # context.execute_async(1, [int(in_gpu), int(out_gpu)], stream.handle, None)
    # cuda.memcpy_dtoh_async(out_cpu, out_gpu, stream)
    # stream.synchronize()

    # sync version
    device_bindings = [int(in_gpu), int(out_gpu)]
    cuda.memcpy_htod(in_gpu, inputs)
    context.execute(1, device_bindings)
    cuda.memcpy_dtoh(out_cpu, out_gpu)
    return out_cpu
if __name__ == "__main__":
    # Random float32 input for a single 1x29x29 sample.
    inputs = np.random.random((1, 1, 29, 29)).astype(np.float32)

    # Deserialize the stored TensorRT plan and create an execution context.
    tensorrt_file_name = '/content/drive/My Drive/capstone1/CAN/bert.plan'
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    with open(tensorrt_file_name, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    # engine = build_engine(model_path)
    context = engine.create_execution_context()

    # Ten timed runs; each measurement covers buffer allocation and inference.
    for _ in range(10):
        t1 = time.time()
        in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
        res = inference(
            engine, context, inputs.reshape(-1),
            out_cpu, in_gpu, out_gpu, stream,
        )
        print("cost time: ", time.time()-t1)
import model
import torch
import importlib
# Export the PyTorch network to ONNX so it can be converted to TensorRT.
batch_size = 256
# NOTE: this rebinds the name `model` from the imported module to the network
# instance; the module is no longer reachable under that name afterwards.
model = model.Net().cuda().eval()
inputs = torch.randn(batch_size, 1, 29, 29, requires_grad=True).cuda()
torch_out = model(inputs)
# The original printed the success message without exporting anything.
# 'bert.onnx' is the file name the TensorRT conversion code reads.
torch.onnx.export(
    model,
    inputs,
    'bert.onnx',
    export_params=True,
    input_names=['input'],
    output_names=['output'],
)
print('done, onnx model')
