연합학습 code 정리중

김지훈
Commit fb56811d4e4a799cec9f1da06c4c99c612d00699 fb56811d 1 parent e47eb05a
Showing 9 changed files with 319 additions and 24 deletions
코드/연합학습/const.py
코드/연합학습/dataset.py
코드/연합학습/deprecated.py
코드/연합학습/fed_train.py
코드/연합학습/model.py
코드/연합학습/quantization/onnx2tensorRT.py
코드/연합학습/quantization/tensorRT_test.py
코드/연합학습/quantization/torch2onnx.py
코드/연합학습/utils.py
--- a/코드/연합학습/const.py
View file @fb56811
+++ b/코드/연합학습/const.py
View file @fb56811
- CAN_ID_BIT = 29
\ No newline at end of file
+ # Data-length constants.
+ # NOTE(review): presumably the payload length, in bytes, of one CAN frame - confirm.
+ CAN_DATA_LEN    = 8
+ # NOTE(review): presumably the per-message data length used for the SynCAN dataset - confirm the unit.
+ SYNCAN_DATA_LEN   = 4
\ No newline at end of file
--- a/코드/연합학습/dataset.py
View file @fb56811
+++ b/코드/연합학습/dataset.py
View file @fb56811
--- a/코드/연합학습/deprecated.py 0 → 100644
View file @fb56811
+++ b/코드/연합학습/deprecated.py 0 → 100644
View file @fb56811
--- a/코드/연합학습/fed_train.py
View file @fb56811
+++ b/코드/연합학습/fed_train.py
View file @fb56811
--- a/코드/연합학습/model.py
View file @fb56811
+++ b/코드/연합학습/model.py
View file @fb56811
 import torch.nn as nn
- import torch.nn.functional as F
 import torch
 import const
 
- class Net(nn.Module):
-     def __init__(self):
-         super(Net, self).__init__()
 
-         self.f1 = nn.Sequential(
-             nn.Conv2d(1, 2, 3),
-             nn.ReLU(True),
+ # Number of features in the state vector `s` that OneNet.forward takes and returns.
+ STATE_DIM = 8 * 32
+ class OneNet(nn.Module):
+     # Packet classifier that carries a state vector from one call to the next.
+     #
+     # Each call embeds the input with `feature_layer`, concatenates the
+     # embedding with the incoming state `s`, and feeds the result to two heads:
+     # `f` produces the next state and `g` produces two output scores.
+     def __init__(self, packet_num):
+         # packet_num: scales the input width, which is 8 * packet_num
+         #             (the inline comment below marks the unit as bytes).
+         super(OneNet, self).__init__()
+         IN_DIM = 8 * packet_num # byte
+         FEATURE_DIM = 32
+         
+         # embed the raw input (IN_DIM values) into a FEATURE_DIM-wide feature vector
+         self.feature_layer = nn.Sequential(
+             nn.Linear(IN_DIM, 32),
+             nn.ReLU(),
+             nn.Linear(32, FEATURE_DIM),
+             nn.ReLU()
         )
-         self.f2 = nn.Sequential(
-           nn.Conv2d(2, 4, 3),
-           nn.ReLU(True),
-         )
-         self.f3 = nn.Sequential(
-           nn.Conv2d(4, 8, 3),
-           nn.ReLU(True),
+ 
+         # state head: maps [features, incoming state] to the next state (STATE_DIM wide)
+         self.f = nn.Sequential(
+             nn.Linear(STATE_DIM + FEATURE_DIM, STATE_DIM),
+             nn.ReLU(),
+             nn.Linear(STATE_DIM, STATE_DIM),
+             nn.ReLU()
         )
-         self.f4 = nn.Sequential(
-           nn.Linear(8 * 23 * 23, 2),
+ 
+         # classification head: two output scores used to check whether the given
+         # packet is malicious; no activation is applied after the last layer
+         self.g = nn.Sequential(
+             nn.Linear(STATE_DIM + FEATURE_DIM, 64),
+             nn.ReLU(),
+             nn.Linear(64, 64),
+             nn.ReLU(),
+             nn.Linear(64, 2),
         )
 
-     def forward(self, x, s):
-         x = self.f1(x)
-         x = self.f2(x)
-         x = self.f3(x)
-         x = torch.flatten(x, 1)
-         x = self.f4(x)
-         return x
\ No newline at end of file
+     def forward(self, x, s):
+         # x: input whose last dimension is IN_DIM (8 * packet_num).
+         # s: incoming state; it must be STATE_DIM wide so that the concatenation
+         #    matches the in_features of `f` and `g`.
+         # x and s are concatenated along dim 1, so their other dimensions must
+         # match (assumes 2-D (batch, features) tensors - TODO confirm).
+         # Returns (x2, s2): the two scores from `g` and the new state from `f`.
+         # Both heads read the same concatenated tensor, so `g` sees the incoming
+         # state `s`, not the freshly computed `s2`.
+         x   = self.feature_layer(x)
+         x   = torch.cat((x, s), 1)
+         s2  = self.f(x)
+         x2  = self.g(x)
+ 
+         return x2, s2
\ No newline at end of file
--- a/코드/연합학습/quantization/onnx2tensorRT.py 0 → 100644
View file @fb56811
+++ b/코드/연합학습/quantization/onnx2tensorRT.py 0 → 100644
View file @fb56811
+ # Build a serialized TensorRT engine ("plan" file) from an ONNX model.
+ #
+ # The script parses `onnx_file_name`, builds an engine with FP16 enabled and
+ # writes the serialized engine to `tensorrt_file_name`. It exits with an error
+ # message if the ONNX file cannot be parsed or the engine cannot be built.
+ #
+ # NOTE(review): Builder.max_workspace_size, Builder.fp16_mode and
+ # Builder.build_cuda_engine belong to the legacy builder API; confirm the
+ # installed TensorRT version still provides them before upgrading TensorRT.
+ import tensorrt as trt
+ 
+ onnx_file_name = 'bert.onnx'
+ tensorrt_file_name = 'bert.plan'
+ fp16_mode = True
+ # int8_mode = True
+ TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+ # Network-creation flag bit requesting an explicit batch dimension.
+ EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+ 
+ builder = trt.Builder(TRT_LOGGER)
+ network = builder.create_network(EXPLICIT_BATCH)
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+ 
+ builder.max_workspace_size = (1 << 30)  # 1 GiB workspace limit
+ builder.fp16_mode = fp16_mode
+ # builder.int8_mode = int8_mode
+ 
+ with open(onnx_file_name, 'rb') as model:
+     parsed_ok = parser.parse(model.read())
+ 
+ if not parsed_ok:
+     for error_index in range(parser.num_errors):
+         print (parser.get_error(error_index))
+     # Building from a partially parsed network would only fail later with a
+     # less helpful error, so stop here.
+     raise SystemExit('failed to parse ONNX model: ' + onnx_file_name)
+ 
+ # for int8 mode
+ # print(network.num_layers, network.num_inputs , network.num_outputs)
+ # for layer_index in range(network.num_layers):
+ #   layer = network[layer_index]
+ #   print(layer.name)
+ #   tensor = layer.get_output(0)
+ #   print(tensor.name)
+ #   tensor.dynamic_range = (0, 255)
+ 
+   # input_tensor = layer.get_input(0)
+   # print(input_tensor)
+   # input_tensor.dynamic_range = (0, 255)
+ 
+ engine = builder.build_cuda_engine(network)
+ if engine is None:
+     # A failed build yields no engine; report it instead of crashing on
+     # `None.serialize()`.
+     raise SystemExit('failed to build a TensorRT engine from: ' + onnx_file_name)
+ 
+ buf = engine.serialize()
+ with open(tensorrt_file_name, 'wb') as f:
+     f.write(buf)
+ 
+ print('done, trt model')
\ No newline at end of file
--- a/코드/연합학습/quantization/tensorRT_test.py 0 → 100644
View file @fb56811
+++ b/코드/연합학습/quantization/tensorRT_test.py 0 → 100644
View file @fb56811
+ import tensorrt as trt
+ import pycuda.driver as cuda
+ import numpy as np
+ import torch
+ import pycuda.autoinit
+ import dataset
+ import model
+ import time
+ # print(dir(trt))
+  
+ # Read the serialized engine stored in 'bert.plan', deserialize it and create
+ # the execution context used by the first benchmark loop further down.
+ tensorrt_file_name = 'bert.plan'
+ TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+ trt_runtime = trt.Runtime(TRT_LOGGER)
+  
+ with open(tensorrt_file_name, 'rb') as f:
+     engine_data = f.read()
+ engine = trt_runtime.deserialize_cuda_engine(engine_data)
+ context = engine.create_execution_context()
+ 
+ # class HostDeviceMem(object):
+ #     def __init__(self, host_mem, device_mem):
+ #         self.host = host_mem
+ #         self.device = device_mem
+  
+ #     def __str__(self):
+ #         return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
+  
+ #     def __repr__(self):
+ #         return self.__str__()
+  
+ # inputs, outputs, bindings, stream = [], [], [], []
+ # for binding in engine:
+ #     size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
+ #     dtype = trt.nptype(engine.get_binding_dtype(binding))
+ #     host_mem = cuda.pagelocked_empty(size, dtype)
+ #     device_mem = cuda.mem_alloc(host_mem.nbytes)
+ #     bindings.append(int(device_mem))
+ #     if engine.binding_is_input(binding):
+ #         inputs.append( HostDeviceMem(host_mem, device_mem) )
+ #     else:
+ #         outputs.append(HostDeviceMem(host_mem, device_mem))
+ 
+ # input_ids = np.ones([1, 1, 29, 29])
+  
+ # numpy_array_input = [input_ids]
+ # hosts = [input.host for input in inputs]
+ # trt_types = [trt.int32]
+  
+ # for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types):
+ #     numpy_array = np.asarray(numpy_array).ravel()
+ #     np.copyto(host, numpy_array)
+ 
+ # def do_inference(context, bindings, inputs, outputs, stream):
+ #     [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
+ #     context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
+ #     [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
+ #     stream.synchronize()
+ #     return [out.host for out in outputs]
+ 
+ # trt_outputs = do_inference(
+ #                         context=context,
+ #                         bindings=bindings,
+ #                         inputs=inputs,
+ #                         outputs=outputs,
+ #                         stream=stream)
+ 
+ def infer(context, input_img, output_size, batch_size):
+     # Run one inference on a TensorRT execution context and return the output.
+     #
+     # context     : TensorRT execution context to run.
+     # input_img   : array-like input; converted to a C-contiguous float32 array.
+     # output_size : shape of the float32 host array that receives the result.
+     # batch_size  : batch size handed to execute_async; it also scales the
+     #               size of both device allocations.
+     # Returns the float32 numpy array of shape `output_size` copied back from
+     # the device.
+     # Raises RuntimeError when TensorRT reports that the execution could not
+     # be enqueued (previously the uninitialised output buffer was returned).
+     #
+     # The bindings list is [input, output]; this assumes the engine has
+     # exactly those two bindings in that order.
+     # NOTE(review): execute_async takes an implicit batch size; engines built
+     # with the EXPLICIT_BATCH flag may need execute_async_v2 - confirm.
+ 
+     # The raw bytes are copied to the device, so force C order as well as
+     # float32 (astype alone can keep a non-C memory layout).
+     input_img = np.ascontiguousarray(input_img, dtype=np.float32)
+     # Host buffer that receives the result
+     output = np.empty(output_size, dtype = np.float32)
+     # Device buffers
+     d_input = cuda.mem_alloc(batch_size * input_img.nbytes)
+     d_output = cuda.mem_alloc(batch_size * output.nbytes)
+     try:
+         bindings = [int(d_input), int(d_output)]
+         stream = cuda.Stream()
+         # Transfer input data to device
+         cuda.memcpy_htod_async(d_input, input_img, stream)
+         # Execute model; the return value tells whether enqueueing succeeded
+         enqueued = context.execute_async(batch_size, bindings, stream.handle, None)
+         if enqueued:
+             # Transfer predictions back
+             cuda.memcpy_dtoh_async(output, d_output, stream)
+         # Wait for all queued work before touching `output` or freeing memory
+         stream.synchronize()
+     finally:
+         # Release device memory now instead of waiting for garbage collection;
+         # this function is called in a tight loop.
+         d_input.free()
+         d_output.free()
+ 
+     if not enqueued:
+         raise RuntimeError('TensorRT execute_async failed (batch_size=%d)' % batch_size)
+     return output
+ 
+ 
+ # kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
+ # train_data_set, data_idx_map, net_class_count, net_data_count, test_data_set = dataset.GetCanDatasetUsingTxtKwarg(100, 0, **kwargs)
+ # testloader = torch.utils.data.DataLoader(test_data_set, batch_size=256,
+ #                                         shuffle=False, num_workers=2)
+ 
+ def _time_trt(trt_context, batch, output_shape, runs=100):
+     # Run `runs` TensorRT inferences on `batch` through `infer`, print the
+     # shape of every result, then print the total elapsed wall-clock seconds.
+     start = time.time()
+     for _ in range(runs):
+         trt_outputs = infer(trt_context, batch, output_shape, batch.shape[0])
+         print(trt_outputs.shape)
+     print(time.time() - start)
+ 
+ 
+ # Benchmark 1: the engine deserialized from 'bert.plan' above.
+ temp = np.ones([256, 1, 29, 29])
+ _time_trt(context, temp, (256, 2))
+ 
+ 
+ # Benchmark 2: the engine stored in 'bert_int.plan'.
+ tensorrt_file_name = 'bert_int.plan'
+ TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+ trt_runtime = trt.Runtime(TRT_LOGGER)
+ 
+ with open(tensorrt_file_name, 'rb') as f:
+     engine_data = f.read()
+ engine = trt_runtime.deserialize_cuda_engine(engine_data)
+ context = engine.create_execution_context()
+ _time_trt(context, temp, (256, 2))
+ 
+ 
+ # Benchmark 3: the PyTorch model on the GPU.
+ # NOTE(review): model.py in this change set defines OneNet(packet_num) with
+ # forward(x, s) instead of Net - confirm which `model` module is imported here.
+ test_model = model.Net().cuda()
+ temp = torch.randn(256, 1, 29, 29).cuda()
+ # CUDA kernels are launched asynchronously: synchronize before starting and
+ # before stopping the clock so the measurement covers the actual execution.
+ torch.cuda.synchronize()
+ check_time = time.time()
+ with torch.no_grad():  # inference only; skip autograd bookkeeping
+     for idx in range(100):
+         normal_outputs = test_model(temp)
+         print(normal_outputs.shape)
+ torch.cuda.synchronize()
+ print(time.time() - check_time)
+ 
+ 
+ 
+ import tensorrt as trt
+ import numpy as np
+ import pycuda.autoinit
+ import pycuda.driver as cuda 
+ import time
+ 
+ # NOTE(review): model_path is only referenced by the commented-out
+ # build_engine code, and input_size is not referenced anywhere in the visible
+ # code - confirm whether both can be removed.
+ model_path = "bert.onnx"
+ input_size = 32
+ 
+ TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+ 
+ # def build_engine(model_path):
+ #     with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 
+ #         builder.max_workspace_size = 1<<20
+ #         builder.max_batch_size = 1
+ #         with open(model_path, "rb") as f:
+ #             parser.parse(f.read())
+ #         engine = builder.build_cuda_engine(network)
+ #         return engine
+ 
+ def alloc_buf(engine):
+     # Allocate host and device buffers for engine bindings 0 and 1, plus a
+     # CUDA stream.
+     #
+     # Binding 0 is treated as the input and binding 1 as the output. Each
+     # host buffer is page-locked and sized from the binding's shape and
+     # dtype; each device buffer has the same byte size as its host buffer.
+     # Returns (in_cpu, out_cpu, in_gpu, out_gpu, stream).
+     host_buffers = []
+     device_buffers = []
+     for binding_index in (0, 1):
+         element_count = trt.volume(engine.get_binding_shape(binding_index))
+         numpy_dtype = trt.nptype(engine.get_binding_dtype(binding_index))
+         host_buffer = cuda.pagelocked_empty(element_count, numpy_dtype)
+         host_buffers.append(host_buffer)
+         device_buffers.append(cuda.mem_alloc(host_buffer.nbytes))
+ 
+     in_cpu, out_cpu = host_buffers
+     in_gpu, out_gpu = device_buffers
+     return in_cpu, out_cpu, in_gpu, out_gpu, cuda.Stream()
+ 
+ 
+ def inference(engine, context, inputs, out_cpu, in_gpu, out_gpu, stream):
+     # Run one synchronous inference and return the host output buffer.
+     #
+     # inputs  : host array copied into the device input buffer `in_gpu`.
+     # out_cpu : host buffer that receives the contents of `out_gpu`.
+     # `engine` and `stream` are kept in the signature for callers but are not
+     # used by this synchronous path.
+     # Returns `out_cpu` after it has been filled from the device.
+     # Raises RuntimeError when TensorRT reports that execution failed
+     # (previously the stale contents of `out_gpu` were returned).
+ 
+     # async version
+     # with engine.create_execution_context() as context:  # cost time to initialize
+     # cuda.memcpy_htod_async(in_gpu, inputs, stream)
+     # context.execute_async(1, [int(in_gpu), int(out_gpu)], stream.handle, None)
+     # cuda.memcpy_dtoh_async(out_cpu, out_gpu, stream)
+     # stream.synchronize()
+ 
+     # sync version
+     cuda.memcpy_htod(in_gpu, inputs)
+     # execute() returns a boolean success flag; do not read back on failure
+     if not context.execute(1, [int(in_gpu), int(out_gpu)]):
+         raise RuntimeError('TensorRT execute failed')
+     cuda.memcpy_dtoh(out_cpu, out_gpu)
+     return out_cpu
+ 
+ if __name__ == "__main__":
+     # Time ten single-sample inferences on the engine stored in the plan file.
+     inputs = np.random.random((1, 1, 29, 29)).astype(np.float32)
+ 
+     tensorrt_file_name = '/content/drive/My Drive/capstone1/CAN/bert.plan'
+     TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+     trt_runtime = trt.Runtime(TRT_LOGGER)
+ 
+     with open(tensorrt_file_name, 'rb') as f:
+         engine_data = f.read()
+     engine = trt_runtime.deserialize_cuda_engine(engine_data)
+     # engine = build_engine(model_path)
+     context = engine.create_execution_context()
+ 
+     # Allocate the buffers once, outside the timed loop: allocating page-locked
+     # host memory and device memory on every iteration inflated the reported
+     # time and accumulated allocations.
+     in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
+     flat_inputs = inputs.reshape(-1)
+     for _ in range(10):
+         t1 = time.time()
+         res = inference(engine, context, flat_inputs, out_cpu, in_gpu, out_gpu, stream)
+         print(res)
+         print("cost time: ", time.time()-t1)
\ No newline at end of file
--- a/코드/연합학습/quantization/torch2onnx.py 0 → 100644
View file @fb56811
+++ b/코드/연합학습/quantization/torch2onnx.py 0 → 100644
View file @fb56811
+ import model
+ import torch
+ 
+ import importlib
+ importlib.reload(model)
+ 
+ batch_size = 256
+ model = model.Net().cuda().eval()
+ inputs = torch.randn(batch_size, 1, 29, 29, requires_grad=True).cuda()
+ torch_out = model(inputs)
+ 
+ torch.onnx.export(
+     model,
+     inputs,
+     'bert.onnx',
+     input_names=['inputs'],
+     output_names=['outputs'],
+     export_params=True)
+ 
+ print('done, onnx model')
\ No newline at end of file
--- a/코드/연합학습/utils.py 0 → 100644
View file @fb56811
+++ b/코드/연합학습/utils.py 0 → 100644
View file @fb56811