Showing
9 changed files
with
319 additions
and
24 deletions
This diff is collapsed. Click to expand it.
코드/연합학습/deprecated.py
0 → 100644
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
| 1 | import torch.nn as nn | 1 | import torch.nn as nn |
| 2 | -import torch.nn.functional as F | ||
| 3 | import torch | 2 | import torch |
| 4 | import const | 3 | import const |
| 5 | 4 | ||
# Default width of the state tensor carried from one forward call to the next.
STATE_DIM = 8 * 32


class OneNet(nn.Module):
    """Stateful packet classifier.

    Every forward call takes one input tensor together with the previous
    state and returns two class logits plus the next state.

    Args:
        packet_num: Number of packets per input; the input width is
            ``8 * packet_num`` (the 8 is annotated as "byte" in the original
            code).
        feature_dim: Width of the feature vector produced by
            ``feature_layer``. Defaults to 32, the previously hard-coded value.
        state_dim: Width of the state tensor. Defaults to ``STATE_DIM``.

    Attributes:
        in_dim, feature_dim, state_dim: The layer widths actually used, so
            callers can build correctly shaped inputs and initial states.
    """

    def __init__(self, packet_num, feature_dim=32, state_dim=STATE_DIM):
        super().__init__()
        hidden_dim = 32
        self.in_dim = 8 * packet_num
        self.feature_dim = feature_dim
        self.state_dim = state_dim
        # Both heads consume the packet features concatenated with the state.
        joint_dim = state_dim + feature_dim

        # Transform the given packet into a feature-space tensor.
        self.feature_layer = nn.Sequential(
            nn.Linear(self.in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, feature_dim),
            nn.ReLU(),
        )

        # Generates the next state 's'.
        self.f = nn.Sequential(
            nn.Linear(joint_dim, state_dim),
            nn.ReLU(),
            nn.Linear(state_dim, state_dim),
            nn.ReLU(),
        )

        # Checks whether the given packet is malicious (2 output logits).
        self.g = nn.Sequential(
            nn.Linear(joint_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
        )

    def forward(self, x, s):
        """Classify ``x`` given state ``s``.

        Args:
            x: Input tensor of shape ``(batch, in_dim)``.
            s: Previous state of shape ``(batch, state_dim)``.

        Returns:
            Tuple ``(logits, next_state)`` with shapes ``(batch, 2)`` and
            ``(batch, state_dim)``.
        """
        features = self.feature_layer(x)
        # Concatenate along dim 1 so both heads see features and state.
        joint = torch.cat((features, s), 1)
        next_state = self.f(joint)
        logits = self.g(joint)

        return logits, next_state
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
코드/연합학습/quantization/onnx2tensorRT.py
0 → 100644
"""Convert the exported ONNX model into a serialized TensorRT engine."""
import tensorrt as trt

onnx_file_name = 'bert.onnx'
tensorrt_file_name = 'bert.plan'
fp16_mode = True
# int8_mode = True
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Bit mask requesting an explicit-batch network definition.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(EXPLICIT_BATCH)
parser = trt.OnnxParser(network, TRT_LOGGER)

# NOTE(review): these builder attributes and build_cuda_engine() belong to the
# older TensorRT builder API -- confirm the installed version still has them.
builder.max_workspace_size = 1 << 30
builder.fp16_mode = fp16_mode
# builder.int8_mode = int8_mode

with open(onnx_file_name, 'rb') as onnx_file:
    parsed_ok = parser.parse(onnx_file.read())
if not parsed_ok:
    for error_index in range(parser.num_errors):
        print(parser.get_error(error_index))
    # Building from a partially parsed network would only fail later with a
    # less helpful error, so stop here.
    raise SystemExit('failed to parse ' + onnx_file_name)

# for int8 mode
# print(network.num_layers, network.num_inputs , network.num_outputs)
# for layer_index in range(network.num_layers):
#     layer = network[layer_index]
#     print(layer.name)
#     tensor = layer.get_output(0)
#     print(tensor.name)
#     tensor.dynamic_range = (0, 255)

    # input_tensor = layer.get_input(0)
    # print(input_tensor)
    # input_tensor.dynamic_range = (0, 255)

engine = builder.build_cuda_engine(network)
if engine is None:
    # A failed build yields no engine; report it instead of crashing on
    # engine.serialize().
    raise SystemExit('failed to build a TensorRT engine from ' + onnx_file_name)
buf = engine.serialize()
with open(tensorrt_file_name, 'wb') as f:
    f.write(buf)

print('done, trt model')
| ... | \ No newline at end of file | ... | \ No newline at end of file |
코드/연합학습/quantization/tensorRT_test.py
0 → 100644
| 1 | +import tensorrt as trt | ||
| 2 | +import pycuda.driver as cuda | ||
| 3 | +import numpy as np | ||
| 4 | +import torch | ||
| 5 | +import pycuda.autoinit | ||
| 6 | +import dataset | ||
| 7 | +import model | ||
| 8 | +import time | ||
| 9 | +# print(dir(trt)) | ||
| 10 | + | ||
# Load the serialized engine and create the execution context used by the
# first benchmark below.
tensorrt_file_name = 'bert.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)

with open(tensorrt_file_name, 'rb') as f:
    engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
if engine is None:
    # Deserialization reports failure by returning no engine (e.g. a plan
    # built for another TensorRT version or GPU); fail with a clear message.
    raise RuntimeError('could not deserialize TensorRT engine: ' + tensorrt_file_name)
context = engine.create_execution_context()
| 19 | + | ||
| 20 | +# class HostDeviceMem(object): | ||
| 21 | +# def __init__(self, host_mem, device_mem): | ||
| 22 | +# self.host = host_mem | ||
| 23 | +# self.device = device_mem | ||
| 24 | + | ||
| 25 | +# def __str__(self): | ||
| 26 | +# return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) | ||
| 27 | + | ||
| 28 | +# def __repr__(self): | ||
| 29 | +# return self.__str__() | ||
| 30 | + | ||
| 31 | +# inputs, outputs, bindings, stream = [], [], [], [] | ||
| 32 | +# for binding in engine: | ||
| 33 | +# size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size | ||
| 34 | +# dtype = trt.nptype(engine.get_binding_dtype(binding)) | ||
| 35 | +# host_mem = cuda.pagelocked_empty(size, dtype) | ||
| 36 | +# device_mem = cuda.mem_alloc(host_mem.nbytes) | ||
| 37 | +# bindings.append(int(device_mem)) | ||
| 38 | +# if engine.binding_is_input(binding): | ||
| 39 | +# inputs.append( HostDeviceMem(host_mem, device_mem) ) | ||
| 40 | +# else: | ||
| 41 | +# outputs.append(HostDeviceMem(host_mem, device_mem)) | ||
| 42 | + | ||
| 43 | +# input_ids = np.ones([1, 1, 29, 29]) | ||
| 44 | + | ||
| 45 | +# numpy_array_input = [input_ids] | ||
| 46 | +# hosts = [input.host for input in inputs] | ||
| 47 | +# trt_types = [trt.int32] | ||
| 48 | + | ||
| 49 | +# for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types): | ||
| 50 | +# numpy_array = np.asarray(numpy_array).ravel() | ||
| 51 | +# np.copyto(host, numpy_array) | ||
| 52 | + | ||
| 53 | +# def do_inference(context, bindings, inputs, outputs, stream): | ||
| 54 | +# [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] | ||
| 55 | +# context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) | ||
| 56 | +# [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] | ||
| 57 | +# stream.synchronize() | ||
| 58 | +# return [out.host for out in outputs] | ||
| 59 | + | ||
| 60 | +# trt_outputs = do_inference( | ||
| 61 | +# context=context, | ||
| 62 | +# bindings=bindings, | ||
| 63 | +# inputs=inputs, | ||
| 64 | +# outputs=outputs, | ||
| 65 | +# stream=stream) | ||
| 66 | + | ||
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass through a TensorRT execution context.

    Args:
        context: TensorRT execution context to run.
        input_img: Input array; converted to a C-contiguous float32 array
            before it is copied to the device.
        output_size: Shape of the host array that receives the predictions.
        batch_size: Batch size passed to ``context.execute_async``; also used
            as a multiplier when sizing the device buffers.

    Returns:
        A float32 NumPy array of shape ``output_size`` holding the predictions.
    """
    # The raw host-to-device copy needs one contiguous float32 buffer.
    input_img = np.ascontiguousarray(input_img, dtype=np.float32)
    # Host buffer that receives the predictions.
    output = np.empty(output_size, dtype=np.float32)

    # NOTE(review): the callers in this file pass arrays that already include
    # the batch dimension, so multiplying by batch_size over-allocates; the
    # sizes are kept as they were so unbatched callers keep working -- confirm.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    try:
        d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
        try:
            bindings = [int(d_input), int(d_output)]
            stream = cuda.Stream()
            # Transfer input data to the device.
            cuda.memcpy_htod_async(d_input, input_img, stream)
            # Execute the model.
            context.execute_async(batch_size, bindings, stream.handle, None)
            # Transfer predictions back.
            cuda.memcpy_dtoh_async(output, d_output, stream)
            # Wait until all work queued on the stream has finished.
            stream.synchronize()
        finally:
            # Release device memory now rather than waiting for garbage
            # collection; this function is called in tight benchmark loops.
            d_output.free()
    finally:
        d_input.free()
    return output
| 91 | + | ||
| 92 | + | ||
# Benchmark: 100 batches of 256 synthetic inputs through (1) the 'bert.plan'
# engine loaded above, (2) the 'bert_int.plan' engine and (3) the plain
# PyTorch model; each section prints its elapsed wall-clock seconds.

# Optional: run on the real dataset instead of synthetic input.
# kwargs = {"./dataset/DoS_dataset.csv" : './DoS_dataset.txt'}
# train_data_set, data_idx_map, net_class_count, net_data_count, test_data_set = dataset.GetCanDatasetUsingTxtKwarg(100, 0, **kwargs)
# testloader = torch.utils.data.DataLoader(test_data_set, batch_size=256,
#                                          shuffle=False, num_workers=2)

# --- (1) TensorRT engine loaded from 'bert.plan' ---
check_time = time.time()
temp = np.ones([256, 1, 29, 29])
for idx in range(100):
    # for i, (inputs, labels) in enumerate(testloader):
    trt_outputs = infer(context, temp, (256, 2), 256)
    print(trt_outputs.shape)
    # print(trt_outputs)
    # print(np.argmax(trt_outputs, axis=0))
print(time.time() - check_time)


# --- (2) TensorRT engine loaded from 'bert_int.plan' ---
tensorrt_file_name = 'bert_int.plan'
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)

with open(tensorrt_file_name, 'rb') as f:
    engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
if engine is None:
    # Deserialization reports failure by returning no engine.
    raise RuntimeError('could not deserialize TensorRT engine: ' + tensorrt_file_name)
context = engine.create_execution_context()
check_time = time.time()
temp = np.ones([256, 1, 29, 29])
for idx in range(100):
    # for i, (inputs, labels) in enumerate(testloader):
    trt_outputs = infer(context, temp, (256, 2), 256)
    print(trt_outputs.shape)
    # print(trt_outputs)
    # print(np.argmax(trt_outputs, axis=0))
print(time.time() - check_time)


# --- (3) Plain PyTorch model on the GPU ---
test_model = model.Net().cuda()
# Make sure the model transfer has finished before starting the clock.
torch.cuda.synchronize()
check_time = time.time()
temp = torch.randn(256, 1, 29, 29).cuda()
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for idx in range(100):
        # for i, (inputs, labels) in enumerate(testloader):
        # inputs = inputs.float().cuda()
        normal_outputs = test_model(temp)
        # print(normal_outputs)
        print(normal_outputs.shape)
# CUDA kernels run asynchronously; wait for them before reading the clock,
# otherwise only the launch overhead is measured.
torch.cuda.synchronize()
print(time.time() - check_time)
| 152 | + | ||
| 153 | + | ||
| 154 | + | ||
| 155 | +import tensorrt as trt | ||
| 156 | +import numpy as np | ||
| 157 | +import pycuda.autoinit | ||
| 158 | +import pycuda.driver as cuda | ||
| 159 | +import time | ||
| 160 | + | ||
| 161 | +model_path = "bert.onnx" | ||
| 162 | +input_size = 32 | ||
| 163 | + | ||
| 164 | +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) | ||
| 165 | + | ||
| 166 | +# def build_engine(model_path): | ||
| 167 | +# with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: | ||
| 168 | +# builder.max_workspace_size = 1<<20 | ||
| 169 | +# builder.max_batch_size = 1 | ||
| 170 | +# with open(model_path, "rb") as f: | ||
| 171 | +# parser.parse(f.read()) | ||
| 172 | +# engine = builder.build_cuda_engine(network) | ||
| 173 | +# return engine | ||
| 174 | + | ||
def alloc_buf(engine):
    """Allocate host and device buffers for engine bindings 0 and 1.

    Binding 0 is used as the input and binding 1 as the output (presumably the
    engine has exactly one of each -- verify against the exported model).

    Args:
        engine: TensorRT engine whose binding shapes and dtypes size the
            buffers.

    Returns:
        Tuple ``(in_cpu, out_cpu, in_gpu, out_gpu, stream)``: two page-locked
        host arrays, two device allocations of matching byte size, and a new
        CUDA stream.
    """
    def _pinned_host_buffer(binding_index):
        # Flat page-locked host array sized and typed after the binding.
        element_count = trt.volume(engine.get_binding_shape(binding_index))
        element_type = trt.nptype(engine.get_binding_dtype(binding_index))
        return cuda.pagelocked_empty(element_count, element_type)

    # Host (CPU) memory.
    in_cpu = _pinned_host_buffer(0)
    out_cpu = _pinned_host_buffer(1)
    # Device (GPU) memory with the same byte sizes.
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)
    return in_cpu, out_cpu, in_gpu, out_gpu, cuda.Stream()
| 188 | + | ||
| 189 | + | ||
def inference(engine, context, inputs, out_cpu, in_gpu, out_gpu, stream):
    """Run one synchronous inference and return the host output buffer.

    Args:
        engine: Unused by the synchronous path (kept for the async variant).
        context: TensorRT execution context to run.
        inputs: Host array copied into ``in_gpu``.
        out_cpu: Host array that receives the result.
        in_gpu: Device allocation for the input binding.
        out_gpu: Device allocation for the output binding.
        stream: Unused by the synchronous path (kept for the async variant).

    Returns:
        ``out_cpu`` after it has been filled from ``out_gpu``.
    """
    # Async variant, kept for reference:
    # with engine.create_execution_context() as context:  # cost time to initialize
    #     cuda.memcpy_htod_async(in_gpu, inputs, stream)
    #     context.execute_async(1, [int(in_gpu), int(out_gpu)], stream.handle, None)
    #     cuda.memcpy_dtoh_async(out_cpu, out_gpu, stream)
    #     stream.synchronize()

    # Synchronous variant: copy in, execute with batch size 1, copy out.
    device_bindings = [int(device_buffer) for device_buffer in (in_gpu, out_gpu)]
    cuda.memcpy_htod(in_gpu, inputs)
    context.execute(1, device_bindings)
    cuda.memcpy_dtoh(out_cpu, out_gpu)
    return out_cpu
| 203 | + | ||
if __name__ == "__main__":
    # Load the serialized engine, then time 10 synchronous inference runs.
    tensorrt_file_name = '/content/drive/My Drive/capstone1/CAN/bert.plan'
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)

    with open(tensorrt_file_name, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    # engine = build_engine(model_path)
    if engine is None:
        # Deserialization reports failure by returning no engine.
        raise RuntimeError('could not deserialize TensorRT engine: ' + tensorrt_file_name)
    context = engine.create_execution_context()

    # Size the random input from the engine's input binding so the whole
    # device buffer allocated by alloc_buf() is initialised.
    input_shape = tuple(engine.get_binding_shape(0))
    inputs = np.random.random(input_shape).astype(np.float32)

    # Allocate once: the buffers are reusable, and allocating inside the loop
    # would add allocation cost to every timed run.
    in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
    for _ in range(10):
        t1 = time.time()
        res = inference(engine, context, inputs.reshape(-1), out_cpu, in_gpu, out_gpu, stream)
        print(res)
        print("cost time: ", time.time()-t1)
| ... | \ No newline at end of file | ... | \ No newline at end of file |
코드/연합학습/quantization/torch2onnx.py
0 → 100644
"""Export the PyTorch ``Net`` model to ONNX ('bert.onnx')."""
import importlib

import torch

import model

# Reload so that edits to model.py are picked up (presumably this script is
# re-run inside a long-lived interpreter or notebook -- confirm).
importlib.reload(model)

batch_size = 256
# Bound to ``net`` rather than ``model`` so the imported module is not hidden.
net = model.Net().cuda().eval()
# The export only traces a forward pass, so the input needs no gradients.
inputs = torch.randn(batch_size, 1, 29, 29).cuda()
with torch.no_grad():
    # Forward pass before exporting; fails early if model and input disagree.
    torch_out = net(inputs)

torch.onnx.export(
    net,
    inputs,
    'bert.onnx',
    input_names=['inputs'],
    output_names=['outputs'],
    export_params=True)

print('done, onnx model')
| ... | \ No newline at end of file | ... | \ No newline at end of file |
코드/연합학습/utils.py
0 → 100644
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment