Hyunjun

add original python code

1 +# coding: utf-8
2 +try:
3 + import urllib.request
4 +except ImportError:
5 + raise ImportError('You should use Python 3.x')
6 +import os.path
7 +import gzip
8 +import pickle
9 +import os
10 +import numpy as np
11 +
12 +
13 +key_file = {
14 + 'train':'cifar10-train.gz',
15 + 'test':'cifar10-test.gz'
16 +}
17 +
18 +dataset_dir = os.path.dirname(os.path.abspath('/Users/HyeonJun/Desktop/simple_convnet/dataset'))
19 +save_file = dataset_dir + "/cifar10.pkl"
20 +
21 +train_num = 50000
22 +test_num = 10000
23 +img_dim = (3, 32, 32)
24 +img_size = 3072
25 +
26 +def _load_label(file_name):
27 + file_path = dataset_dir + "/" + file_name
28 +
29 + print("Converting " + file_name + " to NumPy Array ...")
30 + with gzip.open(file_path, 'rb') as f:
31 + labels = np.frombuffer(f.read(), np.uint8, offset=0)
32 + labels = labels.reshape(-1, img_size+1)
33 + labels = labels.T
34 + print("Done")
35 +
36 + return labels[0]
37 +
38 +def _load_img(file_name):
39 + file_path = dataset_dir + "/" + file_name
40 +
41 + print("Converting " + file_name + " to NumPy Array ...")
42 + with gzip.open(file_path, 'rb') as f:
43 + data = np.frombuffer(f.read(), np.uint8, offset=0)
44 + data = data.reshape(-1, img_size+1)
45 + data = np.delete(data, 0, 1)
46 + print("Done")
47 +
48 + return data
49 +
50 +def _convert_numpy():
51 + dataset = {}
52 + dataset['train_img'] = _load_img(key_file['train'])
53 + dataset['train_label'] = _load_label(key_file['train'])
54 + dataset['test_img'] = _load_img(key_file['test'])
55 + dataset['test_label'] = _load_label(key_file['test'])
56 +
57 + return dataset
58 +
59 +def init_cifar10():
60 + dataset = _convert_numpy()
61 + print("Creating pickle file ...")
62 + with open(save_file, 'wb') as f:
63 + pickle.dump(dataset, f, -1)
64 + print("Done!")
65 +
66 +def _change_one_hot_label(X):
67 + T = np.zeros((X.size, 10))
68 + for idx, row in enumerate(T):
69 + row[X[idx]] = 1
70 +
71 + return T
72 +
73 +def load_cifar10(normalize=True, flatten=True, one_hot_label=False):
74 + """Load the CIFAR-10 dataset.
75 +
76 + Parameters
77 + ----------
78 + normalize : normalize pixel values to the 0.0-1.0 range
79 + one_hot_label :
80 + if True, labels are returned as one-hot arrays,
81 + e.g. [0,0,1,0,0,0,0,0,0,0]
82 + flatten : whether to flatten each image into a one-dimensional array
83 +
84 + Returns
85 + -------
86 + (training images, training labels), (test images, test labels)
87 + """
88 + if not os.path.exists(save_file):
89 + init_cifar10()
90 +
91 + with open(save_file, 'rb') as f:
92 + dataset = pickle.load(f)
93 +
94 + if normalize:
95 + for key in ('train_img', 'test_img'):
96 + dataset[key] = dataset[key].astype(np.float32)
97 + dataset[key] /= 255.0
98 +
99 + if one_hot_label:
100 + dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
101 + dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
102 +
103 + if not flatten:
104 + for key in ('train_img', 'test_img'):
105 + dataset[key] = dataset[key].reshape(-1, 3, 32, 32)
106 +
107 + return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])
108 +
109 +
110 +if __name__ == '__main__':
111 + init_cifar10()
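
As a quick sanity check, the loader can be exercised as below (a minimal sketch; it assumes the gzip files named in key_file are already present in dataset_dir and that the module is importable as dataset.cifar10, as in the training script further down):

from dataset.cifar10 import load_cifar10

# load flat, normalized arrays with integer labels
(x_train, t_train), (x_test, t_test) = load_cifar10(normalize=True,
                                                    flatten=True,
                                                    one_hot_label=False)
print(x_train.shape)  # expected (50000, 3072)
print(t_train.shape)  # expected (50000,)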
1 +# coding: utf-8
2 +#import cupy as cp
3 +import numpy as cp  # NumPy stands in for CuPy here so the cp.* calls run on CPU
4 +import numpy as np
5 +
6 +
7 +def identity_function(x):
8 + return x
9 +
10 +
11 +def step_function(x):
12 + return np.array(x > 0, dtype=int)  # np.int is deprecated in recent NumPy
13 +
14 +
15 +def sigmoid(x):
16 + return 1 / (1 + np.exp(-x))
17 +
18 +
19 +def sigmoid_grad(x):
20 + return (1.0 - sigmoid(x)) * sigmoid(x)
21 +
22 +
23 +def relu(x):
24 + return np.maximum(0, x)
25 +
26 +
27 +def relu_grad(x):
28 + grad = np.zeros_like(x)  # np.zeros(x) would misinterpret x as a shape
29 + grad[x>=0] = 1
30 + return grad
31 +
32 +
33 +def softmax(x):
34 + if x.ndim == 2:
35 + x = x.T
36 + x = x - cp.max(x, axis=0)
37 + y = cp.exp(x, dtype=np.float32) / cp.sum(cp.exp(x, dtype=np.float32), axis=0, dtype=np.float32)
38 + return y.T
39 +
40 + x = x - cp.max(x) # guard against overflow
41 + return cp.exp(x) / cp.sum(cp.exp(x))
42 +
43 +
44 +def mean_squared_error(y, t):
45 + return 0.5 * np.sum((y-t)**2)
46 +
47 +
48 +def cross_entropy_error(y, t):
49 + if y.ndim == 1:
50 + t = t.reshape(1, t.size)
51 + y = y.reshape(1, y.size)
52 +
53 + # if the targets are one-hot vectors, convert them to class indices
54 + if t.size == y.size:
55 + t = t.argmax(axis=1)
56 +
57 + batch_size = y.shape[0]
58 + return -cp.sum(cp.log(y[cp.arange(batch_size), t])) / batch_size
59 +
60 +
61 +def softmax_loss(X, t):
62 + y = softmax(X)
63 + return cross_entropy_error(y, t)
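
A small numeric check of softmax and cross_entropy_error (a sketch; the score values below are illustrative only):

import numpy as np
from functions import softmax, cross_entropy_error

scores = np.array([[0.3, 2.9, 4.0],
                   [0.1, 0.2, 0.7]], dtype=np.float32)
probs = softmax(scores)
print(probs.sum(axis=1))   # each row sums to ~1.0

labels = np.array([2, 2])  # integer class labels
print(cross_entropy_error(probs, labels))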
1 +# coding: utf-8
2 +import numpy as np
3 +
4 +def _numerical_gradient_1d(f, x):
5 + h = 1e-4 # 0.0001
6 + grad = np.zeros_like(x)
7 +
8 + for idx in range(x.size):
9 + tmp_val = x[idx]
10 + x[idx] = float(tmp_val) + h
11 + fxh1 = f(x) # f(x+h)
12 +
13 + x[idx] = tmp_val - h
14 + fxh2 = f(x) # f(x-h)
15 + grad[idx] = (fxh1 - fxh2) / (2*h)
16 +
17 + x[idx] = tmp_val
18 +
19 + return grad
20 +
21 +
22 +def numerical_gradient_2d(f, X):
23 + if X.ndim == 1:
24 + return _numerical_gradient_1d(f, X)
25 + else:
26 + grad = np.zeros_like(X)
27 +
28 + for idx, x in enumerate(X):
29 + grad[idx] = _numerical_gradient_1d(f, x)
30 +
31 + return grad
32 +
33 +
34 +def numerical_gradient(f, x):
35 + h = 1e-4 # 0.0001
36 + grad = np.zeros_like(x)
37 +
38 + it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
39 + while not it.finished:
40 + idx = it.multi_index
41 + tmp_val = x[idx]
42 + x[idx] = float(tmp_val) + h
43 + fxh1 = f(x) # f(x+h)
44 +
45 + x[idx] = tmp_val - h
46 + fxh2 = f(x) # f(x-h)
47 + grad[idx] = (fxh1 - fxh2) / (2*h)
48 +
49 + x[idx] = tmp_val # restore the original value
50 + it.iternext()
51 +
52 + return grad
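
numerical_gradient is mainly useful for gradient checking; a minimal sketch against a function with a known analytic gradient (the quadratic below is illustrative only):

import numpy as np
from gradient import numerical_gradient

f = lambda x: np.sum(x ** 2)          # analytic gradient is 2*x
x = np.array([[1.0, 2.0], [3.0, -1.0]])

num_grad = numerical_gradient(f, x)
print(np.max(np.abs(num_grad - 2 * x)))  # should be on the order of 1e-8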
1 +# coding: utf-8
2 +#import cupy as cp
3 +import numpy as cp
4 +import numpy as np
5 +from functions import *
6 +from util import im2col, col2im, DW_im2col
7 +
8 +
9 +class Relu:
10 + def __init__(self):
11 + self.mask = None
12 +
13 + def forward(self, x):
14 + self.mask = (x <= 0)
15 + out = x.copy()
16 + out[self.mask] = 0
17 +
18 + return out
19 +
20 + def backward(self, dout):
21 + dout[self.mask] = 0
22 + dx = dout
23 +
24 + return dx
25 +
26 +
27 +class Sigmoid:
28 + def __init__(self):
29 + self.out = None
30 +
31 + def forward(self, x):
32 + out = sigmoid(x)
33 + self.out = out
34 + return out
35 +
36 + def backward(self, dout):
37 + dx = dout * (1.0 - self.out) * self.out
38 +
39 + return dx
40 +
41 +
42 +class Affine:
43 + def __init__(self, W):
44 + self.W =W
45 +# self.b = b
46 +
47 + self.x = None
48 + self.original_x_shape = None
49 + # gradients of the weight (and bias) parameters
50 + self.dW = None
51 +# self.db = None
52 +
53 + def forward(self, x):
54 + # handle tensor inputs: flatten (N, C, H, W) to (N, C*H*W)
55 + self.original_x_shape = x.shape
56 + x = x.reshape(x.shape[0], -1)
57 + self.x = x
58 +
59 + out = cp.dot(self.x, self.W) #+ self.b
60 +
61 + return out
62 +
63 + def backward(self, dout):
64 + dx = cp.dot(dout, self.W.T)
65 + self.dW = cp.dot(self.x.T, dout)
66 +# self.db = cp.sum(dout, axis=0)
67 +
68 + dx = dx.reshape(*self.original_x_shape) # restore the original input shape (tensor support)
69 + return dx
70 +
71 +
72 +class SoftmaxWithLoss:
73 + def __init__(self):
74 + self.loss = None
75 + self.y = None # output of softmax
76 + self.t = None # target labels
77 +
78 + def forward(self, x, t):
79 + self.t = t
80 + self.y = softmax(x)
81 + self.loss = cross_entropy_error(self.y, self.t)
82 +
83 + return self.loss
84 +
85 + def backward(self, dout=1):
86 + batch_size = self.t.shape[0]
87 + if self.t.size == self.y.size: # targets are one-hot vectors
88 + dx = (self.y - self.t) / batch_size
89 + else:
90 + dx = self.y.copy()
91 + dx[np.arange(batch_size), self.t] -= 1
92 + dx = dx / batch_size
93 +
94 + return dx
95 +
96 +
97 +class Dropout:
98 + """
99 + http://arxiv.org/abs/1207.0580
100 + """
101 + def __init__(self, dropout_ratio=0.5):
102 + self.dropout_ratio = dropout_ratio
103 + self.mask = None
104 +
105 + def forward(self, x, train_flg=True):
106 + if train_flg:
107 + self.mask = np.random.rand(*x.shape) > self.dropout_ratio
108 + return x * self.mask
109 + else:
110 + return x * (1.0 - self.dropout_ratio)
111 +
112 + def backward(self, dout):
113 + return dout * self.mask
114 +
115 +
116 +class LightNormalization:
117 + """Batch-normalization-style layer without learnable gamma/beta (per-channel normalization only).
118 + """
119 + def __init__(self, momentum=0.9, running_mean=None, running_var=None):
120 + self.momentum = momentum
121 + self.input_shape = None # 4-dim for conv layers, 2-dim for fully connected layers
122 +
123 + # running mean and variance used at test time
124 + self.running_mean = running_mean
125 + self.running_var = running_var
126 +
127 + # intermediate values used during backward
128 + self.batch_size = None
129 + self.xc = None
130 + self.std = None
131 +
132 + def forward(self, x, train_flg=True):
133 + self.input_shape = x.shape
134 + if x.ndim == 2:
135 + N, D = x.shape
136 + x = x.reshape(N, D, 1, 1)
137 +
138 + x = x.transpose(0, 2, 3, 1)
139 + out = self.__forward(x, train_flg)
140 + out = out.transpose(0, 3, 1, 2)
141 +
142 + return out.reshape(*self.input_shape)
143 +
144 + def __forward(self, x, train_flg):
145 + if self.running_mean is None:
146 + N, H, W, C = x.shape
147 + self.running_mean = cp.zeros(C, dtype=np.float32)
148 + self.running_var = cp.zeros(C, dtype=np.float32)
149 +
150 + if train_flg:
151 + mu = x.mean(axis=(0, 1, 2))
152 + xc = x - mu
153 + var = cp.mean(xc**2, axis=(0, 1, 2), dtype=np.float32)
154 + std = cp.sqrt(var + 10e-7, dtype=np.float32)
155 + xn = xc / std
156 +
157 + self.batch_size = x.shape[0]
158 + self.xc = xc
159 + self.xn = xn
160 + self.std = std
161 + self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
162 + self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
163 + else:
164 + xc = x - self.running_mean
165 + xn = xc / ((cp.sqrt(self.running_var + 10e-7, dtype=np.float32)))
166 +
167 + out = xn
168 + return out
169 +
170 + def backward(self, dout):
171 + if dout.ndim == 2:
172 + N, D = dout.shape
173 + dout = dout.reshape(N, D, 1, 1)
174 +
175 + dout = dout.transpose(0, 2, 3, 1)
176 + dx = self.__backward(dout)
177 + dx = dx.transpose(0, 3, 1, 2)
178 +
179 + dx = dx.reshape(*self.input_shape)
180 + return dx
181 +
182 + def __backward(self, dout):
183 + dxn = dout
184 + dxc = dxn / self.std
185 + dstd = -cp.sum((dxn * self.xc) / (self.std * self.std), axis=0)
186 + dvar = 0.5 * dstd / self.std
187 + dxc += (2.0 / self.batch_size) * self.xc * dvar
188 + dmu = cp.sum(dxc, axis=0)
189 + dx = dxc - dmu / self.batch_size
190 +
191 + return dx
192 +
193 +class BatchNormalization:
194 + """
195 + http://arxiv.org/abs/1502.03167
196 + """
197 + def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
198 + self.gamma = gamma
199 + self.beta = beta
200 + self.momentum = momentum
201 + self.input_shape = None # 4-dim for conv layers, 2-dim for fully connected layers
202 +
203 + # running mean and variance used at test time
204 + self.running_mean = running_mean
205 + self.running_var = running_var
206 +
207 + # intermediate values used during backward
208 + self.batch_size = None
209 + self.xc = None
210 + self.std = None
211 + self.dgamma = None
212 + self.dbeta = None
213 +
214 + def forward(self, x, train_flg=True):
215 + self.input_shape = x.shape
216 + if x.ndim != 2:
217 + N, C, H, W = x.shape
218 + x = x.reshape(N, -1)
219 +
220 + out = self.__forward(x, train_flg)
221 +
222 + return out.reshape(*self.input_shape)
223 +
224 + def __forward(self, x, train_flg):
225 + if self.running_mean is None:
226 + N, D = x.shape
227 + self.running_mean = cp.zeros(D, dtype=np.float32)
228 + self.running_var = cp.zeros(D, dtype=np.float32)
229 +
230 + if train_flg:
231 + mu = x.mean(axis=0)
232 + xc = x - mu
233 + var = cp.mean(xc**2, axis=0, dtype=np.float32)
234 + std = cp.sqrt(var + 10e-7, dtype=np.float32)
235 + xn = xc / std
236 +
237 + self.batch_size = x.shape[0]
238 + self.xc = xc
239 + self.xn = xn
240 + self.std = std
241 + self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
242 + self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
243 + else:
244 + xc = x - self.running_mean
245 + xn = xc / ((cp.sqrt(self.running_var + 10e-7, dtype=np.float32)))
246 +
247 + out = self.gamma * xn + self.beta
248 + return out
249 +
250 + def backward(self, dout):
251 + if dout.ndim != 2:
252 + N, C, H, W = dout.shape
253 + dout = dout.reshape(N, -1)
254 +
255 + dx = self.__backward(dout)
256 +
257 + dx = dx.reshape(*self.input_shape)
258 + return dx
259 +
260 + def __backward(self, dout):
261 + dbeta = dout.sum(axis=0)
262 + dgamma = cp.sum(self.xn * dout, axis=0)
263 + dxn = self.gamma * dout
264 + dxc = dxn / self.std
265 + dstd = -cp.sum((dxn * self.xc) / (self.std * self.std), axis=0)
266 + dvar = 0.5 * dstd / self.std
267 + dxc += (2.0 / self.batch_size) * self.xc * dvar
268 + dmu = cp.sum(dxc, axis=0)
269 + dx = dxc - dmu / self.batch_size
270 +
271 + self.dgamma = dgamma
272 + self.dbeta = dbeta
273 +
274 + return dx
275 +
276 +
277 +class Convolution:
278 + def __init__(self, W, stride=1, pad=0):
279 + self.W = W
280 + self.stride = stride
281 + self.pad = pad
282 +
283 + self.x = None
284 + self.col = None
285 + self.col_W = None
286 +
287 + self.dW = None
288 +
289 + def forward(self, x):
290 + FN, C, FH, FW = self.W.shape
291 + N, C, H, W = x.shape
292 + out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
293 + out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
294 +
295 + col = im2col(x, FH, FW, self.stride, self.pad)
296 + col_W = self.W.reshape(FN, -1).T
297 +
298 + out = cp.dot(col, col_W)
299 + out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
300 +
301 + self.x = x
302 + self.col = col
303 + self.col_W = col_W
304 +
305 + return out
306 +
307 + def backward(self, dout):
308 + FN, C, FH, FW = self.W.shape
309 + dout = dout.transpose(0,2,3,1).reshape(-1, FN)
310 +
311 + self.dW = cp.dot(self.col.T, dout)
312 + self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
313 +
314 + dcol = cp.dot(dout, self.col_W.T)
315 + dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
316 +
317 + return dx
318 +
319 +
320 +class Pooling:
321 + def __init__(self, pool_h, pool_w, stride=1, pad=0):
322 + self.pool_h = pool_h
323 + self.pool_w = pool_w
324 + self.stride = stride
325 + self.pad = pad
326 +
327 + self.x = None
328 + self.arg_max = None
329 +
330 + def forward(self, x):
331 + N, C, H, W = x.shape
332 + out_h = int(1 + (H - self.pool_h) / self.stride)
333 + out_w = int(1 + (W - self.pool_w) / self.stride)
334 +
335 + col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
336 + col = col.reshape(-1, self.pool_h*self.pool_w)
337 +
338 + arg_max = cp.argmax(col, axis=1)
339 + out = cp.array(cp.max(col, axis=1), dtype=np.float32)
340 + out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
341 +
342 + self.x = x
343 + self.arg_max = arg_max
344 +
345 + return out
346 +
347 + def backward(self, dout):
348 + dout = dout.transpose(0, 2, 3, 1)
349 +
350 + pool_size = self.pool_h * self.pool_w
351 + dmax = cp.zeros((dout.size, pool_size), dtype=np.float32)
352 + dmax[cp.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
353 + dmax = dmax.reshape(dout.shape + (pool_size,))
354 +
355 + dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
356 + dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
357 +
358 + return dx
359 +
360 +class DW_Convolution:
361 + def __init__(self, W, stride=1, pad=0):
362 + self.W = W
363 + self.stride = stride
364 + self.pad = pad
365 +
366 + self.x = None
367 + self.col = None
368 + self.col_W = None
369 +
370 + self.dW = None
371 + self.db = None
372 +
373 +
374 +
375 + def forward(self, x):
376 + FN, C, FH, FW = self.W.shape
377 + N, C, H, W = x.shape
378 + out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
379 + out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
380 +
381 + col = DW_im2col(x, FH, FW, self.stride, self.pad)
382 + col_W = self.W.reshape(FN, -1).T
383 +
384 + outlist = np.zeros((FN, N*H*W, 1))
386 + for count in range(FN):
387 + outlist[count] = np.dot(col[count, :, :], col_W[:, count]).reshape(-1,1)
388 +
389 + out = outlist.transpose(1,0,2)
390 + out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
391 +
392 + self.x = x
393 + self.col = col
394 + self.col_W = col_W
395 + return out
396 +
397 +
398 + def backward(self, dout):
399 + FN, C, FH, FW = self.W.shape
400 + N, XC, H, W = dout.shape
401 + dout = dout.transpose(0,2,3,1).reshape(-1, FN)
402 +
403 +
404 + dW_list = np.zeros((FN, FH*FW))
405 + dcol_list = np.zeros((N * H * W, FN, FH * FW))
406 + for count in range(FN):
407 + dW_list[count] = np.dot(self.col[count].transpose(1,0), dout[:, count])
408 + dcol_list[:,count,:] = np.dot(dout[:,count].reshape(-1,1), self.col_W.T[count,:].reshape(1,-1))
409 + self.dW = dW_list
410 + self.dW = self.dW.reshape(FN, C, FH, FW)
411 +
412 +
413 + dcol = dcol_list
414 + dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
415 +
416 +
417 + return dx
418 +
419 +
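
A shape-level sketch of how the convolution, activation, and pooling layers compose (illustrative values only; the random weights and sizes here are not the network defined later):

import numpy as np
from layers import Convolution, Relu, Pooling

x = np.random.randn(2, 3, 32, 32).astype(np.float32)         # (N, C, H, W)
W = (0.01 * np.random.randn(8, 3, 3, 3)).astype(np.float32)  # 8 filters of 3x3

conv = Convolution(W, stride=1, pad=1)   # padding 1 keeps the 32x32 size
relu = Relu()
pool = Pooling(pool_h=2, pool_w=2, stride=2)

out = pool.forward(relu.forward(conv.forward(x)))
print(out.shape)   # expected (2, 8, 16, 16)

dout = np.ones_like(out)                 # backward runs in reverse order
dx = conv.backward(relu.backward(pool.backward(dout)))
print(dx.shape)    # matches x.shape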
1 +
2 +import numpy as cp
3 +import numpy as np
4 +
5 +class SGD:
6 +
7 +
8 + def __init__(self, lr=0.01):
9 + self.lr = lr
10 +
11 + def update(self, params, grads):
12 + for key in params.keys():
13 + params[key] -= self.lr * grads[key]
14 +
15 +
16 +class Momentum:
17 +
18 +
19 + def __init__(self, lr=0.01, momentum=0.9):
20 + self.lr = lr
21 + self.momentum = momentum
22 + self.v = None
23 +
24 + def update(self, params, grads):
25 + if self.v is None:
26 + self.v = {}
27 + for key, val in params.items():
28 + self.v[key] = np.zeros_like(val)
29 +
30 + for key in params.keys():
31 + self.v[key] = self.momentum*self.v[key] - self.lr*grads[key]
32 + params[key] += self.v[key]
33 +
34 +
35 +class Nesterov:
36 +
37 +
38 + def __init__(self, lr=0.01, momentum=0.9):
39 + self.lr = lr
40 + self.momentum = momentum
41 + self.v = None
42 +
43 + def update(self, params, grads):
44 + if self.v is None:
45 + self.v = {}
46 + for key, val in params.items():
47 + self.v[key] = np.zeros_like(val)
48 +
49 + for key in params.keys():
50 + self.v[key] *= self.momentum
51 + self.v[key] -= self.lr * grads[key]
52 + params[key] += self.momentum * self.momentum * self.v[key]
53 + params[key] -= (1 + self.momentum) * self.lr * grads[key]
54 +
55 +
56 +class AdaGrad:
57 +
58 +
59 + def __init__(self, lr=0.01):
60 + self.lr = lr
61 + self.h = None
62 +
63 + def update(self, params, grads):
64 + if self.h is None:
65 + self.h = {}
66 + for key, val in params.items():
67 + self.h[key] = np.zeros_like(val)
68 +
69 + for key in params.keys():
70 + self.h[key] += grads[key] * grads[key]
71 + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
72 +
73 +
74 +class RMSprop:
75 +
76 +
77 + def __init__(self, lr=0.01, decay_rate = 0.99):
78 + self.lr = lr
79 + self.decay_rate = decay_rate
80 + self.h = None
81 +
82 + def update(self, params, grads):
83 + if self.h is None:
84 + self.h = {}
85 + for key, val in params.items():
86 + self.h[key] = np.zeros_like(val)
87 +
88 + for key in params.keys():
89 + self.h[key] *= self.decay_rate
90 + self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
91 + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
92 +
93 +
94 +class Adam:
95 +
96 +
97 + def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
98 + self.lr = lr
99 + self.beta1 = beta1
100 + self.beta2 = beta2
101 + self.iter = 0
102 + self.m = None
103 + self.v = None
104 +
105 + def update(self, params, grads):
106 + if self.m is None:
107 + self.m, self.v = {}, {}
108 + for key, val in params.items():
109 + self.m[key] = cp.zeros_like(val, dtype=np.float32)
110 + self.v[key] = cp.zeros_like(val, dtype=np.float32)
111 +
112 + self.iter += 1
113 + lr_t = self.lr * cp.sqrt(1.0 - self.beta2**self.iter, dtype=np.float32) / (1.0 - self.beta1**self.iter)
114 +
115 + for key in params.keys():
116 +
117 + self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
118 + self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
119 +
120 + params[key] -= lr_t * self.m[key] / (cp.sqrt(self.v[key], dtype=np.float32) + 1e-7)
121 +
122 +
123 +class EarlyStopping():
124 + def __init__(self, patience=0, verbose=0):
125 + self.step = 0
126 + self.acc = 0.0
127 + self.patience = patience
128 + self.verbose = verbose
129 +
130 + def validate(self, acc):
131 + if self.acc > acc:
132 + self.step += 1
133 + if self.step > self.patience:
134 + if self.verbose:
135 + print('early stopping')
136 + return True
137 + else:
138 + self.step = 0
139 + self.acc = acc
140 + return False
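
All optimizers share the same update(params, grads) interface and modify the parameter dictionary in place, so they are interchangeable; a minimal sketch with made-up parameter names:

import numpy as np
from optimizer import Adam

params = {'W1': np.array([1.0, 2.0]), 'W2': np.array([[0.5, -0.5]])}
grads  = {'W1': np.array([0.1, 0.1]), 'W2': np.array([[0.2, -0.2]])}

optimizer = Adam(lr=0.001)
for step in range(3):
    optimizer.update(params, grads)   # params are updated in place
print(params['W1'])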
1 +import pickle as pk
2 +import numpy as np
3 +
4 +with open('/Users/HyeonJun/Desktop/simple_convnet/params.pkl', 'rb') as file:
5 + params = pk.load(file)  # avoid shadowing the built-in name dict
6 + for key in params.keys():
7 + print(key, " : ", params[key])
1 +import sys, os
2 +sys.path.append(os.pardir)
3 +import pickle
4 +import numpy as cp
5 +import numpy as np
6 +from collections import OrderedDict
7 +from layers import *
8 +from gradient import numerical_gradient
9 +
10 +
11 +class SimpleConvNet:
12 + def __init__(self, input_dim=(3, 32, 32),
13 + conv_param={'filter_num':(32, 32, 64), 'filter_size':3, 'pad':1, 'stride':1},
14 + hidden_size=512, output_size=10, weight_init_std=0.01):
15 + filter_num = conv_param['filter_num']
16 + filter_size = conv_param['filter_size']
17 + filter_pad = conv_param['pad']
18 + filter_stride = conv_param['stride']
19 + input_size = input_dim[1]
20 + conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
21 + conv_data_size = int(filter_num[0] * conv_output_size * conv_output_size )
22 + pool1_output_size = int(filter_num[1] * (conv_output_size/2) * (conv_output_size/2))
23 + pool2_output_size = int(filter_num[2] * (conv_output_size/4) * (conv_output_size/4))
24 + pool3_output_size = int(filter_num[2] * (conv_output_size/8) * (conv_output_size/8))
25 +
26 + self.params = {}
27 + self.params['W1'] = cp.array( weight_init_std * \
28 + cp.random.randn(filter_num[0], input_dim[0], filter_size, filter_size), dtype=np.float32)
29 +
30 + self.params['W2'] = cp.array( weight_init_std * \
31 + cp.random.randn(filter_num[1], filter_num[0], 1, 1), dtype=np.float32)
32 +
33 + self.params['W3'] = cp.array( weight_init_std * \
34 + cp.random.randn(filter_num[1], 1, filter_size, filter_size), dtype=np.float32)
35 +
36 + self.params['W4'] = cp.array( weight_init_std * \
37 + cp.random.randn(filter_num[2], filter_num[1], 1, 1), dtype=np.float32)
38 +
39 + self.params['W5'] = cp.array( weight_init_std * \
40 + cp.random.randn(filter_num[2], 1, filter_size, filter_size), dtype=np.float32)
41 +
42 + self.params['W6'] = cp.array( weight_init_std * \
43 + cp.random.randn(pool3_output_size, hidden_size), dtype=np.float32)
44 +
45 + self.params['W7'] = cp.array( weight_init_std * \
46 + cp.random.randn(hidden_size, output_size), dtype=np.float32)
47 +
48 + self.layers = OrderedDict()
49 + self.layers['Conv1'] = Convolution(self.params['W1'],
50 + conv_param['stride'], conv_param['pad'])
51 + self.layers['LightNorm1'] = LightNormalization()
52 + self.layers['Relu1'] = Relu()
53 + self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
54 +
55 + self.layers['Conv2'] = Convolution(self.params['W2'],
56 + 1, 0)
57 + self.layers['LightNorm2'] = LightNormalization()
58 + self.layers['Relu2'] = Relu()
59 + self.layers['Conv3'] = DW_Convolution(self.params['W3'],
60 + conv_param['stride'], conv_param['pad'])
61 + self.layers['LightNorm3'] = LightNormalization()
62 + self.layers['Relu3'] = Relu()
63 + self.layers['Pool2'] = Pooling(pool_h=2, pool_w=2, stride=2)
64 +
65 + self.layers['Conv4'] = Convolution(self.params['W4'],
66 + 1, 0)
67 + self.layers['LightNorm4'] = LightNormalization()
68 + self.layers['Relu4'] = Relu()
69 + self.layers['Conv5'] = DW_Convolution(self.params['W5'],
70 + conv_param['stride'], conv_param['pad'])
71 + self.layers['LightNorm5'] = LightNormalization()
72 + self.layers['Relu5'] = Relu()
73 + self.layers['Pool3'] = Pooling(pool_h=2, pool_w=2, stride=2)
74 +
75 + self.layers['Affine4'] = Affine(self.params['W6'])
76 + self.layers['LightNorm6'] = LightNormalization()
77 + self.layers['Relu6'] = Relu()
78 +
79 + self.layers['Affine5'] = Affine(self.params['W7'])
80 +
81 + self.last_layer = SoftmaxWithLoss()
82 +
83 + def predict(self, x):
84 + for layer in self.layers.values():
85 + x = layer.forward(x)
86 +
87 + return x
88 +
89 + def loss(self, x, t):
90 + y = self.predict(x)
91 + return self.last_layer.forward(y, t)
92 +
93 + def accuracy(self, x, t, batch_size=100):
94 + if t.ndim != 1 : t = np.argmax(t, axis=1)
95 +
96 + acc = 0.0
97 +
98 + for i in range(int(x.shape[0] / batch_size)):
99 + tx = x[i*batch_size:(i+1)*batch_size]
100 + tt = t[i*batch_size:(i+1)*batch_size]
101 + y = self.predict(tx)
102 + y = np.argmax(y, axis=1)
103 + acc += np.sum(y == tt) #numpy
104 +
105 + return acc / x.shape[0]
106 +
107 + def gradient(self, x, t):
108 +
109 + self.loss(x, t)
110 +
111 + dout = 1
112 + dout = self.last_layer.backward(dout)
113 +
114 + layers = list(self.layers.values())
115 + layers.reverse()
116 + for layer in layers:
117 + dout = layer.backward(dout)
118 +
119 + grads = {}
120 + grads['W1'] = self.layers['Conv1'].dW
121 + grads['W2'] = self.layers['Conv2'].dW
122 + grads['W3'] = self.layers['Conv3'].dW
123 + grads['W4'] = self.layers['Conv4'].dW
124 + grads['W5'] = self.layers['Conv5'].dW
125 + grads['W6'] = self.layers['Affine4'].dW
126 + grads['W7'] = self.layers['Affine5'].dW
127 + return grads
128 +
129 + def save_params(self, file_name="params.pkl"):
130 + params = {}
131 + for key, val in self.params.items():
132 + params[key] = val
133 + with open(file_name, 'wb') as f:
134 + pickle.dump(params, f)
135 +
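
A quick smoke test of the network (a sketch; the random batch below stands in for real CIFAR-10 data):

import numpy as np
from simple_convnet4 import SimpleConvNet

network = SimpleConvNet(input_dim=(3, 32, 32),
                        conv_param={'filter_num': (32, 32, 64),
                                    'filter_size': 3, 'pad': 1, 'stride': 1},
                        hidden_size=512, output_size=10)

x = np.random.randn(4, 3, 32, 32).astype(np.float32)  # dummy batch of 4 images
t = np.array([0, 1, 2, 3])                             # dummy integer labels

print(network.predict(x).shape)   # expected (4, 10)
print(network.loss(x, t))         # scalar cross-entropy loss
grads = network.gradient(x, t)    # dict with keys 'W1' ... 'W7'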
1 +# coding: utf-8
2 +import sys, os
3 +sys.path.append(os.pardir)
4 +import time
5 +import numpy as np
+import matplotlib.pyplot as plt  # needed for the accuracy plot at the end of this script
6 +from dataset.cifar10 import load_cifar10
7 +from simple_convnet4 import SimpleConvNet
8 +from trainer import Trainer
9 +
10 +(x_train, t_train), (x_test, t_test) = load_cifar10(flatten=False)
11 +
12 +test_mask = np.random.choice(x_test.shape[0], 1000)
13 +x_test = x_test[test_mask]
14 +t_test = t_test[test_mask]
15 +
16 +max_epochs = 30
17 +
18 +network = SimpleConvNet(input_dim=(3,32,32),
19 + conv_param = {'filter_num': (32, 32, 64), 'filter_size': 3, 'pad': 1, 'stride': 1},
20 + hidden_size=512, output_size=10, weight_init_std=0.01)
21 +
22 +trainer = Trainer(network, x_train, t_train, x_test, t_test,
23 + epochs=max_epochs, mini_batch_size=100,
24 + optimizer='Adam', optimizer_param={'lr': 0.001},
25 + evaluate_sample_num_per_epoch=1000, early_stopping=5)
26 +start = time.time()
27 +trainer.train()
28 +elapsed_time = time.time() - start
29 +print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")
30 +
31 +network.save_params("params.pkl")
32 +print("Saved Network Parameters!")
33 +
34 +markers = {'train': 'o', 'test': 's'}
35 +x = np.arange(trainer.current_epoch)
36 +plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
37 +plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
38 +plt.xlabel("epochs")
39 +plt.ylabel("accuracy")
40 +plt.ylim(0, 1.0)
41 +plt.legend(loc='lower right')
42 +plt.show()
1 +# coding: utf-8
2 +import sys, os
3 +sys.path.append(os.pardir) # make files in the parent directory importable
4 +import numpy as np
5 +from optimizer import *
6 +
7 +class Trainer:
8 + """Class that runs the training loop for a neural network.
9 + """
10 + def __init__(self, network, x_train, t_train, x_test, t_test,
11 + epochs=20, mini_batch_size=100,
12 + optimizer='SGD', optimizer_param={'lr':0.01},
13 + evaluate_sample_num_per_epoch=None, early_stopping=5, verbose=True):
14 + self.network = network
15 + self.verbose = verbose
16 + self.x_train = x_train
17 + self.t_train = t_train
18 + self.x_test = x_test
19 + self.t_test = t_test
20 + self.epochs = epochs
21 + self.batch_size = mini_batch_size
22 + self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
23 +
24 + # optimizer
25 + optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov,
26 + 'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam}
27 + self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
28 + self.early_stopping = EarlyStopping(patience=early_stopping, verbose=self.verbose)
29 +
30 + self.train_size = x_train.shape[0]
31 + self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
32 + self.max_iter = int(epochs * self.iter_per_epoch)
33 + self.current_iter = 0
34 + self.current_epoch = 0
35 +
36 + self.train_loss_list = []
37 + self.train_acc_list = []
38 + self.test_acc_list = []
39 +
40 + def train_step(self):
41 + early_stopping = False
42 + batch_mask = np.random.choice(self.train_size, self.batch_size)
43 + x_batch = self.x_train[batch_mask]
44 + t_batch = self.t_train[batch_mask]
45 +
46 + grads = self.network.gradient(x_batch, t_batch)
47 + self.optimizer.update(self.network.params, grads)
48 +
49 + loss = self.network.loss(x_batch, t_batch)
50 + self.train_loss_list.append(loss)
51 + if self.verbose: print(str(self.current_epoch) + " : " + str(int(self.current_iter % self.iter_per_epoch)) + " : train loss:" + str(loss))
52 +
53 + if self.current_iter % self.iter_per_epoch == 0:
54 + self.current_epoch += 1
55 +
56 + x_train_sample, t_train_sample = self.x_train, self.t_train
57 + x_test_sample, t_test_sample = self.x_test, self.t_test
58 + if self.evaluate_sample_num_per_epoch is not None:
59 + t = self.evaluate_sample_num_per_epoch
60 + x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
61 + x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
62 +
63 + train_acc = self.network.accuracy(x_train_sample, t_train_sample)
64 + test_acc = self.network.accuracy(x_test_sample, t_test_sample)
65 + self.train_acc_list.append(train_acc)
66 + self.test_acc_list.append(test_acc)
67 + early_stopping = self.early_stopping.validate(test_acc)
68 +
69 + if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
70 + self.current_iter += 1
71 + return early_stopping
72 +
73 + def train(self):
74 + for i in range(self.max_iter):
75 + if self.train_step():
76 + break
77 +
78 + test_acc = self.network.accuracy(self.x_test, self.t_test)
79 +
80 + if self.verbose:
81 + print("=============== Final Test Accuracy ===============")
82 + print("test acc:" + str(test_acc))
83 +
1 +# coding: utf-8
2 +#import cupy as cp
3 +import numpy as cp
4 +import numpy as np
5 +
6 +def DW_im2col(input_data, filter_h, filter_w, stride=1, pad=0):
7 + """Take a batch of images and flatten it, keeping channels separate (for depthwise convolution).
8 +
9 + Parameters
10 + ----------
11 + input_data : 4-dim input array (number of images, channels, height, width)
12 + filter_h : filter height
13 + filter_w : filter width
14 + stride : stride
15 + pad : padding
16 +
17 + Returns
18 + -------
19 + col : 3-dim array of shape (C, N*out_h*out_w, filter_h*filter_w)
20 + """
21 + N, C, H, W = input_data.shape
22 + out_h = (H + 2 * pad - filter_h) // stride + 1
23 + out_w = (W + 2 * pad - filter_w) // stride + 1
24 +
25 + img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
26 + col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
27 +
28 + for y in range(filter_h):
29 + y_max = y + stride * out_h
30 + for x in range(filter_w):
31 + x_max = x + stride * out_w
32 + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
33 +
34 + col = col.transpose(1, 0, 4, 5, 2, 3).reshape(C, N * out_h * out_w, -1)
35 + return col
36 +
37 +def smooth_curve(x):
38 + """Used to smooth the loss curve for plotting
39 +
40 + Reference: http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
41 + """
42 + window_len = 11
43 + s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
44 + w = np.kaiser(window_len, 2)
45 + y = np.convolve(w/w.sum(), s, mode='valid')
46 + return y[5:len(y)-5]
47 +
48 +
49 +def shuffle_dataset(x, t):
50 + """Shuffle the dataset
51 +
52 + Parameters
53 + ----------
54 + x : training data
55 + t : target labels
56 +
57 + Returns
58 + -------
59 + x, t : the shuffled training data and labels
60 + """
61 + permutation = np.random.permutation(x.shape[0])
62 + x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:]
63 + t = t[permutation]
64 +
65 + return x, t
66 +
67 +def conv_output_size(input_size, filter_size, stride=1, pad=0):
68 + return (input_size + 2*pad - filter_size) / stride + 1
69 +
70 +
71 +def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
72 + """Flatten a batch of images into a 2-dimensional array (im2col).
73 +
74 + Parameters
75 + ----------
76 + input_data : 4-dim input array (number of images, channels, height, width)
77 + filter_h : filter height
78 + filter_w : filter width
79 + stride : stride
80 + pad : padding
81 +
82 + Returns
83 + -------
84 + col : 2-dimensional array
85 + """
86 + N, C, H, W = input_data.shape
87 + out_h = (H + 2*pad - filter_h)//stride + 1
88 + out_w = (W + 2*pad - filter_w)//stride + 1
89 +
90 + img = cp.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
91 + col = cp.zeros((N, C, filter_h, filter_w, out_h, out_w), dtype=np.float32)
92 +
93 + for y in range(filter_h):
94 + y_max = y + stride*out_h
95 + for x in range(filter_w):
96 + x_max = x + stride*out_w
97 + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
98 +
99 + col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
100 + return col
101 +
102 +
103 +def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
104 + """Inverse of im2col: fold a column array back into a batch of images.
105 +
106 + Parameters
107 + ----------
108 + col : 2-dimensional array produced by im2col
109 + input_shape : shape of the original input data (e.g. (10, 1, 28, 28))
110 + filter_h : filter height
111 + filter_w : filter width
112 + stride : stride
113 + pad : padding
114 +
115 + Returns
116 + -------
117 + img : 4-dim array restored to input_shape
118 + """
119 + N, C, H, W = input_shape
120 + out_h = (H + 2*pad - filter_h)//stride + 1
121 + out_w = (W + 2*pad - filter_w)//stride + 1
122 + col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
123 +
124 + img = cp.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1), dtype=np.float32)
125 + for y in range(filter_h):
126 + y_max = y + stride*out_h
127 + for x in range(filter_w):
128 + x_max = x + stride*out_w
129 + img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
130 +
131 + return img[:, :, pad:H + pad, pad:W + pad]
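
A small consistency check between im2col and col2im (a sketch; because col2im sums overlapping patches, the round trip reproduces the input exactly only when the windows do not overlap, as with a 2x2 window and stride 2):

import numpy as np
from util import im2col, col2im

x = np.arange(2 * 3 * 4 * 4, dtype=np.float32).reshape(2, 3, 4, 4)

col = im2col(x, filter_h=2, filter_w=2, stride=2, pad=0)
print(col.shape)             # expected (8, 12): N*out_h*out_w rows, C*2*2 columns

back = col2im(col, x.shape, filter_h=2, filter_w=2, stride=2, pad=0)
print(np.allclose(back, x))  # True for non-overlapping windows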