이혜리

데이터 전처리

1 +import os
2 +import numpy as np
3 +import random
4 +from imageio import imread
5 +from skimage.transform import resize
6 +import hickle as hkl
7 +from setting import *
8 +
9 +# Train : validation split.
# NOTE(review): this comment originally said 9:1, but process_data draws 1900
# training and 100 validation sequences (19:1) -- confirm which is intended.
10 +desired_im_sz = (128, 160) # (height, width) every frame is resized to
11 +#categories = ['walk', 'run', 'hug', 'crossarms', 'jump', 'clap', 'etc', 'beverage', 'phone', 'calling']
12 +num_pic = 12 # Number of frames taken from each sequence (sequences do not all contain the same number of frames).
13 +
14 +
15 +# Create image datasets.
def _collect_sequences(base_dir):
    """Return one ``(source_name, frame_paths)`` pair per usable video directory.

    A directory is usable when it directly contains at least ``num_pic + 1``
    files. The files are sorted by name and the ones at positions
    1..num_pic are kept (position 0 is skipped, as the original code did).
    """
    sequences = []
    for top, _subdirs, files in os.walk(base_dir):
        if len(files) < num_pic + 1:
            continue
        # Directory listings come back in no particular order; the frames of a
        # sequence must be in name order before a fixed window is taken.
        files.sort()
        frame_paths = [os.path.join(top, name) for name in files[1:num_pic + 1]]
        sequences.append((os.path.basename(top), frame_paths))
    return sequences


def _flatten_sequences(sequences):
    """Flatten sequence records into parallel per-frame path and source lists."""
    paths = []
    sources = []
    for source_name, frame_paths in sequences:
        paths += frame_paths
        sources += [source_name] * len(frame_paths)
    return paths, sources


def _load_frames(paths):
    """Read every image in *paths*, resize it to ``desired_im_sz`` and stack them."""
    # NOTE(review): np.zeros defaults to float64, so the full training array is
    # large; kept as-is because downstream readers may rely on this dtype.
    frames = np.zeros((len(paths),) + desired_im_sz + (3,))
    for i, path in enumerate(paths):
        frames[i] = resize(imread(path), desired_im_sz)
    return frames


# Create image datasets.
def process_data(num_train=1900, num_val=100):
    """Build the training/validation image arrays and dump them with hickle.

    Scans ``DATA_DIR/action_data/`` for video directories, randomly draws
    ``num_train + num_val`` distinct sequences, loads and resizes their frames,
    and writes four files into ``DATA_DIR``: ``X_train.hkl``,
    ``sources_train.hkl``, ``X_val.hkl`` and ``sources_val.hkl``. Each
    ``sources_*`` list holds, per frame, the name of the directory it came from.

    Args:
        num_train: Number of sequences used for the training set.
        num_val: Number of sequences used for the validation set.

    Raises:
        ValueError: If fewer than ``num_train + num_val`` usable sequences
            are found under the data directory.
    """
    base_dir = os.path.join(DATA_DIR, 'action_data/')
    sequences = _collect_sequences(base_dir)

    needed = num_train + num_val
    if len(sequences) < needed:
        raise ValueError(
            'need %d sequences with at least %d files each under %s, found %d'
            % (needed, num_pic + 1, base_dir, len(sequences))
        )

    # Sample whole sequences without replacement. Indexing the flat frame list
    # with a video index would cut windows that straddle two videos and would
    # pair frames with the wrong source name.
    chosen = random.sample(sequences, needed)
    im_list, source_list = _flatten_sequences(chosen[:num_train])
    validation, val_source = _flatten_sequences(chosen[num_train:])

    X_t = _load_frames(im_list)
    X_v = _load_frames(validation)

    hkl.dump(X_t, os.path.join(DATA_DIR, 'X_train.hkl'))
    hkl.dump(source_list, os.path.join(DATA_DIR, 'sources_train.hkl'))
    hkl.dump(X_v, os.path.join(DATA_DIR, 'X_val.hkl'))
    hkl.dump(val_source, os.path.join(DATA_DIR, 'sources_val.hkl'))
73 +
# Run the preprocessing only when this file is executed as a script.
if __name__ == "__main__":
    process_data()
...\ No newline at end of file ...\ No newline at end of file