Showing 9 changed files with 594 additions and 0 deletions
소스코드/MobileNet/attention_module.py
0 → 100644
1 | +""" | ||
2 | + 기존의 Self Attention을 경량화한 CONVOLUTION BLOCK ATTENTION MODULE | ||
3 | + @FUNCTION se_block : Squeeze and Excitation Block | ||
4 | + @FUNCTION cbam_block : Convolution Block Attetntion Module | ||
5 | + @FUNCTION channel_attention : Channel Attention | ||
6 | + @FUNCITON Spatial_attention : Spation_attention | ||
7 | +""" | ||
8 | +import numpy as np | ||
9 | +from tensorflow.keras.layers import Conv2D | ||
10 | +from tensorflow.keras.layers import GlobalAvgPool2D, GlobalMaxPool2D | ||
11 | +from tensorflow.keras.layers import Reshape, Dense, Permute, Lambda | ||
12 | +from tensorflow.keras.layers import Add, Activation | ||
13 | +from tensorflow.keras import backend as K | ||
14 | +from keras.activations import sigmoid | ||
15 | +from tensorflow.keras import layers | ||
16 | + | ||
17 | +""" | ||
18 | + Squeeze-and-Excitation(SE) Block | ||
19 | + @brief : 채널간의 관계를 재종정 시켜줌 | ||
20 | + @param input_feature : tensor | ||
21 | +""" | ||
22 | +def se_block(input_feature, ratio=8): | ||
23 | + | ||
24 | + se_feature = GlobalAvgPool2D()(input_feature) | ||
25 | + channel = input_feature._shape[-1] | ||
26 | + | ||
27 | + se_feature = Reshape((1, 1, channel))(se_feature) | ||
28 | + se_feature = Dense(channel // ratio, | ||
29 | + activation='relu', | ||
30 | + kernel_initializer='he_normal', | ||
31 | + use_bias=True, | ||
32 | + bias_initializer='zeros')(se_feature) | ||
33 | + | ||
34 | + se_feature = Dense(channel, | ||
35 | + activation='sigmoid', | ||
36 | + kernel_initializer='he_normal', | ||
37 | + use_bias=True, | ||
38 | + bias_initializer='zeros')(se_feature) | ||
39 | + | ||
40 | + se_feature = layers.multiply([input_feature, se_feature]) | ||
41 | + | ||
42 | + return se_feature | ||
43 | + | ||
44 | +""" | ||
45 | + CBAM_BLOCK | ||
46 | + @brief : Convolution Block Attention Module | ||
47 | + @param cbam_feature : input tensor | ||
48 | + @param ratio(int) : channel reduce ratio | ||
49 | + @return cbam_feature : dynamic feature selection | ||
50 | +""" | ||
51 | +def cbam_block(cbam_feature, ratio=8): | ||
52 | + | ||
53 | + cbam_feature = channel_attention(cbam_feature, ratio) | ||
54 | + cbam_feature = spatial_attention(cbam_feature) | ||
55 | + | ||
56 | + return cbam_feature | ||
57 | + | ||
58 | +""" | ||
59 | + Channel Attention | ||
60 | + @brief : Channel Attention, average pool과 max pool을 사용(파라미터 양을 줄일 수 있음) | ||
61 | + 두 가지 pooled feature는 같은 의미를 공유하는 값이기 때문에 하나의 공유된 MLP를 사용 | ||
62 | + @param input_feature = input_tensor | ||
63 | + @return cbam_feature | ||
64 | +""" | ||
65 | +def channel_attention(input_feature, ratio=8): | ||
66 | + | ||
67 | + # 채널을 먼저 적용 | ||
68 | + channel = input_feature._shape[-1] | ||
69 | + | ||
70 | + shared_layer_one = Dense(channel//ratio, | ||
71 | + activation='relu', | ||
72 | + kernel_initializer='he_normal', | ||
73 | + use_bias=True, | ||
74 | + bias_initializer='zeros') | ||
75 | + | ||
76 | + shared_layer_two = Dense(channel, | ||
77 | + kernel_initializer='he_normal', | ||
78 | + use_bias=True, | ||
79 | + bias_initializer='zeros') | ||
80 | + | ||
81 | + # average pool과 max pool 두 가지를 결합하여 사용 | ||
82 | + avg_pool = GlobalAvgPool2D()(input_feature) | ||
83 | + avg_pool = Reshape((1, 1, channel))(avg_pool) | ||
84 | + avg_pool = shared_layer_one(avg_pool) | ||
85 | + avg_pool = shared_layer_two(avg_pool) | ||
86 | + | ||
87 | + max_pool = GlobalMaxPool2D()(input_feature) | ||
88 | + max_pool = Reshape((1, 1, channel))(max_pool) | ||
89 | + max_pool = shared_layer_one(max_pool) | ||
90 | + max_pool = shared_layer_two(max_pool) | ||
91 | + | ||
92 | + cbam_feature = Add()([avg_pool, max_pool]) | ||
93 | + # 가장 중요한 feature를 찾는 것이 목적이 아니기 때문에 mutually exclusive한 | ||
94 | + # softmax 대신 sigmoid를 사용 | ||
95 | + cbam_feature = Activation('sigmoid')(cbam_feature) | ||
96 | + cbam_feature = layers.multiply([avg_pool, max_pool]) | ||
97 | + return cbam_feature | ||
98 | + | ||
99 | +""" | ||
100 | + Spatial Attention | ||
101 | + @brief : 2차원의 spatial attention, single convolution을 사용하여 특징이 보이는 | ||
102 | + channel을 만듬, 정보가 어디에 있는지 중점을 둠 | ||
103 | + @param ipnut_feature : input_tensor(Channel-refined feature) | ||
104 | +""" | ||
105 | +def spatial_attention(input_feature, kernel_size=7): | ||
106 | + | ||
107 | + cbam_feature = input_feature | ||
108 | + | ||
109 | + avg_pool = Lambda(lambda x : K.mean(x, axis=3, keepdims=True))(cbam_feature) | ||
110 | + max_pool = Lambda(lambda x : K.max(x, axis=3, keepdims=True))(cbam_feature) | ||
111 | + concat = layers.concatenate([avg_pool, max_pool]) | ||
112 | + cbam_feature = Conv2D(filters=1, | ||
113 | + kernel_size=kernel_size, | ||
114 | + strides=1, | ||
115 | + padding='same', | ||
116 | + activation='sigmoid', | ||
117 | + kernel_initializer='he_normal', | ||
118 | + use_bias=False)(concat) | ||
119 | + | ||
120 | + cbam_feature = Conv2D(filters=1, | ||
121 | + kernel_size=kernel_size, | ||
122 | + strides=1, | ||
123 | + padding='same', | ||
124 | + activation='sigmoid', | ||
125 | + kernel_initializer='he_normal')(concat) | ||
126 | + | ||
127 | + return layers.multiply([input_feature, cbam_feature]) | ||
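A quick smoke test of the blocks above (a minimal sketch; the 8x8x64 feature-map shape is an arbitrary assumption, not something the module prescribes):

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from attention_module import cbam_block

inputs = Input(shape=(8, 8, 64))         # hypothetical feature map
refined = cbam_block(inputs, ratio=8)    # channel attention, then spatial attention
Model(inputs, refined).summary()         # output shape stays (None, 8, 8, 64)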
소스코드/MobileNet/mobile_net.py
0 → 100644
1 | +""" | ||
2 | + ATTENTION을 적용한 MOBILE NET | ||
3 | + @FUNCTION load_data : pickle 데이터를 로딩하는 함수 | ||
4 | + @FUNCTION Mobile_net : 모바일 넷 모델 함수 | ||
5 | + @FUNCITON predict : 모델 예측하는 함수 | ||
6 | +""" | ||
7 | +import pickle | ||
8 | +import numpy as np | ||
9 | +from sklearn.model_selection import train_test_split | ||
10 | +import matplotlib.pyplot as plt | ||
11 | +import keras | ||
12 | +import tensorflow as tf | ||
13 | +import tensorflow.keras as keras | ||
14 | +import tensorflow.keras.backend as K | ||
15 | +from tensorflow.keras.models import Model | ||
16 | +from tensorflow.keras.layers import Input, Dense, Conv2D, Conv3D, DepthwiseConv2D, SeparableConv2D, Conv3DTranspose | ||
17 | +from tensorflow.keras.layers import Flatten, MaxPool2D, AvgPool2D, GlobalAvgPool2D, UpSampling2D, BatchNormalization | ||
18 | +from tensorflow.keras.layers import Concatenate, Add, Dropout, ReLU, Lambda, Activation, LeakyReLU, PReLU | ||
19 | + | ||
20 | +from attention_module import cbam_block | ||
21 | + | ||
22 | +""" | ||
23 | + 데이터 로드 | ||
24 | + @brief : load Inputs and Targets from pickle data | ||
25 | + @param data_path(str) : path to pickle file containing data | ||
26 | + @return X(ndarray) : Inputs | ||
27 | + @return y(ndarray) : Targets | ||
28 | +""" | ||
29 | +def load_data(): | ||
30 | + X = pickle.load(open("X_9.pickle", "rb")) | ||
31 | + y = pickle.load(open("y_9.pickle", "rb")) | ||
32 | + | ||
33 | + X = X/225.0 | ||
34 | + | ||
35 | + return X, y | ||
36 | + | ||
37 | +""" | ||
38 | + mobile net 구현 | ||
39 | + @brief : Mobile net with Convolution Block Attention Module(CBAM) | ||
40 | + I used cbam at last of convolution when I used it at every conv block, | ||
41 | + the result was worse. | ||
42 | + @return model : Mobile Net Model | ||
43 | +""" | ||
44 | +def mobile_net(input_shape): | ||
45 | + | ||
46 | + def mobile_net_block(x, f, s=1): | ||
47 | + x = DepthwiseConv2D(3, strides=s, padding='same')(x) | ||
48 | + x = BatchNormalization()(x) | ||
49 | + x = ReLU()(x) | ||
50 | + | ||
51 | + | ||
52 | + x = Conv2D(f, 1, strides=1, padding='same')(x) | ||
53 | + x = BatchNormalization()(x) | ||
54 | + x = ReLU()(x) | ||
55 | + | ||
56 | + | ||
57 | + return x | ||
58 | + | ||
59 | + input = Input(input_shape) | ||
60 | + | ||
61 | + x = Conv2D(32, 3, strides=2, padding='same')(input) | ||
62 | + x = BatchNormalization()(x) | ||
63 | + x = ReLU()(x) | ||
64 | + | ||
65 | + x = mobile_net_block(x, 64) | ||
66 | + x = mobile_net_block(x, 128, 2) | ||
67 | + x = mobile_net_block(x, 128) | ||
68 | + | ||
69 | + x = mobile_net_block(x, 256, 2) | ||
70 | + x = mobile_net_block(x, 256) | ||
71 | + | ||
72 | + x = mobile_net_block(x, 512, 2) | ||
73 | + for _ in range(5): | ||
74 | + x = mobile_net_block(x, 512) | ||
75 | + | ||
76 | + x = mobile_net_block(x, 1024, 2) | ||
77 | + x = mobile_net_block(x, 1024) | ||
78 | + x = cbam_block(x) | ||
79 | + | ||
80 | + x = GlobalAvgPool2D()(x) | ||
81 | + | ||
82 | + | ||
83 | + output = Dense(4, activation='softmax')(x) | ||
84 | + | ||
85 | + model = Model(input, output) | ||
86 | + return model | ||
87 | + | ||
88 | +""" | ||
89 | + 학습된 모델로 예측 | ||
90 | + @brief : predict data from trained mobile net model | ||
91 | + @param model : Trained classifier | ||
92 | + @param X : Input data | ||
93 | + @param y(int): Target | ||
94 | +""" | ||
95 | +def predict(model, X, y): | ||
96 | + # sample의 입력 데이터에 차원 추가 | ||
97 | + X = X[np.newaxis, ...] | ||
98 | + | ||
99 | + prediction = model.predict(X) | ||
100 | + | ||
101 | + # argmax를 사용해서 index의 최대 값을 얻음 | ||
102 | + predicted_index = np.argmax(prediction, axis=1) | ||
103 | + | ||
104 | + print("Target: {}, Predicted label: {}".format(y, predicted_index)) | ||
105 | + | ||
106 | + | ||
# main
if __name__ == "__main__":

    # load data and split into X_train and y_train
    X_train, y_train = load_data()

    # create network
    K.clear_session()
    input_shape = (X_train.shape[1], X_train.shape[2], 3)
    model = mobile_net(input_shape)

    # compile model
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # train model
    hist = model.fit(X_train, y_train, validation_split=0.1, batch_size=32, epochs=30)

    fig, loss_ax = plt.subplots()

    acc_ax = loss_ax.twinx()

    loss_ax.plot(hist.history['loss'], 'y', label='train loss')
    loss_ax.plot(hist.history['val_loss'], label='validation loss')

    # with metrics=['accuracy'], the history keys are 'accuracy'/'val_accuracy',
    # not 'acc'/'val_acc'
    acc_ax.plot(hist.history['accuracy'], 'b', label='train_acc')
    acc_ax.plot(hist.history['val_accuracy'], 'g', label='validation_acc')

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuracy')

    loss_ax.legend(loc='upper left')
    acc_ax.legend(loc='lower left')  # was a second loss_ax.legend call
    plt.show()

    # evaluate model (note: on the training data itself)
    test_loss, test_acc = model.evaluate(X_train, y_train, verbose=2)
    print('\nTest accuracy:', test_acc)

    X_to_predict = X_train[100]
    y_to_predict = y_train[100]

    # predict sample
    predict(model, X_to_predict, y_to_predict)

    model.save('C:/Users/nokh9/Desktop/mobile_net2.h5')
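The script evaluates on the training data itself, so the printed "Test accuracy" is optimistic. A minimal sketch of a held-out evaluation with scikit-learn's train_test_split, reusing load_data and mobile_net from above (the 0.2 test fraction is an assumption, not from the original):

from sklearn.model_selection import train_test_split

X, y = load_data()
# hold out 20% of the samples for an unbiased accuracy estimate (assumed split)
X_tr, X_te, y_tr, y_te = train_test_split(X, np.array(y), test_size=0.2)

model = mobile_net((X.shape[1], X.shape[2], 3))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_tr, y_tr, validation_split=0.1, batch_size=32, epochs=30)
print(model.evaluate(X_te, y_te, verbose=2))  # loss and accuracy on unseen data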
소스코드/Preprocess/Data_Augumentation.py
0 → 100644
1 | +""" | ||
2 | + 부족한 데이터를 증강 | ||
3 | + frequncy를 나타내는 mel-spectrogram에서 데이터 증강(뒤틀림, 뒤집기)을 사용하면 | ||
4 | + 학습이 더 안될 것 같아서 실제로 사용은 안 함 | ||
5 | + @FUNCTION data_augumentation : 데이터를 증강하는 함수 | ||
6 | + @FUNCTION save_into_folder : 증강된 데이터들을 이름에 맞게 폴더별로 정리하는 함 | ||
7 | +""" | ||
8 | +import os | ||
9 | +from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img | ||
10 | + | ||
11 | + | ||
12 | +""" | ||
13 | + Data Augmentation | ||
14 | + @brief : 모델이 적은 이미지에서 최대한 많은 정보를 뽑아내서 학습할 수 있도록 | ||
15 | + 데이터를 증강시킴 | ||
16 | + @param dataset_path(str) : dog_sounds converted into mel_spectrogram | ||
17 | +""" | ||
18 | +def data_augumentation(dataset_path): | ||
19 | + datagen = ImageDataGenerator( | ||
20 | + rotation_range=40, # 이미지 회전 범위(degree) | ||
21 | + width_shift_range=0.2, # 이미지를수 수평 또는 수직으로 랜덤하게 평행 이동 | ||
22 | + height_shift_range=0.2, | ||
23 | + rescale=1./225, # 0-1범위로 변환 | ||
24 | + shear_range=0.2, # 임의 전단 변환(shearing transformation) 범위 | ||
25 | + zoom_range=0.2, # 임의 확대/축소 범위 | ||
26 | + horizontal_flip=False, # 소리에 대한 이미지이기 때문에 뒤집지 않음 | ||
27 | + fill_mode='nearest') # 이미지를 회전 이동하거나 축소할 때 생기는 공간을 채움 | ||
28 | + | ||
29 | + for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)): | ||
30 | + | ||
31 | + if dirpath is not dataset_path: | ||
32 | + | ||
33 | + for f in filenames: | ||
34 | + | ||
35 | + file_path = os.path.join(dirpath, f) | ||
36 | + img = load_img(file_path) # PIL 이미지 | ||
37 | + i = 0 | ||
38 | + x = img_to_array(img) # (x, x, 3) 크기의 Numpy 배열 | ||
39 | + x=x.reshape((1,)+x.shape) # (1, x, x, 3) 크기의 Numpy 배열 | ||
40 | + | ||
41 | + # 임의 변환된 이미지를 배치 단위로 생성해서 | ||
42 | + # 이미지 20장을 생성하고 마침 | ||
43 | + for batch in datagen.flow(x, batch_size=1, save_to_dir="C:/Users/nokh9/Desktop/dog_sound_train", save_prefix=f, save_format='.jpg'): | ||
44 | + print("{}, data_augument:{}".format(file_path, i)) | ||
45 | + i += 1 | ||
46 | + if i > 10: | ||
47 | + break | ||
48 | + | ||
49 | +""" | ||
50 | + split categories | ||
51 | + @brief 증강된 이미지를 카테고리 별로 나누어서 저장 | ||
52 | + @param dataset_path(str) : dog_sound argumented | ||
53 | +""" | ||
54 | +def save_into_folder(dataset_path): | ||
55 | + | ||
56 | + num = 1 | ||
57 | + | ||
58 | + for filename in os.listdir(dataset_path): | ||
59 | + find_category = filename.split('_')[0] | ||
60 | + des_path = os.path.join(dataset_path, find_category) | ||
61 | + if not os.path.exists(des_path): | ||
62 | + os.makedirs(des_path) | ||
63 | + num = 1 | ||
64 | + os.rename(dataset_path+filename, des_path + '/' + find_category + str(num) + '.jpg') | ||
65 | + num += 1 | ||
66 | + | ||
# main
if __name__ == "__main__":
    data_augumentation("C:/Users/nokh9/Desktop/dog_sound_mel")
    save_into_folder("C:/Users/nokh9/Desktop/dog_sound_train/")
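save_into_folder depends on the save_prefix naming used above: ImageDataGenerator emits names of the form prefix_index_hash.jpg, so splitting on '_' recovers the category. A small illustration with hypothetical filenames:

# Hypothetical filenames as emitted with save_prefix=f (f being e.g. "angry_3.jpg")
for filename in ["angry_3.jpg_0_1234.jpg", "happy_12.jpg_0_87.jpg"]:
    print(filename.split('_')[0])  # -> "angry", "happy": the destination folder name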
소스코드/Preprocess/preprocessing.py
0 → 100644
1 | +""" | ||
2 | + Mel-Sepctrogram 이미지를 학습에 쓰이기 전에 전처리 | ||
3 | + @FUNCTION create_training_data : 이미지를 전처리하는 함수 | ||
4 | +""" | ||
5 | +import os | ||
6 | +import random | ||
7 | +import cv2 | ||
8 | +import numpy as np | ||
9 | +import matplotlib.pyplot as plt | ||
10 | +import pickle | ||
11 | + | ||
12 | +dataset_path = "C:/Users/nokh9/Desktop/dog_sound_mel/" # 데이터가 있는 경로 | ||
13 | +CATEGORIES = ["angry", "happy", "lonely", "sad"] # 감정에 대한 카테고리 | ||
14 | + | ||
15 | +training_data = [] | ||
16 | + | ||
17 | +""" | ||
18 | + 이미지를 전처리 | ||
19 | + @brief : preprocessing image for training | ||
20 | +""" | ||
21 | +def create_training_data(): | ||
22 | + | ||
23 | + for category in CATEGORIES: | ||
24 | + path = os.path.join(dataset_path, category) | ||
25 | + class_num = CATEGORIES.index(category) | ||
26 | + print('-'*50) | ||
27 | + print(category + " is started...") | ||
28 | + print('-'*50) | ||
29 | + image_list = os.listdir(path) | ||
30 | + listdir_num = len(image_list) | ||
31 | + | ||
32 | + # 데이터가 있는 폴더에서 모든 이미지들을 전처리함 | ||
33 | + for number in range(listdir_num): | ||
34 | + try: | ||
35 | + image_path = path + '/' + category + '_' + str(number+1) + '.jpg' | ||
36 | + image_array = cv2.imread(image_path, cv2.IMREAD_COLOR) | ||
37 | + new_array = cv2.resize(image_array, (62, 78)) | ||
38 | + training_data.append([new_array, class_num]) | ||
39 | + if((number+1)%100 == 0): | ||
40 | + print(str(number+1) + 'is finished') | ||
41 | + except Exception as e: | ||
42 | + pass | ||
43 | + | ||
# main
if __name__ == "__main__":
    create_training_data()
    print(len(training_data))

    # shuffle so the training/validation split done later in model.fit is well mixed
    random.shuffle(training_data)

    X = []
    y = []

    for features, label in training_data:
        X.append(features)
        y.append(label)

    X = np.array(X).reshape(-1, 62, 78, 3)
    y = np.array(y)  # targets as an ndarray, as load_data in mobile_net.py expects

    # save X and y separately as pickle files
    pickle_out = open("X_9.pickle", "wb")
    pickle.dump(X, pickle_out)
    pickle_out.close()

    pickle_out = open("y_9.pickle", "wb")
    pickle.dump(y, pickle_out)
    pickle_out.close()
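A quick sanity check of the pickled arrays (a sketch mirroring load_data in mobile_net.py; the expected shape comes from the reshape above):

import pickle
import numpy as np

X = pickle.load(open("X_9.pickle", "rb"))
y = pickle.load(open("y_9.pickle", "rb"))
print(X.shape)         # expected: (num_samples, 62, 78, 3)
print(np.bincount(y))  # sample counts for the 4 categories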
소스코드/Preprocess/raw2spectro.py
0 → 100644
1 | +""" | ||
2 | + 오디오 파일(.wav)을 다루는 예시 코드 | ||
3 | + librosa 라이브러리가 알아서 특징을 잡아주고 plot해주기 때문에 | ||
4 | + 소리를 데이터화 할 때 편함 | ||
5 | +""" | ||
6 | +import numpy as np | ||
7 | +import librosa, librosa.display | ||
8 | +import matplotlib.pyplot as plt | ||
9 | + | ||
10 | + | ||
11 | +FIG_SIZE = (15,10) | ||
12 | + | ||
13 | +file = "C:/Users/nokh9/Desktop/dog_sound/barking/barking_1.wav" | ||
14 | + | ||
15 | +# | ||
16 | +signal, sample_rate = librosa.load(file, sr=22500) | ||
17 | + | ||
18 | +# WAVEFORM | ||
19 | +# display waveform | ||
20 | +plt.figure(figsize=FIG_SIZE) | ||
21 | +librosa.display.waveplot(signal, sample_rate, alpha=0.4) | ||
22 | +plt.xlabel("Time (s)") | ||
23 | +plt.ylabel("Amplitude") | ||
24 | +plt.title("Waveform") | ||
25 | + | ||
26 | + | ||
27 | +# FFT -> power spectrum (파워 스펙트럼) | ||
28 | +# 퓨리에 변환 | ||
29 | +fft = np.fft.fft(signal) | ||
30 | + | ||
31 | +# 복소수의 abs값을 게산하여 크기를 얻음 | ||
32 | +spectrum = np.abs(fft) | ||
33 | + | ||
34 | +# create frequency variable | ||
35 | +f = np.linspace(0, sample_rate, len(spectrum)) | ||
36 | + | ||
37 | +# take half of the spectrum and frequency | ||
38 | +left_spectrum = spectrum[:int(len(spectrum)/2)] | ||
39 | +left_f = f[:int(len(spectrum)/2)] | ||
40 | + | ||
41 | +# plot spectrum | ||
42 | +plt.figure(figsize=FIG_SIZE) | ||
43 | +plt.plot(left_f, left_spectrum, alpha=0.4) | ||
44 | +plt.xlabel("Frequency") | ||
45 | +plt.ylabel("Magnitude") | ||
46 | +plt.title("Power spectrum") | ||
47 | + | ||
48 | + | ||
49 | +# STFT -> spectrogram | ||
50 | +hop_length = 512 # in num. of samples | ||
51 | +n_fft = 2048 # window in num. of samples | ||
52 | + | ||
53 | + | ||
54 | +hop_length_duration = float(hop_length)/sample_rate | ||
55 | +n_fft_duration = float(n_fft)/sample_rate | ||
56 | + | ||
57 | +print("STFT hop length duration is: {}s".format(hop_length_duration)) | ||
58 | +print("STFT window duration is: {}s".format(n_fft_duration)) | ||
59 | + | ||
60 | +# perform stft | ||
61 | +stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length) | ||
62 | + | ||
63 | + | ||
64 | +spectrogram = np.abs(stft) | ||
65 | + | ||
66 | +# 스펙트로그램을 보여줌 | ||
67 | +plt.figure(figsize=FIG_SIZE) | ||
68 | +librosa.display.specshow(spectrogram, sr=sample_rate, hop_length=hop_length) | ||
69 | +plt.xlabel("Time") # x축 : 시간 | ||
70 | +plt.ylabel("Frequency") # y축 : 주파수 | ||
71 | +plt.colorbar() # 스펙트로그램을 색으로 나타내어 시각적으로 확연하게 보일 수 있게함 | ||
72 | +plt.title("Spectrogram") | ||
73 | + | ||
74 | +# apply logarithm to cast amplitude to Decibels | ||
75 | +log_spectrogram = librosa.amplitude_to_db(spectrogram) | ||
76 | + | ||
77 | +plt.figure(figsize=FIG_SIZE) | ||
78 | +librosa.display.specshow(log_spectrogram, sr=sample_rate, hop_length=hop_length) | ||
79 | +plt.xlabel("Time") | ||
80 | +plt.ylabel("Frequency") | ||
81 | +plt.colorbar(format="%+2.0f dB") | ||
82 | +plt.title("Spectrogram (dB)") | ||
83 | + | ||
84 | + | ||
85 | +# MFCCs | ||
86 | +# extract 13 MFCCs | ||
87 | +MFCCs = librosa.feature.mfcc(signal, sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=13) | ||
88 | + | ||
89 | +# display MFCCs | ||
90 | +plt.figure(figsize=FIG_SIZE) | ||
91 | +librosa.display.specshow(MFCCs, sr=sample_rate, hop_length=hop_length) | ||
92 | +plt.xlabel("Time") | ||
93 | +plt.ylabel("MFCC coefficients") | ||
94 | +plt.colorbar() | ||
95 | +plt.title("MFCCs") | ||
96 | +# show plots | ||
97 | +plt.show() |
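The training pipeline reads mel-spectrogram images from dog_sound_mel, but no script in this diff produces them. A minimal sketch of how one .wav could be rendered to such an image, reusing the melspectrogram parameters from demo.py (the output path and the n_mels/fmax values are assumptions):

# Hypothetical conversion of one .wav into a mel-spectrogram image (paths assumed)
import numpy as np
import librosa, librosa.display
import matplotlib.pyplot as plt

signal, sr = librosa.load("C:/Users/nokh9/Desktop/dog_sound/barking/barking_1.wav", sr=22500)
mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, fmax=4000)
mel_db = librosa.core.power_to_db(mel, ref=np.max)

plt.figure(figsize=(5, 4))
librosa.display.specshow(mel_db, sr=sr, fmax=4000)
plt.axis('off')  # keep only the spectrogram pixels, no axes
plt.savefig("C:/Users/nokh9/Desktop/dog_sound_mel/angry/angry_1.jpg")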
소스코드/X_9.pickle
0 → 100644
This file is too large to display.
소스코드/demo.py
0 → 100644
import cv2
import numpy as np
import pyaudio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import time
import tensorflow.keras as keras

rate = 22500
chunk_size = rate // 4

CATEGORIES = ["angry", "happy", "lonely", "sad"]
model = keras.models.load_model("C:/Users/nokh9/Desktop/mobile_net2.h5")

def prepare(mel):
    img_array = cv2.imread(mel)
    # resize to the training resolution (the original passed a stray third
    # argument, which cv2.resize would treat as the dst parameter)
    new_array = cv2.resize(img_array, (62, 78))
    return new_array.reshape(-1, 62, 78, 3)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=1,
                rate=rate,
                input=True,
                input_device_index=1,
                frames_per_buffer=chunk_size)


frames = []


do_melspec = librosa.feature.melspectrogram
pwr_to_db = librosa.core.power_to_db

39 | +""" | ||
40 | +while True: | ||
41 | + | ||
42 | + start = time.time() | ||
43 | + | ||
44 | + data = stream.read(chunk_size) | ||
45 | + data = np.fromstring(data, dtype=np.float32) | ||
46 | + | ||
47 | + melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000) | ||
48 | + norm_melspec = pwr_to_db(melspec, ref=np.max) | ||
49 | + | ||
50 | + frames.append(norm_melspec) | ||
51 | + | ||
52 | + if len(frames) == 20: | ||
53 | + | ||
54 | + | ||
55 | + stack = np.hstack(frames) | ||
56 | + | ||
57 | + plt.figure(figsize=(5, 4)) | ||
58 | + librosa.display.specshow(stack,fmax=4000) | ||
59 | + plt.savefig('C:/Users/nokh9/Desktop/DB/' + 'db.jpg') | ||
60 | + prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/db.jpg')]) | ||
61 | + print(CATEGORIES[int(prediction[0][0])]) | ||
62 | + plt.draw() | ||
63 | + plt.pause(0.0001) | ||
64 | + plt.clf() | ||
65 | + #break | ||
66 | + frames.pop(0) | ||
67 | + | ||
68 | + | ||
69 | + | ||
70 | + t = time.time() - start | ||
71 | + | ||
72 | + print(1 / t) | ||
73 | + | ||
74 | +""" | ||
prediction = model.predict(prepare(r'C:/Users/nokh9/Desktop/DB/lonely_1.jpg'))
print(prediction)
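The final print emits the raw softmax vector. A minimal sketch of mapping it to a category name, in the style of the commented loop above:

prediction = model.predict(prepare(r'C:/Users/nokh9/Desktop/DB/lonely_1.jpg'))
print(CATEGORIES[int(np.argmax(prediction))])  # e.g. "lonely" if that class scores highest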
소스코드/mobile_net2.h5
0 → 100644
This file is too large to display.
소스코드/y_9.pickle
0 → 100644
No preview for this file type