2014104094

Source code upload

1 +"""
2 + CONVOLUTION BLOCK ATTENTION MODULE (CBAM), a lighter-weight alternative to full self-attention
3 + @FUNCTION se_block : Squeeze-and-Excitation Block
4 + @FUNCTION cbam_block : Convolution Block Attention Module
5 + @FUNCTION channel_attention : Channel Attention
6 + @FUNCTION spatial_attention : Spatial Attention
7 +"""
8 +import numpy as np
9 +from tensorflow.keras.layers import Conv2D
10 +from tensorflow.keras.layers import GlobalAvgPool2D, GlobalMaxPool2D
11 +from tensorflow.keras.layers import Reshape, Dense, Permute, Lambda
12 +from tensorflow.keras.layers import Add, Activation
13 +from tensorflow.keras import backend as K
14 +from tensorflow.keras.activations import sigmoid
15 +from tensorflow.keras import layers
16 +
17 +"""
18 + Squeeze-and-Excitation(SE) Block
19 + @brief : recalibrates the relationships between channels (channel-wise feature recalibration)
20 + @param input_feature : tensor
21 +"""
22 +def se_block(input_feature, ratio=8):
23 +
24 + se_feature = GlobalAvgPool2D()(input_feature)
25 + channel = input_feature.shape[-1]
26 +
27 + se_feature = Reshape((1, 1, channel))(se_feature)
28 + se_feature = Dense(channel // ratio,
29 + activation='relu',
30 + kernel_initializer='he_normal',
31 + use_bias=True,
32 + bias_initializer='zeros')(se_feature)
33 +
34 + se_feature = Dense(channel,
35 + activation='sigmoid',
36 + kernel_initializer='he_normal',
37 + use_bias=True,
38 + bias_initializer='zeros')(se_feature)
39 +
40 + se_feature = layers.multiply([input_feature, se_feature])
41 +
42 + return se_feature
43 +
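# A minimal usage sketch for se_block (illustrative only; the input shape and the
# Input/Conv2D wiring are assumptions, not part of this module):
#   inputs = Input((62, 78, 3))
#   x = Conv2D(32, 3, padding='same')(inputs)
#   x = se_block(x, ratio=8)   # same shape as x, with each channel rescaled by a learned weight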
44 +"""
45 + CBAM_BLOCK
46 + @brief : Convolution Block Attention Module
47 + @param cbam_feature : input tensor
48 + @param ratio(int) : channel reduce ratio
49 + @return cbam_feature : dynamic feature selection
50 +"""
51 +def cbam_block(cbam_feature, ratio=8):
52 +
53 + cbam_feature = channel_attention(cbam_feature, ratio)
54 + cbam_feature = spatial_attention(cbam_feature)
55 +
56 + return cbam_feature
57 +
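# For reference, CBAM (Woo et al., 2018) applies the two sub-modules sequentially:
#   F'  = M_c(F)  * F    (channel attention map, broadcast over H x W)
#   F'' = M_s(F') * F'   (spatial attention map, broadcast over the channels)
# channel_attention() and spatial_attention() below implement M_c and M_s respectively.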
58 +"""
59 + Channel Attention
60 + @brief : Channel Attention. Uses both average pooling and max pooling (which keeps the parameter count small).
61 + Because the two pooled descriptors carry the same kind of information, a single shared MLP processes both.
62 + @param input_feature = input_tensor
63 + @return cbam_feature
64 +"""
65 +def channel_attention(input_feature, ratio=8):
66 +
67 + # channel attention is applied first, so read off the channel count of the input
68 + channel = input_feature.shape[-1]
69 +
70 + shared_layer_one = Dense(channel//ratio,
71 + activation='relu',
72 + kernel_initializer='he_normal',
73 + use_bias=True,
74 + bias_initializer='zeros')
75 +
76 + shared_layer_two = Dense(channel,
77 + kernel_initializer='he_normal',
78 + use_bias=True,
79 + bias_initializer='zeros')
80 +
81 + # combine both average pooling and max pooling
82 + avg_pool = GlobalAvgPool2D()(input_feature)
83 + avg_pool = Reshape((1, 1, channel))(avg_pool)
84 + avg_pool = shared_layer_one(avg_pool)
85 + avg_pool = shared_layer_two(avg_pool)
86 +
87 + max_pool = GlobalMaxPool2D()(input_feature)
88 + max_pool = Reshape((1, 1, channel))(max_pool)
89 + max_pool = shared_layer_one(max_pool)
90 + max_pool = shared_layer_two(max_pool)
91 +
92 + cbam_feature = Add()([avg_pool, max_pool])
93 + # sigmoid is used instead of the mutually exclusive softmax, because the goal is not
94 + # to single out one most important feature
95 + cbam_feature = Activation('sigmoid')(cbam_feature)
96 + cbam_feature = layers.multiply([input_feature, cbam_feature])
97 + return cbam_feature
98 +
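# The block above corresponds to the CBAM channel attention map
#   M_c(F) = sigmoid( MLP(AvgPool(F)) + MLP(MaxPool(F)) )
# with shared_layer_one / shared_layer_two acting as the shared MLP.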
99 +"""
100 + Spatial Attention
101 + @brief : 2D spatial attention. A single convolution produces a map that highlights
102 + where the informative features are located, i.e. it focuses on the "where".
103 + @param input_feature : input_tensor (channel-refined feature)
104 +"""
105 +def spatial_attention(input_feature, kernel_size=7):
106 +
107 + cbam_feature = input_feature
108 +
109 + avg_pool = Lambda(lambda x : K.mean(x, axis=3, keepdims=True))(cbam_feature)
110 + max_pool = Lambda(lambda x : K.max(x, axis=3, keepdims=True))(cbam_feature)
111 + concat = layers.concatenate([avg_pool, max_pool])
112 + cbam_feature = Conv2D(filters=1,
113 + kernel_size=kernel_size,
114 + strides=1,
115 + padding='same',
116 + activation='sigmoid',
117 + kernel_initializer='he_normal',
118 + use_bias=False)(concat)
119 +
126 +
127 + return layers.multiply([input_feature, cbam_feature])
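# A quick shape check for the whole module (a sketch; the 62x78x3 input size merely mirrors
# the image size used elsewhere in this project and is otherwise an assumption):
if __name__ == "__main__":
    from tensorflow.keras.layers import Input
    from tensorflow.keras.models import Model

    inputs = Input((62, 78, 3))
    x = Conv2D(32, 3, padding='same')(inputs)   # give the block a few channels to attend over
    x = cbam_block(x, ratio=8)                  # output shape should match the Conv2D output
    Model(inputs, x).summary()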
1 +"""
2 + MOBILE NET with attention (CBAM)
3 + @FUNCTION load_data : loads the pickled dataset
4 + @FUNCTION mobile_net : builds the MobileNet model
5 + @FUNCTION predict : runs a prediction with the trained model
6 +"""
7 +import pickle
8 +import numpy as np
9 +from sklearn.model_selection import train_test_split
10 +import matplotlib.pyplot as plt
12 +import tensorflow as tf
13 +import tensorflow.keras as keras
14 +import tensorflow.keras.backend as K
15 +from tensorflow.keras.models import Model
16 +from tensorflow.keras.layers import Input, Dense, Conv2D, Conv3D, DepthwiseConv2D, SeparableConv2D, Conv3DTranspose
17 +from tensorflow.keras.layers import Flatten, MaxPool2D, AvgPool2D, GlobalAvgPool2D, UpSampling2D, BatchNormalization
18 +from tensorflow.keras.layers import Concatenate, Add, Dropout, ReLU, Lambda, Activation, LeakyReLU, PReLU
19 +
20 +from attention_module import cbam_block
21 +
22 +"""
23 + Load data
24 + @brief : load Inputs and Targets from pickle data
25 + @param data_path(str) : path to pickle file containing data
26 + @return X(ndarray) : Inputs
27 + @return y(ndarray) : Targets
28 +"""
29 +def load_data():
30 + X = pickle.load(open("X_9.pickle", "rb"))
31 + y = pickle.load(open("y_9.pickle", "rb"))
32 +
33 + X = X/255.0
34 +
35 + return X, y
36 +
37 +"""
38 + MobileNet implementation
39 + @brief : MobileNet with a Convolution Block Attention Module (CBAM).
40 + CBAM is applied only after the last convolution block; applying it after
41 + every block gave worse results.
42 + @return model : Mobile Net Model
43 +"""
44 +def mobile_net(input_shape):
45 +
46 + def mobile_net_block(x, f, s=1):
47 + x = DepthwiseConv2D(3, strides=s, padding='same')(x)
48 + x = BatchNormalization()(x)
49 + x = ReLU()(x)
50 +
51 +
52 + x = Conv2D(f, 1, strides=1, padding='same')(x)
53 + x = BatchNormalization()(x)
54 + x = ReLU()(x)
55 +
56 +
57 + return x
58 +
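# Why depthwise separable blocks: for one 3x3 block with 128 input and 256 output channels,
# a standard convolution needs 3*3*128*256 = 294,912 weights, while the depthwise 3x3
# (3*3*128 = 1,152) plus pointwise 1x1 (128*256 = 32,768) pair above needs about 33,920,
# roughly an 8-9x reduction. (Illustrative numbers, ignoring biases and BatchNorm parameters.)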
59 + input = Input(input_shape)
60 +
61 + x = Conv2D(32, 3, strides=2, padding='same')(input)
62 + x = BatchNormalization()(x)
63 + x = ReLU()(x)
64 +
65 + x = mobile_net_block(x, 64)
66 + x = mobile_net_block(x, 128, 2)
67 + x = mobile_net_block(x, 128)
68 +
69 + x = mobile_net_block(x, 256, 2)
70 + x = mobile_net_block(x, 256)
71 +
72 + x = mobile_net_block(x, 512, 2)
73 + for _ in range(5):
74 + x = mobile_net_block(x, 512)
75 +
76 + x = mobile_net_block(x, 1024, 2)
77 + x = mobile_net_block(x, 1024)
78 + x = cbam_block(x)
79 +
80 + x = GlobalAvgPool2D()(x)
81 +
82 +
83 + output = Dense(4, activation='softmax')(x)
84 +
85 + model = Model(input, output)
86 + return model
87 +
88 +"""
89 + Predict with the trained model
90 + @brief : predict data from trained mobile net model
91 + @param model : Trained classifier
92 + @param X : Input data
93 + @param y(int): Target
94 +"""
95 +def predict(model, X, y):
96 + # add a batch dimension to the sample
97 + X = X[np.newaxis, ...]
98 +
99 + prediction = model.predict(X)
100 +
101 + # take the index with the highest probability via argmax
102 + predicted_index = np.argmax(prediction, axis=1)
103 +
104 + print("Target: {}, Predicted label: {}".format(y, predicted_index))
105 +
106 +
107 +# main function
108 +if __name__ == "__main__":
109 +
110 + # load data and split to X_train and y_train
111 + X_train, y_train = load_data()
112 +
113 + # create network
114 + K.clear_session()
115 + input_shape = (X_train.shape[1], X_train.shape[2], 3)
116 + model= mobile_net(input_shape)
117 +
118 + # compile model
119 + optimiser = keras.optimizers.Adam(learning_rate=0.0001)
120 + model.compile(optimizer=optimiser,
121 + loss='sparse_categorical_crossentropy',
122 + metrics=['accuracy'])
123 +
124 + model.summary()
125 +
126 + # train model
127 + hist = model.fit(X_train, y_train, validation_split=0.1, batch_size=32, epochs=30)
128 +
129 + fig, loss_ax = plt.subplots()
130 +
131 + acc_ax = loss_ax.twinx()
132 +
133 + loss_ax.plot(hist.history['loss'], 'y', label='train loss')
134 + loss_ax.plot(hist.history['val_loss'], label='validation loss')
135 +
136 + acc_ax.plot(hist.history['accuracy'], 'b', label='train_acc')
137 + acc_ax.plot(hist.history['val_accuracy'], 'g', label='validation_acc')
138 +
139 + loss_ax.set_xlabel('epoch')
140 + loss_ax.set_ylabel('loss')
141 + acc_ax.set_ylabel('accuracy')
142 +
143 + loss_ax.legend(loc='upper left')
144 + acc_ax.legend(loc='lower left')
145 +
146 + # evaluate model
147 + test_loss, test_acc = model.evaluate(X_train, y_train, verbose=2)
148 + print('\nTest accuracy:', test_acc)
149 +
150 + X_to_predict = X_train[100]
151 + y_to_predict = y_train[100]
152 +
153 + # predict sample
154 + predict(model, X_to_predict, y_to_predict)
155 +
156 + model.save('C:/Users/nokh9/Desktop/mobile_net2.h5')
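# Note: evaluate() above is run on the same data the model was trained on, so the printed
# accuracy is optimistic. A held-out test set could be carved out with the already-imported
# train_test_split before training (a sketch; the 0.2 fraction and random_state are assumptions):
#
#   X_tr, X_test, y_tr, y_test = train_test_split(X_train, np.array(y_train), test_size=0.2, random_state=42)
#   model.fit(X_tr, y_tr, validation_split=0.1, batch_size=32, epochs=30)
#   test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)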
1 +"""
2 + Augment the scarce training data.
3 + Because mel-spectrograms encode frequency, geometric augmentation (warping, flipping)
4 + seemed likely to hurt training, so this script was not actually used.
5 + @FUNCTION data_augumentation : augments the data
6 + @FUNCTION save_into_folder : sorts the augmented images into per-category folders
7 +"""
8 +import os
9 +from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
10 +
11 +
12 +"""
13 + Data Augmentation
14 + @brief : augments the data so that the model can squeeze as much information as
15 + possible out of a small number of images
16 + @param dataset_path(str) : dog_sounds converted into mel_spectrogram
17 +"""
18 +def data_augumentation(dataset_path):
19 + datagen = ImageDataGenerator(
20 + rotation_range=40, # image rotation range (degrees)
21 + width_shift_range=0.2, # random horizontal / vertical translation of the image
22 + height_shift_range=0.2,
23 + rescale=1./255, # rescale pixel values to the 0-1 range
24 + shear_range=0.2, # range for random shearing transformations
25 + zoom_range=0.2, # range for random zoom in/out
26 + horizontal_flip=False, # these are images of sound, so they are not flipped
27 + fill_mode='nearest') # fill the空 left when the image is rotated, shifted, or zoomed out
28 +
29 + for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
30 +
31 + if dirpath != dataset_path:
32 +
33 + for f in filenames:
34 +
35 + file_path = os.path.join(dirpath, f)
36 + img = load_img(file_path) # PIL image
37 + i = 0
38 + x = img_to_array(img) # NumPy array of shape (h, w, 3)
39 + x = x.reshape((1,)+x.shape) # NumPy array of shape (1, h, w, 3)
40 +
41 + # generate randomly transformed images in batches,
42 + # stopping after about ten augmented images per source file
43 + for batch in datagen.flow(x, batch_size=1, save_to_dir="C:/Users/nokh9/Desktop/dog_sound_train", save_prefix=f, save_format='jpg'):
44 + print("{}, data_augment:{}".format(file_path, i))
45 + i += 1
46 + if i > 10:
47 + break
48 +
49 +"""
50 + split categories
51 + @brief : saves the augmented images into per-category folders
52 + @param dataset_path(str) : folder containing the augmented dog_sound images
53 +"""
54 +def save_into_folder(dataset_path):
55 +
56 + num = 1
57 +
58 + for filename in os.listdir(dataset_path):
59 + find_category = filename.split('_')[0]
60 + des_path = os.path.join(dataset_path, find_category)
61 + if not os.path.exists(des_path):
62 + os.makedirs(des_path)
63 + num = 1
64 + os.rename(dataset_path+filename, des_path + '/' + find_category + str(num) + '.jpg')
65 + num += 1
66 +
67 +# main function
68 +if __name__ == "__main__":
69 + data_augumentation("C:/Users/nokh9/Desktop/dog_sound_mel")
70 + save_into_folder("C:/Users/nokh9/Desktop/dog_sound_train/")
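# An alternative that is known to work well on spectrograms is SpecAugment-style masking
# (random frequency and time masks) applied directly to the mel array rather than to the
# rendered image. A minimal numpy sketch, assuming `mel` is a (n_mels, n_frames) array and
# the mask widths are assumptions:
#
#   import numpy as np
#
#   def spec_augment(mel, max_freq_mask=8, max_time_mask=16):
#       mel = mel.copy()
#       f = np.random.randint(0, max_freq_mask + 1)          # width of the frequency mask
#       f0 = np.random.randint(0, mel.shape[0] - f + 1)
#       mel[f0:f0 + f, :] = mel.min()                        # silence a band of mel bins
#       t = np.random.randint(0, max_time_mask + 1)          # width of the time mask
#       t0 = np.random.randint(0, mel.shape[1] - t + 1)
#       mel[:, t0:t0 + t] = mel.min()                        # silence a span of frames
#       return mel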
1 +"""
2 + Preprocess the mel-spectrogram images before they are used for training
3 + @FUNCTION create_training_data : preprocesses the images
4 +"""
5 +import os
6 +import random
7 +import cv2
8 +import numpy as np
9 +import matplotlib.pyplot as plt
10 +import pickle
11 +
12 +dataset_path = "C:/Users/nokh9/Desktop/dog_sound_mel/" # path to the data
13 +CATEGORIES = ["angry", "happy", "lonely", "sad"] # emotion categories
14 +
15 +training_data = []
16 +
17 +"""
18 + Preprocess the images
19 + @brief : preprocessing image for training
20 +"""
21 +def create_training_data():
22 +
23 + for category in CATEGORIES:
24 + path = os.path.join(dataset_path, category)
25 + class_num = CATEGORIES.index(category)
26 + print('-'*50)
27 + print(category + " is started...")
28 + print('-'*50)
29 + image_list = os.listdir(path)
30 + listdir_num = len(image_list)
31 +
32 + # preprocess every image in the data folder
33 + for number in range(listdir_num):
34 + try:
35 + image_path = path + '/' + category + '_' + str(number+1) + '.jpg'
36 + image_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
37 + new_array = cv2.resize(image_array, (62, 78))
38 + training_data.append([new_array, class_num])
39 + if((number+1)%100 == 0):
40 + print(str(number+1) + ' is finished')
41 + except Exception: # skip missing or unreadable images
42 + pass
43 +
44 +# main function
45 +if __name__ == "__main__":
46 + create_training_data()
47 + print(len(training_data))
48 +
49 + # shuffle the data before it is split into training and validation sets
50 + random.shuffle(training_data)
51 +
52 + X = []
53 + y = []
54 +
55 + for features, label in training_data:
56 + X.append(features)
57 + y.append(label)
58 +
59 + X = np.array(X).reshape(-1, 62, 78, 3)
60 +
61 + # save X and y as separate pickle files
62 + pickle_out = open("X_9.pickle", "wb")
63 + pickle.dump(X, pickle_out)
64 + pickle_out.close()
65 +
66 + pickle_out = open("y_9.pickle", "wb")
67 + pickle.dump(y, pickle_out)
68 + pickle_out.close()
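# Sanity check for the saved pickles (a sketch, meant to be run separately after this script):
#
#   X = pickle.load(open("X_9.pickle", "rb"))
#   y = pickle.load(open("y_9.pickle", "rb"))
#   print(X.shape)                              # expected: (num_samples, 62, 78, 3)
#   print(np.unique(y, return_counts=True))     # number of samples per emotion category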
1 +"""
2 + Example code for handling an audio file (.wav).
3 + librosa extracts and plots the features for us, which makes it convenient
4 + for turning sound into data.
5 +"""
6 +import numpy as np
7 +import librosa, librosa.display
8 +import matplotlib.pyplot as plt
9 +
10 +
11 +FIG_SIZE = (15,10)
12 +
13 +file = "C:/Users/nokh9/Desktop/dog_sound/barking/barking_1.wav"
14 +
15 +# load the audio file
16 +signal, sample_rate = librosa.load(file, sr=22500)
17 +
18 +# WAVEFORM
19 +# display waveform
20 +plt.figure(figsize=FIG_SIZE)
21 +librosa.display.waveplot(signal, sample_rate, alpha=0.4)
22 +plt.xlabel("Time (s)")
23 +plt.ylabel("Amplitude")
24 +plt.title("Waveform")
25 +
26 +
27 +# FFT -> power spectrum
28 +# Fourier transform
29 +fft = np.fft.fft(signal)
30 +
31 +# take the absolute value of the complex FFT output to get the magnitude
32 +spectrum = np.abs(fft)
33 +
34 +# create frequency variable
35 +f = np.linspace(0, sample_rate, len(spectrum))
36 +
37 +# take half of the spectrum and frequency
38 +left_spectrum = spectrum[:int(len(spectrum)/2)]
39 +left_f = f[:int(len(spectrum)/2)]
40 +
41 +# plot spectrum
42 +plt.figure(figsize=FIG_SIZE)
43 +plt.plot(left_f, left_spectrum, alpha=0.4)
44 +plt.xlabel("Frequency")
45 +plt.ylabel("Magnitude")
46 +plt.title("Power spectrum")
47 +
48 +
49 +# STFT -> spectrogram
50 +hop_length = 512 # in num. of samples
51 +n_fft = 2048 # window in num. of samples
52 +
53 +
54 +hop_length_duration = float(hop_length)/sample_rate
55 +n_fft_duration = float(n_fft)/sample_rate
56 +
57 +print("STFT hop length duration is: {}s".format(hop_length_duration))
58 +print("STFT window duration is: {}s".format(n_fft_duration))
59 +
60 +# perform stft
61 +stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
62 +
63 +
64 +spectrogram = np.abs(stft)
65 +
66 +# display the spectrogram
67 +plt.figure(figsize=FIG_SIZE)
68 +librosa.display.specshow(spectrogram, sr=sample_rate, hop_length=hop_length)
69 +plt.xlabel("Time") # x axis : time
70 +plt.ylabel("Frequency") # y axis : frequency
71 +plt.colorbar() # color-code the magnitudes so the spectrogram is easy to read visually
72 +plt.title("Spectrogram")
73 +
74 +# apply logarithm to cast amplitude to Decibels
75 +log_spectrogram = librosa.amplitude_to_db(spectrogram)
76 +
77 +plt.figure(figsize=FIG_SIZE)
78 +librosa.display.specshow(log_spectrogram, sr=sample_rate, hop_length=hop_length)
79 +plt.xlabel("Time")
80 +plt.ylabel("Frequency")
81 +plt.colorbar(format="%+2.0f dB")
82 +plt.title("Spectrogram (dB)")
83 +
84 +
85 +# MFCCs
86 +# extract 13 MFCCs
87 +MFCCs = librosa.feature.mfcc(signal, sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
88 +
89 +# display MFCCs
90 +plt.figure(figsize=FIG_SIZE)
91 +librosa.display.specshow(MFCCs, sr=sample_rate, hop_length=hop_length)
92 +plt.xlabel("Time")
93 +plt.ylabel("MFCC coefficients")
94 +plt.colorbar()
95 +plt.title("MFCCs")
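
# MEL SPECTROGRAM
# The rest of this project trains on mel-spectrogram images rather than on the raw STFT or
# the MFCCs, so the matching plot is sketched here as well (n_mels=128 and fmax=4000 mirror
# the values used in the live-prediction script; otherwise they are assumptions).
mel_spectrogram = librosa.feature.melspectrogram(signal, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=128, fmax=4000)
log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(log_mel_spectrogram, sr=sample_rate, hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("Mel bins")
plt.colorbar(format="%+2.0f dB")
plt.title("Mel spectrogram (dB)")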
96 +# show plots
97 +plt.show()
1 +import cv2
2 +import numpy as np
3 +import sys
4 +import os
5 +import pyaudio
6 +import librosa
7 +import librosa.display
8 +import matplotlib.pyplot as plt
9 +import time
10 +import tensorflow as tf
11 +import tensorflow.keras as keras
12 +
13 +rate = 22500
14 +chunk_size = rate // 4
15 +
16 +CATEGORIES = ["angry", "happy", "lonely", "sad"]
17 +model= keras.models.load_model("C:/Users/nokh9/Desktop/mobile_net2.h5")
18 +
19 +def prepare(mel):
20 + img_array = cv2.imread(mel)
21 + new_array = cv2.resize(img_array, (62, 78))
22 + return new_array.reshape(-1, 62, 78, 3)
23 +
24 +p = pyaudio.PyAudio()
25 +stream = p.open(format=pyaudio.paFloat32,
26 + channels=1,
27 + rate=rate,
28 + input=True,
29 + input_device_index=1,
30 + frames_per_buffer=chunk_size)
31 +
32 +
33 +frames = []
34 +
35 +
36 +do_melspec = librosa.feature.melspectrogram
37 +pwr_to_db = librosa.core.power_to_db
38 +
39 +"""
40 +while True:
41 +
42 + start = time.time()
43 +
44 + data = stream.read(chunk_size)
45 + data = np.frombuffer(data, dtype=np.float32)
46 +
47 + melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000)
48 + norm_melspec = pwr_to_db(melspec, ref=np.max)
49 +
50 + frames.append(norm_melspec)
51 +
52 + if len(frames) == 20:
53 +
54 +
55 + stack = np.hstack(frames)
56 +
57 + plt.figure(figsize=(5, 4))
58 + librosa.display.specshow(stack,fmax=4000)
59 + plt.savefig('C:/Users/nokh9/Desktop/DB/' + 'db.jpg')
60 + prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/db.jpg')])
61 + print(CATEGORIES[int(np.argmax(prediction[0]))])
62 + plt.draw()
63 + plt.pause(0.0001)
64 + plt.clf()
65 + #break
66 + frames.pop(0)
67 +
68 +
69 +
70 + t = time.time() - start
71 +
72 + print(1 / t)
73 +
74 +"""
75 +prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/lonely_1.jpg')])
76 +print(prediction)
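# Map the softmax output onto an emotion label with argmax, mirroring predict() in the
# training script (prediction has shape (1, 4), one probability per category).
predicted_label = CATEGORIES[int(np.argmax(prediction[0]))]
print("Predicted label:", predicted_label)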