2014104094

Source code upload

1 +"""
2 + CONVOLUTION BLOCK ATTENTION MODULE (CBAM), a lighter-weight alternative to full self-attention
3 + @FUNCTION se_block : Squeeze-and-Excitation Block
4 + @FUNCTION cbam_block : Convolution Block Attention Module
5 + @FUNCTION channel_attention : Channel Attention
6 + @FUNCTION spatial_attention : Spatial Attention
7 +"""
8 +import numpy as np
9 +from tensorflow.keras.layers import Conv2D
10 +from tensorflow.keras.layers import GlobalAvgPool2D, GlobalMaxPool2D
11 +from tensorflow.keras.layers import Reshape, Dense, Permute, Lambda
12 +from tensorflow.keras.layers import Add, Activation
13 +from tensorflow.keras import backend as K
14 +from tensorflow.keras.activations import sigmoid
15 +from tensorflow.keras import layers
16 +
17 +"""
18 + Squeeze-and-Excitation(SE) Block
19 + @brief : recalibrates the relationships between channels (channel-wise feature recalibration)
20 + @param input_feature : tensor
21 +"""
22 +def se_block(input_feature, ratio=8):
23 +
24 + se_feature = GlobalAvgPool2D()(input_feature)
25 + channel = input_feature.shape[-1]
26 +
27 + se_feature = Reshape((1, 1, channel))(se_feature)
28 + se_feature = Dense(channel // ratio,
29 + activation='relu',
30 + kernel_initializer='he_normal',
31 + use_bias=True,
32 + bias_initializer='zeros')(se_feature)
33 +
34 + se_feature = Dense(channel,
35 + activation='sigmoid',
36 + kernel_initializer='he_normal',
37 + use_bias=True,
38 + bias_initializer='zeros')(se_feature)
39 +
40 + se_feature = layers.multiply([input_feature, se_feature])
41 +
42 + return se_feature
43 +
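# A minimal usage sketch for se_block (illustrative only; the input shape and the
# Input/Conv2D wiring are assumptions, not part of this module):
#   inputs = Input((62, 78, 3))
#   x = Conv2D(32, 3, padding='same')(inputs)
#   x = se_block(x, ratio=8)   # same shape as x, with each channel rescaled by a learned weight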
44 +"""
45 + CBAM_BLOCK
46 + @brief : Convolution Block Attention Module
47 + @param cbam_feature : input tensor
48 + @param ratio(int) : channel reduce ratio
49 + @return cbam_feature : dynamic feature selection
50 +"""
51 +def cbam_block(cbam_feature, ratio=8):
52 +
53 + cbam_feature = channel_attention(cbam_feature, ratio)
54 + cbam_feature = spatial_attention(cbam_feature)
55 +
56 + return cbam_feature
57 +
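# For reference, CBAM (Woo et al., 2018) applies the two sub-modules sequentially:
#   F'  = M_c(F)  * F    (channel attention map, broadcast over H x W)
#   F'' = M_s(F') * F'   (spatial attention map, broadcast over the channels)
# channel_attention() and spatial_attention() below implement M_c and M_s respectively.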
58 +"""
59 + Channel Attention
60 + @brief : Channel Attention. Uses both average pooling and max pooling (which keeps the parameter count small).
61 + Because the two pooled descriptors carry the same kind of information, a single shared MLP processes both.
62 + @param input_feature = input_tensor
63 + @return cbam_feature
64 +"""
65 +def channel_attention(input_feature, ratio=8):
66 +
67 + # channel attention is applied first, so read off the channel count of the input
68 + channel = input_feature.shape[-1]
69 +
70 + shared_layer_one = Dense(channel//ratio,
71 + activation='relu',
72 + kernel_initializer='he_normal',
73 + use_bias=True,
74 + bias_initializer='zeros')
75 +
76 + shared_layer_two = Dense(channel,
77 + kernel_initializer='he_normal',
78 + use_bias=True,
79 + bias_initializer='zeros')
80 +
81 + # combine both average pooling and max pooling
82 + avg_pool = GlobalAvgPool2D()(input_feature)
83 + avg_pool = Reshape((1, 1, channel))(avg_pool)
84 + avg_pool = shared_layer_one(avg_pool)
85 + avg_pool = shared_layer_two(avg_pool)
86 +
87 + max_pool = GlobalMaxPool2D()(input_feature)
88 + max_pool = Reshape((1, 1, channel))(max_pool)
89 + max_pool = shared_layer_one(max_pool)
90 + max_pool = shared_layer_two(max_pool)
91 +
92 + cbam_feature = Add()([avg_pool, max_pool])
93 + # sigmoid is used instead of the mutually exclusive softmax, because the goal is not
94 + # to single out one most important feature
95 + cbam_feature = Activation('sigmoid')(cbam_feature)
96 + cbam_feature = layers.multiply([input_feature, cbam_feature])
97 + return cbam_feature
98 +
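# The block above corresponds to the CBAM channel attention map
#   M_c(F) = sigmoid( MLP(AvgPool(F)) + MLP(MaxPool(F)) )
# with shared_layer_one / shared_layer_two acting as the shared MLP.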
99 +"""
100 + Spatial Attention
101 + @brief : 2D spatial attention. A single convolution produces a map that highlights
102 + where the informative features are located, i.e. it focuses on the "where".
103 + @param input_feature : input_tensor (channel-refined feature)
104 +"""
105 +def spatial_attention(input_feature, kernel_size=7):
106 +
107 + cbam_feature = input_feature
108 +
109 + avg_pool = Lambda(lambda x : K.mean(x, axis=3, keepdims=True))(cbam_feature)
110 + max_pool = Lambda(lambda x : K.max(x, axis=3, keepdims=True))(cbam_feature)
111 + concat = layers.concatenate([avg_pool, max_pool])
112 + cbam_feature = Conv2D(filters=1,
113 + kernel_size=kernel_size,
114 + strides=1,
115 + padding='same',
116 + activation='sigmoid',
117 + kernel_initializer='he_normal',
118 + use_bias=False)(concat)
119 +
126 +
127 + return layers.multiply([input_feature, cbam_feature])
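# A quick shape check for the whole module (a sketch; the 62x78x3 input size merely mirrors
# the image size used elsewhere in this project and is otherwise an assumption):
if __name__ == "__main__":
    from tensorflow.keras.layers import Input
    from tensorflow.keras.models import Model

    inputs = Input((62, 78, 3))
    x = Conv2D(32, 3, padding='same')(inputs)   # give the block a few channels to attend over
    x = cbam_block(x, ratio=8)                  # output shape should match the Conv2D output
    Model(inputs, x).summary()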
1 +"""
2 + MOBILE NET with attention (CBAM)
3 + @FUNCTION load_data : loads the pickled dataset
4 + @FUNCTION mobile_net : builds the MobileNet model
5 + @FUNCTION predict : runs a prediction with the trained model
6 +"""
7 +import pickle
8 +import numpy as np
9 +from sklearn.model_selection import train_test_split
10 +import matplotlib.pyplot as plt
12 +import tensorflow as tf
13 +import tensorflow.keras as keras
14 +import tensorflow.keras.backend as K
15 +from tensorflow.keras.models import Model
16 +from tensorflow.keras.layers import Input, Dense, Conv2D, Conv3D, DepthwiseConv2D, SeparableConv2D, Conv3DTranspose
17 +from tensorflow.keras.layers import Flatten, MaxPool2D, AvgPool2D, GlobalAvgPool2D, UpSampling2D, BatchNormalization
18 +from tensorflow.keras.layers import Concatenate, Add, Dropout, ReLU, Lambda, Activation, LeakyReLU, PReLU
19 +
20 +from attention_module import cbam_block
21 +
22 +"""
23 + Load data
24 + @brief : load Inputs and Targets from pickle data
25 + @param data_path(str) : path to pickle file containing data
26 + @return X(ndarray) : Inputs
27 + @return y(ndarray) : Targets
28 +"""
29 +def load_data():
30 + X = pickle.load(open("X_9.pickle", "rb"))
31 + y = pickle.load(open("y_9.pickle", "rb"))
32 +
33 + X = X/255.0
34 +
35 + return X, y
36 +
37 +"""
38 + MobileNet implementation
39 + @brief : MobileNet with a Convolution Block Attention Module (CBAM).
40 + CBAM is applied only after the last convolution block; applying it after
41 + every block gave worse results.
42 + @return model : Mobile Net Model
43 +"""
44 +def mobile_net(input_shape):
45 +
46 + def mobile_net_block(x, f, s=1):
47 + x = DepthwiseConv2D(3, strides=s, padding='same')(x)
48 + x = BatchNormalization()(x)
49 + x = ReLU()(x)
50 +
51 +
52 + x = Conv2D(f, 1, strides=1, padding='same')(x)
53 + x = BatchNormalization()(x)
54 + x = ReLU()(x)
55 +
56 +
57 + return x
58 +
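# Why depthwise separable blocks: for one 3x3 block with 128 input and 256 output channels,
# a standard convolution needs 3*3*128*256 = 294,912 weights, while the depthwise 3x3
# (3*3*128 = 1,152) plus pointwise 1x1 (128*256 = 32,768) pair above needs about 33,920,
# roughly an 8-9x reduction. (Illustrative numbers, ignoring biases and BatchNorm parameters.)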
59 + input = Input(input_shape)
60 +
61 + x = Conv2D(32, 3, strides=2, padding='same')(input)
62 + x = BatchNormalization()(x)
63 + x = ReLU()(x)
64 +
65 + x = mobile_net_block(x, 64)
66 + x = mobile_net_block(x, 128, 2)
67 + x = mobile_net_block(x, 128)
68 +
69 + x = mobile_net_block(x, 256, 2)
70 + x = mobile_net_block(x, 256)
71 +
72 + x = mobile_net_block(x, 512, 2)
73 + for _ in range(5):
74 + x = mobile_net_block(x, 512)
75 +
76 + x = mobile_net_block(x, 1024, 2)
77 + x = mobile_net_block(x, 1024)
78 + x = cbam_block(x)
79 +
80 + x = GlobalAvgPool2D()(x)
81 +
82 +
83 + output = Dense(4, activation='softmax')(x)
84 +
85 + model = Model(input, output)
86 + return model
87 +
88 +"""
89 + Predict with the trained model
90 + @brief : predict data from trained mobile net model
91 + @param model : Trained classifier
92 + @param X : Input data
93 + @param y(int): Target
94 +"""
95 +def predict(model, X, y):
96 + # add a batch dimension to the sample
97 + X = X[np.newaxis, ...]
98 +
99 + prediction = model.predict(X)
100 +
101 + # take the index with the highest probability via argmax
102 + predicted_index = np.argmax(prediction, axis=1)
103 +
104 + print("Target: {}, Predicted label: {}".format(y, predicted_index))
105 +
106 +
107 +# main function
108 +if __name__ == "__main__":
109 +
110 + # load data and split to X_train and y_train
111 + X_train, y_train = load_data()
112 +
113 + # create network
114 + K.clear_session()
115 + input_shape = (X_train.shape[1], X_train.shape[2], 3)
116 + model= mobile_net(input_shape)
117 +
118 + # compile model
119 + optimiser = keras.optimizers.Adam(learning_rate=0.0001)
120 + model.compile(optimizer=optimiser,
121 + loss='sparse_categorical_crossentropy',
122 + metrics=['accuracy'])
123 +
124 + model.summary()
125 +
126 + # train model
127 + hist = model.fit(X_train, y_train, validation_split=0.1, batch_size=32, epochs=30)
128 +
129 + fig, loss_ax = plt.subplots()
130 +
131 + acc_ax = loss_ax.twinx()
132 +
133 + loss_ax.plot(hist.history['loss'], 'y', label='train loss')
134 + loss_ax.plot(hist.history['val_loss'], label='validation loss')
135 +
136 + acc_ax.plot(hist.history['accuracy'], 'b', label='train_acc')
137 + acc_ax.plot(hist.history['val_accuracy'], 'g', label='validation_acc')
138 +
139 + loss_ax.set_xlabel('epoch')
140 + loss_ax.set_ylabel('loss')
141 + acc_ax.set_ylabel('accuracy')
142 +
143 + loss_ax.legend(loc='upper left')
144 + acc_ax.legend(loc='lower left')
145 +
146 + # evaluate model
147 + test_loss, test_acc = model.evaluate(X_train, y_train, verbose=2)
148 + print('\nTest accuracy:', test_acc)
149 +
150 + X_to_predict = X_train[100]
151 + y_to_predict = y_train[100]
152 +
153 + # predict sample
154 + predict(model, X_to_predict, y_to_predict)
155 +
156 + model.save('C:/Users/nokh9/Desktop/mobile_net2.h5')
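# Note: evaluate() above is run on the same data the model was trained on, so the printed
# accuracy is optimistic. A held-out test set could be carved out with the already-imported
# train_test_split before training (a sketch; the 0.2 fraction and random_state are assumptions):
#
#   X_tr, X_test, y_tr, y_test = train_test_split(X_train, np.array(y_train), test_size=0.2, random_state=42)
#   model.fit(X_tr, y_tr, validation_split=0.1, batch_size=32, epochs=30)
#   test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)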
1 +"""
2 + Augment the scarce training data.
3 + Because mel-spectrograms encode frequency, geometric augmentation (warping, flipping)
4 + seemed likely to hurt training, so this script was not actually used.
5 + @FUNCTION data_augumentation : augments the data
6 + @FUNCTION save_into_folder : sorts the augmented images into per-category folders
7 +"""
8 +import os
9 +from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
10 +
11 +
12 +"""
13 + Data Augmentation
14 + @brief : augments the data so that the model can squeeze as much information as
15 + possible out of a small number of images
16 + @param dataset_path(str) : dog_sounds converted into mel_spectrogram
17 +"""
18 +def data_augumentation(dataset_path):
19 + datagen = ImageDataGenerator(
20 + rotation_range=40, # image rotation range (degrees)
21 + width_shift_range=0.2, # random horizontal / vertical translation of the image
22 + height_shift_range=0.2,
23 + rescale=1./255, # rescale pixel values to the 0-1 range
24 + shear_range=0.2, # range for random shearing transformations
25 + zoom_range=0.2, # range for random zoom in/out
26 + horizontal_flip=False, # these are images of sound, so they are not flipped
27 + fill_mode='nearest') # fill the空 left when the image is rotated, shifted, or zoomed out
28 +
29 + for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
30 +
31 + if dirpath != dataset_path:
32 +
33 + for f in filenames:
34 +
35 + file_path = os.path.join(dirpath, f)
36 + img = load_img(file_path) # PIL image
37 + i = 0
38 + x = img_to_array(img) # NumPy array of shape (h, w, 3)
39 + x = x.reshape((1,)+x.shape) # NumPy array of shape (1, h, w, 3)
40 +
41 + # generate randomly transformed images in batches,
42 + # stopping after about ten augmented images per source file
43 + for batch in datagen.flow(x, batch_size=1, save_to_dir="C:/Users/nokh9/Desktop/dog_sound_train", save_prefix=f, save_format='jpg'):
44 + print("{}, data_augment:{}".format(file_path, i))
45 + i += 1
46 + if i > 10:
47 + break
48 +
49 +"""
50 + split categories
51 + @brief : saves the augmented images into per-category folders
52 + @param dataset_path(str) : folder containing the augmented dog_sound images
53 +"""
54 +def save_into_folder(dataset_path):
55 +
56 + num = 1
57 +
58 + for filename in os.listdir(dataset_path):
59 + find_category = filename.split('_')[0]
60 + des_path = os.path.join(dataset_path, find_category)
61 + if not os.path.exists(des_path):
62 + os.makedirs(des_path)
63 + num = 1
64 + os.rename(dataset_path+filename, des_path + '/' + find_category + str(num) + '.jpg')
65 + num += 1
66 +
67 +# main function
68 +if __name__ == "__main__":
69 + data_augumentation("C:/Users/nokh9/Desktop/dog_sound_mel")
70 + save_into_folder("C:/Users/nokh9/Desktop/dog_sound_train/")
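# An alternative that is known to work well on spectrograms is SpecAugment-style masking
# (random frequency and time masks) applied directly to the mel array rather than to the
# rendered image. A minimal numpy sketch, assuming `mel` is a (n_mels, n_frames) array and
# the mask widths are assumptions:
#
#   import numpy as np
#
#   def spec_augment(mel, max_freq_mask=8, max_time_mask=16):
#       mel = mel.copy()
#       f = np.random.randint(0, max_freq_mask + 1)          # width of the frequency mask
#       f0 = np.random.randint(0, mel.shape[0] - f + 1)
#       mel[f0:f0 + f, :] = mel.min()                        # silence a band of mel bins
#       t = np.random.randint(0, max_time_mask + 1)          # width of the time mask
#       t0 = np.random.randint(0, mel.shape[1] - t + 1)
#       mel[:, t0:t0 + t] = mel.min()                        # silence a span of frames
#       return mel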
1 +"""
2 + Preprocess the mel-spectrogram images before they are used for training
3 + @FUNCTION create_training_data : preprocesses the images
4 +"""
5 +import os
6 +import random
7 +import cv2
8 +import numpy as np
9 +import matplotlib.pyplot as plt
10 +import pickle
11 +
12 +dataset_path = "C:/Users/nokh9/Desktop/dog_sound_mel/" # path to the data
13 +CATEGORIES = ["angry", "happy", "lonely", "sad"] # emotion categories
14 +
15 +training_data = []
16 +
17 +"""
18 + Preprocess the images
19 + @brief : preprocessing image for training
20 +"""
21 +def create_training_data():
22 +
23 + for category in CATEGORIES:
24 + path = os.path.join(dataset_path, category)
25 + class_num = CATEGORIES.index(category)
26 + print('-'*50)
27 + print(category + " is started...")
28 + print('-'*50)
29 + image_list = os.listdir(path)
30 + listdir_num = len(image_list)
31 +
32 + # preprocess every image in the data folder
33 + for number in range(listdir_num):
34 + try:
35 + image_path = path + '/' + category + '_' + str(number+1) + '.jpg'
36 + image_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
37 + new_array = cv2.resize(image_array, (62, 78))
38 + training_data.append([new_array, class_num])
39 + if((number+1)%100 == 0):
40 + print(str(number+1) + ' is finished')
41 + except Exception: # skip missing or unreadable images
42 + pass
43 +
44 +# main function
45 +if __name__ == "__main__":
46 + create_training_data()
47 + print(len(training_data))
48 +
49 + # shuffle the data before it is split into training and validation sets
50 + random.shuffle(training_data)
51 +
52 + X = []
53 + y = []
54 +
55 + for features, label in training_data:
56 + X.append(features)
57 + y.append(label)
58 +
59 + X = np.array(X).reshape(-1, 62, 78, 3)
60 +
61 + # save X and y as separate pickle files
62 + pickle_out = open("X_9.pickle", "wb")
63 + pickle.dump(X, pickle_out)
64 + pickle_out.close()
65 +
66 + pickle_out = open("y_9.pickle", "wb")
67 + pickle.dump(y, pickle_out)
68 + pickle_out.close()
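# Sanity check for the saved pickles (a sketch, meant to be run separately after this script):
#
#   X = pickle.load(open("X_9.pickle", "rb"))
#   y = pickle.load(open("y_9.pickle", "rb"))
#   print(X.shape)                              # expected: (num_samples, 62, 78, 3)
#   print(np.unique(y, return_counts=True))     # number of samples per emotion category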
1 +"""
2 + Example code for handling an audio file (.wav).
3 + librosa extracts and plots the features for us, which makes it convenient
4 + for turning sound into data.
5 +"""
6 +import numpy as np
7 +import librosa, librosa.display
8 +import matplotlib.pyplot as plt
9 +
10 +
11 +FIG_SIZE = (15,10)
12 +
13 +file = "C:/Users/nokh9/Desktop/dog_sound/barking/barking_1.wav"
14 +
15 +# load the audio file
16 +signal, sample_rate = librosa.load(file, sr=22500)
17 +
18 +# WAVEFORM
19 +# display waveform
20 +plt.figure(figsize=FIG_SIZE)
21 +librosa.display.waveplot(signal, sample_rate, alpha=0.4)
22 +plt.xlabel("Time (s)")
23 +plt.ylabel("Amplitude")
24 +plt.title("Waveform")
25 +
26 +
27 +# FFT -> power spectrum
28 +# Fourier transform
29 +fft = np.fft.fft(signal)
30 +
31 +# take the absolute value of the complex FFT output to get the magnitude
32 +spectrum = np.abs(fft)
33 +
34 +# create frequency variable
35 +f = np.linspace(0, sample_rate, len(spectrum))
36 +
37 +# take half of the spectrum and frequency
38 +left_spectrum = spectrum[:int(len(spectrum)/2)]
39 +left_f = f[:int(len(spectrum)/2)]
40 +
41 +# plot spectrum
42 +plt.figure(figsize=FIG_SIZE)
43 +plt.plot(left_f, left_spectrum, alpha=0.4)
44 +plt.xlabel("Frequency")
45 +plt.ylabel("Magnitude")
46 +plt.title("Power spectrum")
47 +
48 +
49 +# STFT -> spectrogram
50 +hop_length = 512 # in num. of samples
51 +n_fft = 2048 # window in num. of samples
52 +
53 +
54 +hop_length_duration = float(hop_length)/sample_rate
55 +n_fft_duration = float(n_fft)/sample_rate
56 +
57 +print("STFT hop length duration is: {}s".format(hop_length_duration))
58 +print("STFT window duration is: {}s".format(n_fft_duration))
59 +
60 +# perform stft
61 +stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
62 +
63 +
64 +spectrogram = np.abs(stft)
65 +
66 +# display the spectrogram
67 +plt.figure(figsize=FIG_SIZE)
68 +librosa.display.specshow(spectrogram, sr=sample_rate, hop_length=hop_length)
69 +plt.xlabel("Time") # x axis : time
70 +plt.ylabel("Frequency") # y axis : frequency
71 +plt.colorbar() # color-code the magnitudes so the spectrogram is easy to read visually
72 +plt.title("Spectrogram")
73 +
74 +# apply logarithm to cast amplitude to Decibels
75 +log_spectrogram = librosa.amplitude_to_db(spectrogram)
76 +
77 +plt.figure(figsize=FIG_SIZE)
78 +librosa.display.specshow(log_spectrogram, sr=sample_rate, hop_length=hop_length)
79 +plt.xlabel("Time")
80 +plt.ylabel("Frequency")
81 +plt.colorbar(format="%+2.0f dB")
82 +plt.title("Spectrogram (dB)")
83 +
84 +
85 +# MFCCs
86 +# extract 13 MFCCs
87 +MFCCs = librosa.feature.mfcc(signal, sample_rate, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
88 +
89 +# display MFCCs
90 +plt.figure(figsize=FIG_SIZE)
91 +librosa.display.specshow(MFCCs, sr=sample_rate, hop_length=hop_length)
92 +plt.xlabel("Time")
93 +plt.ylabel("MFCC coefficients")
94 +plt.colorbar()
95 +plt.title("MFCCs")
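
# MEL SPECTROGRAM
# The rest of this project trains on mel-spectrogram images rather than on the raw STFT or
# the MFCCs, so the matching plot is sketched here as well (n_mels=128 and fmax=4000 mirror
# the values used in the live-prediction script; otherwise they are assumptions).
mel_spectrogram = librosa.feature.melspectrogram(signal, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=128, fmax=4000)
log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

plt.figure(figsize=FIG_SIZE)
librosa.display.specshow(log_mel_spectrogram, sr=sample_rate, hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("Mel bins")
plt.colorbar(format="%+2.0f dB")
plt.title("Mel spectrogram (dB)")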
96 +# show plots
97 +plt.show()
1 +import cv2
2 +import numpy as np
3 +import sys
4 +import os
5 +import pyaudio
6 +import librosa
7 +import librosa.display
8 +import matplotlib.pyplot as plt
9 +import time
10 +import tensorflow as tf
11 +import tensorflow.keras as keras
12 +
13 +rate = 22500
14 +chunk_size = rate // 4
15 +
16 +CATEGORIES = ["angry", "happy", "lonely", "sad"]
17 +model= keras.models.load_model("C:/Users/nokh9/Desktop/mobile_net2.h5")
18 +
19 +def prepare(mel):
20 + img_array = cv2.imread(mel)
21 + new_array = cv2.resize(img_array, (62, 78))
22 + return new_array.reshape(-1, 62, 78, 3)
23 +
24 +p = pyaudio.PyAudio()
25 +stream = p.open(format=pyaudio.paFloat32,
26 + channels=1,
27 + rate=rate,
28 + input=True,
29 + input_device_index=1,
30 + frames_per_buffer=chunk_size)
31 +
32 +
33 +frames = []
34 +
35 +
36 +do_melspec = librosa.feature.melspectrogram
37 +pwr_to_db = librosa.core.power_to_db
38 +
39 +"""
40 +while True:
41 +
42 + start = time.time()
43 +
44 + data = stream.read(chunk_size)
45 + data = np.frombuffer(data, dtype=np.float32)
46 +
47 + melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000)
48 + norm_melspec = pwr_to_db(melspec, ref=np.max)
49 +
50 + frames.append(norm_melspec)
51 +
52 + if len(frames) == 20:
53 +
54 +
55 + stack = np.hstack(frames)
56 +
57 + plt.figure(figsize=(5, 4))
58 + librosa.display.specshow(stack,fmax=4000)
59 + plt.savefig('C:/Users/nokh9/Desktop/DB/' + 'db.jpg')
60 + prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/db.jpg')])
61 + print(CATEGORIES[int(np.argmax(prediction[0]))])
62 + plt.draw()
63 + plt.pause(0.0001)
64 + plt.clf()
65 + #break
66 + frames.pop(0)
67 +
68 +
69 +
70 + t = time.time() - start
71 +
72 + print(1 / t)
73 +
74 +"""
75 +prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/lonely_1.jpg')])
76 +print(prediction)
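# Map the softmax output onto an emotion label with argmax, mirroring predict() in the
# training script (prediction has shape (1, 4), one probability per category).
predicted_label = CATEGORIES[int(np.argmax(prediction[0]))]
print("Predicted label:", predicted_label)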