# Demo.py 1.67 KB
import cv2
import numpy as np
import pyaudio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import time
import tensorflow as tf
import tensorflow.keras as keras

# Labels printed for a prediction; the list is indexed by the predicted class.
CATEGORES = ["angry", "happy", "lonely", "sad"]

# The path below is a placeholder and must be replaced with a real saved
# model.  The loader lives in the `models` submodule: `tensorflow.keras`
# exposes no top-level `load_model`, so `keras.load_model` raised
# AttributeError before any audio was captured.
model = keras.models.load_model("yout model path")

# Sample rate passed to both the audio stream and librosa (samples/second).
rate = 16000
# Samples read per iteration of the capture loop: a quarter second of audio.
chunk_size = rate // 4

def prepare(filepath):
    """Convert a saved image into the array layout passed to the model.

    The file is read with OpenCV in colour mode, resized with
    ``cv2.resize(img, (62, 78))`` and reshaped to ``(-1, 62, 78, 1)``.

    @param filepath(str) : path where your image file is
    @return array that has been reshaped for predict
    @raise OSError : if OpenCV could not read an image from ``filepath``
    """
    img_array = cv2.imread(filepath, cv2.IMREAD_COLOR)
    if img_array is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise OSError(f"could not read image file: {filepath!r}")

    # NOTE(review): the image is loaded in colour but reshaped with a
    # trailing channel dimension of 1, so the leading -1 axis absorbs the
    # colour channels.  Presumably the model was trained on arrays built
    # the same way -- confirm before switching to a grayscale read.
    new_array = cv2.resize(img_array, (62, 78))
    return new_array.reshape(-1, 62, 78, 1)

# Open a single-channel float32 input stream on input device index 1,
# delivering `chunk_size` frames per buffer at `rate` samples per second.
p = pyaudio.PyAudio()
_stream_kwargs = {
    "format": pyaudio.paFloat32,
    "channels": 1,
    "rate": rate,
    "input": True,
    "input_device_index": 1,
    "frames_per_buffer": chunk_size,
}
stream = p.open(**_stream_kwargs)


# Sliding window of the most recent per-chunk mel spectrograms (in dB).
frames = []

plt.figure(figsize=(10, 4))
do_melspec = librosa.feature.melspectrogram
pwr_to_db = librosa.core.power_to_db

# Capture audio forever: every chunk becomes one spectrogram slice, and once
# 20 slices are buffered the stacked spectrogram is rendered, saved as an
# image, re-loaded through prepare() and classified.  The try/finally makes
# sure the audio device is released when the loop is interrupted or fails.
try:
    while True:

        start = time.time()

        data = stream.read(chunk_size)
        # frombuffer is the supported way to view raw bytes as samples;
        # np.fromstring's binary mode is deprecated.
        data = np.frombuffer(data, dtype=np.float32)

        melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000)
        norm_melspec = pwr_to_db(melspec, ref=np.max)

        frames.append(norm_melspec)

        if len(frames) == 20:
            stack = np.hstack(frames)
            librosa.display.specshow(stack, fmax=4000)
            # NOTE: both paths below are placeholders; the file saved here
            # is presumably the one prepare() should read back.
            plt.savefig('your save path' + '.jpg', dpi=300)
            prediction = model.predict([prepare('path where your image is')])
            # Pick the label with the highest score.  Truncating the first
            # score with int() does not select a class among the labels.
            # Assumes the model emits one score per label -- confirm.
            print(CATEGORES[int(np.argmax(prediction[0]))])
            plt.draw()
            plt.pause(0.0001)
            plt.clf()
            # Drop the oldest slice so the window advances one chunk at a
            # time and a prediction is made on every later iteration.
            frames.pop(0)

        # Wall-clock duration of this iteration (not used further here).
        t = time.time() - start
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()