# demo.py 1.69 KB
import cv2
import numpy as np
import sys
import os
import pyaudio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import time
import tensorflow as tf
import tensorflow.keras as keras

# Sample rate (Hz) used for the PyAudio input stream and passed to librosa.
# NOTE(review): 22500 is unusual -- librosa's default rate is 22050; confirm
# this value is intentional and not a digit transposition.
rate = 22500
# Frames read per buffer: a quarter of a second of audio at `rate`.
chunk_size = rate // 4

# Emotion labels; presumably ordered like the model's output classes --
# TODO confirm against the training code.
CATEGORIES = ["angry", "happy", "lonely", "sad"]
# Trained Keras model, loaded once at start-up from a machine-specific
# absolute path.
model= keras.models.load_model("C:/Users/nokh9/Desktop/mobile_net2.h5")

def prepare(mel):
    """Load an image file and shape it into a one-image batch for the model.

    Args:
        mel: Path of the image file to read.

    Returns:
        A numpy array of shape (1, 62, 78, 3) holding the resized pixels in
        the channel order produced by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``mel`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    if not os.path.isfile(mel):
        raise FileNotFoundError("image file not found: {!r}".format(mel))

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an obscure error inside cv2.resize.
    img_array = cv2.imread(mel)
    if img_array is None:
        raise ValueError("could not decode image file: {!r}".format(mel))

    # dsize is (width, height), so this yields a 78-row by 62-column image.
    # The original call passed a stray third positional argument (3); that
    # slot is cv2.resize's optional output buffer, not a channel count, so
    # it is dropped here without changing the result.
    # NOTE(review): the reshape below reinterprets those 78x62 pixels as
    # 62x78 without transposing. If the model was trained on 62-high,
    # 78-wide images, dsize should be (78, 62) -- confirm against the
    # training pipeline before changing it.
    new_array = cv2.resize(img_array, (62, 78))
    return new_array.reshape(-1, 62, 78, 3)

# Open a mono float32 capture stream on input device 1. The stream is opened
# as soon as the script runs and is not closed anywhere in the visible code.
p = pyaudio.PyAudio()
stream = p.open(
    input=True,
    input_device_index=1,
    channels=1,
    format=pyaudio.paFloat32,
    rate=rate,
    frames_per_buffer=chunk_size,
)

# Per-chunk spectrograms; appended to by the capture loop further down
# (currently disabled).
frames = []

# Short aliases for the two librosa calls used by that loop.
do_melspec = librosa.feature.melspectrogram
pwr_to_db = librosa.core.power_to_db

# Disabled real-time loop, kept as a bare string literal so it never runs.
# When enabled it reads quarter-second chunks from `stream`, converts each to
# a dB-scaled mel spectrogram, keeps a sliding window of 20 chunks in
# `frames`, renders the window to db.jpg and classifies that image, then
# prints the loop rate (iterations per second).
# NOTE(review) before re-enabling:
#   - np.fromstring on raw bytes is deprecated in NumPy; np.frombuffer is
#     the documented replacement.
#   - int(prediction[0][0]) truncates the first output value to an index; if
#     the model emits one score per category this likely needs an argmax --
#     confirm against the model's output layer.
"""
while True:

    start = time.time()

    data = stream.read(chunk_size)
    data = np.fromstring(data, dtype=np.float32)

    melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000)
    norm_melspec = pwr_to_db(melspec, ref=np.max)

    frames.append(norm_melspec)
    
    if len(frames) == 20:

        
        stack = np.hstack(frames)

        plt.figure(figsize=(5, 4))
        librosa.display.specshow(stack,fmax=4000)
        plt.savefig('C:/Users/nokh9/Desktop/DB/' + 'db.jpg')
        prediction = model.predict([prepare(r'C:/Users/nokh9/Desktop/DB/db.jpg')])
        print(CATEGORIES[int(prediction[0][0])])
        plt.draw()
        plt.pause(0.0001)
        plt.clf()
        #break
        frames.pop(0)
        


    t = time.time() - start

    print(1 / t)

"""
# Run the model on one image from disk and print its raw output.
sample_batch = prepare(r'C:/Users/nokh9/Desktop/DB/lonely_1.jpg')
prediction = model.predict([sample_batch])
print(prediction)