# extract_server.py 933 Bytes
import librosa
import numpy as np
from python_speech_features import fbank
import pickle
# Sampling rate passed to librosa.load (as `sr`) and to fbank (as `samplerate`)
# in extract(); presumably in Hz.
sample_rate = 16000
#filename='./sunghwan/8sec2.wav'


def normalize_frames(m, Scale=True):
    """Normalize an array along axis 0.

    The mean over axis 0 is always subtracted. When ``Scale`` is true the
    centered values are additionally divided by the standard deviation over
    axis 0, with a small constant (2e-12) added to the divisor so that a
    zero standard deviation does not cause a division by zero.

    Args:
        m: Array-like accepted by ``np.mean`` / ``np.std``.
        Scale: If true, apply variance scaling in addition to mean removal.

    Returns:
        The normalized array.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    spread = np.std(m, axis=0) + 2e-12
    return centered / spread


def extract(filename, label='test.p'):
    """Compute log filter-bank features for an audio file and pickle them.

    The audio is loaded with ``librosa.load`` at ``sample_rate`` as mono,
    passed through ``fbank`` (40 filters, ``winlen=0.025``), converted to a
    log scale, and mean-normalized along axis 0 via ``normalize_frames``
    (no variance scaling). The result is written to the current working
    directory as a pickle containing ``{'feat': ..., 'label': ...}``.

    Args:
        filename: Path of the audio file to load.
        label: Value stored under the ``'label'`` key. It also selects the
            output file: ``'test.p'`` when ``label`` equals ``'test.p'``,
            otherwise ``'enroll.p'``.

    Returns:
        None. The only effect is the pickle file written to disk.
    """
    # The sampling rate returned by librosa is not needed.
    waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)

    # Only the first element of the pair returned by fbank is used.
    fbank_feats, _ = fbank(
        waveform,
        samplerate=sample_rate,
        nfilt=40,
        winlen=0.025,
    )

    # Clamp to 1e-5 before the log so zero values do not produce -inf.
    log_fbank = 20 * np.log10(np.maximum(fbank_feats, 1e-5))
    feature = normalize_frames(log_fbank, Scale=False)

    # Any label other than the default goes to the enrollment file.
    savename = 'enroll.p' if label != 'test.p' else 'test.p'

    with open(savename, 'wb') as out:
        pickle.dump({'feat': feature, 'label': label}, out)