# extract_feature.py (764 bytes)
import os
import pickle

import librosa
import numpy as np
from python_speech_features import fbank
# Sampling rate (Hz) passed to both librosa.load and fbank inside extract().
sample_rate=16000
# Leftover commented-out input path; nothing in the visible code uses it.
#filename='./sunghwan/8sec2.wav'

def normalize_frames(m,Scale=True):
    """Centre ``m`` along axis 0 and optionally scale it to unit deviation.

    Args:
        m: Array-like input; statistics are taken along axis 0.
        Scale: When true, the centred values are also divided by the
            axis-0 standard deviation; when false, only the mean is removed.

    Returns:
        ``m`` minus its axis-0 mean, divided by the axis-0 standard
        deviation (plus a small constant) if ``Scale`` is set.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    # The tiny constant keeps the division finite when a column is constant.
    spread = np.std(m, axis=0) + 2e-12
    return centered / spread

def extract(filename, savename='test.p'):
    """Extract log filter-bank features from an audio file and pickle them.

    The audio is loaded through librosa at the module-level ``sample_rate``
    with ``mono=True``, turned into 40 filter-bank energies using a 25 ms
    analysis window, converted to a log scale, and mean-centred along axis 0
    with ``normalize_frames(..., Scale=False)``.

    The result is written to ``savename`` as a pickled dict with two keys:
    ``'feat'`` (the centred feature array) and ``'label'`` (the base name of
    ``savename`` with its directory and final extension removed, e.g.
    ``'./out/alice.p'`` -> ``'alice'``).

    Args:
        filename: Path of the audio file to read.
        savename: Path of the pickle file to write.

    Returns:
        None. The features are only written to disk.
    """
    # The returned sample rate and the per-frame energies are not needed.
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    filter_banks, _ = fbank(audio, samplerate=sample_rate, nfilt=40, winlen=0.025)
    # Floor at 1e-5 so log10 never sees zero; this bounds the values at -100.
    filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    feature = normalize_frames(filter_banks, Scale=False)
    # Use the file stem rather than splitting on the first '.', which produced
    # an empty label for paths such as './dir/name.p'.
    label = os.path.splitext(os.path.basename(savename))[0]
    todump = {'feat': feature, 'label': label}
    with open(savename, 'wb') as f:
        pickle.dump(todump, f)