Showing
17 changed files
with
1266 additions
and
0 deletions
Speaker_Recognition @ df38711f
| 1 | +Subproject commit df38711f36cfb15ee578d14a70d0141d1d0a8134 |
Speaker_Recognition/identification3.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model3 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: Move the model to the GPU when true.
        log_dir: Directory that holds ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, switched to evaluation mode, with the checkpoint's
        ``'state_dict'`` entry loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = os.path.join(log_dir, 'checkpoint_' + str(cp_num) + '.pth')
    # Without map_location a checkpoint written on a GPU cannot be read on a
    # CPU-only machine, even though use_cuda is False.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    state_dict = checkpoint['state_dict']
    # A model saved through nn.DataParallel prefixes every key with `module.`;
    # drop that prefix so the plain (unwrapped) model accepts the weights.
    prefix = 'module.'
    if state_dict and all(key.startswith(prefix) for key in state_dict):
        state_dict = {key[len(prefix):]: value for key, value in state_dict.items()}
    model.load_state_dict(state_dict)
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature index found under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``'filename'`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)

    # regex=False matches the literal text; as a regular expression the `.`
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]

    # Reset the index so both frames start at 0 again.
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
| 38 | + | ||
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every ``<speaker>.pth`` file stored directly in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``.pth`` file per enrolled speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (for example ``'cpu'`` to read GPU-saved embeddings without a GPU).

    Returns:
        dict mapping each file name without its ``.pth`` extension to the
        object returned by ``torch.load``.
    """
    embeddings = {}
    # sorted() makes the load order independent of the file system.
    for f in sorted(os.listdir(embedding_dir)):
        # splitext removes only the trailing extension; str.replace would also
        # strip '.pth' occurring in the middle of a name.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            continue  # not an embedding file
        embedding_path = os.path.join(embedding_dir, f)
        # NOTE: torch.load unpickles its input; only use trusted directories.
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the utterance stored in ``filename``.

    The features are cut into consecutive chunks of ``test_frames`` frames
    (the last chunk may be shorter); the model's first output is summed over
    all chunks and the sum is normalised with ``l2_norm``.

    Args:
        use_cuda: Move every chunk to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning ``(activation, _)`` for a chunk.
        test_frames: Number of frames per chunk.

    Returns:
        The normalised, summed activation.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    # The original notes the features as (n_frames, n_dims) -- not verifiable here.
    features, _ = read_MFB(filename)

    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        # Previously this fell through to l2_norm(0, 1) and failed there with
        # an unrelated AttributeError.
        raise ValueError('no feature frames found in %s' % filename)

    to_tensor = ToTensorTestInput()  # one instance is enough for all chunks
    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = to_tensor(features[start:start + test_frames])

            if use_cuda:
                segment = segment.cuda()
            segment_activation, _ = model(segment)
            activation += torch.sum(segment_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale each row of a 2-D tensor to unit L2 length, then multiply by ``alpha``.

    Args:
        input: Tensor reduced over dimension 1; the norms are reshaped with
            ``view(-1, 1)``, so a ``(rows, dim)`` tensor is expected.
        alpha: Factor applied to the normalised rows.

    Returns:
        Tensor with the same size as ``input``.
    """
    original_shape = input.size()
    # 1e-10 keeps the division finite for an all-zero row.
    row_norms = input.pow(2).sum(1).add(1e-10).sqrt()
    unit_rows = input.div(row_norms.view(-1, 1).expand_as(input))
    return unit_rows.view(original_shape) * alpha
| 81 | + | ||
def perform_identification(use_cuda, model, embeddings, test_filename, test_frames, spk_list):
    """Pick the enrolled speaker whose embedding is closest to the test utterance.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Embedding network, forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to enrolled embedding tensor.
        test_filename: Path of the test feature file; its parent directory
            name (up to the first ``_``) is printed as the true speaker.
        test_frames: Forwarded to ``get_embeddings``.
        spk_list: Speaker ids to score; each must be a key of ``embeddings``.

    Returns:
        The id with the highest cosine similarity, or ``None`` when
        ``spk_list`` is empty.
    """
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)
    max_score = float('-inf')
    best_spk = None
    for spk in spk_list:
        # Enrolled embeddings may have been saved on a different device.
        enrolled = embeddings[spk].to(test_embedding.device)
        # .item() gives a plain float instead of a 1-element array taken
        # through the legacy `.data` attribute.
        score = F.cosine_similarity(test_embedding, enrolled).item()
        if score > max_score:
            max_score = score
            best_spk = spk
    # os.path handles the platform's separator; splitting on '/' does not.
    speaker_dir = os.path.basename(os.path.dirname(os.path.normpath(test_filename)))
    true_spk = speaker_dir.split('_')[0]
    print("\n=== Speaker identification ===")
    print("True speaker : %s\nPredicted speaker : %s\nResult : %s\n" %(true_spk, best_spk, true_spk==best_spk))
    return best_spk
| 97 | + | ||
def main():
    """Identify the speaker of one hard-coded test utterance and print the result.

    Returns:
        The speaker id returned by ``perform_identification``.
    """
    log_dir = 'new_model3'  # Where the checkpoints are saved
    embedding_dir = 'enroll_embeddings3'  # Where embeddings are saved
    test_dir = 'feat_logfbank_nfilt40/test/'  # Where test features are saved

    # Settings
    # Use the GPU only when one is present; a hard-coded True fails in
    # model.cuda() on CPU-only machines.
    use_cuda = torch.cuda.is_available()
    embedding_size = 128  # Dimension of speaker embeddings
    cp_num = 11  # Which checkpoint to use?
    n_classes = 241  # How many speakers in training data?
    test_frames = 100  # Split the test utterance

    # Load model from checkpoint
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # NOTE(review): neither frame is used below; the call is kept so the
    # script still reads c.TEST_FEAT_DIR exactly as before.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    # Load enroll embeddings
    embeddings = load_enroll_embeddings(embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'

    # Speakers the test utterance is scored against.
    spk_list = [
        '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
        '229M2031', '230M4087', '233F4013', '236M3043', '240M3063',
        '777M7777', '778M8777',
    ]

    # Set the test speaker
    test_speaker = '233F4013'

    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    # Perform the test
    best_spk = perform_identification(use_cuda, model, embeddings, test_path, test_frames, spk_list)
    return best_spk


if __name__ == '__main__':
    main()
Speaker_Recognition/identification4.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model4 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: Move the model to the GPU when true.
        log_dir: Directory that holds ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, switched to evaluation mode, with the checkpoint's
        ``'state_dict'`` entry loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = os.path.join(log_dir, 'checkpoint_' + str(cp_num) + '.pth')
    # Without map_location a checkpoint written on a GPU cannot be read on a
    # CPU-only machine, even though use_cuda is False.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    state_dict = checkpoint['state_dict']
    # A model saved through nn.DataParallel prefixes every key with `module.`;
    # drop that prefix so the plain (unwrapped) model accepts the weights.
    prefix = 'module.'
    if state_dict and all(key.startswith(prefix) for key in state_dict):
        state_dict = {key[len(prefix):]: value for key, value in state_dict.items()}
    model.load_state_dict(state_dict)
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature index found under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``'filename'`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)

    # regex=False matches the literal text; as a regular expression the `.`
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]

    # Reset the index so both frames start at 0 again.
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
| 38 | + | ||
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every ``<speaker>.pth`` file stored directly in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``.pth`` file per enrolled speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (for example ``'cpu'`` to read GPU-saved embeddings without a GPU).

    Returns:
        dict mapping each file name without its ``.pth`` extension to the
        object returned by ``torch.load``.
    """
    embeddings = {}
    # sorted() makes the load order independent of the file system.
    for f in sorted(os.listdir(embedding_dir)):
        # splitext removes only the trailing extension; str.replace would also
        # strip '.pth' occurring in the middle of a name.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            continue  # not an embedding file
        embedding_path = os.path.join(embedding_dir, f)
        # NOTE: torch.load unpickles its input; only use trusted directories.
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the utterance stored in ``filename``.

    The features are cut into consecutive chunks of ``test_frames`` frames
    (the last chunk may be shorter); the model's first output is summed over
    all chunks and the sum is normalised with ``l2_norm``.

    Args:
        use_cuda: Move every chunk to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning ``(activation, _)`` for a chunk.
        test_frames: Number of frames per chunk.

    Returns:
        The normalised, summed activation.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    # The original notes the features as (n_frames, n_dims) -- not verifiable here.
    features, _ = read_MFB(filename)

    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        # Previously this fell through to l2_norm(0, 1) and failed there with
        # an unrelated AttributeError.
        raise ValueError('no feature frames found in %s' % filename)

    to_tensor = ToTensorTestInput()  # one instance is enough for all chunks
    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = to_tensor(features[start:start + test_frames])

            if use_cuda:
                segment = segment.cuda()
            segment_activation, _ = model(segment)
            activation += torch.sum(segment_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale each row of a 2-D tensor to unit L2 length, then multiply by ``alpha``.

    Args:
        input: Tensor reduced over dimension 1; the norms are reshaped with
            ``view(-1, 1)``, so a ``(rows, dim)`` tensor is expected.
        alpha: Factor applied to the normalised rows.

    Returns:
        Tensor with the same size as ``input``.
    """
    original_shape = input.size()
    # 1e-10 keeps the division finite for an all-zero row.
    row_norms = input.pow(2).sum(1).add(1e-10).sqrt()
    unit_rows = input.div(row_norms.view(-1, 1).expand_as(input))
    return unit_rows.view(original_shape) * alpha
| 81 | + | ||
def perform_identification(use_cuda, model, embeddings, test_filename, test_frames, spk_list):
    """Pick the enrolled speaker whose embedding is closest to the test utterance.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Embedding network, forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to enrolled embedding tensor.
        test_filename: Path of the test feature file; its parent directory
            name (up to the first ``_``) is printed as the true speaker.
        test_frames: Forwarded to ``get_embeddings``.
        spk_list: Speaker ids to score; each must be a key of ``embeddings``.

    Returns:
        The id with the highest cosine similarity, or ``None`` when
        ``spk_list`` is empty.
    """
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)
    max_score = float('-inf')
    best_spk = None
    for spk in spk_list:
        # Enrolled embeddings may have been saved on a different device.
        enrolled = embeddings[spk].to(test_embedding.device)
        # .item() gives a plain float instead of a 1-element array taken
        # through the legacy `.data` attribute.
        score = F.cosine_similarity(test_embedding, enrolled).item()
        if score > max_score:
            max_score = score
            best_spk = spk
    # os.path handles the platform's separator; splitting on '/' does not.
    speaker_dir = os.path.basename(os.path.dirname(os.path.normpath(test_filename)))
    true_spk = speaker_dir.split('_')[0]
    print("\n=== Speaker identification ===")
    print("True speaker : %s\nPredicted speaker : %s\nResult : %s\n" %(true_spk, best_spk, true_spk==best_spk))
    return best_spk
| 97 | + | ||
def main():
    """Identify the speaker of one hard-coded test utterance and print the result.

    Returns:
        The speaker id returned by ``perform_identification``.
    """
    log_dir = 'new_model4'  # Where the checkpoints are saved
    embedding_dir = 'enroll_embeddings4'  # Where embeddings are saved
    test_dir = 'feat_logfbank_nfilt40/test/'  # Where test features are saved

    # Settings
    # Use the GPU only when one is present; a hard-coded True fails in
    # model.cuda() on CPU-only machines.
    use_cuda = torch.cuda.is_available()
    embedding_size = 128  # Dimension of speaker embeddings
    cp_num = 25  # Which checkpoint to use?
    n_classes = 241  # How many speakers in training data?
    test_frames = 100  # Split the test utterance

    # Load model from checkpoint
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # NOTE(review): neither frame is used below; the call is kept so the
    # script still reads c.TEST_FEAT_DIR exactly as before.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    # Load enroll embeddings
    embeddings = load_enroll_embeddings(embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'

    # Speakers the test utterance is scored against.
    spk_list = [
        '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
        '229M2031', '230M4087', '233F4013', '236M3043', '240M3063',
        '777M7777', '778M8777',
    ]

    # Set the test speaker
    test_speaker = '207F2088'

    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    # Perform the test
    best_spk = perform_identification(use_cuda, model, embeddings, test_path, test_frames, spk_list)
    return best_spk


if __name__ == '__main__':
    main()
Speaker_Recognition/identification5.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model5 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: Move the model to the GPU when true.
        log_dir: Directory that holds ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, switched to evaluation mode, with the checkpoint's
        ``'state_dict'`` entry loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = os.path.join(log_dir, 'checkpoint_' + str(cp_num) + '.pth')
    # Without map_location a checkpoint written on a GPU cannot be read on a
    # CPU-only machine, even though use_cuda is False.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    state_dict = checkpoint['state_dict']
    # A model saved through nn.DataParallel prefixes every key with `module.`;
    # drop that prefix so the plain (unwrapped) model accepts the weights.
    prefix = 'module.'
    if state_dict and all(key.startswith(prefix) for key in state_dict):
        state_dict = {key[len(prefix):]: value for key, value in state_dict.items()}
    model.load_state_dict(state_dict)
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature index found under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``'filename'`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)

    # regex=False matches the literal text; as a regular expression the `.`
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]

    # Reset the index so both frames start at 0 again.
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
| 38 | + | ||
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every ``<speaker>.pth`` file stored directly in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``.pth`` file per enrolled speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (for example ``'cpu'`` to read GPU-saved embeddings without a GPU).

    Returns:
        dict mapping each file name without its ``.pth`` extension to the
        object returned by ``torch.load``.
    """
    embeddings = {}
    # sorted() makes the load order independent of the file system.
    for f in sorted(os.listdir(embedding_dir)):
        # splitext removes only the trailing extension; str.replace would also
        # strip '.pth' occurring in the middle of a name.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            continue  # not an embedding file
        embedding_path = os.path.join(embedding_dir, f)
        # NOTE: torch.load unpickles its input; only use trusted directories.
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the utterance stored in ``filename``.

    The features are cut into consecutive chunks of ``test_frames`` frames
    (the last chunk may be shorter); the model's first output is summed over
    all chunks and the sum is normalised with ``l2_norm``.

    Args:
        use_cuda: Move every chunk to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning ``(activation, _)`` for a chunk.
        test_frames: Number of frames per chunk.

    Returns:
        The normalised, summed activation.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    # The original notes the features as (n_frames, n_dims) -- not verifiable here.
    features, _ = read_MFB(filename)

    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        # Previously this fell through to l2_norm(0, 1) and failed there with
        # an unrelated AttributeError.
        raise ValueError('no feature frames found in %s' % filename)

    to_tensor = ToTensorTestInput()  # one instance is enough for all chunks
    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = to_tensor(features[start:start + test_frames])

            if use_cuda:
                segment = segment.cuda()
            segment_activation, _ = model(segment)
            activation += torch.sum(segment_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale each row of a 2-D tensor to unit L2 length, then multiply by ``alpha``.

    Args:
        input: Tensor reduced over dimension 1; the norms are reshaped with
            ``view(-1, 1)``, so a ``(rows, dim)`` tensor is expected.
        alpha: Factor applied to the normalised rows.

    Returns:
        Tensor with the same size as ``input``.
    """
    original_shape = input.size()
    # 1e-10 keeps the division finite for an all-zero row.
    row_norms = input.pow(2).sum(1).add(1e-10).sqrt()
    unit_rows = input.div(row_norms.view(-1, 1).expand_as(input))
    return unit_rows.view(original_shape) * alpha
| 81 | + | ||
def perform_identification(use_cuda, model, embeddings, test_filename, test_frames, spk_list):
    """Pick the enrolled speaker whose embedding is closest to the test utterance.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Embedding network, forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to enrolled embedding tensor.
        test_filename: Path of the test feature file; its parent directory
            name (up to the first ``_``) is printed as the true speaker.
        test_frames: Forwarded to ``get_embeddings``.
        spk_list: Speaker ids to score; each must be a key of ``embeddings``.

    Returns:
        The id with the highest cosine similarity, or ``None`` when
        ``spk_list`` is empty.
    """
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)
    max_score = float('-inf')
    best_spk = None
    for spk in spk_list:
        # Enrolled embeddings may have been saved on a different device.
        enrolled = embeddings[spk].to(test_embedding.device)
        # .item() gives a plain float instead of a 1-element array taken
        # through the legacy `.data` attribute.
        score = F.cosine_similarity(test_embedding, enrolled).item()
        if score > max_score:
            max_score = score
            best_spk = spk
    # os.path handles the platform's separator; splitting on '/' does not.
    speaker_dir = os.path.basename(os.path.dirname(os.path.normpath(test_filename)))
    true_spk = speaker_dir.split('_')[0]
    print("\n=== Speaker identification ===")
    print("True speaker : %s\nPredicted speaker : %s\nResult : %s\n" %(true_spk, best_spk, true_spk==best_spk))
    return best_spk
| 97 | + | ||
def main():
    """Identify the speaker of one hard-coded test utterance and print the result.

    Returns:
        The speaker id returned by ``perform_identification``.
    """
    log_dir = 'new_model5'  # Where the checkpoints are saved
    embedding_dir = 'enroll_embeddings5'  # Where embeddings are saved
    test_dir = 'feat_logfbank_nfilt40/test/'  # Where test features are saved

    # Settings
    # Use the GPU only when one is present; a hard-coded True fails in
    # model.cuda() on CPU-only machines.
    use_cuda = torch.cuda.is_available()
    embedding_size = 128  # Dimension of speaker embeddings
    cp_num = 30  # Which checkpoint to use?
    n_classes = 241  # How many speakers in training data?
    test_frames = 100  # Split the test utterance

    # Load model from checkpoint
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # NOTE(review): neither frame is used below; the call is kept so the
    # script still reads c.TEST_FEAT_DIR exactly as before.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    # Load enroll embeddings
    embeddings = load_enroll_embeddings(embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'

    # Speakers the test utterance is scored against.
    spk_list = [
        '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
        '229M2031', '230M4087', '233F4013', '236M3043', '240M3063',
        '777M7777', '778M8777',
    ]

    # Set the test speaker
    test_speaker = '207F2088'

    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    # Perform the test
    best_spk = perform_identification(use_cuda, model, embeddings, test_path, test_frames, spk_list)
    return best_spk


if __name__ == '__main__':
    main()
Speaker_Recognition/model/model3.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn as nn | ||
| 3 | +import torch.nn.functional as F | ||
| 4 | +from torch.autograd import Function | ||
| 5 | +import model.resnet1 as resnet | ||
| 6 | + | ||
| 7 | + | ||
class background_resnet(nn.Module):
    """Speaker-embedding network: a ``model.resnet1`` backbone plus two linear layers.

    ``forward`` returns the ``fc0`` output (the speaker embedding) together
    with the class scores produced by ``last``.
    """

    # Constructor names that may be looked up on the ``resnet`` module.
    _BACKBONES = ('resnet50', 'resnet101', 'resnet152', 'resnet18', 'resnet34')
    # Backbone sub-modules applied, in this order, before pooling.
    _STAGES = ('conv1', 'bn1', 'relu', 'layer1', 'layer2', 'layer3', 'layer4', 'layer5')

    def __init__(self, embedding_size, num_classes, backbone='resnet18'):
        """Create the backbone and the embedding / classification layers.

        Args:
            embedding_size: Output width of ``fc0`` (the speaker embedding).
            num_classes: Output width of the final classifier ``last``.
            backbone: Name of the constructor to call on ``resnet``.

        Raises:
            RuntimeError: If ``backbone`` is not one of the known names.
        """
        super().__init__()
        self.backbone = backbone
        if backbone not in self._BACKBONES:
            raise RuntimeError('unknown backbone: {}'.format(backbone))
        # The backbone is always constructed with pretrained=False.
        self.pretrained = getattr(resnet, backbone)(pretrained=False)

        # fc0 takes 256 input features, i.e. it assumes the pooled backbone
        # output has 256 channels.
        self.fc0 = nn.Linear(256, embedding_size)
        self.bn0 = nn.BatchNorm1d(embedding_size)
        self.relu = nn.ReLU()
        self.last = nn.Linear(embedding_size, num_classes)

    def forward(self, x):
        """Return ``(spk_embedding, class_scores)`` for a batch ``x``.

        The original notes the input as minibatch x 1 x 40 x 40.
        """
        for stage in self._STAGES:
            x = getattr(self.pretrained, stage)(x)

        # Global average pool to 1x1, then flatten to (batch, channels).
        pooled = F.adaptive_avg_pool2d(x, 1)
        flat = pooled.view(x.size(0), -1)
        spk_embedding = self.fc0(flat)
        scores = self.last(F.relu(self.bn0(spk_embedding)))

        return spk_embedding, scores
| ... | \ No newline at end of file | ... | \ No newline at end of file |
Speaker_Recognition/model/model4.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn as nn | ||
| 3 | +import torch.nn.functional as F | ||
| 4 | +from torch.autograd import Function | ||
| 5 | +import model.resnet1 as resnet | ||
| 6 | + | ||
| 7 | + | ||
class background_resnet(nn.Module):
    """Speaker-embedding network: a ``model.resnet1`` backbone plus two linear layers.

    ``forward`` returns the ``fc0`` output (the speaker embedding) together
    with the class scores produced by ``last``.
    """

    # Constructor names that may be looked up on the ``resnet`` module.
    _BACKBONES = ('resnet50', 'resnet101', 'resnet152', 'resnet18', 'resnet34')
    # Backbone sub-modules applied, in this order, before pooling.
    _STAGES = ('conv1', 'bn1', 'relu', 'layer1', 'layer2', 'layer3', 'layer4', 'layer5')

    def __init__(self, embedding_size, num_classes, backbone='resnet34'):
        """Create the backbone and the embedding / classification layers.

        Args:
            embedding_size: Output width of ``fc0`` (the speaker embedding).
            num_classes: Output width of the final classifier ``last``.
            backbone: Name of the constructor to call on ``resnet``.

        Raises:
            RuntimeError: If ``backbone`` is not one of the known names.
        """
        super().__init__()
        self.backbone = backbone
        if backbone not in self._BACKBONES:
            raise RuntimeError('unknown backbone: {}'.format(backbone))
        # The backbone is always constructed with pretrained=False.
        self.pretrained = getattr(resnet, backbone)(pretrained=False)

        # Original note (translated): changing this to e.g. 512 makes it
        # possible to use resnet.
        self.fc0 = nn.Linear(256, embedding_size)
        self.bn0 = nn.BatchNorm1d(embedding_size)
        self.relu = nn.ReLU()
        self.last = nn.Linear(embedding_size, num_classes)

    def forward(self, x):
        """Return ``(spk_embedding, class_scores)`` for a batch ``x``.

        The original notes the input as minibatch x 1 x 40 x 40.
        """
        for stage in self._STAGES:
            x = getattr(self.pretrained, stage)(x)

        # Global average pool to 1x1, then flatten to (batch, channels).
        pooled = F.adaptive_avg_pool2d(x, 1)
        flat = pooled.view(x.size(0), -1)
        spk_embedding = self.fc0(flat)
        scores = self.last(F.relu(self.bn0(spk_embedding)))

        return spk_embedding, scores
| ... | \ No newline at end of file | ... | \ No newline at end of file |
Speaker_Recognition/model/model5.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn as nn | ||
| 3 | +import torch.nn.functional as F | ||
| 4 | +from torch.autograd import Function | ||
| 5 | +import model.resnet1 as resnet | ||
| 6 | + | ||
| 7 | + | ||
class background_resnet(nn.Module):
    """Speaker-embedding network: a ``model.resnet1`` backbone plus two linear layers.

    ``forward`` returns the ``fc0`` output (the speaker embedding) together
    with the class scores produced by ``last``.
    """

    # Constructor names that may be looked up on the ``resnet`` module.
    _BACKBONES = ('resnet50', 'resnet101', 'resnet152', 'resnet18', 'resnet34')
    # Backbone sub-modules applied, in this order, before pooling
    # (this variant stops at layer4).
    _STAGES = ('conv1', 'bn1', 'relu', 'layer1', 'layer2', 'layer3', 'layer4')

    def __init__(self, embedding_size, num_classes, backbone='resnet50'):
        """Create the backbone and the embedding / classification layers.

        Args:
            embedding_size: Output width of ``fc0`` (the speaker embedding).
            num_classes: Output width of the final classifier ``last``.
            backbone: Name of the constructor to call on ``resnet``.

        Raises:
            RuntimeError: If ``backbone`` is not one of the known names.
        """
        super().__init__()
        self.backbone = backbone
        if backbone not in self._BACKBONES:
            raise RuntimeError('unknown backbone: {}'.format(backbone))
        # The backbone is always constructed with pretrained=False.
        self.pretrained = getattr(resnet, backbone)(pretrained=False)

        # Original note (translated): changing this to e.g. 512 makes it
        # possible to use resnet.
        self.fc0 = nn.Linear(512, embedding_size)
        self.bn0 = nn.BatchNorm1d(embedding_size)
        self.relu = nn.ReLU()
        self.last = nn.Linear(embedding_size, num_classes)

    def forward(self, x):
        """Return ``(spk_embedding, class_scores)`` for a batch ``x``.

        The original notes the input as minibatch x 1 x 40 x 40.
        """
        for stage in self._STAGES:
            x = getattr(self.pretrained, stage)(x)

        # Global average pool to 1x1, then flatten to (batch, channels).
        pooled = F.adaptive_avg_pool2d(x, 1)
        flat = pooled.view(x.size(0), -1)
        spk_embedding = self.fc0(flat)
        scores = self.last(F.relu(self.bn0(spk_embedding)))

        return spk_embedding, scores
| ... | \ No newline at end of file | ... | \ No newline at end of file |
Speaker_Recognition/train3.py
0 → 100644
This diff is collapsed. Click to expand it.
Speaker_Recognition/train4.py
0 → 100644
This diff is collapsed. Click to expand it.
Speaker_Recognition/train4_merge.py
0 → 100644
This diff is collapsed. Click to expand it.
Speaker_Recognition/train4_zeroth.py
0 → 100644
This diff is collapsed. Click to expand it.
Speaker_Recognition/train5.py
0 → 100644
This diff is collapsed. Click to expand it.
Speaker_Recognition/verification3.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model3 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: Move the model to the GPU when true.
        log_dir: Directory that holds ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, switched to evaluation mode, with the checkpoint's
        ``'state_dict'`` entry loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = os.path.join(log_dir, 'checkpoint_' + str(cp_num) + '.pth')
    # Without map_location a checkpoint written on a GPU cannot be read on a
    # CPU-only machine, even though use_cuda is False.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    state_dict = checkpoint['state_dict']
    # A model saved through nn.DataParallel prefixes every key with `module.`;
    # drop that prefix so the plain (unwrapped) model accepts the weights.
    prefix = 'module.'
    if state_dict and all(key.startswith(prefix) for key in state_dict):
        state_dict = {key[len(prefix):]: value for key, value in state_dict.items()}
    model.load_state_dict(state_dict)
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature index found under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``'filename'`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)

    # regex=False matches the literal text; as a regular expression the `.`
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[DB_all['filename'].str.contains('enroll.p', regex=False)]
    test_DB = DB_all[DB_all['filename'].str.contains('test.p', regex=False)]

    # Reset the index so both frames start at 0 again.
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
| 38 | + | ||
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every ``<speaker>.pth`` file stored directly in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``.pth`` file per enrolled speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (for example ``'cpu'`` to read GPU-saved embeddings without a GPU).

    Returns:
        dict mapping each file name without its ``.pth`` extension to the
        object returned by ``torch.load``.
    """
    embeddings = {}
    # sorted() makes the load order independent of the file system.
    for f in sorted(os.listdir(embedding_dir)):
        # splitext removes only the trailing extension; str.replace would also
        # strip '.pth' occurring in the middle of a name.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            continue  # not an embedding file
        embedding_path = os.path.join(embedding_dir, f)
        # NOTE: torch.load unpickles its input; only use trusted directories.
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the utterance stored in ``filename``.

    The features are cut into consecutive chunks of ``test_frames`` frames
    (the last chunk may be shorter); the model's first output is summed over
    all chunks and the sum is normalised with ``l2_norm``.

    Args:
        use_cuda: Move every chunk to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning ``(activation, _)`` for a chunk.
        test_frames: Number of frames per chunk.

    Returns:
        The normalised, summed activation.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    # The original notes the features as (n_frames, n_dims) -- not verifiable here.
    features, _ = read_MFB(filename)

    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        # Previously this fell through to l2_norm(0, 1) and failed there with
        # an unrelated AttributeError.
        raise ValueError('no feature frames found in %s' % filename)

    to_tensor = ToTensorTestInput()  # one instance is enough for all chunks
    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = to_tensor(features[start:start + test_frames])

            if use_cuda:
                segment = segment.cuda()
            segment_activation, _ = model(segment)
            activation += torch.sum(segment_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale each row of a 2-D tensor to unit L2 length, then multiply by ``alpha``.

    Args:
        input: Tensor reduced over dimension 1; the norms are reshaped with
            ``view(-1, 1)``, so a ``(rows, dim)`` tensor is expected.
        alpha: Factor applied to the normalised rows.

    Returns:
        Tensor with the same size as ``input``.
    """
    original_shape = input.size()
    # 1e-10 keeps the division finite for an all-zero row.
    row_norms = input.pow(2).sum(1).add(1e-10).sqrt()
    unit_rows = input.div(row_norms.view(-1, 1).expand_as(input))
    return unit_rows.view(original_shape) * alpha
| 81 | + | ||
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Compare a test utterance with one enrolled speaker and print Accept/Reject.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Embedding network, forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to enrolled embedding tensor.
        enroll_speaker: Key of the enrolled embedding to compare against.
        test_filename: Path of the test feature file; its parent directory
            name (up to the first ``_``) is printed as the claimed speaker.
        test_frames: Forwarded to ``get_embeddings``.
        thres: The result is 'Accept' only when the cosine similarity is
            strictly greater than this value.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # The enrolled embedding may have been saved on a different device.
    enroll_embedding = enroll_embedding.to(test_embedding.device)
    # .item() gives a plain float; formatting a 1-element NumPy array with
    # %f relies on an implicit array-to-scalar conversion that NumPy deprecates.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # os.path handles the platform's separator; splitting on '/' does not.
    speaker_dir = os.path.basename(os.path.dirname(os.path.normpath(test_filename)))
    test_spk = speaker_dir.split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
| 98 | + | ||
def main():
    """Run one hard-coded verification trial and print the decision."""
    log_dir = 'new_model3'  # Where the checkpoints are saved
    embedding_dir = 'enroll_embeddings3'  # Where embeddings are saved
    test_dir = 'feat_logfbank_nfilt40/test/'  # Where test features are saved

    # Settings
    # Use the GPU only when one is present; a hard-coded True fails in
    # model.cuda() on CPU-only machines.
    use_cuda = torch.cuda.is_available()
    embedding_size = 128  # Dimension of speaker embeddings
    cp_num = 11  # Which checkpoint to use?
    n_classes = 241  # How many speakers in training data?
    test_frames = 100  # Split the test utterance

    # Load model from checkpoint
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # NOTE(review): neither frame is used below; the call is kept so the
    # script still reads c.TEST_FEAT_DIR exactly as before.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    # Load enroll embeddings
    embeddings = load_enroll_embeddings(embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'

    # Set the true speaker
    enroll_speaker = '103F3021'

    # Set the claimed speaker
    test_speaker = '207F2088'

    # Threshold
    thres = 0.95

    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    # Perform the test
    perform_verification(use_cuda, model, embeddings, enroll_speaker, test_path, test_frames, thres)


if __name__ == '__main__':
    main()
Speaker_Recognition/verification4.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model4 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: When true, the model is moved to the GPU before loading.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Passed to ``background_resnet`` as ``embedding_size``.
        n_classes: Passed to ``background_resnet`` as ``num_classes``.

    Returns:
        The model with the checkpoint's ``state_dict`` loaded, in eval mode.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = log_dir + '/checkpoint_' + str(cp_num) + '.pth'
    # Without CUDA, tensors saved from a GPU run must be remapped to the CPU;
    # otherwise torch.load tries to restore them onto a CUDA device and fails.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    # NOTE(review): the state dict is loaded as-is. A checkpoint written from a
    # DataParallel wrapper (keys prefixed with `module.`) would need that
    # prefix stripped first; no such stripping is done here.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature listing under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        A pair ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        the literal text ``enroll.p`` and ``test.p`` respectively, each with a
        fresh 0-based index.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False: by default str.contains interprets the pattern as a regular
    # expression, in which '.' matches any character.
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    return enroll_DB.reset_index(drop=True), test_DB.reset_index(drop=True)
| 38 | + | ||
def load_enroll_embeddings(embedding_dir):
    """Load every ``<speaker>.pth`` file in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one saved embedding per speaker.

    Returns:
        Dict mapping the file name without its ``.pth`` extension to the
        object returned by ``torch.load`` for that file. Entries that do not
        end in ``.pth`` are skipped.
    """
    embeddings = {}
    for f in os.listdir(embedding_dir):
        # splitext removes only the trailing extension, so a '.pth' occurring
        # in the middle of a name is preserved in the speaker id.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            # Not an embedding file (stray file or sub-directory).
            continue
        embeddings[spk] = torch.load(os.path.join(embedding_dir, f))

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the features stored in ``filename``.

    The features are cut into consecutive segments of at most ``test_frames``
    frames. Each segment is passed through ``model``, the first output of
    every call is summed over dimension 0 and accumulated, and the total is
    normalised with ``l2_norm(..., 1)``.

    Args:
        use_cuda: When true, each segment is moved to the GPU before the
            forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning a pair whose first element is the activation.
        test_frames: Maximum number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    features, _ = read_MFB(filename)  # features size:(n_frames, n_dims)
    n_frames = len(features)
    if n_frames == 0:
        # With no segments the accumulator would stay the int 0 and l2_norm
        # would fail on it with an unrelated error.
        raise ValueError("no frames found in '%s'" % filename)

    to_tensor = ToTensorTestInput()  # built once instead of once per segment
    activation = 0
    with torch.no_grad():
        # Same segmentation as ceil(n_frames / test_frames) consecutive slices.
        for start in range(0, n_frames, test_frames):
            temp_input = to_tensor(features[start:start + test_frames])  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                temp_input = temp_input.cuda()
            temp_activation, _ = model(temp_input)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale every row of ``input`` to unit L2 length and multiply by ``alpha``.

    The squared entries are summed along dimension 1, so ``input`` is treated
    as a 2-D tensor of shape (rows, dim) and each row is normalised on its
    own. 1e-10 is added to the squared norm before the square root so that an
    all-zero row does not cause a division by zero.

    The original author's note points to https://arxiv.org/pdf/1703.09507.pdf
    for the choice alpha = 10; the factor applied here is whatever the caller
    passes.
    """
    original_shape = input.size()
    squared_norm = torch.sum(torch.pow(input, 2), 1).add_(1e-10)  # size:(rows)
    # One norm per row, broadcast back over that row's entries.
    row_norm = torch.sqrt(squared_norm).view(-1, 1).expand_as(input)
    normalised = torch.div(input, row_norm).view(original_shape)
    return normalised * alpha
| 81 | + | ||
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Score one test utterance against one enrolled speaker and print the verdict.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to that speaker's enrolled embedding.
        enroll_speaker: Key into ``embeddings``; printed as the "True speaker".
        test_filename: Path of the test feature file. The name of its parent
            directory, up to the first underscore, is printed as the
            "Claimed speaker".
        test_frames: Forwarded to ``get_embeddings``.
        thres: The trial is accepted only when the cosine similarity is
            strictly greater than this value.

    Raises:
        KeyError: If ``enroll_speaker`` is not a key of ``embeddings``.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # The stored embedding may sit on a different device than the one just
    # computed; cosine_similarity needs both operands on the same device.
    enroll_embedding = enroll_embedding.to(test_embedding.device)

    # .item() yields a plain Python float. Comparing and %-formatting a
    # 1-element NumPy array instead relies on implicit array-to-scalar
    # conversion, which NumPy has deprecated.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # Take the parent directory through os.path so the speaker id is found
    # with whichever separator os.path.join produced on this platform.
    test_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
| 98 | + | ||
def main():
    """Run a single hard-coded verification trial and print its outcome."""
    # Test speaker list
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'
    enroll_speaker = '229M2031'  # the true speaker
    test_speaker = 'sunghwan1'   # the claimed speaker
    thres = 0.95                 # decision threshold

    # Directories
    log_dir = 'new_model4'                    # checkpoints
    embedding_dir = 'enroll_embeddings4'      # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'  # test features

    # Model settings
    use_cuda = True       # use cuda or not
    embedding_size = 128  # dimension of speaker embeddings
    cp_num = 25           # which checkpoint to use
    n_classes = 241       # number of speakers in the training data
    test_frames = 100     # frames per segment when splitting the test utterance

    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # The two frames are not used below; the call is kept as in the original.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    embeddings = load_enroll_embeddings(embedding_dir)

    test_path = os.path.join(test_dir, test_speaker, 'test.p')
    perform_verification(use_cuda, model, embeddings, enroll_speaker, test_path, test_frames, thres)


if __name__ == '__main__':
    main()
Speaker_Recognition/verification4_merge.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model4 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: When true, the model is moved to the GPU before loading.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Passed to ``background_resnet`` as ``embedding_size``.
        n_classes: Passed to ``background_resnet`` as ``num_classes``.

    Returns:
        The model with the checkpoint's ``state_dict`` loaded, in eval mode.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = log_dir + '/checkpoint_' + str(cp_num) + '.pth'
    # Without CUDA, tensors saved from a GPU run must be remapped to the CPU;
    # otherwise torch.load tries to restore them onto a CUDA device and fails.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    # NOTE(review): the state dict is loaded as-is. A checkpoint written from a
    # DataParallel wrapper (keys prefixed with `module.`) would need that
    # prefix stripped first; no such stripping is done here.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature listing under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        A pair ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        the literal text ``enroll.p`` and ``test.p`` respectively, each with a
        fresh 0-based index.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False: by default str.contains interprets the pattern as a regular
    # expression, in which '.' matches any character.
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    return enroll_DB.reset_index(drop=True), test_DB.reset_index(drop=True)
| 38 | + | ||
def load_enroll_embeddings(embedding_dir):
    """Load every ``<speaker>.pth`` file in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one saved embedding per speaker.

    Returns:
        Dict mapping the file name without its ``.pth`` extension to the
        object returned by ``torch.load`` for that file. Entries that do not
        end in ``.pth`` are skipped.
    """
    embeddings = {}
    for f in os.listdir(embedding_dir):
        # splitext removes only the trailing extension, so a '.pth' occurring
        # in the middle of a name is preserved in the speaker id.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            # Not an embedding file (stray file or sub-directory).
            continue
        embeddings[spk] = torch.load(os.path.join(embedding_dir, f))

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the features stored in ``filename``.

    The features are cut into consecutive segments of at most ``test_frames``
    frames. Each segment is passed through ``model``, the first output of
    every call is summed over dimension 0 and accumulated, and the total is
    normalised with ``l2_norm(..., 1)``.

    Args:
        use_cuda: When true, each segment is moved to the GPU before the
            forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning a pair whose first element is the activation.
        test_frames: Maximum number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    features, _ = read_MFB(filename)  # features size:(n_frames, n_dims)
    n_frames = len(features)
    if n_frames == 0:
        # With no segments the accumulator would stay the int 0 and l2_norm
        # would fail on it with an unrelated error.
        raise ValueError("no frames found in '%s'" % filename)

    to_tensor = ToTensorTestInput()  # built once instead of once per segment
    activation = 0
    with torch.no_grad():
        # Same segmentation as ceil(n_frames / test_frames) consecutive slices.
        for start in range(0, n_frames, test_frames):
            temp_input = to_tensor(features[start:start + test_frames])  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                temp_input = temp_input.cuda()
            temp_activation, _ = model(temp_input)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale every row of ``input`` to unit L2 length and multiply by ``alpha``.

    The squared entries are summed along dimension 1, so ``input`` is treated
    as a 2-D tensor of shape (rows, dim) and each row is normalised on its
    own. 1e-10 is added to the squared norm before the square root so that an
    all-zero row does not cause a division by zero.

    The original author's note points to https://arxiv.org/pdf/1703.09507.pdf
    for the choice alpha = 10; the factor applied here is whatever the caller
    passes.
    """
    original_shape = input.size()
    squared_norm = torch.sum(torch.pow(input, 2), 1).add_(1e-10)  # size:(rows)
    # One norm per row, broadcast back over that row's entries.
    row_norm = torch.sqrt(squared_norm).view(-1, 1).expand_as(input)
    normalised = torch.div(input, row_norm).view(original_shape)
    return normalised * alpha
| 81 | + | ||
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Score one test utterance against one enrolled speaker and print the verdict.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to that speaker's enrolled embedding.
        enroll_speaker: Key into ``embeddings``; printed as the "True speaker".
        test_filename: Path of the test feature file. The name of its parent
            directory, up to the first underscore, is printed as the
            "Claimed speaker".
        test_frames: Forwarded to ``get_embeddings``.
        thres: The trial is accepted only when the cosine similarity is
            strictly greater than this value.

    Raises:
        KeyError: If ``enroll_speaker`` is not a key of ``embeddings``.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # The stored embedding may sit on a different device than the one just
    # computed; cosine_similarity needs both operands on the same device.
    enroll_embedding = enroll_embedding.to(test_embedding.device)

    # .item() yields a plain Python float. Comparing and %-formatting a
    # 1-element NumPy array instead relies on implicit array-to-scalar
    # conversion, which NumPy has deprecated.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # Take the parent directory through os.path so the speaker id is found
    # with whichever separator os.path.join produced on this platform.
    test_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
| 98 | + | ||
def main():
    """Run a single hard-coded verification trial and print its outcome."""
    # Test speaker list
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'
    enroll_speaker = '213F5100'  # the true speaker
    test_speaker = '207F2088'    # the claimed speaker
    thres = 0.95                 # decision threshold

    # Directories
    log_dir = 'new_model4_merge'                # checkpoints
    embedding_dir = 'enroll_embeddings4_merge'  # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'    # test features

    # Model settings
    use_cuda = True       # use cuda or not
    embedding_size = 128  # dimension of speaker embeddings
    cp_num = 50           # which checkpoint to use
    n_classes = 348       # number of speakers in the training data
    test_frames = 100     # frames per segment when splitting the test utterance

    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # The two frames are not used below; the call is kept as in the original.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    embeddings = load_enroll_embeddings(embedding_dir)

    test_path = os.path.join(test_dir, test_speaker, 'test.p')
    perform_verification(use_cuda, model, embeddings, enroll_speaker, test_path, test_frames, thres)


if __name__ == '__main__':
    main()
Speaker_Recognition/verification4_zeroth.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model4 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: When true, the model is moved to the GPU before loading.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Passed to ``background_resnet`` as ``embedding_size``.
        n_classes: Passed to ``background_resnet`` as ``num_classes``.

    Returns:
        The model with the checkpoint's ``state_dict`` loaded, in eval mode.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = log_dir + '/checkpoint_' + str(cp_num) + '.pth'
    # Without CUDA, tensors saved from a GPU run must be remapped to the CPU;
    # otherwise torch.load tries to restore them onto a CUDA device and fails.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    # NOTE(review): the state dict is loaded as-is. A checkpoint written from a
    # DataParallel wrapper (keys prefixed with `module.`) would need that
    # prefix stripped first; no such stripping is done here.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature listing under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        A pair ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        the literal text ``enroll.p`` and ``test.p`` respectively, each with a
        fresh 0-based index.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False: by default str.contains interprets the pattern as a regular
    # expression, in which '.' matches any character.
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    return enroll_DB.reset_index(drop=True), test_DB.reset_index(drop=True)
| 38 | + | ||
def load_enroll_embeddings(embedding_dir):
    """Load every ``<speaker>.pth`` file in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one saved embedding per speaker.

    Returns:
        Dict mapping the file name without its ``.pth`` extension to the
        object returned by ``torch.load`` for that file. Entries that do not
        end in ``.pth`` are skipped.
    """
    embeddings = {}
    for f in os.listdir(embedding_dir):
        # splitext removes only the trailing extension, so a '.pth' occurring
        # in the middle of a name is preserved in the speaker id.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            # Not an embedding file (stray file or sub-directory).
            continue
        embeddings[spk] = torch.load(os.path.join(embedding_dir, f))

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the features stored in ``filename``.

    The features are cut into consecutive segments of at most ``test_frames``
    frames. Each segment is passed through ``model``, the first output of
    every call is summed over dimension 0 and accumulated, and the total is
    normalised with ``l2_norm(..., 1)``.

    Args:
        use_cuda: When true, each segment is moved to the GPU before the
            forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning a pair whose first element is the activation.
        test_frames: Maximum number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    features, _ = read_MFB(filename)  # features size:(n_frames, n_dims)
    n_frames = len(features)
    if n_frames == 0:
        # With no segments the accumulator would stay the int 0 and l2_norm
        # would fail on it with an unrelated error.
        raise ValueError("no frames found in '%s'" % filename)

    to_tensor = ToTensorTestInput()  # built once instead of once per segment
    activation = 0
    with torch.no_grad():
        # Same segmentation as ceil(n_frames / test_frames) consecutive slices.
        for start in range(0, n_frames, test_frames):
            temp_input = to_tensor(features[start:start + test_frames])  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                temp_input = temp_input.cuda()
            temp_activation, _ = model(temp_input)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale every row of ``input`` to unit L2 length and multiply by ``alpha``.

    The squared entries are summed along dimension 1, so ``input`` is treated
    as a 2-D tensor of shape (rows, dim) and each row is normalised on its
    own. 1e-10 is added to the squared norm before the square root so that an
    all-zero row does not cause a division by zero.

    The original author's note points to https://arxiv.org/pdf/1703.09507.pdf
    for the choice alpha = 10; the factor applied here is whatever the caller
    passes.
    """
    original_shape = input.size()
    squared_norm = torch.sum(torch.pow(input, 2), 1).add_(1e-10)  # size:(rows)
    # One norm per row, broadcast back over that row's entries.
    row_norm = torch.sqrt(squared_norm).view(-1, 1).expand_as(input)
    normalised = torch.div(input, row_norm).view(original_shape)
    return normalised * alpha
| 81 | + | ||
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Score one test utterance against one enrolled speaker and print the verdict.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to that speaker's enrolled embedding.
        enroll_speaker: Key into ``embeddings``; printed as the "True speaker".
        test_filename: Path of the test feature file. The name of its parent
            directory, up to the first underscore, is printed as the
            "Claimed speaker".
        test_frames: Forwarded to ``get_embeddings``.
        thres: The trial is accepted only when the cosine similarity is
            strictly greater than this value.

    Raises:
        KeyError: If ``enroll_speaker`` is not a key of ``embeddings``.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # The stored embedding may sit on a different device than the one just
    # computed; cosine_similarity needs both operands on the same device.
    enroll_embedding = enroll_embedding.to(test_embedding.device)

    # .item() yields a plain Python float. Comparing and %-formatting a
    # 1-element NumPy array instead relies on implicit array-to-scalar
    # conversion, which NumPy has deprecated.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # Take the parent directory through os.path so the speaker id is found
    # with whichever separator os.path.join produced on this platform.
    test_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
| 98 | + | ||
def main():
    """Run a single hard-coded verification trial and print its outcome."""
    # Test speaker list
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'
    enroll_speaker = '777M7777'  # the true speaker
    test_speaker = '103F3021'    # the claimed speaker
    thres = 0.95                 # decision threshold

    # Directories
    log_dir = 'new_model4_zeroth'                # checkpoints
    embedding_dir = 'enroll_embeddings4_zeroth'  # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'     # test features

    # Model settings
    use_cuda = True       # use cuda or not
    embedding_size = 128  # dimension of speaker embeddings
    cp_num = 30           # which checkpoint to use
    n_classes = 105       # number of speakers in the training data
    test_frames = 100     # frames per segment when splitting the test utterance

    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # The two frames are not used below; the call is kept as in the original.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    embeddings = load_enroll_embeddings(embedding_dir)

    test_path = os.path.join(test_dir, test_speaker, 'test.p')
    perform_verification(use_cuda, model, embeddings, enroll_speaker, test_path, test_frames, thres)


if __name__ == '__main__':
    main()
Speaker_Recognition/verification5.py
0 → 100644
| 1 | +import torch | ||
| 2 | +import torch.nn.functional as F | ||
| 3 | +from torch.autograd import Variable | ||
| 4 | + | ||
| 5 | +import pandas as pd | ||
| 6 | +import math | ||
| 7 | +import os | ||
| 8 | +import configure as c | ||
| 9 | + | ||
| 10 | +from DB_wav_reader import read_feats_structure | ||
| 11 | +from SR_Dataset import read_MFB, ToTensorTestInput | ||
| 12 | +from model.model5 import background_resnet | ||
| 13 | + | ||
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build a ``background_resnet`` and restore its weights from a checkpoint.

    Args:
        use_cuda: When true, the model is moved to the GPU before loading.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Passed to ``background_resnet`` as ``embedding_size``.
        n_classes: Passed to ``background_resnet`` as ``num_classes``.

    Returns:
        The model with the checkpoint's ``state_dict`` loaded, in eval mode.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    checkpoint_path = log_dir + '/checkpoint_' + str(cp_num) + '.pth'
    # Without CUDA, tensors saved from a GPU run must be remapped to the CPU;
    # otherwise torch.load tries to restore them onto a CUDA device and fails.
    checkpoint = torch.load(checkpoint_path, map_location=None if use_cuda else 'cpu')
    # NOTE(review): the state dict is loaded as-is. A checkpoint written from a
    # DataParallel wrapper (keys prefixed with `module.`) would need that
    # prefix stripped first; no such stripping is done here.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
| 25 | + | ||
def split_enroll_and_test(dataroot_dir):
    """Split the feature listing under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        A pair ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        the literal text ``enroll.p`` and ``test.p`` respectively, each with a
        fresh 0-based index.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False: by default str.contains interprets the pattern as a regular
    # expression, in which '.' matches any character.
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    return enroll_DB.reset_index(drop=True), test_DB.reset_index(drop=True)
| 38 | + | ||
def load_enroll_embeddings(embedding_dir):
    """Load every ``<speaker>.pth`` file in ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one saved embedding per speaker.

    Returns:
        Dict mapping the file name without its ``.pth`` extension to the
        object returned by ``torch.load`` for that file. Entries that do not
        end in ``.pth`` are skipped.
    """
    embeddings = {}
    for f in os.listdir(embedding_dir):
        # splitext removes only the trailing extension, so a '.pth' occurring
        # in the middle of a name is preserved in the speaker id.
        spk, ext = os.path.splitext(f)
        if ext != '.pth':
            # Not an embedding file (stray file or sub-directory).
            continue
        embeddings[spk] = torch.load(os.path.join(embedding_dir, f))

    return embeddings
| 49 | + | ||
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for the features stored in ``filename``.

    The features are cut into consecutive segments of at most ``test_frames``
    frames. Each segment is passed through ``model``, the first output of
    every call is summed over dimension 0 and accumulated, and the total is
    normalised with ``l2_norm(..., 1)``.

    Args:
        use_cuda: When true, each segment is moved to the GPU before the
            forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Callable returning a pair whose first element is the activation.
        test_frames: Maximum number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: If the feature file contains no frames.
    """
    features, _ = read_MFB(filename)  # features size:(n_frames, n_dims)
    n_frames = len(features)
    if n_frames == 0:
        # With no segments the accumulator would stay the int 0 and l2_norm
        # would fail on it with an unrelated error.
        raise ValueError("no frames found in '%s'" % filename)

    to_tensor = ToTensorTestInput()  # built once instead of once per segment
    activation = 0
    with torch.no_grad():
        # Same segmentation as ceil(n_frames / test_frames) consecutive slices.
        for start in range(0, n_frames, test_frames):
            temp_input = to_tensor(features[start:start + test_frames])  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                temp_input = temp_input.cuda()
            temp_activation, _ = model(temp_input)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
| 70 | + | ||
def l2_norm(input, alpha):
    """Scale every row of ``input`` to unit L2 length and multiply by ``alpha``.

    The squared entries are summed along dimension 1, so ``input`` is treated
    as a 2-D tensor of shape (rows, dim) and each row is normalised on its
    own. 1e-10 is added to the squared norm before the square root so that an
    all-zero row does not cause a division by zero.

    The original author's note points to https://arxiv.org/pdf/1703.09507.pdf
    for the choice alpha = 10; the factor applied here is whatever the caller
    passes.
    """
    original_shape = input.size()
    squared_norm = torch.sum(torch.pow(input, 2), 1).add_(1e-10)  # size:(rows)
    # One norm per row, broadcast back over that row's entries.
    row_norm = torch.sqrt(squared_norm).view(-1, 1).expand_as(input)
    normalised = torch.div(input, row_norm).view(original_shape)
    return normalised * alpha
| 81 | + | ||
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Score one test utterance against one enrolled speaker and print the verdict.

    Args:
        use_cuda: Forwarded to ``get_embeddings``.
        model: Forwarded to ``get_embeddings``.
        embeddings: Mapping from speaker id to that speaker's enrolled embedding.
        enroll_speaker: Key into ``embeddings``; printed as the "True speaker".
        test_filename: Path of the test feature file. The name of its parent
            directory, up to the first underscore, is printed as the
            "Claimed speaker".
        test_frames: Forwarded to ``get_embeddings``.
        thres: The trial is accepted only when the cosine similarity is
            strictly greater than this value.

    Raises:
        KeyError: If ``enroll_speaker`` is not a key of ``embeddings``.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # The stored embedding may sit on a different device than the one just
    # computed; cosine_similarity needs both operands on the same device.
    enroll_embedding = enroll_embedding.to(test_embedding.device)

    # .item() yields a plain Python float. Comparing and %-formatting a
    # 1-element NumPy array instead relies on implicit array-to-scalar
    # conversion, which NumPy has deprecated.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # Take the parent directory through os.path so the speaker id is found
    # with whichever separator os.path.join produced on this platform.
    test_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
| 98 | + | ||
def main():
    """Run a single hard-coded verification trial and print its outcome."""
    # Test speaker list
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'
    enroll_speaker = '777M7777'  # the true speaker
    test_speaker = 'sunghwan1'   # the claimed speaker
    thres = 0.95                 # decision threshold

    # Directories
    log_dir = 'new_model5'                    # checkpoints
    embedding_dir = 'enroll_embeddings5'      # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'  # test features

    # Model settings
    use_cuda = True       # use cuda or not
    embedding_size = 128  # dimension of speaker embeddings
    cp_num = 30           # which checkpoint to use
    n_classes = 241       # number of speakers in the training data
    test_frames = 100     # frames per segment when splitting the test utterance

    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)

    # The two frames are not used below; the call is kept as in the original.
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)

    embeddings = load_enroll_embeddings(embedding_dir)

    test_path = os.path.join(test_dir, test_speaker, 'test.p')
    perform_verification(use_cuda, model, embeddings, enroll_speaker, test_path, test_frames, thres)


if __name__ == '__main__':
    main()
-
Please register or login to post a comment