-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech_recognition.py
More file actions
91 lines (79 loc) · 3.03 KB
/
speech_recognition.py
File metadata and controls
91 lines (79 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import argparse
import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
from python_speech_features import mfcc
from HMMTrainer import HMMTrainer
# Function to parse input arguments
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the HMM training script.

    The parser accepts a single, required ``--input-folder`` option whose
    value is stored on the parsed namespace as ``input_folder``.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is
        responsible for invoking ``parse_args()`` on it.
    """
    parser = argparse.ArgumentParser(description='Trains the HMM classifier')
    parser.add_argument(
        "--input-folder",
        dest="input_folder",
        required=True,
        help="Input folder containing the audio files in subfolders",
    )
    return parser
# FFT size handed to mfcc() for BOTH training and classification.  The
# features a model is scored on must be computed the same way as the
# features it was trained on.
NFFT = 1200


def label_from_path(path):
    """Return the name of the directory that directly contains *path*.

    Example: ``'vowel_data/a/a_1.wav'`` -> ``'a'``.  Uses ``os.path`` so the
    result does not depend on a hard-coded ``'/'`` separator.
    """
    return os.path.basename(os.path.dirname(path))


def best_label(scored_labels):
    """Pick the label with the highest score.

    Args:
        scored_labels: iterable of ``(score, label)`` pairs.

    Returns:
        The label paired with the largest score (the first one wins on a
        tie), or ``None`` when the iterable is empty.
    """
    _, label = max(scored_labels, key=lambda pair: pair[0],
                   default=(None, None))
    return label


def load_training_features(class_dir):
    """Extract and stack MFCC features for the training files of one class.

    The ``.wav`` files directly inside *class_dir* are sorted by name and the
    last one is left out (held back for testing), so the held-out file is
    the same on every run.

    Returns:
        The per-file MFCC arrays concatenated along axis 0, or ``None`` when
        no training file remains after holding one out.
    """
    wav_names = sorted(
        name for name in os.listdir(class_dir) if name.endswith('.wav')
    )
    per_file_features = []
    for wav_name in wav_names[:-1]:
        sampling_freq, audio = wavfile.read(os.path.join(class_dir, wav_name))
        per_file_features.append(mfcc(audio, sampling_freq, nfft=NFFT))
    if not per_file_features:
        return None
    # Concatenate once instead of re-copying a growing array per file.
    return np.concatenate(per_file_features, axis=0)


def train_models(input_folder):
    """Train one HMMTrainer per class subfolder of *input_folder*.

    Every subdirectory is treated as one class whose label is the
    subdirectory name.  Non-directories and classes without any training
    file are skipped.

    Returns:
        A list of ``(trainer, label)`` tuples, in sorted label order.
    """
    models = []
    for dirname in sorted(os.listdir(input_folder)):
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder):
            continue
        features = load_training_features(subfolder)
        if features is None:
            # Nothing to fit on; an empty array must not reach the trainer.
            continue
        # NOTE(review): HMMTrainer is project-local; model_name/n_mix are
        # passed through unchanged from the original script.
        hmm_trainer = HMMTrainer(model_name='GMMHMM', n_mix=10)
        hmm_trainer.train(features)
        models.append((hmm_trainer, dirname))
    return models


def classify(input_file, models):
    """Return the label of the model that scores *input_file* highest.

    Args:
        input_file: path to a ``.wav`` file.
        models: list of ``(trainer, label)`` tuples as produced by
            ``train_models``; each trainer must provide ``get_score``.

    Returns:
        The winning label, or ``None`` if *models* is empty.
    """
    sampling_freq, audio = wavfile.read(input_file)
    # Same nfft as in training, see NFFT above.
    mfcc_features = mfcc(audio, sampling_freq, nfft=NFFT)
    return best_label(
        (hmm_model.get_score(mfcc_features), label)
        for hmm_model, label in models
    )


if __name__ == '__main__':
    # The data folder is hard-coded; build_arg_parser() can be used instead
    # to take it from the --input-folder command-line option.
    input_folder = "./vowel_data"

    hmm_models = train_models(input_folder)

    # NOTE(review): these test files are fixed paths, not the per-class
    # held-out files, so they may also have been part of the training set.
    input_files = [
        'vowel_data/a/a_1.wav',
        'vowel_data/e/e_2.wav',
        'vowel_data/o/o_3.wav',
    ]

    # Classify input data
    for input_file in input_files:
        output_label = classify(input_file, hmm_models)

        # Print the output
        print("\nTrue:", label_from_path(input_file))
        print("Predicted:", output_label)