01-speech-commands-mfcc-extraction.ipynb !pip install python_speech_features from os import listdir from os.path import isdir, join import librosa import random import numpy as np import
Trang 1
BỘ GIÁO DỤC VÀ ĐÀO TẠO
TRƯỜNG ĐẠI HỌC SƯ PHẠM KỸ THUẬT TP HCM
MÔN THỰC TẬP HỆ THỐNG NHÚNG
BÁO CÁO LAB3
GVHD: Thầy Võ Minh Huân SVTH: Ngô Minh Thái 19161161
Lớp sáng thứ 5 tiết 1-3
Tp Hồ Chí Minh, tháng 5 năm 2022
Trang 2
01-speech-commands-mfcc-extraction.ipynb
!pip install python_speech_features
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import python_speech_features
- Cho phép Google Colab truy cập vào Google Drive
from google.colab import drive
drive.mount('/content/drive')
- Chuyển vào thư mục muốn training
%cd /content/drive/MyDrive/Nhung
%pwd
- Chọn file âm thanh dùng để training và in các tên file
# Dataset path and view possible targets
# The path is relative to the current working directory.
dataset_path = 'speech_commands_v0.021'

# Print every sub-directory of the dataset (plain files are ignored).
for name in listdir(dataset_path):
    if isdir(join(dataset_path, name)):
        print(name)
Trang 3
- Tạo danh sách các thư mục (nhãn) cần train và in ra
# Create an all targets list: one entry per sub-directory of the dataset
all_targets = [name for name in listdir(dataset_path)
               if isdir(join(dataset_path, name))]
print(all_targets)
- Loại bỏ file âm thanh nhiễu và in các file còn lại
# Leave off background noise set
# Guarded so that re-running this cell (or a dataset copy without the noise
# directory) does not raise ValueError from list.remove().
if '_background_noise_' in all_targets:
    all_targets.remove('_background_noise_')
print(all_targets)
- Xem có bao nhiêu tập tin trong từng thư mục
# See how many files are in each target directory
num_samples = 0
for target in all_targets:
    # List the directory once and reuse the count for both print and total
    num_files = len(listdir(join(dataset_path, target)))
    print(num_files)
    num_samples += num_files
print('Total samples:', num_samples)
- Cài đặt các thông số và tên file train ra được
# Settings
# Targets to extract features for (here: every target directory)
target_list = all_targets
# Name of the .npz archive the extracted feature sets are saved to
feature_sets_file = 'all_targets_mfcc_sets.npz'
perc_keep_samples = 1.0 # 1.0 is keep all samples
# Fractions of the kept files reserved for the validation and test sets
val_ratio = 0.1
test_ratio = 0.1
# Passed to librosa.load as `sr` when reading each WAV file
sample_rate = 8000
# Passed as `numcep` to the MFCC routine
num_mfcc = 16
# Required size of axis 1 of each MFCC matrix; other sizes are dropped
len_mfcc = 16
- Tạo danh sách tên tệp cùng với vectơ nhãn (y)
# Create list of filenames along with ground truth vector (y)
# filenames[i] holds the file names of target i; y[i] is an array of the same
# length filled with the (float) class index i.
filenames = []
y = []
for index, target in enumerate(target_list):
    target_dir = join(dataset_path, target)
    print(target_dir)
    filenames.append(listdir(target_dir))
    y.append(np.ones(len(filenames[index])) * index)

# Check ground truth Y vector
print(y)
for item in y:
    print(len(item))

# Flatten filename and y vectors
filenames = [item for sublist in filenames for item in sublist]
y = [item for sublist in y for item in sublist]
- Liên kết tên file với các đầu ra và tráo lại
# Associate filenames with true output and shuffle
# Pairing first keeps every filename attached to its label while the order is
# randomized in place; zip(*...) then unpacks the pairs back into two tuples.
# NOTE(review): no random seed is set, so the split differs between runs.
filenames_y = list(zip(filenames, y))
random.shuffle(filenames_y)
filenames, y = zip(*filenames_y)
- Chỉ giữ lại số lượng mẫu được chỉ định
# Only keep the specified number of samples (shorter extraction/training)
print(len(filenames))
num_keep = int(len(filenames) * perc_keep_samples)
filenames = filenames[:num_keep]
# Truncate the labels as well so filenames and y stay the same length;
# otherwise the later y splits are longer than the filename splits and the
# "removed percentage" figures are divided by the wrong totals.
y = y[:num_keep]
print(len(filenames))
- Tính toán kích thước tập hợp kiểm tra và xác thực
# Calculate validation and test set sizes
# int() truncates, so each size is rounded down to a whole number of files.
val_set_size = int(len(filenames) * val_ratio)
test_set_size = int(len(filenames) * test_ratio)
- Chia nhỏ tập dữ liệu thành các tập huấn luyện, xác thực và thử nghiệm
# Break dataset apart into train, validation, and test sets
# Layout of the shuffled list: [ validation | test | train ... ]
filenames_val = filenames[:val_set_size]
filenames_test = filenames[val_set_size:(val_set_size + test_set_size)]
filenames_train = filenames[(val_set_size + test_set_size):]
- Chia nhỏ y thành các tập hợp đào tạo, xác thực và kiểm tra
# Break y apart into train, validation, and test sets
# Uses the same slice boundaries as the filename split so that each label
# stays aligned with its filename.
y_orig_val = y[:val_set_size]
y_orig_test = y[val_set_size:(val_set_size + test_set_size)]
y_orig_train = y[(val_set_size + test_set_size):]
# Function: Create MFCC from given path
def calc_mfcc(path):
    """Load the audio file at *path* and return its MFCC matrix, transposed.

    The file is loaded with librosa at the module-level ``sample_rate`` and
    ``num_mfcc`` cepstral coefficients are requested per analysis window.

    Args:
        path: Path of the audio file to load.

    Returns:
        The array returned by ``python_speech_features`` after ``transpose()``.
        Callers compare ``shape[1]`` of the result against ``len_mfcc``, so
        axis 1 is presumably the frame axis (TODO confirm against the
        library documentation).
    """
    # Load wavefile
    signal, fs = librosa.load(path, sr=sample_rate)

    # Create MFCCs from sound clip
    mfccs = python_speech_features.base.mfcc(signal,
                                             samplerate=fs,
                                             winlen=0.256,
                                             winstep=0.050,
                                             numcep=num_mfcc,
                                             nfilt=26,
                                             nfft=2048,
                                             preemph=0.0,
                                             ceplifter=0,
                                             appendEnergy=False,
                                             winfunc=np.hanning)
    return mfccs.transpose()
- Xây dựng bộ kiểm tra bằng tính toán MFCC của mỗi tệp WAV
# TEST: Construct test set by computing MFCC of each WAV file
# NOTE: this is only a sanity check; it reads the first 500 *training* files
# and counts how many produce an MFCC matrix of the wrong length.
prob_cnt = 0
x_test = []
y_test = []
for index, filename in enumerate(filenames_train):

    # Stop after 500
    if index >= 500:
        break

    # Create path from given filename and target item
    path = join(dataset_path, target_list[int(y_orig_train[index])], filename)

    # Create MFCCs
    mfccs = calc_mfcc(path)

    # Keep only matrices whose axis 1 has the expected length
    if mfccs.shape[1] == len_mfcc:
        x_test.append(mfccs)
        y_test.append(y_orig_train[index])
    else:
        print('Dropped:', index, mfccs.shape)
        prob_cnt += 1
- Các mẫu có vấn đề
# The check above stops after 500 files but may see fewer; divide by the
# number of files actually examined (and avoid dividing by zero).
num_checked = min(500, len(filenames_train))
print('% of problematic samples:',
      prob_cnt / num_checked if num_checked else 0.0)
# TEST: Test shorter MFCC
!pip install playsound
from playsound import playsound
# Index (into the training split) of the sample to inspect
idx = 13

# Create path from given filename and target item
path = join(dataset_path, target_list[int(y_orig_train[idx])],
            filenames_train[idx])

# Create MFCCs
mfccs = calc_mfcc(path)
print("MFCCs:", mfccs)

# Plot MFCC
fig = plt.figure()
plt.imshow(mfccs, cmap='inferno', origin='lower')

# TEST: Play problem sounds
print(target_list[int(y_orig_train[idx])])
playsound(path)
# Function: Create MFCCs, keeping only ones of desired length
def extract_features(in_files, in_y):
    """Compute MFCCs for each WAV file and keep those of the expected length.

    Args:
        in_files: Sequence of file names (not full paths).
        in_y: Sequence of class indices aligned with *in_files*; each one is
            used to look up the target directory in ``target_list``.

    Returns:
        A tuple ``(out_x, out_y, prob_cnt)``: the kept MFCC matrices, their
        labels, and the number of WAV files dropped because axis 1 of their
        MFCC matrix did not equal ``len_mfcc``. Files that do not end in
        ``.wav`` are skipped silently and are not counted in ``prob_cnt``.
    """
    prob_cnt = 0
    out_x = []
    out_y = []

    for index, filename in enumerate(in_files):

        # Check to make sure we're reading a wav file (done on the file name,
        # before any path is built, so non-audio entries cost nothing)
        if not filename.endswith('.wav'):
            continue

        # Create path from given filename and target item
        label = in_y[index]
        path = join(dataset_path, target_list[int(label)], filename)

        # Create MFCCs
        mfccs = calc_mfcc(path)

        # Only keep MFCCs with given length
        if mfccs.shape[1] == len_mfcc:
            out_x.append(mfccs)
            out_y.append(label)
        else:
            print('Dropped:', index, mfccs.shape)
            prob_cnt += 1

    return out_x, out_y, prob_cnt
- Tạo tập hợp đào tạo, xác thực và kiểm tra
# Create train, validation, and test sets
# Each call returns the kept MFCC matrices, their labels, and the number of
# WAV files dropped for having the wrong MFCC length. The printed value is a
# fraction of the split's label count (0.0-1.0), not a percentage.
# NOTE(review): an empty split makes the division raise ZeroDivisionError.
x_train, y_train, prob = extract_features(filenames_train,
y_orig_train)
print('Removed percentage:', prob / len(y_orig_train))
x_val, y_val, prob = extract_features(filenames_val, y_orig_val)
print('Removed percentage:', prob / len(y_orig_val))
x_test, y_test, prob = extract_features(filenames_test, y_orig_test)
print('Removed percentage:', prob / len(y_orig_test))
# Save features and truth vector (y) sets to disk
# All six arrays go into one .npz archive, keyed by the keyword names below.
np.savez(feature_sets_file,
x_train=x_train,
y_train=y_train,
x_val=x_val,
y_val=y_val,
x_test=x_test,
y_test=y_test)
# TEST: Load features
# Reload the archive just written to confirm its keys and contents.
feature_sets = np.load(feature_sets_file)
# The two bare expressions below only display a value when evaluated as the
# last line of a notebook cell; as plain script statements they do nothing.
feature_sets.files
len(feature_sets['x_train'])
print(feature_sets['y_val'])
- Sau khi train thu được file như hình dưới đây vào drive
Trang 8
from os import listdir
from os.path import isdir, join
from tensorflow.keras import layers, models
import numpy as np
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Nhung
%pwd
# Create list of all targets (minus background noise)
dataset_path = 'speech_commands_v0.021'
all_targets = [name for name in listdir(dataset_path)
               if isdir(join(dataset_path, name))]
all_targets.remove('_background_noise_')
print(all_targets)
- Chọn từ để training là one
# Settings
# Directory and file name of the .npz feature archive to load
feature_sets_path = '/content/drive/MyDrive/Nhung/'
feature_sets_filename = 'all_targets_mfcc_sets.npz'
# File the trained Keras model is saved to
model_filename = 'wake_word_one_model.h5'
# Target word treated as the positive class (label 1); all others become 0
wake_word = 'one'
# Open the saved feature archive and list the arrays it contains
feature_sets = np.load(join(feature_sets_path, feature_sets_filename))
print(feature_sets.files)

# Pull the six arrays out of the archive in one unpacking step
x_train, y_train, x_val, y_val, x_test, y_test = (
    feature_sets[key]
    for key in ('x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test')
)

# Show the dimensions of each feature tensor, one per line
print(x_train.shape, x_val.shape, x_test.shape, sep='\n')
# Peek at labels
print(y_val)

# Convert ground truth arrays to one wake word (1) and 'other' (0)
# NOTE(review): the stored labels are indices into the target list built by
# the extraction notebook; this lookup assumes listdir() returned the
# directories in the same order here - confirm before trusting the labels.
wake_word_index = all_targets.index(wake_word)
y_train = np.equal(y_train, wake_word_index).astype('float64')
y_val = np.equal(y_val, wake_word_index).astype('float64')
y_test = np.equal(y_test, wake_word_index).astype('float64')

# Peek at labels after conversion
print(y_val)

# What percentage of 'one' appear in validation labels
print(sum(y_val) / len(y_val))
print(1 - sum(y_val) / len(y_val))
# View the dimensions of our input data
print(x_train.shape)

# CNN for TF expects (batch, height, width, channels)
# So we reshape the input tensors with a "color" channel of 1
x_train = x_train.reshape(x_train.shape[0],
                          x_train.shape[1],
                          x_train.shape[2],
                          1)
x_val = x_val.reshape(x_val.shape[0],
                      x_val.shape[1],
                      x_val.shape[2],
                      1)
x_test = x_test.reshape(x_test.shape[0],
                        x_test.shape[1],
                        x_test.shape[2],
                        1)
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

# Input shape for CNN is size of MFCC of 1 sample
sample_shape = x_test.shape[1:]
print(sample_shape)
# Build model
# Based on: https://www.geeksforgeeks.org/python-image-classification-using-keras/
model = models.Sequential()

# Feature extractor: three Conv2D + MaxPooling2D stages
model.add(layers.Conv2D(32,
                        (2, 2),
                        activation='relu',
                        input_shape=sample_shape))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(32, (2, 2), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, (2, 2), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
# Single sigmoid unit: output is the score for the wake word (label 1)
model.add(layers.Dense(1, activation='sigmoid'))

# Display model
model.summary()
# Add training parameters to model
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

# Train
# The returned history records the per-epoch 'acc'/'loss' values (and their
# 'val_' counterparts) that are plotted afterwards.
history = model.fit(x_train,
                    y_train,
                    epochs=30,
                    batch_size=100,
                    validation_data=(x_val, y_val))
# Plot results
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Epoch numbers start at 1 for the x axis
epochs = range(1, len(acc) + 1)

# Figure 1: accuracy per epoch
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Figure 2: loss per epoch
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
# Save the model as a file
models.save_model(model, model_filename)

# See which test samples are the wake word (label 1)
# A dedicated loop variable is used so the name `y` is not overwritten.
for idx, label in enumerate(y_test):
    if label == 1:
        print(idx)
# TEST: Load model and run it against test set
model = models.load_model(model_filename)
# NOTE(review): assumes the test set holds at least 110 samples.
for i in range(100, 110):
    # expand_dims adds a leading axis so one sample is passed as a batch of 1
    print('Answer:', y_test[i],
          ' Prediction:', model.predict(np.expand_dims(x_test[i], 0)))

# Evaluate model with test set
model.evaluate(x=x_test, y=y_test)
- Sau khi train xong, mô hình đạt độ chính xác (accuracy) là 0,9873 và giá trị mất mát (loss) là 0,0691
- File xuất vào drive
Trang 14
from tensorflow import lite
from tensorflow.keras import models
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Nhung
%pwd
# Parameters
keras_model_filename = 'wake_word_one_model.h5'
tflite_filename = 'wake_word_one_lite.tflite'

# Convert model to TF Lite model
model = models.load_model(keras_model_filename)
converter = lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model; the context manager closes the file even if
# the write fails.
with open(tflite_filename, 'wb') as tflite_file:
    tflite_file.write(tflite_model)
- File sau khi train xong