Môn thực tập hệ thống nhúng báo cáo lab3 01 speech commands mfcc extraction ipynb

01-speech-commands-mfcc-extraction.ipynb !pip install python_speech_features from os import listdir from os.path import isdir, join import librosa import random import numpy as np import

Trang 1

BỘ GIÁO DỤC VÀ ĐÀO TẠO TRƯỜNG ĐẠI HỌC SƯ PHẠM KỸ THUẬT TP HCM



MÔN THỰC TẬP HỆ THỐNG NHÚNG

BÁO CÁO LAB3

GVHD: Thầy Võ Minh Huân SVTH: Ngô Minh Thái 19161161

Lớp sáng thứ 5 tiết 1-3

Tp Hồ Chí Minh, tháng 5 năm 2022

Trang 2

01-speech-commands-mfcc-extraction.ipynb

!pip install python_speech_features

from os import listdir

from os.path import isdir, join

import librosa

import random

import numpy as np

import matplotlib.pyplot as plt

import python_speech_features

- Cho phép Google Colab truy cập vào Google Drive

# Mount Google Drive into the Colab runtime under /content/drive.
from google.colab import drive

drive.mount('/content/drive')

- Chuyển vào thư mục muốn training

# IPython magics: switch to the project folder on the mounted drive, then show it.
%cd /content/drive/MyDrive/Nhung

%pwd

- Chọn thư mục dữ liệu âm thanh dùng để huấn luyện và in tên các thư mục con (mỗi thư mục là một từ mục tiêu)

# Dataset path and view possible targets
dataset_path = 'speech_commands_v0.021'

# Every sub-directory of the dataset folder is one candidate target; print it.
for name in listdir(dataset_path):
    if isdir(join(dataset_path, name)):
        print(name)

Trang 3

- Tạo danh sách các file cần train và in ra

# Create an all targets list (one entry per sub-directory of the dataset)
all_targets = [name for name in listdir(dataset_path)
               if isdir(join(dataset_path, name))]
print(all_targets)

- Loại bỏ file âm thanh nhiễu và in các file còn lại

# Leave off background noise set
# (list.remove raises ValueError if '_background_noise_' is not in the list.)
all_targets.remove('_background_noise_')

print(all_targets)

- Xem có bao nhiêu tập tin trong từng thư mục

# See how many files are in each
num_samples = 0
for target in all_targets:
    # List each target directory once and reuse the count for both
    # the per-target printout and the running total.
    num_files = len(listdir(join(dataset_path, target)))
    print(num_files)
    num_samples += num_files
print('Total samples:', num_samples)

- Cài đặt các thông số và tên file train ra được

# Settings

# Use every target found in the dataset
target_list = all_targets

# Name of the .npz archive the extracted feature sets are saved to
feature_sets_file = 'all_targets_mfcc_sets.npz'

perc_keep_samples = 1.0 # 1.0 is keep all samples

# Fractions of the file list reserved for the validation and test splits
val_ratio = 0.1

test_ratio = 0.1

# Passed to librosa.load as sr= when a clip is read in calc_mfcc
sample_rate = 8000

# Passed as numcep= to the MFCC computation in calc_mfcc
num_mfcc = 16

# Required size of axis 1 of the MFCC matrix; clips with another size are dropped
len_mfcc = 16

- Tạo danh sách tên tệp cùng với vectơ

# Create list of filenames along with ground truth vector (y)
filenames = []
y = []
for index, target in enumerate(target_list):
    print(join(dataset_path, target))
    # One sub-list of file names per target ...
    filenames.append(listdir(join(dataset_path, target)))
    # ... and a matching array filled with that target's index as its label.
    y.append(np.ones(len(filenames[index])) * index)

Trang 4

# Check ground truth Y vector
print(y)
for item in y:
    print(len(item))

# Flatten filename and y vectors
# (both are lists of per-target sub-lists; concatenate them in target order)
filenames = [item for sublist in filenames for item in sublist]
y = [item for sublist in y for item in sublist]

- Liên kết tên file với các đầu ra và tráo lại

# Associate filenames with true output and shuffle
# Pair each file name with its label so both are permuted together.
filenames_y = list(zip(filenames, y))

random.shuffle(filenames_y)

# Unzip back into two parallel tuples (same order in both).
filenames, y = zip(*filenames_y)

- Chỉ giữ lại số lượng mẫu được chỉ định

# Only keep the specified number of samples (shorter extraction/training)
print(len(filenames))
filenames = filenames[:int(len(filenames) * perc_keep_samples)]
# Trim the labels to the same length so filenames and y stay parallel;
# otherwise the y splits below are longer than the filename splits whenever
# perc_keep_samples < 1.0.
y = y[:len(filenames)]
print(len(filenames))

- Tính toán kích thước tập hợp kiểm tra và xác thực

# Calculate validation and test set sizes
# int() truncates, so each size is rounded down to a whole number of files.
val_set_size = int(len(filenames) * val_ratio)

test_set_size = int(len(filenames) * test_ratio)

- Chia nhỏ tập dữ liệu thành các tập huấn luyện, xác thực và thử nghiệm

# Break dataset apart into train, validation, and test sets
# Layout of the shuffled list: [ validation | test | train ... ]
filenames_val = filenames[:val_set_size]
filenames_test = filenames[val_set_size:(val_set_size + test_set_size)]
filenames_train = filenames[(val_set_size + test_set_size):]

- Chia nhỏ y thành các tập hợp đào tạo, xác thực và kiểm tra

# Break y apart into train, validation, and test sets
# Uses the same slice boundaries as the filename split so labels stay aligned.
y_orig_val = y[:val_set_size]

y_orig_test = y[val_set_size:(val_set_size + test_set_size)]

y_orig_train = y[(val_set_size + test_set_size):]

# Function: Create MFCC from given path
def calc_mfcc(path):
    """Load an audio file and return its MFCC matrix, transposed.

    Args:
        path: Path of the audio file to read.

    Returns:
        The array produced by ``python_speech_features.base.mfcc`` for the
        clip, transposed. Callers compare ``shape[1]`` of the result with
        ``len_mfcc`` to decide whether to keep the clip.
    """
    # Load wavefile (sr=sample_rate asks librosa for that sampling rate)
    signal, fs = librosa.load(path, sr=sample_rate)

    # Create MFCCs from sound clip
    mfccs = python_speech_features.base.mfcc(signal,
                                            samplerate=fs,
                                            winlen=0.256,
                                            winstep=0.050,
                                            numcep=num_mfcc,
                                            nfilt=26,
                                            nfft=2048,
                                            preemph=0.0,
                                            ceplifter=0,
                                            appendEnergy=False,
                                            winfunc=np.hanning)
    return mfccs.transpose()

- Xây dựng bộ kiểm tra bằng tính toán MFCC của mỗi tệp WAV

# TEST: Construct test set by computing MFCC of each WAV file
# (sanity check over the first 500 entries of the training file list)
prob_cnt = 0
x_test = []
y_test = []
for index, filename in enumerate(filenames_train):

    # Stop after 500
    if index >= 500:
        break

    # Create path from given filename and target item
    path = join(dataset_path, target_list[int(y_orig_train[index])], filename)

    # Create MFCCs
    mfccs = calc_mfcc(path)

    # Keep clips whose MFCC matrix has the expected size; count the rest
    if mfccs.shape[1] == len_mfcc:
        x_test.append(mfccs)
        y_test.append(y_orig_train[index])
    else:
        print('Dropped:', index, mfccs.shape)
        prob_cnt += 1

- Các mẫu có vấn đề

# NOTE(review): the printed value is prob_cnt divided by a fixed 500, i.e. a
# ratio rather than a percentage, regardless of how many files were processed.
print('% of problematic samples:', prob_cnt / 500)

# TEST: Test shorter MFCC

!pip install playsound

from playsound import playsound

# Index into the training split of the single clip to inspect
idx = 13

# Build the clip's path: dataset folder / target folder for its label / file name
path = join(dataset_path, target_list[int(y_orig_train[idx])],

filenames_train[idx])

Trang 6

# Create MFCCs
# Recompute for the selected clip; without this, `mfccs` would still hold
# whatever an earlier cell left in it.
mfccs = calc_mfcc(path)
print("MFCCs:", mfccs)

# Plot MFCC
fig = plt.figure()
plt.imshow(mfccs, cmap='inferno', origin='lower')

# TEST: Play problem sounds
print(target_list[int(y_orig_train[idx])])
playsound(path)

# Function: Create MFCCs, keeping only ones of desired length
def extract_features(in_files, in_y):
    """Compute MFCCs for a list of files, keeping only those of the expected size.

    Args:
        in_files: Sequence of file names (without directory).
        in_y: Sequence of labels parallel to ``in_files``; each label is used
            as an index into ``target_list`` to find the file's directory.

    Returns:
        A tuple ``(out_x, out_y, prob_cnt)``: the kept MFCC matrices, their
        labels, and the number of files dropped because ``shape[1]`` of the
        MFCC matrix differed from ``len_mfcc``.
    """
    prob_cnt = 0
    out_x = []
    out_y = []

    for index, filename in enumerate(in_files):

        # Create path from given filename and target item
        path = join(dataset_path, target_list[int(in_y[index])], filename)

        # Check to make sure we're reading a wav file
        if not path.endswith('.wav'):
            continue

        # Create MFCCs
        mfccs = calc_mfcc(path)

        # Only keep MFCCs with given length
        if mfccs.shape[1] == len_mfcc:
            out_x.append(mfccs)
            out_y.append(in_y[index])
        else:
            print('Dropped:', index, mfccs.shape)
            prob_cnt += 1

    return out_x, out_y, prob_cnt

- Tạo tập hợp đào tạo, xác thực và kiểm tra

# Create train, validation, and test sets
# Each call returns the kept MFCCs, their labels, and the count of dropped
# files; the printed value is that count divided by the split's label count.
x_train, y_train, prob = extract_features(filenames_train,

y_orig_train)

print('Removed percentage:', prob / len(y_orig_train))

x_val, y_val, prob = extract_features(filenames_val, y_orig_val)

print('Removed percentage:', prob / len(y_orig_val))

x_test, y_test, prob = extract_features(filenames_test, y_orig_test)

print('Removed percentage:', prob / len(y_orig_test))

# Save features and truth vector (y) sets to disk
# Each keyword becomes the name of one array inside the .npz archive.
np.savez(feature_sets_file,

x_train=x_train,

y_train=y_train,

x_val=x_val,

y_val=y_val,

x_test=x_test,

y_test=y_test)

# TEST: Load features
# Read the archive back and inspect it to confirm the save worked.
feature_sets = np.load(feature_sets_file)

feature_sets.files

len(feature_sets['x_train'])

print(feature_sets['y_val'])

- Sau khi train thu được file như hình dưới đây vào drive

Trang 8

from os import listdir

from os.path import isdir, join

from tensorflow.keras import layers, models

import numpy as np

%pwd

# Create list of all targets (minus background noise)
dataset_path = 'speech_commands_v0.021'
all_targets = [name for name in listdir(dataset_path)
               if isdir(join(dataset_path, name))]
all_targets.remove('_background_noise_')
print(all_targets)

- Chọn từ để training là one

# Settings

# Directory and file name of the saved MFCC feature archive
feature_sets_path = '/content/drive/MyDrive/Nhung/'

feature_sets_filename = 'all_targets_mfcc_sets.npz'

# File the trained Keras model is saved to
model_filename = 'wake_word_one_model.h5'

# Target that is mapped to label 1; every other target becomes 0
wake_word = 'one'

# Load feature sets
feature_sets = np.load(join(feature_sets_path, feature_sets_filename))
print(feature_sets.files)

# Assign feature sets (features and labels for each split, pairwise)
x_train, y_train = feature_sets['x_train'], feature_sets['y_train']
x_val, y_val = feature_sets['x_val'], feature_sets['y_val']
x_test, y_test = feature_sets['x_test'], feature_sets['y_test']

# Look at tensor dimensions
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

Trang 9

# Peek at labels
print(y_val)

# Convert ground truth arrays to one wake word (1) and 'other' (0)
wake_word_index = all_targets.index(wake_word)
y_train = np.equal(y_train, wake_word_index).astype('float64')
y_val = np.equal(y_val, wake_word_index).astype('float64')
y_test = np.equal(y_test, wake_word_index).astype('float64')

# Peek at labels after conversion
print(y_val)

# What percentage of 'one' appear in validation labels
print(sum(y_val) / len(y_val))
print(1 - sum(y_val) / len(y_val))

# View the dimensions of our input data

# CNN for TF expects (batch, height, width, channels)
# So we reshape the input tensors with a "color" channel of 1
x_train = x_train.reshape(x_train.shape[0],
                          x_train.shape[1],
                          x_train.shape[2],
                          1)
x_val = x_val.reshape(x_val.shape[0],
                      x_val.shape[1],
                      x_val.shape[2],
                      1)
x_test = x_test.reshape(x_test.shape[0],
                        x_test.shape[1],
                        x_test.shape[2],
                        1)
print(x_val.shape)
print(x_test.shape)

# Input shape for CNN is size of MFCC of 1 sample
sample_shape = x_test.shape[1:]
print(sample_shape)

Trang 10

# Build model
# Based on: https://www.geeksforgeeks.org/python-image-classification-using-keras/
model = models.Sequential()
model.add(layers.Conv2D(32,
                        (2, 2),
                        activation='relu',
                        input_shape=sample_shape))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(32, (2, 2), activation='relu'))

model.add(layers.Conv2D(64, (2, 2), activation='relu'))

# Classifier
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
# Single sigmoid unit: one output in [0, 1] for the binary wake-word label
model.add(layers.Dense(1, activation='sigmoid'))

# Display model
model.summary()

Trang 11

# Add training parameters to model
# Binary cross-entropy matches the single sigmoid output / 0-1 labels.
model.compile(loss='binary_crossentropy',

optimizer='rmsprop',

metrics=['acc'])

# Train
# The returned History object is kept so the curves can be plotted afterwards.
history = model.fit(x_train,

y_train,

epochs=30,

batch_size=100,

validation_data=(x_val, y_val))

# Plot results

import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit; the keys follow metrics=['acc']
acc = history.history['acc']

val_acc = history.history['val_acc']

loss = history.history['loss']

val_loss = history.history['val_loss']

# 1-based epoch numbers for the x axis
epochs = range(1, len(acc) + 1)

Trang 12

# Accuracy curves: dots for training, solid line for validation
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss curves on a second figure, same styling
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

# Save the model as a file
models.save_model(model, model_filename)

# See which test samples are the wake word (label 1)
for idx, y in enumerate(y_test):
    if y == 1:
        print(idx)

Trang 13

# TEST: Load model and run it against test set
model = models.load_model(model_filename)
for i in range(100, 110):
    # expand_dims adds the leading batch axis that predict() expects
    print('Answer:', y_test[i], ' Prediction:',
          model.predict(np.expand_dims(x_test[i], 0)))

# Evaluate model with test set
model.evaluate(x=x_test, y=y_test)

- Huấn luyện xong, mô hình đạt độ chính xác (accuracy) 0,9873 và giá trị mất mát (loss) 0,0691

- File xuất vào drive

Trang 14

from tensorflow import lite

from tensorflow.keras import models

%pwd

# Parameters
keras_model_filename = 'wake_word_one_model.h5'
tflite_filename = 'wake_word_one_lite.tflite'

# Convert model to TF Lite model
model = models.load_model(keras_model_filename)
converter = lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model inside a context manager so the file handle is
# flushed and closed even if the write fails.
with open(tflite_filename, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

- File sau khi train xong

Tiêu đề	Môn Thực Tập Hệ Thống Nhúng Báo Cáo Lab3 01 Speech Commands Mfcc Extraction
Tác giả	Ngô Minh Thái
Người hướng dẫn	Thầy Võ Minh Huân
Trường học	Trường Đại Học Sư Phạm Kỹ Thuật Tp. Hcm
Chuyên ngành	Hệ Thống Nhúng
Thể loại	Báo Cáo
Năm xuất bản	2022
Thành phố	Tp. Hồ Chí Minh

Định dạng
Số trang	15
Dung lượng	411,98 KB