4.2.1. Limitations of the model
- The model still cannot recognize digits that are written very poorly.
- The dataset used to train the network is still limited in both the number of samples and the image size.
- In some cases, when the image is too blurry or too noisy, the image-preprocessing stage cannot cope, which leads to incorrect predictions.
- The integration of neural networks into the system is still limited to a few architectures.
4.2.2. Proposed solutions
- Train the model further, with more and more varied data, so that it generalizes better.
- Upgrade the algorithm: the weight regularization and the dropout rate can be adjusted, as sketched below.
- Use more advanced networks such as YOLO, VGG, GoogLeNet, etc.
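A minimal sketch of this tuning idea, assuming the Keras CNN from the appendix as the starting point; the dropout rate of 0.5 and the L2 weight penalty of 1e-4 are illustrative values chosen for the example, not results from this work:

import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.regularizers import l2

model = Sequential()
# Same topology as the appendix CNN, but with an L2 penalty on the
# convolution weights and a higher dropout rate (0.5 instead of 0.25)
model.add(Conv2D(32, (3, 3), padding='same', activation='relu',
                 kernel_regularizer=l2(1e-4), input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu',
                 kernel_regularizer=l2(1e-4)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))  # illustrative rate; tune against validation accuracy
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])

For the advanced-network direction, keras.applications also ships pretrained VGG16 and InceptionV3 (GoogLeNet-family) models that could replace the hand-built networks.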
APPENDIX

LeNet code

import numpy as np
# import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, AveragePooling2D
from keras.callbacks import ModelCheckpoint

# Load MNIST and scale the pixel values to [0, 1]
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255
# Reshape the images to 28x28x1
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
# One-hot encode the labels
y_train = keras.utils.np_utils.to_categorical(y_train)
y_test = keras.utils.np_utils.to_categorical(y_test)
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

model = Sequential()
# C1 Convolutional Layer
model.add(Conv2D(filters=6, kernel_size=5, strides=1, activation='tanh',
                 input_shape=(28, 28, 1), padding='same'))
# S2 Pooling Layer
model.add(AveragePooling2D(pool_size=2, strides=2, padding='valid'))
# C3 Convolutional Layer
model.add(Conv2D(filters=16, kernel_size=5, strides=1, activation='tanh', padding='valid'))
# S4 Pooling Layer
model.add(AveragePooling2D(pool_size=2, strides=2, padding='valid'))
# C5 Convolutional Layer
model.add(Conv2D(filters=120, kernel_size=5, strides=1, activation='tanh', padding='valid'))
model.add(Flatten())
# FC6 Fully Connected Layer
model.add(Dense(units=84, activation='tanh'))
# FC7 Output layer with softmax activation
model.add(Dense(units=10, activation='softmax'))
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, keeping only the best checkpoint
checkpointer = ModelCheckpoint(filepath='LeNet_model.h5', verbose=1, save_best_only=True)
hist = model.fit(X_train, y_train, batch_size=32, epochs=100, verbose=1,
                 validation_data=(X_test, y_test), callbacks=[checkpointer])

# Load the saved model and evaluate it
model_s = keras.models.load_model('LeNet_model.h5')
score = model_s.evaluate(X_test, y_test, verbose=0)
print('\n', 'Test accuracy:', score[1])
CNN code

import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.callbacks import ModelCheckpoint

# Load MNIST, scale to [0, 1], reshape to 28x28x1 and one-hot encode the labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
y_train = keras.utils.np_utils.to_categorical(y_train)
y_test = keras.utils.np_utils.to_categorical(y_test)
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

model = Sequential()
# The start of this layer was lost at a page break in the source; a 32-filter
# 3x3 convolution is assumed here, mirroring the 64-filter layer below.
model.add(Conv2D(32, (3, 3), strides=1, padding='same',
                 activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, keeping only the best checkpoint
checkpointer = ModelCheckpoint(filepath='CNN_model.h5', verbose=1, save_best_only=True)
hist = model.fit(X_train, y_train, batch_size=32, epochs=100, verbose=1,
                 validation_data=(X_test, y_test), callbacks=[checkpointer])

# Load the saved model and evaluate it
model_s = keras.models.load_model('CNN_model.h5')
score = model_s.evaluate(X_test, y_test, verbose=0)
print('\n', 'Test accuracy:', score[1])
AlexNet code

import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, AveragePooling2D, Flatten, Dense, Activation, MaxPool2D, BatchNormalization, Dropout, MaxPooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Load MNIST and scale the pixel values to [0, 1]
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255
# Reshape the images to 28x28x1
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
# One-hot encode the labels
y_train = keras.utils.np_utils.to_categorical(y_train)
y_test = keras.utils.np_utils.to_categorical(y_test)
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

model = Sequential()
# 1st Convolutional Layer
model.add(Conv2D(filters=96, input_shape=(28, 28, 1), kernel_size=(3, 3), strides=1, padding='valid'))
model.add(Activation('relu'))
# Max Pooling
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
# 2nd Convolutional Layer
model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
# Max Pooling
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Flatten())
# 1st Fully Connected Layer
model.add(Dense(64))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.5))
# Output Layer
model.add(Dense(10))
model.add(Activation('softmax'))
model.summary()

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train the model, keeping only the best checkpoint
checkpointer = ModelCheckpoint(filepath='Alexnet_model1.h5', verbose=1, save_best_only=True)
hist = model.fit(X_train, y_train, batch_size=32, epochs=10, verbose=1,
                 validation_data=(X_test, y_test), callbacks=[checkpointer])

"""
model_s = keras.models.load_model('Alexnet_model.h5')
score = model_s.evaluate(X_test, y_test, verbose=0)
print('\n', 'Test accuracy:', score[1])
"""
PyQt GUI code

import sys
# pip install pyqt5
import cv2
import imutils
import numpy as np
import pygame
from pygame.locals import *
from PyQt5 import QtGui
from PyQt5.QtCore import QThread, pyqtSignal, Qt
from PyQt5.QtGui import QPixmap, QImage
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
from keras.models import load_model
# from Camera2 import on_threshold, threshold
from Giao_dien import Ui_MainWindow
class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        self.uic = Ui_MainWindow()
        self.uic.setupUi(self)
        # Connect the buttons of the generated UI to their handlers
        self.uic.Nut_Mo_Cam.clicked.connect(self.start_capture_video)
        self.uic.Nut_Bang_ve.clicked.connect(self.chupanh)
        self.uic.Nut_Chen_anh.clicked.connect(self.insertImage)
        self.uic.Nut_Doc_anh.clicked.connect(self.docanh)
        self.uic.Nut_Doc_anh_2.clicked.connect(self.stop_capture_video)
        self.uic.Chuyen_anh_xam.clicked.connect(self.xoayanh)
        self.uic.Chuyen_anh_xam_2.clicked.connect(self.chuyenanhxam)
        self.uic.Nut_Doc_anh_3.clicked.connect(self.uic.label.clear)
        self.uic.Nut_Doc_anh_3.clicked.connect(self.uic.label_4.clear)
        self.uic.Nut_Doc_anh_3.clicked.connect(self.uic.label_2.clear)
        self.thread = {}
        self.filename = None
        self.tmp = None
        self.thr_value_now = 0

    def Mobangve(self):
        # Drawing board: draw a digit with the mouse, press "l" to classify it,
        # press "n" to clear the board
        WINDOWSIZEX = 640
        WINDOWSIZEY = 480
        BOUNDARYINC = 5
        WHITE = (255, 255, 255)
        BLACK = (0, 0, 0)
        RED = (255, 0, 0)
        IMAGESAVE = False
        PREDICT = True
        iswriting = False
        number_xcord = []
        number_ycord = []
        image_cnt = 1
        MODEL = load_model("Alexnet_model.h5")
        LABELS = {0: "KHONG", 1: "MOT", 2: "HAI", 3: "BA", 4: "BON",
                  5: "NAM", 6: "SAU", 7: "BAY", 8: "TAM", 9: "CHIN"}
        pygame.init()
        FONT = pygame.font.Font("freesansbold.ttf", 18)
        DISPLAYSURF = pygame.display.set_mode((WINDOWSIZEX, WINDOWSIZEY))
        pygame.display.set_caption("Bang Ve")
        while True:
            # The event-loop header was lost at a page break in the source;
            # the standard pygame form is assumed here.
            for event in pygame.event.get():
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == MOUSEMOTION and iswriting:
                    xcord, ycord = event.pos
                    pygame.draw.circle(DISPLAYSURF, WHITE, (xcord, ycord), 4, 0)
                    number_xcord.append(xcord)
                    number_ycord.append(ycord)
                if event.type == MOUSEBUTTONDOWN:
                    iswriting = True
                if event.type == MOUSEBUTTONUP:
                    iswriting = False
                if event.type == KEYDOWN:
                    if event.unicode == "l":
                        number_xcord = sorted(number_xcord)
                        number_ycord = sorted(number_ycord)
                        # Bounding box of the stroke, padded by BOUNDARYINC pixels
                        rect_min_x, rect_max_x = max(number_xcord[0] - BOUNDARYINC, 0), min(WINDOWSIZEX, number_xcord[-1] + BOUNDARYINC)
                        rect_min_Y, rect_max_Y = max(number_ycord[0] - BOUNDARYINC, 0), min(number_ycord[-1] + BOUNDARYINC, WINDOWSIZEY)
                        number_xcord = []
                        number_ycord = []
                        img_arr = np.array(pygame.PixelArray(DISPLAYSURF))[rect_min_x:rect_max_x, rect_min_Y:rect_max_Y].T.astype(np.float32)
                        if PREDICT:
                            image = cv2.resize(img_arr, (rect_max_x - rect_min_x, rect_max_Y - rect_min_Y))
                            image = cv2.resize(image, (28, 28))
                            image = np.pad(image, (10, 10), 'constant', constant_values=0)
                            image = cv2.resize(image, (28, 28)) / 255
                            # Classify the drawn digit and print its label on the board
                            label = str(LABELS[np.argmax(MODEL.predict(image.reshape(1, 28, 28, 1)))])
                            textSurface = FONT.render(label, True, RED, WHITE)
                            textRecObj = textSurface.get_rect()
                            textRecObj.right, textRecObj.bottom = rect_max_x, rect_max_Y
                            DISPLAYSURF.blit(textSurface, textRecObj)
                    if event.unicode == "n":
                        # Clear the drawing board
                        DISPLAYSURF.fill(BLACK)
            pygame.display.update()
    ### Insert an image into the interface
    def insertImage(self):
        self.filename = QFileDialog.getOpenFileName(filter="Image (*.*)")[0]
        print(self.filename)
        self.image = cv2.imread(self.filename)
        self.setPhoto1(self.image)

    def setPhoto1(self, image):
        # Show a BGR image on label, scaled to fit by its orientation
        self.tmp = image
        h, w, ch = image.shape
        if w > h:
            image = imutils.resize(image, width=650)
            frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = QImage(frame, frame.shape[1], frame.shape[0], frame.strides[0], QImage.Format_RGB888)
            self.uic.label.setPixmap(QtGui.QPixmap.fromImage(image))
        else:
            image = imutils.resize(image, height=400)
            frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = QImage(frame, frame.shape[1], frame.shape[0], frame.strides[0], QImage.Format_RGB888)
            self.uic.label.setPixmap(QtGui.QPixmap.fromImage(image))

    def setPhoto2(self, image):
        # Same as setPhoto1, but displays on label_4
        self.tmp = image
        h, w, ch = image.shape
        if w > h:
            image = imutils.resize(image, width=650)
            frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = QImage(frame, frame.shape[1], frame.shape[0], frame.strides[0], QImage.Format_RGB888)
            self.uic.label_4.setPixmap(QtGui.QPixmap.fromImage(image))
        else:
            image = imutils.resize(image, height=400)
            frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = QImage(frame, frame.shape[1], frame.shape[0], frame.strides[0], QImage.Format_RGB888)
            self.uic.label_4.setPixmap(QtGui.QPixmap.fromImage(image))

    def setPhoto3(self, image):
        # This method receives the single-channel (thresholded) images produced
        # by chuyenanhxam() and chupanh(), so GRAY2RGB is used here
        self.tmp = image
        image = imutils.resize(image, width=650)
        frame = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # The start of this QImage call was lost at a page break in the source;
        # it is reconstructed to match setPhoto1/setPhoto2 above.
        image = QImage(frame, frame.shape[1], frame.shape[0], frame.strides[0], QImage.Format_RGB888)
        self.uic.label_4.setPixmap(QtGui.QPixmap.fromImage(image))

    ### Convert the selected image to black and white
    def chuyenanhxam(self):
        image = cv2.imread(self.filename)
        im_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        ret, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)
        ret, im_th = cv2.threshold(im_th, 90, 255, cv2.THRESH_BINARY_INV)
        self.filename = 'C:/Users/Administrator/PycharmProjects/pt5/Anh/Anhdentrang.jpg'
        cv2.imwrite(self.filename, im_th)
        self.setPhoto3(im_th)

    ### Rotate the selected image 90 degrees clockwise
    def xoayanh(self):
        img = cv2.imread(self.filename)
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        self.filename = 'C:/Users/Administrator/PycharmProjects/pt5/Anh/Anhdaxoay.jpg'
        cv2.imwrite(self.filename, img)
        self.setPhoto2(img)
    ### Read the digits in the selected image
    def docanh(self, image):
        if self.filename is not None:
            image = cv2.imread(self.filename)
            im_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)
            # Threshold the image
            ret, im_th = cv2.threshold(im_gray, 80, 255, cv2.THRESH_BINARY_INV)
            # Find contours in the image
            ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # Bounding rectangle of each contour
            rects = [cv2.boundingRect(ctr) for ctr in ctrs]
            # For each rectangular region, predict the digit with the trained CNN
            MODEL = load_model("LeNet_model.h5")
            list = []
            lenth = len(rects)
            tmp = [0, 0, 0, 0]
            # Sort the rectangles from left to right
            for i in range(0, lenth - 1):
                for j in range(i + 1, lenth):
                    if (rects[i][0] > rects[j][0]):
                        # Swap positions
                        tmp = rects[i]
                        rects[i] = rects[j]
                        rects[j] = tmp
            # Then sort by row (a rectangle more than one box-height above comes first)
            for i in range(0, lenth - 1):
                for j in range(i + 1, lenth):
                    if (rects[i][1] > rects[j][1]) and (rects[i][1] - rects[j][1]) > rects[i][3]:
                        # Swap positions
                        tmp = rects[i]
                        rects[i] = rects[j]
                        rects[j] = tmp
            # Within a row, sort from left to right
            for i in range(0, lenth - 1):
                for j in range(i + 1, lenth):
                    if (rects[i][0] > rects[j][0]) and abs(rects[i][1] - rects[j][1]) < (rects[i][1] / 10):
                        # Swap positions (the middle line of this swap was lost at a
                        # page break in the source; restored to match the swaps above)
                        tmp = rects[i]
                        rects[i] = rects[j]
                        rects[j] = tmp
            for rect in rects:
                x, y, w, h = rect
                s = w * h
                # Skip tiny regions (noise)
                if s > 1000:
                    # Draw the rectangle
                    cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
                    # Make a square region around the digit
                    leng = int(rect[3] * 1.2)
                    pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
                    pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
                    roi = im_th[pt1:pt1 + leng, pt2:pt2 + leng]
                    # Resize the region to the 28x28 network input
                    roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
                    roi = cv2.dilate(roi, (3, 3))
                    # Predict the digit with the CNN
                    label = [str([np.argmax(MODEL.predict(roi.reshape(1, 28, 28, 1)))])]
                    list = list + label
                    cv2.putText(image, label[0], (rect[0], rect[1]), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0), 1)
            self.setPhoto2(image)
            self.uic.label_2.setText(str(list))

    ### Save the current camera frame and display it
    def chupanh(self):
        self.filename = 'C:/Users/Administrator/PycharmProjects/pt5/Anh/Anhdaluu.jpg'
        # print(self.filename)
        cv2.imwrite(self.filename, self.image)
        self.setPhoto3(self.image)
    ### Open the camera
    def closeEvent(self, event):
        self.stop_capture_video()
        if 2 in self.thread:
            self.thread[2].stop()
        self.sn_chenanh = False
        self.sn_mocam = False

    def stop_capture_video(self):
        if 1 in self.thread:
            self.thread[1].stop()
        self.sn_mocam = False

    def start_capture_video(self):
        self.filename = None
        self.thread[1] = capture_video(index=1)
        self.thread[1].start()
        self.thread[1].signal.connect(self.show_wedcam)

    def show_wedcam(self, cv_img):
        """Updates the image label with a new OpenCV frame"""
        grayFrame = cv2.cvtColor(cv_img, cv2.COLOR_RGB2GRAY)
        _, grayFrame = cv2.threshold(grayFrame, 90, 255, cv2.THRESH_BINARY_INV)
        _, self.image = cv2.threshold(grayFrame, 90, 255, cv2.THRESH_BINARY_INV)
        self.setPhoto1(cv_img)


class capture_video(QThread):
    signal = pyqtSignal(np.ndarray)

    def __init__(self, index):
        self.index = index
        print("start threading", self.index)
        super(capture_video, self).__init__()

    def run(self):
        # Read frames from the default camera and emit them to the GUI thread
        cap = cv2.VideoCapture(0)
        while True:
            ret, cv_img = cap.read()
            if ret:  # only resize and emit when a frame was actually captured
                cv_img = cv2.resize(cv_img, (650, 400))
                self.signal.emit(cv_img)

    def stop(self):
        print("stop threading", self.index)
        self.terminate()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    main_win = MainWindow()
    main_win.show()
    sys.exit(app.exec())