Ứng Dụng Mô Hình Chuyển Đổi Thị Giác Cho Bài Toán Phân Loại Và Diễn Giải Ảnh Y Tế.pdf

THÔNG TIN TÀI LIỆU

Untitled BỘ GIÁO DỤC VÀ ĐÀO TẠO TRƯỜNG ĐẠI HỌC SƯ PHẠM KỸ THUẬT THÀNH PHỐ HỒ CHÍ MINH S K C 0 0 3 9 5 9 CÔNG TRÌNH Tp Hồ Chí Minh, MÃ SỐ S KC0 0 7 6 7 6 BỘ GIÁO DỤC VÀ ĐÀO TẠO TRƯỜNG ĐH SƯ PHẠM KỸ THU[.]

BỘ GIÁO DỤC VÀ ĐÀO TẠO TRƯỜNG ĐẠI HỌC SƯ PHẠM KỸ THUẬT THÀNH PHỐ HỒ CHÍ MINH CƠNG TRÌNH 1*+,ầ1&8.+2$+&&$6,1+9,ầ1 1*'1*0é+ẻ1+&+8 1: loss = loss / args.gradient_accumulation_steps if args.fp16: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() if (step + 1) % args.gradient_accumulation_steps == 0: losses.update(loss.item()*args.gradient_accumulation_steps) if args.fp16: torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm) else: torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) scheduler.step() optimizer.step() optimizer.zero_grad() global_step += 60 epoch_iterator.set_description( "Training (%d / %d Steps) (loss=%2.5f)" % (global_step, t_total, losses.val) ) if args.local_rank in [-1, 0]: writer.add_scalar("train/loss", scalar_value=losses.val, global_step=global_step) writer.add_scalar("train/lr", scalar_value=scheduler.get_lr()[0], global_step=global_step) if global_step % args.eval_every == and args.local_rank in [-1, 0]: accuracy = valid(args, model, writer, test_loader, global_step) if best_acc < accuracy: save_model(args, model) best_acc = accuracy model.train() if global_step % t_total == 0: break losses.reset() if global_step % t_total == 0: break if args.local_rank in [-1, 0]: writer.close() logger.info("Best Accuracy: \t%f" % best_acc) logger.info("End Training!") def main(): parser = argparse.ArgumentParser() # Required parameters 61 parser.add_argument(" name", required=True, help="Name of this run Used for monitoring.") parser.add_argument(" dataset", choices=["cifar10", "cifar100"], default="cifar10", help="Which downstream task.") parser.add_argument(" model_type", choices=["ViT-B_16", "ViT-B_32", "ViTL_16", "ViT-L_32", "ViT-H_14", "R50-ViT-B_16", "ViT-S_16", "R26+ViT-S_32", "R+ViT-Ti_16"], default="ViT-B_16", help="Which variant to use.") parser.add_argument(" pretrained_dir", type=str, default="checkpoint/ViT- B_16.npz", help="Where to search for pretrained ViT models.") parser.add_argument(" output_dir", default="output", type=str, help="The output directory where checkpoints will be written.") parser.add_argument(" img_size", default=224, type=int, help="Resolution size") parser.add_argument(" train_batch_size", default=512, type=int, help="Total batch size for training.") parser.add_argument(" eval_batch_size", default=64, type=int, help="Total batch size for eval.") parser.add_argument(" eval_every", default=100, type=int, help="Run prediction on validation set every so many steps." "Will always run one evaluation at the end of training.") parser.add_argument(" learning_rate", default=3e-2, type=float, help="The initial learning rate for SGD.") parser.add_argument(" weight_decay", default=0, type=float, help="Weight deay if we apply some.") parser.add_argument(" num_steps", default=10000, type=int, help="Total number of training epochs to perform.") 62 parser.add_argument(" decay_type", choices=["cosine", "linear"], default="cosine", help="How to decay the learning rate.") parser.add_argument(" warmup_steps", default=500, type=int, help="Step of training to perform learning rate warmup for.") parser.add_argument(" max_grad_norm", default=1.0, type=float, help="Max gradient norm.") parser.add_argument(" local_rank", type=int, default=-1, help="local_rank for distributed training on gpus") parser.add_argument(' seed', type=int, default=42, help="random seed for initialization") parser.add_argument(' gradient_accumulation_steps', type=int, default=1, help="Number of updates steps to accumulate before performing a backward/update pass.") parser.add_argument(' fp16', action='store_true', help="Whether to use 16-bit float precision instead of 32-bit") parser.add_argument(' fp16_opt_level', type=str, default='O2', help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." "See details at https://nvidia.github.io/apex/amp.html") parser.add_argument(' loss_scale', type=float, default=0, help="Loss scaling to improve fp16 numeric stability Only used when fp16 set to True.\n" "0 (default value): dynamic loss scaling.\n" "Positive power of 2: static loss scaling value.\n") args = parser.parse_args() # Setup CUDA, GPU & distributed training if args.local_rank == -1: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") args.n_gpu = torch.cuda.device_count() 63 else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) torch.distributed.init_process_group(backend='nccl', timeout=timedelta(minutes=60)) args.n_gpu = args.device = device # Setup logging logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN) logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s" % (args.local_rank, args.device, args.n_gpu, bool(args.local_rank != -1), args.fp16)) # Set seed set_seed(args) # Model & Tokenizer Setup args, model = setup(args) # Training train(args, model) save_model(args, model) if name == " main ": main() 64 ➢ Chương trình hiển thị Attention Map import typing import io import os import torch import numpy as np import cv2 import matplotlib.pyplot as plt from urllib.request import urlretrieve from PIL import Image from torchvision import transforms, datasets from torch.utils.data import DataLoader from models.modeling import VisionTransformer, CONFIGS # Prepare Model config = CONFIGS["R50-ViT-B_16"] model = VisionTransformer(config, num_classes=10, zero_head=False, img_size=224, vis=True) PATH='model /Finetune_Chest14_R50B16_enhance.bin' model.load_state_dict(torch.load(PATH)) model.eval() VinXray_labels=dict(enumerate(open(' /VinXrayLabel.txt'))) transform = transforms.Compose([ transforms.Resize((224, 224)), 65 transforms.Grayscale(num_output_channels=3), transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5]), ]) im=Image.open("Images/051c2436c0acdb5e09d085c7e4a764f3.jpg") def get_attention_map(img, get_mask=False): x = transform(img) x.size() logits, att_mat = model(x.unsqueeze(0)) print('logits =',logits) att_mat = torch.stack(att_mat).squeeze(1) # Average the attention weights across all heads att_mat = torch.mean(att_mat, dim=1) # To account for residual connections, we add an identity matrix to the # attention matrix and re-normalize the weights residual_att = torch.eye(att_mat.size(1)) aug_att_mat = att_mat + residual_att aug_att_mat = aug_att_mat / aug_att_mat.sum(dim=-1).unsqueeze(-1) # Recursively multiply the weight matrices joint_attentions = torch.zeros(aug_att_mat.size()) joint_attentions[0] = aug_att_mat[0] for n in range(1, aug_att_mat.size(0)): joint_attentions[n] = torch.matmul(aug_att_mat[n], joint_attentions[n-1]) v = joint_attentions[-1] grid_size = int(np.sqrt(aug_att_mat.size(-1))) 66 mask = v[0, 1:].reshape(grid_size, grid_size).detach().numpy() if get_mask: result = cv2.resize(mask / mask.max(), img.size) else: mask = cv2.resize(mask / mask.max(), img.size)[ , np.newaxis] result = (mask * img).astype("uint8") return result def plot_attention_map(original_img, att_map): fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 16)) ax1.set_title('Original') ax2.set_title('Attention Map Last Layer') _ = ax1.imshow(original_img) _ = ax2.imshow(att_map) result = get_attention_map(im) plot_attention_map(im, result) # Check mask for Attention Map check_mask = get_attention_map(im, True) plot_attention_map(im, check_mask) ➢ Chương trình hiển thị Grad-CAM from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, LayerCAM, FullGrad 67 from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image, deprocess_image from pytorch_grad_cam import GuidedBackpropReLUModel import torch import torch.nn as nn from torchvision import models import cv2 import numpy as np import matplotlib.pyplot as plt from PIL import Image #Load model model=torch.load('model/ resnet50_ft_VinXray10class.pth') model.eval() #Select the target layer target_layer = [model.layer4[-1]] image_path='Images/1b2a7adb5705d9e3f5b63939046d93c7.jpg' img = cv2.imread(image_path, 1)[:, :, ::-1] # Is read rgb img = cv2.resize(img, (224, 224)) img = np.float32(img) / 255 input_tensor = preprocess_image(img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) #Construct the CAM object once, and then re-use it on many images: cam = GradCAM(model=model, target_layers=target_layer, use_cuda=True) def show_cam(mask: np.ndarray,use_rgb: bool = False, colormap: int = cv2.COLORMAP_JET) -> np.ndarray: """ This function overlays the cam mask on the image as an heatmap By default the heatmap is in BGR format 68 :param img: The base image in RGB or BGR format :param mask: The cam mask :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format :param colormap: The OpenCV colormap to be used :returns: The default image with the cam overlay """ heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap) if use_rgb: heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) heatmap = np.float32(heatmap) / 255 if np.max(img) > 1: raise Exception( "The input image should np.float32 in the range [0, 1]") cam = heatmap cam = cam / np.max(cam) return np.uint8(255 * cam) #Show cam target_category = None cam.batch_size = 32 #Calculation cam grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category, aug_smooth=True, eigen_smooth=True) #Display and save the heat map , grayscale_cam It's a batch Result , Only one can be selected for display grayscale_cam = grayscale_cam[0,:] visualization = show_cam(grayscale_cam, use_rgb=False) cv2.imwrite(f'cam_image.jpg', visualization) 69 cam_image = Image.open('cam_image.jpg') fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 12)) ax1.set_title('Original') ax2.set_title('Grad-cam') _ = ax1.imshow(img) _ = ax2.imshow(cam_image) plt.show() #Show cam on image visualization = show_cam_on_image(img, grayscale_cam, use_rgb=False) cv2.imwrite(f'cam_image.jpg', visualization) cam_image = Image.open('cam_image.jpg') fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 12)) ax1.set_title('Original') ax2.set_title('Grad-cam') _ = ax1.imshow(img) _ = ax2.imshow(cam_image) plt.show() 70 ➢ Chương trình Đánh giá định lượng kết phân loại mô hình: import numpy as np import pandas as pd def check_FileName(GTData,PDData): GTNAME = GTData['ImageName'].to_list() PDNAME = PDData['ImageName'].to_list() count = for i in range(len(PDNAME)): if PDNAME[i] in GTNAME: count = count + return len(PDNAME)== count def check_Col(GTData,PDData): GT_Col = GTData.columns PD_Col = PDData.columns count = for i in range(len(GT_Col)): if GT_Col[i] == PD_Col[i]: count = count + return len(GT_Col)== count class Evaluator: def init (self, GTfile, PDFile): self.GTname = GTfile self.GTData= pd.read_csv(self.GTname) self.PDname = PDFile self.PDData = pd.read_csv(self.PDname) if not(check_FileName(self.GTData, self.PDData)): raise ValueError("File Name not pass") 71 if not(check_Col(self.GTData, self.PDData)): raise ValueError("Collume not pass") def get_result(self, thre=0.7, k=[1,3,5]): FileName=self.PDData['ImageName'].to_list() col=self.PDData.columns[2:] PreCision=[] ReCall=[] ACC_k=[] for filename in FileName: # print(filename) gt=self.GTData[self.GTData['ImageName']==filename][col].to_numpy() pred=self.PDData[self.PDData['ImageName']==filename][col].to_numpy() The=pred.max()*thre pred_thd=pred>=The pred_thd=pred_thd.astype('float') precision=np.sum(gt*pred_thd)/np.sum(pred_thd) recall = np.sum(gt * pred_thd) / np.sum(gt) PreCision.append(precision) ReCall.append(recall) A_temp=[] y_sort=np.argsort(pred) for i in range (len(k)): y_temp=np.zeros_like(gt) K=k[i] for j in range(K): y_temp[0][int(y_sort[0][-(j+1)])]=1.0 72 r=np.sum(y_temp*gt)>0 if r: A_temp.append(1.0) else: A_temp.append(0.0) ACC_k.append(A_temp) self.Pre_R=np.mean(np.array(PreCision)) self.ReC_R = np.mean(np.array(ReCall)) self.ACC_R =np.mean(np.array(ACC_k),axis=0) if name == " main ": 73 S K L 0

Ngày đăng: 21/06/2023, 21:03

Xem thêm: