Tài liệu hạn chế xem trước, để xem đầy đủ mời bạn chọn Tải xuống
1
/ 47 trang
THÔNG TIN TÀI LIỆU
Thông tin cơ bản
Định dạng
Số trang
47
Dung lượng
508,68 KB
Nội dung
Developing a Filesystem for the Linux Kernel 397 983 ux_prepare_write(struct file *file, struct page *page, 984 unsigned from, unsigned to) 985 { 986 return block_prepare_write(page, from, to, ux_get_block); 987 } 988 989 int 990 ux_bmap(struct address_space *mapping, long block) 991 { 992 return generic_block_bmap(mapping, block, ux_get_block); 993 } 994 995 struct address_space_operations ux_aops = { 996 readpage: ux_readpage, 997 writepage: ux_writepage, 998 sync_page: block_sync_page, 999 prepare_write: ux_prepare_write, 1000 commit_write: generic_commit_write, 1001 bmap: ux_bmap, 1002 }; 1003 1004 struct inode_operations ux_file_inops = { 1005 link: ux_link, 1006 unlink: ux_unlink, 1007 }; 1008 1009 /* */ 1010 /* ux_inode.c */ 1011 /* */ 1012 1013 #include <linux/module.h> 1014 #include <linux/mm.h> 1015 #include <linux/slab.h> 1016 #include <linux/init.h> 1017 #include <linux/locks.h> 1018 #include <linux/smp_lock.h> 1019 #include <asm/uaccess.h> 1020 #include "ux_fs.h" 1021 1022 MODULE_AUTHOR("Steve Pate <spate@veritas.com>"); 1023 MODULE_DESCRIPTION("A primitive filesystem for Linux"); 1024 MODULE_LICENSE("GPL"); 1025 1026 /* 1027 * This function looks for "name" in the directory "dip". 1028 * If found the inode number is returned. 
1029 */ 1030 1031 int 1032 ux_find_entry(struct inode *dip, char *name) 1033 { 1034 struct ux_inode *uip = (struct ux_inode *) 1035 &dip->i_private; 1036 struct super_block *sb = dip->i_sb; 1037 struct buffer_head *bh; 398 UNIX Filesystems—Evolution, Design, and Implementation 1038 struct ux_dirent *dirent; 1039 int i, blk = 0; 1040 1041 for (blk=0 ; blk < uip->i_blocks ; blk++) { 1042 bh = sb_bread(sb, uip->i_addr[blk]); 1043 dirent = (struct ux_dirent *)bh->b_data; 1044 for (i=0 ; i < UX_DIRS_PER_BLOCK ; i++) { 1045 if (strcmp(dirent->d_name, name) == 0) { 1046 brelse(bh); 1047 return dirent->d_ino; 1048 } 1049 dirent++; 1050 } 1051 } 1052 brelse(bh); 1053 return 0; 1054 } 1055 1056 /* 1057 * This function is called in response to an iget(). For 1058 * example, we call iget() from ux_lookup(). 1059 */ 1060 1061 void 1062 ux_read_inode(struct inode *inode) 1063 { 1064 struct buffer_head *bh; 1065 struct ux_inode *di; 1066 unsigned long ino = inode->i_ino; 1067 int block; 1068 1069 if (ino < UX_ROOT_INO || ino > UX_MAXFILES) { 1070 printk("uxfs: Bad inode number %lu\n", ino); 1071 return; 1072 } 1073 1074 /* 1075 * Note that for simplicity, there is only one 1076 * inode per block! 
1077 */ 1078 1079 block = UX_INODE_BLOCK + ino; 1080 bh = sb_bread(inode->i_sb, block); 1081 if (!bh) { 1082 printk("Unable to read inode %lu\n", ino); 1083 return; 1084 } 1085 1086 di = (struct ux_inode *)(bh->b_data); 1087 inode->i_mode = di->i_mode; 1088 if (di->i_mode & S_IFDIR) { 1089 inode->i_mode |= S_IFDIR; 1090 inode->i_op = &ux_dir_inops; 1091 inode->i_fop = &ux_dir_operations; 1092 } else if (di->i_mode & S_IFREG) { Developing a Filesystem for the Linux Kernel 399 1093 inode->i_mode |= S_IFREG; 1094 inode->i_op = &ux_file_inops; 1095 inode->i_fop = &ux_file_operations; 1096 inode->i_mapping->a_ops = &ux_aops; 1097 } 1098 inode->i_uid = di->i_uid; 1099 inode->i_gid = di->i_gid; 1100 inode->i_nlink = di->i_nlink; 1101 inode->i_size = di->i_size; 1102 inode->i_blocks = di->i_blocks; 1103 inode->i_blksize = UX_BSIZE; 1104 inode->i_atime = di->i_atime; 1105 inode->i_mtime = di->i_mtime; 1106 inode->i_ctime = di->i_ctime; 1107 memcpy(&inode->i_private, di, sizeof(struct ux_inode)); 1108 brelse(bh); 1109 } 1110 1111 /* 1112 * This function is called to write a dirty inode to disk. 
1113 */ 1114 1115 void 1116 ux_write_inode(struct inode *inode, int unused) 1117 { 1118 unsigned long ino = inode->i_ino; 1119 struct ux_inode *uip = (struct ux_inode *) 1120 &inode->i_private; 1121 struct buffer_head *bh; 1122 __u32 blk; 1123 1124 if (ino < UX_ROOT_INO || ino > UX_MAXFILES) { 1125 printk("uxfs: Bad inode number %lu\n", ino); 1126 return; 1127 } 1128 blk = UX_INODE_BLOCK + ino; 1129 bh = sb_bread(inode->i_sb, blk); 1130 uip->i_mode = inode->i_mode; 1131 uip->i_nlink = inode->i_nlink; 1132 uip->i_atime = inode->i_atime; 1133 uip->i_mtime = inode->i_mtime; 1134 uip->i_ctime = inode->i_ctime; 1135 uip->i_uid = inode->i_uid; 1136 uip->i_gid = inode->i_gid; 1137 uip->i_size = inode->i_size; 1138 memcpy(bh->b_data, uip, sizeof(struct ux_inode)); 1139 mark_buffer_dirty(bh); 1140 brelse(bh); 1141 } 1142 1143 /* 1144 * This function gets called when the link count goes to zero. 1145 */ 1146 1147 void 400 UNIX Filesystems—Evolution, Design, and Implementation 1148 ux_delete_inode(struct inode *inode) 1149 { 1150 unsigned long inum = inode->i_ino; 1151 struct ux_inode *uip = (struct ux_inode *) 1152 &inode->i_private; 1153 struct super_block *sb = inode->i_sb; 1154 struct ux_fs *fs = (struct ux_fs *) 1155 sb->s_private; 1156 struct ux_superblock *usb = fs->u_sb; 1157 int i; 1158 1159 usb->s_nbfree += uip->i_blocks; 1160 for (i=0 ; i < uip->i_blocks ; i++) { 1161 usb->s_block[uip->i_addr[i]] = UX_BLOCK_FREE; 1162 uip->i_addr[i] = UX_BLOCK_FREE; 1163 } 1164 usb->s_inode[inum] = UX_INODE_FREE; 1165 usb->s_nifree++; 1166 sb->s_dirt = 1; 1167 clear_inode(inode); 1168 } 1169 1170 /* 1171 * This function is called when the filesystem is being 1172 * unmounted. We free the ux_fs structure allocated during 1173 * ux_read_super() and free the superblock buffer_head. 
1174 */ 1175 1176 void 1177 ux_put_super(struct super_block *s) 1178 { 1179 struct ux_fs *fs = (struct ux_fs *)s->s_private; 1180 struct buffer_head *bh = fs->u_sbh; 1181 1182 /* 1183 * Free the ux_fs structure allocated by ux_read_super 1184 */ 1185 1186 kfree(fs); 1187 brelse(bh); 1188 } 1189 1190 /* 1191 * This function will be called by the df command. 1192 */ 1193 1194 int 1195 ux_statfs(struct super_block *sb, struct statfs *buf) 1196 { 1197 struct ux_fs *fs = (struct ux_fs *)sb->s_private; 1198 struct ux_superblock *usb = fs->u_sb; 1199 1200 buf->f_type = UX_MAGIC; 1201 buf->f_bsize = UX_BSIZE; 1202 buf->f_blocks = UX_MAXBLOCKS; Developing a Filesystem for the Linux Kernel 401 1203 buf->f_bfree = usb->s_nbfree; 1204 buf->f_bavail = usb->s_nbfree; 1205 buf->f_files = UX_MAXFILES; 1206 buf->f_ffree = usb->s_nifree; 1207 buf->f_fsid.val[0] = kdev_t_to_nr(sb->s_dev); 1208 buf->f_namelen = UX_NAMELEN; 1209 return 0; 1210 } 1211 1212 /* 1213 * This function is called to write the superblock to disk. We 1214 * simply mark it dirty and then set the s_dirt field of the 1215 * in-core superblock to 0 to prevent further unnecessary calls. 
1216 */ 1217 1218 void 1219 ux_write_super(struct super_block *sb) 1220 { 1221 struct ux_fs *fs = (struct ux_fs *) 1222 sb->s_private; 1223 struct buffer_head *bh = fs->u_sbh; 1224 1225 if (!(sb->s_flags & MS_RDONLY)) { 1226 mark_buffer_dirty(bh); 1227 } 1228 sb->s_dirt = 0; 1229 } 1230 1231 struct super_operations uxfs_sops = { 1232 read_inode: ux_read_inode, 1233 write_inode: ux_write_inode, 1234 delete_inode: ux_delete_inode, 1235 put_super: ux_put_super, 1236 write_super: ux_write_super, 1237 statfs: ux_statfs, 1238 }; 1239 1240 struct super_block * 1241 ux_read_super(struct super_block *s, void *data, int silent) 1242 { 1243 struct ux_superblock *usb; 1244 struct ux_fs *fs; 1245 struct buffer_head *bh; 1246 struct inode *inode; 1247 kdev_t dev; 1248 1249 dev = s->s_dev; 1250 set_blocksize(dev, UX_BSIZE); 1251 s->s_blocksize = UX_BSIZE; 1252 s->s_blocksize_bits = UX_BSIZE_BITS; 1253 1254 bh = sb_bread(s, 0); 1255 if(!bh) { 1256 goto out; 1257 } 402 UNIX Filesystems—Evolution, Design, and Implementation 1258 usb = (struct ux_superblock *)bh->b_data; 1259 if (usb->s_magic != UX_MAGIC) { 1260 if (!silent) 1261 printk("Unable to find uxfs filesystem\n"); 1262 goto out; 1263 } 1264 if (usb->s_mod == UX_FSDIRTY) { 1265 printk("Filesystem is not clean. Write and " 1266 "run fsck!\n"); 1267 goto out; 1268 } 1269 1270 /* 1271 * We should really mark the superblock to 1272 * be dirty and write it back to disk. 
1273 */ 1274 1275 fs = (struct ux_fs *)kmalloc(sizeof(struct ux_fs), 1276 GFP_KERNEL); 1277 fs->u_sb = usb; 1278 fs->u_sbh = bh; 1279 s->s_private = fs; 1280 1281 s->s_magic = UX_MAGIC; 1282 s->s_op = &uxfs_sops; 1283 1284 inode = iget(s, UX_ROOT_INO); 1285 if (!inode) { 1286 goto out; 1287 } 1288 s->s_root = d_alloc_root(inode); 1289 if (!s->s_root) { 1290 iput(inode); 1291 goto out; 1292 } 1293 1294 if (!(s->s_flags & MS_RDONLY)) { 1295 mark_buffer_dirty(bh); 1296 s->s_dirt = 1; 1297 } 1298 return s; 1299 1300 out: 1301 return NULL; 1302 } 1303 1304 static DECLARE_FSTYPE_DEV(uxfs_fs_type, "uxfs", ux_read_super); 1305 1306 static int __init init_uxfs_fs(void) 1307 { 1308 return register_filesystem(&uxfs_fs_type); 1309 } 1310 1311 static void __exit exit_uxfs_fs(void) 1312 { Developing a Filesystem for the Linux Kernel 403 1313 unregister_filesystem(&uxfs_fs_type); 1314 } 1315 1316 module_init(init_uxfs_fs) 1317 module_exit(exit_uxfs_fs) Suggested Exercises Because the filesystem presents only a basic set of operations, there are several things that can be added to increase functionality. There are also several bugs that exist in the filesystem as it stands that could be fixed. This section contains numerous different exercises that readers can follow either to simply experiment with the filesystem as is or to add additional capabilities. Simply playing with the filesystem, compiling kernels, and using one of the kernel level debuggers is a significant amount of work in itself. Don’t underestimate the amount of time that it can take to achieve these tasks. However, the amount of Linux support information on the World Wide Web is extremely good, so it is usually reasonably easy to find answers to most Linux-related questions. Beginning to Intermediate Exercises The exercises in this section can be made to the existing filesystem without changing the underlying disk layout. Some of these exercises involve careful analysis and some level of testing. 1. 
What is significant about the uxfs magic number? 2. As a simple way of analyzing the filesystem when running, the silent argument to ux_read_super() can be used to enable debugging. Add some calls to printk() to the filesystem, which are only activated when the silent option is specified. The first step is to determine under what conditions the silent flag is set. The ux_read_super() function provides one example of how silent is used. 3. There are several functions that have not been implemented, such as symbolic links. Look at the various operations vectors and determine which file operations will not work. For each of these functions, locate the place in the kernel where the functions would be called from. 4. For the majority of the operations on the filesystem, various timestamps are not updated. By comparing uxfs with one of the other Linux filesystems—for example ext2—identify those areas where the timestamp updates are missing and implement changes to the filesystem to provide these updates. 5. When the filesystem is mounted, the superblock field s_mod should be set to UX_FSDIRTY and the superblock should be written back to disk. There is already code within ux_read_super() to handle and reject a dirty filesystem. Add this additional feature, but be warned that there is a bug in 404 UNIX Filesystems—Evolution, Design, and Implementation ux_read_super() that must be fixed for this feature to work correctly. Add an option to fsdb to mark the superblock dirty to help test this example. 6. Locate the Loopback Filesystem HOWTO on the World Wide Web and use this to build a device on which a uxfs filesystem can be made. 7. There are places in the filesystem where inodes and buffers are not released correctly. When performing some operations and then unmounting the filesystem, warnings will be displayed by the kernel. 
Advanced Exercises The following exercises require more modification to the filesystem and require either substantial modification to the command and/or kernel source: 1. If the system crashes the filesystem could be left in an unstable state. Implement a fsck command that can both detect and repair any such inconsistencies. One method of testing a version of fsck is to modify fsdb to actually break the filesystem. Study operations such as directory creation to see how many I/O operations constitute creating the directory. By simulating a subset of these I/O, the filesystem can be left in a state which is not structurally intact. 2. Introduce the concept of indirect, double indirect, and triple indirects. Allow 6 direct blocks, 2 indirect blocks, and 1 triple indirect block to be referenced directly from the inode. What size file does this allow? 3. If the module panics, the kernel is typically able to detect that the uxfs module is at fault and allows the kernel to continue running. If a uxfs filesystem is already mounted, the module is unable to unload because the filesystem is busy. Look at ways in which the filesystem could be unmounted allowing the module to be unloaded. 4. The uxfs filesystem would not work at all well in an SMP environment. By analyzing other Linux filesystems, suggest improvements that could be made to allow uxfs to work in an SMP system. Suggest methods by which coarse grain as well as fine grain locks could be employed. 5. Removing a directory entry leaves a gap within the directory structure. Write a user-level program that enters the filesystem and reorganizes the directory so that unused space is removed. What mechanisms can be used to enter the filesystem? 6. Modify the filesystem to use bitmaps for both inodes and data blocks. Ensure that the bitmaps and blockmaps are separate from the actual superblock. 
This will involve substantial modifications to both the existing disk layout and in-core structures used to manage filesystem resources. 7. Allow the user to specify the filesystem block size and also the size of the filesystem. This will involve changing the on-disk layout. Developing a Filesystem for the Linux Kernel 405 8. Study the NFS Linux kernel code and other filesystems to see how NFS file handles are constructed. To avoid invalid file handles due to files being removed and the inode number being reused, filesystems typically employ a generation count. Implement this feature in uxfs. Summary As the example filesystem here shows, even with the most minimal set of features and limited operations, and although the source code base is small, there are still a lot of kernel concepts to grasp in order to understand how the filesystem works. Understanding which operations need to be supported and the order in which they occur is a difficult task. For those wishing to write a new filesystem for Linux, the initial learning curve can be overcome by taking a simple filesystem and instrumenting it with printk() calls to see which functions are invoked in response to certain user-level operations and in what order. The uxfs filesystem, although very limited in its abilities, is a simple filesystem from which to learn. Hopefully, the examples shown here provide enough information on which to experiment. I would of course welcome feedback so that I can update any of the material on the Web site where the source code is based: www.wiley.com/compbooks/pate so that I can ensure that it is up-to-date with respect to newer Linux kernels and has more detailed instructions or maybe better information than what is presented here to make it easier for people to experiment and learn. Please send feedback to spate@veritas.com. Happy hacking! [...]... 
supplier of UNIX to Intel-based PCs and servers Starting with Xenix, SCO moved to SVR3 and then SVR4 following their acquisition of USL The SCO UNIX technology was purchased by Caldera in 2001 and SCO changed its name to Tarantella to develop application technology 419 420 UNIX Filesystems Evolution, Design, and Implementation Single UNIX Specification Although standards such as Posix and the various... kernel 415 416 UNIX Filesystems Evolution, Design, and Implementation mandatory locking Mandatory locking can be enabled on a file if the set group ID bit is switched on and the group execute bit is switched off—a combination that together does not otherwise make any sense Mandatory locking is seldom used megabyte 1024 * 1024 bytes memory-mapped files In addition to using the read() and write() system,... Conference, Atlanta, GA, pages 299–305 427 428 UNIX Filesystems Evolution, Design and Implementation [SALU96] [SAND85] [SCHI93] [SCHI94] [SNYD90] [STEV92] [TANE87] [TWEE98] [VAHA96] [WAIT87] [WEBB93] [WIRZ95] Salus, P (1996) A Quarter Century of UNIX Reading, Massachusetts: Addison Wesley Sandberg, R., Goldberg, D., Kleiman, S., Walsh, D., and Lyon, B (1985) “Design and Implementation of the Sun Network Filesystem.”... source was published in his book on operating systems A version 7 UNIX clone from the system call perspective, the Minix kernel was very different to UNIX Minix was the inspiration for Linux mkfs The command used to make a UNIX filesystem In most versions of UNIX, there is a generic mkfs command and filesystem-specific mkfs commands that enable filesystems to export different features that can be implemented,... 
UNIX Filesystems Evolution, Design, and Implementation filename For example, a request to access /home/spate/bin/myls will involve parsing the pathname and looking up each component in turn, starting at home, until it gets to myls Pathname resolution is often performed one component at a time and may involve calling multiple different filesystem types to help Posix The portable operating system standards... place directly between the user buffer and disk and thus eliminate an unnecessary copy in this case 411 412 UNIX Filesystems Evolution, Design, and Implementation discovered direct I/O The VERITAS filesystem, VxFS, detects I/O patterns that it determines would be best managed by direct I/O rather than buffered I/O This type of I/O is called discovered direct I/O and it is not directly under the control... different versions of UNIX, each UNIX vendor still implemented different commands, libraries, and system calls In the early 1990s, a group of companies formed to produce a standard that encompassed Posix, X/Open, and the various additional interfaces There were initially 1,170 APIs in total, and thus the name originally given to the consortium The completed specification became known as UNIX95 and has been... memory This area is called the swap space, and there may be multiple different swap spaces in the same system The UNIX kernel employs daemons or kernel threads, which are responsible for ensuring that there is always a set of free 421 422 UNIX Filesystems Evolution, Design, and Implementation pages of memory at any one time Older pages are selected for paging and are written to the swap device to free... 
user through use of 407 408 UNIX Filesystems Evolution, Design, and Implementation hardware mechanisms The other use for the term is to describe the instructions, data, and stack areas of the kernel There is typically only one kernel address space that is protected from user processes AFS The Andrew File System (AFS) is a distributed filesystem developed at CMU as part of the Andrew Project The goal of... node and remount it on another node The failing node needs a method to forcibly unmount the filesystem FreeBSD Stemming from the official BSD releases distributed by the University of Berkeley, the FreeBSD project was established in the early 1990s to provide a version of BSD UNIX that was free of USL source code licenses or any other licensing obligations 413 UNIX Filesystems Evolution, Design, and Implementation . buffer_head *bh; 398 UNIX Filesystems Evolution, Design, and Implementation 1038 struct ux_dirent *dirent; 1039 int i, blk = 0; 1040 1041 for (blk=0 ; blk < uip->i_blocks ; blk++) { 1042 bh = sb_bread(sb,. return dirent->d_ino; 1048 } 1049 dirent++; 1050 } 1051 } 1052 brelse(bh); 1053 return 0; 1054 } 1055 1056 /* 1057 * This function is called in response to an iget(). For 1058 * example, we call. ux_lookup(). 1059 */ 1060 1061 void 1062 ux_read_inode(struct inode *inode) 1063 { 1064 struct buffer_head *bh; 1065 struct ux_inode *di; 1066 unsigned long ino = inode->i_ino; 1067 int block; 1068 1069