在Linux系统中有7种类型的文件:普通文件、目录、链接文件、字符设备文件、块设备文件、套接字文件、FIFO文件。一般来说应该能猜到普通文件和目录文件最多,但具体的数据又是多少?
为了实现这个目的需要如下两个东西:
一个文件的信息会被封装成一个结构体:
/* File status record filled in by stat(), fstat() and lstat(); the file type is encoded in st_mode. */
struct stat { dev_t st_dev; /* ID of device containing file */ ino_t st_ino; /* inode number */ mode_t st_mode; /* file type and protection (permission) bits */ nlink_t st_nlink; /* number of hard links */ uid_t st_uid; /* user ID of owner */ gid_t st_gid; /* group ID of owner */ dev_t st_rdev; /* device ID (if special file) */ off_t st_size; /* total size, in bytes */ blksize_t st_blksize; /* blocksize for filesystem I/O */ blkcnt_t st_blocks; /* number of blocks allocated */ time_t st_atime; /* time of last access */ time_t st_mtime; /* time of last modification */ time_t st_ctime; /* time of last status change */ };
其中st_mode成员中记载了文件类型。可以用 S_ISREG(mode)等宏去鉴别类型。
希望获得文件的状态信息又需要用到如下函数
int stat(const char *path, struct stat *buf); int fstat(int filedes, struct stat *buf); int lstat(const char *path, struct stat *buf);
其中 lstat() 和 stat() 的不同主要体现在链接文件上:stat() 返回的是链接所指向的目标文件的信息,而 lstat() 返回的则是链接文件本身的信息。
对目录进行遍历的时候,为了返回遍历得到的节点文件,还有另一种结构体
/* One directory entry as returned by readdir(); d_name is the entry's name and d_type its file type. */
struct dirent { ino_t d_ino; /* inode number */ off_t d_off; /* offset to the next dirent */ unsigned short d_reclen; /* length of this record */ unsigned char d_type; /* type of file */ char d_name[256]; /* filename */ };
其中的d_name成员包含了本节点项的名字信息,d_type成员包含了本节点项的文件类型信息。(在支持d_type的文件系统上,只用这个类型信息就足够统计文件类型了,不必再使用上面涉及到的文件信息读取函数;但并非所有文件系统都会填写d_type,不支持时其值为DT_UNKNOWN,这时仍然需要调用lstat()来判断类型。)
遍历一个目录需要用到的函数如下
struct dirent *readdir(DIR *dir); DIR *opendir(const char *name); int closedir(DIR *dir); int chdir(const char *path); int fchdir(int fd);
完整的程序如下
//this program is work for produce count of the type of files. //i.e: normal file, directory, symbolic file, device file and so on. #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <stdio.h> #include <sys/types.h> #include <dirent.h> //type of files typedef enum FileType { TYPE_LNK, TYPE_REG, TYPE_DIR, TYPE_CHR, TYPE_BLK, TYPE_FIFO, TYPE_SOC, TYPE_ERR, TYPE_CNT, }FILETYPE; static unsigned long count[TYPE_CNT]; //count of the type of file static int recur = 0; //the depth of recursion static int recur_max; //tha max value of recursion depth typedef int ( * FUNC) ( char *pathname); static int inline recalRecurMax( int recur, int *recur_max) { return *recur_max = *recur_max<recur?recur:*recur_max; } //identify the type of a file static int file_type( char *pathname) { int ret; struct stat buf; ret = lstat( pathname, &buf); //could‘t use stat(), because of the link file . if( ret<0) { perror(" "); return TYPE_ERR; } FILETYPE type; switch( (buf.st_mode&S_IFMT) ) { case S_IFLNK: type = TYPE_LNK; break; case S_IFREG: type = TYPE_REG; break; case S_IFDIR: type = TYPE_DIR; break; case S_IFCHR: type = TYPE_CHR; break; case S_IFBLK: type = TYPE_BLK; break; case S_IFIFO: type = TYPE_FIFO; break; case S_IFSOCK: type = TYPE_SOC; break; default : type = TYPE_ERR; } return type; //return the type of this file } static int ftw( char *rootpath, FUNC callback) { recur++; recalRecurMax( recur, &recur_max); //record the max value of recursion depth DIR *pdir; pdir = opendir( rootpath); //open this directory if( NULL==pdir ) { perror(" "); return 0; } int ret; ret = chdir( rootpath); //enter this directory if( ret<0) { perror(" "); return 0; } struct dirent *pdirent; do { pdirent = readdir( pdir); //be carefull, this function will traverse all files in this directory. 
if( NULL!=pdirent) { int type; type = callback( pdirent->d_name ); //count the type of file //printf("%s, %d\n", pdirent->d_name, type); count[type]++; if( (DT_DIR==pdirent->d_type) //enter sub-directory &&(strcmp( pdirent->d_name, ".") )!=0 &&(strcmp( pdirent->d_name, ".."))!=0 ) { int ret; ret = ftw( pdirent->d_name, callback); if( !ret) printf("error: %s is not a valid path\n", pdirent->d_name); } } }while( NULL!=pdirent ); chdir(".."); closedir( pdir); recur --; return 1; } static void show( unsigned long count[]) { double sum=0; int i; for( i=0; i< TYPE_CNT; i++) { sum+= count[i]; } printf( "LNK : %ld --%%%f\n", count[TYPE_LNK], count[TYPE_LNK]*100/sum); printf( "REG : %ld --%%%f\n", count[TYPE_REG], count[TYPE_REG]*100/sum); printf( "DIR : %ld --%%%f\n", count[TYPE_DIR], count[TYPE_DIR]*100/sum); printf( "CHR : %ld --%%%f\n", count[TYPE_CHR], count[TYPE_CHR]*100/sum); printf( "BLK : %ld --%%%f\n", count[TYPE_BLK], count[TYPE_BLK]*100/sum); printf( "FIFO : %ld --%%%f\n", count[TYPE_FIFO], count[TYPE_FIFO]*100/sum); printf( "SOC : %ld --%%%f\n", count[TYPE_SOC], count[TYPE_SOC]*100/sum); printf( "ERR : %ld --%%%f\n", count[TYPE_ERR], count[TYPE_ERR]*100/sum); printf(" recur_max = %d\n", recur_max); } int main( int argc, char *argv[]) { if( 2!=argc ) { printf("usage: a.out <pathname>\n"); return 0; } int ret; ret = ftw( argv[1], file_type); show( count); if( !ret) printf(" %s is not a valid path\n", argv[1]); return 0; }
当然实际上有很多更简洁的办法完成这个功能,比如直接使用标准库 <ftw.h> 提供的 ftw()/nftw() 等目录树遍历函数。
将上面的程序在自己系统中运行后,获得的结果如下
[root@localhost ftw]# ./interesting.out / LNK : 19448 --%4.339260 REG : 330217 --%73.678398 DIR : 98236 --%21.918530 CHR : 152 --%0.033914 BLK : 47 --%0.010487 FIFO : 4 --%0.000892 SOC : 83 --%0.018519 ERR : 0 --%0.000000 recur_max = 14
有趣的小程序--统计系统中不同种类文件数量,布布扣,bubuko.com
原文:http://blog.csdn.net/u012301943/article/details/23038561