首先来看看sys_socket中的函数调用关系:
sys_socket
  |
  +--------- sock_create
  |            |
  |            +------- __sock_create
  |                       |
  |                       +------- security_socket_create
  |                       +------- sock_alloc()
  |                       +------- rcu_dereference(net_families[family])
  |                       +------- pf->create(net, sock, protocol, kern)
  |                       +------- module_put(pf->owner)
  |                       +------- security_socket_post_create
  |
  +--------- sock_map_fd
sys_socket调用sock_create函数,sock_create再调用__sock_create;后者通过rcu_dereference从net_families[family]中取得对应协议族的net_proto_family结构(对IPv4而言是inet_family_ops),然后调用该结构中的create函数(这里是inet_create函数)来创建socket。随后sys_socket调用sock_map_fd,为该socket分配一个文件描述符(fd),并完成fd、file与socket之间的绑定。
当使用socket(int,int,int)创建一个socket时,socket会调用sys_socket来完成socket的创建。
本次我们来看看socket()函数系统调用过程。
sys_socket()函数声明如下:
/* Prototype of the socket() system call entry point; the definition below
 * names its three int arguments family, type and protocol. */
asmlinkage long sys_socket(int, int, int);
同样地,sys_socket()函数实现为:
1. sys_socket()
/*
 * socket() system call.
 *
 * @family:   protocol family, handed unchanged to sock_create()
 * @type:     socket type, optionally OR-ed with SOCK_CLOEXEC / SOCK_NONBLOCK
 * @protocol: protocol number, handed unchanged to sock_create()
 *
 * Returns the value of sock_map_fd() on success, or a negative error code:
 * -EINVAL for unknown flag bits in @type, otherwise whatever sock_create()
 * or sock_map_fd() reported.
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	struct socket *sock;
	int flags;
	int err;
	int fd;

	/* Compile-time consistency checks on the SOCK_* constants. */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	/*
	 * Everything outside SOCK_TYPE_MASK is a creation flag; only
	 * SOCK_CLOEXEC and SOCK_NONBLOCK are accepted.
	 */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	/* Where the two constants differ, express non-blocking as O_NONBLOCK. */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/* Create the socket (and its inode). */
	err = sock_create(family, type, protocol, &sock);
	if (err < 0)
		return err;

	/* Create the file and tie fd <-> file <-> socket together. */
	fd = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
	if (fd < 0)
		sock_release(sock);	/* no descriptor was handed out */

	/*
	 * On success the descriptor may already have been replaced by
	 * another one by now; that is not the kernel's problem.
	 */
	return fd;
}
参数kern:表示由应用程序还是内核创建该套接口,一般为0(表示应用程序),或者1(表示内核)。
2. sock_create()函数:
这个函数是对__sock_create()函数的封装:它取得当前进程的网络命名空间(current->nsproxy->net_ns),并以kern为0直接调用__sock_create()函数。
/*
 * sock_create - create a socket in the calling task's network namespace.
 *
 * Thin wrapper around __sock_create(): supplies current->nsproxy->net_ns
 * as the namespace and 0 for the kern argument, and returns its result
 * unchanged.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *net = current->nsproxy->net_ns;

	return __sock_create(net, family, type, protocol, res, 0);
}
3. __sock_create()函数
创建socket及inode
/*
 * __sock_create - create a socket for the given family/type/protocol.
 * @net:      network namespace, passed through to the family's ->create()
 * @family:   protocol family; must be in [0, NPROTO) and is used as the
 *            index into net_families[]
 * @type:     socket type; must be in [0, SOCK_MAX)
 * @protocol: protocol number, passed through to the security hooks and
 *            to ->create()
 * @res:      written with the new socket on success only
 * @kern:     passed to the security hooks and to ->create();
 *            NOTE(review): presumably non-zero means the socket is being
 *            created by the kernel rather than by user space - confirm
 *
 * Returns 0 on success. On failure returns -EAFNOSUPPORT, -EINVAL,
 * -ENFILE, or the error reported by a security hook or by ->create().
 */
int __sock_create(struct net *net, int family, int type, int protocol,
			 struct socket **res, int kern)
{
	int err;
	struct socket *sock;
	const struct net_proto_family *pf;
 
	/*
	 *      Check protocol is in range
	 */
	/* Range-check the family and type arguments. */
	if (family < 0 || family >= NPROTO)
		return -EAFNOSUPPORT;
	if (type < 0 || type >= SOCK_MAX)
		return -EINVAL;
 
	/* Compatibility.
	   This uglymoron is moved from INET layer to here to avoid
	   deadlock in module load.
	 */
	/* (PF_INET, SOCK_PACKET) is obsolete: log once, then force the
	   family to PF_PACKET. */
	if (family == PF_INET && type == SOCK_PACKET) {
		static int warned;
		if (!warned) {
			warned = 1;
			pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
				current->comm);
		}
		family = PF_PACKET;
	}
	
	/* Let the security module vet the creation request: it is given the
	   family, type, protocol and the kern flag. Any non-zero result
	   aborts the creation. The security module is not discussed further.
	   Background: https://www.ibm.com/developerworks/cn/linux/l-selinux/ */
	err = security_socket_create(family, type, protocol, kern);
	if (err)
		return err;
 
	/*
	 *	Allocate the socket and allow the family to set things up. if
	 *	the protocol is 0, the family is instructed to select an appropriate
	 *	default.
	 */
	/* sock_alloc() returns the new socket, or NULL on failure.
	   NOTE(review): the original note says it allocates and initialises
	   the socket together with its inode from the sock_inode_cache
	   cache - not visible here, confirm. */
	sock = sock_alloc();
	if (!sock) {
		net_warn_ratelimited("socket: no more sockets\n");
		return -ENFILE;	/* Not exactly a match, but its the
				   closest posix thing */
	}
 
	sock->type = type;
 
/* With module support configured: if nothing is registered for this family
   yet, ask for the protocol module "net-pf-<family>" to be loaded. */
#ifdef CONFIG_MODULES
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (rcu_access_pointer(net_families[family]) == NULL)
		request_module("net-pf-%d", family);
#endif
 
	rcu_read_lock();
	
	/* Fetch the net_proto_family registered for this family. err is
	   preset so both failure exits below report -EAFNOSUPPORT. */
	pf = rcu_dereference(net_families[family]);
	err = -EAFNOSUPPORT;
	if (!pf)
		goto out_release;
 
	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	/* Holding the reference keeps the family's module from being
	   unloaded while ->create() runs. */
	if (!try_module_get(pf->owner))
		goto out_release;
 
	/* Now protected by module ref count */
	rcu_read_unlock();
	
	/* Family-specific initialisation of the socket.
	   NOTE(review): for IPv4 this is expected to be inet_create(),
	   which is not visible here - confirm. */
	err = pf->create(net, sock, protocol, kern);
	if (err < 0)
		goto out_module_put;
 
	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	/* Pin the module that owns sock->ops; failure to do so is reported
	   as -EAFNOSUPPORT via out_module_busy. */
	if (!try_module_get(sock->ops->owner))
		goto out_module_busy;
 
	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	/* Drop the reference taken on pf->owner before ->create(). */
	module_put(pf->owner);
	
	/* Let the security module inspect the freshly created socket; not
	   discussed further. */
	err = security_socket_post_create(sock, family, type, protocol, kern);
	if (err)
		goto out_sock_release;
	*res = sock;
 
	return 0;
 
/* Error exits. The first three labels fall through into one another. */
out_module_busy:
	err = -EAFNOSUPPORT;
out_module_put:
	/* NOTE(review): ops is presumably cleared so that sock_release()
	   does not use it - confirm against sock_release(). */
	sock->ops = NULL;
	module_put(pf->owner);
out_sock_release:
	sock_release(sock);
	return err;
 
/* Reached only while the RCU read lock is still held: drop it, then share
   the common socket cleanup. */
out_release:
	rcu_read_unlock();
	goto out_sock_release;
}
 
原文:https://www.cnblogs.com/hehex/p/12070561.html