基于字节流套接字(SOCK_STREAM)和数据报套接字(SOCK_DGRAM)不可以访问传输层协议,只是对应用层的报文进行操作,传输层的数据报格式都是由系统提供的协议栈实现,用户只需要填充相应的应用层报文,由系统完成底层报文首部的填充并发送。原始套接字(SOCK_RAW)可以访问位于底层的传输层协议,原始套接字没有端口号。
原始套接字(SOCK_RAW)是一种不同于 SOCK_STREAM、SOCK_DGRAM 的套接字,它实现于系统核心。原始套接字使进程可以读与写 ICMP、IGMP 等网络报文;也可以处理特殊的 IPv4 报文;进程还可以通过设置 IP_HDRINCL 套接字选项由用户自行构造 IP 首部。原始套接字可以用来自行组装 IP 数据报,然后将数据报发送到其他终端。但是只有管理员权限才能使用原始套接字,可防止普通用户往网络写入它们自行构造的 IP 数据报。
调用 socket 函数创建套接字时,指定套接字类型为 SOCK_RAW 以创建一个原始套接字。
int sockfd; /* create an IPv4 raw socket */ sockfd = socket(AF_INET, SOCK_RAW, protocol);
/* Turn on IP_HDRINCL so that the caller supplies the IP header itself. */
const int on = 1; if(setsockopt(sockfd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on)) < 0) /* error handling goes here */
原始套接字的输出遵循以下规则:
原始套接字的输入遵循以下规则:
内核在传递 IP 数据报到原始套接字之前,必须对所有进程上的所有原始套接字进行匹配检测,若匹配成功,才把 IP 数据报的副本传递到匹配的原始套接字。检测匹配步骤如下:
ping 程序的操作比较简单,当源主机向目标主机发送了 ICMP 回显请求数据报后,它期待着目标主机的回答。目标主机在收到一个 ICMP 回显请求数据报后,它会交换源、目的主机的地址,然后将收到的 ICMP 回显请求数据报中的数据部分原封不动地封装在自己的 ICMP 回显应答数据报中,然后发回给发送 ICMP 回显请求的一方。如果校验正确,发送者便认为目标主机的回显服务正常,也即物理连接畅通。
ping 程序编程需要用到 ICMP 协议,有关 ICMP 协议的知识可以参考前面的文章《ICMP 协议》。ping 命令只使用众多 ICMP 报文中的两种:"请求(ICMP_ECHO)"和"回应(ICMP_ECHOREPLY)",这两种 ICMP 报文格式如下图所示:
首先看下系统自带 ping 程序的输出:
$ ping www.github.com PING github.com (192.30.252.128) 56(84) bytes of data. 64 bytes from github.com (192.30.252.128): icmp_req=1 ttl=45 time=269 ms 64 bytes from github.com (192.30.252.128): icmp_req=2 ttl=45 time=274 ms 64 bytes from github.com (192.30.252.128): icmp_req=3 ttl=45 time=270 ms 64 bytes from github.com (192.30.252.128): icmp_req=4 ttl=45 time=281 ms 64 bytes from github.com (192.30.252.128): icmp_req=5 ttl=45 time=283 ms 64 bytes from github.com (192.30.252.128): icmp_req=6 ttl=45 time=249 ms 64 bytes from github.com (192.30.252.128): icmp_req=7 ttl=45 time=253 ms ^C --- github.com ping statistics --- 7 packets transmitted, 7 received, 0% packet loss, time 6006ms rtt min/avg/max/mdev = 249.472/269.010/283.945/12.186 ms
ping 程序的编程步骤:
1) 创建类型为 SOCK_RAW 的原始套接字,同时设定协议为 IPPROTO_ICMP;
2) 创建并初始化 ICMP 首部;
3) 调用 sendto 函数,将 ICMP 请求发给远程主机;
4) 调用 recvfrom 函数(下文的实现中使用的是 recvmsg),以接收任何 ICMP 响应;
Linux 中<netinet/ip_icmp.h> ICMP 的数据结构定义如下:
/*
 * ICMP message layout, quoted from <netinet/ip_icmp.h>.
 *
 * Three fixed fields (type, code, checksum) are followed by two unions:
 * icmp_hun holds the alternative forms of the field right after the
 * checksum, and icmp_dun holds the alternative forms of what follows it.
 * The #define lines inside the struct give short names for the nested
 * union members, so callers can write icmp->icmp_id instead of
 * icmp->icmp_hun.ih_idseq.icd_id.
 */
struct icmp
{
u_int8_t icmp_type; /* type of message, see below */
u_int8_t icmp_code; /* type sub code */
u_int16_t icmp_cksum; /* ones complement checksum of struct */
/* Alternative interpretations of the field that follows the checksum. */
union
{
u_char ih_pptr; /* ICMP_PARAMPROB */
struct in_addr ih_gwaddr; /* gateway address */
struct ih_idseq /* echo datagram */
{
/* The ping code in this file stores its masked PID in icd_id ... */
u_int16_t icd_id;
/* ... and its running send counter in icd_seq. */
u_int16_t icd_seq;
} ih_idseq;
u_int32_t ih_void;
/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
struct ih_pmtu
{
u_int16_t ipm_void;
u_int16_t ipm_nextmtu;
} ih_pmtu;
/* NOTE(review): presumably the router-advertisement header -- confirm against the system header. */
struct ih_rtradv
{
u_int8_t irt_num_addrs;
u_int8_t irt_wpa;
u_int16_t irt_lifetime;
} ih_rtradv;
} icmp_hun;
/* Short accessor names for the icmp_hun members. */
#define icmp_pptr icmp_hun.ih_pptr
#define icmp_gwaddr icmp_hun.ih_gwaddr
#define icmp_id icmp_hun.ih_idseq.icd_id
#define icmp_seq icmp_hun.ih_idseq.icd_seq
#define icmp_void icmp_hun.ih_void
#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void
#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu
#define icmp_num_addrs icmp_hun.ih_rtradv.irt_num_addrs
#define icmp_wpa icmp_hun.ih_rtradv.irt_wpa
#define icmp_lifetime icmp_hun.ih_rtradv.irt_lifetime
/* Alternative interpretations of the part that follows icmp_hun. */
union
{
/* NOTE(review): presumably originate/receive/transmit timestamps -- confirm. */
struct
{
u_int32_t its_otime;
u_int32_t its_rtime;
u_int32_t its_ttime;
} id_ts;
struct
{
struct ip idi_ip;
/* options and then 64 bits of data */
} id_ip;
struct icmp_ra_addr id_radv;
u_int32_t id_mask;
/* Declared with one element; the ping code in this file writes datalen
   bytes starting at this address, so it marks the start of the payload. */
u_int8_t id_data[1];
} icmp_dun;
/* Short accessor names for the icmp_dun members. */
#define icmp_otime icmp_dun.id_ts.its_otime
#define icmp_rtime icmp_dun.id_ts.its_rtime
#define icmp_ttime icmp_dun.id_ts.its_ttime
#define icmp_ip icmp_dun.id_ip.idi_ip
#define icmp_radv icmp_dun.id_radv
#define icmp_mask icmp_dun.id_mask
#define icmp_data icmp_dun.id_data
};
首先定义一个头文件:
/* ---- ping.h: declarations shared by every source file of the ping program ---- */
#ifndef PING_H
#define PING_H

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <sys/socket.h>
#include <signal.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/types.h>

#define BUFSIZE 4096

/*
 * Program-wide state.
 *
 * These are declarations only.  Each object is defined exactly once, in the
 * source file that contains main() (see below).  Defining them in the header
 * would create one definition per translation unit that includes ping.h,
 * which fails to link when the compiler does not merge common symbols
 * (the default since GCC 10).
 */
extern char sendbuf[BUFSIZE];   /* outgoing packet buffer */
extern int datalen;             /* # bytes of data following ICMP header */
extern char *host;              /* host name given on the command line */
extern int nsent;               /* add 1 for each sendto() */
extern int nrecv;               /* add 1 for each echo reply accepted by proc_v4() */
extern pid_t pid;               /* our PID */
extern int sockfd;              /* the raw ICMP socket */
extern int verbose;             /* incremented once per -v option */

/* function prototypes */
void init_v6(void);
void proc_v4(char *, ssize_t, struct msghdr *, struct timeval *);
void proc_v6(char *, ssize_t, struct msghdr *, struct timeval *);
void send_v4(void);
void send_v6(void);
void readloop(void);
void sig_alrm(int);
void tv_sub(struct timeval *, struct timeval *);

/* Per-protocol dispatch record; it hides the IPv4/IPv6 differences. */
struct proto
{
    /* three function pointers */
    void (*fproc)(char *, ssize_t, struct msghdr *, struct timeval *);
    void (*fsend)(void);
    void (*finit)(void);
    /* two socket address pointers */
    struct sockaddr *sasend;    /* sockaddr{} for send, from getaddrinfo */
    struct sockaddr *sarecv;    /* sockaddr for receiving */
    socklen_t salen;            /* length of sockaddr{}s */
    /* ICMP protocol value */
    int icmpprot;               /* IPPROTO_xxx value for ICMP */
};

extern struct proto *pr;        /* record for the address family in use */

#ifdef IPV6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif

#endif /* PING_H */

/* ---- source file containing main(): the single definition of the shared state ---- */
#include "ping.h"

char sendbuf[BUFSIZE];
int datalen = 56;               /* data that goes with ICMP echo request */
char *host;
int nsent;
int nrecv;
pid_t pid;
int sockfd;
int verbose;
struct proto *pr;

/* IPv4 dispatch record; the address fields are filled in by main(). */
struct proto proto_v4 = {proc_v4, send_v4, NULL, NULL, NULL, 0, IPPROTO_ICMP};
#ifdef IPV6
/* IPv6 dispatch record, only built when IPV6 is defined. */
struct proto proto_v6 = {proc_v6, send_v6, init_v6, NULL, NULL, 0, IPPROTO_ICMPV6};
#endif

typedef void Sigfunc(int);

/* Helpers implemented elsewhere in the project. */
extern Sigfunc *MySignal(int signo, Sigfunc *func);
extern struct addrinfo *host_serv(const char *host, const char *serv, int family, int socktype);
extern char *Sock_ntop_host(const struct sockaddr *sa, socklen_t salen);
extern void *Calloc(size_t n, size_t size);

void statistics(int signo);
int main(int argc, char **argv)
{
int n;
struct addrinfo *ai;
char *h;
opterr = 0; /* don't want getopt() writing to stderr */
/* 只实现ping的一个参数选项-v供查询 */
/* 有关getopt函数的使用可以查阅相关资料 */
while( (n = getopt(argc, argv, "v")) != -1)
{
switch(n)
{
case 'v':
verbose++;
break;
case '?':
printf("unrecognize option: %c\n", n);
exit(1);
}
}
if(optind != argc-1)
{
perror("usage: ping [ -v ] <hostname>");
exit(1);
}
host = argv[optind];
pid = getpid() & 0xffff; /* ICMP ID field is 16 bits */
MySignal(SIGALRM, sig_alrm);
MySignal(SIGINT, statistics);
/* 将主机名和服务名映射到一个地址,并返回指向addrinfo的指针 */
ai = host_serv(host, NULL, 0, 0);
/* 将网络字节序的地址转换为字符串格式地址,并返回该字符串的指针 */
h = Sock_ntop_host(ai->ai_addr, ai->ai_addrlen);
/* 显示PING的主机名、地址与数据字节数 */
printf("PING %s (%s) %d bytes of data.\n", ai->ai_canonname ? ai->ai_canonname : h, h, datalen);
/* initialize according to protocol */
if(ai->ai_family == AF_INET)
{
pr = &proto_v4;/* proto结构指针pr指向对应域的结构,这里是IPv4域的结构 */
#ifdef IPV6
}else if(ai->family == AF_INET6)
{
pr = &proc_v6;
if(IN6_IS_ADDR_V4MAPPED(&(((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr)))
{
perror("connot ping IPv4-mapped IPv6 address");
exit(1);
}
#endif
}else
{
printf("unknown address family %d", ai->ai_family);
exit(1);
}
pr->sasend = ai->ai_addr;/* 发送地址赋值 */
pr->sarecv = (struct sockaddr *)Calloc(1, ai->ai_addrlen);
pr->salen = ai->ai_addrlen;/* 地址的大小 */
/* 处理数据 */
readloop();
exit(0);
}
/* 显示发送和接收数据报的个数,并计算丢包率 */
void statistics(int signo)
{
printf("\n----------- %s ping statistics -----------\n", Sock_ntop_host(pr->sarecv, pr->salen));
int lost = 100*(nsent-nrecv)/nsent;
printf("%d packets transmitted, %d received, %d packet lost\n", nsent, nrecv, lost);
close(sockfd);
exit(1);
}
#include "ping.h"
/*
 * Create the raw ICMP socket, drop the privileges that were needed to
 * create it, send the first request and then receive packets forever,
 * passing each one to the protocol-specific handler pr->fproc.
 *
 * Does not return; exits with status 1 if the socket cannot be created,
 * if privileges cannot be dropped, or if recvmsg() fails with anything
 * other than EINTR.
 */
void readloop(void)
{
    int size;
    char recvbuf[BUFSIZE];
    char controlbuf[BUFSIZE];
    struct msghdr msg;
    struct iovec iov;
    ssize_t n;
    struct timeval tval;

    /* creating a raw socket requires root privileges */
    if ((sockfd = socket(pr->sasend->sa_family, SOCK_RAW, pr->icmpprot)) < 0)
    {
        perror("socket error");
        exit(1);
    }
    /*
     * Give up root now that the socket exists.  The result must be checked:
     * if setuid() fails the process would silently keep running as root.
     */
    if (setuid(getuid()) < 0)
    {
        perror("setuid error");
        exit(1);
    }
    /* protocol-specific initialisation (only set for IPv6) */
    if (pr->finit)
        (*pr->finit)();

    /* ask for a 60 KB receive buffer to make overflow less likely; best effort */
    size = 60 * 1024;
    (void)setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));

    /* send the first packet; the handler re-arms itself with alarm() */
    sig_alrm(SIGALRM);

    /* describe the receive buffers; zero first so no field is left indeterminate */
    memset(&msg, 0, sizeof(msg));
    iov.iov_base = recvbuf;
    iov.iov_len = sizeof(recvbuf);
    msg.msg_name = pr->sarecv;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = controlbuf;

    for ( ; ; )
    {
        /* both lengths are value-result arguments, so reset them every time */
        msg.msg_namelen = pr->salen;
        msg.msg_controllen = sizeof(controlbuf);

        n = recvmsg(sockfd, &msg, 0);
        if (n < 0)
        {
            /* interrupted by SIGALRM: just try again */
            if (errno == EINTR)
                continue;
            perror("recvmsg error");
            exit(1);
        }
        /* record the arrival time */
        gettimeofday(&tval, NULL);
        /* hand the packet to the protocol handler */
        (*pr->fproc)(recvbuf, n, &msg, &tval);
    }
}
#include "ping.h"
/* Checksum routine implemented in another source file. */
extern uint16_t in_cksum(uint16_t *addr, int len);

/*
 * Build one ICMPv4 echo request in sendbuf and send it to pr->sasend.
 *
 * The packet is an 8-byte ICMP header followed by datalen bytes of data.
 * The data starts with the send time (a struct timeval) and is padded
 * with 0xa5.  nsent is used as the sequence number and then incremented.
 *
 * Exits with status 1 if datalen cannot hold the timestamp, if the packet
 * would not fit in sendbuf, or if sendto() fails.
 */
void
send_v4(void)
{
    int len;
    struct icmp *icmp;
    struct timeval now;

    len = 8 + datalen; /* checksum ICMP header and data */
    if (datalen < (int)sizeof(now) || len > (int)sizeof(sendbuf))
    {
        fprintf(stderr, "send_v4: invalid data length %d\n", datalen);
        exit(1);
    }

    /* fill in the ICMP header */
    icmp = (struct icmp *) sendbuf;
    icmp->icmp_type = ICMP_ECHO; /* echo request */
    icmp->icmp_code = 0;
    icmp->icmp_id = pid;
    icmp->icmp_seq = nsent++;

    memset(icmp->icmp_data, 0xa5, datalen); /* fill with pattern */
    /*
     * Record the send time.  Copy it in byte-wise: the data area starts at
     * offset 8 of a char buffer, which need not be suitably aligned for a
     * struct timeval.
     */
    gettimeofday(&now, NULL);
    memcpy(icmp->icmp_data, &now, sizeof(now));

    /* the checksum field must be zero while the checksum is computed */
    icmp->icmp_cksum = 0;
    icmp->icmp_cksum = in_cksum((uint16_t *) icmp, len);

    /* send the packet */
    if (sendto(sockfd, sendbuf, len, 0, pr->sasend, pr->salen) != len)
    {
        perror("sendto error");
        exit(1);
    }
}
#include "ping.h"
extern char *Sock_ntop_host(const struct sockaddr *sa, socklen_t salen);

/*
 * Process one packet received on the raw ICMPv4 socket.
 *
 * ptr    - start of the received datagram (IP header first)
 * len    - number of bytes received
 * msg    - the msghdr used for the receive (unused here)
 * tvrecv - arrival time; overwritten by tv_sub() with the difference
 *          between arrival time and send time
 *
 * Echo replies carrying our own ID are printed with their round-trip time
 * and counted in nrecv.  Any other ICMP message is printed only when
 * verbose is set.  Packets that are too short are dropped silently.
 */
void
proc_v4(char *ptr, ssize_t len, struct msghdr *msg, struct timeval *tvrecv)
{
    int hlen1, icmplen;
    double rtt;
    struct ip *ip;
    struct icmp *icmp;
    struct timeval tvsend;

    (void)msg;

    /* the fixed IP header must be complete before any field is read */
    if (len < (ssize_t)sizeof(struct ip))
        return; /* malformed packet */
    ip = (struct ip *) ptr; /* start of IP header */
    /* header length field counts 32-bit words */
    hlen1 = ip->ip_hl << 2; /* length of IP header */
    if (hlen1 < (int)sizeof(struct ip) || hlen1 > len)
        return; /* malformed packet */
    if (ip->ip_p != IPPROTO_ICMP)
        return; /* not ICMP */

    /* skip the IP header */
    icmp = (struct icmp *) (ptr + hlen1); /* start of ICMP header */
    /* ICMP header plus data; fewer than 8 bytes cannot hold the header */
    if ( (icmplen = (int)(len - hlen1)) < 8)
        return; /* malformed packet */

    if (icmp->icmp_type == ICMP_ECHOREPLY) {
        if (icmp->icmp_id != pid)
            return; /* not a response to our ECHO_REQUEST */
        /* send_v4() stores a whole struct timeval right after the header */
        if (icmplen < 8 + (int)sizeof(tvsend))
            return; /* not enough data to use */
        /* copy byte-wise: the data area may not be aligned for a timeval */
        memcpy(&tvsend, icmp->icmp_data, sizeof(tvsend));
        /* *tvrecv becomes arrival time minus send time */
        tv_sub(tvrecv, &tvsend);
        /* round-trip time in milliseconds */
        rtt = tvrecv->tv_sec * 1000.0 + tvrecv->tv_usec / 1000.0;
        printf("%d bytes from %s: icmp_seq=%u ttl=%d rtt=%.3f ms\n",
                icmplen, Sock_ntop_host(pr->sarecv, pr->salen),
                (unsigned int)icmp->icmp_seq, ip->ip_ttl, rtt);
        nrecv++;
    } else if (verbose) {
        printf(" %d bytes from %s: icmp_type = %d, icmp_code = %d\n",
                icmplen, Sock_ntop_host(pr->sarecv, pr->salen),
                icmp->icmp_type, icmp->icmp_code);
    }
}
#include <stdint.h>
/*
 * Compute the 16-bit one's-complement checksum used by IP and ICMP.
 *
 * addr - start of the data to checksum; may be NULL when len <= 0
 * len  - number of bytes to checksum
 *
 * Returns the checksum as a native 16-bit value: storing it unchanged into
 * the packet's checksum field makes the checksum of the whole packet 0.
 *
 * The data is read one byte at a time and reassembled into native 16-bit
 * words, so the buffer needs no particular alignment and may really be a
 * char array (callers pass one through a pointer cast).
 */
uint16_t
in_cksum(uint16_t *addr, int len)
{
    const unsigned char *p = (const unsigned char *)addr;
    union {
        uint16_t word;
        unsigned char bytes[2];
    } u;
    uint64_t sum = 0; /* wide enough that no carry is lost for any int len */
    int nleft = len;

    /* add up the data as a sequence of native 16-bit words */
    while (nleft > 1) {
        u.bytes[0] = p[0];
        u.bytes[1] = p[1];
        sum += u.word;
        p += 2;
        nleft -= 2;
    }

    /* mop up an odd byte, if necessary: pad it with a zero byte */
    if (nleft == 1) {
        u.bytes[0] = p[0];
        u.bytes[1] = 0;
        sum += u.word;
    }

    /* fold the carries back into the low 16 bits until none remain */
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);

    return (uint16_t)~sum; /* one's complement, truncated to 16 bits */
}
#include "ping.h"
/*
 * SIGALRM handler, also called directly by readloop() for the first packet.
 * Sends one packet through the active protocol record, then schedules the
 * next SIGALRM one second from now.
 */
void
sig_alrm(int signo)
{
    pr->fsend();    /* protocol-specific send routine */
    alarm(1);       /* fire again in one second */
}
编译步骤:
sudo make
sudo chmod u+s Ping
测试:
$ ./Ping www.github.com PING github.com (192.30.252.129) 56 bytes of data. 64 bytes from 192.30.252.129: icmp_seq=0 ttl=45 rtt=303.057 ms 64 bytes from 192.30.252.129: icmp_seq=1 ttl=45 rtt=301.416 ms 64 bytes from 192.30.252.129: icmp_seq=2 ttl=45 rtt=301.614 ms 64 bytes from 192.30.252.129: icmp_seq=3 ttl=45 rtt=301.727 ms 64 bytes from 192.30.252.129: icmp_seq=4 ttl=45 rtt=308.911 ms 64 bytes from 192.30.252.129: icmp_seq=5 ttl=45 rtt=303.088 ms 64 bytes from 192.30.252.129: icmp_seq=6 ttl=45 rtt=305.763 ms ^C ----------- 192.30.252.129 ping statistics ----------- 7 packets transmitted, 7 received, 0 packet lost
《Unix 网络编程》
原文:http://blog.csdn.net/chenhanzhun/article/details/42080777