在代码分析前,先总体介绍下sentinel 的机制。
1. 下线定义
sentinel对下线有两种定义:
a.主观下线(sdown):sentinel实例本身对服务实例的判断
b.客观下线(odown):多个sentinel实例对同一个服务SDOWN的状态做出协商后的判断,只有master才可能在odown状态
简单的说,一个sentinel单独做出的判断只能是sdown,是没有任何官方效力的,只有多个sentinel大家商量好,得到一致,才能将某个master状态置为odown,只有确定master odown状态后,才能做后续fail over的操作
2. 通信
sentinel与master/slave的交互主要包括:
a.PING:sentinel向其发送PING以了解其状态(是否下线)
b.INFO:sentinel向其发送INFO以获取replication相关的信息
c.PUBLISH:sentinel向其监控的master/slave发布本身的信息及master相关的配置
d.SUBSCRIBE:sentinel通过订阅master/slave的“__sentinel__:hello”频道以获取其它正在监控相同服务的sentinel
sentinel与sentinel的交互主要包括:
a.PING:sentinel向其他sentinel发送PING以了解其状态(是否下线)
b.SENTINEL is-master-down-by-addr:和其他sentinel协商master状态,如果master odown,则投票选出leader做fail over
3. fail over
一次完整的fail over包括以下步骤:
a. sentinel发现master下线,则标记master sdown
b. 和其他sentinel协商以确定master状态是否odown
c. 如果master odown,则选出leader
d. 当选为leader的sentinel选出一个slave做为master,并向该slave发送slaveof no one命令以转变slave角色为master
e. 向已下线的master及其他slave发送slaveof xxxx命令使其作为新当选master的slave
int main(int argc, char **argv) {
......
//checkForSentinelMode判断是否以sentinel模式启动
//运行程序名为redis-sentinel,或者带参数--sentinel运行则认为以sentinel模式运行
server.sentinel_mode = checkForSentinelMode(argc,argv);
initServerConfig();
//sentinel模式下需要完成的初始化工作
if (server.sentinel_mode) {
initSentinelConfig();
initSentinel();
}
if (argc >= 2) {
......
//导入配置
loadServerConfig(configfile,options);
sdsfree(options);
}
......
//注册定时器
initServer();
......
//判断config文件是否存在及是否可写(sentinel模式需要写config文件)
if (!server.sentinel_mode) {
......
} else {
sentinelIsRunning();
}
//以下开始进入事件处理循环
aeSetBeforeSleepProc(server.el,beforeSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
return 0;
}struct sentsentinelStateinelState {
uint64_t current_epoch; //当前处在第几个世纪(每次fail over,current_epoch+1)
dict *masters; /* master实例字典(一个sentinle可监控多个master)*/
int tilt; /*是否在TITL模式中,后面详细介绍TITL模式*/
int running_scripts; /* 当前正在执行的脚本 */
mstime_t tilt_start_time; /* TITL模式开始的时间 */
mstime_t previous_time; /* 上次执行sentinel周期性执行任务的时间,用以判断是否进入TITL模式*/
list *scripts_queue; /* 待执行脚本队列 */
} sentinel;
typedef struct sentinelRedisInstance {
......
/* Master specific. */
dict *sentinels; /* 监控该master实例的其他sentinel结点字典*/
dict *slaves; /* 该master实例说包含的slave结点字典 */
......
} sentinelRedisInstance;char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
//Handle 类似“sentinel monitor mymaster 10.2.60.50 6379 2”的配置
//调用createSentinelRedisInstance创建master实例(SRI_MASTER)
if (!strcasecmp(argv[0],"monitor") && argc == 5) {
/* monitor <name> <host> <port> <quorum> */
int quorum = atoi(argv[4]);
if (quorum <= 0) return "Quorum must be 1 or greater.";
if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2],
atoi(argv[3]),quorum,NULL) == NULL)
{
switch(errno) {
case EBUSY: return "Duplicated master name.";
case ENOENT: return "Can‘t resolve master instance hostname.";
case EINVAL: return "Invalid port number";
}
}
.....
//调用createSentinelRedisInstance创建slave实例(SRI_SLAVE)
} else if (!strcasecmp(argv[0],"known-slave") && argc == 4) {
sentinelRedisInstance *slave;
/* known-slave <name> <ip> <port> */
//根据master name获取对应的master实例
ri = sentinelGetMasterByName(argv[1]);
if (!ri) return "No such master with specified name.";
if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,argv[2],
atoi(argv[3]), ri->quorum, ri)) == NULL)
{
return "Wrong hostname or port for slave.";
}
//调用createSentinelRedisInstance创建sentinel实例(SRI_SENTINEL)
} else if (!strcasecmp(argv[0],"known-sentinel") &&
(argc == 4 || argc == 5)) {
sentinelRedisInstance *si;
// known-sentinel <name> <ip> <port> [runid]
//根据master name获取对应的master实例
ri = sentinelGetMasterByName(argv[1]); if (!ri) return "No such master with specified name."; if ((si = createSentinelRedisInstance(NULL,SRI_SENTINEL,argv[2], atoi(argv[3]), ri->quorum, ri)) == NULL) { return "Wrong hostname or port for sentinel."; } if (argc == 5) si->runid = sdsnew(argv[4]); } else { return "Unrecognized sentinel configuration statement."; } return NULL;}
createSentinelRedisInstance根据传入的参数不同,创建了不同的redis实例,并将redis实例添加到相应的table中,包括master、slave、sentinel三种
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
......
//对于slave和sentinel实例,定义其hostname为ip:port
if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
snprintf(slavename,sizeof(slavename),
strchr(hostname,‘:‘) ? "[%s]:%d" : "%s:%d",
hostname,port);
name = slavename;
}
//根据实例类型不同,添加到不同的table
if (flags & SRI_MASTER) table = sentinel.masters;
else if (flags & SRI_SLAVE) table = master->slaves;
else if (flags & SRI_SENTINEL) table = master->sentinels;
sdsname = sdsnew(name);
if (dictFind(table,sdsname)) {
sdsfree(sdsname);
errno = EBUSY;
return NULL;
}
......
dictAdd(table, ri->name, ri);
return ri;
}void initSentinel(void) {
int j;
//空command字典
dictEmpty(server.commands,NULL);
//添加sentinal模式下支持的命令,sentinelcmds包括:ping、sentinel、subscribe、unsubscribe、psubscribe、info、shutdown
for (j = 0; j < sizeof(sentinelcmds)/sizeof(sentinelcmds[0]); j++) {
int retval;
struct redisCommand *cmd = sentinelcmds+j;
retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
redisAssert(retval == DICT_OK);
}
......
}void initServer() {
......
//注册定时器,定时时间1ms
if(aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
redisPanic("Can‘t create the serverCron time event.");
exit(1);
}
......
}int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
......
/* 如果在sentinel模式下,则执行sentinel相关的周期性任务 */
run_with_period(100) { //100ms执行一次
if (server.sentinel_mode) sentinelTimer();
}
server.cronloops++;
return 1000/server.hz; //hz默认值为10(在sentinelTimer会被修改),此处返回100ms会被其它函数扑捉到,并重新注册为定时函数
}sentinelTimer内部包含sentinel模式需要定期执行的操作,包括check master、slave、sentinel的状态,并根据配置的条件判断是否需要fail over。
Redis Sentinel源码分析(一),布布扣,bubuko.com
原文:http://blog.csdn.net/yfkiss/article/details/22151175