1  原理图
NSCA
NSCA is an addon that allows you to send passive check results from remote Linux/Unix hosts to the Nagios Core daemon running on 
the monitoring server. This is very useful in distributed and redundant/failover monitoring setups.
The NSCA addon can be found on Nagios Exchange.



2 安装
2.1 安装nagios
参考 安装nagios 4.4
2.2 安装nsca
2.2.1 下载nsca
nsca-2.9.1.tar.gz
2.2.1 安装server端nsca
2.2.1.1 解压、安装
tar -xzvf ./nsca-2.9.1.tar.gz
cd ./nsca-2.9.1
./configure
make all
1)    会在src目录下生成两个程序 nsca send_nsca(主程序)
2)    sample-config中会有nsca.cfg与send_nsca.cfg(配置文件)
3)    当前目录下会有一个init-script(启动脚本)          
# Install the nsca daemon and its sample configuration under the Nagios prefix.
cp src/nsca /usr/local/nagios/bin/
cp sample-config/nsca.cfg /usr/local/nagios/etc
# Use the OWNER:GROUP form; the dot-separated form is obsolete.
chown nagios:nagios /usr/local/nagios/bin/nsca
chown nagios:nagios /usr/local/nagios/etc/nsca.cfg
# Install the generated init script and make it executable.
cp init-script /etc/init.d/nsca
chmod a+x /etc/init.d/nsca
vi /usr/local/nagios/etc/nsca.cfg
aggregate_writes=1
max_packet_age=60
password=nsca0622
#此处和分布式监控服务器密码一致                
vi /usr/local/nagios/etc/nagios.cfg
check_external_commands=1
accept_passive_service_checks=1
accept_passive_host_checks=1
#配置接受被动主机检测的结果
#
service_freshness_check_interval=600
host_freshness_check_interval=600
passive_host_checks_are_soft=1
execute_host_checks=0
execute_service_checks=0
enable_flap_detection=0
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg  
Things look okay - No serious problems were detected during the pre-flight check
systemctl restart nagios.service
systemctl restart nsca.service
systemctl enable nagios.service
systemctl enable nsca.service
2.2.1.2 配置host service command
vi /usr/local/nagios/etc/objects/templates.cfg
# Host template for the central server: results arrive passively via NSCA,
# and freshness checking is enabled with check_dummy!0 as the check command.
define host {
name                            generic-host            ; The name of this host template
notifications_enabled           1                       ; Host notifications are enabled
event_handler_enabled           1                       ; Host event handler is enabled
flap_detection_enabled          1                       ; Flap detection is enabled
process_perf_data               1                       ; Process performance data
retain_status_information       1                       ; Retain status information across program restarts
retain_nonstatus_information    1                       ; Retain non-status information across program restarts
notification_period             24x7                    ; Send host notifications at any time
register                        0                       ; DON'T REGISTER THIS DEFINITION - IT'S NOT A REAL HOST, JUST A TEMPLATE!
# PNP4Nagios addresses host performance data with the pseudo service name _HOST_.
action_url                      /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=_HOST_
check_freshness                 1                       ; Freshness checking is enabled
freshness_threshold             600                     ; Freshness threshold (seconds, per the Nagios docs)
passive_checks_enabled          1                       ; Passive host checks are enabled/accepted
active_checks_enabled           0                       ; Active host checks are disabled
check_command                   check_dummy!0           ; Command used when a host check is executed
}
# Service template for the central server: results arrive passively via NSCA.
# Each directive appears exactly once; active_checks_enabled and
# check_freshness carry the values that were previously set by the later,
# overriding duplicate lines at the end of the definition.
define service {
name                            generic-service         ; The 'name' of this service template
active_checks_enabled           0                       ; Active service checks are disabled (results come in passively)
passive_checks_enabled          1                       ; Passive service checks are enabled/accepted
parallelize_check               1                       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service             1                       ; We should obsess over this service (if necessary)
check_freshness                 1                       ; Check service 'freshness'
freshness_threshold             600                     ; Freshness threshold (seconds, per the Nagios docs)
notifications_enabled           1                       ; Service notifications are enabled
event_handler_enabled           1                       ; Service event handler is enabled
flap_detection_enabled          1                       ; Flap detection is enabled
process_perf_data               1                       ; Process performance data
retain_status_information       1                       ; Retain status information across program restarts
retain_nonstatus_information    1                       ; Retain non-status information across program restarts
is_volatile                     0                       ; The service is not volatile
check_period                    24x7                    ; The service can be checked at any time of the day
max_check_attempts              3                       ; Re-check the service up to 3 times in order to determine its final (hard) state
check_interval                  10                      ; Check the service every 10 minutes under normal conditions
retry_interval                  2                       ; Re-check the service every two minutes until a hard state can be determined
contact_groups                  admins                  ; Notifications get sent out to everyone in the 'admins' group
notification_options            w,u,c,r                 ; Send notifications about warning, unknown, critical, and recovery events
notification_interval           60                      ; Re-notify about service problems every hour
notification_period             24x7                    ; Notifications can be sent out at any time
register                        0                       ; DON'T REGISTER THIS DEFINITION - IT'S NOT A REAL SERVICE, JUST A TEMPLATE!
action_url                      /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=$SERVICEDESC$
check_command                   check_dummy!0           ; Command used when a service check is executed
}
vi /usr/local/nagios/etc/objects/commands.cfg
# service-is-stale: runs staleservice.sh from the $USER1$ directory.
# NOTE(review): the templates shown in this document use check_dummy!0 as
# their check_command, so this command is only used if an object references
# it explicitly.
define command{
command_name                 service-is-stale
command_line                    $USER1$/staleservice.sh
}
# check_dummy: runs the check_dummy plugin from the $USER1$ directory,
# passing the first command argument ($ARG1$) through to it.
define command{
command_name                  check_dummy
command_line                     $USER1$/check_dummy $ARG1$
}
vi /usr/local/nagios/libexec/staleservice.sh
#!/bin/sh
# Report a fixed CRITICAL message on stdout and exit with status 2.
printf '%s\n' "CRITICAL: Service results are stale!"
exit 2
chmod a+x /usr/local/nagios/libexec/staleservice.sh
chown nagios:nagios /usr/local/nagios/libexec/staleservice.sh
2.2.2 安装client端nsca(分布式服务器)
2.2.2.1 编译
编译同server方法一样
tar -xzvf ./nsca-2.9.1.tar.gz
cd ./nsca-2.9.1
./configure
make all
1)会在src目录下生成两个程序 nsca send_nsca(主程序)
2)sample-config中会有nsca.cfg与send_nsca.cfg(配置文件)
# Install the send_nsca client and its sample configuration under the Nagios prefix.
cp src/send_nsca /usr/local/nagios/bin/
cp sample-config/send_nsca.cfg /usr/local/nagios/etc/
# Use the OWNER:GROUP form; the dot-separated form is obsolete.
chown nagios:nagios /usr/local/nagios/bin/send_nsca
chown nagios:nagios /usr/local/nagios/etc/send_nsca.cfg
2.2.2.2 修改配置信息
vi /usr/local/nagios/etc/send_nsca.cfg
# Must be identical to the password configured on the central server.
# Keep this comment on its own line: anything after '=' becomes part of the value.
password=nsca0622
vi /usr/local/nagios/etc/nagios.cfg
# Comments are kept on their own lines: the main configuration file only
# treats a line as a comment when it starts with '#'.

# Prevent this distributed server from sending out notifications itself.
enable_notifications=0
# Obsess over services, so the OCSP command runs for service check results.
obsess_over_services=1
# Obsessive compulsive service processor (OCSP) command.
ocsp_command=submit_service_check_result
# Obsess over hosts, so the OCHP command runs for host check results.
obsess_over_hosts=1
# Obsessive compulsive host processor (OCHP) command.
ochp_command=submit_host_check_result
vi /usr/local/nagios/etc/objects/commands.cfg
# OCSP command: hands host name, service description, state and
# output/perfdata to the submit_service_check_result script.
# Arguments that may contain spaces are wrapped in plain ASCII single quotes
# so the shell passes each of them as a single argument.
define command{
command_name submit_service_check_result
command_line /usr/local/nagios/libexec/eventhandlers/submit_service_check_result $HOSTNAME$ '$SERVICEDESC$' $SERVICESTATE$ '$SERVICEOUTPUT$ |$SERVICEPERFDATA$'
}
# OCHP command: hands host name, host state and output/perfdata to the
# submit_host_result script.
define command{
command_name submit_host_check_result
command_line /usr/local/nagios/libexec/eventhandlers/submit_host_result $HOSTNAME$ $HOSTSTATE$ '$HOSTOUTPUT$ |$HOSTPERFDATA$'
}
su - nagios
mkdir /usr/local/nagios/libexec/eventhandlers
cd /usr/local/nagios/libexec/eventhandlers
vi ./submit_service_check_result
#!/bin/sh
# submit_service_check_result
#
# Forwards one service check result to the central Nagios server through
# send_nsca, and appends the same record to a local log file.
#
# Arguments:
#   $1 - host name
#   $2 - service description
#   $3 - service state string (OK, WARNING, CRITICAL or UNKNOWN)
#   $4 - plugin output (the caller appends the performance data)

PATH="/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin"
export PATH

PROGNAME=$(basename "$0")
# Directory containing this script; utils.sh is looked up one level above it.
PROGPATH=$(dirname -- "$0")
REVISION="2.2.1"

# utils.sh is expected to define the STATE_* values used below.
. "$PROGPATH"/../utils.sh

central_server=9.1.9.90

# Remains -1 when $3 is not one of the four state strings handled below.
return_code=-1

case "$3" in
  OK)
    return_code=$STATE_OK
    ;;
  WARNING)
    return_code=$STATE_WARNING
    ;;
  CRITICAL)
    return_code=$STATE_CRITICAL
    ;;
  UNKNOWN)
    return_code=$STATE_UNKNOWN
    ;;
esac

# One tab-separated record: host, service description, return code, output.
/usr/bin/printf "%s\t%s\t%s\t%s\n" "$1" "$2" "$return_code" "$4" \
  | /usr/local/nagios/bin/send_nsca -H "$central_server" -c /usr/local/nagios/etc/send_nsca.cfg

# Keep a local copy of every record that was submitted.
/usr/bin/printf "%s\t%s\t%s\t%s\n" "$1" "$2" "$return_code" "$4" >>/tmp/nsca_c.log
chmod a+x ./submit_service_check_result
vi ./submit_host_result
#!/bin/sh
# submit_host_result
#
# Forwards one host check result to the central Nagios server through
# send_nsca, and appends the same record to a local log file.
#
# Arguments:
#   $1 - host name
#   $2 - host state string (UP, DOWN or UNREACHABLE)
#   $3 - plugin output (the caller appends the performance data)

nsca_target=9.1.9.90

# Map the state string to its numeric code; anything else is sent as -1.
case "$2" in
  UP)          state_code=0 ;;
  DOWN)        state_code=1 ;;
  UNREACHABLE) state_code=2 ;;
  *)           state_code=-1 ;;
esac

# Print one tab-separated record: host, return code, output.
format_result() {
  /usr/bin/printf "%s\t%s\t%s\n" "$1" "$2" "$3"
}

format_result "$1" "$state_code" "$3" | /usr/local/nagios/bin/send_nsca -H "$nsca_target" -c /usr/local/nagios/etc/send_nsca.cfg
format_result "$1" "$state_code" "$3" >>/tmp/nsca_c_h.log
chmod a+x ./submit_host_result
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
systemctl restart nagios.service
systemctl enable nagios.service
1、中心服务器和分布式服务器的时间一定要调整一致
2、分布式服务器监控客户端主机可以用任何方式,主动被动都可以
3、分布式上定义的监控主机与服务,在中心服务器上也要定义,保证主机名(host_name)和服务描述(service_description)一致。
4、分布式服务器上定义的服务检测命令(check_command)是真正的检测服务的命令,中心服务器上定义的服务检测命令(check_command)是当中心服务器由被动检测变为主动检测时执行的命令(也就是当分布式服务器不再发送检测结果时中心服务器执行的命令),正常情况下不执行这个命令。
原文:http://blog.51cto.com/skymax/2140695