# 数以千计的服务器存活状态检查
# !/usr/bin/python
# -*- coding: UTF-8 -*-
import commands
import subprocess
import sys
import time
class server_unreache(object):
    """Find hosts that stay unreachable across repeated fping probes.

    Typical use is three calls in order: ``unreachable_ip_list()`` for the
    first sweep, ``do_3times_fping_unreachable()`` to re-probe the failures,
    and ``do_reboot()`` to print the follow-up IPMI command for the hosts
    that never answered.  Results are kept on the instance
    (``unreachable_once`` / ``unreachable_confirmed``) instead of in
    module-level globals, so calling a step early simply works on an empty
    list rather than raising NameError.
    """

    FPING_PATH = "/sbin/fping"

    # Template printed by do_reboot(); "{}" receives the rewritten address.
    # NOTE(review): despite the method name this is a power *status* query
    # and the credentials look like placeholders - confirm before running it.
    IPMI_CMD = "ipmitool -I lanplus -H {} -U UserNmae -P PassWord chassis power status"

    def __init__(self, ip_list):
        """Remember the target list.

        Args:
            ip_list: value handed to ``fping -f``, i.e. the path of a file
                listing the hosts to probe.
        """
        self.ip_list = ip_list
        self.unreachable_once = []       # hosts that failed the first sweep
        self.unreachable_confirmed = []  # hosts that failed every re-probe

    def _run_fping(self, args):
        """Run fping with *args* and return ``(exit_status, stdout_text)``.

        The command is executed as an argument list without a shell, so the
        list path and host names are never interpreted by a shell.  stderr is
        captured separately and dropped so that diagnostic lines cannot be
        mistaken for host names.

        Raises:
            OSError: if the fping binary cannot be executed.
        """
        proc = subprocess.Popen(
            [self.FPING_PATH] + list(args),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True
        )
        out, _ = proc.communicate()
        return proc.returncode, out

    @staticmethod
    def _ipmi_host(host):
        """Map a ``172.x.y.z`` address to ``10.x.y.z``; leave others untouched.

        Only the leading octet is rewritten, so later octets that happen to
        be 172 are preserved.
        NOTE(review): presumably 10.x is the management network - confirm.
        """
        prefix = "172."
        if host.startswith(prefix):
            return "10." + host[len(prefix):]
        return host

    # First sweep: collect the hosts that cannot be pinged.
    def unreachable_ip_list(self):
        """Run ``fping -u -f <ip_list>`` and record the hosts it reports.

        Blank lines are skipped and duplicates collapsed (first occurrence
        wins), so an empty fping result yields an empty list.  The fping exit
        status is not inspected here.

        Returns:
            The list of unreachable hosts, also stored in
            ``self.unreachable_once``.
        """
        _, output = self._run_fping(["-u", "-f", self.ip_list])
        hosts = []
        seen = set()
        for line in output.splitlines():
            host = line.strip()
            if host and host not in seen:
                seen.add(host)
                hosts.append(host)
        self.unreachable_once = hosts
        return hosts

    # Re-probe the hosts from the first sweep three more times, waiting
    # 10 seconds before each pass.
    def do_3times_fping_unreachable(self, rounds=3, interval=10):
        """Re-probe the first-sweep failures and keep those that never answer.

        A host is confirmed unreachable only if fping exits non-zero for it in
        every round; a host that answers once is dropped and not probed again.

        Args:
            rounds: number of re-probe passes (must be >= 1).
            interval: seconds to wait before each pass.

        Returns:
            The confirmed-unreachable hosts in first-sweep order, also stored
            in ``self.unreachable_confirmed``.

        Raises:
            ValueError: if ``rounds`` is less than 1.
        """
        if rounds < 1:
            raise ValueError("rounds must be >= 1")
        pending = list(self.unreachable_once)
        for attempt in range(1, rounds + 1):
            if not pending:
                break  # nothing left to confirm; skip the remaining waits
            time.sleep(interval)
            still_down = []
            for host in pending:
                status, _ = self._run_fping(["-u", host])
                print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
                if status != 0:
                    still_down.append(host)
                    print("{} times,unreache_3times {}".format(attempt, host))
            pending = still_down
        self.unreachable_confirmed = pending
        print(pending)
        return pending

    def do_reboot(self, count=0, limit=2):
        """Print the IPMI command for the first confirmed-unreachable hosts.

        Nothing is executed; the commands are only printed and returned.

        Args:
            count: number of hosts already handled; it is subtracted from
                ``limit``.
            limit: maximum number of hosts to act on overall.

        Returns:
            The list of command strings that were printed.
        """
        budget = max(limit - count, 0)
        issued = []
        for host in self.unreachable_confirmed[:budget]:
            cmd = self.IPMI_CMD.format(self._ipmi_host(host))
            print("do_reboot_ip:{},\n do_reboot_cmd:{}".format(host, cmd))
            issued.append(cmd)
        return issued
if __name__ == '__main__':
    # Script entry point: first sweep, repeated re-probe, then print the
    # follow-up command for the hosts that stayed unreachable.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError traceback.
        sys.stderr.write("usage: {} <ip_list>\n".format(sys.argv[0]))
        sys.exit(2)
    # Bound at module level (not inside a function) so the name stays
    # visible as a global, exactly as before.
    ip_list = sys.argv[1]
    ds = server_unreache(ip_list)
    ds.unreachable_ip_list()
    ds.do_3times_fping_unreachable()
    ds.do_reboot()
# 本文标题:数以千计的服务器存活状态检查
# 标题来源:http://hbruida.cn/article/jejsoh.html