#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Print one Ceph cluster, pool, OSD or fio metric on stdout.

Usage::

    ceph-status.py METRIC [TARGET]

``METRIC`` selects what to report (see ``_METRICS`` near the bottom of this
file); ``TARGET`` is a pool name or an OSD id for the per-pool / per-OSD
metrics.  Every metric prints exactly one line.  When the value cannot be
obtained a numeric fallback is printed instead (``255`` for ``health``,
``0`` for everything else) so the consumer always receives a value.

The ``pools`` and ``osds`` metrics print a compact JSON list of
``{"{#POOL}": name}`` / ``{"{#OSD}": id}`` objects.
NOTE(review): that shape looks like Zabbix low-level-discovery output --
confirm against whatever consumes this script.

The script runs unchanged on Python 2 and Python 3.
"""

import json
import os
import sys

try:
    from commands import getoutput  # Python 2
except ImportError:
    from subprocess import getoutput  # Python 3

try:
    from shlex import quote as _shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as _shell_quote  # Python 2

# Everything that can reasonably go wrong while running a command and
# digging a value out of its JSON output.  json.loads() raises ValueError
# (JSONDecodeError is a subclass) on empty or malformed output.
_ERRORS = (
    ValueError,
    KeyError,
    IndexError,
    TypeError,
    AttributeError,
    ZeroDivisionError,
    EnvironmentError,
)

_PID_DIR = "/var/run/ceph"

_HEALTH_CODES = {"HEALTH_OK": 1, "HEALTH_WARN": 2, "HEALTH_ERR": 3}
_HEALTH_UNKNOWN = 255

_LATENCY_FIELDS = {
    "ave_commit": "commit_latency_ms",
    "ave_apply": "apply_latency_ms",
}
_THROUGHPUT_FIELDS = {"write": "write_bytes_sec", "read": "read_bytes_sec"}
_OPS_FIELDS = ("write_op_per_sec", "read_op_per_sec", "promote_op_per_sec")
_MEM_FIELDS = {"virt": "vsz", "res": "rsz"}

# get_pool_stats(): selector -> key under pool["stats"] in `ceph df`.
_POOL_DF_FIELDS = {"objects": "objects", "used": "bytes_used"}
# get_pool_stats(): selector -> key under pool["client_io_rate"] in
# `ceph osd pool stats`.
_POOL_IO_FIELDS = {
    "throughput_write": "write_bytes_sec",
    "throughput_read": "read_bytes_sec",
    "op_write": "write_op_per_sec",
    "op_read": "read_op_per_sec",
}
# get_pool_config(): selector -> (variable asked of `ceph osd pool get`,
# key read from the reply).  "id" deliberately queries "size": the original
# script read "pool_id" out of that same reply.
_POOL_CONFIG_FIELDS = {
    "id": ("size", "pool_id"),
    "size": ("size", "size"),
    "min_size": ("min_size", "min_size"),
    "pg_num": ("pg_num", "pg_num"),
    "pgp_num": ("pgp_num", "pgp_num"),
}


def _ceph_json(subcommand):
    """Run ``timeout 10 ceph <subcommand> -f json-pretty`` and parse it.

    ``subcommand`` is inserted into a shell command line verbatim, so the
    caller must quote any externally supplied part.  Raises ``ValueError``
    when the output is not valid JSON (e.g. the command failed or timed out
    and printed nothing).
    """
    raw = getoutput(
        "timeout 10 ceph %s -f json-pretty 2>/dev/null" % subcommand
    )
    return json.loads(raw)


def _osd_pid(osd):
    """Return the pid stored in the OSD's pid file, or ``None``.

    The file is read directly instead of through ``cat`` so the OSD id never
    reaches a shell, and only an all-digit pid is accepted because it is
    later placed on a ``ps`` command line.
    """
    path = os.path.join(_PID_DIR, "osd.%s.pid" % osd)
    try:
        with open(path) as handle:
            pid = handle.read().strip()
    except EnvironmentError:
        return None
    return pid if pid.isdigit() else None


def _ps_field(pid, field):
    """Return one ``ps`` column for ``pid`` as a stripped string, or 0.

    ``-o <field>=`` asks ps for the column with an empty header, which
    replaces the original ``grep -v <HEADER>`` filtering.
    """
    value = getoutput("ps -p %s -o %s= 2>/dev/null" % (pid, field)).strip()
    return value if value else 0


def _emit(value):
    """Write ``value`` followed by a newline to stdout (Python 2/3 safe)."""
    sys.stdout.write("%s\n" % (value,))


def get_cluster_health():
    """Return the cluster health as a number.

    1 = HEALTH_OK, 2 = HEALTH_WARN, 3 = HEALTH_ERR, 255 = anything else or
    the health could not be read.  The ``status`` field is preferred and
    ``overall_status`` is used when ``status`` is absent, so both the newer
    and the legacy ``ceph health`` JSON layouts are understood.
    """
    try:
        report = _ceph_json("health")
        status = report.get("status", report.get("overall_status"))
        return _HEALTH_CODES.get(status, _HEALTH_UNKNOWN)
    except _ERRORS:
        return _HEALTH_UNKNOWN


def get_cluster_used_percent():
    """Return ``bytes_used / bytes_total`` formatted as ``"%.3f"``.

    Despite the name the value is a ratio (0..1), not multiplied by 100;
    that is what the script has always printed.  Returns 0 on failure.
    """
    try:
        pgmap = _ceph_json("-s")["pgmap"]
        used = int(pgmap["bytes_used"])
        total = int(pgmap["bytes_total"])
        return "%.3f" % (used / float(total))
    except _ERRORS:
        return 0


def get_cluster_total_objects():
    """Return the 10th field of the ``pgmap`` line of plain ``ceph -s``.

    NOTE: the original author flagged this text scraping as buggy; the
    command is kept unchanged.  Returns the raw command output (a string),
    or 0 when it is empty.
    """
    try:
        output = getoutput(
            "timeout 10 ceph -s 2> /dev/null|grep pgmap|awk '{print $10}'"
        )
    except EnvironmentError:
        return 0
    return output if output else 0


def get_cluster_total_pgs():
    """Return ``pgmap.num_pgs`` from ``ceph -s``, or 0 on failure."""
    try:
        return _ceph_json("-s")["pgmap"]["num_pgs"]
    except _ERRORS:
        return 0


def get_cluster_latency(arg):
    """Return the integer mean OSD latency in ms from ``ceph osd perf``.

    ``arg`` is ``"ave_commit"`` (commit latency) or ``"ave_apply"`` (apply
    latency).  Returns 0 for any other ``arg``, when no OSD is listed, or on
    failure.
    """
    field = _LATENCY_FIELDS.get(arg)
    if field is None:
        return 0
    try:
        infos = _ceph_json("osd perf")["osd_perf_infos"]
        samples = [int(info["perf_stats"][field]) for info in infos]
        # Floor division keeps the Python 2 integer result on Python 3.
        return sum(samples) // len(samples)
    except _ERRORS:
        return 0


def get_cluster_throughput(arg):
    """Return cluster ``write_bytes_sec`` / ``read_bytes_sec``.

    ``arg`` is ``"write"`` or ``"read"``.  Returns 0 when the key is absent
    from ``pgmap``, for any other ``arg``, or on failure.
    """
    field = _THROUGHPUT_FIELDS.get(arg)
    if field is None:
        return 0
    try:
        return _ceph_json("-s")["pgmap"].get(field, 0)
    except _ERRORS:
        return 0


def get_cluster_total_ops():
    """Return the sum of the write, read and promote op/s found in ``pgmap``.

    Keys missing from ``pgmap`` are skipped.  Returns 0 on failure.
    """
    try:
        pgmap = _ceph_json("-s")["pgmap"]
        return sum(int(pgmap[key]) for key in _OPS_FIELDS if key in pgmap)
    except _ERRORS:
        return 0


def get_cluster_total_pools():
    """Return the number of entries in ``ceph osd lspools``, or 0.

    NOTE: the original author flagged this count as buggy; the logic is
    kept unchanged.
    """
    try:
        return len(_ceph_json("osd lspools"))
    except _ERRORS:
        return 0


def get_cluster_pools():
    """Return all pool names from ``ceph df`` as a compact JSON list.

    Each element is ``{"{#POOL}": <name>}``.  Returns 0 on failure.
    """
    try:
        entries = [
            {"{#POOL}": str(pool["name"])}
            for pool in _ceph_json("df")["pools"]
        ]
        return json.dumps(entries, separators=(",", ":"))
    except _ERRORS:
        return 0


def get_host_osds():
    """Return the OSD ids found in this host's mount table as JSON.

    The ids are taken from the ``mount`` output (lines matching ``osd`` but
    not ``lockbox``, third column, text after the first ``-``).  Each element
    is ``{"{#OSD}": <id>}``.  Returns 0 on failure.
    """
    try:
        output = getoutput(
            "mount|grep osd|grep -v lockbox|awk '{print $3}'"
            "|cut -f2 -d - 2>/dev/null"
        )
        entries = [{"{#OSD}": str(osd)} for osd in output.splitlines()]
        return json.dumps(entries, separators=(",", ":"))
    except _ERRORS:
        return 0


def get_osd_mem_virt(osd, memtype):
    """Return the memory use of an OSD process as reported by ``ps``.

    ``memtype`` is ``"virt"`` (``vsz`` column) or ``"res"`` (``rsz``
    column).  Returns the stripped ``ps`` output as a string, or 0 when the
    pid file is missing or invalid, the process is gone, or ``memtype`` is
    unknown.
    """
    field = _MEM_FIELDS.get(memtype)
    if field is None:
        return 0
    try:
        pid = _osd_pid(osd)
        return _ps_field(pid, field) if pid else 0
    except _ERRORS:
        return 0


def get_osd_cpu(osd):
    """Return the ``pcpu`` column of ``ps`` for an OSD process.

    Returns the stripped ``ps`` output as a string, or 0 when the pid file
    is missing or invalid or the process is gone.
    """
    try:
        pid = _osd_pid(osd)
        return _ps_field(pid, "pcpu") if pid else 0
    except _ERRORS:
        return 0


def get_fio_write_speed():
    """Return the 6th column of the fio line(s) of one ``iotop`` sample.

    ``iotop`` is run with ``-k`` (kilobytes).  Lines containing
    ``fio_write_speed`` are filtered out so this script's own invocation is
    not counted.  Returns the raw output, or 0 when it is empty.
    """
    try:
        output = getoutput(
            "iotop --batch --iter 1 -P -k |grep fio"
            "|grep -v fio_write_speed|grep -v grep "
            "|awk '{print $6}' 2>/dev/null"
        )
    except EnvironmentError:
        return 0
    return output if output else 0


def get_fio_read_speed():
    """Return the 4th column of the fio line(s) of one ``iotop`` sample.

    ``iotop`` is run with ``-k`` (kilobytes).  Lines containing
    ``fio_read_speed`` are filtered out so this script's own invocation is
    not counted.  Returns the raw output, or 0 when it is empty.
    """
    try:
        output = getoutput(
            "iotop --batch --iter 1 -P -k |grep fio"
            "|grep -v fio_read_speed|grep -v grep "
            "|awk '{print $4}' 2>/dev/null"
        )
    except EnvironmentError:
        return 0
    return output if output else 0


def get_pool_stats(poolname, stats):
    """Return one statistic of the pool named ``poolname``.

    ``stats`` selects the value:

    * ``"objects"`` / ``"used"`` -- ``stats.objects`` / ``stats.bytes_used``
      from ``ceph df``;
    * ``"throughput_write"``, ``"throughput_read"``, ``"op_write"``,
      ``"op_read"`` -- the matching ``client_io_rate`` key from
      ``ceph osd pool stats`` (0 when the key is absent);
    * ``"size"`` -- same as ``get_pool_config(poolname, "size")``.  The
      original branch was a debugging leftover that printed to stdout and
      always queried the pool ``rbd``.

    Returns 0 when the pool is not found, ``stats`` is unknown, or on
    failure.
    """
    if stats == "size":
        return get_pool_config(poolname, "size")
    try:
        if stats in _POOL_DF_FIELDS:
            for pool in _ceph_json("df")["pools"]:
                if pool["name"] == poolname:
                    return pool["stats"][_POOL_DF_FIELDS[stats]]
        elif stats in _POOL_IO_FIELDS:
            for pool in _ceph_json("osd pool stats"):
                if pool["pool_name"] == poolname:
                    io_rate = pool["client_io_rate"]
                    return io_rate.get(_POOL_IO_FIELDS[stats], 0)
    except _ERRORS:
        return 0
    return 0


def get_pool_config(poolname, config):
    """Return one setting of a pool via ``ceph osd pool get``.

    ``config`` is one of ``"id"``, ``"size"``, ``"min_size"``, ``"pg_num"``
    or ``"pgp_num"``.  Returns 0 for any other ``config`` or on failure.
    """
    fields = _POOL_CONFIG_FIELDS.get(config)
    if fields is None:
        return 0
    variable, key = fields
    try:
        # The pool name comes from the command line: quote it for the shell.
        reply = _ceph_json(
            "osd pool get %s %s" % (_shell_quote(poolname), variable)
        )
        return reply[key]
    except _ERRORS:
        return 0


# metric name -> (function, takes TARGET from argv[2], extra args, fallback)
_METRICS = {
    "health": (get_cluster_health, False, (), _HEALTH_UNKNOWN),
    "used_percent": (get_cluster_used_percent, False, (), 0),
    "total_objects": (get_cluster_total_objects, False, (), 0),
    "total_pgs": (get_cluster_total_pgs, False, (), 0),
    "commit_latency": (get_cluster_latency, False, ("ave_commit",), 0),
    "apply_latency": (get_cluster_latency, False, ("ave_apply",), 0),
    "throughput_write": (get_cluster_throughput, False, ("write",), 0),
    "throughput_read": (get_cluster_throughput, False, ("read",), 0),
    "total_ops": (get_cluster_total_ops, False, (), 0),
    "total_pools": (get_cluster_total_pools, False, (), 0),
    "pools": (get_cluster_pools, False, (), 0),
    "osds": (get_host_osds, False, (), 0),
    "osds_mem_use_virt": (get_osd_mem_virt, True, ("virt",), 0),
    "osds_mem_use_res": (get_osd_mem_virt, True, ("res",), 0),
    "osds_cpu_use": (get_osd_cpu, True, (), 0),
    "fio_write_speed": (get_fio_write_speed, False, (), 0),
    "fio_read_speed": (get_fio_read_speed, False, (), 0),
    "pool_objects": (get_pool_stats, True, ("objects",), 0),
    "pool_bytes_used": (get_pool_stats, True, ("used",), 0),
    "pool_throughput_write": (get_pool_stats, True, ("throughput_write",), 0),
    "pool_throughput_read": (get_pool_stats, True, ("throughput_read",), 0),
    "pool_op_write": (get_pool_stats, True, ("op_write",), 0),
    "pool_op_read": (get_pool_stats, True, ("op_read",), 0),
    "pool_id": (get_pool_config, True, ("id",), 0),
    "pool_size": (get_pool_config, True, ("size",), 0),
    "pool_min_size": (get_pool_config, True, ("min_size",), 0),
    "pool_pg_num": (get_pool_config, True, ("pg_num",), 0),
    "pool_pgp_num": (get_pool_config, True, ("pgp_num",), 0),
}


def main(argv=None):
    """Print the metric named by ``argv[1]`` and return an exit status.

    ``argv`` defaults to ``sys.argv``.  Returns 0 after printing a value
    (including the fallback value printed when the lookup fails or the
    required TARGET argument is missing).  Returns 2, printing nothing on
    stdout and a message on stderr, when no metric or an unknown metric is
    given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s METRIC [TARGET]\n" % argv[0])
        return 2

    entry = _METRICS.get(argv[1])
    if entry is None:
        sys.stderr.write("unknown metric: %s\n" % argv[1])
        return 2

    handler, takes_target, extra_args, fallback = entry
    try:
        # argv[2] is read inside the try so a missing TARGET yields the
        # fallback value, as in the original script.
        args = ((argv[2],) if takes_target else ()) + extra_args
        value = handler(*args)
    except Exception:  # CLI boundary: always print a value.
        value = fallback
    _emit(value)
    return 0


if __name__ == '__main__':
    sys.exit(main())