#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Report a few useful metrics derived from `ceph osd df`.

The script runs `ceph osd df -f json-pretty` (bounded to 10 seconds by the
`timeout` utility), then prints a single JSON object to stdout with:

* ``skewed_osds`` - OSDs whose ``var`` diverges from 1 by more than the
  tolerated spread (each entry carries ``id``, ``var`` and ``reweight``),
* ``average_osd_utilization`` - ``summary.average_utilization`` as reported,
* ``osd_spread_tolerated`` - the tolerance that was applied.

On failure (command missing, no output, malformed JSON, missing keys) a
message is written to stderr and the exit status is 1.

The code intentionally sticks to constructs that run on both Python 2.7 and
Python 3.
"""

import json
import math
import os
import subprocess
import sys

# Argument list (no shell involved). `timeout 10` keeps a hung cluster from
# blocking the caller indefinitely.
OSD_DF_COMMAND = ["timeout", "10", "ceph", "osd", "df", "-f", "json-pretty"]

# OSDs at or below this `var` hold next to no data (probably marked out) and
# are therefore not reported as skewed.
MIN_REPORTED_VAR = 0.01


def fetch_osd_df():
    """Run `ceph osd df` and return the decoded JSON document.

    stderr of the command is discarded, matching the original
    `2>/dev/null` redirection.

    Raises:
        OSError: if the command cannot be started.
        ValueError: if the command printed nothing or printed invalid JSON.
    """
    with open(os.devnull, "w") as devnull:
        proc = subprocess.Popen(
            OSD_DF_COMMAND, stdout=subprocess.PIPE, stderr=devnull
        )
        raw_output, _ = proc.communicate()

    text = raw_output.decode("utf-8").strip()
    if not text:
        # Typically a timeout or an unreachable cluster; say so explicitly
        # instead of surfacing a bare JSON decoding error.
        raise ValueError(
            "no output from '%s' (exit status %s)"
            % (" ".join(OSD_DF_COMMAND), proc.returncode)
        )
    return json.loads(text)


def tolerated_spread(average_utilization):
    """Return the tolerated divergence of an OSD's `var` from 1.

    This carefully selected linear function returns "good enough" values
    that care more about OSD utilization disparity as the cluster fills up.

    Args:
        average_utilization: average utilization in percent.
    """
    # 100.0 forces float division even for an integer input on Python 2.
    return math.fabs(-0.39 * (average_utilization / 100.0) + 0.35)


def find_skewed_osds(nodes, spread_tolerated):
    """Return the OSDs that fall outside the tolerated spread.

    Args:
        nodes: iterable of per-OSD dicts with "id", "var" and "reweight".
        spread_tolerated: maximum accepted value of ``|1 - var|``.

    Returns:
        A list of ``{"id", "var", "reweight"}`` dicts, in input order.
    """
    skewed = []
    for osd in nodes:
        var = osd["var"]
        # Absolute divergence from the average utilization.
        avg_diff = math.fabs(1 - var)
        if avg_diff > spread_tolerated and var > MIN_REPORTED_VAR:
            skewed.append(
                {"id": osd["id"], "var": var, "reweight": osd["reweight"]}
            )
    return skewed


def build_report(osd_df):
    """Build the output document from a decoded `ceph osd df` result.

    Args:
        osd_df: dict with ``summary.average_utilization`` and ``nodes``.

    Raises:
        KeyError: if an expected key is missing.
    """
    osd_utilization = osd_df["summary"]["average_utilization"]
    spread_tolerated = tolerated_spread(osd_utilization)

    report = {}
    report["skewed_osds"] = find_skewed_osds(osd_df["nodes"], spread_tolerated)
    report["average_osd_utilization"] = osd_utilization
    report["osd_spread_tolerated"] = spread_tolerated
    return report


def main():
    """Print the report as JSON; return the process exit status."""
    try:
        report = build_report(fetch_osd_df())
    except (OSError, ValueError, KeyError, TypeError) as err:
        sys.stderr.write("ceph-osd-stats: %s\n" % (err,))
        return 1
    print(json.dumps(report))
    return 0


if __name__ == "__main__":
    sys.exit(main())