forked from techhat/python-moosefs
-
Notifications
You must be signed in to change notification settings - Fork 2
/
nagios-moosefs-replicas.py
executable file
·110 lines (79 loc) · 2.46 KB
/
nagios-moosefs-replicas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/python
# A proof of concept Nagios check for MooseFS
# This check is supposed to raise a WARNING
# if there are missing replicas,
# or a CRITICAL if there are no replicas left for some data
# HOWTO:
# needs MooseFS libs (included)
# access is via standard port, no auth needed(!)
# CAVEAT:
# All other MooseFS checks I write will be directly for Check_MK
from moosefs import MooseFS
import sys
def sh_syntax():
    """Print the command-line usage text to standard output."""
    # A parenthesised single-argument print is a valid statement on Python 2
    # and a valid function call on Python 3, so the script runs on either.
    print("""
Syntax:
nagios-moosefs-replicas.py [-H <mfsmaster hostname>]
If the mfsmaster is not specified, the check will try to default to 'mfsmaster'
""")
# Short labels printed in front of the status line, keyed by Nagios exit code.
# For easy reuse of the check in Check_MK: a non-empty nagios_state_names that
# is already defined in this namespace is kept. The lookup goes through
# globals() because testing the bare name raises NameError when the script
# runs standalone and nothing has defined it yet.
nagios_state_names = globals().get("nagios_state_names") or {
    0: "OK",
    1: "WARN",
    2: "CRIT",
    3: "UNKW",
}


def parse_master_host(argv):
    """Return the mfsmaster hostname selected by *argv*.

    argv is the full argument vector including the program name.

    Returns None when help was requested (exactly one argument, "-h"),
    the value following "-H" when exactly "-H <hostname>" was given, and
    the default hostname "mfsmaster" for anything else.
    """
    if len(argv) == 2 and "-h" in argv:
        return None
    if len(argv) == 3 and argv[1] == "-H":
        return argv[2]
    # Fall back to the default hostname, if none other was given.
    return "mfsmaster"


def evaluate_replicas(matrix):
    """Derive the Nagios state and status message from the replica matrix.

    matrix is indexed as matrix[goal][copies]; each entry is a chunk count
    (row = goal, column = number of existing copies, per the layout the
    original comments of this check describe).

    Returns a (state, message) tuple: state is 0 when no chunk is below its
    goal, 1 when chunks lack replicas, 2 when chunks have no copy left.
    """
    state = 0
    problems = []
    # Parse the table per goal; goal 0 chunks are skipped.
    for cur_goal, chunks_per_goal in enumerate(matrix):
        if cur_goal == 0:
            continue
        # Column 0 holds chunks with no copy left: that data is lost.
        lost = chunks_per_goal[0]
        # Columns 1 .. goal-1 hold chunks that still exist but are
        # under-replicated. They are counted separately so the "data lost"
        # figure is not inflated by chunks that merely lack replicas.
        lacking = sum(chunks_per_goal[1:cur_goal])
        if lost > 0:
            state = max(state, 2)
            problems.append("%i chunks of goal %i data lost" % (lost, cur_goal))
        if lacking:
            state = max(state, 1)
            problems.append(
                "%i chunks of goal %i lack replicas" % (lacking, cur_goal))
    if state == 0:
        # Add a message in case there were no errors.
        return state, "No errors"
    # Separate the per-goal findings so they stay readable in the status line.
    return state, ", ".join(problems)


def main(argv=None):
    """Run the check and return the Nagios exit code (0-3).

    argv defaults to sys.argv. Prints exactly one status line, or the usage
    text when help was requested.
    """
    if argv is None:
        argv = sys.argv

    mfsmaster = parse_master_host(argv)
    if mfsmaster is None:
        sh_syntax()
        # Nagios only defines exit codes 0-3; report help as UNKNOWN.
        return 3

    # Try building a connection and loading the table of replicas.
    # Any failure is reported as UNKNOWN instead of a traceback, which would
    # spill into Nagios and exit with code 1 (read as WARNING).
    try:
        mfsobj = MooseFS(masterhost=mfsmaster)
        matrix = mfsobj.mfs_info()['matrix']
    except Exception:
        # Exception (not a bare except) lets SystemExit/KeyboardInterrupt pass.
        print("UNKNOWN - Error occured connecting to mfsmaster")
        return 3

    state, msg = evaluate_replicas(matrix)
    print(nagios_state_names[state] + " - " + msg)
    return state


if __name__ == "__main__":
    sys.exit(main())