source: npl/overig/smartmontools/syn3-smartcheck @ 03d3a6f

perl-5.22
Last change on this file since 03d3a6f was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago

initial commit, transferred from cleaned syn3 svn tree

  • Property mode set to 100755
File size: 4.2 KB
Line 
#!/bin/bash
#(C)2007 DatuX. All rights reserved.
#Automatic harddisk smart monitoring via Syn-3 monitoring framework.
4
# Make sure the configuration directory and file exist, then load the
# per-device settings (the SELFTEST_DISABLED_<key> and
# MONITORING_DISABLED_<key> variables looked up by the helpers below).
mkdir -p /etc/smart
touch /etc/smart/config
. /etc/smart/config
8
# Tell whether the periodic self-test is switched off for one device.
#
# Arguments:
#   $1 - device key; looked up as the config variable SELFTEST_DISABLED_<key>
# Side effects:
#   Sets the global SELFTEST_DISABLED to the value found (empty if unset or
#   if the key is rejected).
# Returns:
#   0 if the variable equals "1" (self-test disabled), 1 otherwise.
is_selftest_disabled()
{
        # Accept only characters that can appear in a variable name, so the
        # lookup can never interpret caller-supplied text as shell code.
        case "$1" in
                *[!A-Za-z0-9_]*)
                        SELFTEST_DISABLED=
                        return 1
                        ;;
        esac

        # Indirect expansion reads SELFTEST_DISABLED_<key> without eval.
        local var="SELFTEST_DISABLED_$1"
        SELFTEST_DISABLED=${!var}

        [ "$SELFTEST_DISABLED" == "1" ]
}
18
# Tell whether state monitoring is switched off for one device.
#
# Arguments:
#   $1 - device key; looked up as the config variable
#        MONITORING_DISABLED_<key>
# Side effects:
#   Sets the global MONITORING_DISABLED to the value found (empty if unset or
#   if the key is rejected).
# Returns:
#   0 if the variable equals "1" (monitoring disabled), 1 otherwise.
is_monitoring_disabled()
{
        # Accept only characters that can appear in a variable name, so the
        # lookup can never interpret caller-supplied text as shell code.
        case "$1" in
                *[!A-Za-z0-9_]*)
                        MONITORING_DISABLED=
                        return 1
                        ;;
        esac

        # Indirect expansion reads MONITORING_DISABLED_<key> without eval.
        local var="MONITORING_DISABLED_$1"
        MONITORING_DISABLED=${!var}

        [ "$MONITORING_DISABLED" == "1" ]
}
28
# Query one disk through the command line in $SMARTCMD and report the result
# to the Syn-3 monitoring framework via the syn3-* tools.
#
# Globals read:
#   SMARTCMD - smartctl command line (device plus optional -d type); it is
#              deliberately expanded unquoted so it splits into words
#   FACILITY - state/graph name handed to syn3-state / syn3-graph
#   LIST     - when "1", only print "<key> <facility> <info>" for the disk
# Globals written:
#   IDS      - accumulates the identities of the disks already handled
#   INFO, ID, OUTPUT, EXIT, TEMPERATURE - scratch values of the last call
# Files:
#   /tmp/smart.$$ receives the `smartctl -i` output; the top-level script
#   removes it when all devices are done.
#   NOTE(review): predictable name in /tmp; moving to mktemp needs a matching
#   change in the cleanup at the end of the script.
# Returns:
#   The exit status of `smartctl -i` when the disk cannot be reached with
#   this command line, otherwise 0.
run_smartctl()
{
        local key

        #is this smart drive accessible this way?
        $SMARTCMD -i > /tmp/smart.$$ 2>/dev/null || return $?

        #yes, so collect a human readable description and a stable identity
        INFO=`grep -E '(Device Model:|Device:|Model Family:|Serial Number:)' /tmp/smart.$$ | sed -r 's/ +/ /g' | tr "\n" " "`
        ID="`grep -E '(^Device:|Serial )' /tmp/smart.$$ | md5sum`"

        #md5sum prints "<hash>  -"; the bare hash is the per-device config key
        key=${ID%% *}

        #we already saw this disk?
        #(sometimes disks are accessible in more than one way, for example
        #with smart array controllers)
        if echo "$IDS" | grep -qF -- "$ID"; then
                return 0
        fi
        IDS="$IDS $ID"

        #list mode only needs the identity, so answer before touching the
        #drive any further
        if [ "$LIST" == 1 ]; then
                echo "$key     $FACILITY       $INFO"
                return 0
        fi

        #re-enable smart if this is not yet the case:
        $SMARTCMD -s on &>/dev/null

        #get current status; with -q errorsonly any text is worth reporting
        OUTPUT=`$SMARTCMD -H -i -A -l error -l selftest -q errorsonly 2>&1`
        EXIT=$?

        if [ "$OUTPUT" ]; then
                OUTPUT="Extra output: [ $OUTPUT ]"
        fi

        #graph of temperature, if any
        #(takes the leading digits found from column 88 of the first line
        #mentioning "Temperature")
        TEMPERATURE=`$SMARTCMD -A | grep Temperature | head -1 | cut -c88- | grep -E -o '^[0-9]+'`
        if [ "$TEMPERATURE" ]; then
                #TODO: add temperature_max_alert and caution?
                syn3-graphcreate "$FACILITY" --step=60 --use=MAX temperature=GAUGE:120:0:U
                syn3-graph "$FACILITY" temperature="$TEMPERATURE"
        fi

        #if monitoring is disabled in config file, drop any state and stop.
        if is_monitoring_disabled "$key"; then
                syn3-state "$FACILITY" DELETE
                return 0
        fi

        #analyse the smartctl exit status bit mask, most severe bit first.
        #NOTE(review): the "healty"/"Hardisk" spellings are kept as-is because
        #the messages are runtime output that other tooling may match on.
        if (( EXIT & 8 )); then
                syn3-state "$FACILITY" ALERT "Harddisk $INFO is FAILING! $OUTPUT"
        elif (( EXIT & 16 )); then
                syn3-state "$FACILITY" ALERT "Harddisk $INFO is almost failing. $OUTPUT"
        elif (( EXIT & 32 )); then
                syn3-state "$FACILITY" CAUTION "Harddisk $INFO has almost failed in the past. $OUTPUT"
        elif (( EXIT & 128 )); then
                syn3-state "$FACILITY" CAUTION "Harddisk $INFO selftest failed. $OUTPUT"
        elif (( EXIT & 64 )); then
                syn3-state "$FACILITY" OK "Harddisk $INFO is healty, but has logged errors: $OUTPUT"
        elif (( EXIT & 2 )); then
                #presumably unreachable: the -i probe above already returned
                #on any failure to talk to the device
                syn3-state "$FACILITY" DELETE
        elif [ "$EXIT" == 0 ]; then
                syn3-state "$FACILITY" OK "Hardisk $INFO is healty. $OUTPUT"
        else
                syn3-state "$FACILITY" CAUTION "Hardisk $INFO has unknown SMART-status. $OUTPUT"
        fi

        #if selftest is disabled in config file, we are done with this device.
        if is_selftest_disabled "$key"; then
                return 0
        fi

        #Start a long self-test at midnight
        if [ "`date +%H%M`" == "0000" ]; then
                $SMARTCMD -t long &>/dev/null
        fi

        #The disk was reachable; a failed self-test start must not make the
        #caller retry the same disk with other device types.
        return 0
}
108
# --list: only print "<key> <facility> <info>" for every disk found.
if [ "$1" == "--list" ]; then
        LIST=1
fi

#traverse all blockdevices
# Stop if sysfs is unavailable: otherwise the glob below would expand to the
# contents of whatever directory we happen to be in.
cd /sys/block || exit 1
for SYS in *; do
        # An empty directory leaves the pattern unexpanded.
        [ -e "$SYS" ] || continue

        # The sysfs name uses "!" where the /dev path has "/".
        DEV="/dev/${SYS//\!//}"

        # Facility names are built from the last path component of the device.
        NAME="${DEV##*/}"

        #range=1 (things like dm, loop, fd etc)?
        [ "`cat "$SYS/range"`" == "1" ] && continue

        #try default first (this works for ata and scsi):
        SMARTCMD="smartctl $DEV"
        FACILITY="SMART-$NAME"
        run_smartctl && continue

        #SAT
        SMARTCMD="smartctl $DEV -d sat"
        FACILITY="SMART-$NAME"
        run_smartctl && continue

        #try cciss Smart Array controller mode, then 3ware controller mode.
        #Every drive slot is probed; once one type answers for any slot the
        #remaining type is skipped for this device.
        for TYPE in cciss 3ware; do
                FOUND=
                for DRIVE in {0..15}; do
                        SMARTCMD="smartctl $DEV -d $TYPE,$DRIVE"
                        FACILITY="SMART-$NAME-$DRIVE"
                        run_smartctl && FOUND=1
                done
                [ "$FOUND" ] && continue 2
        done
done

# -f: the scratch file does not exist when no device was ever probed.
rm -f /tmp/smart.$$
152
Note: See TracBrowser for help on using the repository browser.