source: npl/kernel/initrd_scripts/root/etc/drbd.boot

Last change on this file was 6856db8, checked in by Edwin Eefting <edwin@datux.nl>, 6 years ago

spelling

  • Property mode set to 100755
File size: 15.4 KB
Line 
1#!/bin/bash
2#(C)2006 DatuX
3#This script is sourced from /linuxrc
4
5#Global variables that control this script:
6# ETH                   Ethernet device to use
7# LOCAL_IP              First IP to use
8# REMOTE_IP             Second IP to use.
9#                           NOTE: local and remote will be autonegotiated
10#
11# RATE                  Rebuild speed in kb/s (default 100000)
12# PROTO                 Protocol to use for drbd (default C)
13# REBUILD               First-time initialisation of a secondary node:
14#                               Wait for unlimited time and rebuild all data.
15# FORCEPRIMARY  First-time initialisation of a primary node:
16#                               Force this node to be the primary node in case the data is inconsistent.
17# CLUSTER_ID    Used by syndog to send heartbeat and determine ip address and primary role.
18# BOOT_DELAY    Wait this many seconds for other node after initial boot.
19#
20#
21# Not implemented yet; don't forget to implement in syn3-heartbeatcheck as well:
22# PRIMARY_ID    Prefer node with this NODE_ID to be primary node
23
# Announce this boot stage on the splash screen.
splashstep "Activating server redundancy"

# Prepare the basic drbd subsystem (three minors); drop to the error shell
# when the module cannot be loaded.
modprobe drbd minor_count=3 || error_shell
# Write an empty value to drbd's usermode_helper module parameter.
echo "" > /sys/module/drbd/parameters/usermode_helper

# Needed for drbd verify (prevents an ugly error message).
modprobe sha1

# Wait for the device nodes to appear.
udevadm settle

# Bring up all eth* interfaces for heartbeat.
for NET in /sys/class/net/eth*/address; do
    # Without any eth* interface the glob stays literal; skip it instead of
    # running ifconfig on the bogus interface name "eth*".
    [ -e "$NET" ] || continue
    # /sys/class/net/<interface>/address -> <interface>
    INTERFACE="${NET%/address}"
    INTERFACE="${INTERFACE##*/}"
    ifconfig "$INTERFACE" up &>/dev/null
done
41
42
43############# config and startup
44
# Start syndog immediately in the background. It is handed the cluster id, a
# unique id for this node (the MAC address of $ETH) and a status value
# (0 = standby node).
NODE_ID=$(cat /sys/class/net/$ETH/address)

# Only start the dog when we already have a cluster id; with CLUSTER_ID "all"
# it is started later, once an id has been received.
if [ "$CLUSTER_ID" = "all" ]; then
    THIS_BOOTED="2" # indicate we want to become primary
else
    THIS_BOOTED="0"
    syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
fi
54
55
# First-time initialisation of the meta data (requested via FORCEPRIMARY or
# REBUILD).
if [ -n "$FORCEPRIMARY" ] || [ -n "$REBUILD" ]; then
    # Before we start, make sure no old meta-data garbage is left behind.
    # The exit status of dd is deliberately not checked, as before.
    echo "Preparing meta data areas..."
    for _meta in boot home root; do
        dd if=/dev/zero of=/dev/syn3/$_meta.meta count=262144
    done

    # Now initialize the metadata. Each entry is <minor>:<volume name>.
    for _meta in 0:boot 1:home 2:root; do
        drbdmeta --force /dev/drbd${_meta%%:*} v08 /dev/syn3/${_meta#*:}.meta 0 create-md || error_shell
    done
    unset _meta
    echo "Metadata creation complete."
fi
69
# Default settings: only applied when the variable is unset or empty.
: "${RATE:=100000}"
: "${PROTO:=C}"
: "${BOOT_DELAY:=30}"
74
# Create the drbd resource "OS" with its three minors (order: 0, 2, 1).
drbdsetup new-resource OS || error_shell
for _minor in 0 2 1; do
    drbdsetup new-minor OS /dev/drbd$_minor $_minor || error_shell
done

# Apply the activity log of every volume, in the same order.
# Each entry is <minor>:<volume name>.
for _vol in 0:boot 2:root 1:home; do
    drbdmeta /dev/drbd${_vol%%:*} v08 /dev/syn3/${_vol#*:}.meta 0 apply-al || error_shell
done
unset _minor _vol

# Attach /boot, / and /home. The attach calls run in order and stop at the
# first failure; drbd2 is attached with --resync-after 0 and drbd1 with
# --resync-after 2.
if ! { drbdsetup attach /dev/drbd0 /dev/md0 /dev/syn3/boot.meta 0 --resync-rate $RATE &&
       drbdsetup attach /dev/drbd2 /dev/syn3/root /dev/syn3/root.meta 0 --resync-after 0 --resync-rate $RATE &&
       drbdsetup attach /dev/drbd1 /dev/syn3/home /dev/syn3/home.meta 0 --resync-after 2 --resync-rate $RATE; }; then
    # Attaching the disks failed: explain the usual causes and open the
    # error shell.
    splasherror "Redundancy ERROR: Problem attaching disks."
    echo "-Are you updating to a newer kernel? "
    echo " In this case you need to disable redundancy before upgrading. "
    echo " This is the safest way to upgrade."
    echo "-If you're sure this should be the primairy node, use 'forceprimary'."
    echo " ONLY USE THIS IF YOU'RE SURE THE DATA ON THE PRIMARY NODE IS CONSISTENT!"
    echo "-If you're sure this should be the secondairy node, use 'rebuild'."
    error_shell
fi

splashmode verbose 2>/dev/null
102
103
104################################ Some common functions, used in the mainloop
105
# Check whether all drbd partitions are connected.
# Returns 0 when wait-connect succeeds for drbd0, drbd1 and drbd2 (one second
# timeout each) and /proc/drbd mentions no StandAlone device; 1 otherwise.
drbd_connected()
{
    local dev
    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" wait-connect --wfc-timeout 1 --degr-wfc-timeout 1 --outdated-wfc-timeout 1 || return 1
    done

    if grep StandAlone /proc/drbd &>/dev/null; then
        return 1
    fi
    return 0
}
118
# (Re)set the drbd network settings.
# Reads:  REBUILD, SELECTED_LOCAL_IP, SELECTED_REMOTE_IP, PROTO,
#         CONNECT_OPTIONS, AUTO_RECOVER
# Writes: DISCARD
# Calls error_shell when the connect command fails.
drbd_network()
{
    # REBUILD is used for first-time initialisation of a secondary node AND
    # for recovery from split-brain: in that case local data is discarded.
    DISCARD=${REBUILD:+--discard-my-data}

    # Just disconnect both possible address orders; failures are ignored.
    drbdsetup disconnect $SELECTED_LOCAL_IP $SELECTED_REMOTE_IP &>/dev/null
    drbdsetup disconnect $SELECTED_REMOTE_IP $SELECTED_LOCAL_IP &>/dev/null

    # The option variables stay unquoted so that empty ones add no argument.
    if ! drbdsetup connect OS $SELECTED_LOCAL_IP:7788 $SELECTED_REMOTE_IP:7788 --proto $PROTO --verify-alg sha1 $CONNECT_OPTIONS $AUTO_RECOVER $DISCARD; then
        error_shell
    fi
}
135
# Configure the "$ETH:drbd" interface with the selected local IP and bring
# $ETH up.
config_network()
{
    ifconfig ${ETH}:drbd ${SELECTED_LOCAL_IP}
    ifconfig ${ETH} up
}
141
# Become the primary node on drbd0, drbd1 and drbd2, in that order.
# $1 - optional extra option passed to every "drbdsetup ... primary" call
# Returns 0 when all three devices became primary; otherwise stops at the
# first failing device and returns its exit status.
become_primary()
{
    local dev
    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        # $1 stays unquoted so that a missing option adds no empty argument.
        drbdsetup "$dev" primary $1 || return $?
    done
}
150
# Become secondary on drbd0, drbd1 and drbd2. Every device is tried even if
# an earlier one fails; the exit status is that of the last call (drbd2).
become_secondary()
{
    local dev
    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" secondary
    done
}
158
##################### First time initialisation of primary
if [ -n "$FORCEPRIMARY" ]; then
    # The first time, our local data is still marked as inconsistent, so
    # indicate that it is OK to overwrite the data of the peer.
    become_primary --overwrite-data-of-peer

    # This is only needed the first time the system uses drbd, so clear the
    # flag and remove the force-primary file immediately.
    FORCEPRIMARY=""
    mount /dev/boot /mnt
    rm /mnt/drbd.primary 2>/dev/null
    umount /mnt
    sync

    # Become secondary again, so the normal negotiation in the rest of this
    # script can go on.
    become_secondary
fi
175
176
##################### Wait until we become primary node
#
# Each pass of the loop gathers the state of the other node (heartbeat, ping,
# drbd connection, data consistency, roles), redraws the status screen and
# then acts on that state. The loop is only left once this node has become
# primary.

#fix network before starting
# drbd_network
while true; do
    ###################### data gathering
    echo
    echo -n "Monitoring other node: "

    # read waits at most one second for a line of input; pressing enter
    # opens a debug shell.
    if read -r -t 1 ENTER; then
        debug_shell "Exit the shell to continue monitoring."
        echo
        echo -n "Continuing monitoring: "
    fi

    # Heartbeat
    echo -n "heartbeat..."
    HEARTBEAT_ONLINE=
    OTHER_BOOTED=
    if HEARTBEATS="$(syndog $CLUSTER_ID --cat)"; then
        HEARTBEAT_ONLINE=1
        # Received critical network config info, keep it
        # (first line: field 3 = node id, field 4 = booted status).
        OTHER_NODE_ID=$(echo "$HEARTBEATS" | cut -f3 -d' ' | head -1)
        OTHER_BOOTED=$(echo "$HEARTBEATS" | cut -f4 -d' ' | head -1)

        # Do we still need to learn the cluster id?
        if [ "$CLUSTER_ID" == "all" ]; then
            RECEIVED_ID=$(echo "$HEARTBEATS" | grep "^$ETH" | cut -f2 -d' ' | head -1)
            if [ "$RECEIVED_ID" != "" ]; then
                # We just learned our cluster id, keep it and start heartbeat.
                CLUSTER_ID="$RECEIVED_ID"
                syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
            fi
        fi

        # Configure network and drbd right away (once), so the next steps
        # will be OK faster.
        if [ -z "$NET_CONFIGURED" ]; then
            NET_CONFIGURED=1

            # The node ids determine the order of the IPs.
            if [ "$NODE_ID" ">" "$OTHER_NODE_ID" ]; then
                SELECTED_LOCAL_IP=$LOCAL_IP
                SELECTED_REMOTE_IP=$REMOTE_IP
            else
                SELECTED_LOCAL_IP=$REMOTE_IP
                SELECTED_REMOTE_IP=$LOCAL_IP
            fi

            config_network
            drbd_network
        fi
    fi

    # Ping of the other IP
    NET_ONLINE=
    if [ -n "$NET_CONFIGURED" ]; then
        echo -n "ping..."
        if fping -B 1 -t 250 -r 4 $SELECTED_REMOTE_IP >/dev/null 2>&1; then
            NET_ONLINE=1
        fi
    fi

    # drbd connected?
    echo -n "drbd connection..."
    CONNECTED=
    if drbd_connected; then
        CONNECTED=1
    fi

    # Are we consistent?
    if grep -q 'ds:Inconsistent' /proc/drbd; then
        CONSISTENT=
    else
        CONSISTENT=1
    fi

    # Determine if the other node is already primary
    OTHER_PRIMARY=
    if grep -q /Primary /proc/drbd; then
        OTHER_PRIMARY=1
    fi

    # Do we want to be primary?
    WANT_PRIMARY=
    if [ "$OTHER_BOOTED" == "2" ]; then
        # Other node wants primary by choice, so we don't.
        WANT_PRIMARY=
    elif [ "$NODE_ID" ">" "$OTHER_NODE_ID" ] || [ "$THIS_BOOTED" == "2" ]; then
        # We are primary by choice or just because we won the "election"
        # (higher node id).
        WANT_PRIMARY=1
    fi

    # Determine the start time (set once), for BOOT_DELAY
    if [ -z "$START_TIME" ]; then
        START_TIME=$(date +%s)
    fi
    BOOT_DELAY_LEFT=$(( BOOT_DELAY + START_TIME - $(date +%s) ))


    ###################### status screen
    echo -en "$CLS"
    echo "                       [ Syn-3 cluster status ]"
    echo
    echo

    echo -en "${BOLD}Heartbeat             :${NORMAL}"
    if [ -n "$HEARTBEAT_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL} "
    else
        echo -e "${BAD} OFFLINE ${NORMAL} "
    fi

    echo     " ClusterID     : $CLUSTER_ID"
    echo     " This node ID  : $NODE_ID "
    echo     " Other node ID : $OTHER_NODE_ID (booted=$OTHER_BOOTED)"

    # Show which role this node prefers
    echo -n  " Our preference: "
    if [ -n "$WANT_PRIMARY" ]; then
        if [ "$THIS_BOOTED" == "2" ]; then
            echo -e "Primary role (forced by this node)"
        else
            echo -e "Primary role"
        fi
    else
        if [ "$OTHER_BOOTED" == "2" ]; then
            echo -e "Secondary role (forced by other node)"
        else
            echo -e "Secondary role"
        fi
    fi

    echo -n  " Heartbeats    : "
    # Unquoted on purpose: word splitting joins the unique first fields
    # onto a single line.
    echo $(echo "$HEARTBEATS" | cut -f1 -d' ' | sort -u)


    echo
    echo -en "${BOLD}Network               :${NORMAL}"
    if [ -z "$NET_CONFIGURED" ]; then
        echo -e "${BAD} WAITING ${NORMAL}"
    elif [ -n "$NET_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    echo -en "${BOLD}DRBD connection status:"
    if [ -n "$CONNECTED" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    # Show the local data consistency status
    echo -en "${BOLD}DRBD local data       :"
    if [ -n "$CONSISTENT" ]; then
        echo -e "${GOOD} OK ${NORMAL}"
    else
        if grep -q sync /proc/drbd; then
            echo -ne "${WARN} Synchronising ${NORMAL}"
        else
            echo -ne "${BAD} Sync needed ${NORMAL}"
        fi
        if [ -n "$REBUILD" ]; then
            echo "(rebuild mode, discarding local changes)"
        else
            echo
        fi
    fi



    # Show status details
    # drbdsetup status
    grep -E '(finish|sync|cs:)' /proc/drbd


    ###################### act on the gathered info
    echo
    echo
    if [ -n "$HEARTBEAT_ONLINE" ]; then
        if [ -z "$NET_CONFIGURED" ]; then
            splasherror "Waiting for network configuration"
        elif [ -z "$NET_ONLINE" ]; then
            splasherror "Node is online, but can not ping it. (wait a minute, or check firewall/network)"
        elif [ -z "$CONNECTED" ]; then
            # Heartbeat and ping are fine but drbd is not connected:
            # reset the drbd network settings and report.
            drbd_network
            if dmesg | grep "peer's disk size is too small!"; then
                echo "Please shut down the other node and reboot this node."
                error_shell "Redundancy ERROR: Disk of other node is too small."
            else
                splasherror "Redundancy ERROR: Other node is online, but disconnected!"
                echo " - Wait a minute for other node to reconfigure itself."
                echo " - Check the firewall settings."
                echo " - Check if there is a hardware failure and shut it down."
                if [ "$AUTO_RECOVER" == "" ]; then
                    echo " -Check for a split-brain. (or enable autorecover on both nodes)"
                fi
            fi
        elif [ -n "$OTHER_PRIMARY" ]; then
            # Connected and the other node is already primary.
            become_secondary
            if [ -n "$REBUILD" ]; then
                splashwarn "Synchronization started. Please reboot other node to finish installation. (this node will become primary and auto-reboot)"
                # if [ "$CONSISTENT" ]; then
                #     splashwarn "Finished initial synchronization: This node needs become primary and reboot from harddisk. Please temporary shutdown other node. (this node will become primary and auto-reboot)"
                # else
                #     splashwarn "Redundancy OK: Please wait until initial synchronization is complete..."
                # fi
            else
                splashinfo "Redundancy OK: This node is secondary."
            fi
        elif [ -z "$WANT_PRIMARY" ]; then
            # Other node is not primary yet, and we don't want to be.
            splashinfo "Redundancy OK: This node is secondary, other node should be primary."
        elif become_primary; then
            splashinfo "Redundancy OK: This node is primary, booting this node"
            break
        else
            # This can happen if the other node had already booted, so maybe
            # not an error? Do call become_secondary though, in case half of
            # our devices are already primary!
            become_secondary
            splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
        fi
    else
        # No heartbeat from the other node.
        if [ -n "$NET_ONLINE" ]; then
            splasherror "Redundancy ERROR: Network is still online, but no heartbeat."
        elif [ -z "$CONSISTENT" ]; then
            splasherror "Redundancy ERROR: The data on this node needs to be synced!"
            echo "Cannot boot this node in this state."
            echo "Please reboot the other node and wait until synchronization is complete."
        elif ! [ "$BOOT_DELAY_LEFT" -le 0 ]; then
            # Written as a negated -le so that a failing test still ends up
            # in this "keep waiting" branch.
            splashwarn "Redundancy WARNING: Waiting for other node, booting after $BOOT_DELAY_LEFT seconds."
        elif become_primary; then
            splashwarn "Redundancy WARNING: Other node is offline, booting this node!"
            break
        else
            splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
            become_secondary
        fi
    fi

done

# If we're here, we're primary and ready to boot!


true
Note: See TracBrowser for help on using the repository browser.