source: npl/kernel/initrd_scripts/root/etc/drbd.boot

Last change on this file was 6856db8, checked in by Edwin Eefting <edwin@datux.nl>, 6 years ago

spelling

  • Property mode set to 100755
File size: 15.4 KB
RevLine 
#!/bin/bash
#(C)2006 DatuX
#This script is sourced from /linuxrc

#Global variables that control this script:
# ETH                   Ethernet device to use
# LOCAL_IP              First IP to use
# REMOTE_IP             Second IP to use.
#                           NOTE: local and remote will be autonegotiated
#
# RATE                  Rebuild speed in kb/s (default 100000)
# PROTO                 Protocol to use for drbd (default C)
# REBUILD               First-time initialisation of a secondary node:
#                               Wait for unlimited time and rebuild all data.
# FORCEPRIMARY          First-time initialisation of a primary node:
#                               Force this node to be the primary node in case the data is inconsistent.
# CLUSTER_ID            Used by syndog to send heartbeat and determine ip address and primary role.
# BOOT_DELAY            Wait this many seconds for other node after initial boot (default 30).
#
#
# not implemented yet, don't forget to implement in syn3-heartbeatcheck as well:
# PRIMARY_ID            Prefer node with this NODE_ID to be primary node
23
splashstep "Activating server redundancy"

# Prepare the basic drbd subsystem; three minors are used below
# (drbd0 = boot, drbd1 = home, drbd2 = root).
modprobe drbd minor_count=3 || error_shell
# Clear drbd's usermode_helper module parameter.
# NOTE(review): presumably this stops the kernel from calling a userspace
# helper that does not exist inside the initrd - confirm.
echo "" > /sys/module/drbd/parameters/usermode_helper

# Needed for drbd verify (prevents an ugly error message).
modprobe sha1

# Wait for device nodes to appear.
udevadm settle

# Bring up all ethernet interfaces, so heartbeats can be seen on each of them.
for NET in /sys/class/net/eth*/address; do
    # When no interface matches, the glob is left as a literal pattern: skip it
    # instead of calling ifconfig with a bogus interface name.
    [ -e "$NET" ] || continue
    # The path has the form /sys/class/net/<interface>/address.
    INTERFACE=${NET%/address}
    INTERFACE=${INTERFACE##*/}
    ifconfig "$INTERFACE" up &>/dev/null
done
41
42
43############# config and startup
44
# Start syndog immediately in the background, broadcasting the cluster id, a
# unique id for this node (the MAC address of $ETH) and the status
# (0 = standby node).
NODE_ID=$(cat "/sys/class/net/$ETH/address")
# Only start the dog when we already have a cluster id; otherwise it is
# started later, as soon as we have learned one.
if [ "$CLUSTER_ID" != "all" ]; then
    THIS_BOOTED="0"
    # The three values are deliberately passed as ONE argument.
    syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
else
    THIS_BOOTED="2" # indicate we want to become primary
fi
54
55
# First-time initialisation of the drbd meta data. Only done when the node is
# explicitly (re)initialised via FORCEPRIMARY or REBUILD.
if [ "$FORCEPRIMARY" ] || [ "$REBUILD" ]; then
    # Before we start, make sure no old meta-data garbage is left behind.
    # The dd exit status is deliberately not checked.
    echo "Preparing meta data areas..."
    dd if=/dev/zero of=/dev/syn3/boot.meta count=262144
    dd if=/dev/zero of=/dev/syn3/home.meta count=262144
    dd if=/dev/zero of=/dev/syn3/root.meta count=262144
    # Now initialise the meta data; any failure here is fatal.
    drbdmeta --force /dev/drbd0 v08 /dev/syn3/boot.meta 0 create-md || error_shell
    drbdmeta --force /dev/drbd1 v08 /dev/syn3/home.meta 0 create-md || error_shell
    drbdmeta --force /dev/drbd2 v08 /dev/syn3/root.meta 0 create-md || error_shell
    echo "Metadata creation complete."
fi
69
# Default settings: only applied when the variable is unset or empty.
: "${RATE:=100000}"
: "${PROTO:=C}"
: "${BOOT_DELAY:=30}"
74
# Create the drbd resource "OS" with one minor per volume
# (0 = boot, 2 = root, 1 = home). Any failure is fatal.
drbdsetup new-resource OS || error_shell
drbdsetup new-minor OS /dev/drbd0 0 || error_shell
drbdsetup new-minor OS /dev/drbd2 2 || error_shell
drbdsetup new-minor OS /dev/drbd1 1 || error_shell

# Apply the activity log of every volume before attaching its disk.
drbdmeta /dev/drbd0 v08 /dev/syn3/boot.meta 0 apply-al || error_shell
drbdmeta /dev/drbd2 v08 /dev/syn3/root.meta 0 apply-al || error_shell
drbdmeta /dev/drbd1 v08 /dev/syn3/home.meta 0 apply-al || error_shell
85
# Attach /boot, / and /home, in that order. The chain stops at the first
# failure, so later disks are not attached once an earlier one failed.
# root resyncs after minor 0 (boot), home resyncs after minor 2 (root).
if ! {
    drbdsetup attach /dev/drbd0 /dev/md0       /dev/syn3/boot.meta 0 --resync-rate "$RATE" &&
    drbdsetup attach /dev/drbd2 /dev/syn3/root /dev/syn3/root.meta 0 --resync-after 0 --resync-rate "$RATE" &&
    drbdsetup attach /dev/drbd1 /dev/syn3/home /dev/syn3/home.meta 0 --resync-after 2 --resync-rate "$RATE"
}; then
    # Disk attaching failed!
    splasherror "Redundancy ERROR: Problem attaching disks."
    echo "-Are you updating to a newer kernel? "
    echo " In this case you need to disable redundancy before upgrading. "
    echo " This is the safest way to upgrade."
    echo "-If you're sure this should be the primairy node, use 'forceprimary'."
    echo " ONLY USE THIS IF YOU'RE SURE THE DATA ON THE PRIMARY NODE IS CONSISTENT!"
    echo "-If you're sure this should be the secondairy node, use 'rebuild'."
    error_shell
fi

splashmode verbose 2>/dev/null
102
103
104################################ Some common functions, used in the mainloop
105
# Check whether all the drbd partitions are connected.
# Waits at most one second per device for its connection; stops at the first
# device that is not connected.
# Returns: 0 when drbd0, drbd1 and drbd2 are all connected and /proc/drbd
#          does not mention StandAlone, 1 otherwise.
drbd_connected()
{
    local dev
    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" wait-connect --wfc-timeout 1 --degr-wfc-timeout 1 --outdated-wfc-timeout 1 || return 1
    done
    # An unreadable /proc/drbd counts as "no StandAlone device".
    ! grep -q StandAlone /proc/drbd 2>/dev/null
}
118
# (Re)set the drbd network settings of resource OS.
# Globals read:    REBUILD, SELECTED_LOCAL_IP, SELECTED_REMOTE_IP, PROTO,
#                  CONNECT_OPTIONS, AUTO_RECOVER
# Globals written: DISCARD
# Calls error_shell when the connect command fails.
drbd_network()
{
    # REBUILD is used for first-time initialisation of a secondary node AND
    # for recovery from a split-brain: local data is thrown away.
    DISCARD=""
    if [ "$REBUILD" ]; then
        DISCARD="--discard-my-data"
    fi

    # Just disconnect both possibilities; failures are expected and ignored.
    drbdsetup disconnect "$SELECTED_LOCAL_IP" "$SELECTED_REMOTE_IP" &>/dev/null
    drbdsetup disconnect "$SELECTED_REMOTE_IP" "$SELECTED_LOCAL_IP" &>/dev/null

    # The three option variables are intentionally unquoted: each may be empty
    # or hold several words that must become separate arguments.
    # shellcheck disable=SC2086
    drbdsetup connect OS "${SELECTED_LOCAL_IP}:7788" "${SELECTED_REMOTE_IP}:7788" \
        --proto "$PROTO" --verify-alg sha1 $CONNECT_OPTIONS $AUTO_RECOVER $DISCARD || error_shell
}
135
# Configure the drbd address as alias "<ETH>:drbd" on the selected ethernet
# device and bring that device up.
# Globals read: ETH, SELECTED_LOCAL_IP
config_network()
{
    ifconfig "${ETH}:drbd" "$SELECTED_LOCAL_IP"
    ifconfig "$ETH" up
}
141
# Become the primary node for all three drbd devices.
# Arguments: optional extra option(s) handed to every 'drbdsetup ... primary'
#            call (e.g. --overwrite-data-of-peer).
# Returns:   0 when all devices became primary. Stops at the first failure
#            and returns its status; devices switched before that failure
#            stay primary, so callers should call become_secondary.
become_primary()
{
    drbdsetup /dev/drbd0 primary "$@" &&
    drbdsetup /dev/drbd1 primary "$@" &&
    drbdsetup /dev/drbd2 primary "$@"
}
150
# Become secondary for all three drbd devices.
# Every device is tried, even when an earlier one fails.
# Returns: the exit status of the last device (drbd2).
become_secondary()
{
    local dev
    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" secondary
    done
}
158
##################### First time initialisation of primary
if [ "$FORCEPRIMARY" ]; then
    # The first time, our local data is still marked as inconsistent, so
    # indicate it is ok to overwrite the data of the peer.
    become_primary --overwrite-data-of-peer

    # This is only needed the first time the system uses drbd, so remove the
    # force-primary file immediately.
    FORCEPRIMARY=
    # Only touch /mnt when the mount really succeeded; otherwise rm and umount
    # would operate on whatever happens to be at /mnt.
    if mount /dev/boot /mnt; then
        rm /mnt/drbd.primary 2>/dev/null
        umount /mnt
    fi
    sync

    # Become secondary again, so the normal negotiation in the rest of this
    # script can go on.
    become_secondary

fi
175
176
##################### Wait until we become primary node

# Main negotiation loop. Every pass gathers the current state (heartbeat, ping,
# drbd connection, local consistency, roles), redraws the status screen and
# then acts on that state. The loop is only left through 'break', after
# become_primary succeeded.

#fix network before starting
# drbd_network
while true; do
    ###################### data gathering
    echo
    echo -n "Monitoring other node: "

    # Pressing enter within one second opens a debug shell.
    if read -r -t 1 ENTER; then
        debug_shell "Exit the shell to continue monitoring."
        echo
        echo -n "Continuing monitoring: "
    fi

    # Heartbeat.
    echo -n "heartbeat..."
    HEARTBEAT_ONLINE=
    OTHER_BOOTED=
    if HEARTBEATS=$(syndog "$CLUSTER_ID" --cat); then
        HEARTBEAT_ONLINE=1
        # Received critical network config info, keep it. Of the first
        # heartbeat line, field 3 is used as node id and field 4 as booted
        # status.
        OTHER_NODE_ID=$(echo "$HEARTBEATS" | cut -f3 -d' ' | head -1)
        OTHER_BOOTED=$(echo "$HEARTBEATS" | cut -f4 -d' ' | head -1)

        # Do we still need to learn the cluster id?
        if [ "$CLUSTER_ID" = "all" ]; then
            # Field 2 of the first heartbeat line that starts with $ETH.
            RECEIVED_ID=$(echo "$HEARTBEATS" | grep "^$ETH" | cut -f2 -d' ' | head -1)
            if [ "$RECEIVED_ID" != "" ]; then
                # We just learned our cluster id: keep it and start heartbeat.
                CLUSTER_ID="$RECEIVED_ID"
                syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
            fi
        fi

        # Configure network and drbd right away (once), so the next steps
        # will be ok faster.
        if ! [ "$NET_CONFIGURED" ]; then
            NET_CONFIGURED=1

            # The node ids determine the order of the ip's: the node with the
            # higher id gets LOCAL_IP.
            if [ "$NODE_ID" ">" "$OTHER_NODE_ID" ]; then
                SELECTED_LOCAL_IP=$LOCAL_IP
                SELECTED_REMOTE_IP=$REMOTE_IP
            else
                SELECTED_LOCAL_IP=$REMOTE_IP
                SELECTED_REMOTE_IP=$LOCAL_IP
            fi

            config_network
            drbd_network
        fi
    fi

    # Ping of the other ip.
    NET_ONLINE=
    if [ "$NET_CONFIGURED" ]; then
        echo -n "ping..."
        if fping -B 1 -t 250 -r 4 "$SELECTED_REMOTE_IP" &>/dev/null; then
            NET_ONLINE=1
        fi
    fi

    # Drbd connected?
    echo -n "drbd connection..."
    CONNECTED=
    if drbd_connected; then
        CONNECTED=1
    fi

    # Are we consistent?
    if grep 'ds:Inconsistent' /proc/drbd >/dev/null; then
        CONSISTENT=
    else
        CONSISTENT=1
    fi

    # Determine if the other node is already primary.
    OTHER_PRIMARY=
    if grep /Primary /proc/drbd >/dev/null; then
        OTHER_PRIMARY=1
    fi

    # Do we want to be primary?
    WANT_PRIMARY=
    if [ "$OTHER_BOOTED" = "2" ]; then
        # Other node wants primary by choice, so we don't.
        WANT_PRIMARY=
    elif [ "$NODE_ID" ">" "$OTHER_NODE_ID" ] || [ "$THIS_BOOTED" = "2" ]; then
        # We are primary by choice, or just because we won the "election"
        # (higher node id).
        WANT_PRIMARY=1
    fi

    # Determine the start time (first pass only), for BOOT_DELAY.
    [ "$START_TIME" ] || START_TIME=$(date +%s)
    BOOT_DELAY_LEFT=$(( BOOT_DELAY + START_TIME - $(date +%s) ))


    ###################### status screen
    echo -en "$CLS"
    echo "                       [ Syn-3 cluster status ]"
    echo
    echo

    echo -en "${BOLD}Heartbeat             :${NORMAL}"
    if [ "$HEARTBEAT_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL} "
    else
        echo -e "${BAD} OFFLINE ${NORMAL} "
    fi

    echo     " ClusterID     : $CLUSTER_ID"
    echo     " This node ID  : $NODE_ID "
    echo     " Other node ID : $OTHER_NODE_ID (booted=$OTHER_BOOTED)"

    # Show which role this node prefers, and whether a node forced it.
    echo -n  " Our preference: "
    if [ "$WANT_PRIMARY" ]; then
        if [ "$THIS_BOOTED" = "2" ]; then
            echo -e "Primary role (forced by this node)"
        else
            echo -e "Primary role"
        fi
    else
        if [ "$OTHER_BOOTED" = "2" ]; then
            echo -e "Secondary role (forced by other node)"
        else
            echo -e "Secondary role"
        fi
    fi

    echo -n  " Heartbeats    : "
    # Unquoted on purpose: word splitting puts the unique values on one line.
    # shellcheck disable=SC2046
    echo $(echo "$HEARTBEATS" | cut -f1 -d' ' | sort -u)


    echo
    echo -en "${BOLD}Network               :${NORMAL}"
    if ! [ "$NET_CONFIGURED" ]; then
        echo -e "${BAD} WAITING ${NORMAL}"
    elif [ "$NET_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    # ${NORMAL} closes the bold heading, like the Heartbeat and Network rows.
    echo -en "${BOLD}DRBD connection status:${NORMAL}"
    if [ "$CONNECTED" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    # Show the local data consistency status.
    echo -en "${BOLD}DRBD local data       :${NORMAL}"
    if [ "$CONSISTENT" ]; then
        echo -e "${GOOD} OK ${NORMAL}"
    else
        if grep sync /proc/drbd >/dev/null; then
            echo -ne "${WARN} Synchronising ${NORMAL}"
        else
            echo -ne "${BAD} Sync needed ${NORMAL}"
        fi
        if [ "$REBUILD" ]; then
            echo "(rebuild mode, discarding local changes)"
        else
            echo
        fi
    fi



    # Show status details.
    # drbdsetup status
    grep -E '(finish|sync|cs:)' /proc/drbd


    ###################### decide what to do with all the gathered info
    echo
    echo
    if ! [ "$HEARTBEAT_ONLINE" ]; then
        # No heartbeat from the other node.
        if [ "$NET_ONLINE" ]; then
            splasherror "Redundancy ERROR: Network is still online, but no heartbeat."
        elif ! [ "$CONSISTENT" ]; then
            splasherror "Redundancy ERROR: The data on this node needs to be synced!"
            echo "Cannot boot this node in this state."
            echo "Please reboot the other node and wait until synchronization is complete."
        elif ! [ "$BOOT_DELAY_LEFT" -le 0 ]; then
            splashwarn "Redundancy WARNING: Waiting for other node, booting after $BOOT_DELAY_LEFT seconds."
        elif become_primary; then
            # Other node stayed away for the whole boot delay: boot alone.
            splashwarn "Redundancy WARNING: Other node is offline, booting this node!"
            break
        else
            splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
            become_secondary
        fi
    elif ! [ "$NET_CONFIGURED" ]; then
        splasherror "Waiting for network configuration"
    elif ! [ "$NET_ONLINE" ]; then
        splasherror "Node is online, but can not ping it. (wait a minute, or check firewall/network)"
    elif ! [ "$CONNECTED" ]; then
        # Heartbeat and ping are fine, but drbd is not connected: reconnect.
        drbd_network
        if dmesg | grep "peer's disk size is too small!"; then
            echo "Please shut down the other node and reboot this node."
            error_shell "Redundancy ERROR: Disk of other node is too small."
        else
            splasherror "Redundancy ERROR: Other node is online, but disconnected!"
            echo " - Wait a minute for other node to reconfigure itself."
            echo " - Check the firewall settings."
            echo " - Check if there is a hardware failure and shut it down."
            if [ "$AUTO_RECOVER" = "" ]; then
                echo " -Check for a split-brain. (or enable autorecover on both nodes)"
            fi
        fi
    elif [ "$OTHER_PRIMARY" ]; then
        # Fully connected and the other node is already primary.
        become_secondary
        if [ "$REBUILD" ]; then
            splashwarn "Synchronization started. Please reboot other node to finish installation. (this node will become primary and auto-reboot)"
            # if [ "$CONSISTENT" ]; then
            #     splashwarn "Finished initial synchronization: This node needs become primary and reboot from harddisk. Please temporary shutdown other node. (this node will become primary and auto-reboot)"
            # else
            #     splashwarn "Redundancy OK: Please wait until initial synchronization is complete..."
            # fi
        else
            splashinfo "Redundancy OK: This node is secondary."
        fi
    elif ! [ "$WANT_PRIMARY" ]; then
        # Other node is not primary yet, and we don't want to be primary.
        splashinfo "Redundancy OK: This node is secondary, other node should be primary."
    elif become_primary; then
        splashinfo "Redundancy OK: This node is primary, booting this node"
        break
    else
        # This can happen if the other node had already booted, so maybe it
        # is not an error? Do call become_secondary though, in case half of
        # our devices are already primary!
        become_secondary
        splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
    fi

done

# If we're here, we're primary and ready to boot!


true
Note: See TracBrowser for help on using the repository browser.