#!/bin/bash
#(C)2006 DatuX
#This script is sourced from /linuxrc

#Global variables that control this script:
# ETH			Ethernet device to use
# LOCAL_IP		First IP to use
# REMOTE_IP		Second IP to use.
#			    NOTE: local and remote will be autonegotiated
#
# RATE			Rebuild speed in kb/s (default 100000)
# PROTO			Protocol to use for drbd (default C)
# REBUILD		First-time initialisation of a secondary node:
#				Wait for unlimited time and rebuild all data.
# FORCEPRIMARY	First-time initialisation of a primary node:
#				Force this node to be the primary node in case the data is inconsistent.
# CLUSTER_ID    Used by syndog to send heartbeat and determine IP address and primary role.
# BOOT_DELAY    Wait this many seconds for other node after initial boot.
#
#
# not implemented yet, don't forget to implement in syn3-heartbeatcheck as well:
# PRIMARY_ID  	Prefer node with this NODE_ID to be primary node

splashstep "Activating server redundancy"

# Load the drbd kernel module with room for three minors. Nothing below can
# work without it, so a failure drops into the error shell.
if ! modprobe drbd minor_count=3; then
    error_shell
fi

# Write an empty value into drbd's usermode_helper module parameter.
# NOTE(review): presumably this keeps the module from calling an external
# helper program that is not available at this stage -- confirm.
echo > /sys/module/drbd/parameters/usermode_helper

# Needed for drbd verify (prevents an ugly error message); the result of this
# modprobe is not checked.
modprobe sha1

# Wait for the device nodes to appear.
udevadm settle

# Bring up every eth* interface so all of them can be used for the heartbeat.
# NET and INTERFACE stay plain globals, as before.
for NET in /sys/class/net/eth*/address; do
    # Without any eth* interface the pattern is left unexpanded; skip it
    # instead of running ifconfig on an interface literally named "eth*".
    [ -e "$NET" ] || continue

    # /sys/class/net/<interface>/address  ->  <interface>
    INTERFACE=${NET%/address}
    INTERFACE=${INTERFACE##*/}

    # Best effort: errors for a single interface are ignored on purpose.
    ifconfig "$INTERFACE" up &>/dev/null
done


############# config and startup

# Start syndog in the background right away. It is handed one string made of
# the cluster id, a unique id for this node (the hardware address of $ETH)
# and the status (0 = standby node).
NODE_ID=$(cat /sys/class/net/$ETH/address)

# The dog can only be started when the cluster id is already known; with the
# placeholder id "all" it is started later, once the real id has been learned.
case "$CLUSTER_ID" in
    all)
        THIS_BOOTED="2" #indicate we want to become primary
        ;;
    *)
        THIS_BOOTED="0"
        syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
        ;;
esac


# First-time initialisation of the meta data; runs when either FORCEPRIMARY or
# REBUILD is non-empty.
if [ -n "$FORCEPRIMARY$REBUILD" ]; then
    # Before we start, wipe the meta data areas so no stale garbage is left.
    # The result of dd is not checked.
    echo "Preparing meta data areas..."
    for _drbd_area in boot home root; do
        dd if=/dev/zero of=/dev/syn3/$_drbd_area.meta count=262144
    done

    # Now create fresh meta data; each entry is <minor>:<area>.
    for _drbd_pair in 0:boot 1:home 2:root; do
        drbdmeta --force /dev/drbd${_drbd_pair%%:*} v08 /dev/syn3/${_drbd_pair#*:}.meta 0 create-md || error_shell
    done
    unset _drbd_area _drbd_pair

    echo "Metadata creation complete."
fi

# Defaults for every setting that was left unset or empty:
# resync rate, drbd protocol and seconds to wait for the other node.
: "${RATE:=100000}"
: "${PROTO:=C}"
: "${BOOT_DELAY:=30}"

# Create the resource "OS" and register its minors, in the order 0, 2, 1.
drbdsetup new-resource OS || error_shell
for _drbd_minor in 0 2 1; do
    drbdsetup new-minor OS /dev/drbd$_drbd_minor $_drbd_minor || error_shell
done

# Apply the activity log of each meta data area, same order; each entry is
# <minor>:<area>.
for _drbd_pair in 0:boot 2:root 1:home; do
    drbdmeta /dev/drbd${_drbd_pair%%:*} v08 /dev/syn3/${_drbd_pair#*:}.meta 0 apply-al || error_shell
done
unset _drbd_minor _drbd_pair

# Attach the backing devices for /boot (drbd0), / (drbd2) and /home (drbd1).
# The attaches are chained, so the first failure skips the remaining ones.
# drbd2 is told to resync after minor 0, drbd1 after minor 2.
if ! {
    drbdsetup attach /dev/drbd0 /dev/md0 /dev/syn3/boot.meta 0 --resync-rate $RATE &&
    drbdsetup attach /dev/drbd2 /dev/syn3/root /dev/syn3/root.meta 0 --resync-after 0 --resync-rate $RATE &&
    drbdsetup attach /dev/drbd1 /dev/syn3/home /dev/syn3/home.meta 0 --resync-after 2 --resync-rate $RATE
}; then
    # Attaching a disk failed: explain the likely causes, then open the error shell.
    splasherror "Redundancy ERROR: Problem attaching disks."
    echo "-Are you updating to a newer kernel? "
    echo " In this case you need to disable redundancy before upgrading. "
    echo " This is the safest way to upgrade."
    echo "-If you're sure this should be the primairy node, use 'forceprimary'."
    echo " ONLY USE THIS IF YOU'RE SURE THE DATA ON THE PRIMARY NODE IS CONSISTENT!"
    echo "-If you're sure this should be the secondairy node, use 'rebuild'."
    error_shell
fi

# Switch the splash screen to verbose mode; error output is discarded.
splashmode verbose 2>/dev/null


################################ Some common functions, used in the mainloop

# Check whether all drbd devices are connected.
# Returns 0 when wait-connect succeeds for drbd0, drbd1 and drbd2 (checked in
# that order, stopping at the first failure) and /proc/drbd contains no
# "StandAlone" line; returns 1 otherwise.
drbd_connected()
{
    local dev

    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" wait-connect --wfc-timeout 1 --degr-wfc-timeout 1 --outdated-wfc-timeout 1 || return 1
    done

    # Any non-matching result of grep (including a read error) counts as
    # "not StandAlone".
    if grep StandAlone /proc/drbd &>/dev/null; then
        return 1
    fi
    return 0
}

# (Re)apply the drbd network settings for resource OS.
# Reads:  REBUILD, SELECTED_LOCAL_IP, SELECTED_REMOTE_IP, PROTO,
#         CONNECT_OPTIONS, AUTO_RECOVER
# Writes: DISCARD (global)
drbd_network()
{
    # Rebuild mode is used for the first-time initialisation of a secondary
    # node AND for recovery from a split-brain: our own data is discarded.
    DISCARD=${REBUILD:+--discard-my-data}

    # Drop an existing connection, trying both address orders; failures are
    # expected here and therefore silenced.
    drbdsetup disconnect ${SELECTED_LOCAL_IP} ${SELECTED_REMOTE_IP} >/dev/null 2>&1
    drbdsetup disconnect ${SELECTED_REMOTE_IP} ${SELECTED_LOCAL_IP} >/dev/null 2>&1

    # The option variables are left unquoted on purpose: empty ones must
    # vanish and multi-word ones must split into separate arguments.
    drbdsetup connect OS \
        ${SELECTED_LOCAL_IP}:7788 ${SELECTED_REMOTE_IP}:7788 \
        --proto $PROTO --verify-alg sha1 \
        $CONNECT_OPTIONS $AUTO_RECOVER $DISCARD ||
        error_shell
}

# Put the selected local IP on the "<ETH>:drbd" alias interface and bring the
# base interface up. Returns the status of the final ifconfig call.
config_network()
{
    local alias_dev=$ETH:drbd

    ifconfig $alias_dev $SELECTED_LOCAL_IP
    ifconfig $ETH up
}

# Become the primary node for drbd0, drbd1 and drbd2, in that order.
# $1 (optional): extra option handed to every "drbdsetup ... primary" call.
# Stops at the first device that fails and returns that status; returns 0
# when all three succeeded.
become_primary()
{
    local dev

    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        # $1 is left unquoted on purpose: without an argument it has to
        # disappear instead of being passed as an empty word.
        drbdsetup "$dev" primary $1 || return
    done
}

# Demote drbd0, drbd1 and drbd2 to secondary. Every device is tried, even when
# an earlier one fails; the return status is that of the last call (drbd2).
become_secondary()
{
    local dev

    for dev in /dev/drbd0 /dev/drbd1 /dev/drbd2; do
        drbdsetup "$dev" secondary
    done
}

##################### First time initialisation of primary
if [ "$FORCEPRIMARY" ]; then
    # The first time our local data is still marked as inconsistent, so tell
    # drbd it is OK to overwrite the data of the peer.
    become_primary --overwrite-data-of-peer

    # Forcing is only needed the first time the system uses drbd, so clear the
    # flag and remove the force-primary file immediately.
    FORCEPRIMARY=
    if mount /dev/boot /mnt; then
        rm /mnt/drbd.primary 2>/dev/null
        umount /mnt
    else
        # Do not touch /mnt when nothing is mounted there, and make the
        # failure visible instead of silently leaving the file behind.
        echo "WARNING: could not mount /dev/boot, force-primary file drbd.primary was NOT removed."
    fi
    sync

    # Become secondary again, so the normal negotiation in the rest of this
    # script can go on.
    become_secondary

fi


##################### Wait until we become primary node
#
# Main monitoring loop. Every pass
#   1. gathers state (heartbeat, ping, drbd connection, local consistency,
#      role of the other node, our own role preference, boot delay left),
#   2. redraws the status screen,
#   3. decides what to do with that state.
# The only exits are the two 'break' statements, both taken right after
# become_primary succeeded.

#fix network before starting
# drbd_network
while true; do
    ###################### data gathering
    echo
    echo -n "Monitoring other node: "

    #press enter for shell
    # (the 1 second read timeout also paces the loop while nothing is typed)
    if read -t 1 ENTER; then
        debug_shell "Exit the shell to continue monitoring."
        echo
        echo -n "Continuing monitoring: "
    fi

    #heart beat
    echo -n "heartbeat..."
    # Both flags are recomputed on every pass. OTHER_NODE_ID is not reset, so
    # the last learned value stays in effect while the heartbeat is down.
    HEARTBEAT_ONLINE=
    OTHER_BOOTED=
    # The output of "syndog --cat" is used as space separated fields:
    # field 1 is listed under "Heartbeats" on the status screen, field 2 is
    # taken as cluster id, field 3 as node id and field 4 as booted flag.
    if HEARTBEATS="`syndog $CLUSTER_ID --cat`"; then
        HEARTBEAT_ONLINE=1
        #received critical network config info, keep it
        # (only the first line of the output is used for these two values)
        OTHER_NODE_ID=`echo "$HEARTBEATS"|cut -f3 -d' '|head -1`
        OTHER_BOOTED=`echo "$HEARTBEATS"|cut -f4 -d' '|head -1`

        #do we still need to learn the cluster id?
        if [ "$CLUSTER_ID" == "all" ]; then
            # only lines starting with the value of $ETH are considered
            RECEIVED_ID=`echo "$HEARTBEATS"|grep ^$ETH|cut -f2 -d' '|head -1`
            if [ "$RECEIVED_ID" != "" ]; then
                #we just learned our cluster id, keep it and start heartbeat
                CLUSTER_ID="$RECEIVED_ID"
                syndog "$CLUSTER_ID $NODE_ID $THIS_BOOTED" &
            fi
        fi

        #config network and drbd right away, so the next steps will be ok faster
        # This happens once: NET_CONFIGURED is never cleared inside this loop.
        if ! [ "$NET_CONFIGURED" ]; then
            NET_CONFIGURED=1

            #the node_ids determine the order of ip's.
            # String comparison: the quoted ">" is the test operator, not a
            # redirection.
            # NOTE(review): presumably the other node runs the same comparison
            # and so picks the mirrored local/remote assignment -- confirm.
            if [ "$NODE_ID" ">" "$OTHER_NODE_ID" ]; then
                SELECTED_LOCAL_IP=$LOCAL_IP
                SELECTED_REMOTE_IP=$REMOTE_IP
            else
                SELECTED_LOCAL_IP=$REMOTE_IP
                SELECTED_REMOTE_IP=$LOCAL_IP
            fi

            config_network
            drbd_network
        fi
    fi

    #ping of other ip
    # NET_ONLINE is only set when the network is configured and fping to the
    # selected remote IP succeeds.
    NET_ONLINE=
    if [ "$NET_CONFIGURED" ]; then
        echo -n "ping..."
        if fping -B 1 -t 250 -r 4 $SELECTED_REMOTE_IP >/dev/null 2>/dev/null; then
            NET_ONLINE=1
        fi
    fi

    #drbd connected
    echo -n "drbd connection..."
    CONNECTED=
    if drbd_connected; then
        CONNECTED=1
    fi

    #are we consistent?
    # CONSISTENT is cleared as soon as any line of /proc/drbd contains
    # 'ds:Inconsistent'; if grep finds nothing (or fails) it is set.
    if grep 'ds:Inconsistent' /proc/drbd >/dev/null; then
        CONSISTENT=
    else
        CONSISTENT=1
    fi

    # Determine if other node is already primary
    # NOTE(review): "/Primary" presumably matches the peer half of a
    # "<local>/<peer>" role pair in /proc/drbd -- confirm.
    OTHER_PRIMARY=
    if grep /Primary /proc/drbd >/dev/null; then
        OTHER_PRIMARY=1
    fi

    #do we want to be primary?
    WANT_PRIMARY=
    if [ "$OTHER_BOOTED" == "2" ]; then
        #other wants primary by choice, so we dont
        WANT_PRIMARY=
    elif [ "$NODE_ID" ">" "$OTHER_NODE_ID" ] || [ "$THIS_BOOTED" == "2" ]; then
        # we are primary by choice or just because we won the "election" (higher node_id)
        WANT_PRIMARY=1
    fi

    #determine start_time, for BOOT_DELAY
    # START_TIME is taken on the first pass only; BOOT_DELAY_LEFT is the number
    # of seconds remaining and drops to 0 or below once BOOT_DELAY has passed.
    if ! [ "$START_TIME" ]; then
        START_TIME=`date +%s`
    fi
    BOOT_DELAY_LEFT=$(( BOOT_DELAY + START_TIME - `date +%s` ))


    ###################### status screen
    # CLS, BOLD, NORMAL, GOOD, BAD and WARN are not set in this file.
    # NOTE(review): presumably provided by the script that sources us -- confirm.
    echo -en "$CLS"
    echo "                       [ Syn-3 cluster status ]"
    echo
    echo

    echo -en "${BOLD}Heartbeat             :${NORMAL}"
    if [ "$HEARTBEAT_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL} "
    else
        echo -e "${BAD} OFFLINE ${NORMAL} "
    fi

    echo     " ClusterID     : $CLUSTER_ID"
    echo     " This node ID  : $NODE_ID "
    echo     " Other node ID : $OTHER_NODE_ID (booted=$OTHER_BOOTED)"

    # Determine and show which node has the preference to become primary
    echo -n  " Our preference: "
    if [ "$WANT_PRIMARY" ]; then
        if [ "$THIS_BOOTED" == "2" ]; then
            echo -e "Primary role (forced by this node)"
        else
            echo -e "Primary role"

        fi
    else
        if [ "$OTHER_BOOTED" == "2" ]; then
            echo -e "Secondary role (forced by other node)"
        else
            echo -e "Secondary role"
        fi
    fi

    echo -n  " Heartbeats    : "
    # The unquoted command substitution joins the unique first fields of all
    # heartbeat lines onto a single output line.
    echo `echo "$HEARTBEATS"|cut -f1 -d' '|sort |uniq`


    echo
    echo -en "${BOLD}Network               :${NORMAL}"
    if ! [ "$NET_CONFIGURED" ]; then
        echo -e "${BAD} WAITING ${NORMAL}"
    elif [ "$NET_ONLINE" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    echo -en "${BOLD}DRBD connection status:"
    if [ "$CONNECTED" ]; then
        echo -e "${GOOD} ONLINE ${NORMAL}"
    else
        echo -e "${BAD} OFFLINE ${NORMAL}"
    fi


    echo
    # Determine and show local data consistency status
    echo -en "${BOLD}DRBD local data       :"
    if [ "$CONSISTENT" ] ; then
        echo -e "${GOOD} OK ${NORMAL}"
    else
        # a "sync" line in /proc/drbd is taken as "synchronisation is running"
        if grep sync /proc/drbd >/dev/null; then
            echo -ne "${WARN} Syncronising ${NORMAL}"
        else
            echo -ne "${BAD} Sync needed ${NORMAL}"
        fi
        if [ "$REBUILD" ];then
            echo "(rebuild mode, discarding local changes)"
        else
            echo
        fi
    fi


    #show status details
    # (the lines of /proc/drbd that contain "finish", "sync" or "cs:")
    # drbdsetup status
    cat /proc/drbd | egrep '(finish|sync|cs:)'


    # Determine and show what to do with all the gathered info:
    # heartbeat -> network configured -> ping -> drbd connected -> roles.
    echo
    echo
    if [ "$HEARTBEAT_ONLINE" ]; then
        if [ "$NET_CONFIGURED" ]; then
            if [ "$NET_ONLINE" ]; then
                if [ "$CONNECTED" ]; then
                    if [ "$OTHER_PRIMARY" ]; then
                        # the other node is primary already: stay secondary
                        become_secondary
                        if [ "$REBUILD" ]; then
                            splashwarn "Synchronisation started. Please reboot other node to finish installation. (this node will become primary and auto-reboot)"
                            # if [ "$CONSISTENT" ]; then
                            #     splashwarn "Finished initial synchronisation: This node needs become primary and reboot from harddisk. Please temporary shutdown other node. (this node will become primary and auto-reboot)"
                            # else
                            #     splashwarn "Redundancy OK: Please wait until initial syncronisation is complete..."
                            # fi
                        else
                            splashinfo "Redundancy OK: This node is secondary."
                        fi
                    #other node is not primary, yet
                    else
                        #do we want to be primary?
                        if [ "$WANT_PRIMARY" ]; then
                            if become_primary ; then
                                splashinfo "Redundancy OK: This node is primary, booting this node"
                                break;
                            else
                                #this can happen if the other node had already booted, so maybe not an error?
                                #do call become_secondary though, in case half
                                #of our devices are already primary!
                                become_secondary
                                splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
                            fi
                        #we dont want to be primary
                        else
                            splashinfo "Redundancy OK: This node is secondary, other node should be primary."
                        fi
                    fi
                else #not connected
                    # retry the drbd connection on every pass while disconnected
                    drbd_network
                    if dmesg | grep "peer's disk size is too small!"; then
                        echo "Please shut down the other node and reboot this node."
                        error_shell "Redundancy ERROR: Disk of other node is too small."
                    else
                        splasherror "Redundancy ERROR: Other node is online, but disconnected!"
                        echo " - Wait a minute for other node to reconfigure itself."
                        echo " - Check the firewall settings."
                        echo " - Check if there is a hardware failure and shut it down."
                        if [ "$AUTO_RECOVER" == "" ]; then
                            echo " -Check for a split-brain. (or enable autorecover on both nodes)"
                        fi
                    fi
                fi
            else #network not online
                splasherror "Node is online, but can not ping it. (wait a minute, or check firewall/network)"
            fi
        else #net not configured
            splasherror "Waiting for network configuration"
        fi
    else #heartbeat not online
        # Without heartbeat AND without ping the other node is treated as
        # offline: boot on our own, but only with consistent local data and
        # only after the boot delay has run out.
        if ! [ "$NET_ONLINE" ]; then
            if [ "$CONSISTENT" ]; then
                if [ "$BOOT_DELAY_LEFT" -le 0 ]; then
                    if become_primary ; then
                        splashwarn "Redundancy WARNING: Other node is offline, booting this node!"
                        break;
                    else
                        splasherror "Redundancy ERROR: Unknown error while becoming the primary node!"
                        become_secondary
                    fi
                else
                    splashwarn "Redundancy WARNING: Waiting for other node, booting after $BOOT_DELAY_LEFT seconds."
                fi
            else
                splasherror "Redundancy ERROR: The data on this node needs to be synced!"
                echo "Cannot boot this node in this state."
                echo "Please reboot the other node and wait until syncronisation is complete."
            fi
        else #net still online
            splasherror "Redundancy ERROR: Network is still online, but no heartbeat."
        fi
    fi

done

# If we're here, we're primary and ready to boot!

# End with exit status 0. This file is sourced (see the header).
# NOTE(review): presumably this keeps the sourcing script from seeing the
# status of whatever command ran last in the loop -- confirm.
true