rebar3_riak_core_admin_runner

#!/bin/sh

# /bin/sh on Solaris is not a POSIX compatible shell, but /usr/bin/ksh is.
if [ `uname -s` = 'SunOS' -a "${POSIX_SHELL}" != "true" ]; then
    POSIX_SHELL="true"
    export POSIX_SHELL
    # To support 'whoami' add /usr/ucb to path
    PATH=/usr/ucb:$PATH
    export PATH
    exec /usr/bin/ksh $0 "$@"
fi
unset POSIX_SHELL # clear it so if we invoke other scripts, they run as ksh as well


SCRIPT=$(readlink $0 || true)
if [ -z $SCRIPT ]; then
    SCRIPT=$0
fi;


SCRIPT_DIR="$(cd `dirname "$SCRIPT"` && pwd -P)"
RELEASE_ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd -P)"
REL_NAME="{{ release_name }}"
CUTTLEFISH_CONF="{{ release_name }}.conf"
REL_VSN="{{ rel_vsn }}"
ERTS_VSN="{{ erts_vsn }}"
CODE_LOADING_MODE="${CODE_LOADING_MODE:-embedded}"
REL_DIR="$RELEASE_ROOT_DIR/releases/$REL_VSN"
ERL_OPTS="{{ erl_opts }}"
RUNNER_LOG_DIR="${RUNNER_LOG_DIR:-$RELEASE_ROOT_DIR/log}"
RUNNER_BASE_DIR=$RELEASE_ROOT_DIR
RUNNER_ETC_DIR="${RUNNER_ETC_DIR:-$RELEASE_ROOT_DIR/etc}"

find_erts_dir() {
    __erts_dir="$RELEASE_ROOT_DIR/erts-$ERTS_VSN"
    if [ -d "$__erts_dir" ]; then
        ERTS_DIR="$__erts_dir";
        ROOTDIR="$RELEASE_ROOT_DIR"
    else
        __erl="$(which erl)"
        code="io:format(\"~s\", [code:root_dir()]), halt()."
        __erl_root="$("$__erl" -noshell -eval "$code")"
        ERTS_DIR="$__erl_root/erts-$ERTS_VSN"
        ROOTDIR="$__erl_root"
    fi
}

# Get node pid
relx_get_pid() {
    if output="$(relx_nodetool rpcterms os getpid)"
    then
        echo "$output" | sed -e 's/"//g'
        return 0
    else
        echo "$output"
        return 1
    fi
}

relx_get_longname() {
    id="longname$(relx_gen_id)-${NAME}"
    "$BINDIR/erl" -boot start_clean -eval 'io:format("~s~n", [node()]), halt()' -noshell -name $id | sed -e 's/.*@//g'
}

# Connect to a remote node
relx_rem_sh() {
    # Generate a unique id used to allow multiple remsh to the same node
    # transparently
    id="remsh$(relx_gen_id)-${NAME}"

    # Get the node's ticktime so that we use the same thing.
    TICKTIME="$(relx_nodetool rpcterms net_kernel get_net_ticktime)"

    # Setup remote shell command to control node
    exec "$BINDIR/erl" "$NAME_TYPE" "$id" -remsh "$NAME" -boot start_clean \
         -boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR" \
         -setcookie "$COOKIE" -hidden -kernel net_ticktime $TICKTIME
}

# Generate a random id
relx_gen_id() {
    od -X -N 4 /dev/urandom | head -n1 | awk '{print $2}'
}

# Control a node
relx_nodetool() {
    command="$1"; shift

    "$ERTS_DIR/bin/escript" "$ROOTDIR/bin/nodetool" "$NAME_TYPE" "$NAME" \
                                -setcookie "$COOKIE" "$command" $@
}

# Run an escript in the node's environment
relx_escript() {
    shift; scriptpath="$1"; shift
    export RELEASE_ROOT_DIR

    "$ERTS_DIR/bin/escript" "$ROOTDIR/$scriptpath" $@
}

# Output a start command for the last argument of run_erl
relx_start_command() {
    printf "exec \"%s\" \"%s\"" "$RELEASE_ROOT_DIR/bin/$REL_NAME" \
           "$START_OPTION"
}

# Make sure log directory exists
mkdir -p "$RUNNER_LOG_DIR"

# Use $CWD/sys.config if exists, otherwise releases/VSN/sys.config
if [ -z "$NAME_ARG" ]; then
    NODENAME=`egrep '^[ \t]*nodename[ \t]*=[ \t]*' $RUNNER_ETC_DIR/$CUTTLEFISH_CONF 2> /dev/null | tail -n 1 | cut -d = -f 2`
    if [ -z "$NODENAME" ]; then
        echo "vm.args needs to have a -name parameter."
        echo "  -sname is not supported."
        exit 1
    else
        NAME_TYPE="-name"
        NAME="${NODENAME# *}"
    fi
fi

PIPE_DIR="${PIPE_DIR:-/tmp/erl_pipes/$NAME/}"

# Extract the target cookie
#COOKIE_ARG=`grep -e '-setcookie' $RUNNER_ETC_DIR/vm.args`
if [ -z "$COOKIE_ARG" ]; then
    COOKIE=`egrep '^[ \t]*distributed_cookie[ \t]*=[ \t]*' $RUNNER_ETC_DIR/$CUTTLEFISH_CONF 2> /dev/null | cut -d = -f 2 | tr -d ' '`
    if [ -z "$COOKIE" ]; then
        echo "vm.args needs to have a -setcookie parameter."
        exit 1
    else
        COOKIE_ARG="-setcookie $COOKIE"
    fi
fi

find_erts_dir
export ROOTDIR="$RELEASE_ROOT_DIR"
export BINDIR="$ERTS_DIR/bin"
export EMU="beam"
export PROGNAME="erl"
export LD_LIBRARY_PATH="$ERTS_DIR/lib:$LD_LIBRARY_PATH"
ERTS_LIB_DIR="$ERTS_DIR/../lib"
CUTTLEFISHCMD="$ERTS_DIR/bin/escript $RUNNER_BASE_DIR/bin/cuttlefish"

cd "$ROOTDIR"

if CUTTLEFISH_CONFIG=$($CUTTLEFISHCMD -e $RUNNER_ETC_DIR -d $RUNNER_BASE_DIR/generated.conf -s $RUNNER_BASE_DIR/share/schema/ -c $RUNNER_ETC_DIR/$CUTTLEFISH_CONF)
then
    CONFIG_FILES="$CUTTLEFISH_CONFIG"
else
    echo "Cuttlefish failed! Oh no!"
    exit 1
fi


# Parse out release and erts info
START_ERL=`cat $RUNNER_BASE_DIR/releases/start_erl.data`
ERTS_VSN=${START_ERL% *}
APP_VSN=${START_ERL#* }

# TODO: look in the release otherwise use which
ESCRIPT=escript
NODETOOL_PATH=$RUNNER_BASE_DIR/bin
NODETOOL=$NODETOOL_PATH/nodetool
# Setup command to control the node
NODETOOL="$ESCRIPT $NODETOOL $NAME_ARG $COOKIE_ARG"

ensure_node_running()
{
    # Make sure the local node IS running
    if ! relx_nodetool "ping"; then
        echo "Node is not running!"
        exit 1
    fi
}

cluster_admin()
{
    case "$1" in
        join)
            if [ $# -ne 2 ]; then
                echo "Usage: $SCRIPT cluster join <node>"
                exit 1
            fi
            ensure_node_running
            relx_nodetool rpc {{ release_name }}_console staged_join "$2"
            ;;
        leave)
            if [ $# -eq 1 ]; then
                ensure_node_running
                relx_nodetool rpc riak_core_console stage_leave
            elif [ $# -eq 2 ]; then
                ensure_node_running
                relx_nodetool rpc riak_core_console stage_leave "$2"
            else
                echo "Usage: $SCRIPT cluster leave [<node>]"
                exit 1
            fi
            ;;
        force-remove)
            if [ $# -ne 2 ]; then
                echo "Usage: $SCRIPT cluster force-remove <node>"
                exit 1
            fi
            ensure_node_running
            relx_nodetool rpc riak_core_console stage_remove "$2"
            ;;
        replace)
            if [ $# -ne 3 ]; then
                echo "Usage: $SCRIPT cluster replace <node1> <node2>"
                exit 1
            fi
            ensure_node_running
            relx_nodetool rpc riak_core_console stage_replace "$2" "$3"
            ;;
        force-replace)
            if [ $# -ne 3 ]; then
                echo "Usage: $SCRIPT cluster force-replace <node1> <node2>"
                exit 1
            fi
            ensure_node_running
            relx_nodetool rpc riak_core_console stage_force_replace "$2" "$3"
            ;;
        plan)
            ensure_node_running
            relx_nodetool rpc riak_core_console print_staged
            ;;
        commit)
            ensure_node_running
            relx_nodetool rpc riak_core_console commit_staged
            ;;
        clear)
            ensure_node_running
            relx_nodetool rpc riak_core_console clear_staged
            ;;
        *)
            echo "\
Usage: $SCRIPT cluster <command>

The following commands stage changes to cluster membership. These commands
do not take effect immediately. After staging a set of changes, the staged
plan must be committed to take effect:

   join <node>                    Join node to the cluster containing <node>
   leave                          Have this node leave the cluster and shutdown
   leave <node>                   Have <node> leave the cluster and shutdown

   force-remove <node>            Remove <node> from the cluster without
                                  first handing off data. Designed for
                                  crashed, unrecoverable nodes

   replace <node1> <node2>        Have <node1> transfer all data to <node2>,
                                  and then leave the cluster and shutdown

   force-replace <node1> <node2>  Reassign all partitions owned by <node1> to
                                  <node2> without first handing off data, and
                                  remove <node1> from the cluster.

Staging commands:
   plan                           Display the staged changes to the cluster
   commit                         Commit the staged changes
   clear                          Clear the staged changes
"
    esac
}

# Check the first argument for instructions
case "$1" in
     down)
        if [ $# -ne 2 ]; then
            echo "Usage: $SCRIPT down <node>"
            exit 1
        fi

        ensure_node_running
        relx_nodetool rpc {{ release_name }}_console down $@
        ;;

     ringready)
        if [ $# -ne 1 ]; then
            echo "Usage: $SCRIPT ringready"
            exit 1
        fi

        ensure_node_running
        relx_nodetool rpc {{ release_name }}_console ringready ''
        ;;

    member[_-]status)
        if [ $# -ne 1 ]; then
            echo "Usage: $SCRIPT $1"
            exit 1
        fi

        ensure_node_running
        relx_nodetool rpc riak_core_console member_status ''
        ;;

    ring[_-]status)
        if [ $# -ne 1 ]; then
            echo "Usage: $SCRIPT $1"
            exit 1
        fi

        ensure_node_running
        relx_nodetool rpc riak_core_console ring_status ''
        ;;

    services)
        relx_nodetool rpcterms riak_core_node_watcher services ''
        ;;

    wait[_-]for[_-]service)
        SVC=$2
        TARGETNODE=$3
        if [ $# -lt 3 ]; then
            echo "Usage: $SCRIPT $1 <service_name> <target_node>"
            exit 1
        fi

        while (true); do
            # Make sure riak_core_node_watcher is up and running locally before trying to query it
            # to avoid ugly (but harmless) error messages
            NODEWATCHER=`$NODETOOL rpcterms erlang whereis "'riak_core_node_watcher'."`
            if [ "$NODEWATCHER" = "undefined" ]; then
                echo "$SVC is not up: node watcher is not running"
                continue
            fi

            # Get the list of services that are available on the requested node
            SERVICES=`$NODETOOL rpcterms riak_core_node_watcher services "'${TARGETNODE}'."`
            echo "$SERVICES" | grep "[[,]$SVC[],]" > /dev/null 2>&1
            if [ "X$?" = "X0" ]; then
                echo "$SVC is up"
                exit 0
            else
                echo "$SVC is not up: $SERVICES"
            fi
            sleep 3
        done
        ;;
    cluster)
        shift
        cluster_admin "$@"
        ;;
    *)
        echo "Usage: $SCRIPT { cluster | down | ringready | member-status | "
        echo "                    ring-status | services | wait-for-service "
        exit 1
        ;;
esac