daemons/controld/controld_join_client.c

/*
 * Copyright 2004-2019 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>

#include <pacemaker-controld.h>

int reannounce_count = 0;
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);

extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);

/*!
 * \internal
 * \brief Remember if DC is shutting down as we join
 *
 * If we're joining while the current DC is shutting down, update its expected
 * state, so we don't fence it if we become the new DC. (We weren't a peer
 * when it broadcast its shutdown request.)
 *
 * \param[in] msg  A join message from the DC
 */
static void
update_dc_expected(xmlNode *msg)
{
    if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
        crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);

        crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
    }
}

/*	A_CL_JOIN_QUERY		*/
/* is there a DC out there? */
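/*!
 * \internal
 * \brief Broadcast a join announcement to locate the DC
 *
 * Builds a CRM_OP_JOIN_ANNOUNCE request addressed to the DC subsystem, waits
 * briefly for the cluster layer to settle, clears any cached DC via
 * update_dc(NULL) so the eventual reply is not discarded, and broadcasts the
 * request to all controllers.
 */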
void
do_cl_join_query(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
                                  CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

    sleep(1);                   // Give the cluster layer time to propagate to the DC
    update_dc(NULL);            /* Unset any existing value so that the result is not discarded */
    crm_debug("Querying for a DC");
    send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
    free_xml(req);
}

/*	 A_CL_JOIN_ANNOUNCE	*/

/* This is a workaround for the fact that we may not be running, or may
 * otherwise be unable to reply, when the DC sends out A_DC_JOIN_OFFER_ALL
 */
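/*!
 * \internal
 * \brief Announce our availability to join the cluster
 *
 * Only done while in S_PENDING; if the local node is operational, clear any
 * cached DC and broadcast CRM_OP_JOIN_ANNOUNCE, otherwise wait until local
 * startup has completed.
 */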
void
do_cl_join_announce(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* don't announce unless we're in the S_PENDING state */
    if (cur_state != S_PENDING) {
        crm_warn("Not announcing cluster join because in state %s",
                 fsa_state2string(cur_state));
        return;
    }

    if (AM_I_OPERATIONAL) {
        /* send as a broadcast */
        xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
                                      CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_debug("Announcing availability");
        update_dc(NULL);
        send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
        free_xml(req);

    } else {
        /* Delay announce until we have finished local startup */
        crm_warn("Delaying announce of cluster join until local startup is complete");
        return;
    }
}

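/* CIB call ID of the most recent join-offer CIB query; only the newest
 * offer's query is honored, and any earlier pending callback is cancelled
 */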
static int query_call_id = 0;

/*	 A_CL_JOIN_REQUEST	*/
/* aka. accept the welcome offer */
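/*!
 * \internal
 * \brief Respond to a join offer (welcome) from the DC
 *
 * Cancels any pending join query, validates the offer against our notion of
 * the DC, notes whether the DC is shutting down, then queries the local CIB
 * (local scope, no children) and registers join_query_callback() to send the
 * actual join request.
 */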
void
do_cl_join_offer_respond(long long action,
                         enum crmd_fsa_cause cause,
                         enum crmd_fsa_state cur_state,
                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *welcome_from;
    const char *join_id;

    CRM_CHECK(input != NULL, return);

#if 0
    if (we are sick) {
        log error;

        /* save the request for later? */
        return;
    }
#endif

    welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
    join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
    crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
rpm-build 3ee90c
              welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));
rpm-build 3ee90c
rpm-build 3ee90c
    /* we only ever want the last one */
    if (query_call_id > 0) {
        crm_trace("Cancelling previous join query: %d", query_call_id);
        remove_cib_op_callback(query_call_id, FALSE);
        query_call_id = 0;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding cluster join offer from node %s (expected %s)",
                 welcome_from, fsa_our_dc);
        return;
    }

    update_dc_expected(input->msg);

    query_call_id =
        fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
    fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback);
    crm_trace("Registered join query callback: %d", query_call_id);

    register_fsa_action(A_DC_TIMER_STOP);
}

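/*!
 * \internal
 * \brief Reply to a join offer once the local CIB query completes
 *
 * Copies the local CIB's version properties (such as admin_epoch, epoch and
 * num_updates) into a generation tuple and sends it to the DC in a
 * CRM_OP_JOIN_REQUEST along with the join ID and our feature set. A failed
 * query registers an FSA error; a superseded or DC-less query is dropped.
 */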
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    char *join_id = user_data;
    xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);

    CRM_LOG_ASSERT(join_id != NULL);

    if (query_call_id != call_id) {
        crm_trace("Query %d superseded", call_id);
        goto done;
    }

    query_call_id = 0;
    if(rc != pcmk_ok || output == NULL) {
        crm_err("Could not retrieve version details for join-%s: %s (%d)",
                join_id, pcmk_strerror(rc), rc);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);

    } else if (fsa_our_dc == NULL) {
        crm_debug("Membership is in flux, not continuing join-%s", join_id);

    } else {
        xmlNode *reply = NULL;

        crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc);
        copy_in_properties(generation, output);

        reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
                               CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
        crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
        send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
        free_xml(reply);
    }

  done:
    free_xml(generation);
}

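/*!
 * \internal
 * \brief Apply the configured node start state, if any
 *
 * Forces the local node's "standby" node attribute on or off when the
 * node_start_state option (typically the PCMK_node_start_state environment
 * variable) is "standby" or "online"; "default" leaves the CIB untouched.
 */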
static void
set_join_state(const char * start_state)
{
    if (safe_str_eq(start_state, "standby")) {
        crm_notice("Forcing node %s to join in %s state per configured environment",
                   fsa_our_uname, start_state);
        update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
                             NULL, NULL, NULL, "standby", "on", TRUE, NULL, NULL);

    } else if (safe_str_eq(start_state, "online")) {
        crm_notice("Forcing node %s to join in %s state per configured environment",
                   fsa_our_uname, start_state);
        update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
                             NULL, NULL, NULL, "standby", "off", TRUE, NULL, NULL);

    } else if (safe_str_eq(start_state, "default")) {
        crm_debug("Not forcing a starting state on node %s", fsa_our_uname);

    } else {
        crm_warn("Unrecognized start state '%s', using 'default' (%s)",
rpm-build 3ee90c
                 start_state, fsa_our_uname);
    }
}

/*	A_CL_JOIN_RESULT	*/
/* aka. this is notification that we have (or have not) been accepted */
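/*!
 * \internal
 * \brief Handle the DC's ack/nack of our join request
 *
 * A NACK causes us to register an error and shut down. On an ACK we confirm
 * the join by sending our local resource operation history to the DC in a
 * CRM_OP_JOIN_CONFIRM message, applying any configured start state on the
 * first join since the controller started.
 */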
void
do_cl_join_finalize_respond(long long action,
                            enum crmd_fsa_cause cause,
                            enum crmd_fsa_state cur_state,
                            enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *tmp1 = NULL;
    gboolean was_nack = TRUE;
    static gboolean first_join = TRUE;
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *start_state = daemon_option("node_start_state");

    int join_id = -1;
    const char *op = crm_element_value(input->msg, F_CRM_TASK);
    const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
    const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
        crm_trace("Ignoring op=%s message", op);
        return;
    }

    /* calculate if it was an ack or a nack */
    if (crm_is_true(ack_nack)) {
        was_nack = FALSE;
    }

    crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);

    if (was_nack) {
        crm_err("Shutting down because cluster join with leader %s failed "
                CRM_XS" join-%d NACK'd", welcome_from, join_id);
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
        crm_warn("Discarding our own welcome - we're no longer the DC");
        return;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding %s from node %s (expected from %s)",
                 op, welcome_from, fsa_our_dc);
        return;
    }

    update_dc_expected(input->msg);

    /* send our status section to the DC */
    tmp1 = do_lrm_query(TRUE, fsa_our_uname);
    if (tmp1 != NULL) {
        xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
                                        CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);

        crm_debug("Confirming join-%d: sending local operation history to %s",
                  join_id, fsa_our_dc);

        /*
         * If this is the node's first join since the controller started on it,
         * set its initial state (standby or member) according to the user's
         * preference.
         *
         * We do not clear the LRM history here. Even if the DC failed to do it
         * when we last left, removing it here creates a race condition if the
         * controller is being recovered. Instead of a list of active resources
         * from the executor, we may end up with a blank status section. If we
         * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
         * clones and end up with multiple active instances on the machine.
         */
        if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
            first_join = FALSE;
            if (start_state) {
                set_join_state(start_state);
            }
        }

        send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
        free_xml(reply);

        if (AM_I_DC == FALSE) {
            register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
        }

        free_xml(tmp1);

    } else {
        crm_err("Could not confirm join-%d with %s: Local operation history failed",
                join_id, fsa_our_dc);
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
    }
}