dhodovsk / source-git / pacemaker

Forked from source-git/pacemaker 3 years ago
Clone
Blob Blame History Raw
/*
 * Copyright 2004-2019 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <sys/stat.h>

#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h>        /* For ONLINESTATUS etc */

#include <pacemaker-controld.h>

void te_update_confirm(const char *event, xmlNode * msg);

extern char *te_uuid;
gboolean shuttingdown = FALSE;
crm_graph_t *transition_graph;
crm_trigger_t *transition_trigger = NULL;

/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"

static void
te_update_diff_v1(const char *event, xmlNode *diff)
{
    int lpc, max;
    xmlXPathObject *xpathObj = NULL;

    CRM_CHECK(diff != NULL, return);

    xml_log_patchset(LOG_TRACE, __FUNCTION__, diff);
    if (cib_config_changed(NULL, NULL, &diff)) {
        abort_transition(INFINITY, tg_restart, "Non-status change", diff);
        goto bail;              /* configuration changed */
    }

    /* Tickets Attributes - Added/Updated */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
        goto bail;

    }
    freeXpathObject(xpathObj);

    /* Tickets Attributes - Removed */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
        goto bail;
    }
    freeXpathObject(xpathObj);

    /* Transient Attributes - Added/Updated */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
                     XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR);
    max = numXpathResults(xpathObj);

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *attr = getXpathResult(xpathObj, lpc);
        const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
        const char *value = NULL;

        if (safe_str_eq(CRM_OP_PROBED, name)) {
            value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
        }

        if (crm_is_true(value) == FALSE) {
            abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr);
            crm_log_xml_trace(attr, "Abort");
            goto bail;
        }
    }

    freeXpathObject(xpathObj);

    /* Transient Attributes - Removed */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
                     XML_TAG_TRANSIENT_NODEATTRS);
    if (numXpathResults(xpathObj) > 0) {
        xmlNode *aborted = getXpathResult(xpathObj, 0);

        abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
        goto bail;

    }
    freeXpathObject(xpathObj);

    /*
     * Updates by, or in response to, TE actions will never contain updates
     * for more than one resource at a time, so such updates indicate an
     * LRM refresh.
     *
     * In that case, start a new transition rather than check each result
     * individually, which can result in _huge_ speedups in large clusters.
     *
     * Unfortunately, we can only do so when there are no pending actions.
     * Otherwise, we could mistakenly throw away those results here, and
     * the cluster will stall waiting for them and time out the operation.
     */
    if (transition_graph->pending == 0) {
        xpathObj = xpath_search(diff,
                                "//" F_CIB_UPDATE_RESULT
                                "//" XML_TAG_DIFF_ADDED
                                "//" XML_LRM_TAG_RESOURCE);
        max = numXpathResults(xpathObj);
        if (max > 1) {
            crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
                      max);
            crm_log_xml_trace(diff, "lrm-refresh");
            abort_transition(INFINITY, tg_restart, "History refresh", NULL);
            goto bail;
        }
        freeXpathObject(xpathObj);
    }

    /* Process operation updates */
    xpathObj =
        xpath_search(diff,
                     "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
    max = numXpathResults(xpathObj);
    if (max > 0) {
        int lpc = 0;

        for (lpc = 0; lpc < max; lpc++) {
            xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
            const char *node = get_node_id(rsc_op);

            process_graph_event(rsc_op, node);
        }
    }
    freeXpathObject(xpathObj);

    /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
    xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
    max = numXpathResults(xpathObj);
    for (lpc = 0; lpc < max; lpc++) {
        int path_max = 0;
        const char *op_id = NULL;
        char *rsc_op_xpath = NULL;
        xmlXPathObject *op_match = NULL;
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if(match == NULL) { continue; };

        op_id = ID(match);

        path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1;
        rsc_op_xpath = calloc(1, path_max);
        snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id);

        op_match = xpath_search(diff, rsc_op_xpath);
        if (numXpathResults(op_match) == 0) {
            /* Prevent false positives by matching cancelations too */
            const char *node = get_node_id(match);
            crm_action_t *cancelled = get_cancel_action(op_id, node);

            if (cancelled == NULL) {
                crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
                          node);
                abort_transition(INFINITY, tg_restart, "Resource op removal", match);
                freeXpathObject(op_match);
                free(rsc_op_xpath);
                goto bail;

            } else {
                crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
                          op_id, node, cancelled->id);
            }
        }

        freeXpathObject(op_match);
        free(rsc_op_xpath);
    }

  bail:
    freeXpathObject(xpathObj);
}

static void
process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
{
    for (xmlNode *rsc_op = __xml_first_child(lrm_resource); rsc_op != NULL;
         rsc_op = __xml_next(rsc_op)) {
        process_graph_event(rsc_op, node);
    }
}

static void
process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
                         const char *op, const char *xpath)
{
    xmlNode *rsc = NULL;

    if (xml == NULL) {
        return;

    } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) {
        xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
        crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM);
    }

    CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0);

    /*
     * Updates by, or in response to, TE actions will never contain updates
     * for more than one resource at a time, so such updates indicate an
     * LRM refresh.
     *
     * In that case, start a new transition rather than check each result
     * individually, which can result in _huge_ speedups in large clusters.
     *
     * Unfortunately, we can only do so when there are no pending actions.
     * Otherwise, we could mistakenly throw away those results here, and
     * the cluster will stall waiting for them and time out the operation.
     */
    if ((transition_graph->pending == 0)
        && xml->children && xml->children->next) {

        crm_log_xml_trace(change, "lrm-refresh");
        abort_transition(INFINITY, tg_restart, "History refresh", NULL);
        return;
    }

    for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) {
        crm_trace("Processing %s", ID(rsc));
        process_lrm_resource_diff(rsc, node);
    }
}

#define NODE_PATT "/lrm[@id="
static char *get_node_from_xpath(const char *xpath) 
{
    char *nodeid = NULL;
    char *tmp = strstr(xpath, NODE_PATT);

    if(tmp) {
        tmp += strlen(NODE_PATT);
        tmp += 1;

        nodeid = strdup(tmp);
        tmp = strstr(nodeid, "\'");
        CRM_ASSERT(tmp);
        tmp[0] = 0;
    }
    return nodeid;
}

static char *extract_node_uuid(const char *xpath) 
{
    char *mutable_path = strdup(xpath);
    char *node_uuid = NULL;
    char *search = NULL;
    char *match = NULL;

    match = strstr(mutable_path, "node_state[@id=\'");
    if (match == NULL) {
        free(mutable_path);
        return NULL;
    }
    match += strlen("node_state[@id=\'");

    search = strchr(match, '\'');
    if (search == NULL) {
        free(mutable_path);
        return NULL;
    }
    search[0] = 0;

    node_uuid = strdup(match);
    free(mutable_path);
    return node_uuid;
}

static void
abort_unless_down(const char *xpath, const char *op, xmlNode *change,
                  const char *reason)
{
    char *node_uuid = NULL;
    crm_action_t *down = NULL;

    if(safe_str_neq(op, "delete")) {
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    node_uuid = extract_node_uuid(xpath);
    if(node_uuid == NULL) {
        crm_err("Could not extract node ID from %s", xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    down = match_down_event(node_uuid);
    if (down == NULL) {
        crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
    } else {
        crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
    }
    free(node_uuid);
}

static void
process_op_deletion(const char *xpath, xmlNode *change)
{
    char *mutable_key = strdup(xpath);
    char *key;
    char *node_uuid;

    // Extract the part of xpath between last pair of single quotes
    key = strrchr(mutable_key, '\'');
    if (key != NULL) {
        *key = '\0';
        key = strrchr(mutable_key, '\'');
    }
    if (key == NULL) {
        crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
                 xpath);
        free(mutable_key);
        return;
    }
    ++key;

    node_uuid = extract_node_uuid(xpath);
    if (confirm_cancel_action(key, node_uuid) == FALSE) {
        abort_transition(INFINITY, tg_restart, "Resource operation removal",
                         change);
    }
    free(mutable_key);
    free(node_uuid);
}

static void
process_delete_diff(const char *xpath, const char *op, xmlNode *change)
{
    if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) {
        process_op_deletion(xpath, change);

    } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) {
        abort_unless_down(xpath, op, change, "Resource state removal");

    } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) {
        abort_unless_down(xpath, op, change, "Node state removal");

    } else {
        crm_trace("Ignoring delete of %s", xpath);
    }
}

static void
process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
                        const char *xpath)
{
    xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);

    process_resource_updates(ID(state), lrm, change, op, xpath);
}

static void
process_status_diff(xmlNode *status, xmlNode *change, const char *op,
                    const char *xpath)
{
    for (xmlNode *state = __xml_first_child(status); state != NULL;
         state = __xml_next(state)) {
        process_node_state_diff(state, change, op, xpath);
    }
}

static void
process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
                 const char *xpath)
{
    xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS);
    xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);

    if (status) {
        process_status_diff(status, change, op, xpath);
    }
    if (config) {
        abort_transition(INFINITY, tg_restart,
                         "Non-status-only change", change);
    }
}

static void
te_update_diff_v2(xmlNode *diff)
{
    crm_log_xml_trace(diff, "Patch:Raw");

    for (xmlNode *change = __xml_first_child(diff); change != NULL;
         change = __xml_next(change)) {

        xmlNode *match = NULL;
        const char *name = NULL;
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);

        // Possible ops: create, modify, delete, move
        const char *op = crm_element_value(change, XML_DIFF_OP);

        // Ignore uninteresting updates
        if (op == NULL) {
            continue;

        } else if (xpath == NULL) {
            crm_trace("Ignoring %s change for version field", op);
            continue;

        } else if (strcmp(op, "move") == 0) {
            crm_trace("Ignoring move change at %s", xpath);
            continue;
        }

        // Find the result of create/modify ops
        if (strcmp(op, "create") == 0) {
            match = change->children;

        } else if (strcmp(op, "modify") == 0) {
            match = first_named_child(change, XML_DIFF_RESULT);
            if(match) {
                match = match->children;
            }

        } else if (strcmp(op, "delete") != 0) {
            crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
                     op, xpath);
            continue;
        }

        if (match) {
            if (match->type == XML_COMMENT_NODE) {
                crm_trace("Ignoring %s operation for comment at %s", op, xpath);
                continue;
            }
            name = (const char *)match->name;
        }

        crm_trace("Handling %s operation for %s%s%s",
                  op, (xpath? xpath : "CIB"),
                  (name? " matched by " : ""), (name? name : ""));

        if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
            abort_transition(INFINITY, tg_restart, "Configuration change",
                             change);
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
                   || safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
            abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
                   || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
            abort_unless_down(xpath, op, change, "Transient attribute change");
            break; // Won't be packaged with operation results we may be waiting for

        } else if (strcmp(op, "delete") == 0) {
            process_delete_diff(xpath, op, change);

        } else if (name == NULL) {
            crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
                     op, xpath);

        } else if (strcmp(name, XML_TAG_CIB) == 0) {
            process_cib_diff(match, change, op, xpath);

        } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
            process_status_diff(match, change, op, xpath);

        } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
            process_node_state_diff(match, change, op, xpath);

        } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
            process_resource_updates(ID(match), match, change, op, xpath);

        } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
            char *local_node = get_node_from_xpath(xpath);

            process_resource_updates(local_node, match, change, op, xpath);
            free(local_node);

        } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
            char *local_node = get_node_from_xpath(xpath);

            process_lrm_resource_diff(match, local_node);
            free(local_node);

        } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
            char *local_node = get_node_from_xpath(xpath);

            process_graph_event(match, local_node);
            free(local_node);

        } else {
            crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
                     op, xpath, name);
        }
    }
}

void
te_update_diff(const char *event, xmlNode * msg)
{
    xmlNode *diff = NULL;
    const char *op = NULL;
    int rc = -EINVAL;
    int format = 1;
    int p_add[] = { 0, 0, 0 };
    int p_del[] = { 0, 0, 0 };

    CRM_CHECK(msg != NULL, return);
    crm_element_value_int(msg, F_CIB_RC, &rc);

    if (transition_graph == NULL) {
        crm_trace("No graph");
        return;

    } else if (rc < pcmk_ok) {
        crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
        return;

    } else if (transition_graph->complete
               && fsa_state != S_IDLE
               && fsa_state != S_TRANSITION_ENGINE
               && fsa_state != S_POLICY_ENGINE) {
        crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
                  transition_graph->complete);
        return;
    }

    op = crm_element_value(msg, F_CIB_OPERATION);
    diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    xml_patch_versions(diff, p_add, p_del);
    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
              p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
              fsa_state2string(fsa_state));

    crm_element_value_int(diff, "format", &format);
    switch (format) {
        case 1:
            te_update_diff_v1(event, diff);
            break;
        case 2:
            te_update_diff_v2(diff);
            break;
        default:
            crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
                     format);
    }
}

gboolean
process_te_message(xmlNode * msg, xmlNode * xml_data)
{
    const char *from = crm_element_value(msg, F_ORIG);
    const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
    const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
    const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
    const char *op = crm_element_value(msg, F_CRM_TASK);
    const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);

    crm_trace("Processing %s (%s) message", op, ref);
    crm_log_xml_trace(msg, "ipc");

    if (op == NULL) {
        /* error */

    } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) {
        crm_trace("Bad sys-to %s", crm_str(sys_to));
        return FALSE;

    } else if (safe_str_eq(op, CRM_OP_INVOKE_LRM)
               && safe_str_eq(sys_from, CRM_SYSTEM_LRMD)
/* 		  && safe_str_eq(type, XML_ATTR_RESPONSE) */
        ) {
        xmlXPathObject *xpathObj = NULL;

        crm_log_xml_trace(msg, "Processing (N)ACK");
        crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);

        xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
        if (numXpathResults(xpathObj)) {
            int lpc = 0, max = numXpathResults(xpathObj);

            for (lpc = 0; lpc < max; lpc++) {
                xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
                const char *node = get_node_id(rsc_op);

                process_graph_event(rsc_op, node);
            }
            freeXpathObject(xpathObj);

        } else {
            crm_log_xml_err(msg, "Invalid (N)ACK");
            freeXpathObject(xpathObj);
            return FALSE;
        }

    } else {
        crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
    }

    crm_trace("finished processing message");

    return TRUE;
}

void
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    if (rc < pcmk_ok) {
        crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
    }
}

/*!
 * \brief Handle a timeout in node-to-node communication
 *
 * \param[in] data  Pointer to action timer
 *
 * \return FALSE (indicating that source should be not be re-added)
 */
gboolean
action_timer_callback(gpointer data)
{
    crm_action_timer_t *timer = NULL;
    const char *task = NULL;
    const char *on_node = NULL;
    const char *via_node = NULL;

    CRM_CHECK(data != NULL, return FALSE);

    timer = (crm_action_timer_t *) data;
    stop_te_timer(timer);

    CRM_CHECK(timer->action != NULL, return FALSE);

    task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET);
    via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE);

    if (transition_graph->complete) {
        crm_notice("Node %s did not send %s result (via %s) within %dms "
                   "(ignoring because transition not in progress)",
                   (on_node? on_node : ""), (task? task : "unknown action"),
                   (via_node? via_node : "controller"), timer->timeout);
    } else {
        /* fail the action */

        crm_err("Node %s did not send %s result (via %s) within %dms "
                "(action timeout plus cluster-delay)",
                (on_node? on_node : ""), (task? task : "unknown action"),
                (via_node? via_node : "controller"), timer->timeout);
        print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action);

        timer->action->failed = TRUE;
        te_action_confirmed(timer->action, transition_graph);
        abort_transition(INFINITY, tg_restart, "Action lost", NULL);

        // Record timeout in the CIB if appropriate
        if ((timer->action->type == action_type_rsc)
            && controld_action_is_recordable(task)) {
            controld_record_action_timeout(timer->action);
        }
    }

    return FALSE;
}