Blob Blame History Raw
/*
 * Copyright 2013-2020 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>

#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/common/iso8601.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>

#include <crm/common/attrd_internal.h>
#include "pacemaker-attrd.h"

lrmd_t *the_lrmd = NULL;
crm_cluster_t *attrd_cluster = NULL;
crm_trigger_t *attrd_config_read = NULL;
static crm_exit_t attrd_exit_status = CRM_EX_OK;

static void
attrd_cpg_dispatch(cpg_handle_t handle,
                 const struct cpg_name *groupName,
                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    xmlNode *xml = NULL;
    const char *from = NULL;
    char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);

    if(data == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        xml = string2xml(data);
    }

    if (xml == NULL) {
        crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
    } else {
        crm_node_t *peer = crm_get_peer(nodeid, from);

        attrd_peer_message(peer, xml);
    }

    free_xml(xml);
    free(data);
}

static void
attrd_cpg_destroy(gpointer unused)
{
    if (attrd_shutting_down()) {
        crm_info("Corosync disconnection complete");

    } else {
        crm_crit("Lost connection to cluster layer, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
    }
}

static void
attrd_cib_destroy_cb(gpointer user_data)
{
    cib_t *conn = user_data;

    conn->cmds->signoff(conn);  /* Ensure IPC is cleaned up */

    if (attrd_shutting_down()) {
        crm_info("Connection disconnection complete");

    } else {
        /* eventually this should trigger a reconnect, not a shutdown */
        crm_crit("Lost connection to the CIB manager, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
    }

    return;
}

static void
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
               void *user_data)
{
    do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG),
                        "Cleared transient attributes: %s "
                        CRM_XS " xpath=%s rc=%d",
                        pcmk_strerror(rc), (char *) user_data, rc);
}

#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS

/*!
 * \internal
 * \brief Wipe all transient attributes for this node from the CIB
 *
 * Clear any previous transient node attributes from the CIB. This is
 * normally done by the DC's controller when this node leaves the cluster, but
 * this handles the case where the node restarted so quickly that the
 * cluster layer didn't notice.
 *
 * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned),
 *       ideally we'd skip this and sync our attributes from the writer.
 *       However, currently we reject any values for us that the writer has, in
 *       attrd_peer_update().
 */
static void
attrd_erase_attrs(void)
{
    int call_id;
    char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname);

    crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s",
             xpath);

    call_id = the_cib->cmds->remove(the_cib, xpath, NULL,
                                    cib_quorum_override | cib_xpath);
    the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath,
                                          "attrd_erase_cb", attrd_erase_cb,
                                          free);
}

static int
attrd_cib_connect(int max_retry)
{
    static int attempts = 0;

    int rc = -ENOTCONN;

    the_cib = cib_new();
    if (the_cib == NULL) {
        return -ENOTCONN;
    }

    do {
        if(attempts > 0) {
            sleep(attempts);
        }

        attempts++;
        crm_debug("Connection attempt %d to the CIB manager", attempts);
        rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command);

    } while(rc != pcmk_ok && attempts < max_retry);

    if (rc != pcmk_ok) {
        crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d",
                pcmk_strerror(rc), rc);
        goto cleanup;
    }

    crm_debug("Connected to the CIB manager after %d attempts", attempts);

    rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set disconnection callback");
        goto cleanup;
    }

    rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb);
    if(rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        goto cleanup;
    }

    rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback (update)");
        goto cleanup;
    }

    return pcmk_ok;

  cleanup:
    the_cib->cmds->signoff(the_cib);
    cib_delete(the_cib);
    the_cib = NULL;
    return -ENOTCONN;
}

/*!
 * \internal
 * \brief Prepare the CIB after cluster is connected
 */
static void
attrd_cib_init(void)
{
    // We have no attribute values in memory, wipe the CIB to match
    attrd_erase_attrs();

    // Set a trigger for reading the CIB (for the alerts section)
    attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);

    // Always read the CIB at start-up
    mainloop_set_trigger(attrd_config_read);
}

static qb_ipcs_service_t *ipcs = NULL;

static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    pcmk__client_t *client = pcmk__find_client(c);
    xmlNode *xml = NULL;
    const char *op;

    // Sanity-check, and parse XML from IPC data
    CRM_CHECK((c != NULL) && (client != NULL), return 0);
    if (data == NULL) {
        crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
        return 0;
    }
    xml = pcmk__client_data2xml(client, data, &id, &flags);
    if (xml == NULL) {
        crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
        return 0;
    }

#if ENABLE_ACL
    CRM_ASSERT(client->user != NULL);
    pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
#endif

    op = crm_element_value(xml, PCMK__XA_TASK);

    if (client->name == NULL) {
        const char *value = crm_element_value(xml, F_ORIG);
        client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid);
    }

    if (pcmk__str_eq(op, PCMK__ATTRD_CMD_PEER_REMOVE, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_peer_remove(client->name, xml);

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_CLEAR_FAILURE, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_clear_failure(xml);

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_update(xml);

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_update(xml);

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_update(xml);

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_REFRESH, pcmk__str_casei)) {
        attrd_send_ack(client, id, flags);
        attrd_client_refresh();

    } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_QUERY, pcmk__str_casei)) {
        /* queries will get reply, so no ack is necessary */
        attrd_client_query(client, id, flags, xml);

    } else {
        crm_info("Ignoring request from client %s with unknown operation %s",
                 client->name, op);
    }

    free_xml(xml);
    return 0;
}

void
attrd_ipc_fini(void)
{
    if (ipcs != NULL) {
        pcmk__drop_all_clients(ipcs);
        qb_ipcs_destroy(ipcs);
        ipcs = NULL;
    }
}

static int
attrd_cluster_connect(void)
{
    attrd_cluster = calloc(1, sizeof(crm_cluster_t));

    attrd_cluster->destroy = attrd_cpg_destroy;
    attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch;
    attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;

    crm_set_status_callback(&attrd_peer_change_cb);

    if (crm_cluster_connect(attrd_cluster) == FALSE) {
        crm_err("Cluster connection failed");
        return -ENOTCONN;
    }
    return pcmk_ok;
}

static pcmk__cli_option_t long_options[] = {
    // long option, argument type, storage, short option, description, flags
    {
        "help",     no_argument, NULL, '?',
        "\tThis text", pcmk__option_default
    },
    {
        "verbose",  no_argument, NULL, 'V',
        "\tIncrease debug output", pcmk__option_default
    },
    { 0, 0, 0, 0 }
};

int
main(int argc, char **argv)
{
    int flag = 0;
    int index = 0;
    int argerr = 0;
    crm_ipc_t *old_instance = NULL;

    attrd_init_mainloop();
    crm_log_preinit(NULL, argc, argv);
    pcmk__set_cli_options(NULL, "[options]", long_options,
                          "daemon for managing Pacemaker node attributes");

    mainloop_add_signal(SIGTERM, attrd_shutdown);

     while (1) {
        flag = pcmk__next_cli_option(argc, argv, &index, NULL);
        if (flag == -1)
            break;

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'h':          /* Help message */
                pcmk__cli_help(flag, CRM_EX_OK);
                break;
            default:
                ++argerr;
                break;
        }
    }

    if (optind > argc) {
        ++argerr;
    }

    if (argerr) {
        pcmk__cli_help('?', CRM_EX_USAGE);
    }

    crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
    crm_notice("Starting Pacemaker node attribute manager");

    old_instance = crm_ipc_new(T_ATTRD, 0);
    if (crm_ipc_connect(old_instance)) {
        /* IPC end-point already up */
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("pacemaker-attrd is already active, aborting startup");
        crm_exit(CRM_EX_OK);
    } else {
        /* not up or not authentic, we'll proceed either way */
        crm_ipc_destroy(old_instance);
        old_instance = NULL;
    }

    attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute);

    /* Connect to the CIB before connecting to the cluster or listening for IPC.
     * This allows us to assume the CIB is connected whenever we process a
     * cluster or IPC message (which also avoids start-up race conditions).
     */
    if (attrd_cib_connect(10) != pcmk_ok) {
        attrd_exit_status = CRM_EX_FATAL;
        goto done;
    }
    crm_info("CIB connection active");

    if (attrd_cluster_connect() != pcmk_ok) {
        attrd_exit_status = CRM_EX_FATAL;
        goto done;
    }
    crm_info("Cluster connection active");

    // Initialization that requires the cluster to be connected
    attrd_election_init();
    attrd_cib_init();

    /* Set a private attribute for ourselves with the protocol version we
     * support. This lets all nodes determine the minimum supported version
     * across all nodes. It also ensures that the writer learns our node name,
     * so it can send our attributes to the CIB.
     */
    attrd_broadcast_protocol();

    attrd_init_ipc(&ipcs, attrd_ipc_dispatch);
    crm_notice("Pacemaker node attribute manager successfully started and accepting connections");
    attrd_run_mainloop();

  done:
    crm_info("Shutting down attribute manager");

    attrd_election_fini();
    attrd_ipc_fini();
    attrd_lrmd_disconnect();
    attrd_cib_disconnect();
    g_hash_table_destroy(attributes);

    crm_exit(attrd_exit_status);
}