|
rpm-build |
3ee90c |
/*
|
|
rpm-build |
3ee90c |
* Copyright 2004-2019 the Pacemaker project contributors
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* The version control history for this file may have further details.
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* This source code is licensed under the GNU General Public License version 2
|
|
rpm-build |
3ee90c |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
#include <crm_internal.h>
|
|
rpm-build |
3ee90c |
#include <crm/crm.h>
|
|
rpm-build |
3ee90c |
#include <crm/msg_xml.h>
|
|
rpm-build |
3ee90c |
#include <crm/common/xml.h>
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
#include <pacemaker-controld.h>
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*
 * stonith failure counting
 *
 * We don't want to get stuck in a permanent fencing loop. Keep track of the
 * number of fencing failures for each target node, and of the maximum number
 * of failures after which we stop restarting a transition for that target.
 */
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Per-target fencing failure record (value type stored in stonith_failures) */
struct st_fail_rec {
    int count;  /* failures recorded for the target since its last reset */
};
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static bool fence_reaction_panic = FALSE;
|
|
rpm-build |
3ee90c |
static unsigned long int stonith_max_attempts = 10;
|
|
rpm-build |
3ee90c |
static GHashTable *stonith_failures = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
update_stonith_max_attempts(const char *value)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (safe_str_eq(value, CRM_INFINITY_S)) {
|
|
rpm-build |
3ee90c |
stonith_max_attempts = CRM_SCORE_INFINITY;
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
stonith_max_attempts = crm_int_helper(value, NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
set_fence_reaction(const char *reaction_s)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (safe_str_eq(reaction_s, "panic")) {
|
|
rpm-build |
3ee90c |
fence_reaction_panic = TRUE;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
if (safe_str_neq(reaction_s, "stop")) {
|
|
rpm-build |
3ee90c |
crm_warn("Invalid value '%s' for %s, using 'stop'",
|
|
rpm-build |
3ee90c |
reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
fence_reaction_panic = FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static gboolean
|
|
rpm-build |
3ee90c |
too_many_st_failures(const char *target)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
GHashTableIter iter;
|
|
rpm-build |
3ee90c |
const char *key = NULL;
|
|
rpm-build |
3ee90c |
struct st_fail_rec *value = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_failures == NULL) {
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (target == NULL) {
|
|
rpm-build |
3ee90c |
g_hash_table_iter_init(&iter, stonith_failures);
|
|
rpm-build |
3ee90c |
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
rpm-build |
3ee90c |
(gpointer *) &value)) {
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (value->count >= stonith_max_attempts) {
|
|
rpm-build |
3ee90c |
target = (const char*)key;
|
|
rpm-build |
3ee90c |
goto too_many;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
value = g_hash_table_lookup(stonith_failures, target);
|
|
rpm-build |
3ee90c |
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
|
|
rpm-build |
3ee90c |
goto too_many;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
too_many:
|
|
rpm-build |
3ee90c |
crm_warn("Too many failures (%d) to fence %s, giving up",
|
|
rpm-build |
3ee90c |
value->count, target);
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Reset a stonith fail count
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \param[in] target Name of node to reset, or NULL for all
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
st_fail_count_reset(const char *target)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (stonith_failures == NULL) {
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (target) {
|
|
rpm-build |
3ee90c |
struct st_fail_rec *rec = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
rec = g_hash_table_lookup(stonith_failures, target);
|
|
rpm-build |
3ee90c |
if (rec) {
|
|
rpm-build |
3ee90c |
rec->count = 0;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
GHashTableIter iter;
|
|
rpm-build |
3ee90c |
const char *key = NULL;
|
|
rpm-build |
3ee90c |
struct st_fail_rec *rec = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
g_hash_table_iter_init(&iter, stonith_failures);
|
|
rpm-build |
3ee90c |
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
rpm-build |
3ee90c |
(gpointer *) &rec)) {
|
|
rpm-build |
3ee90c |
rec->count = 0;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
st_fail_count_increment(const char *target)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
struct st_fail_rec *rec = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_failures == NULL) {
|
|
rpm-build |
3ee90c |
stonith_failures = crm_str_table_new();
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
rec = g_hash_table_lookup(stonith_failures, target);
|
|
rpm-build |
3ee90c |
if (rec) {
|
|
rpm-build |
3ee90c |
rec->count++;
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
rec = malloc(sizeof(struct st_fail_rec));
|
|
rpm-build |
3ee90c |
if(rec == NULL) {
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
rec->count = 1;
|
|
rpm-build |
3ee90c |
g_hash_table_insert(stonith_failures, strdup(target), rec);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* end stonith fail count functions */
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
rpm-build |
3ee90c |
void *user_data)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (rc < pcmk_ok) {
|
|
rpm-build |
3ee90c |
crm_err("Fencing update %d for %s: failed - %s (%d)",
|
|
rpm-build |
3ee90c |
call_id, (char *)user_data, pcmk_strerror(rc), rc);
|
|
rpm-build |
3ee90c |
crm_log_xml_warn(msg, "Failed update");
|
|
rpm-build |
3ee90c |
abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
int rc = pcmk_ok;
|
|
rpm-build |
3ee90c |
crm_node_t *peer = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* We (usually) rely on the membership layer to do node_update_cluster,
|
|
rpm-build |
3ee90c |
* and the peer status callback to do node_update_peer, because the node
|
|
rpm-build |
3ee90c |
* might have already rejoined before we get the stonith result here.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
int flags = node_update_join | node_update_expected;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* zero out the node-status & remove all LRM status info */
|
|
rpm-build |
3ee90c |
xmlNode *node_state = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
CRM_CHECK(target != NULL, return);
|
|
rpm-build |
3ee90c |
CRM_CHECK(uuid != NULL, return);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Make sure the membership and join caches are accurate */
|
|
rpm-build |
3ee90c |
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
CRM_CHECK(peer != NULL, return);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (peer->state == NULL) {
|
|
rpm-build |
3ee90c |
/* Usually, we rely on the membership layer to update the cluster state
|
|
rpm-build |
3ee90c |
* in the CIB. However, if the node has never been seen, do it here, so
|
|
rpm-build |
3ee90c |
* the node is not considered unclean.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
flags |= node_update_cluster;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (peer->uuid == NULL) {
|
|
rpm-build |
3ee90c |
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
|
|
rpm-build |
3ee90c |
peer->uuid = strdup(uuid);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crmd_peer_down(peer, TRUE);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Generate a node state update for the CIB */
|
|
rpm-build |
3ee90c |
node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* we have to mark whether or not remote nodes have already been fenced */
|
|
rpm-build |
3ee90c |
if (peer->flags & crm_remote_node) {
|
|
rpm-build |
3ee90c |
time_t now = time(NULL);
|
|
rpm-build |
3ee90c |
char *now_s = crm_itoa(now);
|
|
rpm-build |
3ee90c |
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
|
|
rpm-build |
3ee90c |
free(now_s);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Force our known ID */
|
|
rpm-build |
3ee90c |
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
|
|
rpm-build |
3ee90c |
cib_quorum_override | cib_scope_local | cib_can_create);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Delay processing the trigger until the update completes */
|
|
rpm-build |
3ee90c |
crm_debug("Sending fencing update %d for %s", rc, target);
|
|
rpm-build |
3ee90c |
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Make sure it sticks */
|
|
rpm-build |
3ee90c |
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
|
|
rpm-build |
3ee90c |
|
|
|
409f23 |
controld_delete_node_state(peer->uname, controld_section_all,
|
|
|
409f23 |
cib_scope_local);
|
|
rpm-build |
3ee90c |
free_xml(node_state);
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Abort transition due to stonith failure
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \param[in] abort_action Whether to restart or stop transition
|
|
rpm-build |
3ee90c |
* \param[in] target Don't restart if this (NULL for any) has too many failures
|
|
rpm-build |
3ee90c |
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
abort_for_stonith_failure(enum transition_action abort_action,
|
|
rpm-build |
3ee90c |
const char *target, xmlNode *reason)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
/* If stonith repeatedly fails, we eventually give up on starting a new
|
|
rpm-build |
3ee90c |
* transition for that reason.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if ((abort_action != tg_stop) && too_many_st_failures(target)) {
|
|
rpm-build |
3ee90c |
abort_action = tg_stop;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*
|
|
rpm-build |
3ee90c |
* stonith cleanup list
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* If the DC is shot, proper notifications might not go out.
|
|
rpm-build |
3ee90c |
* The stonith cleanup list allows the cluster to (re-)send
|
|
rpm-build |
3ee90c |
* notifications once a new DC is elected.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Names of fenced nodes that still need a stonith update sent; each entry is
 * a heap-allocated copy of the node name
 */
static GListPtr stonith_cleanup_list = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Add a node to the stonith cleanup list
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \param[in] target Name of node to add
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
add_stonith_cleanup(const char *target) {
|
|
rpm-build |
3ee90c |
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Remove a node from the stonith cleanup list
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \param[in] Name of node to remove
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
remove_stonith_cleanup(const char *target)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
GListPtr iter = stonith_cleanup_list;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
while (iter != NULL) {
|
|
rpm-build |
3ee90c |
GListPtr tmp = iter;
|
|
rpm-build |
3ee90c |
char *iter_name = tmp->data;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
iter = iter->next;
|
|
rpm-build |
3ee90c |
if (safe_str_eq(target, iter_name)) {
|
|
rpm-build |
3ee90c |
crm_trace("Removing %s from the cleanup list", iter_name);
|
|
rpm-build |
3ee90c |
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
|
|
rpm-build |
3ee90c |
free(iter_name);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Purge all entries from the stonith cleanup list
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
purge_stonith_cleanup()
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (stonith_cleanup_list) {
|
|
rpm-build |
3ee90c |
GListPtr iter = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
rpm-build |
3ee90c |
char *target = iter->data;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_info("Purging %s from stonith cleanup list", target);
|
|
rpm-build |
3ee90c |
free(target);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
g_list_free(stonith_cleanup_list);
|
|
rpm-build |
3ee90c |
stonith_cleanup_list = NULL;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \internal
|
|
rpm-build |
3ee90c |
* \brief Send stonith updates for all entries in cleanup list, then purge it
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
execute_stonith_cleanup()
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
GListPtr iter;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
rpm-build |
3ee90c |
char *target = iter->data;
|
|
rpm-build |
3ee90c |
crm_node_t *target_node = crm_get_peer(0, target);
|
|
rpm-build |
3ee90c |
const char *uuid = crm_peer_uuid(target_node);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
|
|
rpm-build |
3ee90c |
send_stonith_update(NULL, target, uuid);
|
|
rpm-build |
3ee90c |
free(target);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
g_list_free(stonith_cleanup_list);
|
|
rpm-build |
3ee90c |
stonith_cleanup_list = NULL;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* end stonith cleanup list functions */
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* stonith API client
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* Functions that need to interact directly with the fencer via its API
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static stonith_t *stonith_api = NULL;
|
|
rpm-build |
3ee90c |
static crm_trigger_t *stonith_reconnect = NULL;
|
|
rpm-build |
3ee90c |
static char *te_client_id = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static gboolean
|
|
rpm-build |
3ee90c |
fail_incompletable_stonith(crm_graph_t *graph)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
GListPtr lpc = NULL;
|
|
rpm-build |
3ee90c |
const char *task = NULL;
|
|
rpm-build |
3ee90c |
xmlNode *last_action = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (graph == NULL) {
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
|
|
rpm-build |
3ee90c |
GListPtr lpc2 = NULL;
|
|
rpm-build |
3ee90c |
synapse_t *synapse = (synapse_t *) lpc->data;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (synapse->confirmed) {
|
|
rpm-build |
3ee90c |
continue;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
|
|
rpm-build |
3ee90c |
crm_action_t *action = (crm_action_t *) lpc2->data;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (action->type != action_type_crm || action->confirmed) {
|
|
rpm-build |
3ee90c |
continue;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
|
|
rpm-build |
3ee90c |
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
|
|
rpm-build |
3ee90c |
action->failed = TRUE;
|
|
rpm-build |
3ee90c |
last_action = action->xml;
|
|
rpm-build |
3ee90c |
update_graph(graph, action);
|
|
rpm-build |
3ee90c |
crm_notice("Failing action %d (%s): fencer terminated",
|
|
rpm-build |
3ee90c |
action->id, ID(action->xml));
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (last_action != NULL) {
|
|
rpm-build |
3ee90c |
crm_warn("Fencer failure resulted in unrunnable actions");
|
|
rpm-build |
3ee90c |
abort_for_stonith_failure(tg_restart, NULL, last_action);
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
te_cleanup_stonith_history_sync(st, FALSE);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
rpm-build |
3ee90c |
crm_crit("Fencing daemon connection failed");
|
|
rpm-build |
3ee90c |
mainloop_set_trigger(stonith_reconnect);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
crm_info("Fencing daemon disconnected");
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_api) {
|
|
rpm-build |
3ee90c |
/* the client API won't properly reconnect notifications
|
|
rpm-build |
3ee90c |
* if they are still in the table - so remove them
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (stonith_api->state != stonith_disconnected) {
|
|
rpm-build |
3ee90c |
stonith_api->cmds->disconnect(st);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (AM_I_DC) {
|
|
rpm-build |
3ee90c |
fail_incompletable_stonith(transition_graph);
|
|
rpm-build |
3ee90c |
trigger_graph();
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (te_client_id == NULL) {
|
|
rpm-build |
3ee90c |
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
|
|
rpm-build |
3ee90c |
(unsigned long) getpid());
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (st_event == NULL) {
|
|
rpm-build |
3ee90c |
crm_err("Notify data not found");
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crmd_alert_fencing_op(st_event);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if ((st_event->result == pcmk_ok) && safe_str_eq("on", st_event->action)) {
|
|
rpm-build |
3ee90c |
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
|
rpm-build |
3ee90c |
st_event->target,
|
|
rpm-build |
3ee90c |
st_event->executioner? st_event->executioner : "<anyone>",
|
|
rpm-build |
3ee90c |
st_event->origin);
|
|
rpm-build |
3ee90c |
/* TODO: Hook up st_event->device */
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else if (safe_str_eq("on", st_event->action)) {
|
|
rpm-build |
3ee90c |
crm_err("Unfencing of %s by %s failed: %s (%d)",
|
|
rpm-build |
3ee90c |
st_event->target,
|
|
rpm-build |
3ee90c |
st_event->executioner? st_event->executioner : "<anyone>",
|
|
rpm-build |
3ee90c |
pcmk_strerror(st_event->result), st_event->result);
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else if ((st_event->result == pcmk_ok)
|
|
rpm-build |
3ee90c |
&& crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* We were notified of our own fencing. Most likely, either fencing was
|
|
rpm-build |
3ee90c |
* misconfigured, or fabric fencing that doesn't cut cluster
|
|
rpm-build |
3ee90c |
* communication is in use.
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* Either way, shutting down the local host is a good idea, to require
|
|
rpm-build |
3ee90c |
* administrator intervention. Also, other nodes would otherwise likely
|
|
rpm-build |
3ee90c |
* set our status to lost because of the fencing callback and discard
|
|
rpm-build |
3ee90c |
* our subsequent election votes as "not part of our cluster".
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
crm_crit("We were allegedly just fenced by %s for %s!",
|
|
rpm-build |
3ee90c |
st_event->executioner? st_event->executioner : "the cluster",
|
|
rpm-build |
3ee90c |
st_event->origin); /* Dumps blackbox if enabled */
|
|
rpm-build |
3ee90c |
if (fence_reaction_panic) {
|
|
rpm-build |
3ee90c |
pcmk_panic(__FUNCTION__);
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
crm_exit(CRM_EX_FATAL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Update the count of stonith failures for this target, in case we become
|
|
rpm-build |
3ee90c |
* DC later. The current DC has already updated its fail count in
|
|
rpm-build |
3ee90c |
* tengine_stonith_callback().
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
|
|
rpm-build |
3ee90c |
if (st_event->result == pcmk_ok) {
|
|
rpm-build |
3ee90c |
st_fail_count_reset(st_event->target);
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
st_fail_count_increment(st_event->target);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
|
|
rpm-build |
3ee90c |
CRM_XS " initiator=%s ref=%s",
|
|
rpm-build |
3ee90c |
st_event->target, st_event->result == pcmk_ok ? "" : " not",
|
|
rpm-build |
3ee90c |
st_event->action,
|
|
rpm-build |
3ee90c |
st_event->executioner ? st_event->executioner : "<anyone>",
|
|
rpm-build |
3ee90c |
(st_event->client_origin? st_event->client_origin : "<unknown>"),
|
|
rpm-build |
3ee90c |
pcmk_strerror(st_event->result),
|
|
rpm-build |
3ee90c |
st_event->origin, st_event->id);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (st_event->result == pcmk_ok) {
|
|
rpm-build |
3ee90c |
crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
|
|
rpm-build |
3ee90c |
const char *uuid = NULL;
|
|
rpm-build |
3ee90c |
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (peer == NULL) {
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
uuid = crm_peer_uuid(peer);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
|
|
rpm-build |
3ee90c |
if(AM_I_DC) {
|
|
rpm-build |
3ee90c |
/* The DC always sends updates */
|
|
rpm-build |
3ee90c |
send_stonith_update(NULL, st_event->target, uuid);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* @TODO Ideally, at this point, we'd check whether the fenced node
|
|
rpm-build |
3ee90c |
* hosted any guest nodes, and call remote_node_down() for them.
|
|
rpm-build |
3ee90c |
* Unfortunately, the controller doesn't have a simple, reliable way
|
|
rpm-build |
3ee90c |
* to map hosts to guests. It might be possible to track this in the
|
|
rpm-build |
3ee90c |
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
|
|
rpm-build |
3ee90c |
* on the PE creating fence pseudo-events for the guests.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (st_event->client_origin
|
|
rpm-build |
3ee90c |
&& safe_str_neq(st_event->client_origin, te_client_id)) {
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Abort the current transition graph if it wasn't us
|
|
rpm-build |
3ee90c |
* that invoked stonith to fence someone
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
|
|
rpm-build |
3ee90c |
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Assume it was our leader if we don't currently have one */
|
|
rpm-build |
3ee90c |
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
|
|
rpm-build |
3ee90c |
&& is_not_set(peer->flags, crm_remote_node)) {
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_notice("Target %s our leader %s (recorded: %s)",
|
|
rpm-build |
3ee90c |
fsa_our_dc ? "was" : "may have been", st_event->target,
|
|
rpm-build |
3ee90c |
fsa_our_dc ? fsa_our_dc : "<unset>");
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Given the CIB resyncing that occurs around elections,
|
|
rpm-build |
3ee90c |
* have one node update the CIB now and, if the new DC is different,
|
|
rpm-build |
3ee90c |
* have them do so too after the election
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (we_are_executioner) {
|
|
rpm-build |
3ee90c |
send_stonith_update(NULL, st_event->target, uuid);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
add_stonith_cleanup(st_event->target);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* If the target is a remote node, and we host its connection,
|
|
rpm-build |
3ee90c |
* immediately fail all monitors so it can be recovered quickly.
|
|
rpm-build |
3ee90c |
* The connection won't necessarily drop when a remote node is fenced,
|
|
rpm-build |
3ee90c |
* so the failure might not otherwise be detected until the next poke.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (is_set(peer->flags, crm_remote_node)) {
|
|
rpm-build |
3ee90c |
remote_ra_fail(st_event->target);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crmd_peer_down(peer, TRUE);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
* \brief Connect to fencer
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* \return TRUE
|
|
rpm-build |
3ee90c |
* \note If user_data is NULL, this will wait 2s between attempts, for up to
|
|
rpm-build |
3ee90c |
* 30 attempts, meaning the controller could be blocked as long as 58s.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
static gboolean
|
|
rpm-build |
3ee90c |
te_connect_stonith(gpointer user_data)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
int rc = pcmk_ok;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_api == NULL) {
|
|
rpm-build |
3ee90c |
stonith_api = stonith_api_new();
|
|
rpm-build |
3ee90c |
if (stonith_api == NULL) {
|
|
rpm-build |
3ee90c |
crm_err("Could not connect to fencer: API memory allocation failed");
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_api->state != stonith_disconnected) {
|
|
rpm-build |
3ee90c |
crm_trace("Already connected to fencer, no need to retry");
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (user_data == NULL) {
|
|
rpm-build |
3ee90c |
// Blocking (retry failures now until successful)
|
|
rpm-build |
3ee90c |
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
|
|
rpm-build |
3ee90c |
if (rc != pcmk_ok) {
|
|
rpm-build |
3ee90c |
crm_err("Could not connect to fencer in 30 attempts: %s "
|
|
rpm-build |
3ee90c |
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
// Non-blocking (retry failures later in main loop)
|
|
rpm-build |
3ee90c |
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
|
rpm-build |
3ee90c |
if (rc != pcmk_ok) {
|
|
rpm-build |
3ee90c |
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
rpm-build |
3ee90c |
crm_err("Fencer connection failed (will retry): %s "
|
|
rpm-build |
3ee90c |
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
rpm-build |
3ee90c |
mainloop_set_trigger(stonith_reconnect);
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
|
rpm-build |
3ee90c |
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (rc == pcmk_ok) {
|
|
rpm-build |
3ee90c |
stonith_api->cmds->register_notification(stonith_api,
|
|
rpm-build |
3ee90c |
T_STONITH_NOTIFY_DISCONNECT,
|
|
rpm-build |
3ee90c |
tengine_stonith_connection_destroy);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->register_notification(stonith_api,
|
|
rpm-build |
3ee90c |
T_STONITH_NOTIFY_FENCE,
|
|
rpm-build |
3ee90c |
tengine_stonith_notify);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->register_notification(stonith_api,
|
|
rpm-build |
3ee90c |
T_STONITH_NOTIFY_HISTORY_SYNCED,
|
|
rpm-build |
3ee90c |
tengine_stonith_history_synced);
|
|
rpm-build |
3ee90c |
te_trigger_stonith_history_sync(TRUE);
|
|
rpm-build |
3ee90c |
crm_notice("Fencer successfully connected");
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*!
|
|
rpm-build |
3ee90c |
\internal
|
|
rpm-build |
3ee90c |
\brief Schedule fencer connection attempt in main loop
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
controld_trigger_fencer_connect()
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (stonith_reconnect == NULL) {
|
|
rpm-build |
3ee90c |
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
|
rpm-build |
3ee90c |
te_connect_stonith,
|
|
rpm-build |
3ee90c |
GINT_TO_POINTER(TRUE));
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
set_bit(fsa_input_register, R_ST_REQUIRED);
|
|
rpm-build |
3ee90c |
mainloop_set_trigger(stonith_reconnect);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
controld_disconnect_fencer(bool destroy)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (stonith_api) {
|
|
rpm-build |
3ee90c |
// Prevent fencer connection from coming up again
|
|
rpm-build |
3ee90c |
clear_bit(fsa_input_register, R_ST_REQUIRED);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (stonith_api->state != stonith_disconnected) {
|
|
rpm-build |
3ee90c |
stonith_api->cmds->disconnect(stonith_api);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
if (destroy) {
|
|
rpm-build |
3ee90c |
if (stonith_api) {
|
|
rpm-build |
3ee90c |
stonith_api->cmds->free(stonith_api);
|
|
rpm-build |
3ee90c |
stonith_api = NULL;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
if (stonith_reconnect) {
|
|
rpm-build |
3ee90c |
mainloop_destroy_trigger(stonith_reconnect);
|
|
rpm-build |
3ee90c |
stonith_reconnect = NULL;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
if (te_client_id) {
|
|
rpm-build |
3ee90c |
free(te_client_id);
|
|
rpm-build |
3ee90c |
te_client_id = NULL;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static gboolean
|
|
rpm-build |
3ee90c |
do_stonith_history_sync(gpointer user_data)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
|
|
rpm-build |
3ee90c |
stonith_history_t *history = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
te_cleanup_stonith_history_sync(stonith_api, FALSE);
|
|
rpm-build |
3ee90c |
stonith_api->cmds->history(stonith_api,
|
|
rpm-build |
3ee90c |
st_opt_sync_call | st_opt_broadcast,
|
|
rpm-build |
3ee90c |
NULL, &history, 5);
|
|
rpm-build |
3ee90c |
stonith_history_free(history);
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
char *uuid = NULL;
|
|
rpm-build |
3ee90c |
int stonith_id = -1;
|
|
rpm-build |
3ee90c |
int transition_id = -1;
|
|
rpm-build |
3ee90c |
crm_action_t *action = NULL;
|
|
rpm-build |
3ee90c |
int call_id = data->call_id;
|
|
rpm-build |
3ee90c |
int rc = data->rc;
|
|
rpm-build |
3ee90c |
char *userdata = data->userdata;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
CRM_CHECK(userdata != NULL, return);
|
|
rpm-build |
3ee90c |
crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
|
|
rpm-build |
3ee90c |
pcmk_strerror(rc), rc);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (AM_I_DC == FALSE) {
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
|
|
rpm-build |
3ee90c |
/* op->call_id, op->optype, op->node_name, op->op_result, */
|
|
rpm-build |
3ee90c |
/* (char *)op->node_list, op->private_data); */
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* filter out old STONITH actions */
|
|
rpm-build |
3ee90c |
CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
|
|
rpm-build |
3ee90c |
goto bail);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|
|
rpm-build |
3ee90c |
|| transition_graph->id != transition_id) {
|
|
rpm-build |
3ee90c |
crm_info("Ignoring STONITH action initiated outside of the current transition");
|
|
rpm-build |
3ee90c |
goto bail;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
action = controld_get_action(stonith_id);
|
|
rpm-build |
3ee90c |
if (action == NULL) {
|
|
rpm-build |
3ee90c |
crm_err("Stonith action not matched");
|
|
rpm-build |
3ee90c |
goto bail;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
stop_te_timer(action->timer);
|
|
rpm-build |
3ee90c |
if (rc == pcmk_ok) {
|
|
rpm-build |
3ee90c |
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
rpm-build |
3ee90c |
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
rpm-build |
3ee90c |
const char *op = crm_meta_value(action->params, "stonith_action");
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_info("Stonith operation %d for %s passed", call_id, target);
|
|
rpm-build |
3ee90c |
if (action->confirmed == FALSE) {
|
|
rpm-build |
3ee90c |
te_action_confirmed(action, NULL);
|
|
rpm-build |
3ee90c |
if (safe_str_eq("on", op)) {
|
|
rpm-build |
3ee90c |
const char *value = NULL;
|
|
rpm-build |
3ee90c |
char *now = crm_ttoa(time(NULL));
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
|
|
rpm-build |
3ee90c |
free(now);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
|
|
rpm-build |
3ee90c |
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
|
|
rpm-build |
3ee90c |
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else if (action->sent_update == FALSE) {
|
|
rpm-build |
3ee90c |
send_stonith_update(action, target, uuid);
|
|
rpm-build |
3ee90c |
action->sent_update = TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
st_fail_count_reset(target);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
rpm-build |
3ee90c |
enum transition_action abort_action = tg_restart;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
action->failed = TRUE;
|
|
rpm-build |
3ee90c |
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
rpm-build |
3ee90c |
call_id, target, pcmk_strerror(rc));
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* If no fence devices were available, there's no use in immediately
|
|
rpm-build |
3ee90c |
* checking again, so don't start a new transition in that case.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (rc == -ENODEV) {
|
|
rpm-build |
3ee90c |
crm_warn("No devices found in cluster to fence %s, giving up",
|
|
rpm-build |
3ee90c |
target);
|
|
rpm-build |
3ee90c |
abort_action = tg_stop;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Increment the fail count now, so abort_for_stonith_failure() can
|
|
rpm-build |
3ee90c |
* check it. Non-DC nodes will increment it in tengine_stonith_notify().
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
st_fail_count_increment(target);
|
|
rpm-build |
3ee90c |
abort_for_stonith_failure(abort_action, target, NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
update_graph(transition_graph, action);
|
|
rpm-build |
3ee90c |
trigger_graph();
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
bail:
|
|
rpm-build |
3ee90c |
free(userdata);
|
|
rpm-build |
3ee90c |
free(uuid);
|
|
rpm-build |
3ee90c |
return;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
gboolean
|
|
rpm-build |
3ee90c |
te_fence_node(crm_graph_t *graph, crm_action_t *action)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
int rc = 0;
|
|
rpm-build |
3ee90c |
const char *id = NULL;
|
|
rpm-build |
3ee90c |
const char *uuid = NULL;
|
|
rpm-build |
3ee90c |
const char *target = NULL;
|
|
rpm-build |
3ee90c |
const char *type = NULL;
|
|
rpm-build |
3ee90c |
gboolean invalid_action = FALSE;
|
|
rpm-build |
3ee90c |
enum stonith_call_options options = st_opt_none;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
id = ID(action->xml);
|
|
rpm-build |
3ee90c |
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
rpm-build |
3ee90c |
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
rpm-build |
3ee90c |
type = crm_meta_value(action->params, "stonith_action");
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
CRM_CHECK(id != NULL, invalid_action = TRUE);
|
|
rpm-build |
3ee90c |
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
|
|
rpm-build |
3ee90c |
CRM_CHECK(type != NULL, invalid_action = TRUE);
|
|
rpm-build |
3ee90c |
CRM_CHECK(target != NULL, invalid_action = TRUE);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (invalid_action) {
|
|
rpm-build |
3ee90c |
crm_log_xml_warn(action->xml, "BadAction");
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
crm_notice("Requesting fencing (%s) of node %s "
|
|
rpm-build |
3ee90c |
CRM_XS " action=%s timeout=%u",
|
|
rpm-build |
3ee90c |
type, target, id, transition_graph->stonith_timeout);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* Passing NULL means block until we can connect... */
|
|
rpm-build |
3ee90c |
te_connect_stonith(NULL);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
|
|
rpm-build |
3ee90c |
options |= st_opt_allow_suicide;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
rc = stonith_api->cmds->fence(stonith_api, options, target, type,
|
|
rpm-build |
3ee90c |
(int) (transition_graph->stonith_timeout / 1000),
|
|
rpm-build |
3ee90c |
0);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
stonith_api->cmds->register_callback(stonith_api, rc,
|
|
rpm-build |
3ee90c |
(int) (transition_graph->stonith_timeout / 1000),
|
|
rpm-build |
3ee90c |
st_opt_timeout_updates,
|
|
rpm-build |
3ee90c |
generate_transition_key(transition_graph->id, action->id,
|
|
rpm-build |
3ee90c |
0, te_uuid),
|
|
rpm-build |
3ee90c |
"tengine_stonith_callback", tengine_stonith_callback);
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
return TRUE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* end stonith API client functions */
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/*
|
|
rpm-build |
3ee90c |
* stonith history synchronization
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
|
|
rpm-build |
3ee90c |
* joins or leaves, we need to synchronize the history across all nodes.
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
// Main-loop trigger that runs do_stonith_history_sync(); created on first use by te_trigger_stonith_history_sync()
static crm_trigger_t *stonith_history_sync_trigger = NULL;
|
|
rpm-build |
3ee90c |
// "history_sync_short" timer (created with 5000; logged as 5 seconds) that sets stonith_history_sync_trigger
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
|
|
rpm-build |
3ee90c |
// "history_sync_long" timer (created with 30000; logged as 30 seconds) that sets stonith_history_sync_trigger
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
if (free_timers) {
|
|
rpm-build |
3ee90c |
mainloop_timer_del(stonith_history_sync_timer_short);
|
|
rpm-build |
3ee90c |
stonith_history_sync_timer_short = NULL;
|
|
rpm-build |
3ee90c |
mainloop_timer_del(stonith_history_sync_timer_long);
|
|
rpm-build |
3ee90c |
stonith_history_sync_timer_long = NULL;
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
mainloop_timer_stop(stonith_history_sync_timer_short);
|
|
rpm-build |
3ee90c |
mainloop_timer_stop(stonith_history_sync_timer_long);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (st) {
|
|
rpm-build |
3ee90c |
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static void
|
|
rpm-build |
3ee90c |
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
te_cleanup_stonith_history_sync(st, FALSE);
|
|
rpm-build |
3ee90c |
crm_debug("Fence-history synced - cancel all timers");
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
static gboolean
|
|
rpm-build |
3ee90c |
stonith_history_sync_set_trigger(gpointer user_data)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
mainloop_set_trigger(stonith_history_sync_trigger);
|
|
rpm-build |
3ee90c |
return FALSE;
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
void
|
|
rpm-build |
3ee90c |
te_trigger_stonith_history_sync(bool long_timeout)
|
|
rpm-build |
3ee90c |
{
|
|
rpm-build |
3ee90c |
/* trigger a sync in 5s to give more nodes the
|
|
rpm-build |
3ee90c |
* chance to show up so that we don't create
|
|
rpm-build |
3ee90c |
* unnecessary stonith-history-sync traffic
|
|
rpm-build |
3ee90c |
*
|
|
rpm-build |
3ee90c |
* the long timeout of 30s is there as a fallback
|
|
rpm-build |
3ee90c |
* so that after a successful connection to fenced
|
|
rpm-build |
3ee90c |
* we will wait for 30s for the DC to trigger a
|
|
rpm-build |
3ee90c |
* history-sync
|
|
rpm-build |
3ee90c |
* if this doesn't happen we trigger a sync locally
|
|
rpm-build |
3ee90c |
* (e.g. fenced segfaults and is restarted by pacemakerd)
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* as we are finally checking the stonith-connection
|
|
rpm-build |
3ee90c |
* in do_stonith_history_sync we should be fine
|
|
rpm-build |
3ee90c |
* leaving stonith_history_sync_time & stonith_history_sync_trigger
|
|
rpm-build |
3ee90c |
* around
|
|
rpm-build |
3ee90c |
*/
|
|
rpm-build |
3ee90c |
if (stonith_history_sync_trigger == NULL) {
|
|
rpm-build |
3ee90c |
stonith_history_sync_trigger =
|
|
rpm-build |
3ee90c |
mainloop_add_trigger(G_PRIORITY_LOW,
|
|
rpm-build |
3ee90c |
do_stonith_history_sync, NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
if (long_timeout) {
|
|
rpm-build |
3ee90c |
if(stonith_history_sync_timer_long == NULL) {
|
|
rpm-build |
3ee90c |
stonith_history_sync_timer_long =
|
|
rpm-build |
3ee90c |
mainloop_timer_add("history_sync_long", 30000,
|
|
rpm-build |
3ee90c |
FALSE, stonith_history_sync_set_trigger,
|
|
rpm-build |
3ee90c |
NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
|
|
rpm-build |
3ee90c |
mainloop_timer_start(stonith_history_sync_timer_long);
|
|
rpm-build |
3ee90c |
} else {
|
|
rpm-build |
3ee90c |
if(stonith_history_sync_timer_short == NULL) {
|
|
rpm-build |
3ee90c |
stonith_history_sync_timer_short =
|
|
rpm-build |
3ee90c |
mainloop_timer_add("history_sync_short", 5000,
|
|
rpm-build |
3ee90c |
FALSE, stonith_history_sync_set_trigger,
|
|
rpm-build |
3ee90c |
NULL);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
|
|
rpm-build |
3ee90c |
mainloop_timer_start(stonith_history_sync_timer_short);
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
}
|
|
rpm-build |
3ee90c |
|
|
rpm-build |
3ee90c |
/* end stonith history synchronization functions */
|