From 409f23b0b4338996911235fe7a771d1a2cbd02da Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Jun 10 2020 17:56:11 +0000 Subject: Refactor: controller: improve efficiency when deleting node state Rename erase_status_xpath() to controld_delete_node_state() to follow current naming practice. Instead of passing it a node_state subsection name, pass a new enum value indicating what to erase (resource history, transient node attributes, or both). This allows us to improve the log messages further, as well as improving efficiency when both need to be cleared. --- diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c index 1db5650..008a02d 100644 --- a/daemons/controld/controld_based.c +++ b/daemons/controld/controld_based.c @@ -170,39 +170,76 @@ controld_action_is_recordable(const char *action) } static void -erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, - void *user_data) +cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) { - char *xpath = user_data; + char *desc = user_data; if (rc == 0) { - crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", - xpath, call_id); + crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { - crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " - CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); + crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", + desc, call_id, pcmk_strerror(rc), rc); } } -#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" +// Searches for various portions of node_state to delete +// Match a particular node's node_state (takes node name 1x) +#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" + +// Node's lrm section (name 1x) +#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM + +// Node's transient_attributes section (name 1x) +#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS + +// Everything under node_state (name 1x) +#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" + +/*! + * \internal + * \brief Delete subsection of a node's CIB node_state + * + * \param[in] uname Desired node + * \param[in] section Subsection of node_state to delete + * \param[in] options CIB call options to use + */ void -erase_status_tag(const char *uname, const char *tag, int options) +controld_delete_node_state(const char *uname, enum controld_section_e section, + int options) { + char *xpath = NULL; + char *desc = NULL; + CRM_CHECK(uname != NULL, return); + switch (section) { + case controld_section_lrm: + xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); + desc = crm_strdup_printf("resource history for node %s", uname); + break; + case controld_section_attrs: + xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); + desc = crm_strdup_printf("transient attributes for node %s", uname); + break; + case controld_section_all: + xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); + desc = crm_strdup_printf("all state for node %s", uname); + break; + } if (fsa_cib_conn == NULL) { - crm_warn("Unable to delete CIB '%s' section for node %s: " - "no CIB connection", tag, uname); + crm_warn("Unable to delete %s: no CIB connection", desc); + free(desc); } else { int call_id; - char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); options |= cib_quorum_override|cib_xpath; call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); - crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " - CRM_XS " xpath=%s", tag, uname, call_id, xpath); - fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); - // CIB library handles freeing xpath + crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", + desc, call_id, xpath); + fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback); + // CIB library handles freeing desc } + free(xpath); } diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index 5cbd392..f7e3db2 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -200,14 +200,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d * transient attributes intact until it rejoins. */ if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { - erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); } } else if(AM_I_DC) { if (appeared) { te_trigger_stonith_history_sync(FALSE); } else { - erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); + controld_delete_node_state(node->uname, + controld_section_attrs, + cib_scope_local); } } break; diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 293c9e7..7e970c8 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1413,7 +1413,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys, } /* Now delete the copy in the CIB */ - erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); + controld_delete_node_state(lrm_state->node_name, controld_section_lrm, + cib_scope_local); /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE * would result in the scheduler sending us back here again diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 0933487..c297985 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -229,9 +229,8 @@ send_stonith_update(crm_action_t *action, const char *target, const char *uuid) /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ - erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); - erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); - + controld_delete_node_state(peer->uname, controld_section_all, + cib_scope_local); free_xml(node_state); return; } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index ea3dd7d..f41c07c 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -587,7 +587,8 @@ do_dc_join_ack(long long action, /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB notifies us of the change. */ - erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); + controld_delete_node_state(join_from, controld_section_lrm, + cib_scope_local); if (safe_str_eq(join_from, fsa_our_uname)) { xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index 4fbae45..2d3dfa7 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -181,13 +181,13 @@ remote_node_up(const char *node_name) CRM_CHECK(node_name != NULL, return); crm_info("Announcing pacemaker_remote node %s", node_name); - /* Clear node's operation history. The node's transient attributes should - * and normally will be cleared when the node leaves, but since remote node - * state has a number of corner cases, clear them here as well, to be sure. + /* Clear node's entire state (resource history and transient attributes). + * The transient attributes should and normally will be cleared when the + * node leaves, but since remote node state has a number of corner cases, + * clear them here as well, to be sure. */ call_opt = crmd_cib_smart_opt(); - erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); - erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); + controld_delete_node_state(node_name, controld_section_all, call_opt); /* Clear node's probed attribute */ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); @@ -252,15 +252,15 @@ remote_node_down(const char *node_name, const enum down_opts opts) /* Purge node from attrd's memory */ update_attrd_remote_node_removed(node_name, NULL); - /* Purge node's transient attributes */ - erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); - - /* Normally, the LRM operation history should be kept until the node comes - * back up. However, after a successful fence, we want to clear it, so we - * don't think resources are still running on the node. + /* Normally, only node attributes should be erased, and the resource history + * should be kept until the node comes back up. However, after a successful + * fence, we want to clear the history as well, so we don't think resources + * are still running on the node. */ if (opts == DOWN_ERASE_LRM) { - erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); + controld_delete_node_state(node_name, controld_section_all, call_opt); + } else { + controld_delete_node_state(node_name, controld_section_attrs, call_opt); } /* Ensure node is in the remote peer cache with lost state */ diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h index cf04f13..f902361 100644 --- a/daemons/controld/controld_utils.h +++ b/daemons/controld/controld_utils.h @@ -70,7 +70,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, const char *source); void populate_cib_nodes(enum node_update_flags flags, const char *source); void crm_update_quorum(gboolean quorum, gboolean force_update); -void erase_status_tag(const char *uname, const char *tag, int options); void controld_close_attrd_ipc(void); void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); void update_attrd_remote_node_removed(const char *host, const char *user_name); @@ -87,6 +86,16 @@ unsigned int cib_op_timeout(void); bool feature_set_compatible(const char *dc_version, const char *join_version); bool controld_action_is_recordable(const char *action); +// Subsections of node_state +enum controld_section_e { + controld_section_lrm, + controld_section_attrs, + controld_section_all, +}; + +void controld_delete_node_state(const char *uname, + enum controld_section_e section, int options); + const char *get_node_id(xmlNode *lrm_rsc_op); /* Convenience macro for registering a CIB callback