diff --git a/tools/crm_node.c b/tools/crm_node.c index db31f20..1773a36 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #define SUMMARY "crm_node - Tool for displaying low-level node information" @@ -39,7 +40,6 @@ gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); -static char *pid_s = NULL; static GMainLoop *mainloop = NULL; static crm_exit_t exit_code = CRM_EX_OK; @@ -140,11 +140,6 @@ remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError * static void crm_node_exit(crm_exit_t value) { - if (pid_s) { - free(pid_s); - pid_s = NULL; - } - exit_code = value; if (mainloop && g_main_loop_is_running(mainloop)) { @@ -155,173 +150,123 @@ crm_node_exit(crm_exit_t value) } static void -exit_disconnect(gpointer user_data) -{ - fprintf(stderr, "error: Lost connection to cluster\n"); - crm_node_exit(CRM_EX_DISCONNECT); -} - -typedef int (*ipc_dispatch_fn) (const char *buffer, ssize_t length, - gpointer userdata); - -static crm_ipc_t * -new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) +controller_event_cb(pcmk_ipc_api_t *controld_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) { - mainloop_io_t *source = NULL; - crm_ipc_t *ipc = NULL; - - struct ipc_client_callbacks ipc_callbacks = { - .dispatch = dispatch, - .destroy = exit_disconnect - }; + pcmk_controld_api_reply_t *reply = event_data; - mainloop = g_main_loop_new(NULL, FALSE); - source = mainloop_add_ipc_client(system, G_PRIORITY_DEFAULT, 0, - NULL, &ipc_callbacks); - ipc = mainloop_get_ipc_client(source); - if (ipc == NULL) { - fprintf(stderr, - "error: Could not connect to cluster (is it running?)\n"); - crm_node_exit(CRM_EX_DISCONNECT); - } - return ipc; -} - -static int -send_controller_hello(crm_ipc_t *controller) -{ - xmlNode *hello = NULL; - int rc; - - pid_s = pcmk__getpid_s(); - hello = create_hello_message(pid_s, crm_system_name, "1", "0"); - rc = crm_ipc_send(controller, hello, 0, 0, NULL); - free_xml(hello); - return (rc < 0)? rc : 0; -} - -static int -send_node_info_request(crm_ipc_t *controller, uint32_t nodeid) -{ - xmlNode *ping = NULL; - int rc; - - ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, - crm_system_name, pid_s); - if (nodeid > 0) { - crm_xml_add_int(ping, XML_ATTR_ID, nodeid); - } - rc = crm_ipc_send(controller, ping, 0, 0, NULL); - free_xml(ping); - return (rc < 0)? rc : 0; -} + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (exit_code == CRM_EX_DISCONNECT) { // Unexpected + fprintf(stderr, "error: Lost connection to controller\n"); + } + goto done; + break; -static int -dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) -{ - xmlNode *message = string2xml(buffer); - xmlNode *data = NULL; - const char *value = NULL; + case pcmk_ipc_event_reply: + break; - if (message == NULL) { - fprintf(stderr, "error: Could not understand reply from controller\n"); - crm_node_exit(CRM_EX_PROTOCOL); - return 0; + default: + return; } - crm_log_xml_trace(message, "controller reply"); - - exit_code = CRM_EX_PROTOCOL; - // Validate reply - value = crm_element_value(message, F_CRM_MSG_TYPE); - if (safe_str_neq(value, XML_ATTR_RESPONSE)) { - fprintf(stderr, "error: Message from controller was not a reply\n"); + if (status != CRM_EX_OK) { + fprintf(stderr, "error: Bad reply from controller: %s\n", + crm_exit_str(status)); goto done; } - value = crm_element_value(message, XML_ATTR_REFERENCE); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not specify original message\n"); - goto done; - } - data = get_message_xml(message, F_CRM_DATA); - if (data == NULL) { - fprintf(stderr, "error: Controller reply did not contain any data\n"); + if (reply->reply_type != pcmk_controld_reply_info) { + fprintf(stderr, "error: Unknown reply type %d from controller\n", + reply->reply_type); goto done; } + // Parse desired info from reply and display to user switch (options.command) { case 'i': - value = crm_element_value(data, XML_ATTR_ID); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not contain node ID\n"); - } else { - printf("%s\n", value); - exit_code = CRM_EX_OK; + if (reply->data.node_info.id == 0) { + fprintf(stderr, + "error: Controller reply did not contain node ID\n"); + exit_code = CRM_EX_PROTOCOL; + goto done; } + printf("%d\n", reply->data.node_info.id); break; case 'n': case 'N': - value = crm_element_value(data, XML_ATTR_UNAME); - if (value == NULL) { + if (reply->data.node_info.uname == NULL) { fprintf(stderr, "Node is not known to cluster\n"); exit_code = CRM_EX_NOHOST; - } else { - printf("%s\n", value); - exit_code = CRM_EX_OK; + goto done; } + printf("%s\n", reply->data.node_info.uname); break; case 'q': - value = crm_element_value(data, XML_ATTR_HAVE_QUORUM); - if (value == NULL) { - fprintf(stderr, "error: Controller reply did not contain quorum status\n"); - } else { - bool quorum = crm_is_true(value); - - printf("%d\n", quorum); - exit_code = quorum? CRM_EX_OK : CRM_EX_QUORUM; + printf("%d\n", reply->data.node_info.have_quorum); + if (!(reply->data.node_info.have_quorum)) { + exit_code = CRM_EX_QUORUM; + goto done; } break; default: fprintf(stderr, "internal error: Controller reply not expected\n"); exit_code = CRM_EX_SOFTWARE; - break; + goto done; } + // Success + exit_code = CRM_EX_OK; done: - free_xml(message); - crm_node_exit(exit_code); - return 0; + pcmk_disconnect_ipc(controld_api); + pcmk_quit_main_loop(mainloop, 10); } static void run_controller_mainloop(uint32_t nodeid) { - crm_ipc_t *controller = NULL; + pcmk_ipc_api_t *controld_api = NULL; int rc; - controller = new_mainloop_for_ipc(CRM_SYSTEM_CRMD, dispatch_controller); + // Set disconnect exit code to handle unexpected disconnects + exit_code = CRM_EX_DISCONNECT; + + // Create controller IPC object + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + return; + } + pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL); - rc = send_controller_hello(controller); - if (rc < 0) { - fprintf(stderr, "error: Could not register with controller: %s\n", - pcmk_strerror(rc)); - crm_node_exit(crm_errno2exit(rc)); + // Connect to controller + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + return; } - rc = send_node_info_request(controller, nodeid); - if (rc < 0) { + rc = pcmk_controld_api_node_info(controld_api, nodeid); + if (rc != pcmk_rc_ok) { fprintf(stderr, "error: Could not ping controller: %s\n", - pcmk_strerror(rc)); - crm_node_exit(crm_errno2exit(rc)); + pcmk_rc_str(rc)); + pcmk_disconnect_ipc(controld_api); + exit_code = pcmk_rc2exitc(rc); + return; } - // Run main loop to get controller reply via dispatch_controller() + // Run main loop to get controller reply via controller_event_cb() + mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); mainloop = NULL; + pcmk_free_ipc_api(controld_api); } static void @@ -385,32 +330,56 @@ cib_remove_node(long id, const char *name) } static int +controller_remove_node(const char *node_name, long nodeid) +{ + pcmk_ipc_api_t *controld_api = NULL; + int rc; + + // Create controller IPC object + rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + return ENOTCONN; + } + + // Connect to controller (without main loop) + rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_sync); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to controller: %s\n", + pcmk_rc_str(rc)); + pcmk_free_ipc_api(controld_api); + return rc; + } + + rc = pcmk_ipc_purge_node(controld_api, node_name, nodeid); + if (rc != pcmk_rc_ok) { + fprintf(stderr, + "error: Could not clear node from controller's cache: %s\n", + pcmk_rc_str(rc)); + } + + pcmk_free_ipc_api(controld_api); + return pcmk_rc_ok; +} + +static int tools_remove_node_cache(const char *node_name, long nodeid, const char *target) { int rc = -1; - crm_ipc_t *conn = crm_ipc_new(target, 0); + crm_ipc_t *conn = NULL; xmlNode *cmd = NULL; + conn = crm_ipc_new(target, 0); if (!conn) { return -ENOTCONN; } - if (!crm_ipc_connect(conn)) { crm_perror(LOG_ERR, "Connection to %s failed", target); crm_ipc_destroy(conn); return -ENOTCONN; } - if(safe_str_eq(target, CRM_SYSTEM_CRMD)) { - // The controller requires a hello message before sending a request - rc = send_controller_hello(conn); - if (rc < 0) { - fprintf(stderr, "error: Could not register with controller: %s\n", - pcmk_strerror(rc)); - return rc; - } - } - crm_trace("Removing %s[%ld] from the %s membership cache", node_name, nodeid, target); @@ -427,9 +396,9 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) crm_xml_add_int(cmd, PCMK__XA_ATTR_NODE_ID, (int) nodeid); } - } else { - cmd = create_request(CRM_OP_RM_NODE_CACHE, - NULL, NULL, target, crm_system_name, pid_s); + } else { // Fencer or pacemakerd + cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, target, + crm_system_name, NULL); if (nodeid > 0) { crm_xml_set_id(cmd, "%ld", nodeid); } @@ -441,6 +410,7 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) target, node_name, nodeid, rc); if (rc > 0) { + // @TODO Should this be done just once after all the rest? rc = cib_remove_node(nodeid, node_name); } @@ -455,12 +425,12 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) static void remove_node(const char *target_uname) { + int rc; int d = 0; long nodeid = 0; const char *node_name = NULL; char *endptr = NULL; const char *daemons[] = { - CRM_SYSTEM_CRMD, "stonith-ng", T_ATTRD, CRM_SYSTEM_MCP, @@ -476,6 +446,12 @@ remove_node(const char *target_uname) node_name = target_uname; } + rc = controller_remove_node(node_name, nodeid); + if (rc != pcmk_rc_ok) { + exit_code = pcmk_rc2exitc(rc); + return; + } + for (d = 0; d < DIMOF(daemons); d++) { if (tools_remove_node_cache(node_name, nodeid, daemons[d])) { crm_err("Failed to connect to %s to remove node '%s'", @@ -545,12 +521,34 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) } static void +lost_pacemakerd(gpointer user_data) +{ + fprintf(stderr, "error: Lost connection to cluster\n"); + exit_code = CRM_EX_DISCONNECT; + g_main_loop_quit(mainloop); +} + +static void run_pacemakerd_mainloop(void) { crm_ipc_t *ipc = NULL; xmlNode *poke = NULL; + mainloop_io_t *source = NULL; - ipc = new_mainloop_for_ipc(CRM_SYSTEM_MCP, node_mcp_dispatch); + struct ipc_client_callbacks ipc_callbacks = { + .dispatch = node_mcp_dispatch, + .destroy = lost_pacemakerd + }; + + source = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, + NULL, &ipc_callbacks); + ipc = mainloop_get_ipc_client(source); + if (ipc == NULL) { + fprintf(stderr, + "error: Could not connect to cluster (is it running?)\n"); + exit_code = CRM_EX_DISCONNECT; + return; + } // Sending anything will get us a list of nodes poke = create_xml_node(NULL, "poke"); @@ -558,6 +556,7 @@ run_pacemakerd_mainloop(void) free_xml(poke); // Handle reply via node_mcp_dispatch() + mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); mainloop = NULL;