/*
Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include <glusterfs/glusterfs.h>
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-geo-rep.h"
#include "glusterd-store.h"
#include "glusterd-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
#include "glusterd-nfs-svc.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
#include "glusterd-mgmt.h"
#include <glusterfs/run.h>
#include <glusterfs/syscall.h>
#include <signal.h>
int
glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
glusterd_op_t op,
dict_t *dict);
int
__glusterd_handle_replace_brick(rpcsvc_request_t *req)
{
int32_t ret = -1;
gf_cli_req cli_req = {{
0,
}};
dict_t *dict = NULL;
char *src_brick = NULL;
char *dst_brick = NULL;
char *cli_op = NULL;
glusterd_op_t op = -1;
char *volname = NULL;
char msg[256] = {
0,
};
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
conf = this->private;
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
"Failed to decode "
"request received from cli");
req->rpc_err = GARBAGE_ARGS;
goto out;
}
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REPLACE_BRK_REQ_RCVD,
"Received replace brick req");
if (cli_req.dict.dict_len) {
/* Unserialize the dictionary */
dict = dict_new();
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
"failed to "
"unserialize req-buffer to dictionary");
snprintf(msg, sizeof(msg),
"Unable to decode the "
"command");
goto out;
}
}
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
snprintf(msg, sizeof(msg), "Could not get volume name");
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
goto out;
}
ret = dict_get_strn(dict, "operation", SLEN("operation"), &cli_op);
if (ret) {
gf_msg_debug(this->name, 0, "dict_get on operation failed");
snprintf(msg, sizeof(msg), "Could not get operation");
goto out;
}
op = gd_cli_to_gd_op(cli_op);
if (conf->op_version < GD_OP_VERSION_3_9_0 &&
strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
snprintf(msg, sizeof(msg),
"Cannot execute command. The "
"cluster is operating at version %d. reset-brick "
"command %s is unavailable in this version.",
conf->op_version, gd_rb_op_to_str(cli_op));
ret = -1;
goto out;
}
ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick);
if (ret) {
snprintf(msg, sizeof(msg), "Failed to get src brick");
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
goto out;
}
gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
if (!strcmp(cli_op, "GF_RESET_OP_COMMIT") ||
!strcmp(cli_op, "GF_RESET_OP_COMMIT_FORCE") ||
!strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick);
if (ret) {
snprintf(msg, sizeof(msg),
"Failed to get"
"dest brick");
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
msg);
goto out;
}
gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
}
gf_msg(this->name, GF_LOG_INFO, 0,
(op == GD_OP_REPLACE_BRICK)
? GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD
: GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD,
"Received %s request.", gd_rb_op_to_str(cli_op));
ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(req, op, dict);
out:
if (ret) {
glusterd_op_send_cli_response(op, ret, 0, req, dict, msg);
}
ret = 0;
free(cli_req.dict.dict_val); // malloced by xdr
return ret;
}
int
glusterd_handle_reset_brick(rpcsvc_request_t *req)
{
return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
}
int
glusterd_handle_replace_brick(rpcsvc_request_t *req)
{
return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
}
int
glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
dict_t *rsp_dict)
{
int ret = 0;
char *src_brick = NULL;
char *dst_brick = NULL;
char *volname = NULL;
char *op = NULL;
glusterd_op_t gd_op = -1;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *src_brickinfo = NULL;
char *host = NULL;
char msg[2048] = {0};
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_brickinfo_t *dst_brickinfo = NULL;
glusterd_conf_t *priv = NULL;
char pidfile[PATH_MAX] = {0};
xlator_t *this = NULL;
gf_boolean_t is_force = _gf_false;
char *dup_dstbrick = NULL;
this = THIS;
GF_ASSERT(this);
priv = this->private;
GF_ASSERT(priv);
ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo,
&src_brick, &src_brickinfo, pidfile,
op_errstr, rsp_dict);
if (ret)
goto out;
if (volinfo->type == GF_CLUSTER_TYPE_NONE) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_NOT_PERMITTED,
"replace-brick is not permitted on distribute only "
"volumes");
gf_asprintf(op_errstr,
"replace-brick is not permitted on "
"distribute only volumes. Please use add-brick "
"and remove-brick operations instead.");
ret = -1;
goto out;
}
ret = glusterd_validate_quorum(this, gd_op, dict, op_errstr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
"Server quorum not met. Rejecting operation.");
goto out;
}
if (strcmp(op, "GF_REPLACE_OP_COMMIT_FORCE")) {
ret = -1;
goto out;
} else {
is_force = _gf_true;
}
ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
&dst_brickinfo, &host, dict,
&dup_dstbrick);
if (ret)
goto out;
ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg,
sizeof(msg), op);
/* fail if brick being replaced with itself */
if (ret) {
*op_errstr = gf_strdup(msg);
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s",
*op_errstr);
goto out;
}
volinfo->rep_brick.src_brick = src_brickinfo;
volinfo->rep_brick.dst_brick = dst_brickinfo;
if (glusterd_rb_check_bricks(volinfo, src_brickinfo, dst_brickinfo)) {
ret = -1;
*op_errstr = gf_strdup(
"Incorrect source or "
"destination brick");
if (*op_errstr)
gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
"%s", *op_errstr);
goto out;
}
if (gf_is_local_addr(host)) {
ret = glusterd_validate_and_create_brickpath(
dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
is_force, _gf_false);
if (ret)
goto out;
}
if (!gf_is_local_addr(host)) {
RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, host);
if (peerinfo == NULL) {
ret = -1;
snprintf(msg, sizeof(msg), "%s, is not a friend", host);
*op_errstr = gf_strdup(msg);
} else if (!peerinfo->connected) {
snprintf(msg, sizeof(msg),
"%s, is not connected at "
"the moment",
host);
*op_errstr = gf_strdup(msg);
ret = -1;
} else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
snprintf(msg, sizeof(msg),
"%s, is not befriended "
"at the moment",
host);
*op_errstr = gf_strdup(msg);
ret = -1;
}
RCU_READ_UNLOCK;
if (ret)
goto out;
} else if (priv->op_version >= GD_OP_VERSION_3_6_0) {
/* A bricks mount dir is required only by snapshots which were
* introduced in gluster-3.6.0
*/
if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) {
ret = glusterd_get_brick_mount_dir(dst_brickinfo->path,
dst_brickinfo->hostname,
dst_brickinfo->mount_dir);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
"Failed to get brick mount_dir");
goto out;
}
ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir",
dst_brickinfo->mount_dir);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set brick.mount_dir");
goto out;
}
}
ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), 1);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set local_brick_count");
goto out;
}
}
ret = 0;
out:
GF_FREE(dup_dstbrick);
gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
int
glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick,
char *new_brick, dict_t *dict)
{
char *brick_mount_dir = NULL;
glusterd_brickinfo_t *old_brickinfo = NULL;
glusterd_brickinfo_t *new_brickinfo = NULL;
int32_t ret = -1;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
struct statvfs brickstat = {
0,
};
this = THIS;
GF_ASSERT(this);
GF_ASSERT(dict);
GF_ASSERT(volinfo);
conf = this->private;
GF_ASSERT(conf);
ret = glusterd_brickinfo_new_from_brick(new_brick, &new_brickinfo, _gf_true,
NULL);
if (ret)
goto out;
ret = glusterd_resolve_brick(new_brickinfo);
if (ret)
goto out;
if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
ret = sys_statvfs(new_brickinfo->path, &brickstat);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED,
"Failed to fetch disk utilization "
"from the brick (%s:%s). Please check the health of "
"the brick. Error code was %s",
new_brickinfo->hostname, new_brickinfo->path,
strerror(errno));
goto out;
}
new_brickinfo->statfs_fsid = brickstat.f_fsid;
}
ret = glusterd_volume_brickinfo_get_by_brick(old_brick, volinfo,
&old_brickinfo, _gf_false);
if (ret)
goto out;
(void)snprintf(new_brickinfo->brick_id, sizeof(new_brickinfo->brick_id),
"%s", old_brickinfo->brick_id);
new_brickinfo->port = old_brickinfo->port;
/* A bricks mount dir is required only by snapshots which were
* introduced in gluster-3.6.0
*/
if (conf->op_version >= GD_OP_VERSION_3_6_0) {
ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"),
&brick_mount_dir);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno,
GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
"brick1.mount_dir not present");
goto out;
}
(void)snprintf(new_brickinfo->mount_dir,
sizeof(new_brickinfo->mount_dir), "%s", brick_mount_dir);
}
cds_list_add(&new_brickinfo->brick_list, &old_brickinfo->brick_list);
volinfo->brick_count++;
ret = glusterd_op_perform_remove_brick(volinfo, old_brick, 1, NULL);
if (ret)
goto out;
/* if the volume is a replicate volume, do: */
if (glusterd_is_volume_replicate(volinfo)) {
if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
ret = glusterd_handle_replicate_brick_ops(volinfo, new_brickinfo,
GD_OP_REPLACE_BRICK);
if (ret < 0)
goto out;
}
}
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
ret = glusterd_brick_start(volinfo, new_brickinfo, _gf_false,
_gf_false);
if (ret)
goto out;
}
out:
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
}
int
glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
{
int ret = 0;
char *replace_op = NULL;
glusterd_volinfo_t *volinfo = NULL;
char *volname = NULL;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
char *src_brick = NULL;
char *dst_brick = NULL;
glusterd_brickinfo_t *src_brickinfo = NULL;
glusterd_brickinfo_t *dst_brickinfo = NULL;
this = THIS;
GF_ASSERT(this);
priv = this->private;
GF_ASSERT(priv);
ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to get src brick");
goto out;
}
gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to get dst brick");
goto out;
}
gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
ret = dict_get_strn(dict, "operation", SLEN("operation"), &replace_op);
if (ret) {
gf_msg_debug(this->name, 0, "dict_get on operation failed");
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
"Unable to allocate memory");
goto out;
}
ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo,
&src_brickinfo, _gf_false);
if (ret) {
gf_msg_debug(this->name, 0, "Unable to get src-brickinfo");
goto out;
}
ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL,
"Unable to get "
"replace brick destination brickinfo");
goto out;
}
ret = glusterd_resolve_brick(dst_brickinfo);
if (ret) {
gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo");
goto out;
}
ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict);
if (ret)
goto out;
if (strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
ret = -1;
goto out;
}
ret = glusterd_svcs_stop(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
"Unable to stop gluster services, ret: %d", ret);
}
ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick,
dict);
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL,
"Unable to add dst-brick: "
"%s to volume: %s",
dst_brick, volinfo->volname);
(void)glusterd_svcs_manager(volinfo);
goto out;
}
volinfo->rebal.defrag_status = 0;
ret = glusterd_svcs_manager(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0,
GD_MSG_GLUSTER_SERVICE_START_FAIL,
"Failed to start one or more gluster services.");
}
ret = glusterd_fetchspec_notify(THIS);
glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick);
volinfo->rep_brick.src_brick = NULL;
volinfo->rep_brick.dst_brick = NULL;
if (!ret)
ret = glusterd_store_volinfo(volinfo,
GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL,
"Couldn't store"
" replace brick operation's state");
out:
return ret;
}
int
glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
glusterd_op_t op,
dict_t *dict)
{
int32_t ret = -1;
int32_t op_ret = -1;
uint32_t txn_generation = 0;
uint32_t op_errno = 0;
char *op_errstr = NULL;
dict_t *req_dict = NULL;
dict_t *tmp_dict = NULL;
uuid_t *originator_uuid = NULL;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
gf_boolean_t is_acquired = _gf_false;
this = THIS;
GF_ASSERT(this);
GF_ASSERT(req);
GF_ASSERT(dict);
conf = this->private;
GF_ASSERT(conf);
txn_generation = conf->generation;
originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!originator_uuid) {
ret = -1;
goto out;
}
gf_uuid_copy(*originator_uuid, MY_UUID);
ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
sizeof(uuid_t));
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set originator_uuid.");
GF_FREE(originator_uuid);
goto out;
}
ret = dict_set_int32n(dict, "is_synctasked", SLEN("is_synctasked"),
_gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set synctasked flag to true.");
goto out;
}
tmp_dict = dict_new();
if (!tmp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
"Unable to create dict");
goto out;
}
dict_copy(dict, tmp_dict);
ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
&is_acquired, txn_generation);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
"mgmt_v3 lockdown failed.");
goto out;
}
ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
if (op_errstr == NULL)
gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
goto out;
}
ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
txn_generation);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
"Pre Validation Failed");
goto out;
}
ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
txn_generation);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
"Commit Op Failed");
goto out;
}
ret = 0;
out:
op_ret = ret;
(void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
is_acquired, txn_generation);
if (is_acquired) {
ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
"Failed to release mgmt_v3 locks on "
"localhost.");
op_ret = ret;
}
}
/* SEND CLI RESPONSE */
glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr);
if (req_dict)
dict_unref(req_dict);
if (tmp_dict)
dict_unref(tmp_dict);
if (op_errstr) {
GF_FREE(op_errstr);
op_errstr = NULL;
}
return 0;
}