/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#include <glusterfs/common-utils.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
#include "glusterd-store.h"
#include "glusterd-syncop.h"
#include "glusterd-op-sm.h"
/*
 * Round X up to an int: when X exceeds its own (int) truncation the result is
 * (int)(X + 1), otherwise it is (int)(X).
 *
 * X is expanded several times, so only pass expressions without side effects.
 */
#define CEILING_POS(X) (!(((X) - (int)(X)) > 0) ? (int)(X) : (int)((X) + 1))
/*
 * Report whether @op should be treated as a read-only ("get") request.
 *
 * True for GD_OP_STATUS_VOLUME, and for a GD_OP_SET_VOLUME whose "volname"
 * entry is "help" or "help-xml" while the "key1" lookup returns a negative
 * value.  Everything else is reported as false.
 */
static gf_boolean_t
glusterd_is_get_op(xlator_t *this, glusterd_op_t op, dict_t *dict)
{
    char *name = NULL;
    char *first_key = NULL;

    if (op == GD_OP_STATUS_VOLUME)
        return _gf_true;
    if (op != GD_OP_SET_VOLUME)
        return _gf_false;

    /* The lookup result is judged through 'name', not the return code. */
    (void)dict_get_str(dict, "volname", &name);
    if (!name)
        return _gf_false;
    if ((strcmp(name, "help") != 0) && (strcmp(name, "help-xml") != 0))
        return _gf_false;

    /* A help request qualifies only when "key1" cannot be fetched. */
    if (dict_get_str(dict, "key1", &first_key) < 0)
        return _gf_true;

    return _gf_false;
}
/*
 * Decide whether server-quorum validation has to run for @op.
 *
 * Validation is skipped (false) for "get" style requests, and for volume
 * set/reset requests whose option resolves to one of the quorum options.
 * Every other case, including any lookup failure, yields true.
 */
gf_boolean_t
glusterd_is_quorum_validation_required(xlator_t *this, glusterd_op_t op,
                                       dict_t *dict)
{
    gf_boolean_t needed = _gf_true;
    char *opt_name = NULL;
    char *canonical = NULL;
    int rc = -1;

    if (glusterd_is_get_op(this, op, dict)) {
        needed = _gf_false;
        goto done;
    }

    /* Set requests carry the option under "key1", reset requests under
     * "key"; no other operation is inspected any further. */
    if (op == GD_OP_SET_VOLUME) {
        rc = dict_get_str(dict, "key1", &opt_name);
    } else if (op == GD_OP_RESET_VOLUME) {
        rc = dict_get_str(dict, "key", &opt_name);
    } else {
        goto done;
    }
    if (rc != 0)
        goto done;

    rc = glusterd_check_option_exists(opt_name, &canonical);
    if (rc > 0) {
        /* Prefer the corrected spelling when the lookup supplied one. */
        if (glusterd_is_quorum_option(canonical ? canonical : opt_name))
            needed = _gf_false;
    }

done:
    GF_FREE(canonical);
    return needed;
}
/*
 * Check whether @op may proceed with respect to server quorum.
 *
 * Returns 0 when validation is not required, when the volume name or the
 * volume itself cannot be looked up, when the volume does not use server
 * quorum, or when quorum is currently met.  Otherwise returns -1 and stores a
 * gf_strdup()'d message in *op_errstr.
 */
int
glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
                         char **op_errstr)
{
    char *vol_name = NULL;
    glusterd_volinfo_t *vol = NULL;

    if (!glusterd_is_quorum_validation_required(this, op, dict))
        return 0;

    if (dict_get_str(dict, "volname", &vol_name) != 0)
        return 0;

    if (glusterd_volinfo_find(vol_name, &vol) != 0)
        return 0;

    if (!glusterd_is_volume_in_server_quorum(vol))
        return 0;

    if (does_gd_meet_server_quorum(this))
        return 0;

    *op_errstr = gf_strdup("Quorum not met. Volume operation not allowed.");
    return -1;
}
/*
 * Return true when @option is exactly GLUSTERD_QUORUM_TYPE_KEY or
 * GLUSTERD_QUORUM_RATIO_KEY, false otherwise.
 */
gf_boolean_t
glusterd_is_quorum_option(char *option)
{
    /* NULL-terminated table of the recognised quorum option names. */
    static const char *const quorum_keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
                                              GLUSTERD_QUORUM_RATIO_KEY, NULL};
    const char *const *cursor = NULL;

    for (cursor = quorum_keys; *cursor != NULL; cursor++) {
        if (strcmp(option, *cursor) == 0)
            return _gf_true;
    }

    return _gf_false;
}
/*
 * Determine whether applying @value to @option alters the quorum settings
 * currently held in @options.
 *
 * @option is either "all" or a single option name.  Names that are neither
 * "all" nor a quorum option give false straight away.  For the relevant quorum
 * keys the stored value is compared with @value: an identical pair, or the
 * absence of every old and new value, yields false; anything else is true.
 */
gf_boolean_t
glusterd_is_quorum_changed(dict_t *options, char *option, char *value)
{
    xlator_t *this = THIS;
    const gf_boolean_t whole_set = (strcmp("all", option) == 0) ? _gf_true
                                                                : _gf_false;
    char *old_type = NULL;
    char *new_type = NULL;
    char *old_ratio = NULL;
    char *new_ratio = NULL;

    if (!whole_set && !glusterd_is_quorum_option(option))
        return _gf_false;

    if (whole_set || (strcmp(GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) {
        new_type = value;
        if (dict_get_str(options, GLUSTERD_QUORUM_TYPE_KEY, &old_type)) {
            gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
                   "dict_get_str failed on %s", GLUSTERD_QUORUM_TYPE_KEY);
        }
    }

    if (whole_set || (strcmp(GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) {
        new_ratio = value;
        if (dict_get_str(options, GLUSTERD_QUORUM_RATIO_KEY, &old_ratio)) {
            gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
                   "dict_get_str failed on %s", GLUSTERD_QUORUM_RATIO_KEY);
        }
    }

    /* An unchanged quorum type means nothing to reconfigure. */
    if (old_type && new_type && (strcmp(old_type, new_type) == 0))
        return _gf_false;

    /* Likewise for an unchanged ratio. */
    if (old_ratio && new_ratio && (strcmp(old_ratio, new_ratio) == 0))
        return _gf_false;

    /* Nothing stored before and nothing supplied now. */
    if (!old_ratio && !new_ratio && !old_type && !new_type)
        return _gf_false;

    return _gf_true;
}
/*
 * A peer counts toward the quorum population only when its contribution state
 * is QUORUM_UP or QUORUM_DOWN.
 */
static gf_boolean_t
_is_contributing_to_quorum(gd_quorum_contrib_t contrib)
{
    if ((contrib != QUORUM_UP) && (contrib != QUORUM_DOWN))
        return _gf_false;

    return _gf_true;
}
gf_boolean_t
does_quorum_meet(int active_count, int quorum_count)
{
return (active_count >= quorum_count);
}
/*
 * Compute the cluster-wide numbers used for server-quorum decisions.
 *
 * @active_count (optional, may be NULL): receives 1 for the local node plus
 *     the number of peers whose quorum_contrib is QUORUM_UP.
 * @quorum_count (dereferenced unconditionally): receives the number of nodes
 *     required for quorum.  When GLUSTERD_QUORUM_RATIO_KEY is present in
 *     conf->opts and parses as a percentage, this is that percentage of the
 *     contributing nodes, rounded up with CEILING_POS; otherwise it is
 *     (contributing * 50 / 100) + 1 in integer arithmetic.
 *
 * Always returns 0.
 */
int
glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
                                   int *quorum_count)
{
    glusterd_conf_t *priv = this->private;
    glusterd_peerinfo_t *peer = NULL;
    char *ratio_str = NULL;
    double percent = 0.0;
    int members = 1; /* the local node always counts */
    int needed = 0;

    if (active_count)
        *active_count = 1;

    RCU_READ_LOCK;
    cds_list_for_each_entry_rcu(peer, &priv->peers, uuid_list)
    {
        if (_is_contributing_to_quorum(peer->quorum_contrib))
            members++;
        if (active_count && (peer->quorum_contrib == QUORUM_UP))
            (*active_count)++;
    }
    RCU_READ_UNLOCK;

    if ((dict_get_str(priv->opts, GLUSTERD_QUORUM_RATIO_KEY, &ratio_str) ==
         0) &&
        (gf_string2percent(ratio_str, &percent) == 0)) {
        needed = CEILING_POS(members * percent / 100.0);
    } else {
        /* No usable ratio: require more than half of the members. */
        needed = (members * 50 / 100) + 1;
    }

    *quorum_count = needed;
    return 0;
}
/*
 * Return true when the volume's GLUSTERD_QUORUM_TYPE_KEY option exists and
 * equals GLUSTERD_SERVER_QUORUM; false when it is missing or differs.
 */
gf_boolean_t
glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo)
{
    char *type = NULL;

    if (dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &type) != 0)
        return _gf_false;

    return (strcmp(type, GLUSTERD_SERVER_QUORUM) == 0) ? _gf_true : _gf_false;
}
/*
 * Return true as soon as one volume in conf->volumes is found to use server
 * quorum; false when none does.
 */
gf_boolean_t
glusterd_is_any_volume_in_server_quorum(xlator_t *this)
{
    glusterd_conf_t *priv = this->private;
    glusterd_volinfo_t *vol = NULL;
    gf_boolean_t found = _gf_false;

    list_for_each_entry(vol, &priv->volumes, vol_list)
    {
        if (!glusterd_is_volume_in_server_quorum(vol))
            continue;
        found = _gf_true;
        break;
    }

    return found;
}
/*
 * Return true when the current count of active nodes satisfies the required
 * quorum count; false when it does not or when the counts cannot be obtained.
 */
gf_boolean_t
does_gd_meet_server_quorum(xlator_t *this)
{
    int needed = 0;
    int alive = 0;

    if (glusterd_get_quorum_cluster_counts(this, &alive, &needed) != 0)
        return _gf_false;

    if (!does_quorum_meet(alive, needed))
        return _gf_false;

    return _gf_true;
}
/*
 * Apply the outcome of a quorum calculation to one volume.
 *
 * @this:         xlator whose name is used for logging and which is handed to
 *                glusterd_is_local_brick().
 * @volinfo:      volume to act on; its quorum_status field is updated here.
 * @meets_quorum: whether the cluster currently meets server quorum.
 *
 * Summary of the flow:
 *  - A volume that is not in GLUSTERD_STATUS_STARTED is marked
 *    NOT_APPLICABLE_QUORUM and nothing else happens.
 *  - Otherwise the new status is derived from whether the volume uses server
 *    quorum and from @meets_quorum.
 *  - If the status did not change, glusterd_brick_start() is called on every
 *    local brick with its last argument set to _gf_true (presumably a
 *    connect-only request, judging by the "Failed to connect" log message --
 *    confirm against glusterd_brick_start()).
 *  - If the status changed, local bricks are stopped (quorum lost) or started
 *    (any other new status), the new status is recorded, and on
 *    MEETS_QUORUM the volinfo is written to the store.
 *
 * Failures of individual brick operations are logged and do not abort the
 * loop.  The function returns nothing.
 */
void
glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo,
gf_boolean_t meets_quorum)
{
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM;
gf_boolean_t follows_quorum = _gf_false;
gf_boolean_t quorum_status_unchanged = _gf_false;
/* Quorum only applies to started volumes. */
if (volinfo->status != GLUSTERD_STATUS_STARTED) {
volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
goto out;
}
/* Work out the status the volume should have after this event. */
follows_quorum = glusterd_is_volume_in_server_quorum(volinfo);
if (follows_quorum) {
if (meets_quorum)
quorum_status = MEETS_QUORUM;
else
quorum_status = DOESNT_MEET_QUORUM;
} else {
quorum_status = NOT_APPLICABLE_QUORUM;
}
/*
 * The following check is added to prevent spurious brick starts when
 * events occur that affect quorum.
 * Example:
 * There is a cluster of 10 peers. Volume is in quorum. User
 * takes down one brick from the volume to perform maintenance.
 * Suddenly one of the peers go down. Cluster is still in quorum. But
 * because of this 'peer going down' event, quorum is calculated and
 * the bricks that are down are brought up again. In this process it
 * also brings up the brick that is purposefully taken down.
 */
if (volinfo->quorum_status == quorum_status) {
quorum_status_unchanged = _gf_true;
goto out;
}
/* Status changed: announce a regain or a loss (no message is emitted when
 * the new status is NOT_APPLICABLE_QUORUM). */
if (quorum_status == MEETS_QUORUM) {
gf_msg(this->name, GF_LOG_CRITICAL, 0,
GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS,
"Server quorum regained for volume %s. Starting local "
"bricks.",
volinfo->volname);
gf_event(EVENT_QUORUM_REGAINED, "volume=%s", volinfo->volname);
} else if (quorum_status == DOESNT_MEET_QUORUM) {
gf_msg(this->name, GF_LOG_CRITICAL, 0,
GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS,
"Server quorum lost for volume %s. Stopping local "
"bricks.",
volinfo->volname);
gf_event(EVENT_QUORUM_LOST, "volume=%s", volinfo->volname);
}
/* Only bricks hosted on this node are stopped or started. */
list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
if (!glusterd_is_local_brick(this, volinfo, brickinfo))
continue;
if (quorum_status == DOESNT_MEET_QUORUM) {
ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
"Failed to "
"stop brick %s:%s",
brickinfo->hostname, brickinfo->path);
}
} else {
/* Reached for MEETS_QUORUM and NOT_APPLICABLE_QUORUM alike. Bricks
 * whose start has already been triggered are skipped; the start call
 * itself runs with the brick's restart_mutex held. */
if (!brickinfo->start_triggered) {
pthread_mutex_lock(&brickinfo->restart_mutex);
{
/* coverity[SLEEP] */
ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
_gf_false);
}
pthread_mutex_unlock(&brickinfo->restart_mutex);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
GD_MSG_BRICK_DISCONNECTED, "Failed to start %s:%s",
brickinfo->hostname, brickinfo->path);
}
}
}
}
/* Record the new status only after the bricks have been acted upon. */
volinfo->quorum_status = quorum_status;
if (quorum_status == MEETS_QUORUM) {
/* bricks might have been restarted and so as the port change
 * might have happened
 */
ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
"Failed to write volinfo for volume %s", volinfo->volname);
goto out;
}
}
out:
/* Unchanged status: no stop/start cycle, just the _gf_true variant of
 * glusterd_brick_start() on each local brick. */
if (quorum_status_unchanged) {
list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
if (!glusterd_is_local_brick(this, volinfo, brickinfo))
continue;
ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED,
"Failed to "
"connect to %s:%s",
brickinfo->hostname, brickinfo->path);
}
}
}
return;
}
/*
 * Recompute cluster quorum and apply the result to every volume.
 *
 * conf->pending_quorum_action is raised before glusterd_lock() is attempted.
 * If the lock cannot be taken the function returns the lock error and leaves
 * the flag raised; otherwise the flag is cleared after the lock is released.
 *
 * Returns 0 on success, or the non-zero result of glusterd_lock() /
 * glusterd_get_quorum_cluster_counts() on failure.
 */
int
glusterd_do_quorum_action()
{
    xlator_t *this = THIS;
    glusterd_conf_t *priv = this->private;
    glusterd_volinfo_t *vol = NULL;
    gf_boolean_t quorate = _gf_false;
    int alive = 0;
    int needed = 0;
    int rc = 0;

    priv->pending_quorum_action = _gf_true;

    rc = glusterd_lock(priv->uuid);
    if (rc != 0)
        return rc;

    rc = glusterd_get_quorum_cluster_counts(this, &alive, &needed);
    if (rc == 0) {
        if (does_quorum_meet(alive, needed))
            quorate = _gf_true;

        list_for_each_entry(vol, &priv->volumes, vol_list)
        {
            glusterd_do_volume_quorum_action(this, vol, quorate);
        }
    }

    (void)glusterd_unlock(priv->uuid);
    priv->pending_quorum_action = _gf_false;

    return rc;
}
/*
 * Classify the quorum situation of @volinfo ahead of a brick start.
 *
 * Return value:
 *   0 - the volume uses server quorum and quorum is not met
 *   1 - the volume uses server quorum and quorum is met
 *   2 - quorum is not applicable (the volume does not use server quorum)
 */
int
check_quorum_for_brick_start(glusterd_volinfo_t *volinfo,
                             gf_boolean_t node_quorum)
{
    if (!glusterd_is_volume_in_server_quorum(volinfo))
        return 2;

    return node_quorum ? 1 : 0;
}