/*
* Copyright (c) 2003, 2004, 2005 Christophe Varoqui
* Copyright (c) 2005 Benjamin Marzinski, Redhat
* Copyright (c) 2005 Kiyoshi Ueda, NEC
* Copyright (c) 2005 Patrick Caulfield, Redhat
* Copyright (c) 2005 Edward Goggin, EMC
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/file.h>
#include <errno.h>
#include <ctype.h>
#include <libdevmapper.h>
#include <libudev.h>
#include "mpath_cmd.h"
#include "checkers.h"
#include "vector.h"
#include "memory.h"
#include "devmapper.h"
#include "defaults.h"
#include "structs.h"
#include "structs_vec.h"
#include "dmparser.h"
#include "config.h"
#include "blacklist.h"
#include "propsel.h"
#include "discovery.h"
#include "debug.h"
#include "switchgroup.h"
#include "dm-generic.h"
#include "print.h"
#include "configure.h"
#include "pgpolicies.h"
#include "dict.h"
#include "alias.h"
#include "prio.h"
#include "util.h"
#include "uxsock.h"
#include "wwids.h"
#include "sysfs.h"
#include "io_err_stat.h"
/* Time in ms to wait for pending checkers in setup_map() */
#define WAIT_CHECKERS_PENDING_MS 10
#define WAIT_ALL_CHECKERS_PENDING_MS 90
/* group paths in pg by host adapter
*/
int group_by_host_adapter(struct pathgroup *pgp, vector adapters)
{
struct adapter_group *agp;
struct host_group *hgp;
struct path *pp, *pp1;
char adapter_name1[SLOT_NAME_SIZE];
char adapter_name2[SLOT_NAME_SIZE];
int i, j;
int found_hostgroup = 0;
while (VECTOR_SIZE(pgp->paths) > 0) {
pp = VECTOR_SLOT(pgp->paths, 0);
if (sysfs_get_host_adapter_name(pp, adapter_name1))
goto out;
/* create a new host adapter group
*/
agp = alloc_adaptergroup();
if (!agp)
goto out;
agp->pgp = pgp;
strlcpy(agp->adapter_name, adapter_name1, SLOT_NAME_SIZE);
store_adaptergroup(adapters, agp);
/* create a new host port group
*/
hgp = alloc_hostgroup();
if (!hgp)
goto out;
if (store_hostgroup(agp->host_groups, hgp))
goto out;
hgp->host_no = pp->sg_id.host_no;
agp->num_hosts++;
if (store_path(hgp->paths, pp))
goto out;
hgp->num_paths++;
/* delete path from path group
*/
vector_del_slot(pgp->paths, 0);
/* add all paths belonging to same host adapter
*/
vector_foreach_slot(pgp->paths, pp1, i) {
if (sysfs_get_host_adapter_name(pp1, adapter_name2))
goto out;
if (strcmp(adapter_name1, adapter_name2) == 0) {
found_hostgroup = 0;
vector_foreach_slot(agp->host_groups, hgp, j) {
if (hgp->host_no == pp1->sg_id.host_no) {
if (store_path(hgp->paths, pp1))
goto out;
hgp->num_paths++;
found_hostgroup = 1;
break;
}
}
if (!found_hostgroup) {
/* this path belongs to new host port
* within this adapter
*/
hgp = alloc_hostgroup();
if (!hgp)
goto out;
if (store_hostgroup(agp->host_groups, hgp))
goto out;
agp->num_hosts++;
if (store_path(hgp->paths, pp1))
goto out;
hgp->host_no = pp1->sg_id.host_no;
hgp->num_paths++;
}
/* delete paths from original path_group
* as they are added into adapter group now
*/
vector_del_slot(pgp->paths, i);
i--;
}
}
}
return 0;
out: /* add back paths into pg as re-ordering failed
*/
vector_foreach_slot(adapters, agp, i) {
vector_foreach_slot(agp->host_groups, hgp, j) {
while (VECTOR_SIZE(hgp->paths) > 0) {
pp = VECTOR_SLOT(hgp->paths, 0);
if (store_path(pgp->paths, pp))
condlog(3, "failed to restore "
"path %s into path group",
pp->dev);
vector_del_slot(hgp->paths, 0);
}
}
}
free_adaptergroup(adapters);
return 1;
}
/* re-order paths in pg by alternating adapters and host ports
* for optimized selection
*/
int order_paths_in_pg_by_alt_adapters(struct pathgroup *pgp, vector adapters,
int total_paths)
{
int next_adapter_index = 0;
struct adapter_group *agp;
struct host_group *hgp;
struct path *pp;
while (total_paths > 0) {
agp = VECTOR_SLOT(adapters, next_adapter_index);
if (!agp) {
condlog(0, "can't get adapter group %d", next_adapter_index);
return 1;
}
hgp = VECTOR_SLOT(agp->host_groups, agp->next_host_index);
if (!hgp) {
condlog(0, "can't get host group %d of adapter group %d", next_adapter_index, agp->next_host_index);
return 1;
}
if (!hgp->num_paths) {
agp->next_host_index++;
agp->next_host_index %= agp->num_hosts;
next_adapter_index++;
next_adapter_index %= VECTOR_SIZE(adapters);
continue;
}
pp = VECTOR_SLOT(hgp->paths, 0);
if (store_path(pgp->paths, pp))
return 1;
total_paths--;
vector_del_slot(hgp->paths, 0);
hgp->num_paths--;
agp->next_host_index++;
agp->next_host_index %= agp->num_hosts;
next_adapter_index++;
next_adapter_index %= VECTOR_SIZE(adapters);
}
/* all paths are added into path_group
* in crafted child order
*/
return 0;
}
/* round-robin: order paths in path group to alternate
* between all host adapters
*/
int rr_optimize_path_order(struct pathgroup *pgp)
{
vector adapters;
struct path *pp;
int total_paths;
int i;
total_paths = VECTOR_SIZE(pgp->paths);
vector_foreach_slot(pgp->paths, pp, i) {
if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP &&
pp->sg_id.proto_id != SCSI_PROTOCOL_SAS &&
pp->sg_id.proto_id != SCSI_PROTOCOL_ISCSI &&
pp->sg_id.proto_id != SCSI_PROTOCOL_SRP) {
/* return success as default path order
* is maintained in path group
*/
return 0;
}
}
adapters = vector_alloc();
if (!adapters)
return 0;
/* group paths in path group by host adapters
*/
if (group_by_host_adapter(pgp, adapters)) {
/* already freed adapters */
condlog(3, "Failed to group paths by adapters");
return 0;
}
/* re-order paths in pg to alternate between adapters and host ports
*/
if (order_paths_in_pg_by_alt_adapters(pgp, adapters, total_paths)) {
condlog(3, "Failed to re-order paths in pg by adapters "
"and host ports");
free_adaptergroup(adapters);
/* return failure as original paths are
* removed form pgp
*/
return 1;
}
free_adaptergroup(adapters);
return 0;
}
static int wait_for_pending_paths(struct multipath *mpp,
struct config *conf,
int n_pending, int goal, int wait_ms)
{
static const struct timespec millisec =
{ .tv_sec = 0, .tv_nsec = 1000*1000 };
int i, j;
struct path *pp;
struct pathgroup *pgp;
struct timespec ts;
do {
vector_foreach_slot(mpp->pg, pgp, i) {
vector_foreach_slot(pgp->paths, pp, j) {
if (pp->state != PATH_PENDING)
continue;
pp->state = get_state(pp, conf,
0, PATH_PENDING);
if (pp->state != PATH_PENDING &&
--n_pending <= goal)
return 0;
}
}
ts = millisec;
while (nanosleep(&ts, &ts) != 0 && errno == EINTR)
/* nothing */;
} while (--wait_ms > 0);
return n_pending;
}
int setup_map(struct multipath *mpp, char *params, int params_size,
struct vectors *vecs)
{
struct pathgroup * pgp;
struct config *conf;
int i, n_paths, marginal_pathgroups;
/*
* don't bother if devmap size is unknown
*/
if (mpp->size <= 0) {
condlog(3, "%s: devmap size is unknown", mpp->alias);
return 1;
}
/*
* free features, selector, and hwhandler properties if they are being reused
*/
free_multipath_attributes(mpp);
if (mpp->disable_queueing && VECTOR_SIZE(mpp->paths) != 0)
mpp->disable_queueing = 0;
/*
* properties selectors
*
* Ordering matters for some properties:
* - features after no_path_retry and retain_hwhandler
* - hwhandler after retain_hwhandler
* No guarantee that this list is complete, check code in
* propsel.c if in doubt.
*/
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
select_pgfailback(conf, mpp);
select_pgpolicy(conf, mpp);
select_selector(conf, mpp);
select_no_path_retry(conf, mpp);
select_retain_hwhandler(conf, mpp);
select_features(conf, mpp);
select_hwhandler(conf, mpp);
select_rr_weight(conf, mpp);
select_minio(conf, mpp);
select_mode(conf, mpp);
select_uid(conf, mpp);
select_gid(conf, mpp);
select_fast_io_fail(conf, mpp);
select_dev_loss(conf, mpp);
select_reservation_key(conf, mpp);
select_deferred_remove(conf, mpp);
select_marginal_path_err_sample_time(conf, mpp);
select_marginal_path_err_rate_threshold(conf, mpp);
select_marginal_path_err_recheck_gap_time(conf, mpp);
select_marginal_path_double_failed_time(conf, mpp);
select_san_path_err_threshold(conf, mpp);
select_san_path_err_forget_rate(conf, mpp);
select_san_path_err_recovery_time(conf, mpp);
select_delay_checks(conf, mpp);
select_skip_kpartx(conf, mpp);
select_max_sectors_kb(conf, mpp);
select_ghost_delay(conf, mpp);
select_flush_on_last_del(conf, mpp);
sysfs_set_scsi_tmo(mpp, conf->checkint);
marginal_pathgroups = conf->marginal_pathgroups;
pthread_cleanup_pop(1);
if (marginal_path_check_enabled(mpp))
start_io_err_stat_thread(vecs);
n_paths = VECTOR_SIZE(mpp->paths);
/*
* assign paths to path groups -- start with no groups and all paths
* in mpp->paths
*/
if (mpp->pg) {
vector_foreach_slot (mpp->pg, pgp, i)
free_pathgroup(pgp, KEEP_PATHS);
vector_free(mpp->pg);
mpp->pg = NULL;
}
if (group_paths(mpp, marginal_pathgroups))
return 1;
/*
* If async state detection is used, see if pending state checks
* have finished, to get nr_active right. We can't wait until the
* checkers time out, as that may take 30s or more, and we are
* holding the vecs lock.
*/
if (conf->force_sync == 0 && n_paths > 0) {
int n_pending = pathcount(mpp, PATH_PENDING);
if (n_pending > 0)
n_pending = wait_for_pending_paths(
mpp, conf, n_pending, 0,
WAIT_CHECKERS_PENDING_MS);
/* ALL paths pending - wait some more, but be satisfied
with only some paths finished */
if (n_pending == n_paths)
n_pending = wait_for_pending_paths(
mpp, conf, n_pending,
n_paths >= 4 ? 2 : 1,
WAIT_ALL_CHECKERS_PENDING_MS);
if (n_pending > 0)
condlog(2, "%s: setting up map with %d/%d path checkers pending",
mpp->alias, n_pending, n_paths);
}
/*
* ponders each path group and determine highest prio pg
* to switch over (default to first)
*/
mpp->bestpg = select_path_group(mpp);
/* re-order paths in all path groups in an optimized way
* for round-robin path selectors to get maximum throughput.
*/
if (!strncmp(mpp->selector, "round-robin", 11)) {
vector_foreach_slot(mpp->pg, pgp, i) {
if (VECTOR_SIZE(pgp->paths) <= 2)
continue;
if (rr_optimize_path_order(pgp)) {
condlog(2, "cannot re-order paths for "
"optimization: %s",
mpp->alias);
return 1;
}
}
}
/*
* transform the mp->pg vector of vectors of paths
* into a mp->params strings to feed the device-mapper
*/
if (assemble_map(mpp, params, params_size)) {
condlog(0, "%s: problem assembing map", mpp->alias);
return 1;
}
return 0;
}
static void
compute_pgid(struct pathgroup * pgp)
{
struct path * pp;
int i;
vector_foreach_slot (pgp->paths, pp, i)
pgp->id ^= (long)pp;
}
static int
pgcmp (struct multipath * mpp, struct multipath * cmpp)
{
int i, j;
struct pathgroup * pgp;
struct pathgroup * cpgp;
int r = 0;
if (!mpp)
return 0;
vector_foreach_slot (mpp->pg, pgp, i) {
compute_pgid(pgp);
vector_foreach_slot (cmpp->pg, cpgp, j) {
if (pgp->id == cpgp->id &&
!pathcmp(pgp, cpgp)) {
r = 0;
break;
}
r++;
}
if (r)
return r;
}
return r;
}
static struct udev_device *
get_udev_for_mpp(const struct multipath *mpp)
{
dev_t devnum;
struct udev_device *udd;
if (!mpp || !mpp->dmi) {
condlog(1, "%s called with empty mpp", __func__);
return NULL;
}
devnum = makedev(mpp->dmi->major, mpp->dmi->minor);
udd = udev_device_new_from_devnum(udev, 'b', devnum);
if (!udd) {
condlog(1, "failed to get udev device for %s", mpp->alias);
return NULL;
}
return udd;
}
static void
trigger_udev_change(const struct multipath *mpp)
{
static const char change[] = "change";
struct udev_device *udd = get_udev_for_mpp(mpp);
if (!udd)
return;
condlog(3, "triggering %s uevent for %s", change, mpp->alias);
sysfs_attr_set_value(udd, "uevent", change, sizeof(change)-1);
udev_device_unref(udd);
}
static void trigger_partitions_udev_change(struct udev_device *dev,
const char *action, int len)
{
struct udev_enumerate *part_enum;
struct udev_list_entry *item;
part_enum = udev_enumerate_new(udev);
if (!part_enum)
return;
if (udev_enumerate_add_match_parent(part_enum, dev) < 0 ||
udev_enumerate_add_match_subsystem(part_enum, "block") < 0 ||
udev_enumerate_scan_devices(part_enum) < 0)
goto unref;
udev_list_entry_foreach(item,
udev_enumerate_get_list_entry(part_enum)) {
const char *syspath;
struct udev_device *part;
syspath = udev_list_entry_get_name(item);
part = udev_device_new_from_syspath(udev, syspath);
if (!part)
continue;
if (!strcmp("partition", udev_device_get_devtype(part))) {
condlog(4, "%s: triggering %s event for %s", __func__,
action, syspath);
sysfs_attr_set_value(part, "uevent", action, len);
}
udev_device_unref(part);
}
unref:
udev_enumerate_unref(part_enum);
}
void
trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
{
struct pathgroup *pgp;
struct path *pp;
int i, j;
/*
* If a path changes from multipath to non-multipath, we must
* synthesize an artificial "add" event, otherwise the LVM2 rules
* (69-lvm2-lvmetad.rules) won't pick it up. Otherwise, we'd just
* irritate ourselves with an "add", so use "change".
*/
const char *action = is_mpath ? "change" : "add";
if (!mpp || !mpp->pg)
return;
vector_foreach_slot (mpp->pg, pgp, i) {
if (!pgp->paths)
continue;
vector_foreach_slot(pgp->paths, pp, j) {
const char *env;
if (!pp->udev)
continue;
/*
* Paths that are already classified as multipath
* members don't need another uevent.
*/
env = udev_device_get_property_value(
pp->udev, "DM_MULTIPATH_DEVICE_PATH");
if (is_mpath && env != NULL && !strcmp(env, "1")) {
/*
* If FIND_MULTIPATHS_WAIT_UNTIL is not "0",
* path is in "maybe" state and timer is running
* Send uevent now (see multipath.rules).
*/
env = udev_device_get_property_value(
pp->udev, "FIND_MULTIPATHS_WAIT_UNTIL");
if (env == NULL || !strcmp(env, "0"))
continue;
} else if (!is_mpath &&
(env == NULL || !strcmp(env, "0")))
continue;
condlog(3, "triggering %s uevent for %s (is %smultipath member)",
action, pp->dev, is_mpath ? "" : "no ");
sysfs_attr_set_value(pp->udev, "uevent",
action, strlen(action));
trigger_partitions_udev_change(pp->udev, action,
strlen(action));
}
}
mpp->needs_paths_uevent = 0;
}
static int
is_mpp_known_to_udev(const struct multipath *mpp)
{
struct udev_device *udd = get_udev_for_mpp(mpp);
int ret = (udd != NULL);
udev_device_unref(udd);
return ret;
}
static int
sysfs_set_max_sectors_kb(struct multipath *mpp, int is_reload)
{
struct pathgroup * pgp;
struct path *pp;
char buff[11];
int i, j, ret, err = 0;
struct udev_device *udd;
int max_sectors_kb;
if (mpp->max_sectors_kb == MAX_SECTORS_KB_UNDEF)
return 0;
max_sectors_kb = mpp->max_sectors_kb;
if (is_reload) {
if (!mpp->dmi && dm_get_info(mpp->alias, &mpp->dmi) != 0) {
condlog(1, "failed to get dm info for %s", mpp->alias);
return 1;
}
udd = get_udev_for_mpp(mpp);
if (!udd) {
condlog(1, "failed to get udev device to set max_sectors_kb for %s", mpp->alias);
return 1;
}
ret = sysfs_attr_get_value(udd, "queue/max_sectors_kb", buff,
sizeof(buff));
udev_device_unref(udd);
if (ret <= 0) {
condlog(1, "failed to get current max_sectors_kb from %s", mpp->alias);
return 1;
}
if (sscanf(buff, "%u\n", &max_sectors_kb) != 1) {
condlog(1, "can't parse current max_sectors_kb from %s",
mpp->alias);
return 1;
}
}
snprintf(buff, 11, "%d", max_sectors_kb);
vector_foreach_slot (mpp->pg, pgp, i) {
vector_foreach_slot(pgp->paths, pp, j) {
ret = sysfs_attr_set_value(pp->udev,
"queue/max_sectors_kb",
buff, strlen(buff));
if (ret < 0) {
condlog(1, "failed setting max_sectors_kb on %s : %s", pp->dev, strerror(-ret));
err = 1;
}
}
}
return err;
}
static void
select_action (struct multipath * mpp, vector curmp, int force_reload)
{
struct multipath * cmpp;
struct multipath * cmpp_by_name;
char * mpp_feat, * cmpp_feat;
cmpp = find_mp_by_wwid(curmp, mpp->wwid);
cmpp_by_name = find_mp_by_alias(curmp, mpp->alias);
if (!cmpp_by_name) {
if (cmpp) {
condlog(2, "%s: rename %s to %s", mpp->wwid,
cmpp->alias, mpp->alias);
strlcpy(mpp->alias_old, cmpp->alias, WWID_SIZE);
mpp->action = ACT_RENAME;
if (force_reload) {
mpp->force_udev_reload = 1;
mpp->action = ACT_FORCERENAME;
}
return;
}
mpp->action = ACT_CREATE;
condlog(3, "%s: set ACT_CREATE (map does not exist)",
mpp->alias);
return;
}
if (!cmpp) {
condlog(2, "%s: remove (wwid changed)", mpp->alias);
dm_flush_map(mpp->alias);
strlcpy(cmpp_by_name->wwid, mpp->wwid, WWID_SIZE);
drop_multipath(curmp, cmpp_by_name->wwid, KEEP_PATHS);
mpp->action = ACT_CREATE;
condlog(3, "%s: set ACT_CREATE (map wwid change)",
mpp->alias);
return;
}
if (cmpp != cmpp_by_name) {
condlog(2, "%s: unable to rename %s to %s (%s is used by %s)",
mpp->wwid, cmpp->alias, mpp->alias,
mpp->alias, cmpp_by_name->wwid);
/* reset alias to existing alias */
FREE(mpp->alias);
mpp->alias = STRDUP(cmpp->alias);
mpp->action = ACT_IMPOSSIBLE;
return;
}
if (force_reload) {
mpp->force_udev_reload = 1;
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (forced by user)",
mpp->alias);
return;
}
if (cmpp->size != mpp->size) {
mpp->force_udev_reload = 1;
mpp->action = ACT_RESIZE;
condlog(3, "%s: set ACT_RESIZE (size change)",
mpp->alias);
return;
}
if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF &&
!!strstr(mpp->features, "queue_if_no_path") !=
!!strstr(cmpp->features, "queue_if_no_path")) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (no_path_retry change)",
mpp->alias);
return;
}
if ((mpp->retain_hwhandler != RETAIN_HWHANDLER_ON ||
strcmp(cmpp->hwhandler, "0") == 0) &&
(strlen(cmpp->hwhandler) != strlen(mpp->hwhandler) ||
strncmp(cmpp->hwhandler, mpp->hwhandler,
strlen(mpp->hwhandler)))) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (hwhandler change)",
mpp->alias);
return;
}
if (mpp->retain_hwhandler != RETAIN_HWHANDLER_UNDEF &&
!!strstr(mpp->features, "retain_attached_hw_handler") !=
!!strstr(cmpp->features, "retain_attached_hw_handler") &&
get_linux_version_code() < KERNEL_VERSION(4, 3, 0)) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (retain_hwhandler change)",
mpp->alias);
return;
}
cmpp_feat = STRDUP(cmpp->features);
mpp_feat = STRDUP(mpp->features);
if (cmpp_feat && mpp_feat) {
remove_feature(&mpp_feat, "queue_if_no_path");
remove_feature(&mpp_feat, "retain_attached_hw_handler");
remove_feature(&cmpp_feat, "queue_if_no_path");
remove_feature(&cmpp_feat, "retain_attached_hw_handler");
if (strncmp(mpp_feat, cmpp_feat, PARAMS_SIZE)) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (features change)",
mpp->alias);
}
}
FREE(cmpp_feat);
FREE(mpp_feat);
if (!cmpp->selector || strncmp(cmpp->selector, mpp->selector,
strlen(mpp->selector))) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (selector change)",
mpp->alias);
return;
}
if (cmpp->minio != mpp->minio) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (minio change, %u->%u)",
mpp->alias, cmpp->minio, mpp->minio);
return;
}
if (!cmpp->pg || VECTOR_SIZE(cmpp->pg) != VECTOR_SIZE(mpp->pg)) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (path group number change)",
mpp->alias);
return;
}
if (pgcmp(mpp, cmpp)) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (path group topology change)",
mpp->alias);
return;
}
if (cmpp->nextpg != mpp->bestpg) {
mpp->action = ACT_SWITCHPG;
condlog(3, "%s: set ACT_SWITCHPG (next path group change)",
mpp->alias);
return;
}
if (!is_mpp_known_to_udev(cmpp)) {
mpp->action = ACT_RELOAD;
condlog(3, "%s: set ACT_RELOAD (udev device not initialized)",
mpp->alias);
return;
}
mpp->action = ACT_NOTHING;
condlog(3, "%s: set ACT_NOTHING (map unchanged)",
mpp->alias);
return;
}
int reinstate_paths(struct multipath *mpp)
{
int i, j;
struct pathgroup * pgp;
struct path * pp;
if (!mpp->pg)
return 0;
vector_foreach_slot (mpp->pg, pgp, i) {
if (!pgp->paths)
continue;
vector_foreach_slot (pgp->paths, pp, j) {
if (pp->state != PATH_UP &&
(pgp->status == PGSTATE_DISABLED ||
pgp->status == PGSTATE_ACTIVE))
continue;
if (pp->dmstate == PSTATE_FAILED) {
if (dm_reinstate_path(mpp->alias, pp->dev_t))
condlog(0, "%s: error reinstating",
pp->dev);
}
}
}
return 0;
}
static int
lock_multipath (struct multipath * mpp, int lock)
{
struct pathgroup * pgp;
struct path * pp;
int i, j;
int x, y;
if (!mpp || !mpp->pg)
return 0;
vector_foreach_slot (mpp->pg, pgp, i) {
if (!pgp->paths)
continue;
vector_foreach_slot(pgp->paths, pp, j) {
if (lock && flock(pp->fd, LOCK_SH | LOCK_NB) &&
errno == EWOULDBLOCK)
goto fail;
else if (!lock)
flock(pp->fd, LOCK_UN);
}
}
return 0;
fail:
vector_foreach_slot (mpp->pg, pgp, x) {
if (x > i)
return 1;
if (!pgp->paths)
continue;
vector_foreach_slot(pgp->paths, pp, y) {
if (x == i && y >= j)
return 1;
flock(pp->fd, LOCK_UN);
}
}
return 1;
}
int domap(struct multipath *mpp, char *params, int is_daemon)
{
int r = DOMAP_FAIL;
struct config *conf;
int verbosity;
/*
* last chance to quit before touching the devmaps
*/
if (mpp->action == ACT_DRY_RUN) {
conf = get_multipath_config();
verbosity = conf->verbosity;
put_multipath_config(conf);
print_multipath_topology(mpp, verbosity);
return DOMAP_DRY;
}
if (mpp->action == ACT_CREATE &&
dm_map_present(mpp->alias)) {
condlog(3, "%s: map already present", mpp->alias);
mpp->action = ACT_RELOAD;
}
switch (mpp->action) {
case ACT_REJECT:
case ACT_NOTHING:
case ACT_IMPOSSIBLE:
return DOMAP_EXIST;
case ACT_SWITCHPG:
dm_switchgroup(mpp->alias, mpp->bestpg);
/*
* we may have avoided reinstating paths because there where in
* active or disabled PG. Now that the topology has changed,
* retry.
*/
reinstate_paths(mpp);
return DOMAP_EXIST;
case ACT_CREATE:
if (lock_multipath(mpp, 1)) {
condlog(3, "%s: failed to create map (in use)",
mpp->alias);
return DOMAP_RETRY;
}
sysfs_set_max_sectors_kb(mpp, 0);
if (is_daemon && mpp->ghost_delay > 0 && count_active_paths(mpp) &&
pathcount(mpp, PATH_UP) == 0)
mpp->ghost_delay_tick = mpp->ghost_delay;
r = dm_addmap_create(mpp, params);
lock_multipath(mpp, 0);
break;
case ACT_RELOAD:
sysfs_set_max_sectors_kb(mpp, 1);
if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP))
mpp->ghost_delay_tick = 0;
r = dm_addmap_reload(mpp, params, 0);
break;
case ACT_RESIZE:
sysfs_set_max_sectors_kb(mpp, 1);
if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP))
mpp->ghost_delay_tick = 0;
r = dm_addmap_reload(mpp, params, 1);
break;
case ACT_RENAME:
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
r = dm_rename(mpp->alias_old, mpp->alias,
conf->partition_delim, mpp->skip_kpartx);
pthread_cleanup_pop(1);
break;
case ACT_FORCERENAME:
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
r = dm_rename(mpp->alias_old, mpp->alias,
conf->partition_delim, mpp->skip_kpartx);
pthread_cleanup_pop(1);
if (r) {
sysfs_set_max_sectors_kb(mpp, 1);
if (mpp->ghost_delay_tick > 0 &&
pathcount(mpp, PATH_UP))
mpp->ghost_delay_tick = 0;
r = dm_addmap_reload(mpp, params, 0);
}
break;
default:
break;
}
if (r == DOMAP_OK) {
/*
* DM_DEVICE_CREATE, DM_DEVICE_RENAME, or DM_DEVICE_RELOAD
* succeeded
*/
mpp->force_udev_reload = 0;
if (mpp->action == ACT_CREATE &&
(remember_wwid(mpp->wwid) == 1 ||
mpp->needs_paths_uevent))
trigger_paths_udev_change(mpp, true);
if (!is_daemon) {
/* multipath client mode */
dm_switchgroup(mpp->alias, mpp->bestpg);
} else {
/* multipath daemon mode */
mpp->stat_map_loads++;
condlog(2, "%s: load table [0 %llu %s %s]", mpp->alias,
mpp->size, TGT_MPATH, params);
/*
* Required action is over, reset for the stateful daemon.
* But don't do it for creation as we use in the caller the
* mpp->action to figure out whether to start the watievent checker.
*/
if (mpp->action != ACT_CREATE)
mpp->action = ACT_NOTHING;
else {
conf = get_multipath_config();
mpp->wait_for_udev = 1;
mpp->uev_wait_tick = conf->uev_wait_timeout;
put_multipath_config(conf);
}
}
dm_setgeometry(mpp);
return DOMAP_OK;
} else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE &&
mpp->needs_paths_uevent)
trigger_paths_udev_change(mpp, false);
return DOMAP_FAIL;
}
static int
deadmap (struct multipath * mpp)
{
int i, j;
struct pathgroup * pgp;
struct path * pp;
if (!mpp->pg)
return 1;
vector_foreach_slot (mpp->pg, pgp, i) {
if (!pgp->paths)
continue;
vector_foreach_slot (pgp->paths, pp, j)
if (strlen(pp->dev))
return 0; /* alive */
}
return 1; /* dead */
}
int check_daemon(void)
{
int fd;
char *reply;
int ret = 0;
unsigned int timeout;
struct config *conf;
fd = mpath_connect();
if (fd == -1)
return 0;
if (send_packet(fd, "show daemon") != 0)
goto out;
conf = get_multipath_config();
timeout = conf->uxsock_timeout;
put_multipath_config(conf);
if (recv_packet(fd, &reply, timeout) != 0)
goto out;
if (reply && strstr(reply, "shutdown"))
goto out_free;
ret = 1;
out_free:
FREE(reply);
out:
mpath_disconnect(fd);
return ret;
}
/*
* The force_reload parameter determines how coalesce_paths treats existing maps.
* FORCE_RELOAD_NONE: existing maps aren't touched at all
* FORCE_RELOAD_YES: all maps are rebuilt from scratch and (re)loaded in DM
* FORCE_RELOAD_WEAK: existing maps are compared to the current conf and only
* reloaded in DM if there's a difference. This is useful during startup.
*/
int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
int force_reload, enum mpath_cmds cmd)
{
int ret = CP_FAIL;
int k, i, r;
int is_daemon = (cmd == CMD_NONE) ? 1 : 0;
char params[PARAMS_SIZE];
struct multipath * mpp;
struct path * pp1;
struct path * pp2;
vector curmp = vecs->mpvec;
vector pathvec = vecs->pathvec;
struct config *conf;
int allow_queueing;
uint64_t *size_mismatch_seen;
/* ignore refwwid if it's empty */
if (refwwid && !strlen(refwwid))
refwwid = NULL;
if (force_reload != FORCE_RELOAD_NONE) {
vector_foreach_slot (pathvec, pp1, k) {
pp1->mpp = NULL;
}
}
if (VECTOR_SIZE(pathvec) == 0)
return CP_OK;
size_mismatch_seen = calloc((VECTOR_SIZE(pathvec) - 1) / 64 + 1,
sizeof(uint64_t));
if (size_mismatch_seen == NULL)
return CP_FAIL;
vector_foreach_slot (pathvec, pp1, k) {
int invalid;
/* skip this path for some reason */
/* 1. if path has no unique id or wwid blacklisted */
if (strlen(pp1->wwid) == 0) {
orphan_path(pp1, "no WWID");
continue;
}
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
invalid = (filter_path(conf, pp1) > 0);
pthread_cleanup_pop(1);
if (invalid) {
orphan_path(pp1, "blacklisted");
continue;
}
/* 2. if path already coalesced, or seen and discarded */
if (pp1->mpp || is_bit_set_in_array(k, size_mismatch_seen))
continue;
/* 3. if path has disappeared */
if (pp1->state == PATH_REMOVED) {
orphan_path(pp1, "path removed");
continue;
}
/* 4. path is out of scope */
if (refwwid && strncmp(pp1->wwid, refwwid, WWID_SIZE - 1))
continue;
/* If find_multipaths was selected check if the path is valid */
if (!refwwid && !should_multipath(pp1, pathvec, curmp)) {
orphan_path(pp1, "only one path");
continue;
}
/*
* at this point, we know we really got a new mp
*/
mpp = add_map_with_path(vecs, pp1, 0);
if (!mpp) {
orphan_path(pp1, "failed to create multipath device");
continue;
}
if (!mpp->paths) {
condlog(0, "%s: skip coalesce (no paths)", mpp->alias);
remove_map(mpp, vecs, 0);
continue;
}
for (i = k + 1; i < VECTOR_SIZE(pathvec); i++) {
pp2 = VECTOR_SLOT(pathvec, i);
if (strcmp(pp1->wwid, pp2->wwid))
continue;
if (!mpp->size && pp2->size)
mpp->size = pp2->size;
if (mpp->size && pp2->size &&
pp2->size != mpp->size) {
/*
* ouch, avoid feeding that to the DM
*/
condlog(0, "%s: size %llu, expected %llu. "
"Discard", pp2->dev, pp2->size,
mpp->size);
mpp->action = ACT_REJECT;
set_bit_in_array(i, size_mismatch_seen);
}
}
verify_paths(mpp, vecs);
params[0] = '\0';
if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
remove_map(mpp, vecs, 0);
continue;
}
if (cmd == CMD_DRY_RUN)
mpp->action = ACT_DRY_RUN;
if (mpp->action == ACT_UNDEF)
select_action(mpp, curmp,
force_reload == FORCE_RELOAD_YES ? 1 : 0);
r = domap(mpp, params, is_daemon);
if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
condlog(3, "%s: domap (%u) failure "
"for create/reload map",
mpp->alias, r);
if (r == DOMAP_FAIL || is_daemon) {
condlog(2, "%s: %s map",
mpp->alias, (mpp->action == ACT_CREATE)?
"ignoring" : "removing");
remove_map(mpp, vecs, 0);
continue;
} else /* if (r == DOMAP_RETRY && !is_daemon) */ {
ret = CP_RETRY;
goto out;
}
}
if (r == DOMAP_DRY)
continue;
if (r == DOMAP_EXIST && mpp->action == ACT_NOTHING &&
force_reload == FORCE_RELOAD_WEAK)
/*
* First time we're called, and no changes applied.
* domap() was a noop. But we can't be sure that
* udev has already finished setting up this device
* (udev in initrd may have been shut down while
* processing this device or its children).
* Trigger a change event, just in case.
*/
trigger_udev_change(find_mp_by_wwid(curmp, mpp->wwid));
conf = get_multipath_config();
allow_queueing = conf->allow_queueing;
put_multipath_config(conf);
if (!is_daemon && !allow_queueing && !check_daemon()) {
if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF &&
mpp->no_path_retry != NO_PATH_RETRY_FAIL)
condlog(3, "%s: multipathd not running, unset "
"queue_if_no_path feature", mpp->alias);
if (!dm_queue_if_no_path(mpp->alias, 0))
remove_feature(&mpp->features,
"queue_if_no_path");
}
if (!is_daemon && mpp->action != ACT_NOTHING) {
int verbosity;
conf = get_multipath_config();
verbosity = conf->verbosity;
put_multipath_config(conf);
print_multipath_topology(mpp, verbosity);
}
if (newmp) {
if (mpp->action != ACT_REJECT) {
if (!vector_alloc_slot(newmp))
goto out;
vector_set_slot(newmp, mpp);
}
else
remove_map(mpp, vecs, 0);
}
}
/*
* Flush maps with only dead paths (ie not in sysfs)
* Keep maps with only failed paths
*/
if (newmp) {
vector_foreach_slot (newmp, mpp, i) {
char alias[WWID_SIZE];
if (!deadmap(mpp))
continue;
strlcpy(alias, mpp->alias, WWID_SIZE);
vector_del_slot(newmp, i);
i--;
remove_map(mpp, vecs, 0);
if (dm_flush_map(alias))
condlog(2, "%s: remove failed (dead)",
alias);
else
condlog(2, "%s: remove (dead)", alias);
}
}
ret = CP_OK;
out:
free(size_mismatch_seen);
return ret;
}
struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type)
{
struct udev_device *ud = NULL;
const char *base;
if (dev == NULL || *dev == '\0')
return NULL;
switch (dev_type) {
case DEV_DEVNODE:
case DEV_DEVMAP:
/* This should be GNU basename, compiler will warn if not */
base = basename(dev);
if (*base == '\0')
break;
ud = udev_device_new_from_subsystem_sysname(udev, "block",
base);
break;
case DEV_DEVT:
ud = udev_device_new_from_devnum(udev, 'b', parse_devt(dev));
break;
case DEV_UEVENT:
ud = udev_device_new_from_environment(udev);
break;
default:
condlog(0, "Internal error: get_udev_device called with invalid type %d\n",
dev_type);
break;
}
if (ud == NULL)
condlog(2, "get_udev_device: failed to look up %s with type %d",
dev, dev_type);
return ud;
}
/*
* returns:
* 0 - success
* 1 - failure
* 2 - blacklist
*/
int get_refwwid(enum mpath_cmds cmd, char *dev, enum devtypes dev_type,
vector pathvec, char **wwid)
{
int ret = 1;
struct path * pp;
char buff[FILE_NAME_SIZE];
char * refwwid = NULL, tmpwwid[WWID_SIZE];
int flags = DI_SYSFS | DI_WWID;
struct config *conf;
int invalid = 0;
if (!wwid)
return 1;
*wwid = NULL;
if (dev_type == DEV_NONE)
return 1;
if (cmd != CMD_REMOVE_WWID)
flags |= DI_BLACKLIST;
if (dev_type == DEV_DEVNODE) {
if (basenamecpy(dev, buff, FILE_NAME_SIZE) == 0) {
condlog(1, "basename failed for '%s' (%s)",
dev, buff);
return 1;
}
pp = find_path_by_dev(pathvec, buff);
if (!pp) {
struct udev_device *udevice =
get_udev_device(buff, dev_type);
if (!udevice)
return 1;
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
ret = store_pathinfo(pathvec, conf, udevice,
flags, &pp);
pthread_cleanup_pop(1);
udev_device_unref(udevice);
if (!pp) {
if (ret == 1)
condlog(0, "%s: can't store path info",
dev);
return ret;
}
}
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
return 2;
refwwid = pp->wwid;
goto out;
}
if (dev_type == DEV_DEVT) {
strchop(dev);
if (devt2devname(buff, FILE_NAME_SIZE, dev)) {
condlog(0, "%s: cannot find block device\n", dev);
return 1;
}
pp = find_path_by_dev(pathvec, buff);
if (!pp) {
struct udev_device *udevice =
get_udev_device(dev, dev_type);
if (!udevice)
return 1;
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
ret = store_pathinfo(pathvec, conf, udevice,
flags, &pp);
pthread_cleanup_pop(1);
udev_device_unref(udevice);
if (!pp) {
if (ret == 1)
condlog(0, "%s can't store path info",
buff);
return ret;
}
}
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
return 2;
refwwid = pp->wwid;
goto out;
}
if (dev_type == DEV_UEVENT) {
struct udev_device *udevice = get_udev_device(dev, dev_type);
if (!udevice)
return 1;
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
ret = store_pathinfo(pathvec, conf, udevice,
flags, &pp);
pthread_cleanup_pop(1);
udev_device_unref(udevice);
if (!pp) {
if (ret == 1)
condlog(0, "%s: can't store path info", dev);
return ret;
}
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
return 2;
refwwid = pp->wwid;
goto out;
}
if (dev_type == DEV_DEVMAP) {
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (((dm_get_uuid(dev, tmpwwid, WWID_SIZE)) == 0)
&& (strlen(tmpwwid))) {
refwwid = tmpwwid;
goto check;
}
/*
* may be a binding
*/
if (get_user_friendly_wwid(dev, tmpwwid,
conf->bindings_file) == 0) {
refwwid = tmpwwid;
goto check;
}
/*
* or may be an alias
*/
refwwid = get_mpe_wwid(conf->mptable, dev);
/*
* or directly a wwid
*/
if (!refwwid)
refwwid = dev;
check:
if (refwwid && strlen(refwwid) &&
filter_wwid(conf->blist_wwid, conf->elist_wwid, refwwid,
NULL) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
return 2;
}
out:
if (refwwid && strlen(refwwid)) {
*wwid = STRDUP(refwwid);
return 0;
}
return 1;
}
int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh,
int is_daemon)
{
char params[PARAMS_SIZE] = {0};
struct path *pp;
int i, r;
update_mpp_paths(mpp, vecs->pathvec);
if (refresh) {
vector_foreach_slot (mpp->paths, pp, i) {
struct config *conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
r = pathinfo(pp, conf, DI_PRIO);
pthread_cleanup_pop(1);
if (r) {
condlog(2, "%s: failed to refresh pathinfo",
mpp->alias);
return 1;
}
}
}
if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
condlog(0, "%s: failed to setup map", mpp->alias);
return 1;
}
select_action(mpp, vecs->mpvec, 1);
r = domap(mpp, params, is_daemon);
if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
condlog(3, "%s: domap (%u) failure "
"for reload map", mpp->alias, r);
return 1;
}
return 0;
}