/*
Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#include <libgen.h>
#include <unistd.h>
#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <glusterfs/glusterfs.h>
#include "afr.h"
#include <glusterfs/dict.h>
#include <glusterfs/xlator.h>
#include <glusterfs/hashfn.h>
#include <glusterfs/logging.h>
#include <glusterfs/list.h>
#include <glusterfs/call-stub.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
#include <glusterfs/compat.h>
#include "protocol-common.h"
#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
static void
__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
{
int i = 0;
int ret = 0;
int read_subvol = 0;
struct iatt *stbuf = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_read_subvol_args_t args = {
0,
};
local = frame->local;
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
/*This code needs to stay till DHT sends fops on linked
* inodes*/
if (!inode_is_linked(local->inode)) {
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
continue;
if (local->replies[i].op_ret == -1)
continue;
if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) {
gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
args.ia_type = local->replies[i].poststat.ia_type;
break;
} else {
ret = dict_get_bin(local->replies[i].xdata,
DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
if (ret)
continue;
gf_uuid_copy(args.gfid, stbuf->ia_gfid);
args.ia_type = stbuf->ia_type;
break;
}
}
}
if (local->transaction.type == AFR_METADATA_TRANSACTION) {
read_subvol = afr_metadata_subvol_get(local->inode, this, NULL,
local->readable, NULL, &args);
} else {
read_subvol = afr_data_subvol_get(local->inode, this, NULL,
local->readable, NULL, &args);
}
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL,
NULL);
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
continue;
if (local->replies[i].op_ret < 0)
continue;
/* Order of checks in the compound conditional
below is important.
- Highest precedence: largest op_ret
- Next precedence: if all op_rets are equal, read subvol
- Least precedence: any succeeded subvol
*/
if ((local->op_ret < local->replies[i].op_ret) ||
((local->op_ret == local->replies[i].op_ret) &&
(i == read_subvol))) {
local->op_ret = local->replies[i].op_ret;
local->op_errno = local->replies[i].op_errno;
local->cont.inode_wfop.prebuf = local->replies[i].prestat;
local->cont.inode_wfop.postbuf = local->replies[i].poststat;
if (local->replies[i].xdata) {
if (local->xdata_rsp)
dict_unref(local->xdata_rsp);
local->xdata_rsp = dict_ref(local->replies[i].xdata);
}
if (local->replies[i].xattr) {
if (local->xattr_rsp)
dict_unref(local->xattr_rsp);
local->xattr_rsp = dict_ref(local->replies[i].xattr);
}
}
}
afr_set_in_flight_sb_status(this, frame, local->inode);
out:
return;
}
static void
__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
int op_ret, int op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
local->replies[child_index].valid = 1;
if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1)
op_ret = iov_length(local->cont.writev.vector,
local->cont.writev.count);
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
if (xdata)
local->replies[child_index].xdata = dict_ref(xdata);
if (op_ret >= 0) {
if (prebuf)
local->replies[child_index].prestat = *prebuf;
if (postbuf)
local->replies[child_index].poststat = *postbuf;
if (xattr)
local->replies[child_index].xattr = dict_ref(xattr);
} else {
afr_transaction_fop_failed(frame, this, child_index);
}
return;
}
static int
__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int child_index = (long)cookie;
int call_count = -1;
afr_private_t *priv = NULL;
priv = this->private;
local = frame->local;
LOCK(&frame->lock);
{
__afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
prebuf, postbuf, xattr, xdata);
}
UNLOCK(&frame->lock);
call_count = afr_frame_return(frame);
if (call_count == 0) {
__afr_inode_write_finalize(frame, this);
if (afr_txn_nothing_failed(frame, this)) {
/*if it did pre-op, it will do post-op changing ctime*/
if (priv->consistent_metadata && afr_needs_changelog_update(local))
afr_zero_fill_stat(local);
local->transaction.unwind(frame, this);
}
afr_transaction_resume(frame, this);
}
return 0;
}
/* {{{ writev */
void
afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame)
{
afr_local_t *src_local = NULL;
afr_local_t *dst_local = NULL;
src_local = src_frame->local;
dst_local = dst_frame->local;
dst_local->op_ret = src_local->op_ret;
dst_local->op_errno = src_local->op_errno;
dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
if (src_local->xdata_rsp)
dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp);
}
void
afr_writev_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
local = frame->local;
if (priv->consistent_metadata)
afr_zero_fill_stat(local);
AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
}
int
afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this)
{
call_frame_t *fop_frame = NULL;
fop_frame = afr_transaction_detach_fop_frame(frame);
if (fop_frame) {
afr_writev_copy_outvars(frame, fop_frame);
afr_writev_unwind(fop_frame, this);
}
return 0;
}
static void
afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int i = 0;
local = frame->local;
priv = this->private;
/*
* We already have the best case result of the writev calls staged
* as the return value. Any writev that returns some value less
* than the best case is now out of sync, so mark the fop as
* failed. Note that fops that have returned with errors have
* already been marked as failed.
*/
for (i = 0; i < priv->child_count; i++) {
if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1))
continue;
if (local->replies[i].op_ret < local->op_ret)
afr_transaction_fop_failed(frame, this, i);
}
}
void
afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
int ret = 0;
afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
int32_t num_inodelks = 0;
LOCK(&frame->lock);
{
__afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
prebuf, postbuf, NULL, xdata);
if (op_ret == -1 || !xdata)
goto unlock;
write_is_append = 0;
ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND,
&write_is_append);
if (ret || !write_is_append)
local->append_write = _gf_false;
ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count);
if (ret < 0)
goto unlock;
if (open_fd_count > local->open_fd_count) {
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
}
ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
&num_inodelks);
if (ret < 0)
goto unlock;
if (num_inodelks > local->num_inodelks) {
local->num_inodelks = num_inodelks;
local->update_num_inodelks = _gf_true;
}
}
unlock:
UNLOCK(&frame->lock);
}
void
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_lock_t *lock = NULL;
local = frame->local;
if (!local->stable_write && !local->append_write)
/* An appended write removes the necessity to
fsync() the file. This is because self-heal
has the logic to check for larger file when
the xattrs are not reliably pointing at
a stale file.
*/
afr_fd_report_unstable_write(this, local);
__afr_inode_write_finalize(frame, this);
afr_writev_handle_short_writes(frame, this);
if (local->update_open_fd_count)
local->inode_ctx->open_fd_count = local->open_fd_count;
if (local->update_num_inodelks &&
local->transaction.type == AFR_DATA_TRANSACTION) {
lock = &local->inode_ctx->lock[local->transaction.type];
lock->num_inodelks = local->num_inodelks;
}
}
int
afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
call_frame_t *fop_frame = NULL;
int child_index = (long)cookie;
int call_count = -1;
afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf,
postbuf, xdata);
call_count = afr_frame_return(frame);
if (call_count == 0) {
afr_process_post_writev(frame, this);
if (!afr_txn_nothing_failed(frame, this)) {
// Don't unwind until post-op is complete
afr_transaction_resume(frame, this);
} else {
/*
* Generally inode-write fops do transaction.unwind then
* transaction.resume, but writev needs to make sure that
* delayed post-op frame is placed in fdctx before unwind
* happens. This prevents the race of flush doing the
* changelog wakeup first in fuse thread and then this
* writev placing its delayed post-op frame in fdctx.
* This helps flush make sure all the delayed post-ops are
* completed.
*/
fop_frame = afr_transaction_detach_fop_frame(frame);
afr_writev_copy_outvars(frame, fop_frame);
afr_transaction_resume(frame, this);
afr_writev_unwind(fop_frame, this);
}
}
return 0;
}
static int
afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = frame->local;
afr_private_t *priv = this->private;
static char byte = 0xFF;
static struct iovec vector = {&byte, 1};
int32_t count = 1;
STACK_WIND_COOKIE(
frame, afr_writev_wind_cbk, (void *)(long)subvol,
priv->children[subvol], priv->children[subvol]->fops->writev, local->fd,
&vector, count, local->cont.writev.offset, local->cont.writev.flags,
local->cont.writev.iobref, local->xdata_req);
return 0;
}
int
afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
if (AFR_IS_ARBITER_BRICK(priv, subvol)) {
afr_arbiter_writev_wind(frame, this, subvol);
return 0;
}
STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->writev, local->fd,
local->cont.writev.vector, local->cont.writev.count,
local->cont.writev.offset, local->cont.writev.flags,
local->cont.writev.iobref, local->xdata_req);
return 0;
}
int
afr_do_writev(call_frame_t *frame, xlator_t *this)
{
call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = frame->local;
transaction_frame->local = local;
frame->local = NULL;
if (!AFR_FRAME_INIT(frame, op_errno))
goto out;
local->op = GF_FOP_WRITE;
local->transaction.wind = afr_writev_wind;
local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
if (local->fd->flags & O_APPEND) {
/*
* Backend vfs ignores the 'offset' for append mode fd so
* locking just the region provided for the writev does not
* give consistency guarantee. The actual write may happen at a
* completely different range than the one provided by the
* offset, len in the fop. So lock the entire file.
*/
local->transaction.start = 0;
local->transaction.len = 0;
} else {
local->transaction.start = local->cont.writev.offset;
local->transaction.len = iov_length(local->cont.writev.vector,
local->cont.writev.count);
}
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
afr_local_t *local = NULL;
int op_errno = ENOMEM;
int ret = -1;
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
local->cont.writev.vector = iov_dup(vector, count);
if (!local->cont.writev.vector)
goto out;
local->cont.writev.count = count;
local->cont.writev.offset = offset;
local->cont.writev.flags = flags;
local->cont.writev.iobref = iobref_ref(iobref);
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
if (dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) {
op_errno = ENOMEM;
goto out;
}
if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
this->name)) {
op_errno = ENOMEM;
goto out;
}
if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
op_errno = ENOMEM;
goto out;
}
/* Set append_write to be true speculatively. If on any
server it turns not be true, we unset it in the
callback.
*/
local->append_write = _gf_true;
/* detect here, but set it in writev_wind_cbk *after* the unstable
write is performed
*/
local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC));
afr_fix_open(fd, this);
afr_do_writev(frame, this);
return 0;
out:
AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
/* {{{ truncate */
int
afr_truncate_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
local = frame->local;
if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
local->stable_write = _gf_false;
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->truncate, &local->loc,
local->cont.truncate.offset, local->xdata_req);
return 0;
}
int
afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.truncate.offset = offset;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_truncate_wind;
local->transaction.unwind = afr_truncate_unwind;
loc_copy(&local->loc, loc);
ret = afr_set_inode_local(this, local, loc->inode);
if (ret)
goto out;
local->op = GF_FOP_TRUNCATE;
local->transaction.main_frame = frame;
local->transaction.start = offset;
local->transaction.len = 0;
/* Set it true speculatively, will get reset in afr_truncate_wind_cbk
if truncate was not a NOP */
local->stable_write = _gf_true;
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
/* {{{ ftruncate */
int
afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
local = frame->local;
if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
local->stable_write = _gf_false;
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->ftruncate, local->fd,
local->cont.ftruncate.offset, local->xdata_req);
return 0;
}
int
afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.ftruncate.offset = offset;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FTRUNCATE;
local->transaction.wind = afr_ftruncate_wind;
local->transaction.unwind = afr_ftruncate_unwind;
local->transaction.main_frame = frame;
local->transaction.start = local->cont.ftruncate.offset;
local->transaction.len = 0;
afr_fix_open(fd, this);
/* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
if truncate was not a NOP */
local->stable_write = _gf_true;
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
/* {{{ setattr */
int
afr_setattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preop,
struct iatt *postop, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
postop, NULL, xdata);
}
int
afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->setattr, &local->loc,
&local->cont.setattr.in_buf, local->cont.setattr.valid,
local->xdata_req);
return 0;
}
int
afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
int32_t valid, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.setattr.in_buf = *buf;
local->cont.setattr.valid = valid;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_setattr_wind;
local->transaction.unwind = afr_setattr_unwind;
loc_copy(&local->loc, loc);
ret = afr_set_inode_local(this, local, loc->inode);
if (ret)
goto out;
local->op = GF_FOP_SETATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* {{{ fsetattr */
int
afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preop,
struct iatt *postop, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
postop, NULL, xdata);
}
int
afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fsetattr, local->fd,
&local->cont.fsetattr.in_buf, local->cont.fsetattr.valid,
local->xdata_req);
return 0;
}
int
afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
int32_t valid, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.fsetattr.in_buf = *buf;
local->cont.fsetattr.valid = valid;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_fsetattr_wind;
local->transaction.unwind = afr_fsetattr_unwind;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FSETATTR;
afr_fix_open(fd, this);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* {{{ setxattr */
int
afr_setxattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno,
local->xdata_rsp);
return 0;
}
int
afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, NULL, xdata);
}
int
afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->setxattr, &local->loc,
local->cont.setxattr.dict, local->cont.setxattr.flags,
local->xdata_req);
return 0;
}
int
afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int i, ret = 0;
char *op_type = NULL;
local = frame->local;
priv = this->private;
i = (long)cookie;
local->replies[i].valid = 1;
local->replies[i].op_ret = op_ret;
local->replies[i].op_errno = op_errno;
ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
if (ret)
goto out;
gf_msg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
op_ret ? op_errno : 0, afr_get_msg_id(op_type),
"Set of pending xattr %s on"
" %s.",
op_ret ? "failed" : "succeeded", priv->children[i]->name);
out:
syncbarrier_wake(&local->barrier);
return 0;
}
int
afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this,
unsigned char *locked_nodes)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int ret = 0, i = 0;
local = frame->local;
priv = this->private;
AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,
xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req,
NULL);
/* It is sufficient if xattrop was successful on one child */
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid)
continue;
if (local->replies[i].op_ret == 0) {
ret = 0;
goto out;
} else {
ret = afr_higher_errno(ret, local->replies[i].op_errno);
}
}
out:
return -ret;
}
static int
_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
int empty_index, afr_transaction_type type,
char *op_type, const int op_type_len)
{
int count = 0;
int ret = -ENOMEM;
int idx = -1;
int d_idx = -1;
unsigned char *locked_nodes = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
priv = this->private;
local = frame->local;
locked_nodes = alloca0(priv->child_count);
idx = afr_index_for_transaction_type(type);
d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
if (!local->pending)
goto out;
local->pending[empty_index][idx] = hton32(1);
if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION))
local->pending[empty_index][d_idx] = hton32(1);
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
SLEN("replicate-brick-op"), op_type, op_type_len);
if (ret)
goto out;
local->xattr_req = dict_new();
if (!local->xattr_req)
goto out;
ret = afr_set_pending_dict(priv, local->xattr_req, local->pending);
if (ret < 0)
goto out;
if (AFR_ENTRY_TRANSACTION == type) {
count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL,
locked_nodes);
} else {
count = afr_selfheal_inodelk(frame, this, loc->inode, this->name,
LLONG_MAX - 1, 0, locked_nodes);
}
if (!count) {
gf_msg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
"Couldn't acquire lock on"
" any child.");
ret = -EAGAIN;
goto unlock;
}
ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes);
if (ret)
goto unlock;
ret = 0;
unlock:
if (AFR_ENTRY_TRANSACTION == type) {
afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL,
locked_nodes, NULL);
} else {
afr_selfheal_uninodelk(frame, this, loc->inode, this->name,
LLONG_MAX - 1, 0, locked_nodes);
}
out:
return ret;
}
void
afr_brick_args_cleanup(void *opaque)
{
afr_empty_brick_args_t *data = NULL;
data = opaque;
loc_wipe(&data->loc);
GF_FREE(data);
}
int
_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque)
{
afr_brick_args_cleanup(opaque);
return 0;
}
int
_afr_handle_empty_brick(void *opaque)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int empty_index = -1;
int ret = -1;
int op_errno = ENOMEM;
call_frame_t *frame = NULL;
xlator_t *this = NULL;
char *op_type = NULL;
int op_type_len = 0;
afr_empty_brick_args_t *data = NULL;
data = opaque;
frame = data->frame;
empty_index = data->empty_index;
if (!data->op_type)
goto out;
op_type = data->op_type;
op_type_len = strlen(op_type);
this = frame->this;
priv = this->private;
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
loc_copy(&local->loc, &data->loc);
gf_msg(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s",
priv->children[empty_index]->name);
ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
AFR_METADATA_TRANSACTION, op_type,
op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
dict_unref(local->xdata_req);
dict_unref(local->xattr_req);
afr_matrix_cleanup(local->pending, priv->child_count);
local->pending = NULL;
local->xattr_req = NULL;
local->xdata_req = NULL;
ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
AFR_ENTRY_TRANSACTION, op_type,
op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
ret = 0;
out:
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
return 0;
}
int
afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
char *data)
{
afr_local_t *local = NULL;
int ret = -1;
int op_errno = EINVAL;
local = frame->local;
local->xdata_req = dict_new();
if (!local->xdata_req) {
op_errno = ENOMEM;
goto out;
}
ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
/* set spb choice to -1 whether heal succeeds or not:
* If heal succeeds : spb-choice should be set to -1 as
* it is no longer valid; file is not
* in split-brain anymore.
* If heal doesn't succeed:
* spb-choice should be set to -1
* otherwise reads will be served
* from spb-choice which is misleading.
*/
ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
if (ret)
gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to set"
"split-brain choice to -1");
afr_heal_splitbrain_file(frame, this, loc);
ret = 0;
out:
if (ret < 0)
AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
return 0;
}
int
afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
{
int spb_child_index = -1;
char *spb_child_str = NULL;
spb_child_str = alloca0(len + 1);
memcpy(spb_child_str, value, len);
if (!strcmp(spb_child_str, "none"))
return -2;
spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
if (spb_child_index < 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
"Invalid subvol: %s", spb_child_str);
}
return spb_child_index;
}
int
afr_can_set_split_brain_choice(void *opaque)
{
afr_spbc_timeout_t *data = opaque;
call_frame_t *frame = NULL;
xlator_t *this = NULL;
loc_t *loc = NULL;
int ret = -1;
frame = data->frame;
loc = data->loc;
this = frame->this;
ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb,
&data->m_spb);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to determine if %s"
" is in split-brain. "
"Aborting split-brain-choice set.",
uuid_utoa(loc->gfid));
return ret;
}
int
afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
void *value = NULL;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_spbc_timeout_t *data = NULL;
int len = 0;
int spb_child_index = -1;
int ret = -1;
int op_errno = EINVAL;
priv = this->private;
local = AFR_FRAME_INIT(frame, op_errno);
if (!local) {
ret = 1;
goto out;
}
local->op = GF_FOP_SETXATTR;
ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &value, &len);
if (value) {
spb_child_index = afr_get_split_brain_child_index(this, value, len);
if (spb_child_index < 0) {
/* Case where value was "none" */
if (spb_child_index == -2)
spb_child_index = -1;
else {
ret = 1;
op_errno = EINVAL;
goto out;
}
}
data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t);
if (!data) {
ret = 1;
goto out;
}
data->spb_child_index = spb_child_index;
data->frame = frame;
loc_copy(&local->loc, loc);
data->loc = &local->loc;
ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
afr_set_split_brain_choice, NULL, data);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to create"
" synctask. Aborting split-brain choice set"
" for %s",
loc->name);
ret = 1;
op_errno = ENOMEM;
goto out;
}
ret = 0;
goto out;
}
ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
if (value) {
spb_child_index = afr_get_split_brain_child_index(this, value, len);
if (spb_child_index < 0) {
ret = 1;
goto out;
}
afr_split_brain_resolve_do(frame, this, loc,
priv->children[spb_child_index]->name);
ret = 0;
}
out:
/* key was correct but value was invalid when ret == 1 */
if (ret == 1) {
AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
if (data)
GF_FREE(data);
ret = 0;
}
return ret;
}
int
afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict)
{
int ret = -1;
int op_errno = 0;
uint64_t timeout = 0;
afr_private_t *priv = NULL;
priv = this->private;
ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout);
if (!ret) {
priv->spb_choice_timeout = timeout * 60;
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
}
return ret;
}
int
afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
int ret = -1;
int ab_ret = -1;
int empty_index = -1;
int op_errno = EPERM;
char *empty_brick = NULL;
char *op_type = NULL;
afr_empty_brick_args_t *data = NULL;
ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
if (!ret)
op_type = GF_AFR_REPLACE_BRICK;
ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
if (!ab_ret)
op_type = GF_AFR_ADD_BRICK;
if (ret && ab_ret)
goto out;
if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type),
"'%s' is an internal extended attribute.", op_type);
ret = 1;
goto out;
}
empty_index = afr_get_child_index_from_name(this, empty_brick);
if (empty_index < 0) {
/* Didn't belong to this replica pair
* Just do a no-op
*/
AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL);
return 0;
} else {
data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t);
if (!data) {
ret = 1;
op_errno = ENOMEM;
goto out;
}
data->frame = frame;
loc_copy(&data->loc, loc);
data->empty_index = empty_index;
data->op_type = op_type;
ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
_afr_handle_empty_brick_cbk, NULL, data);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, afr_get_msg_id(op_type),
"Failed to create synctask.");
ret = 1;
op_errno = ENOMEM;
afr_brick_args_cleanup(data);
goto out;
}
}
ret = 0;
out:
if (ret == 1) {
AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
ret = 0;
}
return ret;
}
static int
afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
int ret = -1;
ret = afr_handle_split_brain_commands(this, frame, loc, dict);
if (ret == 0)
goto out;
ret = afr_handle_spb_choice_timeout(this, frame, dict);
if (ret == 0)
goto out;
/* Applicable for replace-brick and add-brick commands */
ret = afr_handle_empty_brick(this, frame, loc, dict);
out:
return ret;
}
int
afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = EINVAL;
GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
ret = afr_handle_special_xattr(this, frame, loc, dict);
if (ret == 0)
return 0;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.setxattr.dict = dict_ref(dict);
local->cont.setxattr.flags = flags;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_setxattr_wind;
local->transaction.unwind = afr_setxattr_unwind;
loc_copy(&local->loc, loc);
ret = afr_set_inode_local(this, local, loc->inode);
if (ret)
goto out;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
local->op = GF_FOP_SETXATTR;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
return 0;
}
/* {{{ fsetxattr */
int
afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno,
local->xdata_rsp);
return 0;
}
int
afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, NULL, xdata);
}
int
afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fsetxattr, local->fd,
local->cont.fsetxattr.dict, local->cont.fsetxattr.flags,
local->xdata_req);
return 0;
}
int
afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.fsetxattr.dict = dict_ref(dict);
local->cont.fsetxattr.flags = flags;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_fsetxattr_wind;
local->transaction.unwind = afr_fsetxattr_unwind;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FSETXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
/* }}} */
/* {{{ removexattr */
int
afr_removexattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno,
local->xdata_rsp);
return 0;
}
int
afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, NULL, xdata);
}
int
afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->removexattr, &local->loc,
local->cont.removexattr.name, local->xdata_req);
return 0;
}
int
afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.removexattr.name = gf_strdup(name);
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_removexattr_wind;
local->transaction.unwind = afr_removexattr_unwind;
loc_copy(&local->loc, loc);
ret = afr_set_inode_local(this, local, loc->inode);
if (ret)
goto out;
local->op = GF_FOP_REMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
return 0;
}
/* ffremovexattr */
int
afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno,
local->xdata_rsp);
return 0;
}
int
afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, NULL, xdata);
}
int
afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fremovexattr, local->fd,
local->cont.removexattr.name, local->xdata_req);
return 0;
}
int
afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.removexattr.name = gf_strdup(name);
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->transaction.wind = afr_fremovexattr_wind;
local->transaction.unwind = afr_fremovexattr_unwind;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FREMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
int
afr_fallocate_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fallocate, local->fd,
local->cont.fallocate.mode, local->cont.fallocate.offset,
local->cont.fallocate.len, local->xdata_req);
return 0;
}
int
afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
off_t offset, size_t len, dict_t *xdata)
{
call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.fallocate.mode = mode;
local->cont.fallocate.offset = offset;
local->cont.fallocate.len = len;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->op = GF_FOP_FALLOCATE;
local->transaction.wind = afr_fallocate_wind;
local->transaction.unwind = afr_fallocate_unwind;
local->transaction.main_frame = frame;
local->transaction.start = local->cont.fallocate.offset;
local->transaction.len = 0;
afr_fix_open(fd, this);
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
/* {{{ discard */
int
afr_discard_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->discard, local->fd,
local->cont.discard.offset, local->cont.discard.len,
local->xdata_req);
return 0;
}
int
afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.discard.offset = offset;
local->cont.discard.len = len;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->op = GF_FOP_DISCARD;
local->transaction.wind = afr_discard_wind;
local->transaction.unwind = afr_discard_unwind;
local->transaction.main_frame = frame;
local->transaction.start = local->cont.discard.offset;
local->transaction.len = 0;
afr_fix_open(fd, this);
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* {{{ zerofill */
int
afr_zerofill_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->zerofill, local->fd,
local->cont.zerofill.offset, local->cont.zerofill.len,
local->xdata_req);
return 0;
}
int
afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.zerofill.offset = offset;
local->cont.zerofill.len = len;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->op = GF_FOP_ZEROFILL;
local->transaction.wind = afr_zerofill_wind;
local->transaction.unwind = afr_zerofill_unwind;
local->transaction.main_frame = frame;
local->transaction.start = local->cont.zerofill.offset;
local->transaction.len = len;
afr_fix_open(fd, this);
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
int32_t
afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, xattr, xdata);
}
int
afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->xattrop, &local->loc,
local->cont.xattrop.optype, local->cont.xattrop.xattr,
local->xdata_req);
return 0;
}
int
afr_xattrop_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno,
local->xattr_rsp, local->xdata_rsp);
return 0;
}
int32_t
afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.xattrop.xattr = dict_ref(xattr);
local->cont.xattrop.optype = optype;
if (xdata)
local->xdata_req = dict_ref(xdata);
local->transaction.wind = afr_xattrop_wind;
local->transaction.unwind = afr_xattrop_unwind;
loc_copy(&local->loc, loc);
ret = afr_set_inode_local(this, local, loc->inode);
if (ret)
goto out;
local->op = GF_FOP_XATTROP;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
NULL, xattr, xdata);
}
int
afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fxattrop, local->fd,
local->cont.xattrop.optype, local->cont.xattrop.xattr,
local->xdata_req);
return 0;
}
int
afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno,
local->xattr_rsp, local->xdata_rsp);
return 0;
}
int32_t
afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
local->cont.xattrop.xattr = dict_ref(xattr);
local->cont.xattrop.optype = optype;
if (xdata)
local->xdata_req = dict_ref(xdata);
local->transaction.wind = afr_fxattrop_wind;
local->transaction.unwind = afr_fxattrop_unwind;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FXATTROP;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
afr_fsync_unwind(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
main_frame = afr_transaction_detach_fop_frame(frame);
if (!main_frame)
return 0;
AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
postbuf, NULL, xdata);
}
int
afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol,
priv->children[subvol],
priv->children[subvol]->fops->fsync, local->fd,
local->cont.fsync.datasync, local->xdata_req);
return 0;
}
int
afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
dict_t *xdata)
{
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
local = AFR_FRAME_INIT(transaction_frame, op_errno);
if (!local)
goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref(xdata, NULL);
else
local->xdata_req = dict_new();
if (!local->xdata_req)
goto out;
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
goto out;
local->op = GF_FOP_FSYNC;
local->cont.fsync.datasync = datasync;
if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) {
/* don't care. we only wanted to CLEAR the bit */
}
local->transaction.wind = afr_fsync_wind;
local->transaction.unwind = afr_fsync_unwind;
local->transaction.main_frame = frame;
ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
goto out;
}
return 0;
out:
if (transaction_frame)
AFR_STACK_DESTROY(transaction_frame);
AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}