/*
Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#define __XOPEN_SOURCE 500
/* for SEEK_HOLE and SEEK_DATA */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <openssl/md5.h>
#include <stdint.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <errno.h>
#include <libgen.h>
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
#include <signal.h>
#include <sys/uio.h>
#include <unistd.h>
#include <ftw.h>
#include <regex.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif /* GF_BSD_HOST_OS */
#ifdef HAVE_LINKAT
#include <fcntl.h>
#endif /* HAVE_LINKAT */
#include <glusterfs/glusterfs.h>
#include <glusterfs/checksum.h>
#include <glusterfs/dict.h>
#include <glusterfs/logging.h>
#include "posix.h"
#include <glusterfs/xlator.h>
#include <glusterfs/defaults.h>
#include <glusterfs/common-utils.h>
#include <glusterfs/compat-errno.h>
#include <glusterfs/compat.h>
#include <glusterfs/byte-order.h>
#include <glusterfs/syscall.h>
#include <glusterfs/statedump.h>
#include <glusterfs/locking.h>
#include <glusterfs/timer.h>
#include "glusterfs3-xdr.h"
#include <glusterfs/hashfn.h>
#include "posix-aio.h"
#include <glusterfs/glusterfs-acl.h>
#include "posix-messages.h"
#include "posix-metadata.h"
#include <glusterfs/events.h>
#include "posix-gfid-path.h"
#include <glusterfs/compat-uuid.h>
/* Table of marker xattr names; the definition lives in another
 * translation unit. */
extern char *marker_xattrs[];
/* Alignment, in bytes, requested for the page-aligned buffers allocated in
 * this file (_page_aligned_alloc() and the iobuf request in posix_readv()). */
#define ALIGN_SIZE 4096
/*
 * HAVE_SET_FSID is unconditionally undefined right here, so the #ifdef
 * branch below is never compiled: in this file DECLARE_OLD_FS_ID_VAR,
 * SET_FS_ID() and SET_TO_OLD_FS_ID() all expand to nothing.
 *
 * When the #ifdef branch is enabled, DECLARE_OLD_FS_ID_VAR declares the
 * locals old_fsuid/old_fsgid, SET_FS_ID() stores the return values of
 * setfsuid()/setfsgid() in them, and SET_TO_OLD_FS_ID() passes those saved
 * values back to setfsuid()/setfsgid().
 */
#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID
#define DECLARE_OLD_FS_ID_VAR \
uid_t old_fsuid; \
gid_t old_fsgid;
#define SET_FS_ID(uid, gid) \
do { \
old_fsuid = setfsuid(uid); \
old_fsgid = setfsgid(gid); \
} while (0)
#define SET_TO_OLD_FS_ID() \
do { \
setfsuid(old_fsuid); \
setfsgid(old_fsgid); \
} while (0)
#else
#define DECLARE_OLD_FS_ID_VAR
#define SET_FS_ID(uid, gid)
#define SET_TO_OLD_FS_ID()
#endif
/*
 * Store the sub-second part of a timestamp into `tv`.
 *
 * With HAVE_UTIMENSAT, `tv` is a struct timespec and `nanosecs` is stored
 * unchanged in tv_nsec (nanosecond resolution). Otherwise `tv` is a
 * struct timeval and `nanosecs` is divided by 1000 and stored in tv_usec.
 *
 * Neither expansion parenthesizes its arguments, so `nanosecs` must be a
 * simple expression: an argument containing a lower-precedence operator
 * would bind incorrectly with the "/ 1000".
 */
#if HAVE_UTIMENSAT
#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs
#else
#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
tv.tv_usec = nanosecs / 1000
#endif
/* NULL-terminated list of xattr keys (volume id and gfid). Going by the
 * name these are keys that removexattr must refuse; the consumer is not in
 * this part of the file -- NOTE(review): confirm against the user. */
static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY,
NULL};
/*
 * posix_stat - stat fop.
 *
 * Resolves @loc to a path on the brick and fills an iatt for it through
 * MAKE_INODE_HANDLE. When the caller supplied @xdata, a response dictionary
 * is built with posix_xattr_fill() and then handed to
 * posix_cs_maintenance().
 *
 * The result is always delivered through STACK_UNWIND_STRICT(stat, ...):
 * op_ret is 0 on success and -1 on failure, with op_errno taken from errno
 * when the lookup fails. The function itself always returns 0.
 */
int32_t
posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
    struct posix_private *priv = NULL;
    dict_t *xattr_rsp = NULL;
    char *real_path = NULL;
    struct iatt buf = {0};
    int32_t op_errno = 0;
    int32_t op_ret = -1;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

    /* op_ret is not assigned anywhere between here and the check below, so
     * MAKE_INODE_HANDLE is expected to leave its status in it. */
    MAKE_INODE_HANDLE(real_path, this, loc, &buf);

    if (op_ret != -1) {
        if (xdata != NULL) {
            xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
                                         &buf);

            posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata,
                                 &xattr_rsp, _gf_true);
        }
        op_ret = 0;
        goto out;
    }

    /* Failure path: a missing entry is only worth a debug message. */
    op_errno = errno;
    if (op_errno != ENOENT) {
        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED,
               "lstat on %s failed", real_path ? real_path : "<null>");
    } else {
        gf_msg_debug(this->name, 0, "lstat on %s failed: %s",
                     real_path ? real_path : "<null>", strerror(op_errno));
    }

out:
    SET_TO_OLD_FS_ID();
    STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp);

    if (xattr_rsp != NULL)
        dict_unref(xattr_rsp);

    return 0;
}
/*
 * posix_do_chmod - apply the permission bits carried in @stbuf to @path.
 *
 * The requested mode is derived from stbuf->ia_prot / stbuf->ia_type and is
 * then combined with the configured masks from this->private: the
 * create-directory mask and forced directory mode for directories, the
 * create mask and forced create mode for everything else.
 *
 * lchmod() is tried first. If it fails with ENOSYS, the call is treated as
 * a success for symlinks and retried with sys_chmod() for any other type.
 *
 * Returns 0 on success, -1 on failure (also when this->private is NULL).
 */
static int
posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf)
{
    int32_t ret = -1;
    mode_t mode = 0;
    mode_t mode_bit = 0;
    struct posix_private *priv = NULL;
    struct stat stat;
    int is_symlink = 0;

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    ret = sys_lstat(path, &stat);
    if (ret != 0) {
        /* Log the actual lstat error; it used to be reported as 0. */
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
               "lstat failed: %s", path);
        goto out;
    }

    if (S_ISLNK(stat.st_mode))
        is_symlink = 1;

    /* The requested mode is the same for every file type; only the masks
     * applied to it differ. */
    mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
    if (S_ISDIR(stat.st_mode)) {
        mode_bit = (mode & priv->create_directory_mask) |
                   priv->force_directory_mode;
    } else {
        mode_bit = (mode & priv->create_mask) | priv->force_create_mode;
    }
    mode = posix_override_umask(mode, mode_bit);

    ret = lchmod(path, mode);
    if ((ret == -1) && (errno == ENOSYS)) {
        /* in Linux symlinks are always in mode 0777 and no
           such call as lchmod exists.
        */
        gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
        if (is_symlink) {
            ret = 0;
            goto out;
        }

        ret = sys_chmod(path, mode);
    }
out:
    return ret;
}
/*
 * posix_do_chown - change owner and/or group of @path via sys_lchown().
 *
 * Only the ids selected in @valid (GF_SET_ATTR_UID / GF_SET_ATTR_GID) are
 * taken from @stbuf; an id that is not selected is passed as -1.
 *
 * Returns whatever sys_lchown() returns.
 */
static int
posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf,
               int32_t valid)
{
    uid_t new_uid = -1;
    gid_t new_gid = -1;

    if (valid & GF_SET_ATTR_GID)
        new_gid = stbuf->ia_gid;

    if (valid & GF_SET_ATTR_UID)
        new_uid = stbuf->ia_uid;

    return sys_lchown(path, new_uid, new_gid);
}
/*
 * posix_do_utimes - set access and/or modification time of @path.
 *
 * The file is lstat'ed first. A timestamp that is not selected in @valid
 * (GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME) keeps the value just read from
 * the file; a selected one is taken from @stbuf. The times are applied
 * with PATH_SET_TIMESPEC_OR_TIMEVAL(), using struct timespec when
 * HAVE_UTIMENSAT is defined and struct timeval otherwise.
 *
 * If the call fails with ENOSYS it is treated as a success for symlinks
 * and issued a second time for any other file type.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int
posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid)
{
#if defined(HAVE_UTIMENSAT)
    struct timespec times[2] = {{0}, {0}};
#else
    struct timeval times[2] = {{0}, {0}};
#endif
    struct stat cur;
    int32_t ret = -1;

    ret = sys_lstat(path, &cur);
    if (ret != 0) {
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s",
               path);
        return ret;
    }

    /* times[0] is the access time */
    if ((valid & GF_SET_ATTR_ATIME) != GF_SET_ATTR_ATIME) {
        /* not requested: carry over what the file has now */
        times[0].tv_sec = ST_ATIM_SEC(&cur);
        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(times[0], ST_ATIM_NSEC(&cur));
    } else {
        times[0].tv_sec = stbuf->ia_atime;
        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(times[0], stbuf->ia_atime_nsec);
    }

    /* times[1] is the modification time */
    if ((valid & GF_SET_ATTR_MTIME) != GF_SET_ATTR_MTIME) {
        /* not requested: carry over what the file has now */
        times[1].tv_sec = ST_MTIM_SEC(&cur);
        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(times[1], ST_MTIM_NSEC(&cur));
    } else {
        times[1].tv_sec = stbuf->ia_mtime;
        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(times[1], stbuf->ia_mtime_nsec);
    }

    ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, times);
    if ((ret != -1) || (errno != ENOSYS))
        return ret;

    /* ENOSYS: nothing more to do for a symlink, otherwise try again. */
    gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
    if (S_ISLNK(cur.st_mode))
        return 0;

    ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, times);
    return ret;
}
/*
 * posix_setattr - setattr fop on a path.
 *
 * Depending on the bits set in @valid this changes, in order: owner/group
 * (posix_do_chown), mode (posix_do_chmod), atime/mtime (posix_do_utimes,
 * followed by posix_update_utime_in_mdata) and, when the ctime feature is
 * disabled in this->private, bumps the file times to "now" for
 * GF_SET_ATTR_CTIME. With @valid == 0 an lchown(-1, -1) is issued.
 *
 * The attributes before and after the change are returned through
 * STACK_UNWIND_STRICT(setattr, ...). op_ret is 0 on success, -1 on the
 * first failing step (op_errno = errno of that step). The function itself
 * always returns 0.
 */
int
posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
              struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    char *real_path = NULL;
    struct iatt statpre = {
        0,
    };
    struct iatt statpost = {
        0,
    };
    dict_t *xattr_rsp = NULL;
    struct posix_private *priv = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);

    /* Read this->private only after 'this' has been validated, and make
     * sure it is usable before priv->ctime is consulted below. */
    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);
    MAKE_INODE_HANDLE(real_path, this, loc, &statpre);

    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "setattr (lstat) on %s failed",
               real_path ? real_path : "<null>");
        goto out;
    }

    if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
        op_ret = posix_do_chown(this, real_path, stbuf, valid);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
                   "setattr (chown) on %s "
                   "failed",
                   real_path);
            goto out;
        }
    }

    if (valid & GF_SET_ATTR_MODE) {
        op_ret = posix_do_chmod(this, real_path, stbuf);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED,
                   "setattr (chmod) on %s "
                   "failed",
                   real_path);
            goto out;
        }
    }

    if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
        op_ret = posix_do_utimes(this, real_path, stbuf, valid);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
                   "setattr (utimes) on %s "
                   "failed",
                   real_path);
            goto out;
        }
        posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf,
                                    valid);
    }

    if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
        /*
         * If ctime is not enabled, we have no means to associate an
         * arbitrary ctime with the file, so as a fallback, we ignore
         * the ctime payload and update the file ctime to current time
         * (which is possible directly with the POSIX API).
         */
        op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
                   "setattr (utimes) on %s "
                   "failed",
                   real_path);
            goto out;
        }
    }

    if (!valid) {
        op_ret = sys_lchown(real_path, -1, -1);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
                   "lchown (%s, -1, -1) "
                   "failed",
                   real_path);
            goto out;
        }
    }

    op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost,
                         _gf_false);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "setattr (lstat) on %s failed", real_path);
        goto out;
    }

    if (valid & GF_SET_ATTR_CTIME && priv->ctime) {
        /*
         * If we got ctime payload, we override
         * the ctime of statpost with that.
         */
        statpost.ia_ctime = stbuf->ia_ctime;
        statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec;
    }
    posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);

    if (xdata)
        xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
                                     &statpost);
    op_ret = 0;

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost,
                        xattr_rsp);
    if (xattr_rsp)
        dict_unref(xattr_rsp);

    return 0;
}
/*
 * posix_do_fchown - change owner and/or group of the open file @fd via
 * sys_fchown().
 *
 * Only the ids selected in @valid (GF_SET_ATTR_UID / GF_SET_ATTR_GID) are
 * taken from @stbuf; an id that is not selected is passed as -1.
 *
 * Returns whatever sys_fchown() returns.
 */
int32_t
posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid)
{
    uid_t new_uid = -1;
    gid_t new_gid = -1;

    if (valid & GF_SET_ATTR_GID)
        new_gid = stbuf->ia_gid;

    if (valid & GF_SET_ATTR_UID)
        new_uid = stbuf->ia_uid;

    return sys_fchown(fd, new_uid, new_gid);
}
/*
 * posix_do_fchmod - apply the permission bits carried in @stbuf to the
 * open file @fd.
 *
 * The mode derived from stbuf->ia_prot / stbuf->ia_type is combined with
 * the create mask and forced create mode from this->private, passed
 * through posix_override_umask() and applied with sys_fchmod().
 *
 * Returns the sys_fchmod() result, or -1 when this->private is NULL.
 */
int32_t
posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf)
{
    struct posix_private *priv = this->private;
    mode_t requested = 0;
    mode_t masked = 0;
    int32_t ret = -1;

    VALIDATE_OR_GOTO(priv, out);

    requested = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
    masked = (requested & priv->create_mask) | priv->force_create_mode;
    requested = posix_override_umask(requested, masked);

    ret = sys_fchmod(fd, requested);
out:
    return ret;
}
/*
 * posix_do_futimes - set access and/or modification time of the open
 * file @fd.
 *
 * The file is fstat'ed first. A timestamp that is not selected in @valid
 * (GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME) keeps the value just read from
 * the file; a selected one is taken from @stbuf. Nanoseconds are reduced
 * to microseconds because the times are applied with sys_futimes() on a
 * struct timeval pair.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int
posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid)
{
    struct timeval times[2] = {{0}, {0}};
    struct stat cur = {0};
    int32_t ret = -1;

    ret = sys_fstat(fd, &cur);
    if (ret != 0) {
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%d",
               fd);
        return ret;
    }

    /* times[0] is the access time */
    if ((valid & GF_SET_ATTR_ATIME) != GF_SET_ATTR_ATIME) {
        /* not requested: carry over what the file has now */
        times[0].tv_sec = ST_ATIM_SEC(&cur);
        times[0].tv_usec = ST_ATIM_NSEC(&cur) / 1000;
    } else {
        times[0].tv_sec = stbuf->ia_atime;
        times[0].tv_usec = stbuf->ia_atime_nsec / 1000;
    }

    /* times[1] is the modification time */
    if ((valid & GF_SET_ATTR_MTIME) != GF_SET_ATTR_MTIME) {
        /* not requested: carry over what the file has now */
        times[1].tv_sec = ST_MTIM_SEC(&cur);
        times[1].tv_usec = ST_MTIM_NSEC(&cur) / 1000;
    } else {
        times[1].tv_sec = stbuf->ia_mtime;
        times[1].tv_usec = stbuf->ia_mtime_nsec / 1000;
    }

    ret = sys_futimes(fd, times);
    if (ret == -1)
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd);

    return ret;
}
/*
 * posix_fsetattr - setattr fop on an open fd.
 *
 * Looks up the posix fd context of @fd, records the attributes before the
 * change, then applies, depending on @valid and in this order: owner/group
 * (posix_do_fchown), mode (posix_do_fchmod) and atime/mtime
 * (posix_do_futimes, followed by posix_update_utime_in_mdata). With
 * @valid == 0 an fchown(-1, -1) is issued.
 *
 * Pre- and post-operation attributes are returned through
 * STACK_UNWIND_STRICT(fsetattr, ...); op_ret is 0 on success and -1 on the
 * first failing step. The function itself always returns 0.
 */
int
posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
               struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
    struct iatt statpre = {0};
    struct iatt statpost = {0};
    struct posix_fd *pfd = NULL;
    dict_t *xattr_rsp = NULL;
    int32_t op_errno = 0;
    int32_t op_ret = -1;

    DECLARE_OLD_FS_ID_VAR;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    /* on failure op_errno is whatever posix_fd_ctx_get() stored in it */
    if (posix_fd_ctx_get(fd, this, &pfd, &op_errno) < 0) {
        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
        goto out;
    }

    op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fsetattr (fstat) failed on fd=%p", fd);
        goto out;
    }

    /* ownership */
    if ((valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) != 0) {
        op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
                   "fsetattr (fchown) failed"
                   " on fd=%p",
                   fd);
            goto out;
        }
    }

    /* permission bits */
    if ((valid & GF_SET_ATTR_MODE) != 0) {
        op_ret = posix_do_fchmod(this, pfd->fd, stbuf);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED,
                   "fsetattr (fchmod) failed"
                   " on fd=%p",
                   fd);
            goto out;
        }
    }

    /* timestamps */
    if ((valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) != 0) {
        op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED,
                   "fsetattr (futimes) on "
                   "failed fd=%p",
                   fd);
            goto out;
        }
        posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf,
                                    valid);
    }

    /* nothing selected at all */
    if (valid == 0) {
        op_ret = sys_fchown(pfd->fd, -1, -1);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
                   "fchown (%d, -1, -1) failed", pfd->fd);
            goto out;
        }
    }

    op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fsetattr (fstat) failed on fd=%p", fd);
        goto out;
    }

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost);

    if (xdata != NULL) {
        xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
                                     &statpost);
    }
    op_ret = 0;

out:
    SET_TO_OLD_FS_ID();
    STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost,
                        xattr_rsp);

    if (xattr_rsp != NULL)
        dict_unref(xattr_rsp);

    return 0;
}
/*
 * posix_do_fallocate - common worker behind the fallocate-based fops in
 * this file (posix_glfallocate and posix_discard).
 *
 * Steps: optional disk-reserve check, fd context and inode context
 * lookup, optional write_atomic_lock (taken when @xdata carries
 * GLUSTERFS_WRITE_UPDATE_ATOMIC), pre-operation fstat into @statpre,
 * posix_cs_maintenance() when @xdata is given, sys_fallocate(), and a
 * post-operation fstat into @statpost.
 *
 * Returns 0 on success and a negative errno value on failure; callers
 * negate it to obtain op_errno. A failed VALIDATE_OR_GOTO still yields the
 * initial -1.
 */
static int32_t
posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
                   off_t offset, size_t len, struct iatt *statpre,
                   struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata)
{
    int32_t ret = -1;
    int32_t op_errno = 0;
    struct posix_fd *pfd = NULL;
    gf_boolean_t locked = _gf_false;
    posix_inode_ctx_t *ctx = NULL;
    struct posix_private *priv = NULL;

    DECLARE_OLD_FS_ID_VAR;

    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;

    /* fallocate case is special so call posix_disk_space_check separately
       for every fallocate fop instead of calling posix_disk_space with
       thread after every 5 sec sleep to working correctly storage.reserve
       option behaviour
    */
    if (priv->disk_reserve)
        posix_disk_space_check(this);

    /* Both result slots are 'ret' here; the macro is expected to leave a
     * positive ENOSPC in it, which is normalized at 'out'. */
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
        /* Hand back the error stored in op_errno instead of the raw -1,
         * which callers would turn into errno 1. EBADF is the fallback
         * should op_errno have been left unset. */
        ret = (op_errno > 0) ? -op_errno : -EBADF;
        goto out;
    }

    ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
    if (ret < 0) {
        ret = -ENOMEM;
        goto out;
    }

    if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
        locked = _gf_true;
        pthread_mutex_lock(&ctx->write_atomic_lock);
    }

    ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
    if (ret == -1) {
        ret = -errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fallocate (fstat) failed on fd=%p", fd);
        goto out;
    }

    if (xdata) {
        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
                                   xdata, rsp_xdata, _gf_false);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            ret = -EIO;
            goto out;
        }
    }

    ret = sys_fallocate(pfd->fd, flags, offset, len);
    if (ret == -1) {
        ret = -errno;
        gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED,
               "fallocate failed on %s offset: %jd, "
               "len:%zu, flags: %d",
               uuid_utoa(fd->inode->gfid), offset, len, flags);
        goto out;
    }

    ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
    if (ret == -1) {
        ret = -errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fallocate (fstat) failed on fd=%p", fd);
        goto out;
    }

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);

out:
    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }
    SET_TO_OLD_FS_ID();
    /* normalize the positive ENOSPC from the disk-space check */
    if (ret == ENOSPC)
        ret = -ENOSPC;

    return ret;
}
/*
 * _page_aligned_alloc - allocate a zeroed buffer of @size + ALIGN_SIZE
 * bytes and compute an ALIGN_SIZE-aligned pointer into it.
 *
 * On success the aligned pointer is stored in *@aligned_buf and the raw
 * allocation is returned; the raw pointer, not the aligned one, is what
 * must later be passed to GF_FREE(). On allocation failure NULL is
 * returned and *@aligned_buf is left untouched.
 */
char *
_page_aligned_alloc(size_t size, char **aligned_buf)
{
    char *raw = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char);
    char *aligned = NULL;

    if (raw != NULL) {
        /* page aligned buffer */
        aligned = GF_ALIGN_BUF(raw, ALIGN_SIZE);
        *aligned_buf = aligned;
    }

    return raw;
}
/*
 * _posix_do_zerofill - write @len zero bytes to @fd starting at @offset
 * using writev().
 *
 * A single zeroed buffer of at most VECTOR_SIZE bytes is shared by every
 * iovec entry. The range is written as:
 *   - num_loop writev() calls of num_vect entries each (num_vect is capped
 *     at MAX_NO_VECT),
 *   - one writev() of the `extra` full entries left over by that cap,
 *   - one final writev() of the `remain` tail bytes.
 * With @o_direct set, the buffer is obtained from _page_aligned_alloc().
 *
 * A short write is reported as a failure with errno = ENOSPC.
 *
 * Returns 0 when @len is 0, a non-negative value (the byte count of the
 * last writev) on success, and -1 with errno set on failure. errno is
 * preserved across the cleanup so the caller can convert it to an
 * op_errno.
 */
static int32_t
_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
{
    off_t num_vect = 0;
    off_t num_loop = 1;
    off_t idx = 0;
    int32_t op_ret = -1;
    int32_t vect_size = VECTOR_SIZE;
    off_t remain = 0;
    off_t extra = 0;
    struct iovec *vector = NULL;
    char *iov_base = NULL;
    char *alloc_buf = NULL;
    int saved_errno = 0;

    if (len == 0)
        return 0;
    /* A negative length would otherwise become a negative vector size and
     * only fail later, inside the allocator, with an unrelated errno. */
    if (len < 0) {
        errno = EINVAL;
        return -1;
    }
    if (len < VECTOR_SIZE)
        vect_size = len;

    num_vect = len / (vect_size);
    remain = len % vect_size;
    if (num_vect > MAX_NO_VECT) {
        extra = num_vect % MAX_NO_VECT;
        num_loop = num_vect / MAX_NO_VECT;
        num_vect = MAX_NO_VECT;
    }

    /* The caller derives its error code from errno, so make sure every
     * allocation failure leaves a meaningful value there. */
    vector = GF_CALLOC(num_vect, sizeof(struct iovec), gf_common_mt_iovec);
    if (!vector) {
        errno = ENOMEM;
        return -1;
    }
    if (o_direct) {
        alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
        if (!alloc_buf) {
            GF_FREE(vector);
            errno = ENOMEM;
            return -1;
        }
    } else {
        iov_base = GF_CALLOC(vect_size, sizeof(char), gf_common_mt_char);
        if (!iov_base) {
            GF_FREE(vector);
            errno = ENOMEM;
            return -1;
        }
    }

    /* every entry points at the same zeroed buffer */
    for (idx = 0; idx < num_vect; idx++) {
        vector[idx].iov_base = iov_base;
        vector[idx].iov_len = vect_size;
    }
    if (sys_lseek(fd, offset, SEEK_SET) < 0) {
        op_ret = -1;
        goto err;
    }

    for (idx = 0; idx < num_loop; idx++) {
        op_ret = sys_writev(fd, vector, num_vect);
        if (op_ret < 0)
            goto err;
        if (op_ret != (vect_size * num_vect)) {
            op_ret = -1;
            errno = ENOSPC;
            goto err;
        }
    }
    if (extra) {
        op_ret = sys_writev(fd, vector, extra);
        if (op_ret < 0)
            goto err;
        if (op_ret != (vect_size * extra)) {
            op_ret = -1;
            errno = ENOSPC;
            goto err;
        }
    }
    if (remain) {
        vector[0].iov_len = remain;
        op_ret = sys_writev(fd, vector, 1);
        if (op_ret < 0)
            goto err;
        if (op_ret != remain) {
            op_ret = -1;
            errno = ENOSPC;
            goto err;
        }
    }
err:
    /* Releasing the buffers must not disturb the errno of a failed
     * lseek/writev. */
    saved_errno = errno;
    if (o_direct)
        GF_FREE(alloc_buf);
    else
        GF_FREE(iov_base);
    GF_FREE(vector);
    errno = saved_errno;
    return op_ret;
}
/*
 * posix_do_zerofill - worker for the zerofill fop.
 *
 * Steps: fd context and inode context lookup, optional write_atomic_lock
 * (taken when @xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC),
 * pre-operation fstat into @statpre, posix_cs_maintenance() when @xdata is
 * given, then the zero fill itself: sys_fallocate() with
 * FALLOC_FL_ZERO_RANGE first, falling back to _posix_do_zerofill() when
 * that reports ENOSYS or EOPNOTSUPP. If the fd was opened with O_SYNC or
 * O_DSYNC the data is fsync'ed. Finally a post-operation fstat fills
 * @statpost.
 *
 * Returns 0 on success and a negative errno value on failure. A failed
 * VALIDATE_OR_GOTO still yields the initial -1.
 */
static int32_t
posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
                  off_t len, struct iatt *statpre, struct iatt *statpost,
                  dict_t *xdata, dict_t **rsp_xdata)
{
    int32_t ret = -1;
    int32_t op_errno = 0;
    int32_t flags = 0;
    int saved_errno = 0;
    struct posix_fd *pfd = NULL;
    gf_boolean_t locked = _gf_false;
    posix_inode_ctx_t *ctx = NULL;

    DECLARE_OLD_FS_ID_VAR;

    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
        /* Hand back the error stored in op_errno instead of the raw -1,
         * which the caller would turn into errno 1. EBADF is the fallback
         * should op_errno have been left unset. */
        ret = (op_errno > 0) ? -op_errno : -EBADF;
        goto out;
    }

    ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
    if (ret < 0) {
        ret = -ENOMEM;
        goto out;
    }

    /* xdata is optional (see the "if (xdata)" below); guard the lookup the
     * same way posix_do_fallocate() does. */
    if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
        locked = _gf_true;
        pthread_mutex_lock(&ctx->write_atomic_lock);
    }

    ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
    if (ret == -1) {
        ret = -errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "pre-operation fstat failed on fd = %p", fd);
        goto out;
    }

    if (xdata) {
        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
                                   xdata, rsp_xdata, _gf_false);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state "
                   "check failed, fd %p",
                   fd);
            ret = -EIO;
            goto out;
        }
    }

    /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill.
     * If it fails, fall back to _posix_do_zerofill() and an optional fsync.
     */
    flags = FALLOC_FL_ZERO_RANGE;
    ret = sys_fallocate(pfd->fd, flags, offset, len);
    if (ret == 0) {
        goto fsync;
    } else {
        ret = -errno;
        if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) {
            goto out;
        }
    }

    ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
    if (ret < 0) {
        /* Never let a failure turn into "0" just because errno happens to
         * be unset. */
        saved_errno = errno ? errno : EIO;
        ret = -saved_errno;
        gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_ZEROFILL_FAILED,
               "zerofill failed on fd %d length %" PRId64, pfd->fd, len);
        goto out;
    }

fsync:
    if (pfd->flags & (O_SYNC | O_DSYNC)) {
        ret = sys_fsync(pfd->fd);
        if (ret) {
            /* Capture errno before logging: it is both the value to log
             * and the value to return. */
            saved_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, saved_errno, P_MSG_WRITEV_FAILED,
                   "fsync() in writev on fd"
                   "%d failed",
                   pfd->fd);
            ret = -saved_errno;
            goto out;
        }
    }

    ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
    if (ret == -1) {
        ret = -errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "post operation fstat failed on fd=%p", fd);
        goto out;
    }

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);

out:
    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }
    SET_TO_OLD_FS_ID();

    return ret;
}
/*
 * posix_glfallocate - fallocate fop.
 *
 * Translates @keep_size into FALLOC_FL_KEEP_SIZE (when the platform
 * defines it) and delegates to posix_do_fallocate(). On success the
 * pre/post attributes are unwound with op_ret 0; on failure op_ret is -1
 * and op_errno is the negated return value of the worker. The function
 * itself always returns 0.
 */
int32_t
posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
                  int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
{
    struct iatt statpre = {0};
    struct iatt statpost = {0};
    int32_t flags = 0;
    int32_t ret;

#ifdef FALLOC_FL_KEEP_SIZE
    if (keep_size)
        flags = FALLOC_FL_KEEP_SIZE;
#endif /* FALLOC_FL_KEEP_SIZE */

    ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
                             &statpost, xdata, NULL);

    if (ret >= 0) {
        STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
    } else {
        STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
    }

    return 0;
}
/*
 * posix_discard - discard (hole punching) fop.
 *
 * When the platform defines FALLOC_FL_KEEP_SIZE this delegates to
 * posix_do_fallocate() with FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.
 * Otherwise the fop fails with EOPNOTSUPP.
 *
 * The result is delivered through STACK_UNWIND_STRICT(discard, ...):
 * op_ret 0 with pre/post attributes on success, op_ret -1 with a positive
 * op_errno on failure. The function itself always returns 0.
 */
int32_t
posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
              size_t len, dict_t *xdata)
{
    int32_t op_ret = -1;
    /* Positive errno. The former "ret = EOPNOTSUPP" was negated at unwind
     * time and reached the caller as a negative error code. */
    int32_t op_errno = EOPNOTSUPP;
    struct iatt *pre = NULL;
    struct iatt *post = NULL;
    dict_t *rsp_xdata = NULL;

#ifdef FALLOC_FL_KEEP_SIZE
    int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
    int32_t ret = 0;
    struct iatt statpre = {
        0,
    };
    struct iatt statpost = {
        0,
    };

    ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
                             &statpost, xdata, &rsp_xdata);
    if (ret < 0) {
        /* the worker reports failures as negative errno values */
        op_errno = -ret;
    } else {
        op_ret = 0;
        op_errno = 0;
        pre = &statpre;
        post = &statpost;
    }
#endif /* FALLOC_FL_KEEP_SIZE */

    STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post,
                        rsp_xdata);

    /* Drop our reference on the response dictionary after the unwind, the
     * same way posix_stat() and posix_setattr() release theirs. */
    if (rsp_xdata)
        dict_unref(rsp_xdata);

    return 0;
}
/*
 * posix_zerofill - zerofill fop.
 *
 * Runs the disk-space check and then delegates to posix_do_zerofill().
 * The result is delivered through STACK_UNWIND_STRICT(zerofill, ...):
 * op_ret 0 with pre/post attributes on success; on failure op_ret is -1,
 * no attributes are returned and op_errno is the negated return value of
 * the worker (or whatever the validation / disk-space macros left in it).
 * The function itself always returns 0.
 */
int32_t
posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
               off_t len, dict_t *xdata)
{
    int32_t ret = 0;
    struct iatt statpre = {
        0,
    };
    struct iatt statpost = {
        0,
    };
    struct iatt *pre = NULL;
    struct iatt *post = NULL;
    struct posix_private *priv = NULL;
    int op_ret = -1;
    int op_errno = EINVAL;
    dict_t *rsp_xdata = NULL;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);

    priv = this->private;
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);

    ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost,
                            xdata, &rsp_xdata);
    if (ret < 0) {
        op_ret = -1;
        op_errno = -ret;
        goto out;
    }

    /* success: report the attributes gathered by the worker */
    op_ret = 0;
    op_errno = 0;
    pre = &statpre;
    post = &statpost;

out:
    STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post,
                        rsp_xdata);

    /* Drop our reference on the response dictionary after the unwind, the
     * same way posix_stat() and posix_setattr() release theirs. */
    if (rsp_xdata)
        dict_unref(rsp_xdata);

    return 0;
}
/*
 * posix_ipc - ipc fop.
 *
 * IPC requests are meant for inter-translator communication. One that
 * reaches this function was not recognized by anybody else, which is
 * comparable to an uncaught exception: it is logged and failed with
 * EOPNOTSUPP. The function itself always returns 0.
 */
int32_t
posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
    const int32_t unhandled_errno = EOPNOTSUPP;

    gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE,
           "GF_LOG_IPC(%d) not handled", op);

    STACK_UNWIND_STRICT(ipc, frame, -1, unhandled_errno, NULL);

    return 0;
}
/*
 * posix_seek - seek fop (SEEK_DATA / SEEK_HOLE).
 *
 * Only compiled in full when HAVE_SEEK_HOLE is defined; otherwise the fop
 * always fails with EINVAL.
 *
 * @what selects SEEK_DATA or SEEK_HOLE; any other value fails with
 * ENOTSUP. When @xdata is given, a pre-operation fstat and
 * posix_cs_maintenance() run before the lseek.
 *
 * The result is delivered through STACK_UNWIND_STRICT(seek, ...): on
 * success op_ret is 0 and the offset found by lseek is returned; on any
 * failure op_ret and the offset are -1 and `err` holds the errno. The
 * function itself always returns 0.
 */
int32_t
posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
           gf_seek_what_t what, dict_t *xdata)
{
#ifdef HAVE_SEEK_HOLE
    struct posix_fd *pfd = NULL;
    off_t ret = -1;
    int err = 0;
    int whence = 0;
    struct iatt preop = {
        0,
    };
    dict_t *rsp_xdata = NULL;

    DECLARE_OLD_FS_ID_VAR;

    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    switch (what) {
        case GF_SEEK_DATA:
            whence = SEEK_DATA;
            break;
        case GF_SEEK_HOLE:
            whence = SEEK_HOLE;
            break;
        default:
            err = ENOTSUP;
            gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN,
                   "don't know what to seek");
            goto out;
    }

    /*
     * The unwind at 'out' recognizes a failure only by ret == -1. Every
     * error path below therefore leaves ret at exactly -1 and puts the
     * error code in 'err'; encoding it as a negative errno in 'ret' would
     * be reported as a successful seek to a negative offset.
     */
    ret = posix_fd_ctx_get(fd, this, &pfd, &err);
    if (ret < 0) {
        gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
        if (err == 0)
            err = EBADF;
        ret = -1;
        goto out;
    }

    if (xdata) {
        ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
        if (ret == -1) {
            err = errno;
            gf_msg(this->name, GF_LOG_ERROR, err, P_MSG_FSTAT_FAILED,
                   "pre-operation fstat failed on fd=%p", fd);
            goto out;
        }

        ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL,
                                   xdata, &rsp_xdata, _gf_false);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            err = EIO;
            ret = -1;
            goto out;
        }
    }

    ret = sys_lseek(pfd->fd, offset, whence);
    if (ret == -1) {
        err = errno;
        gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err,
               P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64,
               pfd->fd, offset);
        goto out;
    }

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err,
                        (ret == -1 ? -1 : ret), rsp_xdata);

    /* Drop our reference on the response dictionary after the unwind, the
     * same way posix_stat() and posix_setattr() release theirs. */
    if (rsp_xdata)
        dict_unref(rsp_xdata);
#else
    STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL);
#endif
    return 0;
}
/*
 * posix_opendir - opendir fop.
 *
 * Resolves @loc to a path on the brick, opens it with sys_opendir() and
 * stores a newly allocated struct posix_fd (DIR handle plus its
 * descriptor) in the context of @fd.
 *
 * The result is delivered through STACK_UNWIND_STRICT(opendir, ...):
 * op_ret 0 on success, -1 with op_errno on failure. On failure the DIR
 * handle and the posix_fd are released here. The function itself always
 * returns 0.
 */
int32_t
posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
              dict_t *xdata)
{
    char *real_path = NULL;
    int32_t op_ret = -1;
    int32_t op_errno = EINVAL;
    DIR *dir = NULL;
    struct posix_fd *pfd = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);
    VALIDATE_OR_GOTO(fd, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);
    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
    if (!real_path) {
        op_errno = ESTALE;
        goto out;
    }

    op_ret = -1;
    dir = sys_opendir(real_path);

    if (dir == NULL) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED,
               "opendir failed on %s", real_path);
        goto out;
    }

    /* op_ret temporarily carries the directory's file descriptor */
    op_ret = dirfd(dir);
    if (op_ret < 0) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED,
               "dirfd() failed on %s", real_path);
        goto out;
    }

    pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
    if (!pfd) {
        op_errno = ENOMEM;
        /* op_ret still holds the dirfd value at this point. Reset it so
         * that the cleanup below closes the directory and the fop is
         * unwound as a failure instead of with a stray descriptor number. */
        op_ret = -1;
        goto out;
    }

    pfd->dir = dir;
    pfd->dir_eof = -1;
    pfd->fd = op_ret;

    op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
    if (op_ret)
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
               "failed to set the fd"
               "context path=%s fd=%p",
               real_path, fd);

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL);

    op_ret = 0;

out:
    if (op_ret == -1) {
        if (dir) {
            (void)sys_closedir(dir);
            dir = NULL;
        }
        if (pfd) {
            GF_FREE(pfd);
            pfd = NULL;
        }
    }

    SET_TO_OLD_FS_ID();
    STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
    return 0;
}
/*
 * posix_releasedir - releasedir callback for a directory fd.
 *
 * Detaches the struct posix_fd stored in the context of @fd and appends
 * it to the janitor_fds list of the glusterfs context, signalling
 * janitor_cond under janitor_lock. Nothing is queued when the fd has no
 * context or when the posix_fd carries no DIR handle.
 *
 * Always returns 0.
 */
int32_t
posix_releasedir(xlator_t *this, fd_t *fd)
{
    glusterfs_ctx_t *ctx = NULL;
    struct posix_fd *pfd = NULL;
    uint64_t tmp_pfd = 0;

    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    if (fd_ctx_del(fd, this, &tmp_pfd) < 0) {
        gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
        goto out;
    }

    pfd = (struct posix_fd *)(long)tmp_pfd;
    if (pfd->dir == NULL) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
               "pfd->dir is NULL for fd=%p", fd);
        goto out;
    }

    /*
     * The directory is not closed and pfd is not freed here; the entry is
     * only queued. NOTE(review): closing and freeing is presumably done by
     * whoever drains ctx->janitor_fds -- that consumer is not in this part
     * of the file.
     */
    ctx = THIS->ctx;

    pthread_mutex_lock(&ctx->janitor_lock);
    INIT_LIST_HEAD(&pfd->list);
    list_add_tail(&pfd->list, &ctx->janitor_fds);
    pthread_cond_signal(&ctx->janitor_cond);
    pthread_mutex_unlock(&ctx->janitor_lock);

out:
    return 0;
}
/*
 * posix_readlink - readlink fop.
 *
 * Resolves @loc to a path on the brick (filling an iatt for it through
 * MAKE_INODE_HANDLE) and reads at most @size bytes of the link target into
 * a stack buffer of @size + 1 bytes, which is NUL-terminated on success.
 *
 * The result is delivered through STACK_UNWIND_STRICT(readlink, ...): on
 * success op_ret is the number of bytes read, on failure it is -1 with
 * op_errno taken from errno. The function itself always returns 0.
 */
int32_t
posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
               dict_t *xdata)
{
    struct iatt stbuf = {0};
    char *real_path = NULL;
    char *dest = NULL;
    int32_t op_errno = EINVAL;
    int32_t op_ret = -1;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(loc, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

    /* one extra byte for the terminating NUL added below */
    dest = alloca(size + 1);

    MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "lstat on %s failed", loc->path ? loc->path : "<null>");
        goto out;
    }

    op_ret = sys_readlink(real_path, dest, size);
    if (op_ret != -1) {
        /* the readlink result is not NUL-terminated; do it here */
        dest[op_ret] = 0;
        goto out;
    }

    op_errno = errno;
    gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED,
           "readlink on %s failed", real_path);

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL);

    return 0;
}
/*
 * posix_truncate - truncate fop on a path.
 *
 * Resolves @loc to a path on the brick (filling the pre-operation iatt),
 * runs posix_cs_maintenance() when @xdata is given, truncates the file to
 * @offset and stats it again for the post-operation iatt.
 *
 * The result is delivered through STACK_UNWIND_STRICT(truncate, ...):
 * op_ret 0 on success, -1 with op_errno on failure. The function itself
 * always returns 0.
 */
int32_t
posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
               dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    char *real_path = NULL;
    struct posix_private *priv = NULL;
    struct iatt prebuf = {
        0,
    };
    struct iatt postbuf = {
        0,
    };
    dict_t *rsp_xdata = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

    MAKE_INODE_HANDLE(real_path, this, loc, &prebuf);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "pre-operation lstat on %s failed",
               real_path ? real_path : "<null>");
        goto out;
    }

    if (xdata) {
        op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path,
                                      xdata, &rsp_xdata, _gf_false);
        if (op_ret == -1) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, path %s", loc->path);
            op_errno = EIO;
            goto out;
        }
    }

    op_ret = sys_truncate(real_path, offset);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
               "truncate on %s failed", real_path);
        goto out;
    }

    op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf,
                         _gf_false);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "lstat on %s failed", real_path);
        goto out;
    }

    posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf);

    op_ret = 0;
out:
    SET_TO_OLD_FS_ID();

    /* NOTE(review): the response dictionary filled by
     * posix_cs_maintenance() is not passed to the caller (xdata is NULL
     * here). That is kept unchanged -- confirm whether it is intended. */
    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf,
                        NULL);

    /* Whatever posix_cs_maintenance() stored in rsp_xdata is not handed to
     * anyone, so release it here instead of leaking it. */
    if (rsp_xdata)
        dict_unref(rsp_xdata);

    return 0;
}
/*
 * posix_open - open fop.
 *
 * Rejects block and character device inodes with EINVAL, runs the
 * disk-space check when O_CREAT is requested, resolves @loc to a path on
 * the brick, refuses symlinks with ELOOP and opens the file (adding
 * O_DIRECT when the o_direct option is set in this->private). The
 * descriptor and the effective flags are stored in a newly allocated
 * struct posix_fd that is attached to the context of @fd.
 *
 * The result is delivered through STACK_UNWIND_STRICT(open, ...): op_ret 0
 * on success, -1 with op_errno on failure. A descriptor opened before a
 * later failure is closed again. The function itself always returns 0.
 */
int32_t
posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
           fd_t *fd, dict_t *xdata)
{
    struct posix_private *priv = NULL;
    struct posix_fd *pfd = NULL;
    struct iatt stbuf = {0};
    char *real_path = NULL;
    int32_t op_errno = 0;
    int32_t op_ret = -1;
    int32_t _fd = -1;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(this->private, out);
    VALIDATE_OR_GOTO(loc, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    /* device nodes cannot be opened through this fop */
    if (loc->inode != NULL) {
        switch (loc->inode->ia_type) {
            case IA_IFBLK:
            case IA_IFCHR:
                gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
                       "open received on a block/char file (%s)",
                       uuid_utoa(loc->inode->gfid));
                op_errno = EINVAL;
                goto out;
            default:
                break;
        }
    }

    if (flags & O_CREAT) {
        DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
    }

    MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
    if (real_path == NULL) {
        op_ret = -1;
        op_errno = ESTALE;
        goto out;
    }

    if (IA_ISLNK(stbuf.ia_type)) {
        op_ret = -1;
        op_errno = ELOOP;
        goto out;
    }

    /* from here on, any 'goto out' before the final assignment is a
     * failure */
    op_ret = -1;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    if (priv->o_direct)
        flags |= O_DIRECT;

    _fd = sys_open(real_path, flags, priv->force_create_mode);
    if (_fd == -1) {
        op_ret = -1;
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED,
               "open on %s, flags: %d", real_path, flags);
        goto out;
    }

    posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);

    pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
    if (pfd == NULL) {
        op_errno = errno;
        goto out;
    }

    pfd->flags = flags;
    pfd->fd = _fd;

    /* a failure to attach the context is only logged */
    if (fd_ctx_set(fd, this, (uint64_t)(long)pfd) != 0) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
               "failed to set the fd context path=%s fd=%p", real_path, fd);
    }

    GF_ATOMIC_INC(priv->nr_files);
    op_ret = 0;

out:
    if ((op_ret == -1) && (_fd != -1)) {
        sys_close(_fd);
    }

    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL);

    return 0;
}
/*
 * posix_readv - serve a readv fop from the backend descriptor stored in
 * the posix fd context of @fd.
 *
 * Up to @size bytes are read at @offset into a page-aligned iobuf. After a
 * successful read the file is stat'ed and the fop is unwound with the data
 * vector, the stat buffer, the iobref holding the buffer, and rsp_xdata.
 * When the read reaches or passes the file size (or the file is empty),
 * op_errno is set to ENOENT as an EOF hint for upper layers.
 *
 * Block/char inodes and zero-sized reads are rejected with EINVAL.
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT.
 */
int
posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
            off_t offset, uint32_t flags, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    int _fd = -1;
    struct posix_private *priv = NULL;
    struct iobuf *iobuf = NULL;
    struct iobref *iobref = NULL;
    struct iovec vec = {
        0,
    };
    struct posix_fd *pfd = NULL;
    struct iatt stbuf = {
        0,
    };
    struct iatt preop = {
        0,
    };
    int ret = -1;
    dict_t *rsp_xdata = NULL;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);
    VALIDATE_OR_GOTO(fd->inode, out);
    VALIDATE_OR_GOTO(this->private, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
        gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
               "readv received on a block/char file (%s)",
               uuid_utoa(fd->inode->gfid));
        op_errno = EINVAL;
        goto out;
    }

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd);
        goto out;
    }

    if (!size) {
        op_errno = EINVAL;
        gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT,
               "size=%" GF_PRI_SIZET, size);
        goto out;
    }

    iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE);
    if (!iobuf) {
        op_errno = ENOMEM;
        goto out;
    }

    _fd = pfd->fd;

    if (xdata) {
        op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
                   "pre-operation fstat failed on fd=%p", fd);
            goto out;
        }
        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
                                      &rsp_xdata, _gf_false);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            op_errno = EIO;
            goto out;
        }
    }

    op_ret = sys_pread(_fd, iobuf->ptr, size, offset);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED,
               "read failed on gfid=%s, "
               "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET
               ", "
               "buf=%p",
               uuid_utoa(fd->inode->gfid), fd, offset, size, iobuf->ptr);
        goto out;
    }

    GF_ATOMIC_ADD(priv->read_value, op_ret);

    vec.iov_base = iobuf->ptr;
    vec.iov_len = op_ret;

    /*
     * The iobref is what keeps the buffer referenced for the layers that
     * receive the vector, so a failure to build it must fail the fop
     * instead of unwinding a successful read without a buffer reference.
     */
    iobref = iobref_new();
    if (!iobref) {
        op_ret = -1;
        op_errno = ENOMEM;
        goto out;
    }

    ret = iobref_add(iobref, iobuf);
    if (ret) {
        op_ret = -1;
        op_errno = ENOMEM;
        goto out;
    }

    /*
     *  readv successful, and we need to get the stat of the file
     *  we read from
     */
    op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fstat failed on fd=%p", fd);
        goto out;
    }

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf);

    /* Hack to notify higher layers of EOF. */
    if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
        op_errno = ENOENT;

    op_ret = vec.iov_len;

out:
    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref,
                        rsp_xdata);

    if (iobref)
        iobref_unref(iobref);
    if (iobuf)
        iobuf_unref(iobuf);
    /*
     * Drop the response dict after unwinding, the same way posix_writev
     * and posix_copy_file_range do for theirs.
     */
    if (rsp_xdata)
        dict_unref(rsp_xdata);

    return 0;
}
/*
 * __posix_pwritev - write the @count buffers of @vector to @fd, one
 * sys_pwrite() per element, laid out back to back starting at @offset.
 *
 * Returns the number of bytes written as one contiguous run from @offset,
 * -EFAULT when @vector is NULL, or -errno when a write fails.
 *
 * A short write stops the loop: continuing with the next element would
 * place its data where the unwritten tail of the current element belongs.
 */
int32_t
__posix_pwritev(int fd, struct iovec *vector, int count, off_t offset)
{
    int32_t op_ret = 0;
    int idx = 0;
    ssize_t retval = 0;
    off_t internal_off = 0;

    if (!vector)
        return -EFAULT;

    internal_off = offset;
    for (idx = 0; idx < count; idx++) {
        retval = sys_pwrite(fd, vector[idx].iov_base, vector[idx].iov_len,
                            internal_off);
        if (retval == -1) {
            op_ret = -errno;
            goto err;
        }
        op_ret += retval;
        internal_off += retval;

        /* Partial write: report what landed and let the caller decide. */
        if ((size_t)retval < vector[idx].iov_len)
            break;
    }

err:
    return op_ret;
}
/*
 * __posix_writev - write @vector to @fd starting at @startoff.
 *
 * When @odirect is zero the work is delegated to __posix_pwritev().
 * Otherwise every element is first copied into a buffer obtained from
 * _page_aligned_alloc() (sized for the largest element) and written from
 * there with sys_pwrite().
 *
 * Returns the number of bytes written as one contiguous run from
 * @startoff, -EFAULT when @vector is NULL, or -errno on allocation or
 * write failure. A short write ends the loop so that later elements are
 * not written at a shifted offset.
 */
int32_t
__posix_writev(int fd, struct iovec *vector, int count, off_t startoff,
               int odirect)
{
    int32_t op_ret = 0;
    int idx = 0;
    size_t max_buf_size = 0;
    ssize_t retval = 0;
    char *buf = NULL;
    char *alloc_buf = NULL;
    off_t internal_off = 0;

    /* Check for the O_DIRECT flag during open() */
    if (!odirect)
        return __posix_pwritev(fd, vector, count, startoff);

    /* Same guard __posix_pwritev applies on the non-O_DIRECT path. */
    if (!vector)
        return -EFAULT;

    for (idx = 0; idx < count; idx++) {
        if (max_buf_size < vector[idx].iov_len)
            max_buf_size = vector[idx].iov_len;
    }

    alloc_buf = _page_aligned_alloc(max_buf_size, &buf);
    if (!alloc_buf) {
        op_ret = -errno;
        goto err;
    }

    internal_off = startoff;
    for (idx = 0; idx < count; idx++) {
        memcpy(buf, vector[idx].iov_base, vector[idx].iov_len);

        /* not sure whether writev works on O_DIRECT'd fd */
        retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off);
        if (retval == -1) {
            op_ret = -errno;
            goto err;
        }

        op_ret += retval;
        internal_off += retval;

        /* Partial write: stop instead of misplacing the next element. */
        if ((size_t)retval < vector[idx].iov_len)
            break;
    }

err:
    GF_FREE(alloc_buf);

    return op_ret;
}
/*
 * _fill_writev_xdata - build the response dict for a write-type fop.
 *
 * For each of GLUSTERFS_OPEN_FD_COUNT, GLUSTERFS_ACTIVE_FD_COUNT and
 * GLUSTERFS_WRITE_IS_APPEND that is present in the request @xdata, the
 * matching value (inode fd_count, inode active_fd_count, @is_append) is
 * stored in a newly created dict. A failure to set one key is logged and
 * does not prevent the remaining keys from being processed.
 *
 * Returns the new dict, or NULL when @fd / its inode / the inode gfid is
 * missing, when @xdata is NULL, or when the dict cannot be allocated.
 * The callers in this file release the returned dict with dict_unref().
 */
dict_t *
_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
{
    dict_t *rsp_xdata = NULL;
    int32_t ret = 0;
    inode_t *inode = NULL;

    if (fd)
        inode = fd->inode;

    if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) {
        /* The fields are now separated by a space in the message. */
        gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED,
                         "fd: %p inode: %p gfid:%s", fd, inode,
                         inode ? uuid_utoa(inode->gfid) : "N/A");
        goto out;
    }

    if (!xdata)
        goto out;

    rsp_xdata = dict_new();
    if (!rsp_xdata)
        goto out;

    if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) {
        ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
                              inode->fd_count);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                   "%s: Failed to set "
                   "dictionary value for %s",
                   uuid_utoa(inode->gfid), GLUSTERFS_OPEN_FD_COUNT);
        }
    }

    if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
        ret = dict_set_uint32(rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
                              inode->active_fd_count);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                   "%s: Failed to set "
                   "dictionary value for %s",
                   uuid_utoa(inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT);
        }
    }

    if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) {
        ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                   "%s: Failed to set "
                   "dictionary value for %s",
                   uuid_utoa(inode->gfid), GLUSTERFS_WRITE_IS_APPEND);
        }
    }
out:
    return rsp_xdata;
}
/*
 * posix_writev - serve a writev fop on the backend descriptor of @fd.
 *
 * Sequence: disk-space check, reject block/char inodes, fetch the posix
 * fd context, internal-write check, pre-op stat, optional
 * posix_cs_maintenance() (only when @xdata is given), the write itself,
 * response dict construction, post-op stat, ctime update and, when @flags
 * carries O_SYNC or O_DSYNC, an fsync.
 *
 * Locking (inode ctx write_atomic_lock):
 *  - GLUSTERFS_WRITE_IS_APPEND in @xdata: held from before the pre-op stat
 *    until right after the write.
 *  - GLUSTERFS_WRITE_UPDATE_ATOMIC in @xdata: held until after the post-op
 *    stat / ctime update.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT
 * with the pre-op and post-op stats and the response dict.
 */
int32_t
posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
             struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
             struct iobref *iobref, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    int _fd = -1;
    struct posix_private *priv = NULL;
    struct posix_fd *pfd = NULL;
    struct iatt preop = {
        0,
    };
    struct iatt postop = {
        0,
    };
    int ret = -1;
    dict_t *rsp_xdata = NULL;
    int is_append = 0;
    gf_boolean_t locked = _gf_false;
    gf_boolean_t write_append = _gf_false;
    gf_boolean_t update_atomic = _gf_false;
    posix_inode_ctx_t *ctx = NULL;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);
    VALIDATE_OR_GOTO(fd->inode, out);
    VALIDATE_OR_GOTO(vector, out);
    VALIDATE_OR_GOTO(this->private, out);

    priv = this->private;

    VALIDATE_OR_GOTO(priv, out);
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);

    if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
        gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
               "writev received on a block/char file (%s)",
               uuid_utoa(fd->inode->gfid));
        op_errno = EINVAL;
        goto out;
    }

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        /* Log the errno reported by the lookup, as the sibling fops do. */
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd);
        goto out;
    }

    _fd = pfd->fd;

    ret = posix_check_internal_writes(this, fd, _fd, xdata);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
               "possible overwrite from internal client, fd=%p", fd);
        op_ret = -1;
        op_errno = EBUSY;
        goto out;
    }

    if (xdata) {
        if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND))
            write_append = _gf_true;
        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
            update_atomic = _gf_true;
    }

    /* The write_is_append check and write must happen
       atomically. Else another write can overtake this
       write after the check and get written earlier.

       So lock before preop-stat and unlock after write.
    */

    /*
     * The update_atomic option is to instruct posix to do prestat,
     * write and poststat atomically. This is to prevent any modification to
     * ia_size and ia_blocks until poststat and the diff in their values
     * between pre and poststat could be of use for some translators (shard
     * as of today).
     */

    op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
    if (op_ret < 0) {
        op_errno = ENOMEM;
        goto out;
    }

    if (write_append || update_atomic) {
        locked = _gf_true;
        pthread_mutex_lock(&ctx->write_atomic_lock);
    }

    op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "pre-operation fstat failed on fd=%p", fd);
        goto out;
    }

    if (xdata) {
        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
                                      &rsp_xdata, _gf_false);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            op_errno = EIO;
            goto out;
        }
    }

    if (locked && write_append) {
        if (preop.ia_size == offset || (fd->flags & O_APPEND))
            is_append = 1;
    }

    op_ret = __posix_writev(_fd, vector, count, offset,
                            (pfd->flags & O_DIRECT));

    if (locked && (!update_atomic)) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }

    if (op_ret < 0) {
        op_errno = -op_ret;
        op_ret = -1;
        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED,
               "write failed: offset %" PRIu64 ",", offset);
        goto out;
    }

    /*
     * rsp_xdata is replaced below. Release whatever the maintenance check
     * may have left in it so that the reference is not lost.
     * NOTE(review): the contents of that dict were already discarded by
     * the plain overwrite; confirm whether they should be merged instead.
     */
    if (rsp_xdata) {
        dict_unref(rsp_xdata);
        rsp_xdata = NULL;
    }
    rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append);

    /* writev successful, we also need to get the stat of
     * the file we wrote to
     */
    ret = posix_fdstat(this, fd->inode, _fd, &postop);
    if (ret == -1) {
        op_ret = -1;
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "post-operation fstat failed on fd=%p", fd);
        goto out;
    }

    posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);

    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }

    if (flags & (O_SYNC | O_DSYNC)) {
        ret = sys_fsync(_fd);
        if (ret) {
            /* Capture errno before logging can overwrite it. */
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITEV_FAILED,
                   "fsync() in writev on fd %d failed", _fd);
            op_ret = -1;
            goto out;
        }
    }

    GF_ATOMIC_ADD(priv->write_value, op_ret);

out:
    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }

    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop,
                        rsp_xdata);

    if (rsp_xdata)
        dict_unref(rsp_xdata);
    return 0;
}
/*
 * posix_copy_file_range - serve a copy_file_range fop between the backend
 * descriptors of @fd_in (source) and @fd_out (destination).
 *
 * Sequence: disk-space check, device-file checks on both inodes, fetch
 * both posix fd contexts, internal-write check on the destination, pre-op
 * stat of the destination, optional posix_cs_maintenance() (only when
 * @xdata is given), one sys_copy_file_range() call, response dict
 * construction, post-op stats of destination and source, and the time
 * attribute update for both files.
 *
 * When @xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC the destination
 * inode's write_atomic_lock is held from before the pre-op stat until
 * after the time attribute update.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT
 * with the source stat, the destination pre/post stats and the response
 * dict.
 */
int32_t
posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
                      uint32_t flags, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    int _fd_in = -1;
    int _fd_out = -1;
    struct posix_private *priv = NULL;
    struct posix_fd *pfd_in = NULL;
    struct posix_fd *pfd_out = NULL;
    struct iatt preop_dst = {
        0,
    };
    struct iatt postop_dst = {
        0,
    };
    struct iatt stbuf = {
        0,
    };
    int ret = -1;
    dict_t *rsp_xdata = NULL;
    int is_append = 0;
    gf_boolean_t locked = _gf_false;
    gf_boolean_t update_atomic = _gf_false;
    posix_inode_ctx_t *ctx = NULL;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd_in, out);
    VALIDATE_OR_GOTO(fd_in->inode, out);
    VALIDATE_OR_GOTO(fd_out, out);
    VALIDATE_OR_GOTO(fd_out->inode, out);
    VALIDATE_OR_GOTO(this->private, out);

    priv = this->private;

    VALIDATE_OR_GOTO(priv, out);
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);

    if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
        goto out;

    if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
        goto out;

    ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
    if (ret < 0) {
        /* Log the errno reported by the lookup, as the sibling fops do. */
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd_in);
        goto out;
    }

    _fd_in = pfd_in->fd;

    ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd_out);
        goto out;
    }

    _fd_out = pfd_out->fd;

    /*
     * Currently, the internal write is checked via xdata which
     * is set by some xlator above. It could be due to several of
     * the reasons such as healing or a snapshot operation happening
     * using copy_file_range. As of now (i.e. writing the patch with
     * this change) none of the xlators above posix are using the
     * internal write with copy_file_range. In future it might
     * change. At least as of now the hope is that, when that happens
     * this function or fop does not require additional changes for
     * handling internal writes.
     */
    ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
               "possible overwrite from internal client, fd=%p", fd_out);
        op_ret = -1;
        op_errno = EBUSY;
        goto out;
    }

    if (xdata) {
        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
            update_atomic = _gf_true;
    }

    /*
     * The update_atomic option is to instruct posix to do prestat,
     * write and poststat atomically. This is to prevent any modification to
     * ia_size and ia_blocks until poststat and the diff in their values
     * between pre and poststat could be of use for some translators.
     * This is similar to the atomic write operation. Atomic write
     * (i.e. prestat + write + poststat) is used by shard as of now. In
     * case some xlator needs copy_file_range to be atomic from the prestat
     * and poststat perspective (i.e. prestat + copy_file_range + poststat)
     * then it has to send the "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
     */

    op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
    if (op_ret < 0) {
        op_errno = ENOMEM;
        goto out;
    }

    if (update_atomic) {
        ret = pthread_mutex_lock(&ctx->write_atomic_lock);
        if (!ret)
            locked = _gf_true;
        else {
            /*
             * pthread_mutex_lock() reports failure through its return
             * value, not errno. op_ret is 0 at this point (set by the
             * successful inode ctx lookup), so it has to be reset or the
             * fop would be unwound as a success.
             */
            op_ret = -1;
            op_errno = ret;
            gf_msg(this->name, GF_LOG_ERROR, ret, P_MSG_MUTEX_FAILED,
                   "failed to hold write atomic lock on %s",
                   uuid_utoa(fd_out->inode->gfid));
            goto out;
        }
    }

    op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "pre-operation fstat failed on fd=%p", fd_out);
        goto out;
    }

    /*
     * Since, only the destination file (fd_out) is undergoing
     * modification, the write related tests are done on that.
     * i.e. this is treated as if the destination file were
     * undergoing a write fop from the maintenance perspective.
     */
    if (xdata) {
        op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
                                      NULL, xdata, &rsp_xdata, _gf_false);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd_out);
            op_errno = EIO;
            goto out;
        }
    }

    /*
     * NOTE: This is just doing a single execution of copy_file_range
     *       system call. If the returned value of this system call is less
     *       than len, then should we keep doing it in a for loop until the
     *       copy_file_range of all the len bytes is done?
     *       Check the example program provided in the man page of
     *       copy_file_range.
     *       If so, then separate variables for both off_in and off_out
     *       should be used which are initialized to off_in and off_out
     *       that this function call receives, but then advanced by the
     *       value returned by sys_copy_file_range and then use that as
     *       off_in and off_out for next instance of copy_file_range execution.
     */
    op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
                                 flags);

    if (op_ret < 0) {
        /*
         * NOTE(review): this treats the return value as -errno, while the
         * other sys_* wrappers used in this file are checked against -1
         * and read errno. Confirm sys_copy_file_range's convention.
         */
        op_errno = -op_ret;
        op_ret = -1;
        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
               "copy_file_range failed: fd_in: %p (gfid: %s) ,"
               " fd_out %p (gfid:%s)",
               fd_in, uuid_utoa(fd_in->inode->gfid), fd_out,
               uuid_utoa(fd_out->inode->gfid));
        goto out;
    }

    /*
     * Let this be as it is for now. This function collects
     * information such as open fd count etc. So, even though
     * is_append does not apply to copy_file_range, for now,
     * allowing it to be recorded in the dict as _gf_false.
     *
     * rsp_xdata is replaced here. Release whatever the maintenance check
     * may have left in it so that the reference is not lost.
     * NOTE(review): the contents of that dict were already discarded by
     * the plain overwrite; confirm whether they should be merged instead.
     */
    if (rsp_xdata) {
        dict_unref(rsp_xdata);
        rsp_xdata = NULL;
    }
    rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);

    /* copy_file_range successful, we also need to get the stat of
     * the file we wrote to (i.e. destination file or fd_out).
     */
    ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
    if (ret == -1) {
        op_ret = -1;
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "post-operation fstat failed on fd=%p", fd_out);
        goto out;
    }

    /*
     * Also perform the stat on the source fd (i.e. fd_in). For now,
     * allowing it to be done within the locked region if the request
     * is for atomic operation (and update) of copy_file_range.
     */
    ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
    if (ret == -1) {
        op_ret = -1;
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "post-operation fstat failed on fd=%p", fd_in);
        goto out;
    }

    /*
     * The core logic of what time attributes are to be updated
     * on a fop is decided at client side xlator utime.
     * All the remaining fops call posix_set_ctime function
     * to update the {a,m,c}time. But, for all the other fops,
     * the operation is happening on only one file (or inode).
     * But here, there are 2 fds (source and destination). Hence
     * the new function below to update the appropriate times for
     * both the source and the destination file.
     * For the source file, if at all anything has to be updated,
     * it would be atime (as that file is only read, not updated).
     * For the destination file, the attributes that require the
     * modification would be mtime and ctime.
     * What times have to be changed is actually determined by
     * utime xlator. But, all of them would be in frame->root->flags.
     * So, currently posix assumes that, the atime flag is for
     * the source file and the other 2 flags are for the destination
     * file. Since, the assumption is rigid (i.e. atime for source
     * and {m,c}time for destination), the below function is called
     * posix_set_ctime_cfr (cfr standing for copy_file_range).
     * FUTURE TODO:
     * In future, some other functionality or fop might operate
     * simultaneously on 2 files. Then, depending upon what that new
     * fop does or what are its requirements, the below function might
     * require changes to become generic for consumption in case of
     * simultaneous operations on 2 files.
     */
    posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
                        NULL, pfd_out->fd, fd_out->inode, &postop_dst);

    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }

    /*
     * Record copy_file_range in priv->write_value for now.
     * If not needed, remove below section of code along with
     * this comment (or add comment to explain why it is not
     * needed).
     */
    GF_ATOMIC_ADD(priv->write_value, op_ret);

out:
    if (locked) {
        pthread_mutex_unlock(&ctx->write_atomic_lock);
        locked = _gf_false;
    }

    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
                        &preop_dst, &postop_dst, rsp_xdata);

    if (rsp_xdata)
        dict_unref(rsp_xdata);
    return 0;
}
/*
 * posix_statfs - answer a statfs fop with the statvfs() data of the path
 * that backs @loc, adjusted by brick configuration:
 *
 *  1. priv->disk_reserve (treated as a percentage of f_blocks) is taken
 *     off the free block count; the available count is capped to what
 *     remains, and both become 0 when the reserve is not exceeded.
 *  2. When priv->shared_brick_count is greater than 1, all block and
 *     file counters are divided by it.
 *  3. When priv->export_statfs is unset, all those counters are zeroed.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT.
 */
int32_t
posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
    struct posix_private *priv = NULL;
    struct statvfs vfs = {
        0,
    };
    char *real_path = NULL;
    uint64_t reserve_blks = 0;
    int reserve_pct = 0;
    int brick_share = 1;
    int32_t op_errno = 0;
    /* Kept under this name: MAKE_INODE_HANDLE is an opaque macro that may
     * refer to it. */
    int32_t op_ret = -1;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);
    VALIDATE_OR_GOTO(this->private, out);

    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
    if (real_path == NULL) {
        op_errno = ESTALE;
        op_ret = -1;
        goto out;
    }

    priv = this->private;

    op_ret = sys_statvfs(real_path, &vfs);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
               "statvfs failed on %s", real_path);
        goto out;
    }

    /* Step 1: hide the reserved share of the filesystem. */
    reserve_pct = priv->disk_reserve;
    reserve_blks = (vfs.f_blocks * reserve_pct) / 100;

    if (vfs.f_bfree <= reserve_blks) {
        vfs.f_bavail = 0;
        vfs.f_bfree = 0;
    } else {
        vfs.f_bfree -= reserve_blks;
        if (vfs.f_bfree < vfs.f_bavail)
            vfs.f_bavail = vfs.f_bfree;
    }

    /* Step 2: split the numbers between bricks sharing the filesystem. */
    brick_share = priv->shared_brick_count;
    if (brick_share > 1) {
        vfs.f_blocks /= brick_share;
        vfs.f_bfree /= brick_share;
        vfs.f_bavail /= brick_share;
        vfs.f_files /= brick_share;
        vfs.f_ffree /= brick_share;
        vfs.f_favail /= brick_share;
    }

    /* Step 3: report nothing at all when statfs export is disabled. */
    if (!priv->export_statfs) {
        vfs.f_blocks = vfs.f_bfree = vfs.f_bavail = 0;
        vfs.f_files = vfs.f_ffree = vfs.f_favail = 0;
    }

    op_ret = 0;

out:
    STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &vfs, NULL);
    return 0;
}
/*
 * posix_flush - flush fop. The only work done is looking up the posix fd
 * context of @fd: the fop succeeds when the context is found and fails
 * with the errno reported by the lookup otherwise.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT.
 */
int32_t
posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
    struct posix_fd *fd_ctx = NULL;
    int32_t err = 0;
    int32_t rc = -1;

    VALIDATE_OR_GOTO(frame, unwind);
    VALIDATE_OR_GOTO(this, unwind);
    VALIDATE_OR_GOTO(fd, unwind);

    if (posix_fd_ctx_get(fd, this, &fd_ctx, &err) < 0) {
        gf_msg(this->name, GF_LOG_WARNING, err, P_MSG_PFD_NULL,
               "pfd is NULL on fd=%p", fd);
        goto unwind;
    }

    rc = 0;

unwind:
    STACK_UNWIND_STRICT(flush, frame, rc, err, NULL);
    return 0;
}
/*
 * posix_release - release callback for a file fd.
 *
 * Detaches the posix fd context from @fd, appends it to the janitor_fds
 * list of the glusterfs context (under janitor_lock) and signals
 * janitor_cond. Finally the translator's nr_files counter is decremented
 * when private data is present.
 *
 * A non-NULL pfd->dir is only logged; the context is queued regardless.
 * Always returns 0.
 */
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
    glusterfs_ctx_t *gctx = NULL;
    struct posix_private *priv = NULL;
    struct posix_fd *fd_ctx = NULL;
    uint64_t raw_ctx = 0;

    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;
    gctx = THIS->ctx;

    if (fd_ctx_del(fd, this, &raw_ctx) < 0) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd);
        goto out;
    }

    fd_ctx = (struct posix_fd *)(long)raw_ctx;
    if (fd_ctx->dir != NULL) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL,
               "pfd->dir is %p (not NULL) for file fd=%p", fd_ctx->dir, fd);
    }

    /* Queue the context on the janitor list and signal its condition. */
    pthread_mutex_lock(&gctx->janitor_lock);
    INIT_LIST_HEAD(&fd_ctx->list);
    list_add_tail(&fd_ctx->list, &gctx->janitor_fds);
    pthread_cond_signal(&gctx->janitor_cond);
    pthread_mutex_unlock(&gctx->janitor_lock);

    if (priv != NULL) {
        GF_ATOMIC_DEC(priv->nr_files);
    }

out:
    return 0;
}
/*
 * posix_batch_fsync - queue an fsync request instead of running it inline.
 *
 * The call is wrapped into a stub (resuming in default_fsync) that is
 * appended to priv->fsyncs under fsync_mutex; fsync_queue_count is bumped
 * and fsync_cond is signalled. If the stub cannot be created the fop is
 * unwound with ENOMEM.
 *
 * Always returns 0.
 */
int
posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
                  dict_t *xdata)
{
    struct posix_private *priv = this->private;
    call_stub_t *pending = fop_fsync_stub(frame, default_fsync, fd, datasync,
                                          xdata);

    if (pending != NULL) {
        pthread_mutex_lock(&priv->fsync_mutex);
        list_add_tail(&pending->list, &priv->fsyncs);
        priv->fsync_queue_count++;
        pthread_cond_signal(&priv->fsync_cond);
        pthread_mutex_unlock(&priv->fsync_mutex);
        return 0;
    }

    STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
    return 0;
}
/*
 * posix_fsync - serve an fsync fop on the backend descriptor of @fd.
 *
 * When batch fsync mode is enabled and @xdata carries "batch-fsync", the
 * request is handed to posix_batch_fsync() and this function returns
 * without unwinding. Otherwise the file is stat'ed, synced with
 * sys_fdatasync() (@datasync non-zero) or sys_fsync(), and stat'ed again.
 *
 * On GF_DARWIN_HOST_OS builds the fop is unwound as successful without
 * syncing.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT
 * with the pre-op and post-op stats.
 */
int32_t
posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
            dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    int _fd = -1;
    struct posix_fd *pfd = NULL;
    int ret = -1;
    struct iatt preop = {
        0,
    };
    struct iatt postop = {
        0,
    };
    struct posix_private *priv = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

#ifdef GF_DARWIN_HOST_OS
    /* Always return success in case of fsync in MAC OS X */
    op_ret = 0;
    goto out;
#endif

    priv = this->private;

    if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) {
        posix_batch_fsync(frame, this, fd, datasync, xdata);
        return 0;
    }

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd not found in fd's ctx");
        goto out;
    }

    _fd = pfd->fd;

    op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
               "pre-operation fstat failed on fd=%p", fd);
        goto out;
    }

    if (datasync) {
        op_ret = sys_fdatasync(_fd);
        if (op_ret == -1) {
            op_errno = errno;
            /* The pointer and the word "failed" are now separated. */
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
                   "fdatasync on fd=%p failed", fd);
            goto out;
        }
    } else {
        op_ret = sys_fsync(_fd);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
                   "fsync on fd=%p "
                   "failed",
                   fd);
            goto out;
        }
    }

    op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
               "post-operation fstat failed on fd=%p", fd);
        goto out;
    }

    op_ret = 0;

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL);

    return 0;
}
/*
 * File-local integer, zero-initialized as a static object. It is not
 * referenced in the part of the file reviewed here.
 * NOTE(review): going by its name it presumably throttles log messages for
 * xattr operations failing with ENOTSUP -- confirm against its users.
 */
static int gf_posix_xattr_enotsup_log;
/*
 * dict_foreach() callback used by posix_setxattr: forwards one key/value
 * pair to posix_handle_pair() together with the path, flags and stat
 * buffer carried in the posix_xattr_filler_t passed as @tmp.
 * Returns whatever posix_handle_pair() returns. @d is not used.
 */
static int
_handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
{
    posix_xattr_filler_t *args = (posix_xattr_filler_t *)tmp;

    return posix_handle_pair(args->this, args->real_path, k, v, args->flags,
                             args->stbuf);
}
#ifdef GF_DARWIN_HOST_OS
/*
 * Translate GF_XATTR_* request flags into the XATTR_* values of the
 * Darwin headers. GF_XATTR_CREATE, GF_XATTR_REPLACE and XATTR_REPLACE are
 * stripped from @flags first; every other bit is passed through as is.
 */
static int
map_xattr_flags(int flags)
{
    const int strip_mask = GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE;
    int mapped = flags & ~strip_mask;

    if (flags & GF_XATTR_CREATE)
        mapped |= XATTR_CREATE;

    if (flags & GF_XATTR_REPLACE)
        mapped |= XATTR_REPLACE;

    return mapped;
}
#endif
/*
 * Helper for posix_setxattr: read the ACL xattr @acl_key from @real_path
 * and store its value in @xattr under the same key.
 *
 * The value is read into an ACL_BUFFER_MAX sized buffer which is then
 * shrunk to the actual size. On success the buffer is handed to the dict;
 * on every failure path it is freed here.
 *
 * Returns 0 on success, -1 on failure (already logged where applicable).
 */
static int
posix_setxattr_fill_acl(xlator_t *this, const char *real_path, dict_t *xattr,
                        char *acl_key)
{
    char *acl_buf = NULL;
    char *shrunk = NULL;
    ssize_t acl_size = 0;
    int ret = 0;

    acl_buf = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char);
    if (!acl_buf)
        return -1;

    acl_size = sys_lgetxattr(real_path, acl_key, acl_buf, ACL_BUFFER_MAX);
    if (acl_size < 0) {
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
               "Posix acl is not set "
               "properly at the backend");
        goto err;
    }

    /* If acl_size is more than max buffer size, just ignore it */
    if (acl_size >= ACL_BUFFER_MAX) {
        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW,
               "size of acl is more"
               "than the buffer");
        goto err;
    }

    /*
     * Shrink to the real size. A zero-sized realloc has an
     * implementation-defined result, so the buffer is left alone then.
     * On failure the original buffer is still valid and is freed below.
     */
    if (acl_size > 0) {
        shrunk = GF_REALLOC(acl_buf, acl_size);
        if (!shrunk)
            goto err;
        acl_buf = shrunk;
    }

    ret = dict_set_bin(xattr, acl_key, acl_buf, acl_size);
    if (ret) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
               "failed to set"
               "xdata for acl");
        goto err;
    }

    return 0;

err:
    GF_FREE(acl_buf);
    return -1;
}

/*
 * posix_setxattr - serve a setxattr fop on the path backing @loc.
 *
 * GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY and GF_XATTR_IOSTATS_DUMP_KEY are
 * removed from @dict before anything is written.
 *
 * Two modes:
 *  - @dict carries GF_CS_OBJECT_UPLOAD_COMPLETE: under the inode lock the
 *    file must be in GF_CS_LOCAL state and its mtime must match the value
 *    passed; then the size and remote-path xattrs are set, the file is
 *    truncated to 0 and its state is set to GF_CS_REMOTE.
 *  - otherwise: every pair of @dict is applied through
 *    _handle_setxattr_keyvalue_pair(); the response dict gets the pre/post
 *    stats and, when an ACL key was part of the request, the
 *    corresponding POSIX ACL xattr read back from the backend.
 *
 * Always returns 0; the result is delivered through STACK_UNWIND_STRICT.
 */
int32_t
posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
               int flags, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    char *real_path = NULL;
    struct iatt preop = {0};
    struct iatt postop = {0};
    int32_t ret = 0;
    dict_t *xattr = NULL;
    posix_xattr_filler_t filler = {
        0,
    };
    struct posix_private *priv = NULL;
    struct iatt tmp_stbuf = {
        0,
    };
    data_t *tdata = NULL;
    char stime[4096];
    char sxattr[4096];
    gf_cs_obj_state state = -1;
    char remotepath[4096] = {0};
    size_t path_len = 0;
    int len;

    DECLARE_OLD_FS_ID_VAR;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(this->private, out);
    VALIDATE_OR_GOTO(loc, out);
    VALIDATE_OR_GOTO(dict, out);

    priv = this->private;
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);

    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
    if (!real_path) {
        op_ret = -1;
        op_errno = ESTALE;
        goto out;
    }

    posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);

    op_ret = -1;

    dict_del(dict, GFID_XATTR_KEY);
    dict_del(dict, GF_XATTR_VOL_ID_KEY);
    /* the io-stats-dump key should not reach disk */
    dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY);

    tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
    if (tdata) {
        /*TODO: move the following to a different function */
        LOCK(&loc->inode->lock);
        {
            state = posix_cs_check_status(this, real_path, NULL, &preop);
            if (state != GF_CS_LOCAL) {
                op_errno = EINVAL;
                ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
                if (ret) {
                    gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
                }
                goto unlock;
            }

            ret = posix_pstat(this, loc->inode, loc->gfid, real_path,
                              &tmp_stbuf, _gf_true);
            if (ret) {
                op_errno = EINVAL;
                goto unlock;
            }

            snprintf(stime, sizeof(stime), "%" PRId64, tmp_stbuf.ia_mtime);

            /*TODO: may be should consider nano-second also */
            if (strncmp(stime, tdata->data, tdata->len) != 0) {
                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                       "mtime "
                       "passed is different from seen by file now."
                       " Will skip truncating the file");
                ret = -1;
                op_errno = EINVAL;
                goto unlock;
            }

            /*
             * The remote path is loc->path without its leading '/'. It is
             * staged in remotepath[] and path_len bytes of that buffer are
             * written out, so validate the length before any xattr is
             * modified.
             */
            if (!loc->path) {
                ret = -1;
                op_errno = EINVAL;
                goto unlock;
            }
            path_len = strlen(loc->path);
            if (path_len >= sizeof(remotepath)) {
                gf_msg(this->name, GF_LOG_ERROR, ENAMETOOLONG,
                       P_MSG_INVALID_ARGUMENT,
                       "path too long for remote path xattr");
                ret = -1;
                op_errno = ENAMETOOLONG;
                goto unlock;
            }

            len = sprintf(sxattr, "%" PRIu64, tmp_stbuf.ia_size);

            ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len,
                                flags);
            if (ret) {
                /* Capture errno before logging can overwrite it. */
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                       "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE,
                       ret);
                goto unlock;
            }

            if (loc->path[0] == '/') {
                /* remotepath[] is zero-filled, so the copied string and
                 * the byte after it are NUL terminated. */
                memcpy(remotepath, loc->path + 1, path_len - 1);
                remotepath[path_len - 1] = '\0';
                gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s",
                             remotepath);
            }

            ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath,
                                path_len, flags);
            if (ret) {
                /* Report the real failure instead of errno 0. */
                op_errno = errno;
                gf_log("POSIX", GF_LOG_ERROR,
                       "setxattr failed - %s"
                       " %d",
                       GF_CS_OBJECT_REMOTE, ret);
                goto unlock;
            }

            ret = sys_truncate(real_path, 0);
            if (ret) {
                op_errno = errno;
                gf_log("POSIX", GF_LOG_ERROR,
                       "truncate failed - %s"
                       " %d",
                       real_path, ret);
                ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE);
                if (ret) {
                    gf_log("POSIX", GF_LOG_ERROR,
                           "removexattr "
                           "failed post processing- %s"
                           " %d",
                           GF_CS_OBJECT_REMOTE, ret);
                }
                goto unlock;
            } else {
                state = GF_CS_REMOTE;
                ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
                if (ret) {
                    gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
                }
            }
        }
    unlock:
        UNLOCK(&loc->inode->lock);
        /*
         * NOTE(review): op_ret is still -1 here even when every step above
         * succeeded -- confirm whether the caller of this mode relies on
         * that or whether success should be reported.
         */
        goto out;
    }

    filler.real_path = real_path;
    filler.this = this;
    filler.stbuf = &preop;

#ifdef GF_DARWIN_HOST_OS
    filler.flags = map_xattr_flags(flags);
#else
    filler.flags = flags;
#endif
    op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler);
    if (op_ret < 0) {
        op_errno = -op_ret;
        op_ret = -1;
        goto out;
    }

    xattr = dict_new();
    if (!xattr)
        goto out;

    /*
     * FIXFIX: Send the stbuf info in the xdata for now
     * This is used by DHT to redirect FOPs if the file is being migrated
     * Ignore errors for now
     */
    ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop,
                      _gf_false);
    if (ret)
        goto out;

    ret = posix_set_iatt_in_dict(xattr, &preop, &postop);

    /*
     * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which
     * the access-control xlator is not aware of. To update its context
     * correctly, POSIX_ACL_*_XATTR is stored in xdata which is sent in the
     * call_back path. Failures here leave op_ret untouched.
     */
    if (dict_get(dict, GF_POSIX_ACL_ACCESS)) {
        if (posix_setxattr_fill_acl(this, real_path, xattr,
                                    POSIX_ACL_ACCESS_XATTR) < 0)
            goto out;
    }

    if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) {
        if (posix_setxattr_fill_acl(this, real_path, xattr,
                                    POSIX_ACL_DEFAULT_XATTR) < 0)
            goto out;
    }
out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr);

    if (xattr)
        dict_unref(xattr);

    return 0;
}
/*
 * posix_xattr_get_real_filename - find the on-disk spelling of a file
 * name, ignoring case.
 *
 * The wanted name is the part of @key that follows
 * GF_XATTR_GET_REAL_FILENAME_KEY. The directory backing @loc is scanned
 * and the first entry matching under strcasecmp() is stored in @dict
 * under @key as a dynamically allocated string.
 *
 * Returns the length of the stored name plus one on success, otherwise a
 * negative errno: -ESTALE (no backend path), -ENOENT (no match),
 * -ENOMEM (allocation or dict failure), or -errno from the handle
 * resolution / opendir. @frame and @xdata are not used.
 */
int
posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
                              const char *key, dict_t *dict, dict_t *xdata)
{
    /*
     * op_ret is not assigned by any statement visible in this function;
     * presumably MAKE_INODE_HANDLE updates it -- confirm against the
     * macro definition. It is kept under this exact name for that reason.
     */
    int op_ret = -1;
    char *real_path = NULL;
    const char *wanted = NULL;
    char *match = NULL;
    gf_boolean_t dup_failed = _gf_false;
    DIR *dirp = NULL;
    struct dirent *entry = NULL;
    struct dirent scratch[2] = {
        {
            0,
        },
    };

    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
    if (real_path == NULL)
        return -ESTALE;

    if (op_ret == -1) {
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
               "posix_xattr_get_real_filename (lstat) on %s failed", real_path);
        return -errno;
    }

    dirp = sys_opendir(real_path);
    if (dirp == NULL)
        return -errno;

    wanted = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY);

    while (1) {
        errno = 0;
        entry = sys_readdir(dirp, scratch);
        if (errno != 0 || entry == NULL)
            break;

        if (strcasecmp(entry->d_name, wanted) != 0)
            continue;

        /* Copy the name while the directory stream is still open. */
        match = gf_strdup(entry->d_name);
        if (match == NULL)
            dup_failed = _gf_true;
        break;
    }

    (void)sys_closedir(dirp);

    if (dup_failed)
        return -ENOMEM;

    if (match == NULL)
        return -ENOENT;

    if (dict_set_dynstr(dict, (char *)key, match)) {
        GF_FREE(match);
        return -ENOMEM;
    }

    return (int)(strlen(match) + 1);
}
/*
 * posix_get_ancestry_directory - build the ancestry of the directory
 * @leaf_inode through posix_make_ancestryfromgfid().
 *
 * When @type has POSIX_ANCESTRY_PATH set and @path is not NULL, *path
 * receives a gf_strdup()'ed copy of the resulting path with its last
 * character removed, unless the path is exactly "/".
 *
 * Returns the value of posix_make_ancestryfromgfid(); negative on
 * failure, in which case *path is left untouched.
 */
int
posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode,
                             gf_dirent_t *head, char **path, int type,
                             int32_t *op_errno, dict_t *xdata)
{
    ssize_t handle_size = 0;
    struct posix_private *priv = NULL;
    inode_t *inode = NULL;
    int ret = -1;
    size_t dirlen = 0;
    /*
     * The callee has always been told the buffer holds PATH_MAX + 1 bytes,
     * so the buffer is sized to match and the size is derived from it.
     */
    char dirpath[PATH_MAX + 1] = {
        0,
    };

    priv = this->private;

    handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);

    ret = posix_make_ancestryfromgfid(
        this, dirpath, sizeof(dirpath), head, type | POSIX_ANCESTRY_PATH,
        leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table,
        &inode, xdata, op_errno);
    if (ret < 0)
        goto out;

    /* there is already a reference in loc->inode */
    inode_unref(inode);

    if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
        if (strcmp(dirpath, "/")) {
            /* Drop the last character; an empty path has none to drop. */
            dirlen = strlen(dirpath);
            if (dirlen > 0)
                dirpath[dirlen - 1] = '\0';
        }

        *path = gf_strdup(dirpath);
    }

out:
    return ret;
}
/**
 * posix_links_in_same_directory - collect the hard links of an inode that
 * live in one directory.
 *
 * Scans @dirpath for entries whose d_ino equals @stbuf->st_ino, stopping
 * after @count matches or at end of directory.
 *
 * For every match:
 *  - with POSIX_ANCESTRY_DENTRY in @type, a gf_dirent_t is appended to
 *    @head carrying a reference on @leaf_inode, the xattrs returned by
 *    posix_xattr_fill() and an iatt built from @stbuf;
 *  - with POSIX_ANCESTRY_PATH in @type, the entry's path (relative to
 *    priv->base_path_length bytes into @dirpath) is stored in *path; further
 *    matches are appended as a colon separated list.
 *
 * @parent is accepted for interface compatibility and is not used here.
 *
 * Returns 0 on success and -1 on failure with *op_errno set. A failure
 * detected while scanning is preserved even when the directory stream is
 * closed successfully afterwards.
 */
int32_t
posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode,
                              inode_t *parent, struct stat *stbuf,
                              gf_dirent_t *head, char **path, int type,
                              dict_t *xdata, int32_t *op_errno)
{
    int op_ret = -1;
    int close_errno = 0;
    gf_dirent_t *gf_entry = NULL;
    xlator_t *this = NULL;
    struct posix_private *priv = NULL;
    DIR *dirp = NULL;
    struct dirent *entry = NULL;
    struct dirent scratch[2] = {
        {
            0,
        },
    };
    char temppath[PATH_MAX] = {
        0,
    };
    char scr[PATH_MAX * 4] = {
        0,
    };

    this = THIS;
    priv = this->private;

    dirp = sys_opendir(dirpath);
    if (!dirp) {
        *op_errno = errno;
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED,
               "could not opendir %s", dirpath);
        goto out;
    }

    while (count > 0) {
        /* errno is cleared so that a NULL entry with errno still 0 can be
         * told apart from a read error; both end the scan. */
        errno = 0;
        entry = sys_readdir(dirp, scratch);
        if (!entry || errno != 0)
            break;

        if (entry->d_ino != stbuf->st_ino)
            continue;

        /* Linking an inode here, can cause a race in posix_acl.
           Parent inode gets linked here, but before
           it reaches posix_acl_readdirp_cbk, create/lookup can
           come on a leaf-inode, as parent-inode-ctx not yet updated
           in posix_acl_readdirp_cbk, create and lookup can fail
           with EACCESS. So do the inode linking in the quota xlator

          linked_inode = inode_link (leaf_inode, parent,
                                     entry->d_name, NULL);

          GF_ASSERT (linked_inode == leaf_inode);
          inode_unref (linked_inode);*/

        if (type & POSIX_ANCESTRY_DENTRY) {
            loc_t loc = {
                0,
            };

            loc.inode = inode_ref(leaf_inode);
            gf_uuid_copy(loc.gfid, leaf_inode->gfid);

            (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath,
                           entry->d_name);

            gf_entry = gf_dirent_for_name(entry->d_name);
            if (!gf_entry) {
                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL");
                op_ret = -1;
                *op_errno = ENOMEM;
                inode_unref(loc.inode);
                goto out;
            }
            gf_entry->inode = inode_ref(leaf_inode);
            gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1,
                                              xdata, NULL);
            iatt_from_stat(&(gf_entry->d_stat), stbuf);

            list_add_tail(&gf_entry->list, &head->list);
            loc_wipe(&loc);
        }

        if (type & POSIX_ANCESTRY_PATH) {
            (void)snprintf(temppath, sizeof(temppath), "%s/%s",
                           &dirpath[priv->base_path_length], entry->d_name);
            if (!*path) {
                *path = gf_strdup(temppath);
            } else {
                /* creating a colon separated */
                /* list of hard links */
                (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath);

                GF_FREE(*path);
                *path = gf_strdup(scr);
            }
            if (!*path) {
                op_ret = -1;
                *op_errno = ENOMEM;
                goto out;
            }
        }

        count--;
    }

    op_ret = 0;
out:
    if (dirp) {
        /* The closedir() status is kept separate from op_ret so that a
         * successful close cannot turn an earlier failure into success. */
        if (sys_closedir(dirp) == -1) {
            close_errno = errno;
            gf_msg(this->name, GF_LOG_WARNING, close_errno, P_MSG_CLOSE_FAILED,
                   "closedir failed");
            if (op_ret == 0) {
                op_ret = -1;
                *op_errno = close_errno;
            }
        }
    }

    return op_ret;
}
/**
 * posix_get_ancestry_non_directory - resolve the ancestry of a
 * non-directory inode through its parent-gfid xattrs.
 *
 * Lists the xattrs of the inode's handle path and, for every key that starts
 * with PGFID_XATTR_KEY_PREFIX:
 *  - reads the 32-bit link count stored under the key (network byte order),
 *  - parses the parent gfid from the remainder of the key,
 *  - builds the parent directory's real path with
 *    posix_make_ancestryfromgfid(), and
 *  - lets posix_links_in_same_directory() add the matching entries to
 *    @head / *path.
 *
 * A parent whose ancestry cannot be built is skipped; the result of
 * posix_links_in_same_directory() is not checked (best effort).
 *
 * Returns 0 on success (including "no xattrs at all") and -1 on failure with
 * *op_errno set.
 */
int
posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
                                 gf_dirent_t *head, char **path, int type,
                                 int32_t *op_errno, dict_t *xdata)
{
    size_t remaining_size = 0;
    /* op_ret must keep this name: MAKE_INODE_HANDLE is a macro expanded in
     * this scope (NOTE(review): it appears to assign op_ret - confirm). */
    int op_ret = -1, pathlen = -1;
    ssize_t handle_size = 0;
    uuid_t pgfid = {
        0,
    };
    int nlink_samepgfid = 0;
    struct stat stbuf = {
        0,
    };
    char *list = NULL;
    int32_t list_offset = 0;
    struct posix_private *priv = NULL;
    ssize_t size = 0;
    inode_t *parent = NULL;
    loc_t *loc = NULL;
    char *leaf_path = NULL;
    char key[4096] = {
        0,
    };
    char dirpath[PATH_MAX] = {
        0,
    };
    char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = {
        0,
    };
    int len;
    size_t dirlen = 0;

    priv = this->private;

    /* A temporary loc carrying only the gfid is enough to build the handle
     * path of the leaf. */
    loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char);
    if (loc == NULL) {
        op_ret = -1;
        *op_errno = ENOMEM;
        goto out;
    }

    gf_uuid_copy(loc->gfid, leaf_inode->gfid);

    MAKE_INODE_HANDLE(leaf_path, this, loc, NULL);
    if (!leaf_path) {
        GF_FREE(loc);
        *op_errno = ESTALE;
        goto out;
    }
    GF_FREE(loc);

    /* First call only asks for the size of the name list. */
    size = sys_llistxattr(leaf_path, NULL, 0);
    if (size == -1) {
        *op_errno = errno;
        if ((errno == ENOTSUP) || (errno == ENOSYS)) {
            GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
                                GF_LOG_WARNING,
                                "Extended attributes not "
                                "supported (try remounting brick"
                                " with 'user_xattr' flag)");

        } else {
            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
                   "listxattr failed on"
                   "%s",
                   leaf_path);
        }

        goto out;
    }

    if (size == 0) {
        op_ret = 0;
        goto out;
    }

    /* alloca() gives no failure indication, so there is nothing to check. */
    list = alloca(size);

    size = sys_llistxattr(leaf_path, list, size);
    if (size < 0) {
        op_ret = -1;
        *op_errno = errno;
        goto out;
    }
    remaining_size = size;
    list_offset = 0;

    op_ret = sys_lstat(leaf_path, &stbuf);
    if (op_ret == -1) {
        *op_errno = errno;
        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
               "lstat failed on %s", leaf_path);
        goto out;
    }

    while (remaining_size > 0) {
        /* The list is a sequence of NUL-terminated names. */
        snprintf(key, sizeof(key), "%s", list + list_offset);
        if (strncmp(key, PGFID_XATTR_KEY_PREFIX,
                    SLEN(PGFID_XATTR_KEY_PREFIX)) != 0)
            goto next;

        op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid,
                               sizeof(nlink_samepgfid));
        if (op_ret == -1) {
            *op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                   "getxattr failed on "
                   "%s: key = %s ",
                   leaf_path, key);
            goto out;
        }

        nlink_samepgfid = ntoh32(nlink_samepgfid);

        snprintf(pgfidstr, sizeof(pgfidstr), "%s",
                 key + SLEN(PGFID_XATTR_KEY_PREFIX));
        gf_uuid_parse(pgfidstr, pgfid);

        handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);

        /* constructing the absolute real path of parent dir */
        snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path);
        /* Space left in dirpath after the base path. dirpath holds
         * PATH_MAX bytes, so the remainder is derived from its real size
         * rather than from PATH_MAX + 1. */
        pathlen = (int)sizeof(dirpath) - priv->base_path_length;

        op_ret = posix_make_ancestryfromgfid(
            this, dirpath + priv->base_path_length, pathlen, head,
            type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path,
            leaf_inode->table, &parent, xdata, op_errno);
        if (op_ret < 0) {
            goto next;
        }

        /* Drop the last character of the parent path (presumably a
         * trailing '/'); guard against an empty string. */
        dirlen = strlen(dirpath);
        if (dirlen > 0)
            dirpath[dirlen - 1] = '\0';

        posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode,
                                      parent, &stbuf, head, path, type, xdata,
                                      op_errno);

        if (parent != NULL) {
            inode_unref(parent);
            parent = NULL;
        }

    next:
        len = strlen(key);
        remaining_size -= (len + 1);
        list_offset += (len + 1);
    } /* while (remaining_size > 0) */

    op_ret = 0;

out:
    return op_ret;
}
/**
 * posix_get_ancestry - dispatch an ancestry request by inode type.
 *
 * Directories are handled by posix_get_ancestry_directory(). Anything else
 * goes to posix_get_ancestry_non_directory(), but only when the
 * update_pgfid_nlinks option is enabled; otherwise -1 is returned without
 * touching *op_errno.
 *
 * Whenever the result is non-zero, a path already stored in *path is freed
 * and *path is reset to NULL.
 */
int
posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head,
                   char **path, int type, int32_t *op_errno, dict_t *xdata)
{
    struct posix_private *conf = this->private;
    int rc = -1;

    if (IA_ISDIR(leaf_inode->ia_type)) {
        rc = posix_get_ancestry_directory(this, leaf_inode, head, path, type,
                                          op_errno, xdata);
    } else if (conf->update_pgfid_nlinks) {
        rc = posix_get_ancestry_non_directory(this, leaf_inode, head, path,
                                              type, op_errno, xdata);
    }

    if ((rc != 0) && (path != NULL) && (*path != NULL)) {
        GF_FREE(*path);
        *path = NULL;
    }

    return rc;
}
/**
 * posix_getxattr - this function returns a dictionary with all the
 *                  key:value pair present as xattr. used for
 *                  both 'listxattr' and 'getxattr'.
 *
 * @frame: call frame; always unwound through the getxattr callback.
 * @this:  posix translator instance.
 * @loc:   object whose xattrs are requested.
 * @name:  xattr to fetch, or NULL to fetch every xattr of the object.
 * @xdata: optional request dictionary; when present and the fop succeeded,
 *         posix_xattr_fill() builds the response dictionary from it.
 *
 * Flow:
 *  1. keys rejected by posix_handle_georep_xattrs(),
 *     posix_handle_mdata_xattr() or posix_is_gfid2path_xattr() fail early;
 *  2. a series of virtual keys (POSIX ACLs, real filename, open-fd count,
 *     pathinfo, node uuid, gfid-to-path, gfid2path, ancestry path, object
 *     signature) are answered without reading the named xattr from disk;
 *  3. otherwise a named xattr is read with sys_lgetxattr(), or, when @name
 *     is NULL, every listed xattr is read and added to the dictionary.
 *
 * GFID_XATTR_KEY and GF_XATTR_VOL_ID_KEY are removed from the result before
 * it is returned. The function itself always returns 0; the outcome is
 * delivered via STACK_UNWIND_STRICT (op_ret is the size of the last value
 * read, or -1 with op_errno set).
 */
int32_t
posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
               const char *name, dict_t *xdata)
{
    struct posix_private *priv = NULL;
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    char *value = NULL;
    char *real_path = NULL;
    dict_t *dict = NULL;
    int ret = -1;
    char *path = NULL;
    char *rpath = NULL;
    ssize_t size = 0;
    char *list = NULL;
    int32_t list_offset = 0;
    size_t remaining_size = 0;
    char *host_buf = NULL;
    char *keybuffer = NULL;
    int keybuff_len;
    char *value_buf = NULL;
    gf_boolean_t have_val = _gf_false;
    struct iatt buf = {
        0,
    };
    dict_t *xattr_rsp = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);
    VALIDATE_OR_GOTO(this->private, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);
    MAKE_INODE_HANDLE(real_path, this, loc, NULL);

    op_ret = -1;
    priv = this->private;

    ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true);
    if (ret == -1) {
        op_ret = -1;
        /* errno should be set from the above function*/
        goto out;
    }

    ret = posix_handle_mdata_xattr(frame, name, &op_errno);
    if (ret == -1) {
        op_ret = -1;
        /* errno should be set from the above function*/
        goto out;
    }

    if (name && posix_is_gfid2path_xattr(name)) {
        op_ret = -1;
        op_errno = ENOATTR;
        goto out;
    }

    dict = dict_new();
    if (!dict) {
        op_errno = ENOMEM;
        goto out;
    }

    /* POSIX ACL request */
    if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) {
        ret = posix_pacl_get(real_path, -1, name, &value);
        if (ret || !value) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
                   "could not get acl (%s) for"
                   "%s",
                   name, real_path);
            op_ret = -1;
            goto out;
        }

        ret = dict_set_dynstr(dict, (char *)name, value);
        if (ret < 0) {
            GF_FREE(value);
            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
                   "could not set acl (%s) for"
                   "%s in dictionary",
                   name, real_path);
            op_ret = -1;
            op_errno = ENOMEM;
            goto out;
        }

        size = ret;
        goto done;
    }

    /* real (on-disk) filename lookup */
    if (loc->inode && name &&
        (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY,
                 SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
        ret = posix_xattr_get_real_filename(frame, this, loc, name, dict,
                                            xdata);
        if (ret < 0) {
            op_ret = -1;
            op_errno = -ret;
            if (op_errno == ENOENT) {
                gf_msg_debug(this->name, 0,
                             "Failed to get "
                             "real filename (%s, %s)",
                             loc->path, name);
            } else {
                gf_msg(this->name, GF_LOG_WARNING, op_errno,
                       P_MSG_GETTING_FILENAME_FAILED,
                       "Failed to get real filename (%s, %s):", loc->path,
                       name);
            }

            goto out;
        }

        size = ret;
        goto done;
    }

    /* open-fd indicator: 1 when the inode has fds, 0 otherwise */
    if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
        if (!fd_list_empty(loc->inode)) {
            ret = dict_set_uint32(dict, (char *)name, 1);
            if (ret < 0) {
                gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                       "Failed to set "
                       "dictionary value for %s",
                       name);
                op_errno = ENOMEM;
                goto out;
            }
        } else {
            ret = dict_set_uint32(dict, (char *)name, 0);
            if (ret < 0) {
                gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                       "Failed to set "
                       "dictionary value for %s",
                       name);
                op_errno = ENOMEM;
                goto out;
            }
        }
        goto done;
    }

    /* pathinfo: "<POSIX(base):host-or-uuid:real path>" */
    if (loc->inode && name && (XATTR_IS_PATHINFO(name))) {
        VALIDATE_OR_GOTO(this->private, out);
        if (LOC_HAS_ABSPATH(loc)) {
            MAKE_REAL_PATH(rpath, this, loc->path);
        } else {
            rpath = real_path;
        }
        size = gf_asprintf(
            &host_buf, "<POSIX(%s):%s:%s>", priv->base_path,
            ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid))
                 ? uuid_utoa(priv->glusterd_uuid)
                 : priv->hostname),
            rpath);
        if (size < 0) {
            op_errno = ENOMEM;
            goto out;
        }
        ret = dict_set_dynstr(dict, (char *)name, host_buf);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                   "could not set value"
                   " (%s) in dictionary",
                   host_buf);
            GF_FREE(host_buf);
            op_errno = ENOMEM;
            goto out;
        }

        goto done;
    }

    /* node uuid */
    if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) &&
        !gf_uuid_is_null(priv->glusterd_uuid)) {
        size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid));
        if (size == -1) {
            op_errno = ENOMEM;
            goto out;
        }
        ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf);
        if (ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
                   "could not set value"
                   "(%s) in dictionary",
                   host_buf);
            GF_FREE(host_buf);
            op_errno = -ret;
            goto out;
        }
        goto done;
    }

    /* gfid -> path from the inode table */
    if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) {
        ret = inode_path(loc->inode, NULL, &path);
        if (ret < 0) {
            op_errno = -ret;
            gf_msg(this->name, GF_LOG_WARNING, op_errno,
                   P_MSG_INODE_PATH_GET_FAILED,
                   "%s: could not get "
                   "inode path",
                   uuid_utoa(loc->inode->gfid));
            goto out;
        }

        size = ret;
        ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path);
        if (ret < 0) {
            op_errno = ENOMEM;
            GF_FREE(path);
            goto out;
        }
        goto done;
    }

    /* gfid2path virtual xattr; only served when the feature is enabled */
    if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) {
        if (!priv->gfid2path) {
            op_errno = ENOATTR;
            op_ret = -1;
            goto out;
        }
        ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict);
        if (ret < 0) {
            op_ret = -1;
            goto out;
        }
        size = ret;
        goto done;
    }

    /* ancestry path */
    if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) {
        int type = POSIX_ANCESTRY_PATH;

        op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type,
                                    &op_errno, xdata);
        if (op_ret < 0) {
            op_ret = -1;
            op_errno = ENODATA;
            goto out;
        }
        size = op_ret;
        op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, -op_ret,
                   P_MSG_GET_KEY_VALUE_FAILED,
                   "could not get "
                   "value for key (%s)",
                   GET_ANCESTRY_PATH_KEY);
            GF_FREE(path);
            op_errno = ENOMEM;
            goto out;
        }

        goto done;
    }

    /* object signature */
    if (loc->inode && name &&
        (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
                 SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
        op_ret = posix_get_objectsignature(real_path, dict);
        if (op_ret < 0) {
            op_errno = -op_ret;
            goto out;
        }

        goto done;
    }

    /* here allocate value_buf of 8192 bytes to avoid one extra getxattr
       call,If buffer size is small to hold the xattr result then it will
       allocate a new buffer value of required size and call getxattr again
    */

    value_buf = alloca(XATTR_VAL_BUF_SIZE);
    if (name) {
        char *key = (char *)name;

        keybuffer = key;
#if defined(GF_DARWIN_HOST_OS_DISABLED)
        if (priv->xattr_user_namespace == XATTR_STRIP) {
            if (strncmp(key, "user.", 5) == 0) {
                key += 5;
                gf_msg_debug(this->name, 0,
                             "getxattr for file %s"
                             " stripping user key: %s -> %s",
                             real_path, keybuffer, key);
            }
        }
#endif
        /* Optimistic read into the stack buffer; on ERANGE fall back to
         * querying the size and reading into a heap buffer below. */
        size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
        if (size >= 0) {
            have_val = _gf_true;
        } else {
            if (errno == ERANGE) {
                gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                       "getxattr failed due to overflow of buffer"
                       " on %s: %s ",
                       real_path, key);
                size = sys_lgetxattr(real_path, key, NULL, 0);
            }
            if (size == -1) {
                op_errno = errno;
                if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
                    GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
                                        GF_LOG_WARNING,
                                        "Extended attributes not "
                                        "supported (try remounting"
                                        " brick with 'user_xattr' "
                                        "flag)");
                }
                if ((op_errno == ENOATTR) || (op_errno == ENODATA)) {
                    gf_msg_debug(this->name, 0,
                                 "No such attribute:%s for file %s", key,
                                 real_path);
                } else {
                    gf_msg(this->name, GF_LOG_ERROR, op_errno,
                           P_MSG_XATTR_FAILED, "getxattr failed on %s: %s ",
                           real_path, key);
                }
                goto out;
            }
        }
        value = GF_MALLOC(size + 1, gf_posix_mt_char);
        if (!value) {
            op_ret = -1;
            op_errno = ENOMEM;
            goto out;
        }
        if (have_val) {
            memcpy(value, value_buf, size);
        } else {
            bzero(value, size + 1);
            size = sys_lgetxattr(real_path, key, value, size);
            if (size == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "getxattr failed on %s: key = %s", real_path, key);
                GF_FREE(value);
                goto out;
            }
        }
        value[size] = '\0';
        op_ret = dict_set_dynptr(dict, key, value, size);
        if (op_ret < 0) {
            op_errno = -op_ret;
            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
                   "dict set operation "
                   "on %s for the key %s failed.",
                   real_path, key);
            GF_FREE(value);
            goto out;
        }

        goto done;
    }

    /* name == NULL: list every xattr and fetch each value. */
    have_val = _gf_false;
    size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1);
    if (size > 0) {
        have_val = _gf_true;
    } else {
        if (errno == ERANGE) {
            gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                   "listxattr failed due to overflow of buffer"
                   " on %s ",
                   real_path);
            size = sys_llistxattr(real_path, NULL, 0);
        }
        if (size == -1) {
            op_errno = errno;
            if ((errno == ENOTSUP) || (errno == ENOSYS)) {
                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
                                    GF_LOG_WARNING,
                                    "Extended attributes not "
                                    "supported (try remounting"
                                    " brick with 'user_xattr' "
                                    "flag)");
            } else {
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "listxattr failed on %s", real_path);
            }
            goto out;
        }
        if (size == 0)
            goto done;
    }

    /* alloca() gives no failure indication, so there is nothing to check. */
    list = alloca(size);

    if (have_val) {
        memcpy(list, value_buf, size);
    } else {
        size = sys_llistxattr(real_path, list, size);
        if (size < 0) {
            op_ret = -1;
            op_errno = errno;
            goto out;
        }
    }
    remaining_size = size;
    list_offset = 0;
    keybuffer = alloca(XATTR_KEY_BUF_SIZE);
    while (remaining_size > 0) {
        keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s",
                               list + list_offset);

        /* Keys rejected by these filters are left out of the listing. */
        ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false);
        if (ret == -1)
            goto ignore;

        ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno);
        if (ret == -1) {
            goto ignore;
        }

        if (posix_is_gfid2path_xattr(keybuffer)) {
            goto ignore;
        }

        have_val = _gf_false;
        size = sys_lgetxattr(real_path, keybuffer, value_buf,
                             XATTR_VAL_BUF_SIZE - 1);
        if (size >= 0) {
            have_val = _gf_true;
        } else {
            if (errno == ERANGE) {
                /* Log the errno of the call that just failed. */
                gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                       "getxattr failed due to overflow of"
                       " buffer on %s: %s ",
                       real_path, keybuffer);
                size = sys_lgetxattr(real_path, keybuffer, NULL, 0);
            }
            if (size == -1) {
                /* op_ret may hold 0 from a previous iteration's
                 * dict_set_dynptr(); reset it so the failure is not
                 * unwound as success. */
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "getxattr failed on"
                       " %s: key = %s ",
                       real_path, keybuffer);
                goto out;
            }
        }
        value = GF_MALLOC(size + 1, gf_posix_mt_char);
        if (!value) {
            op_ret = -1;
            op_errno = ENOMEM;
            goto out;
        }
        if (have_val) {
            memcpy(value, value_buf, size);
        } else {
            bzero(value, size + 1);
            size = sys_lgetxattr(real_path, keybuffer, value, size);
            if (size == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "getxattr failed on"
                       " %s: key = %s ",
                       real_path, keybuffer);
                GF_FREE(value);
                goto out;
            }
        }
        value[size] = '\0';
#ifdef GF_DARWIN_HOST_OS
        /* The protocol expect namespace for now */
        char *newkey = NULL;
        gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey);
        /* keybuffer is a pointer to XATTR_KEY_BUF_SIZE bytes, so sizeof()
         * on it would only yield the pointer size. */
        keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s", newkey);
        GF_FREE(newkey);
#endif
        op_ret = dict_set_dynptr(dict, keybuffer, value, size);
        if (op_ret < 0) {
            op_errno = -op_ret;
            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
                   "dict set operation "
                   "on %s for the key %s failed.",
                   real_path, keybuffer);
            GF_FREE(value);
            goto out;
        }

    ignore:
        remaining_size -= keybuff_len + 1;
        list_offset += keybuff_len + 1;

    } /* while (remaining_size > 0) */

done:
    op_ret = size;

    if (xdata && (op_ret >= 0)) {
        xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
                                     &buf);
    }

    /* These two keys are never handed back to the caller. */
    if (dict) {
        dict_del(dict, GFID_XATTR_KEY);
        dict_del(dict, GF_XATTR_VOL_ID_KEY);
    }

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp);

    if (xattr_rsp)
        dict_unref(xattr_rsp);

    if (dict) {
        dict_unref(dict);
    }

    return 0;
}
/**
 * posix_fgetxattr - fd based counterpart of getxattr/listxattr.
 *
 * @frame: call frame; always unwound through the fgetxattr callback.
 * @this:  posix translator instance.
 * @fd:    open fd whose xattrs are requested.
 * @name:  xattr to fetch, or NULL to fetch every xattr of the fd.
 * @xdata: optional request dictionary; when present and the fop succeeded,
 *         posix_xattr_fill() builds the response dictionary from it.
 *
 * GLUSTERFS_OPEN_FD_COUNT is answered with 1 and keys starting with
 * GLUSTERFS_GET_OBJECT_SIGNATURE are served by
 * posix_fdget_objectsignature(). Any other name is read with
 * sys_fgetxattr(); with @name == NULL all listed xattrs are read.
 *
 * GFID_XATTR_KEY and GF_XATTR_VOL_ID_KEY are removed from the result before
 * it is returned. The function itself always returns 0; the outcome is
 * delivered via STACK_UNWIND_STRICT (op_ret is the size of the last value
 * read, or -1 with op_errno set).
 */
int32_t
posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
                dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = EINVAL;
    struct posix_fd *pfd = NULL;
    int _fd = -1;
    int32_t list_offset = 0;
    ssize_t size = 0;
    size_t remaining_size = 0;
    char *value = NULL;
    char *list = NULL;
    dict_t *dict = NULL;
    int ret = -1;
    char key[4096] = {
        0,
    };
    int key_len;
    char *value_buf = NULL;
    gf_boolean_t have_val = _gf_false;
    struct iatt buf = {
        0,
    };
    dict_t *xattr_rsp = NULL;

    DECLARE_OLD_FS_ID_VAR;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        op_ret = -1;
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd);
        goto out;
    }

    _fd = pfd->fd;

    /* Get the total size */
    dict = dict_new();
    if (!dict) {
        op_ret = -1;
        op_errno = ENOMEM;
        goto out;
    }

    if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
        ret = dict_set_uint32(dict, (char *)name, 1);
        if (ret < 0) {
            op_ret = -1;
            size = -1;
            op_errno = ENOMEM;
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
                   "Failed to set "
                   "dictionary value for %s",
                   name);
            goto out;
        }
        goto done;
    }

    if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
                        SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) {
        op_ret = posix_fdget_objectsignature(_fd, dict);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "posix_fdget_objectsignature failed");
            op_errno = -op_ret;
            op_ret = -1;
            size = -1;
            goto out;
        }

        goto done;
    }

    /* here allocate value_buf of 8192 bytes to avoid one extra getxattr
       call,If buffer size is small to hold the xattr result then it will
       allocate a new buffer value of required size and call getxattr again
    */
    value_buf = alloca(XATTR_VAL_BUF_SIZE);

    if (name) {
        key_len = snprintf(key, sizeof(key), "%s", name);
#ifdef GF_DARWIN_HOST_OS
        struct posix_private *priv = NULL;
        priv = this->private;
        if (priv->xattr_user_namespace == XATTR_STRIP) {
            char *newkey = NULL;
            gf_add_prefix(XATTR_USER_PREFIX, key, &newkey);
            key_len = snprintf(key, sizeof(key), "%s", newkey);
            GF_FREE(newkey);
        }
#endif
        /* Optimistic read into the stack buffer; on ERANGE fall back to
         * querying the size and reading into a heap buffer below. */
        size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
        if (size >= 0) {
            have_val = _gf_true;
        } else {
            if (errno == ERANGE) {
                gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                       "fgetxattr failed due to overflow of"
                       "buffer  on %s ",
                       key);
                size = sys_fgetxattr(_fd, key, NULL, 0);
            }
            if (size == -1) {
                op_errno = errno;
                if (errno == ENODATA || errno == ENOATTR) {
                    gf_msg_debug(this->name, 0,
                                 "fgetxattr"
                                 " failed on key %s (%s)",
                                 key, strerror(op_errno));
                } else {
                    gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                           "fgetxattr"
                           " failed on key %s",
                           key);
                }
                /* size is -1 here, so 'done' reports failure. */
                goto done;
            }
        }
        value = GF_MALLOC(size + 1, gf_posix_mt_char);
        if (!value) {
            op_ret = -1;
            op_errno = ENOMEM;
            goto out;
        }
        if (have_val) {
            memcpy(value, value_buf, size);
        } else {
            bzero(value, size + 1);
            size = sys_fgetxattr(_fd, key, value, size);
            if (size == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "fgetxattr"
                       " failed on fd %p for the key %s ",
                       fd, key);
                GF_FREE(value);
                goto out;
            }
        }

        value[size] = '\0';
        op_ret = dict_set_dynptr(dict, key, value, size);
        if (op_ret < 0) {
            op_errno = -op_ret;
            op_ret = -1;
            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
                   "dict set operation "
                   "on key %s failed",
                   key);
            GF_FREE(value);
            goto out;
        }

        goto done;
    }

    /* name == NULL: list every xattr and fetch each value. */
    size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1);
    if (size > 0) {
        have_val = _gf_true;
    } else {
        if (errno == ERANGE) {
            gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                   "listxattr failed due to overflow of buffer"
                   " on %p ",
                   fd);
            size = sys_flistxattr(_fd, NULL, 0);
        }
        if (size == -1) {
            op_ret = -1;
            op_errno = errno;
            if ((errno == ENOTSUP) || (errno == ENOSYS)) {
                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
                                    GF_LOG_WARNING,
                                    "Extended attributes not "
                                    "supported (try remounting "
                                    "brick with 'user_xattr' flag)");
            } else {
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "listxattr failed "
                       "on %p:",
                       fd);
            }
            goto out;
        }
        if (size == 0)
            goto done;
    }

    /* alloca() gives no failure indication, so there is nothing to check. */
    list = alloca(size + 1);

    if (have_val) {
        memcpy(list, value_buf, size);
    } else {
        size = sys_flistxattr(_fd, list, size);
        if (size < 0) {
            /* Without this check -1 would become a huge remaining_size and
             * the loop below would walk an uninitialised buffer. */
            op_ret = -1;
            op_errno = errno;
            goto out;
        }
    }

    remaining_size = size;
    list_offset = 0;
    while (remaining_size > 0) {
        if (*(list + list_offset) == '\0')
            break;

        key_len = snprintf(key, sizeof(key), "%s", list + list_offset);
        have_val = _gf_false;
        size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
        if (size >= 0) {
            have_val = _gf_true;
        } else {
            if (errno == ERANGE) {
                gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
                       "fgetxattr failed due to overflow of buffer"
                       " on fd %p: for the key %s ",
                       fd, key);
                size = sys_fgetxattr(_fd, key, NULL, 0);
            }
            if (size == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "fgetxattr failed "
                       "on fd %p for the key %s ",
                       fd, key);
                /* size is -1, so 'done' reports failure. */
                break;
            }
        }
        value = GF_MALLOC(size + 1, gf_posix_mt_char);
        if (!value) {
            op_ret = -1;
            op_errno = ENOMEM;
            goto out;
        }
        if (have_val) {
            memcpy(value, value_buf, size);
        } else {
            bzero(value, size + 1);
            size = sys_fgetxattr(_fd, key, value, size);
            if (size == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "fgetxattr failed o"
                       "n the fd %p for the key %s ",
                       fd, key);
                GF_FREE(value);
                break;
            }
        }
        value[size] = '\0';

        op_ret = dict_set_dynptr(dict, key, value, size);
        if (op_ret) {
            op_errno = -op_ret;
            op_ret = -1;
            gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
                   "dict set operation "
                   "failed on key %s",
                   key);
            GF_FREE(value);
            goto out;
        }
        remaining_size -= key_len + 1;
        list_offset += key_len + 1;

    } /* while (remaining_size > 0) */

done:
    op_ret = size;

    if (xdata && (op_ret >= 0)) {
        xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
                                     &buf);
    }

    /* These two keys are never handed back to the caller. */
    if (dict) {
        dict_del(dict, GFID_XATTR_KEY);
        dict_del(dict, GF_XATTR_VOL_ID_KEY);
    }

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp);

    if (xattr_rsp)
        dict_unref(xattr_rsp);

    if (dict)
        dict_unref(dict);

    return 0;
}
/* dict_foreach() callback used by posix_fsetxattr(): applies one key/value
 * pair to the fd described by the posix_xattr_filler_t passed in @tmp and
 * returns whatever posix_fhandle_pair() returns. @d is not used. */
static int
_handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
{
    posix_xattr_filler_t *ctx = tmp;

    return posix_fhandle_pair(ctx->frame, ctx->this, ctx->fdnum, k, v,
                              ctx->flags, ctx->stbuf, ctx->fd);
}
/**
 * posix_fsetxattr - set the xattrs in @dict on an open fd.
 *
 * @frame: call frame; always unwound through the fsetxattr callback.
 * @this:  posix translator instance.
 * @fd:    open fd to modify.
 * @dict:  key/value pairs to set. GFID_XATTR_KEY and GF_XATTR_VOL_ID_KEY
 *         are removed from it before anything is applied.
 * @flags: setxattr flags handed to posix_fhandle_pair() (mapped through
 *         map_xattr_flags() on Darwin).
 * @xdata: optional request dictionary; GLUSTERFS_DURABLE_OP requests an
 *         fsync of the fd after the xattrs were set successfully.
 *
 * The pre- and post-operation iatts are returned to the caller in a
 * response dictionary built by posix_set_iatt_in_dict(). The function
 * itself always returns 0; the outcome is delivered via
 * STACK_UNWIND_STRICT.
 */
int32_t
posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
                int flags, dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    struct posix_fd *pfd = NULL;
    int _fd = -1;
    int ret = -1;
    struct iatt preop = {
        0,
    };
    struct iatt postop = {
        0,
    };
    dict_t *xattr = NULL;
    posix_xattr_filler_t filler = {
        0,
    };
    struct posix_private *priv = NULL;

    DECLARE_OLD_FS_ID_VAR;

    /* Validate the arguments before frame->root is dereferenced. */
    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);
    VALIDATE_OR_GOTO(dict, out);

    SET_FS_ID(frame->root->uid, frame->root->gid);

    priv = this->private;
    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL from fd=%p", fd);
        goto out;
    }
    _fd = pfd->fd;

    ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
    if (ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
               "fsetxattr (fstat)"
               "failed on fd=%p",
               fd);
        goto out;
    }

    dict_del(dict, GFID_XATTR_KEY);
    dict_del(dict, GF_XATTR_VOL_ID_KEY);

    filler.fdnum = _fd;
    filler.this = this;
    filler.frame = frame;
    filler.stbuf = &preop;
    filler.fd = fd;
#ifdef GF_DARWIN_HOST_OS
    filler.flags = map_xattr_flags(flags);
#else
    filler.flags = flags;
#endif
    op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler);
    if (op_ret < 0) {
        op_errno = -op_ret;
        op_ret = -1;
    }

    /* Only honour the durability request when the xattrs were actually
     * set; otherwise a successful fsync would overwrite the failure
     * recorded in op_ret above. */
    if ((op_ret >= 0) && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) {
        op_ret = sys_fsync(_fd);
        if (op_ret < 0) {
            op_ret = -1;
            op_errno = errno;
            gf_msg(this->name, GF_LOG_WARNING, errno,
                   P_MSG_DURABILITY_REQ_NOT_SATISFIED,
                   "could not satisfy durability request: "
                   "reason ");
        }
    }

    ret = posix_fdstat(this, fd->inode, pfd->fd, &postop);
    if (ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
               "fsetxattr (fstat)"
               "failed on fd=%p",
               fd);
        goto out;
    }

    /* The response dictionary is best effort: failing to build it does not
     * change op_ret. */
    xattr = dict_new();
    if (!xattr)
        goto out;

    ret = posix_set_iatt_in_dict(xattr, &preop, &postop);

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr);

    if (xattr)
        dict_unref(xattr);

    return 0;
}
/**
 * _posix_remove_xattr - dict_foreach() callback that removes one xattr.
 *
 * @data is a posix_xattr_filler_t: when its real_path is set the xattr is
 * removed by path, otherwise through its fdnum. @dict and @value are unused.
 *
 * Returns 0 on success and -1 on failure, in which case the errno is stored
 * in filler->op_errno.
 */
int
_posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data)
{
    posix_xattr_filler_t *filler = (posix_xattr_filler_t *)data;
    xlator_t *this = filler->this;
    int32_t rc = 0;
#ifdef GF_DARWIN_HOST_OS
    struct posix_private *priv = NULL;
    priv = (struct posix_private *)this->private;
    char *newkey = NULL;
    if (priv->xattr_user_namespace == XATTR_STRIP) {
        gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey);
        gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey);
        key = newkey;
    }
#endif
    /* Bulk remove xattr is an internal fop in gluster, and some xattrs have
     * special behavior. For example removexattr("posix.system_acl_access")
     * removes more than one xattr on the file, and those may also be listed
     * in the same bulk-removal request. Removing them a second time fails
     * with ENODATA/ENOATTR. All this fop cares about is that the xattrs in
     * the request end up removed, so "already gone" counts as success.
     */
    if (filler->real_path)
        rc = sys_lremovexattr(filler->real_path, key);
    else
        rc = sys_fremovexattr(filler->fdnum, key);

    if (rc == -1) {
        if (errno == ENODATA || errno == ENOATTR) {
            rc = 0;
        } else {
            filler->op_errno = errno;
            /* EPERM is recorded but not logged. */
            if (errno != EPERM) {
                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
                       "removexattr failed on "
                       "file/dir %s with gfid: %s (for %s)",
                       filler->real_path ? filler->real_path : "",
                       uuid_utoa(filler->inode->gfid), key);
            }
        }
    }
#ifdef GF_DARWIN_HOST_OS
    GF_FREE(newkey);
#endif
    return rc;
}
/**
 * posix_common_removexattr - shared implementation of removexattr and
 * fremovexattr.
 *
 * @frame:     call frame (frame->this is the posix translator).
 * @loc:       target for the path based fop, or NULL for the fd based one.
 * @fd:        target for the fd based fop; only used when @loc is NULL.
 * @name:      xattr to remove.
 * @xdata:     request dictionary; for a bulk removexattr (as decided by
 *             posix_is_bulk_removexattr()) its keys are the xattrs to remove.
 * @op_errno:  out-parameter for the error code.
 * @xdata_rsp: out-parameter; on success receives a new dictionary holding
 *             the pre- and post-operation iatts. The caller owns it.
 *
 * gfid2path xattrs are rejected with ENOATTR. Keys listed in
 * disallow_removexattrs are rejected with EPERM, both for a single key and
 * when any of them appears in a bulk request.
 *
 * Returns 0 on success and -1 on failure. Failing to stat the object
 * afterwards or to allocate the response dictionary does not turn a
 * successful removal into a failure.
 */
int
posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd,
                         const char *name, dict_t *xdata, int *op_errno,
                         dict_t **xdata_rsp)
{
    gf_boolean_t bulk_removexattr = _gf_false;
    gf_boolean_t disallow = _gf_false;
    char *real_path = NULL;
    struct posix_fd *pfd = NULL;
    /* op_ret must keep this name: MAKE_INODE_HANDLE is a macro expanded in
     * this scope (NOTE(review): it appears to assign op_ret - confirm). */
    int op_ret = 0;
    struct iatt preop = {
        0,
    };
    struct iatt postop = {
        0,
    };
    int ret = 0;
    int _fd = -1;
    xlator_t *this = frame->this;
    inode_t *inode = NULL;
    posix_xattr_filler_t filler = {0};

    DECLARE_OLD_FS_ID_VAR;

    SET_FS_ID(frame->root->uid, frame->root->gid);

    if (loc) {
        MAKE_INODE_HANDLE(real_path, this, loc, NULL);
        if (!real_path) {
            op_ret = -1;
            *op_errno = ESTALE;
            goto out;
        }
        inode = loc->inode;
    } else {
        op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL,
                   "pfd is NULL from fd=%p", fd);
            goto out;
        }
        _fd = pfd->fd;
        inode = fd->inode;
    }

    if (posix_is_gfid2path_xattr(name)) {
        op_ret = -1;
        *op_errno = ENOATTR;
        goto out;
    }

    /* Pre-operation attributes for the response dictionary. */
    if (loc)
        ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false);
    else
        ret = posix_fdstat(this, inode, _fd, &preop);

    if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
               "Remove xattr called on %s for file/dir %s with gfid: "
               "%s",
               name, real_path ? real_path : "", uuid_utoa(inode->gfid));
        op_ret = -1;
        *op_errno = EPERM;
        goto out;
    } else if (posix_is_bulk_removexattr((char *)name, xdata)) {
        bulk_removexattr = _gf_true;
        (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow);
        if (disallow) {
            gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
                   "Bulk removexattr has keys that shouldn't be "
                   "removed for file/dir %s with gfid: %s",
                   real_path ? real_path : "", uuid_utoa(inode->gfid));
            op_ret = -1;
            *op_errno = EPERM;
            goto out;
        }
    }

    if (bulk_removexattr) {
        filler.real_path = real_path;
        filler.this = this;
        filler.fdnum = _fd;
        filler.inode = inode;
        op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler);
        if (op_ret) {
            *op_errno = filler.op_errno;
            goto out;
        }
    } else {
        if (loc)
            op_ret = sys_lremovexattr(real_path, name);
        else
            op_ret = sys_fremovexattr(_fd, name);
        if (op_ret == -1) {
            *op_errno = errno;
            if (*op_errno != ENOATTR && *op_errno != ENODATA &&
                *op_errno != EPERM) {
                /* real_path is NULL on the fd based path; never hand a
                 * NULL pointer to a %s conversion. */
                gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED,
                       "removexattr on %s with gfid %s "
                       "(for %s)",
                       real_path ? real_path : "", uuid_utoa(inode->gfid),
                       name);
            }
            goto out;
        }
    }

    /* Update ctime and collect the post-operation attributes. */
    if (loc) {
        posix_set_ctime(frame, this, real_path, -1, inode, NULL);
        ret = posix_pstat(this, inode, loc->gfid, real_path, &postop,
                          _gf_false);
    } else {
        posix_set_ctime(frame, this, NULL, _fd, inode, NULL);
        ret = posix_fdstat(this, inode, _fd, &postop);
    }
    if (ret)
        goto out;
    *xdata_rsp = dict_new();
    if (!*xdata_rsp)
        goto out;

    ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop);
    op_ret = 0;
out:
    SET_TO_OLD_FS_ID();
    return op_ret;
}
/**
 * posix_removexattr - path based removexattr fop.
 *
 * Validates @loc, delegates to posix_common_removexattr() and unwinds the
 * frame with its result. When @loc is NULL the frame is unwound with
 * -1/EINVAL. Always returns 0.
 */
int32_t
posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
                  const char *name, dict_t *xdata)
{
    dict_t *rsp_dict = NULL;
    int unwind_errno = EINVAL;
    int unwind_ret = -1;

    VALIDATE_OR_GOTO(loc, out);

    unwind_ret = posix_common_removexattr(frame, loc, NULL, name, xdata,
                                          &unwind_errno, &rsp_dict);
out:
    STACK_UNWIND_STRICT(removexattr, frame, unwind_ret, unwind_errno,
                        rsp_dict);

    if (rsp_dict != NULL)
        dict_unref(rsp_dict);

    return 0;
}
/**
 * posix_fremovexattr - fd based removexattr fop.
 *
 * Validates @fd, delegates to posix_common_removexattr() and unwinds the
 * frame with its result. When @fd is NULL the frame is unwound with
 * -1/EINVAL. Always returns 0.
 */
int32_t
posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
                   const char *name, dict_t *xdata)
{
    dict_t *rsp_dict = NULL;
    int32_t unwind_errno = EINVAL;
    int32_t unwind_ret = -1;

    VALIDATE_OR_GOTO(fd, out);

    unwind_ret = posix_common_removexattr(frame, NULL, fd, name, xdata,
                                          &unwind_errno, &rsp_dict);
out:
    STACK_UNWIND_STRICT(fremovexattr, frame, unwind_ret, unwind_errno,
                        rsp_dict);

    if (rsp_dict != NULL)
        dict_unref(rsp_dict);

    return 0;
}
/**
 * posix_fsyncdir - fsyncdir fop.
 *
 * Only checks that a posix fd context exists for @fd; no sync call is issued
 * here and @datasync / @xdata are not consulted. The frame is unwound with 0
 * when the context was found and with -1 otherwise. Always returns 0.
 */
int32_t
posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
               dict_t *xdata)
{
    struct posix_fd *fd_ctx = NULL;
    int32_t unwind_errno = 0;
    int32_t unwind_ret = -1;

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    if (posix_fd_ctx_get(fd, this, &fd_ctx, &unwind_errno) >= 0) {
        unwind_ret = 0;
    } else {
        gf_msg(this->name, GF_LOG_WARNING, unwind_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
    }

out:
    STACK_UNWIND_STRICT(fsyncdir, frame, unwind_ret, unwind_errno, NULL);

    return 0;
}
/* Debug helper with a dict-iteration style signature: logs one key together
 * with its value converted by data_to_int32(), under the "posix" log domain.
 * The `this` (a dict here, not an xlator) and `data` arguments are unused. */
void
posix_print_xattr(dict_t *this, char *key, data_t *value, void *data)
{
gf_msg_debug("posix", 0, "(key/val) = (%s/%d)", key, data_to_int32(value));
}
/**
 * __add_array - element-wise addition of two arrays of 32-bit numbers that
 *               are stored in network byte order
 *               dest = dest + src
 * @dest:  array that is updated in place
 * @src:   array holding the values to add
 * @count: number of 32-bit numbers in each array
 * FIXME: handle overflow
 */
static void
__add_array(int32_t *dest, int32_t *src, int count)
{
    int32_t *out = dest;
    int32_t *in = src;
    int left = 0;

    for (left = count; left > 0; left--, out++, in++) {
        int32_t host_val = ntoh32(*out);

        *out = hton32(host_val + ntoh32(*in));
    }
}
/*
 * 64-bit counterpart of __add_array(): dest = dest + src, element by element,
 * for @count 64-bit numbers stored in network byte order. @dest is updated in
 * place; overflow is not handled.
 */
static void
__add_long_array(int64_t *dest, int64_t *src, int count)
{
    int idx = 0;

    while (idx < count) {
        dest[idx] = hton64(ntoh64(dest[idx]) + ntoh64(src[idx]));
        idx++;
    }
}
/* functions:
__add_array_with_default
__add_long_array_with_default
xattrop type:
GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT
These operations are similar to 'GF_XATTROP_ADD_ARRAY',
except that they add a default value if the xattr is missing
or its value is zero on disk.
One use-case of this operation is in inode-quota.
When a new directory is created, its default dir_count
should be set to 1. So when an xattrop is performed to set
the inode xattrs, it should account for the initial dir_count
of 1 if the xattrs are not present.
Here is the usage of this operation
value required in xdata for each key
struct array {
int32_t newvalue_1;
int32_t newvalue_2;
...
int32_t newvalue_n;
int32_t default_1;
int32_t default_2;
...
int32_t default_n;
};
or
struct array {
int32_t value_1;
int32_t value_2;
...
int32_t value_n;
} data[2];
fill data[0] with new value to add
fill data[1] with default value
xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
for i from 1 to n
{
if (xattr (dest_i) is zero or not set in the disk)
dest_i = newvalue_i + default_i
else
dest_i = dest_i + newvalue_i
}
value in xdata after xattrop is successful
struct array {
int32_t dest_1;
int32_t dest_2;
...
int32_t dest_n;
};
*/
/*
 * 32-bit "add with default" helper.
 *
 * @dest holds @count numbers; @src holds 2 * @count numbers: the values to
 * add followed by the per-element defaults (all in network byte order).
 * For every element, if the current destination value is zero the result is
 * addend + default, otherwise it is destination + addend.
 */
static void
__add_array_with_default(int32_t *dest, int32_t *src, int count)
{
    int32_t current = 0;
    int idx = 0;

    for (idx = 0; idx < count; idx++) {
        current = ntoh32(dest[idx]);

        if (current != 0) {
            dest[idx] = hton32(current + ntoh32(src[idx]));
            continue;
        }

        /* nothing stored yet: seed the element with its default */
        dest[idx] = hton32(ntoh32(src[idx]) + ntoh32(src[count + idx]));
    }
}
/*
 * 64-bit "add with default" helper.
 *
 * @dest holds @count numbers; @src holds 2 * @count numbers: the values to
 * add followed by the per-element defaults (all in network byte order).
 * For every element, if the current destination value is zero the result is
 * addend + default, otherwise it is destination + addend.
 */
static void
__add_long_array_with_default(int64_t *dest, int64_t *src, int count)
{
    int64_t current = 0;
    int idx = 0;

    for (idx = 0; idx < count; idx++) {
        current = ntoh64(dest[idx]);

        if (current != 0) {
            dest[idx] = hton64(current + ntoh64(src[idx]));
            continue;
        }

        /* nothing stored yet: seed the element with its default */
        dest[idx] = hton64(ntoh64(src[idx]) + ntoh64(src[idx + count]));
    }
}
/*
 * dict_foreach() callback used by do_xattrop(): apply one xattrop to key @k.
 *
 * @d:   dictionary being iterated (not used here)
 * @k:   name of the extended attribute
 * @v:   operand; v->data holds the value(s) to apply and v->len their size.
 *       For the *_WITH_DEFAULT operations only v->len / 2 bytes are read
 *       from / written to disk (the add helpers read the remaining half of
 *       v->data as defaults).
 * @tmp: posix_xattr_filler_t carrying the xlator, the inode, the target
 *       (real_path or fdnum), the operation type (flags) and the response
 *       dictionary (xattr).
 *
 * While holding the inode's xattrop_lock the current on-disk value is read
 * into a zero-initialised buffer, combined with the operand according to the
 * operation type and written back. The buffer is then stored in the response
 * dictionary with dict_set_bin(); it is not freed here once that call
 * succeeds.
 *
 * Returns a non-negative value on success. On failure returns a negative
 * value and stores the error code in filler->op_errno.
 */
static int
_posix_handle_xattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
{
    int size = 0;
    int count = 0;
    int op_ret = 0;
    int op_errno = 0;
    gf_xattrop_flags_t optype = 0;
    char *array = NULL;
    char *dst_data = NULL;
    inode_t *inode = NULL;
    xlator_t *this = NULL;
    posix_xattr_filler_t *filler = NULL;
    posix_inode_ctx_t *ctx = NULL;

    filler = tmp;

    optype = (gf_xattrop_flags_t)(filler->flags);
    this = filler->this;
    inode = filler->inode;
    count = v->len;
    if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT ||
        optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT)
        count = count / 2;

    array = GF_CALLOC(count, sizeof(char), gf_posix_mt_char);
    /* The add helpers below dereference this buffer, so an allocation
     * failure must be reported instead of carrying on with NULL. The
     * count > 0 guard keeps the zero-length case behaving as before. */
    if (!array && count > 0) {
        op_ret = -1;
        op_errno = ENOMEM;
        goto out;
    }

#ifdef GF_DARWIN_HOST_OS
    struct posix_private *priv = NULL;
    priv = this->private;
    if (priv->xattr_user_namespace == XATTR_STRIP) {
        if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
            k += XATTR_USER_PREFIX_LEN;
        }
    }
#endif
    op_ret = posix_inode_ctx_get_all(inode, this, &ctx);
    if (op_ret < 0) {
        op_errno = ENOMEM;
        goto out;
    }

    /* read-modify-write of the xattr is serialised per inode */
    pthread_mutex_lock(&ctx->xattrop_lock);
    {
        if (filler->real_path) {
            size = sys_lgetxattr(filler->real_path, k, (char *)array, count);
        } else {
            size = sys_fgetxattr(filler->fdnum, k, (char *)array, count);
        }

        op_errno = errno;
        /* A missing xattr (ENODATA/ENOATTR) is not an error: the
         * zero-filled buffer is used as its current value. */
        if ((size == -1) && (op_errno != ENODATA) && (op_errno != ENOATTR)) {
            if (op_errno == ENOTSUP) {
                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
                                    GF_LOG_WARNING,
                                    "Extended attributes not "
                                    "supported by filesystem");
            } else if (op_errno != ENOENT ||
                       !posix_special_xattr(marker_xattrs, k)) {
                if (filler->real_path)
                    gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno),
                           op_errno, P_MSG_XATTR_FAILED,
                           "getxattr failed on %s while "
                           "doing xattrop: Key:%s ",
                           filler->real_path, k);
                else
                    gf_msg(
                        this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
                        "fgetxattr failed on gfid=%s "
                        "while doing xattrop: "
                        "Key:%s (%s)",
                        uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
            }

            op_ret = -1;
            goto unlock;
        }

        /* GET_AND_SET on a missing xattr: there is no old value to hand
         * back, so drop the buffer (nothing is added to the response). */
        if (size == -1 && optype == GF_XATTROP_GET_AND_SET) {
            GF_FREE(array);
            array = NULL;
        }

        /* We only write back the xattr if it has been really modified
         * (i.e. v->data is not all 0's). Otherwise we return its value
         * but we don't update anything.
         *
         * If the xattr does not exist, a value of all 0's is returned
         * without creating it. */
        size = count;
        if (optype != GF_XATTROP_GET_AND_SET &&
            mem_0filled(v->data, v->len) == 0)
            goto unlock;

        dst_data = array;
        switch (optype) {
            case GF_XATTROP_ADD_ARRAY:
                __add_array((int32_t *)array, (int32_t *)v->data, count / 4);
                break;

            case GF_XATTROP_ADD_ARRAY64:
                __add_long_array((int64_t *)array, (int64_t *)v->data,
                                 count / 8);
                break;

            case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT:
                __add_array_with_default((int32_t *)array, (int32_t *)v->data,
                                         count / 4);
                break;

            case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT:
                __add_long_array_with_default((int64_t *)array,
                                              (int64_t *)v->data, count / 8);
                break;

            case GF_XATTROP_GET_AND_SET:
                /* the new value is written as-is; 'array' keeps the old one */
                dst_data = v->data;
                break;

            default:
                gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP,
                       "Unknown xattrop type (%d)"
                       " on %s. Please send a bug report to "
                       "gluster-devel@gluster.org",
                       optype, filler->real_path);
                op_ret = -1;
                op_errno = EINVAL;
                goto unlock;
        }

        if (filler->real_path) {
            size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0);
        } else {
            size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0);
        }
        op_errno = errno;
    }
unlock:
    pthread_mutex_unlock(&ctx->xattrop_lock);

    if (op_ret == -1)
        goto out;

    if (size == -1) {
        if (filler->real_path)
            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
                   "setxattr failed on %s "
                   "while doing xattrop: key=%s",
                   filler->real_path, k);
        else
            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
                   "fsetxattr failed on gfid=%s while doing "
                   "xattrop: key=%s (%s)",
                   uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
        op_ret = -1;
        goto out;
    } else if (array) {
        op_ret = dict_set_bin(filler->xattr, k, array, count);
        if (op_ret) {
            if (filler->real_path)
                gf_msg_debug(this->name, 0,
                             "dict_set_bin failed (path=%s): "
                             "key=%s (%s)",
                             filler->real_path, k, strerror(-size));
            else
                gf_msg_debug(this->name, 0,
                             "dict_set_bin failed (gfid=%s): "
                             "key=%s (%s)",
                             uuid_utoa(filler->inode->gfid), k,
                             strerror(-size));

            op_ret = -1;
            op_errno = EINVAL;
            /* 'array' is still ours here; it is released exactly once at
             * 'out' (freeing it here as well would be a double free). */
            goto out;
        }
        /* stored in the response dict: must not be freed below */
        array = NULL;
    }

out:
    if (op_ret < 0)
        filler->op_errno = op_errno;

    if (array)
        GF_FREE(array);

    return op_ret;
}
/**
 * do_xattrop - xattr operations - for internal use by GlusterFS
 * @frame:  call frame, unwound with the result before returning
 * @this:   posix xlator
 * @loc:    target for the path-based variant (used when its gfid is set)
 * @fd:     target for the fd-based variant, or NULL
 * @optype: operation to apply, e.g. ADD_ARRAY:
 *            dict should contain:
 *               "key" ==> array of 32-bit numbers
 * @xattr:  dictionary of key ==> operand pairs; every pair is processed by
 *          _posix_handle_xattr_keyvalue_pair()
 * @xdata:  optional request dictionary; when present, a response xdata is
 *          built from a fresh stat of the target
 *
 * The values resulting from the operation are collected in a new dictionary
 * that is passed to the unwind. Both the path and the fd variant unwind
 * through the xattrop callback. Always returns 0.
 */
int
do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
           gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
    int op_ret = 0;
    int op_errno = 0;
    int _fd = -1;
    char *real_path = NULL;
    struct posix_fd *pfd = NULL;
    inode_t *inode = NULL;
    posix_xattr_filler_t filler = {
        0,
    };
    dict_t *xattr_rsp = NULL;
    dict_t *xdata_rsp = NULL;
    struct iatt stbuf = {0};

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(xattr, out);
    VALIDATE_OR_GOTO(this, out);

    if (fd) {
        op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
        if (op_ret < 0) {
            /* gf_msg() takes (name, level, errnum, msgid, fmt, ...): the
             * fop-specific level goes in the level slot and the errno in
             * the errnum slot. */
            gf_msg(this->name, fop_log_level(GF_FOP_FXATTROP, op_errno),
                   op_errno, P_MSG_PFD_GET_FAILED,
                   "failed to get pfd from"
                   " fd=%p",
                   fd);
            goto out;
        }
        _fd = pfd->fd;
    }

    if (loc && !gf_uuid_is_null(loc->gfid)) {
        MAKE_INODE_HANDLE(real_path, this, loc, NULL);
        if (!real_path) {
            op_ret = -1;
            op_errno = ESTALE;
            goto out;
        }
    }

    /* a resolved path takes precedence over the fd for picking the inode */
    if (real_path) {
        inode = loc->inode;
    } else if (fd) {
        inode = fd->inode;
    }

    xattr_rsp = dict_new();
    if (xattr_rsp == NULL) {
        op_ret = -1;
        op_errno = ENOMEM;
        goto out;
    }

    filler.this = this;
    filler.fdnum = _fd;
    filler.real_path = real_path;
    filler.flags = (int)optype;
    filler.inode = inode;
    filler.xattr = xattr_rsp;

    op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler);
    op_errno = filler.op_errno;
    if (op_ret < 0)
        goto out;

    /* without a request xdata there is no response xdata to build */
    if (!xdata)
        goto out;

    if (fd) {
        op_ret = posix_fdstat(this, inode, _fd, &stbuf);
    } else {
        op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf,
                             _gf_false);
    }
    if (op_ret < 0) {
        op_errno = errno;
        goto out;
    }
    xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf);
    if (!xdata_rsp) {
        op_ret = -1;
        op_errno = ENOMEM;
    }
    posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf);
out:
    STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp);

    if (xattr_rsp)
        dict_unref(xattr_rsp);

    if (xdata_rsp)
        dict_unref(xdata_rsp);
    return 0;
}
/*
 * xattrop fop (path based): forwards to do_xattrop() with @loc as the target
 * and no fd. The frame is unwound inside do_xattrop(). Always returns 0.
 */
int
posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
              gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
    (void)do_xattrop(frame, this, loc, NULL, optype, xattr, xdata);

    return 0;
}
/*
 * fxattrop fop (fd based): forwards to do_xattrop() with @fd as the target
 * and no loc. The frame is unwound inside do_xattrop(). Always returns 0.
 */
int
posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
               gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
    (void)do_xattrop(frame, this, NULL, fd, optype, xattr, xdata);

    return 0;
}
/*
 * access fop: check the permissions in @mask (only the low three bits are
 * passed on) against the backend path resolved for @loc.
 *
 * Unwinds with op_ret 0 when sys_access() succeeds, otherwise with -1 and
 * the errno of the failing step. @xdata is not used. Always returns 0.
 */
int
posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
             dict_t *xdata)
{
    char *real_path = NULL;
    int32_t op_errno = 0;
    int32_t op_ret = -1;

    DECLARE_OLD_FS_ID_VAR;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(loc, out);

    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
    if (real_path == NULL) {
        op_ret = -1;
        op_errno = errno;
        goto out;
    }

    op_ret = sys_access(real_path, mask & 07);
    if (op_ret != -1) {
        op_ret = 0;
    } else {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED,
               "access failed on %s", real_path);
    }

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL);

    return 0;
}
/*
 * ftruncate fop: truncate the file behind @fd to @offset bytes.
 *
 * Steps: look up the posix fd context, stat the file (pre-op attributes),
 * run posix_cs_maintenance() when @xdata is supplied, call sys_ftruncate()
 * and stat again (post-op attributes).
 *
 * Unwinds with op_ret 0 and both iatts on success, or -1 and the errno of
 * the failing step. The response dictionary that posix_cs_maintenance() may
 * have filled is handed to the unwind and then unreferenced here.
 * Always returns 0.
 */
int32_t
posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
                dict_t *xdata)
{
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    int _fd = -1;
    struct iatt preop = {
        0,
    };
    struct iatt postop = {
        0,
    };
    struct posix_fd *pfd = NULL;
    int ret = -1;
    struct posix_private *priv = NULL;
    dict_t *rsp_xdata = NULL;

    DECLARE_OLD_FS_ID_VAR;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
        goto out;
    }

    _fd = pfd->fd;

    op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "pre-operation fstat failed on fd=%p", fd);
        goto out;
    }

    if (xdata) {
        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
                                      &rsp_xdata, _gf_false);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            op_errno = EIO;
            goto out;
        }
    }

    op_ret = sys_ftruncate(_fd, offset);

    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
               "ftruncate failed on fd=%p (%" PRId64 ")", fd, offset);
        goto out;
    }

    op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "post-operation fstat failed on fd=%p", fd);
        goto out;
    }

    op_ret = 0;

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop,
                        rsp_xdata);

    /* rsp_xdata is only ever set through posix_cs_maintenance(); drop our
     * reference so it is not leaked */
    if (rsp_xdata)
        dict_unref(rsp_xdata);

    return 0;
}
/*
 * fstat fop: return the attributes of the file behind @fd.
 *
 * When @xdata is supplied, a response dictionary is built with
 * posix_xattr_fill() and posix_cs_maintenance() is run; a failure of the
 * latter is only logged and does not fail the fop.
 *
 * Unwinds with op_ret 0 and the iatt on success, or -1 and the errno of the
 * failing step. Always returns 0.
 */
int32_t
posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
    struct posix_private *priv = NULL;
    struct posix_fd *pfd = NULL;
    struct iatt buf = {0};
    dict_t *xattr_rsp = NULL;
    int32_t op_errno = 0;
    int32_t op_ret = -1;
    int _fd = -1;

    DECLARE_OLD_FS_ID_VAR;
    SET_FS_ID(frame->root->uid, frame->root->gid);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;
    VALIDATE_OR_GOTO(priv, out);

    if (xdata == NULL)
        gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd);

    if (posix_fd_ctx_get(fd, this, &pfd, &op_errno) < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
        goto out;
    }

    _fd = pfd->fd;

    op_ret = posix_fdstat(this, fd->inode, _fd, &buf);
    if (op_ret == -1) {
        op_errno = errno;
        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
               "fstat failed on fd=%p", fd);
        goto out;
    }

    if (xdata != NULL) {
        xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, &buf);

        /* best effort: a failed state check is reported in the log only */
        if (posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata,
                                 &xattr_rsp, _gf_false) < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
        }
    }

    op_ret = 0;

out:
    SET_TO_OLD_FS_ID();

    STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp);

    if (xattr_rsp != NULL)
        dict_unref(xattr_rsp);

    return 0;
}
/*
 * lease fop stub: this xlator does not implement leases itself.
 *
 * Logs a critical message pointing at the "features/leases" translator and
 * unwinds with -1/ENOSYS and a zeroed gf_lease. None of @loc, @lease or
 * @xdata is examined. Always returns 0.
 */
int32_t
posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
            struct gf_lease *lease, dict_t *xdata)
{
    struct gf_lease nullease = {
        0,
    };

    /* the literals are concatenated, so the separating space between
     * "need" and "to" has to be part of one of them */
    gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED,
           "\"features/leases\" translator is not loaded. You need "
           "to use it for proper functioning of your application");

    STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL);
    return 0;
}
/* Counter handed to GF_LOG_OCCASIONALLY by all the locking stubs below. */
static int gf_posix_lk_log;

/*
 * lk fop stub: this xlator does not implement locking itself.
 *
 * Logs (occasionally) a critical message pointing at the "features/locks"
 * translator and unwinds with -1/ENOSYS and a zeroed gf_flock.
 * Always returns 0.
 */
int32_t
posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
         struct gf_flock *lock, dict_t *xdata)
{
    struct gf_flock empty_lock = {0};

    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
                        "\"features/locks\" translator is not loaded. "
                        "You need to use it for proper functioning of "
                        "your application.");

    STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &empty_lock, NULL);

    return 0;
}
/*
 * inodelk fop stub: this xlator does not implement locking itself.
 *
 * Logs (occasionally) a critical message pointing at the "features/locks"
 * translator and unwinds with -1/ENOSYS. Always returns 0.
 */
int32_t
posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
              loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
                        "\"features/locks\" translator is not loaded. "
                        "You need to use it for proper functioning of "
                        "your application.");

    STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL);

    return 0;
}
/*
 * finodelk fop stub: this xlator does not implement locking itself.
 *
 * Logs (occasionally) a critical message pointing at the "features/locks"
 * translator and unwinds with -1/ENOSYS. Always returns 0.
 */
int32_t
posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
               fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
                        "\"features/locks\" translator is not loaded. "
                        "You need to use it for proper functioning of "
                        "your application.");

    STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL);

    return 0;
}
/*
 * entrylk fop stub: this xlator does not implement locking itself.
 *
 * Logs (occasionally) a critical message pointing at the "features/locks"
 * translator and unwinds with -1/ENOSYS. Always returns 0.
 */
int32_t
posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
              loc_t *loc, const char *basename, entrylk_cmd cmd,
              entrylk_type type, dict_t *xdata)
{
    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
                        "\"features/locks\" translator is not loaded. "
                        "You need to use it for proper functioning of "
                        "your application.");

    STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL);

    return 0;
}
/*
 * fentrylk fop stub: this xlator does not implement locking itself.
 *
 * Logs (occasionally) a critical message pointing at the "features/locks"
 * translator and unwinds with -1/ENOSYS. Always returns 0.
 */
int32_t
posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
               fd_t *fd, const char *basename, entrylk_cmd cmd,
               entrylk_type type, dict_t *xdata)
{
    GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
                        "\"features/locks\" translator is not loaded. "
                        "You need to use it for proper functioning of "
                        "your application.");

    STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL);

    return 0;
}
/*
 * Read directory entries from @dir, starting at offset @off, and append one
 * gf_dirent_t per accepted entry to @entries->list.
 *
 * @fd:        fd whose posix context (dir_eof) and inode gfid are used
 * @dir:       open directory stream to read from
 * @off:       offset to resume from; 0 rewinds the stream
 * @size:      byte budget; each entry is charged
 *             max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + name + NUL
 * @entries:   list head the new entries are appended to
 * @this:      posix xlator
 * @skip_dirs: when non-zero, entries that are directories are left out
 *
 * Returns the number of entries added. Returns -1 (with errno set) when the
 * fd context or handle path cannot be obtained or, on non-Linux hosts, when
 * a seekdir() does not land on the requested offset. When telldir() fails,
 * readdir reports EBADF or a gf_dirent cannot be created, the function stops
 * and returns the count gathered so far.
 *
 * End of directory is signalled to the caller through errno == ENOENT, and
 * the offset of the last returned entry is remembered in pfd->dir_eof.
 */
int
posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size,
                   gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
    off_t in_case = -1;
    off_t last_off = 0;
    size_t filled = 0;
    int count = 0;
    int32_t this_size = -1;
    gf_dirent_t *this_entry = NULL;
    struct posix_fd *pfd = NULL;
    struct stat stbuf = {
        0,
    };
    char *hpath = NULL;
    int len = 0;
    int ret = 0;
    int op_errno = 0;
    struct dirent *entry = NULL;
    struct dirent scratch[2] = {
        {
            0,
        },
    };

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
        count = -1;
        errno = op_errno;
        goto out;
    }

    if (skip_dirs) {
        /* Build "<handle path of the directory>/" once; entry names are
         * appended behind the slash when an lstat() is needed below. The
         * first call only asks for the required length. */
        len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0);
        if (len <= 0) {
            errno = ESTALE;
            count = -1;
            goto out;
        }
        hpath = alloca(len + 256); /* NAME_MAX */

        if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) {
            errno = ESTALE;
            count = -1;
            goto out;
        }

        len = strlen(hpath);
        hpath[len] = '/';
    }

    if (!off) {
        rewinddir(dir);
    } else {
        seekdir(dir, off);
#ifndef GF_LINUX_HOST_OS
        /* verify the seek landed where requested, unless the offset is the
         * remembered end-of-directory offset */
        if ((u_long)telldir(dir) != off && off != pfd->dir_eof) {
            gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED,
                   "seekdir(0x%llx) failed on dir=%p: "
                   "Invalid argument (offset reused from "
                   "another DIR * structure?)",
                   off, dir);
            errno = EINVAL;
            count = -1;
            goto out;
        }
#endif /* GF_LINUX_HOST_OS */
    }

    while (filled <= size) {
        /* remember where this entry starts so that we can seek back to it
         * if it turns out not to fit into the budget */
        in_case = (u_long)telldir(dir);

        if (in_case == -1) {
            gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED,
                   "telldir failed on dir=%p", dir);
            goto out;
        }

        /* cleared so that a NULL return can be told apart from an error */
        errno = 0;

        entry = sys_readdir(dir, scratch);

        if (!entry || errno != 0) {
            if (errno == EBADF) {
                gf_msg(THIS->name, GF_LOG_WARNING, errno,
                       P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p",
                       dir);
                goto out;
            }
            break;
        }

#ifdef __NetBSD__
        /*
         * NetBSD with UFS1 backend uses backing files for
         * extended attributes. They can be found in a
         * .attribute file located at the root of the filesystem
         * We hide it to glusterfs clients, since chaos will occur
         * when the cluster/dht xlator decides to distribute
         * exended attribute backing file across storage servers.
         */
        if (__is_root_gfid(fd->inode->gfid) == 0 &&
            (!strcmp(entry->d_name, ".attribute")))
            continue;
#endif /* __NetBSD__ */

        /* hide GF_HIDDEN_PATH when listing the root directory */
        if (__is_root_gfid(fd->inode->gfid) &&
            (!strcmp(GF_HIDDEN_PATH, entry->d_name))) {
            continue;
        }

        if (skip_dirs) {
            if (DT_ISDIR(entry->d_type)) {
                continue;
            } else if (hpath) {
                /* d_type did not identify a directory: double check with
                 * lstat() on the full handle path */
                strcpy(&hpath[len + 1], entry->d_name);
                ret = sys_lstat(hpath, &stbuf);
                if (!ret && S_ISDIR(stbuf.st_mode))
                    continue;
            }
        }

        this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
                    strlen(entry->d_name) + 1;

        if (this_size + filled > size) {
            /* does not fit: rewind to the start of this entry and stop */
            seekdir(dir, in_case);
#ifndef GF_LINUX_HOST_OS
            if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) {
                gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
                       P_MSG_DIR_OPERATION_FAILED,
                       "seekdir(0x%llx) failed on dir=%p: "
                       "Invalid argument (offset reused from "
                       "another DIR * structure?)",
                       in_case, dir);
                errno = EINVAL;
                count = -1;
                goto out;
            }
#endif /* GF_LINUX_HOST_OS */
            break;
        }

        this_entry = gf_dirent_for_name(entry->d_name);

        if (!this_entry) {
            gf_msg(THIS->name, GF_LOG_ERROR, errno,
                   P_MSG_GF_DIRENT_CREATE_FAILED,
                   "could not create "
                   "gf_dirent for entry %s",
                   entry->d_name);
            goto out;
        }
        /*
         * we store the offset of next entry here, which is
         * probably not intended, but code using syncop_readdir()
         * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
         * for directory read resumption.
         */
        last_off = (u_long)telldir(dir);
        this_entry->d_off = last_off;
        this_entry->d_ino = entry->d_ino;
        this_entry->d_type = entry->d_type;

        list_add_tail(&this_entry->list, &entries->list);

        filled += this_size;
        count++;
    }

    /* Probe with one more read: a NULL result with errno == 0 is treated as
     * end of directory.
     * NOTE(review): errno is not reset immediately before this probe; it
     * carries whatever value the loop above left behind -- confirm intended. */
    if ((!sys_readdir(dir, scratch) && (errno == 0))) {
        /* Indicate EOF */
        errno = ENOENT;
        /* Remember EOF offset for later detection */
        pfd->dir_eof = (u_long)last_off;
    }
out:
    return count;
}
/*
 * Build the response xattr dictionary for a single directory entry.
 *
 * Wraps @inode in a temporary loc and forwards to posix_xattr_fill() with
 * @entry_path, no fd (NULL / -1), the request @dict and @stbuf. The @fd
 * argument is not used. Returns whatever posix_xattr_fill() returns.
 */
dict_t *
posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd,
                       char *entry_path, dict_t *dict, struct iatt *stbuf)
{
    /* if we don't send the 'loc', open-fd-count be a problem. */
    loc_t entry_loc = {.inode = inode};

    return posix_xattr_fill(this, entry_path, &entry_loc, NULL, -1, dict,
                            stbuf);
}
/*
 * Complete the entries gathered for readdirp: for every entry in
 * @entries->list, stat it through the directory's handle path and attach an
 * inode, the iatt and (when @dict is given) a response xattr dictionary.
 *
 * Entries whose stat fails are left as they are. Returns 0, or -1 when the
 * handle path of the directory cannot be built.
 */
int
posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
                    dict_t *dict)
{
    struct iatt stbuf = {0};
    inode_table_t *itable = NULL;
    gf_dirent_t *entry = NULL;
    inode_t *inode = NULL;
    char *hpath = NULL;
    uuid_t gfid;
    int len = 0;

    if (list_empty(&entries->list))
        return 0;

    itable = fd->inode->table;

    /* the first call only reports the length needed for the handle path */
    len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0);
    if (len <= 0) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED,
               "Failed to create handle path, fd=%p, gfid=%s", fd,
               uuid_utoa(fd->inode->gfid));
        return -1;
    }

    hpath = alloca(len + 256); /* NAME_MAX */
    if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) {
        gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED,
               "Failed to create handle path, fd=%p, gfid=%s", fd,
               uuid_utoa(fd->inode->gfid));
        return -1;
    }

    /* turn hpath into "<dir handle>/"; names are appended per entry */
    len = strlen(hpath);
    hpath[len] = '/';

    list_for_each_entry(entry, &entries->list, list)
    {
        inode = inode_grep(itable, fd->inode, entry->d_name);
        if (!inode)
            memset(gfid, 0, 16);
        else
            gf_uuid_copy(gfid, inode->gfid);

        strcpy(hpath + len + 1, entry->d_name);

        if (posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false) == -1) {
            /* nothing to attach to this entry; give back the grep'd ref */
            if (inode)
                inode_unref(inode);
            continue;
        }

        /* fall back to a lookup by gfid, then to a brand new inode */
        if (!inode) {
            inode = inode_find(itable, stbuf.ia_gfid);
            if (!inode)
                inode = inode_new(itable);
        }

        entry->inode = inode;

        if (dict) {
            entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath,
                                                 dict, &stbuf);
        }

        entry->d_stat = stbuf;
        if (stbuf.ia_ino)
            entry->d_ino = stbuf.ia_ino;

        /* The platform supports d_type but the underlying filesystem
           doesn't. We set d_type to the correct value from ia_type */
        if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type))
            entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type);

        inode = NULL;
    }

    return 0;
}
/*
 * Common implementation of the readdir and readdirp fops.
 *
 * @frame:   call frame, unwound here with the collected entries
 * @this:    posix xlator
 * @fd:      directory fd to read from
 * @size:    byte budget for the reply (see posix_fill_readdir())
 * @off:     directory offset to resume from
 * @whichop: GF_FOP_READDIR or GF_FOP_READDIRP; selects the unwind callback
 *           and whether the entries are completed by posix_readdirp_fill()
 * @dict:    request dictionary (GF_READDIR_SKIP_DIRS is looked up in it and
 *           it is passed on to posix_readdirp_fill())
 *
 * op_ret is the number of entries returned by posix_fill_readdir() and
 * op_errno the errno it left behind (ENOENT indicates end of directory).
 * Always returns 0.
 */
int32_t
posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
                 off_t off, int whichop, dict_t *dict)
{
    struct posix_fd *pfd = NULL;
    DIR *dir = NULL;
    int ret = -1;
    int count = 0;
    int32_t op_ret = -1;
    int32_t op_errno = 0;
    gf_dirent_t entries;
    int32_t skip_dirs = 0;

    /* Must come before the first 'goto out': the out path hands the list
     * to the unwind and to gf_dirent_free(), which need a valid list head
     * even when an argument check fails. */
    INIT_LIST_HEAD(&entries.list);

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
        goto out;
    }

    dir = pfd->dir;

    if (!dir) {
        gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL,
               "dir is NULL for fd=%p", fd);
        op_errno = EINVAL;
        goto out;
    }

    /* When READDIR_FILTER option is set to on, we can filter out
     * directory's entry from the entry->list.
     */
    ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs);

    LOCK(&fd->lock);
    {
        /* posix_fill_readdir performs multiple separate individual
           readdir() calls to fill up the buffer.

           In case of NFS where the same anonymous FD is shared between
           different applications, reading a common directory can
           result in the anonymous fd getting re-used unsafely between
           the two readdir requests (in two different io-threads).

           It would also help, in the future, to replace the loop
           around readdir() with a single large getdents() call.
        */
        count = posix_fill_readdir(fd, dir, off, size, &entries, this,
                                   skip_dirs);
    }
    UNLOCK(&fd->lock);

    /* pick ENOENT to indicate EOF */
    op_errno = errno;
    op_ret = count;

    if (whichop != GF_FOP_READDIRP)
        goto out;

    posix_readdirp_fill(this, fd, &entries, dict);

out:
    if (whichop == GF_FOP_READDIR)
        STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL);
    else
        STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);

    gf_dirent_free(&entries);

    return 0;
}
/*
 * readdir fop: forwards to posix_do_readdir() with GF_FOP_READDIR, which
 * unwinds the frame. Always returns 0.
 */
int32_t
posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
              off_t off, dict_t *xdata)
{
    (void)posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata);

    return 0;
}
/*
 * readdirp fop.
 *
 * Normally forwards to posix_do_readdir() with GF_FOP_READDIRP. When the
 * request dictionary carries GET_ANCESTRY_DENTRY_KEY, the ancestry of the
 * fd's inode is returned instead: the entries come from posix_get_ancestry()
 * and op_ret is the number of entries in that list.
 * Always returns 0.
 */
int32_t
posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
               off_t off, dict_t *dict)
{
    gf_dirent_t entries;
    gf_dirent_t *entry = NULL;
    int32_t op_errno = 0;
    int32_t op_ret = -1;

    /* common case: an ordinary directory listing */
    if ((dict == NULL) || !dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) {
        posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict);
        return 0;
    }

    INIT_LIST_HEAD(&entries.list);

    op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL,
                                POSIX_ANCESTRY_DENTRY, &op_errno, dict);
    if (op_ret >= 0) {
        /* report how many ancestry entries were collected */
        op_ret = 0;

        list_for_each_entry(entry, &entries.list, list)
        {
            op_ret++;
        }
    }

    STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);

    gf_dirent_free(&entries);

    return 0;
}
/*
 * rchecksum fop: checksum up to @len bytes of the file behind @fd, starting
 * at @offset.
 *
 * A weak checksum (gf_rsync_weak_checksum) and a strong checksum are
 * computed over the bytes actually read. The strong checksum comes from
 * gf_rsync_strong_checksum() when priv->fips_mode_rchecksum is set (this is
 * flagged to the caller with "fips-mode-rchecksum" in the response dict) and
 * from gf_rsync_md5_checksum() otherwise. When the request carries
 * "check-zero-filled", "buf-has-zeroes" reports whether the data read
 * consists only of zero bytes.
 *
 * Unwinds with op_ret 0 on success, or -1 and an errno on failure.
 * Always returns 0.
 */
int32_t
posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
                int32_t len, dict_t *xdata)
{
    char *alloc_buf = NULL;
    char *buf = NULL;
    int _fd = -1;
    struct posix_fd *pfd = NULL;
    int op_ret = -1;
    int op_errno = 0;
    int ret = 0;
    ssize_t bytes_read = 0;
    int32_t weak_checksum = 0;
    int32_t zerofillcheck = 0;
    /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH,
       so this is used. */
    unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0};
    unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0};
    unsigned char *checksum = NULL;
    struct posix_private *priv = NULL;
    dict_t *rsp_xdata = NULL;
    gf_boolean_t buf_has_zeroes = _gf_false;
    struct iatt preop = {
        0,
    };

    VALIDATE_OR_GOTO(frame, out);
    VALIDATE_OR_GOTO(this, out);
    VALIDATE_OR_GOTO(fd, out);

    priv = this->private;

    alloc_buf = _page_aligned_alloc(len, &buf);
    if (!alloc_buf) {
        op_errno = ENOMEM;
        goto out;
    }

    rsp_xdata = dict_new();
    if (!rsp_xdata) {
        op_errno = ENOMEM;
        goto out;
    }

    ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
    if (ret < 0) {
        gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
               "pfd is NULL, fd=%p", fd);
        goto out;
    }

    _fd = pfd->fd;

    if (xdata) {
        op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
        if (op_ret == -1) {
            op_errno = errno;
            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
                   "pre-operation fstat failed on fd=%p", fd);
            goto out;
        }
        op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
                                      &rsp_xdata, _gf_false);
        if (op_ret < 0) {
            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
                   "file state check failed, fd %p", fd);
            op_errno = EIO;
            goto out;
        }
    }

    /* From here on op_ret may be non-negative (left by the checks above),
     * so every failure path has to set it back to -1 explicitly. */

    LOCK(&fd->lock);
    {
        if (priv->aio_capable && priv->aio_init_done)
            __posix_fd_set_odirect(fd, pfd, 0, offset, len);

        bytes_read = sys_pread(_fd, buf, len, offset);
        if (bytes_read < 0) {
            /* save errno before logging, which may overwrite it */
            op_errno = errno;
            op_ret = -1;
            gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PREAD_FAILED,
                   "pread of %d bytes returned %zd", len, bytes_read);
        }
    }
    UNLOCK(&fd->lock);

    if (bytes_read < 0)
        goto out;

    if (xdata &&
        dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) {
        buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? _gf_false : _gf_true;
        ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes);
        if (ret) {
            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
                   "%s: Failed to set "
                   "dictionary value for key: %s",
                   uuid_utoa(fd->inode->gfid), "buf-has-zeroes");
            op_ret = -1;
            op_errno = -ret;
            goto out;
        }
    }

    /* checksum exactly the bytes that were read; 'ret' only holds a status
     * code at this point and is not a length */
    weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf,
                                           (size_t)bytes_read);

    if (priv->fips_mode_rchecksum) {
        ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1);
        if (ret) {
            gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
                   "%s: Failed to set "
                   "dictionary value for key: %s",
                   uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum");
            op_ret = -1;
            op_errno = -ret;
            goto out;
        }
        checksum = strong_checksum;
        gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read,
                                 (unsigned char *)checksum);
    } else {
        checksum = md5_checksum;
        gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read,
                              (unsigned char *)checksum);
    }
    op_ret = 0;

    posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL);

out:
    STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum,
                        checksum, rsp_xdata);
    if (rsp_xdata)
        dict_unref(rsp_xdata);
    GF_FREE(alloc_buf);

    return 0;
}
/*
 * forget callback: release the per-inode state this xlator attached to
 * @inode.
 *
 * Both context slots are removed from the inode with inode_ctx_del2(). The
 * first slot, when set, is a posix_inode_ctx_t: if it is flagged
 * GF_UNLINK_TRUE the file at the inode's unlink path is removed with
 * sys_unlink(), then its mutexes are destroyed and it is freed. The second
 * slot, when set, is a posix_mdata_t and is freed.
 *
 * Returns 0 when the xlator has no private data; otherwise the result of
 * inode_ctx_del2(), replaced by the sys_unlink() result when an unlink was
 * attempted, or -1 when the unlink path could not be built.
 */
int
posix_forget(xlator_t *this, inode_t *inode)
{
    struct posix_private *priv_posix = (struct posix_private *)this->private;
    posix_inode_ctx_t *ctx = NULL;
    posix_mdata_t *mdata = NULL;
    char *unlink_path = NULL;
    uint64_t ctx_uint1 = 0;
    uint64_t ctx_uint2 = 0;
    int ret = 0;

    if (priv_posix == NULL)
        return 0;

    ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2);

    if (ctx_uint1) {
        ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1;

        if (ctx->unlink_flag == GF_UNLINK_TRUE) {
            POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid,
                                       unlink_path);
            if (unlink_path) {
                ret = sys_unlink(unlink_path);
            } else {
                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
                       "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
                ret = -1;
            }
        }

        /* the context is torn down whether or not the unlink worked */
        pthread_mutex_destroy(&ctx->xattrop_lock);
        pthread_mutex_destroy(&ctx->write_atomic_lock);
        pthread_mutex_destroy(&ctx->pgfid_lock);
        GF_FREE(ctx);
    }

    if (ctx_uint2)
        mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2;

    GF_FREE(mdata);

    return ret;
}