/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef __BIT_ROT_STUB_H__
#define __BIT_ROT_STUB_H__
#include <glusterfs/glusterfs.h>
#include <glusterfs/logging.h>
#include <glusterfs/dict.h>
#include <glusterfs/xlator.h>
#include <glusterfs/defaults.h>
#include <glusterfs/call-stub.h>
#include "bit-rot-stub-mem-types.h"
#include <glusterfs/syscall.h>
#include <glusterfs/common-utils.h>
#include "bit-rot-common.h"
#include "bit-rot-stub-messages.h"
#include "glusterfs3-xdr.h"
#include <glusterfs/syncop.h>
#include <glusterfs/syncop-utils.h>
#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
#define BR_STUB_DUMP_STR_SIZE 65536
#define BR_PATH_MAX_EXTRA (PATH_MAX + 1024)
#define BR_PATH_MAX_PLUS (PATH_MAX + 2048)
/*
* Oops. Spelling mistake. Correcting it
*/
#define OLD_BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quanrantine"
#define BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quarantine"
/* do not reference frame->local in cbk unless initialized.
* Assigned 0x1 marks verisoning flag between call path and
* cbk path.
*/
#define BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \
do { \
if (priv->do_versioning) \
frame->local = (void *)0x1; \
else \
goto label; \
} while (0)
#define BR_STUB_VER_COND_GOTO(priv, cond, label) \
do { \
if (!priv->do_versioning || cond) \
goto label; \
} while (0)
#define BR_STUB_VER_ENABLED_IN_CALLPATH(frame, flag) \
do { \
if (frame->local) \
flag = _gf_true; \
if (frame->local == (void *)0x1) \
frame->local = NULL; \
} while (0)
#define BR_STUB_RESET_LOCAL_NULL(frame) \
do { \
if (frame->local == (void *)0x1) \
frame->local = NULL; \
} while (0)
typedef int(br_stub_version_cbk)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, dict_t *);
typedef struct br_stub_inode_ctx {
int need_writeback; /* does the inode need
a writeback to disk? */
unsigned long currentversion; /* ongoing version */
int info_sign;
struct list_head fd_list; /* list of open fds or fds participating in
write operations */
gf_boolean_t bad_object;
} br_stub_inode_ctx_t;
typedef struct br_stub_fd {
fd_t *fd;
struct list_head list;
struct bad_object_dir {
DIR *dir;
off_t dir_eof;
} bad_object;
} br_stub_fd_t;
#define I_DIRTY (1 << 0) /* inode needs writeback */
#define I_MODIFIED (1 << 1)
#define WRITEBACK_DURABLE 1 /* writeback is durable */
/**
* This could just have been a plain struct without unions and all,
* but we may need additional things in the future.
*/
typedef struct br_stub_local {
call_stub_t *fopstub; /* stub for original fop */
int versioningtype; /* not much used atm */
union {
struct br_stub_ctx {
fd_t *fd;
uuid_t gfid;
inode_t *inode;
unsigned long version;
} context;
} u;
} br_stub_local_t;
#define BR_STUB_NO_VERSIONING (1 << 0)
#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1)
typedef struct br_stub_private {
gf_boolean_t do_versioning;
uint32_t boot[2];
char export[PATH_MAX];
pthread_mutex_t lock;
pthread_cond_t cond;
struct list_head squeue; /* ordered signing queue */
pthread_t signth;
struct bad_objects_container {
pthread_t thread;
pthread_mutex_t bad_lock;
pthread_cond_t bad_cond;
struct list_head bad_queue;
} container;
struct mem_pool *local_pool;
char stub_basepath[BR_PATH_MAX_EXTRA];
uuid_t bad_object_dir_gfid;
} br_stub_private_t;
br_stub_fd_t *
br_stub_fd_new(void);
int
__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
br_stub_fd_t *
__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
br_stub_fd_t *
br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
int32_t
br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
static inline gf_boolean_t
__br_stub_is_bad_object(br_stub_inode_ctx_t *ctx)
{
return ctx->bad_object;
}
static inline void
__br_stub_mark_object_bad(br_stub_inode_ctx_t *ctx)
{
ctx->bad_object = _gf_true;
}
/* inode writeback helpers */
static inline void
__br_stub_mark_inode_dirty(br_stub_inode_ctx_t *ctx)
{
ctx->need_writeback |= I_DIRTY;
}
static inline void
__br_stub_mark_inode_synced(br_stub_inode_ctx_t *ctx)
{
ctx->need_writeback &= ~I_DIRTY;
}
static inline int
__br_stub_is_inode_dirty(br_stub_inode_ctx_t *ctx)
{
return (ctx->need_writeback & I_DIRTY);
}
/* inode mofification markers */
static inline void
__br_stub_set_inode_modified(br_stub_inode_ctx_t *ctx)
{
ctx->need_writeback |= I_MODIFIED;
}
static inline void
__br_stub_unset_inode_modified(br_stub_inode_ctx_t *ctx)
{
ctx->need_writeback &= ~I_MODIFIED;
}
static inline int
__br_stub_is_inode_modified(br_stub_inode_ctx_t *ctx)
{
return (ctx->need_writeback & I_MODIFIED);
}
static inline int
br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
{
int32_t ret = 0;
br_stub_fd_t *br_stub_fd = NULL;
br_stub_fd = br_stub_fd_new();
if (!br_stub_fd)
return -1;
br_stub_fd->fd = fd;
INIT_LIST_HEAD(&br_stub_fd->list);
ret = br_stub_fd_ctx_set(this, fd, br_stub_fd);
if (ret)
gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
"could not set fd context (for release callback");
else
*fd_ctx = br_stub_fd;
return ret;
}
/* get/set inode context helpers */
static inline int
__br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
{
return __inode_ctx_get(inode, this, ctx);
}
static inline int
br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
{
int ret = -1;
LOCK(&inode->lock);
{
ret = __br_stub_get_inode_ctx(this, inode, ctx);
}
UNLOCK(&inode->lock);
return ret;
}
static inline int
br_stub_set_inode_ctx(xlator_t *this, inode_t *inode, br_stub_inode_ctx_t *ctx)
{
uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx;
return inode_ctx_set(inode, this, &ctx_addr);
}
/* version get/set helpers */
static inline unsigned long
__br_stub_writeback_version(br_stub_inode_ctx_t *ctx)
{
return (ctx->currentversion + 1);
}
static inline void
__br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version)
{
if (ctx->currentversion < version)
ctx->currentversion = version;
else
gf_msg("bit-rot-stub", GF_LOG_WARNING, 0, BRS_MSG_CHANGE_VERSION_FAILED,
"current version: %lu"
"new version: %lu",
ctx->currentversion, version);
}
static inline int
__br_stub_can_trigger_release(inode_t *inode, br_stub_inode_ctx_t *ctx,
unsigned long *version)
{
/**
* If the inode is modified, then it has to be dirty. An inode is
* marked dirty once version is increased. Its marked as modified
* when the modification call (write/truncate) which triggered
* the versioning is successful.
*/
if (__br_stub_is_inode_modified(ctx) && list_empty(&ctx->fd_list) &&
(ctx->info_sign != BR_SIGN_REOPEN_WAIT)) {
GF_ASSERT(__br_stub_is_inode_dirty(ctx) == 0);
if (version)
*version = htonl(ctx->currentversion);
return 1;
}
return 0;
}
static inline int32_t
br_stub_get_ongoing_version(xlator_t *this, inode_t *inode,
unsigned long *version)
{
int32_t ret = 0;
uint64_t ctx_addr = 0;
br_stub_inode_ctx_t *ctx = NULL;
LOCK(&inode->lock);
{
ret = __inode_ctx_get(inode, this, &ctx_addr);
if (ret < 0)
goto unblock;
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
*version = ctx->currentversion;
}
unblock:
UNLOCK(&inode->lock);
return ret;
}
/**
* fetch the current version from inode and return the context.
* inode->lock should be held before invoking this as context
* *needs* to be valid in the caller.
*/
static inline br_stub_inode_ctx_t *
__br_stub_get_ongoing_version_ctx(xlator_t *this, inode_t *inode,
unsigned long *version)
{
int32_t ret = 0;
uint64_t ctx_addr = 0;
br_stub_inode_ctx_t *ctx = NULL;
ret = __inode_ctx_get(inode, this, &ctx_addr);
if (ret < 0)
return NULL;
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
if (version)
*version = ctx->currentversion;
return ctx;
}
/* filter for xattr fetch */
static inline int
br_stub_is_internal_xattr(const char *name)
{
if (name && ((strncmp(name, BITROT_CURRENT_VERSION_KEY,
SLEN(BITROT_CURRENT_VERSION_KEY)) == 0) ||
(strncmp(name, BITROT_SIGNING_VERSION_KEY,
SLEN(BITROT_SIGNING_VERSION_KEY)) == 0)))
return 1;
return 0;
}
static inline void
br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker)
{
if (xattr) {
/*
* When a file is corrupted, bad-object should be
* set in the dict. But, other info such as version,
* signature etc should not be set. Hence the flag
* remove_bad_marker. The consumer should know whether
* to send the bad-object info in the dict or not.
*/
if (remove_bad_marker)
dict_del(xattr, BITROT_OBJECT_BAD_KEY);
dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
}
}
/**
* This function returns the below values for different situations
* 0 => as per the inode context object is not bad
* -1 => Failed to get the inode context itself
* -2 => As per the inode context object is bad
* Both -ve values means the fop which called this function is failed
* and error is returned upwards.
* In future if needed or more errors have to be handled, then those
* errors can be made into enums.
*/
static inline int
br_stub_is_bad_object(xlator_t *this, inode_t *inode)
{
int bad_object = 0;
gf_boolean_t tmp = _gf_false;
uint64_t ctx_addr = 0;
br_stub_inode_ctx_t *ctx = NULL;
int32_t ret = -1;
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
"failed to get the inode context for the inode %s",
uuid_utoa(inode->gfid));
bad_object = -1;
goto out;
}
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
LOCK(&inode->lock);
{
tmp = __br_stub_is_bad_object(ctx);
if (tmp)
bad_object = -2;
}
UNLOCK(&inode->lock);
out:
return bad_object;
}
static inline int32_t
br_stub_mark_object_bad(xlator_t *this, inode_t *inode)
{
int32_t ret = -1;
uint64_t ctx_addr = 0;
br_stub_inode_ctx_t *ctx = NULL;
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
"failed to get the "
"inode context for the inode %s",
uuid_utoa(inode->gfid));
goto out;
}
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
LOCK(&inode->lock);
{
__br_stub_mark_object_bad(ctx);
}
UNLOCK(&inode->lock);
out:
return ret;
}
/**
* There is a possibility that dict_set might fail. The o/p of dict_set is
* given to the caller and the caller has to decide what to do.
*/
static inline int32_t
br_stub_mark_xdata_bad_object(xlator_t *this, inode_t *inode, dict_t *xdata)
{
int32_t ret = 0;
if (br_stub_is_bad_object(this, inode) == -2)
ret = dict_set_int32(xdata, GLUSTERFS_BAD_INODE, 1);
return ret;
}
int32_t
br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx);
br_sign_state_t
__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
fd_t *fd);
int
br_stub_dir_create(xlator_t *this, br_stub_private_t *priv);
int
br_stub_add(xlator_t *this, uuid_t gfid);
int32_t
br_stub_create_stub_gfid(xlator_t *this, char *stub_gfid_path, uuid_t gfid);
int
br_stub_dir_create(xlator_t *this, br_stub_private_t *priv);
call_stub_t *
__br_stub_dequeue(struct list_head *callstubs);
void
__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub);
void
br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub);
void *
br_stub_worker(void *data);
int32_t
br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xattr_req);
int32_t
br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
size_t size, off_t off, dict_t *xdata);
int
br_stub_del(xlator_t *this, uuid_t gfid);
int
br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
dict_t **dict);
void
br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
dict_t *dict);
int
br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
uuid_t gfid, char **path);
#endif /* __BIT_ROT_STUB_H__ */