/*
Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifdef __NetBSD__
#define _KMEMUSER
#endif
#if defined(GF_SOLARIS_HOST_OS)
#include <sys/procfs.h>
#elif defined(__FreeBSD__)
#include <sys/types.h>
#include <libutil.h>
#elif defined(CTL_KERN)
#include <sys/sysctl.h>
#endif
#include <pwd.h>
#include "fuse-bridge.h"
static void
fuse_resolve_wipe(fuse_resolve_t *resolve)
{
GF_FREE((void *)resolve->path);
GF_FREE((void *)resolve->bname);
if (resolve->fd)
fd_unref(resolve->fd);
loc_wipe(&resolve->resolve_loc);
if (resolve->hint) {
inode_unref(resolve->hint);
resolve->hint = 0;
}
if (resolve->parhint) {
inode_unref(resolve->parhint);
resolve->parhint = 0;
}
}
void
free_fuse_state(fuse_state_t *state)
{
xlator_t *this = NULL;
fuse_private_t *priv = NULL;
uint64_t winds = 0;
char switched = 0;
this = state->this;
priv = this->private;
loc_wipe(&state->loc);
loc_wipe(&state->loc2);
if (state->xdata) {
dict_unref(state->xdata);
state->xdata = (void *)0xaaaaeeee;
}
if (state->xattr)
dict_unref(state->xattr);
if (state->name) {
GF_FREE(state->name);
state->name = NULL;
}
if (state->fd) {
fd_unref(state->fd);
state->fd = (void *)0xfdfdfdfd;
}
if (state->finh) {
GF_FREE(state->finh);
state->finh = NULL;
}
fuse_resolve_wipe(&state->resolve);
fuse_resolve_wipe(&state->resolve2);
pthread_mutex_lock(&priv->sync_mutex);
{
winds = --state->active_subvol->winds;
switched = state->active_subvol->switched;
}
pthread_mutex_unlock(&priv->sync_mutex);
if ((winds == 0) && (switched)) {
xlator_notify(state->active_subvol, GF_EVENT_PARENT_DOWN,
state->active_subvol, NULL);
}
#ifdef DEBUG
memset(state, 0x90, sizeof(*state));
#endif
GF_FREE(state);
state = NULL;
}
fuse_state_t *
get_fuse_state(xlator_t *this, fuse_in_header_t *finh)
{
fuse_state_t *state = NULL;
xlator_t *active_subvol = NULL;
fuse_private_t *priv = NULL;
state = (void *)GF_CALLOC(1, sizeof(*state), gf_fuse_mt_fuse_state_t);
if (!state)
return NULL;
state->this = THIS;
priv = this->private;
pthread_mutex_lock(&priv->sync_mutex);
{
while (priv->handle_graph_switch)
pthread_cond_wait(&priv->migrate_cond, &priv->sync_mutex);
active_subvol = fuse_active_subvol(state->this);
active_subvol->winds++;
}
pthread_mutex_unlock(&priv->sync_mutex);
state->active_subvol = active_subvol;
state->itable = active_subvol->itable;
state->pool = this->ctx->pool;
state->finh = finh;
state->this = this;
LOCK_INIT(&state->lock);
return state;
}
#define FUSE_MAX_AUX_GROUPS \
32 /* We can get only up to 32 aux groups from /proc */
void
frame_fill_groups(call_frame_t *frame)
{
#if defined(GF_LINUX_HOST_OS)
xlator_t *this = frame->this;
fuse_private_t *priv = this->private;
char filename[32];
char line[4096];
char *ptr = NULL;
FILE *fp = NULL;
int idx = 0;
long int id = 0;
char *saveptr = NULL;
char *endptr = NULL;
int ret = 0;
int ngroups = 0;
gid_t *mygroups = NULL;
if (priv->resolve_gids) {
struct passwd pwent;
char mystrs[1024];
struct passwd *result;
if (getpwuid_r(frame->root->uid, &pwent, mystrs, sizeof(mystrs),
&result) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"getpwuid_r(%u) "
"failed",
frame->root->uid);
return;
}
if (result == 0) {
gf_log(this->name, GF_LOG_ERROR,
"getpwuid_r(%u): "
"no matching record",
frame->root->uid);
return;
}
ngroups = gf_getgrouplist(result->pw_name, frame->root->gid, &mygroups);
if (ngroups == -1) {
gf_log(this->name, GF_LOG_ERROR,
"could not map %s to "
"group list (ngroups %d, max %d)",
result->pw_name, ngroups, GF_MAX_AUX_GROUPS);
return;
}
call_stack_set_groups(frame->root, ngroups, &mygroups);
} else {
ret = snprintf(filename, sizeof filename, "/proc/%d/status",
frame->root->pid);
if (ret >= sizeof filename)
goto out;
fp = fopen(filename, "r");
if (!fp)
goto out;
if (call_stack_alloc_groups(frame->root, ngroups) != 0)
goto out;
while ((ptr = fgets(line, sizeof line, fp))) {
if (strncmp(ptr, "Groups:", 7) != 0)
continue;
ptr = line + 8;
for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr;
ptr = strtok_r(NULL, " \t\r\n", &saveptr)) {
errno = 0;
id = strtol(ptr, &endptr, 0);
if (errno == ERANGE)
break;
if (!endptr || *endptr)
break;
frame->root->groups[idx++] = id;
if (idx == FUSE_MAX_AUX_GROUPS)
break;
}
frame->root->ngrps = idx;
break;
}
}
out:
if (fp)
fclose(fp);
#elif defined(GF_SOLARIS_HOST_OS)
char filename[32];
char scratch[128];
prcred_t *prcred = (prcred_t *)scratch;
FILE *fp = NULL;
int ret = 0;
int ngrps;
ret = snprintf(filename, sizeof filename, "/proc/%d/cred",
frame->root->pid);
if (ret < sizeof filename) {
fp = fopen(filename, "r");
if (fp != NULL) {
if (fgets(scratch, sizeof scratch, fp) != NULL) {
ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS);
if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
fclose(fp);
return;
}
}
fclose(fp);
}
}
#elif defined(CTL_KERN) /* DARWIN and *BSD */
/*
N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not
obvious, that it's not subject to preprocessor directives
like '#if defined'.)
Unlike Linux, on Mac OS and the BSDs it is a #define. We
could test to see that KERN_PROC is defined, but, barring any
evidence to the contrary, I think that's overkill.
We might also test that GF_DARWIN_HOST_OS is defined, why
limit this to just Mac OS. It's equally valid for the BSDs
and we do have people building on NetBSD and FreeBSD.
*/
int name[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid};
size_t namelen = sizeof name / sizeof name[0];
struct kinfo_proc kp;
size_t kplen = sizeof(kp);
int i, ngroups;
if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0)
return;
ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, NGROUPS_MAX);
if (call_stack_alloc_groups(frame->root, ngroups) != 0)
return;
for (i = 0; i < ngroups; i++)
frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i];
frame->root->ngrps = ngroups;
#else
frame->root->ngrps = 0;
#endif /* GF_LINUX_HOST_OS */
}
/*
* Get the groups for the PID associated with this frame. If enabled,
* use the gid cache to reduce group list collection.
*/
static void
get_groups(fuse_private_t *priv, call_frame_t *frame)
{
int i;
const gid_list_t *gl;
gid_list_t agl;
if (!priv || !priv->gid_cache_timeout) {
frame_fill_groups(frame);
return;
}
if (-1 == priv->gid_cache_timeout) {
frame->root->ngrps = 0;
return;
}
gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid, frame->root->uid,
frame->root->gid);
if (gl) {
if (call_stack_alloc_groups(frame->root, gl->gl_count) != 0)
return;
frame->root->ngrps = gl->gl_count;
for (i = 0; i < gl->gl_count; i++)
frame->root->groups[i] = gl->gl_list[i];
gid_cache_release(&priv->gid_cache, gl);
return;
}
frame_fill_groups(frame);
agl.gl_id = frame->root->pid;
agl.gl_uid = frame->root->uid;
agl.gl_gid = frame->root->gid;
agl.gl_count = frame->root->ngrps;
agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t),
gf_fuse_mt_gids_t);
if (!agl.gl_list)
return;
for (i = 0; i < frame->root->ngrps; i++)
agl.gl_list[i] = frame->root->groups[i];
if (gid_cache_add(&priv->gid_cache, &agl) != 1)
GF_FREE(agl.gl_list);
}
call_frame_t *
get_call_frame_for_req(fuse_state_t *state)
{
call_pool_t *pool = NULL;
fuse_in_header_t *finh = NULL;
call_frame_t *frame = NULL;
xlator_t *this = NULL;
fuse_private_t *priv = NULL;
pool = state->pool;
finh = state->finh;
this = state->this;
priv = this->private;
frame = create_frame(this, pool);
if (!frame)
return NULL;
if (finh) {
frame->root->uid = finh->uid;
frame->root->gid = finh->gid;
frame->root->pid = finh->pid;
set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner);
}
get_groups(priv, frame);
if (priv && priv->client_pid_set)
frame->root->pid = priv->client_pid;
frame->root->type = GF_OP_TYPE_FOP;
return frame;
}
inode_t *
fuse_ino_to_inode(uint64_t ino, xlator_t *fuse)
{
inode_t *inode = NULL;
xlator_t *active_subvol = NULL;
if (ino == 1) {
active_subvol = fuse_active_subvol(fuse);
if (active_subvol)
inode = active_subvol->itable->root;
} else {
inode = (inode_t *)(unsigned long)ino;
inode_ref(inode);
}
return inode;
}
uint64_t
inode_to_fuse_nodeid(inode_t *inode)
{
if (!inode)
return 0;
if (__is_root_gfid(inode->gfid))
return 1;
return (unsigned long)inode;
}
GF_MUST_CHECK int32_t
fuse_loc_fill(loc_t *loc, fuse_state_t *state, ino_t ino, ino_t par,
const char *name)
{
inode_t *inode = NULL;
inode_t *parent = NULL;
int32_t ret = -1;
char *path = NULL;
uuid_t null_gfid = {
0,
};
/* resistance against multiple invocation of loc_fill not to get
reference leaks via inode_search() */
if (name) {
parent = loc->parent;
if (!parent) {
parent = fuse_ino_to_inode(par, state->this);
loc->parent = parent;
if (parent)
gf_uuid_copy(loc->pargfid, parent->gfid);
}
inode = loc->inode;
if (!inode && parent) {
inode = inode_grep(parent->table, parent, name);
loc->inode = inode;
}
ret = inode_path(parent, name, &path);
if (ret <= 0) {
gf_log("glusterfs-fuse", GF_LOG_DEBUG,
"inode_path failed for %s/%s",
(parent) ? uuid_utoa(parent->gfid) : "0", name);
goto fail;
}
loc->path = path;
} else {
inode = loc->inode;
if (!inode) {
inode = fuse_ino_to_inode(ino, state->this);
loc->inode = inode;
if (inode)
gf_uuid_copy(loc->gfid, inode->gfid);
}
parent = loc->parent;
if (!parent) {
parent = inode_parent(inode, null_gfid, NULL);
loc->parent = parent;
if (parent)
gf_uuid_copy(loc->pargfid, parent->gfid);
}
ret = inode_path(inode, NULL, &path);
if (ret <= 0) {
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "inode_path failed for %s",
(inode) ? uuid_utoa(inode->gfid) : "0");
goto fail;
}
loc->path = path;
}
if (loc->path) {
loc->name = strrchr(loc->path, '/');
if (loc->name)
loc->name++;
else
loc->name = "";
}
if ((ino != 1) && (parent == NULL)) {
gf_log("fuse-bridge", GF_LOG_DEBUG,
"failed to search parent for %" PRId64 "/%s (%" PRId64 ")",
(ino_t)par, name, (ino_t)ino);
ret = -1;
goto fail;
}
ret = 0;
fail:
/* this should not happen as inode_path returns -1 when buf is NULL
for sure */
if (path && !loc->path)
GF_FREE(path);
return ret;
}
/* courtesy of folly */
void
gf_fuse_stat2attr(struct iatt *st, struct fuse_attr *fa,
gf_boolean_t enable_ino32)
{
if (enable_ino32)
fa->ino = GF_FUSE_SQUASH_INO(st->ia_ino);
else
fa->ino = st->ia_ino;
fa->size = st->ia_size;
fa->blocks = st->ia_blocks;
fa->atime = st->ia_atime;
fa->mtime = st->ia_mtime;
fa->ctime = st->ia_ctime;
fa->atimensec = st->ia_atime_nsec;
fa->mtimensec = st->ia_mtime_nsec;
fa->ctimensec = st->ia_ctime_nsec;
fa->mode = st_mode_from_ia(st->ia_prot, st->ia_type);
fa->nlink = st->ia_nlink;
fa->uid = st->ia_uid;
fa->gid = st->ia_gid;
fa->rdev = makedev(ia_major(st->ia_rdev), ia_minor(st->ia_rdev));
#if FUSE_KERNEL_MINOR_VERSION >= 9
fa->blksize = st->ia_blksize;
#endif
#ifdef GF_DARWIN_HOST_OS
fa->crtime = (uint64_t)-1;
fa->crtimensec = (uint32_t)-1;
fa->flags = 0;
#endif
}
void
gf_fuse_fill_dirent(gf_dirent_t *entry, struct fuse_dirent *fde,
gf_boolean_t enable_ino32)
{
if (enable_ino32)
fde->ino = GF_FUSE_SQUASH_INO(entry->d_ino);
else
fde->ino = entry->d_ino;
fde->off = entry->d_off;
fde->type = entry->d_type;
fde->namelen = strlen(entry->d_name);
(void)memcpy(fde->name, entry->d_name, fde->namelen);
}
static int
fuse_do_flip_xattr_ns(char *okey, const char *nns, char **nkey)
{
int ret = 0;
char *key = NULL;
okey = strchr(okey, '.');
GF_ASSERT(okey);
int key_len = strlen(nns) + strlen(okey);
key = GF_MALLOC(key_len + 1, gf_common_mt_char);
if (!key) {
ret = -1;
goto out;
}
strcpy(key, nns);
strcat(key, okey);
*nkey = key;
out:
return ret;
}
static int
fuse_xattr_alloc_default(char *okey, char **nkey)
{
int ret = 0;
*nkey = gf_strdup(okey);
if (!*nkey)
ret = -1;
return ret;
}
#define PRIV_XA_NS "trusted"
#define UNPRIV_XA_NS "system"
int
fuse_flip_xattr_ns(fuse_private_t *priv, char *okey, char **nkey)
{
int ret = 0;
gf_boolean_t need_flip = _gf_false;
switch (priv->client_pid) {
case GF_CLIENT_PID_GSYNCD:
/* valid xattr(s): *xtime, volume-mark* */
gf_log("glusterfs-fuse", GF_LOG_DEBUG,
"PID: %d, checking xattr(s): "
"volume-mark*, *xtime",
priv->client_pid);
if ((strcmp(okey, UNPRIV_XA_NS ".glusterfs.volume-mark") == 0) ||
(fnmatch(UNPRIV_XA_NS ".glusterfs.volume-mark.*", okey,
FNM_PERIOD) == 0) ||
(fnmatch(UNPRIV_XA_NS ".glusterfs.*.xtime", okey, FNM_PERIOD) ==
0))
need_flip = _gf_true;
break;
case GF_CLIENT_PID_HADOOP:
/* valid xattr(s): pathinfo */
gf_log("glusterfs-fuse", GF_LOG_DEBUG,
"PID: %d, checking xattr(s): "
"pathinfo",
priv->client_pid);
if (strcmp(okey, UNPRIV_XA_NS ".glusterfs.pathinfo") == 0)
need_flip = _gf_true;
break;
}
if (need_flip) {
gf_log("glusterfs-fuse", GF_LOG_DEBUG,
"flipping %s to " PRIV_XA_NS " equivalent", okey);
ret = fuse_do_flip_xattr_ns(okey, PRIV_XA_NS, nkey);
} else {
/* if we cannot match, continue with what we got */
ret = fuse_xattr_alloc_default(okey, nkey);
}
return ret;
}
int
fuse_ignore_xattr_set(fuse_private_t *priv, char *key)
{
int ret = 0;
/* don't mess with user namespace */
if (fnmatch("user.*", key, FNM_PERIOD) == 0)
goto out;
if (priv->client_pid != GF_CLIENT_PID_GSYNCD)
goto out;
/* trusted NS check */
if (!((fnmatch("*.glusterfs.*.xtime", key, FNM_PERIOD) == 0) ||
(fnmatch("*.glusterfs.volume-mark", key, FNM_PERIOD) == 0) ||
(fnmatch("*.glusterfs.volume-mark.*", key, FNM_PERIOD) == 0) ||
(fnmatch("system.posix_acl_access", key, FNM_PERIOD) == 0) ||
(fnmatch("glusterfs.gfid.newfile", key, FNM_PERIOD) == 0) ||
(fnmatch("*.glusterfs.shard.block-size", key, FNM_PERIOD) == 0) ||
(fnmatch("*.glusterfs.shard.file-size", key, FNM_PERIOD) == 0)))
ret = -1;
out:
gf_log("glusterfs-fuse", GF_LOG_DEBUG,
"%s setxattr: key [%s], "
" client pid [%d]",
(ret ? "disallowing" : "allowing"), key, priv->client_pid);
return ret;
}
int
fuse_check_selinux_cap_xattr(fuse_private_t *priv, char *name)
{
int ret = -1;
if (strcmp(name, "security.selinux") &&
strcmp(name, "security.capability")) {
/* if xattr name is not of interest, no validations needed */
ret = 0;
goto out;
}
if ((strcmp(name, "security.selinux") == 0) && (priv->selinux)) {
ret = 0;
}
if ((strcmp(name, "security.capability") == 0) &&
((priv->capability) || (priv->selinux))) {
ret = 0;
}
out:
return ret;
}