/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef __BIT_ROT_H__
#define __BIT_ROT_H__
#include <glusterfs/glusterfs.h>
#include <glusterfs/logging.h>
#include <glusterfs/dict.h>
#include <glusterfs/xlator.h>
#include <glusterfs/defaults.h>
#include <glusterfs/syncop.h>
#include <glusterfs/syncop-utils.h>
#include "changelog.h"
#include "timer-wheel.h"
#include <glusterfs/throttle-tbf.h>
#include "bit-rot-ssm.h"
#include "bit-rot-common.h"
#include "bit-rot-stub-mem-types.h"
#include "bit-rot-scrub-status.h"
#include <openssl/sha.h>
/*
 * Throttle levels for the scrubber (stored in br_scrubber.throttle).
 *
 * Every member carries an explicit value; BR_SCRUB_THROTTLE_VOID is the only
 * negative one (presumably "not set" -- confirm against the option parser).
 */
enum scrub_throttle {
    BR_SCRUB_THROTTLE_VOID = -1,
    BR_SCRUB_THROTTLE_LAZY = 0,
    BR_SCRUB_THROTTLE_NORMAL = 1,
    BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
    BR_SCRUB_THROTTLE_STALLED = 3,
};

typedef enum scrub_throttle scrub_throttle_t;
/*
 * Scrub frequency settings (stored in br_scrubber.frequency).
 *
 * Numbering starts at 1 and is spelled out for every member so the value of
 * each constant can be read directly from the definition.
 */
enum scrub_freq {
    BR_FSSCRUB_FREQ_HOURLY = 1,
    BR_FSSCRUB_FREQ_DAILY = 2,
    BR_FSSCRUB_FREQ_WEEKLY = 3,
    BR_FSSCRUB_FREQ_BIWEEKLY = 4,
    BR_FSSCRUB_FREQ_MONTHLY = 5,
    BR_FSSCRUB_FREQ_MINUTE = 6,
    BR_FSSCRUB_FREQ_STALLED = 7,
};

typedef enum scrub_freq scrub_freq_t;
/*
 * Number of bytes needed for a signature buffer: one br_isignature_t header,
 * 'hl' additional bytes and one extra trailing byte.
 * ('hl' is presumably the hash length and the extra byte a terminator --
 * confirm at the call sites.)
 *
 * The argument is parenthesised so that expressions with operators of lower
 * precedence than '+' (e.g. "cond ? a : b" or "len << 1") expand correctly.
 */
#define signature_size(hl) (sizeof(br_isignature_t) + (hl) + 1)
/*
 * Filesystem scanner state. An instance is embedded in struct br_child as
 * 'fsscan', where it is described as the per subvolume FS scanner.
 *
 * NOTE(review): the roles noted on the members below are inferred from their
 * names and types only -- confirm against the scanner implementation.
 */
struct br_scanfs {
gf_lock_t entrylock; /* presumably guards 'entries' and the lists below */
pthread_mutex_t waitlock; /* mutex presumably paired with 'waitcond' */
pthread_cond_t waitcond;
unsigned int entries; /* entry counter */
struct list_head queued; /* list head; presumably entries not yet ready */
struct list_head ready; /* list head; presumably entries ready for pick-up */
};
/*
 * Connection state of a child; kept in br_child.c_state.
 * Four states are tracked, numbered from 1.
 */
enum br_child_state {
    BR_CHILD_STATE_CONNECTED = 1,
    BR_CHILD_STATE_INITIALIZING = 2,
    BR_CHILD_STATE_CONNFAILED = 3,
    BR_CHILD_STATE_DISCONNECTED = 4,
};

typedef enum br_child_state br_child_state_t;
/*
 * Per-child (subvolume) bookkeeping of the bit-rot xlator.
 */
typedef struct br_child {
    /* protects child state */
    pthread_mutex_t lock;

    /* witnessed at least one successful connection */
    char witnessed;

    /* current state of this child */
    br_child_state_t c_state;

    /* indicates whether this child is up or not */
    char child_up;

    /* client xlator corresponding to this child */
    xlator_t *xl;

    /* inode table for this child */
    inode_table_t *table;

    /* brick export directory of this child */
    char brick_path[PATH_MAX];

    /* hook to attach to the list of UP children */
    struct list_head list;

    /* bit rot xlator */
    xlator_t *this;

    /* initial crawler for unsigned object(s) or scrub crawler */
    pthread_t thread;

    /* active thread */
    int threadrunning;

    /* timer-wheel's timer mem-pool */
    struct mem_pool *timer_pool;

    struct timeval tv;

    /* per subvolume FS scanner */
    struct br_scanfs fsscan;

    /* actively scrubbing or not */
    gf_boolean_t active_scrubbing;
} br_child_t;
/*
 * Queue of objects awaiting signing together with the worker threads that
 * drain it. Referenced from struct br_private through 'obj_queue'.
 */
struct br_obj_n_workers {
struct list_head objects; /* queue of objects expired from the
timer wheel and ready to be picked
up for signing */
pthread_t *workers; /* Threads which pick up the objects
from the above queue and start
signing each object */
};
/*
 * Scrubber configuration and bookkeeping. An instance is embedded in
 * struct br_private as 'fsscrub'.
 */
struct br_scrubber {
xlator_t *this;
scrub_throttle_t throttle; /* configured throttle level */
/**
 * frequency of scanning for this subvolume. this should
 * normally be per-child, but since all children follow the
 * same frequency for a volume, this option ends up here
 * instead of br_child_t.
 */
scrub_freq_t frequency;
/* NOTE(review): the two flags below presumably mark a pending
 * reconfiguration of frequency / throttle -- confirm in the
 * reconfigure path. */
gf_boolean_t frequency_reconf;
gf_boolean_t throttle_reconf;
pthread_mutex_t mutex;
pthread_cond_t cond;
unsigned int nr_scrubbers; /* presumably the length of 'scrubbers' */
struct list_head scrubbers;
/**
 * list of "rotatable" subvolume(s) undergoing scrubbing
 */
struct list_head scrublist;
};
/*
 * Scrub monitor state. An instance is embedded in struct br_private as
 * 'scrub_monitor' (the scrubber monitor).
 */
struct br_monitor {
gf_lock_t lock;
pthread_t thread; /* Monitor thread */
gf_boolean_t inited;
pthread_mutex_t mutex;
pthread_cond_t cond; /* The monitor thread starts and waits on this
condition; the first child that comes up
wakes it */
xlator_t *this;
/* scheduler */
uint32_t boot;
int32_t active_child_count; /* Number of children currently scrubbing */
gf_boolean_t kick; /* Tracks whether the scrubber has been
 * kicked or not. Both 'kick' and
 * 'active_child_count' use the same
 * mutex/cond pair, i.e. wakelock and
 * wakecond. */
pthread_mutex_t wakelock;
pthread_cond_t wakecond;
/* NOTE(review): 'done' is presumably signalled through donelock/donecond,
 * mirroring the wakelock/wakecond pairing above -- confirm. */
gf_boolean_t done;
pthread_mutex_t donelock;
pthread_cond_t donecond;
struct gf_tw_timer_list *timer;
br_scrub_state_t state; /* current scrub state */
};
/* Short-hand for struct br_obj_n_workers. */
typedef struct br_obj_n_workers br_obj_n_workers_t;
/* Short-hand for struct br_private; introduced before the structure is
 * defined so that the callback type below can refer to it. */
typedef struct br_private br_private_t;
/* Pointer to a function that takes the private context and returns nothing.
 * NOTE(review): judging by the name it is used to update scrubbed-file
 * statistics -- confirm at the call sites. */
typedef void (*br_scrubbed_file_update)(br_private_t *priv);
/*
 * Private context of the bit-rot xlator: child (subvolume) bookkeeping,
 * the signing queue and timer wheel, and the scrubber state.
 */
struct br_private {
pthread_mutex_t lock;
struct list_head bricks; /* list of bricks from which events
have been received */
struct list_head signing;
pthread_cond_t object_cond; /* handling signing of objects */
int child_count;
br_child_t *children; /* list of subvolumes */
int up_children;
pthread_cond_t cond; /* handling CHILD_UP notifications */
pthread_t thread; /* thread for connecting each UP
child with changelog */
struct tvec_base *timer_wheel; /* timer wheel where the objects which
changelog has sent sit and wait
for expiry */
br_obj_n_workers_t *obj_queue; /* place holder for all the objects
that are expired from timer wheel
and ready to be picked up for
signing and the workers which sign
the objects */
uint32_t expiry_time; /* objects "wait" time */
uint32_t signer_th_count; /* Number of signing process threads */
tbf_t *tbf; /* token bucket filter */
gf_boolean_t iamscrubber; /* function as a fs scrubber */
struct br_scrub_stats scrub_stat; /* statistics of scrub */
struct br_scrubber fsscrub; /* scrubbers for this subvolume */
struct br_monitor scrub_monitor; /* scrubber monitor */
};
/*
 * An object, identified by its gfid, that is to be signed.
 */
typedef struct br_object {
    xlator_t *this;
    uuid_t gfid;

    /* version against which this object will be signed */
    unsigned long signedversion;

    /* object's subvolume */
    br_child_t *child;

    int sign_info;

    /* hook to add to the queue once the object is expired from timer wheel */
    struct list_head list;

    void *data;
} br_object_t;
/* Function type (not a pointer type): takes an xlator_t * and returns
 * int32_t.
 * NOTE(review): presumably the signature of the scrub state-machine
 * handlers -- confirm in bit-rot-ssm.h. */
typedef int32_t(br_scrub_ssm_call)(xlator_t *);
/*
 * Helpers declared here and defined outside this header. The prototypes do
 * not name their parameters, so the notes below are limited to what the
 * types show.
 */

/* Logging helper for an object identified by a uuid_t.
 * NOTE(review): the char * and int32_t arguments are presumably an operation
 * name and an errno value -- confirm in the definition. */
void
br_log_object(xlator_t *, char *, uuid_t, int32_t);
/* Same shape as br_log_object(), but the object is given as a
 * const char * (presumably its path) instead of a uuid_t. */
void
br_log_object_path(xlator_t *, char *, const char *, int32_t);
/* NOTE(review): the unsigned char * is presumably the output buffer for the
 * checksum of the object open on the given fd -- confirm. */
int32_t
br_calculate_obj_checksum(unsigned char *, br_child_t *, fd_t *, struct iatt *);
/* NOTE(review): takes two loc_t pointers and a directory entry; presumably
 * builds one loc from the other plus the entry -- confirm which is the
 * output. */
int32_t
br_prepare_loc(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
/* Returns a boolean for the file described by the given loc/fd on the given
 * child; by its name, whether that file is marked bad. */
gf_boolean_t
bitd_is_bad_file(xlator_t *, br_child_t *, loc_t *, fd_t *);
/*
 * Store @state as the current state of @child.
 *
 * No locking is done here. The 'lock' member of the child is documented as
 * protecting child state, so callers presumably hold it -- confirm.
 */
static inline void
_br_set_child_state(br_child_t *child, br_child_state_t state)
{
    br_child_state_t *slot = &child->c_state;

    *slot = state;
}
/*
 * Return 1 when @child is in BR_CHILD_STATE_CONNECTED, 0 otherwise.
 */
static inline int
_br_is_child_connected(br_child_t *child)
{
    const br_child_state_t current = child->c_state;

    return (BR_CHILD_STATE_CONNECTED == current);
}
/*
 * Return the child's 'active_scrubbing' flag, converted to int.
 */
static inline int
_br_is_child_scrub_active(br_child_t *child)
{
    const int scrubbing = child->active_scrubbing;

    return scrubbing;
}
/*
 * Return 1 when @child is in BR_CHILD_STATE_CONNFAILED, 0 otherwise.
 */
static inline int
_br_child_failed_conn(br_child_t *child)
{
    const br_child_state_t current = child->c_state;

    return (BR_CHILD_STATE_CONNFAILED == current);
}
/*
 * Return 1 when the child's 'witnessed' flag equals 1 (the flag records that
 * at least one successful connection was seen), 0 otherwise.
 */
static inline int
_br_child_witnessed_connection(br_child_t *child)
{
    const char seen = child->witnessed;

    return (1 == seen);
}
/* scrub state */
/*
 * Record @state as the current scrub state of @scrub_monitor.
 * Performs a plain store; no locking is done in this helper.
 */
static inline void
_br_monitor_set_scrub_state(struct br_monitor *scrub_monitor,
                            br_scrub_state_t state)
{
    (*scrub_monitor).state = state;
}
/*
 * Map the configured scrub frequency to a scrub event:
 * BR_SCRUB_EVENT_PAUSE when the frequency is BR_FSSCRUB_FREQ_STALLED,
 * BR_SCRUB_EVENT_SCHEDULE for every other frequency.
 */
static inline br_scrub_event_t
_br_child_get_scrub_event(struct br_scrubber *fsscrub)
{
    if (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED) {
        return BR_SCRUB_EVENT_PAUSE;
    }

    return BR_SCRUB_EVENT_SCHEDULE;
}
/* Declared here, defined outside this header.
 * NOTE(review): @dict is a dict_t **, so the list of bad objects is
 * presumably handed back through it -- confirm who allocates and who
 * releases the dict. */
int32_t
br_get_bad_objects_list(xlator_t *this, dict_t **dict);
#endif /* __BIT_ROT_H__ */