|
Packit |
4e8bc4 |
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
Packit |
4e8bc4 |
#include "memcached.h"
|
|
Packit |
4e8bc4 |
#include "bipbuffer.h"
|
|
Packit |
4e8bc4 |
#include "slab_automove.h"
|
|
Packit |
4e8bc4 |
#ifdef EXTSTORE
|
|
Packit |
4e8bc4 |
#include "storage.h"
|
|
Packit |
4e8bc4 |
#include "slab_automove_extstore.h"
|
|
Packit |
4e8bc4 |
#endif
|
|
Packit |
4e8bc4 |
#include <sys/stat.h>
|
|
Packit |
4e8bc4 |
#include <sys/socket.h>
|
|
Packit |
4e8bc4 |
#include <sys/resource.h>
|
|
Packit |
4e8bc4 |
#include <fcntl.h>
|
|
Packit |
4e8bc4 |
#include <netinet/in.h>
|
|
Packit |
4e8bc4 |
#include <errno.h>
|
|
Packit |
4e8bc4 |
#include <stdlib.h>
|
|
Packit |
4e8bc4 |
#include <stdio.h>
|
|
Packit |
4e8bc4 |
#include <signal.h>
|
|
Packit |
4e8bc4 |
#include <string.h>
|
|
Packit |
4e8bc4 |
#include <time.h>
|
|
Packit |
4e8bc4 |
#include <assert.h>
|
|
Packit |
4e8bc4 |
#include <unistd.h>
|
|
Packit |
4e8bc4 |
#include <poll.h>
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Forward Declarations */
static void item_link_q(item *it);
static void item_unlink_q(item *it);

/* Maps index 0..3 onto the LRU sub-class bit flags, in iteration order. */
static unsigned int lru_type_map[4] = {HOT_LRU, WARM_LRU, COLD_LRU, TEMP_LRU};

/* Total number of LRU lists (slab classes x LRU sub-types). */
#define LARGEST_ID POWER_LARGEST
|
|
Packit |
4e8bc4 |
/* Per-LRU-list statistics counters; one entry per lru_locks[] slot.
 * Each entry is protected by the corresponding lru_locks[] mutex. */
typedef struct {
    uint64_t evicted;            /* items evicted to free memory */
    uint64_t evicted_nonzero;    /* evicted items that had a nonzero exptime */
    uint64_t reclaimed;          /* expired items whose memory was reused */
    uint64_t outofmemory;        /* allocation failures (see do_item_alloc) */
    uint64_t tailrepairs;
    uint64_t expired_unfetched; /* items reclaimed but never touched */
    uint64_t evicted_unfetched; /* items evicted but never touched */
    uint64_t evicted_active; /* items evicted that should have been shuffled */
    uint64_t crawler_reclaimed;     /* items freed by the LRU crawler */
    uint64_t crawler_items_checked; /* items examined by the LRU crawler */
    uint64_t lrutail_reflocked;  /* tail items skipped due to held references */
    uint64_t moves_to_cold;      /* migrations into the COLD sub-LRU */
    uint64_t moves_to_warm;      /* migrations into the WARM sub-LRU */
    uint64_t moves_within_lru;   /* items re-queued within the same sub-LRU */
    uint64_t direct_reclaims;    /* eviction passes driven by allocators */
    uint64_t hits_to_hot;
    uint64_t hits_to_warm;
    uint64_t hits_to_cold;
    uint64_t hits_to_temp;
    uint64_t mem_requested;
    rel_time_t evicted_time;     /* idle age of the most recently evicted item */
} itemstats_t;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Doubly-linked LRU lists, indexed by slab class id OR'd with an LRU type
 * bit; all guarded by the matching lru_locks[] mutex. */
static item *heads[LARGEST_ID];
static item *tails[LARGEST_ID];
static itemstats_t itemstats[LARGEST_ID];
static unsigned int sizes[LARGEST_ID];   /* item count per LRU list */
static uint64_t sizes_bytes[LARGEST_ID]; /* byte total per LRU list */

/* "stats sizes" histogram state; guarded by stats_sizes_lock. */
static unsigned int *stats_sizes_hist = NULL;
static uint64_t stats_sizes_cas_min = 0;
static int stats_sizes_buckets = 0;

/* Monotonic CAS id counter; guarded by cas_id_lock. */
static uint64_t cas_id = 0;

/* LRU maintainer thread control state. */
static volatile int do_run_lru_maintainer_thread = 0;
static int lru_maintainer_initialized = 0;
static pthread_mutex_t lru_maintainer_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cas_id_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t stats_sizes_lock = PTHREAD_MUTEX_INITIALIZER;
|
Packit |
4e8bc4 |
void item_stats_reset(void) {
|
|
Packit |
4e8bc4 |
int i;
|
|
Packit |
4e8bc4 |
for (i = 0; i < LARGEST_ID; i++) {
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&lru_locks[i]);
|
|
Packit |
4e8bc4 |
memset(&itemstats[i], 0, sizeof(itemstats_t));
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_locks[i]);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* called with class lru lock held */
|
|
Packit |
4e8bc4 |
void do_item_stats_add_crawl(const int i, const uint64_t reclaimed,
|
|
Packit |
4e8bc4 |
const uint64_t unfetched, const uint64_t checked) {
|
|
Packit |
4e8bc4 |
itemstats[i].crawler_reclaimed += reclaimed;
|
|
Packit |
4e8bc4 |
itemstats[i].expired_unfetched += unfetched;
|
|
Packit |
4e8bc4 |
itemstats[i].crawler_items_checked += checked;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Buffer of pending asynchronous LRU bump requests; nodes are linked into
 * the global bump_buf_head/bump_buf_tail list. */
typedef struct _lru_bump_buf {
    struct _lru_bump_buf *prev;
    struct _lru_bump_buf *next;
    pthread_mutex_t mutex;  /* guards buf and dropped */
    bipbuf_t *buf;          /* ring buffer of lru_bump_entry records */
    uint64_t dropped;       /* bumps discarded (see lru_total_bumps_dropped) */
} lru_bump_buf;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* One queued LRU bump: the item plus its precomputed hash value. */
typedef struct {
    item *it;
    uint32_t hv;  /* hash of the item's key */
} lru_bump_entry;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Global list of all bump buffers; guarded by bump_buf_lock. */
static lru_bump_buf *bump_buf_head = NULL;
static lru_bump_buf *bump_buf_tail = NULL;
static pthread_mutex_t bump_buf_lock = PTHREAD_MUTEX_INITIALIZER;
/* TODO: tunable? Need bench results */
#define LRU_BUMP_BUF_SIZE 8192

static bool lru_bump_async(lru_bump_buf *b, item *it, uint32_t hv);
static uint64_t lru_total_bumps_dropped(void);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Get the next CAS id for a new item. */
|
|
Packit |
4e8bc4 |
/* TODO: refactor some atomics for this. */
|
|
Packit |
4e8bc4 |
uint64_t get_cas_id(void) {
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&cas_id_lock);
|
|
Packit |
4e8bc4 |
uint64_t next_id = ++cas_id;
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&cas_id_lock);
|
|
Packit |
4e8bc4 |
return next_id;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Overwrite the global CAS counter; subsequent get_cas_id() calls
 * continue from new_cas + 1. */
void set_cas_id(uint64_t new_cas) {
    pthread_mutex_lock(&cas_id_lock);
    cas_id = new_cas;
    pthread_mutex_unlock(&cas_id_lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Return 1 if a pending flush_all invalidates this item, 0 otherwise.
 * An item is flushed when it predates the flush point, either by its last
 * access time or (when both are set) by its CAS id. */
int item_is_flushed(item *it) {
    rel_time_t oldest_live = settings.oldest_live;
    /* No flush armed, or the flush point is still in the future. */
    if (oldest_live == 0 || oldest_live > current_time) {
        return 0;
    }
    if (it->time <= oldest_live) {
        return 1;
    }
    uint64_t cas = ITEM_get_cas(it);
    uint64_t oldest_cas = settings.oldest_cas;
    if (oldest_cas != 0 && cas != 0 && cas < oldest_cas) {
        return 1;
    }
    return 0;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Return the item count of LRU list 'id'.
 * must be locked before call */
unsigned int do_get_lru_size(uint32_t id) {
    return sizes[id];
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Enable this for reference-count debugging. */
#if 0
# define DEBUG_REFCNT(it,op) \
                fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \
                        it, op, it->refcount, \
                        (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
                        (it->it_flags & ITEM_SLABBED) ? 'S' : ' ')
#else
/* Expands to a no-op statement; 'while(0)' absorbs the caller's semicolon. */
# define DEBUG_REFCNT(it,op) while(0)
#endif
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/**
|
|
Packit |
4e8bc4 |
* Generates the variable-sized part of the header for an object.
|
|
Packit |
4e8bc4 |
*
|
|
Packit |
4e8bc4 |
* nkey - The length of the key
|
|
Packit |
4e8bc4 |
* flags - key flags
|
|
Packit |
4e8bc4 |
* nbytes - Number of bytes to hold value and addition CRLF terminator
|
|
Packit |
4e8bc4 |
* suffix - Buffer for the "VALUE" line suffix (flags, size).
|
|
Packit |
4e8bc4 |
* nsuffix - The length of the suffix is stored here.
|
|
Packit |
4e8bc4 |
*
|
|
Packit |
4e8bc4 |
* Returns the total size of the header.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
static size_t item_make_header(const uint8_t nkey, const unsigned int flags, const int nbytes,
|
|
Packit |
4e8bc4 |
char *suffix, uint8_t *nsuffix) {
|
|
Packit |
4e8bc4 |
if (flags == 0) {
|
|
Packit |
4e8bc4 |
*nsuffix = 0;
|
|
Packit |
4e8bc4 |
} else {
|
|
Packit |
4e8bc4 |
*nsuffix = sizeof(flags);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
return sizeof(item) + nkey + *nsuffix + nbytes;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Allocate 'ntotal' bytes from slab class 'id', driving LRU eviction on
 * failure.  Returns NULL if memory could not be obtained after 10 passes. */
item *do_item_alloc_pull(const size_t ntotal, const unsigned int id) {
    item *it = NULL;
    int i;
    /* If no memory is available, attempt a direct LRU juggle/eviction */
    /* This is a race in order to simplify lru_pull_tail; in cases where
     * locked items are on the tail, you want them to fall out and cause
     * occasional OOM's, rather than internally work around them.
     * This also gives one fewer code path for slab alloc/free
     */
    for (i = 0; i < 10; i++) {
        /* Try to reclaim memory first */
        if (!settings.lru_segmented) {
            lru_pull_tail(id, COLD_LRU, 0, 0, 0, NULL);
        }
        it = slabs_alloc(ntotal, id, 0);

        if (it == NULL) {
            // We send '0' in for "total_bytes" as this routine is always
            // pulling to evict, or forcing HOT -> COLD migration.
            // As of this writing, total_bytes isn't at all used with COLD_LRU.
            if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT, 0, NULL) <= 0) {
                /* Nothing evictable in COLD; in segmented mode push HOT
                 * items toward COLD and retry, otherwise give up. */
                if (settings.lru_segmented) {
                    lru_pull_tail(id, HOT_LRU, 0, 0, 0, NULL);
                } else {
                    break;
                }
            }
        } else {
            break;
        }
    }

    if (i > 0) {
        /* 'i' counts the extra eviction passes this allocation needed. */
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].direct_reclaims += i;
        pthread_mutex_unlock(&lru_locks[id]);
    }

    return it;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Chain another chunk onto this chunk. */
/* slab mover: if it finds a chunk without ITEM_CHUNK flag, and no ITEM_LINKED
 * flag, it counts as busy and skips.
 * I think it might still not be safe to do linking outside of the slab lock
 */
item_chunk *do_item_alloc_chunk(item_chunk *ch, const size_t bytes_remain) {
    // TODO: Should be a cleaner way of finding real size with slabber calls
    size_t size = bytes_remain + sizeof(item_chunk);
    /* Chunks are capped at the largest slab chunk; callers loop for more. */
    if (size > settings.slab_chunk_size_max)
        size = settings.slab_chunk_size_max;
    unsigned int id = slabs_clsid(size);

    /* May trigger LRU eviction internally to find memory. */
    item_chunk *nch = (item_chunk *) do_item_alloc_pull(size, id);
    if (nch == NULL)
        return NULL;

    // link in.
    // ITEM_CHUNK[ED] bits need to be protected by the slabs lock.
    slabs_mlock();
    nch->head = ch->head;
    ch->next = nch;
    nch->prev = ch;
    nch->next = 0;
    nch->used = 0;
    nch->slabs_clsid = id;
    nch->size = size - sizeof(item_chunk);
    nch->it_flags |= ITEM_CHUNK;
    slabs_munlock();
    return nch;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Allocate and initialize a new, unlinked item for key/flags/exptime/nbytes.
 * Returns NULL (or 0) on invalid size or out-of-memory.  The item is placed
 * logically into an LRU class (via slabs_clsid bits) but not yet linked. */
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
                    const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it = NULL;
    char suffix[40];
    // Avoid potential underflows.
    if (nbytes < 2)
        return 0;

    /* nkey + 1 reserves space for the key's NUL terminator. */
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
    if (settings.use_cas) {
        ntotal += sizeof(uint64_t);
    }

    unsigned int id = slabs_clsid(ntotal);
    unsigned int hdr_id = 0;
    if (id == 0)
        return 0;

    /* This is a large item. Allocate a header object now, lazily allocate
     * chunks while reading the upload.
     */
    if (ntotal > settings.slab_chunk_size_max) {
        /* We still link this item into the LRU for the larger slab class, but
         * we're pulling a header from an entirely different slab class. The
         * free routines handle large items specifically.
         */
        int htotal = nkey + 1 + nsuffix + sizeof(item) + sizeof(item_chunk);
        if (settings.use_cas) {
            htotal += sizeof(uint64_t);
        }
#ifdef NEED_ALIGN
        // header chunk needs to be padded on some systems
        int remain = htotal % 8;
        if (remain != 0) {
            htotal += 8 - remain;
        }
#endif
        hdr_id = slabs_clsid(htotal);
        it = do_item_alloc_pull(htotal, hdr_id);
        /* setting ITEM_CHUNKED is fine here because we aren't LINKED yet. */
        if (it != NULL)
            it->it_flags |= ITEM_CHUNKED;
    } else {
        it = do_item_alloc_pull(ntotal, id);
    }

    if (it == NULL) {
        pthread_mutex_lock(&lru_locks[id]);
        itemstats[id].outofmemory++;
        pthread_mutex_unlock(&lru_locks[id]);
        return NULL;
    }

    assert(it->it_flags == 0 || it->it_flags == ITEM_CHUNKED);
    //assert(it != heads[id]);

    /* Refcount is seeded to 1 by slabs_alloc() */
    it->next = it->prev = 0;

    /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
     * least a note here. Compiler (hopefully?) optimizes this out.
     */
    if (settings.temp_lru &&
            exptime - current_time <= settings.temporary_ttl) {
        id |= TEMP_LRU;
    } else if (settings.lru_segmented) {
        id |= HOT_LRU;
    } else {
        /* There is only COLD in compat-mode */
        id |= COLD_LRU;
    }
    it->slabs_clsid = id;

    DEBUG_REFCNT(it, '*');
    it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
    it->it_flags |= nsuffix != 0 ? ITEM_CFLAGS : 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    memcpy(ITEM_key(it), key, nkey);
    it->exptime = exptime;
    /* Client flags are only stored when nonzero (see item_make_header). */
    if (nsuffix > 0) {
        memcpy(ITEM_suffix(it), &flags, sizeof(flags));
    }

    /* Initialize internal chunk. */
    if (it->it_flags & ITEM_CHUNKED) {
        item_chunk *chunk = (item_chunk *) ITEM_schunk(it);

        chunk->next = 0;
        chunk->prev = 0;
        chunk->used = 0;
        chunk->size = 0;
        chunk->head = it;
        chunk->orig_clsid = hdr_id;
    }
    it->h_next = 0;

    return it;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Return an unlinked, unreferenced item's memory to the slab allocator. */
void item_free(item *it) {
    assert((it->it_flags & ITEM_LINKED) == 0);
    assert(it != heads[it->slabs_clsid]);
    assert(it != tails[it->slabs_clsid]);
    assert(it->refcount == 0);

    size_t ntotal = ITEM_ntotal(it);
    /* so slab size changer can tell later if item is already free or not */
    unsigned int clsid = ITEM_clsid(it);
    DEBUG_REFCNT(it, 'F');
    slabs_free(it, ntotal, clsid);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/**
|
|
Packit |
4e8bc4 |
* Returns true if an item will fit in the cache (its size does not exceed
|
|
Packit |
4e8bc4 |
* the maximum for a cache entry.)
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
|
|
Packit |
4e8bc4 |
char prefix[40];
|
|
Packit |
4e8bc4 |
uint8_t nsuffix;
|
|
Packit |
4e8bc4 |
if (nbytes < 2)
|
|
Packit |
4e8bc4 |
return false;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
|
|
Packit |
4e8bc4 |
prefix, &nsuffix);
|
|
Packit |
4e8bc4 |
if (settings.use_cas) {
|
|
Packit |
4e8bc4 |
ntotal += sizeof(uint64_t);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
return slabs_clsid(ntotal) != 0;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* fixing stats/references during warm start */
void do_item_link_fixup(item *it) {
    item **head, **tail;
    int ntotal = ITEM_ntotal(it);
    uint32_t hv = hash(ITEM_key(it), it->nkey);
    assoc_insert(it, hv);

    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];
    /* Chain pointers survived the restart; only repair list head/tail. */
    if (it->prev == 0 && *head == 0) *head = it;
    if (it->next == 0 && *tail == 0) *tail = it;
    sizes[it->slabs_clsid]++;
    sizes_bytes[it->slabs_clsid] += ntotal;

    STATS_LOCK();
    stats_state.curr_bytes += ntotal;
    stats_state.curr_items += 1;
    stats.total_items += 1;
    STATS_UNLOCK();

    item_stats_sizes_add(it);

    return;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Push 'it' onto the front of its LRU list.  The list's lru lock must be
 * held by the caller. */
static void do_item_link_q(item *it) { /* item is the new head */
    item **head, **tail;
    assert((it->it_flags & ITEM_SLABBED) == 0);

    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];
    assert(it != *head);
    assert((*head && *tail) || (*head == 0 && *tail == 0));
    it->prev = 0;
    it->next = *head;
    if (it->next) it->next->prev = it;
    *head = it;
    if (*tail == 0) *tail = it;
    sizes[it->slabs_clsid]++;
#ifdef EXTSTORE
    if (it->it_flags & ITEM_HDR) {
        /* Header-only item: the value bytes live in external storage, so
         * only the in-memory header footprint is accounted here. */
        sizes_bytes[it->slabs_clsid] += (ITEM_ntotal(it) - it->nbytes) + sizeof(item_hdr);
    } else {
        sizes_bytes[it->slabs_clsid] += ITEM_ntotal(it);
    }
#else
    sizes_bytes[it->slabs_clsid] += ITEM_ntotal(it);
#endif

    return;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Locking wrapper: link 'it' into its LRU under the list's lock. */
static void item_link_q(item *it) {
    pthread_mutex_t *lock = &lru_locks[it->slabs_clsid];
    pthread_mutex_lock(lock);
    do_item_link_q(it);
    pthread_mutex_unlock(lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Link 'it' into its (WARM) LRU under the lock, counting the migration. */
static void item_link_q_warm(item *it) {
    pthread_mutex_t *lock = &lru_locks[it->slabs_clsid];
    pthread_mutex_lock(lock);
    do_item_link_q(it);
    itemstats[it->slabs_clsid].moves_to_warm++;
    pthread_mutex_unlock(lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Remove 'it' from its LRU list.  The list's lru lock must be held by the
 * caller. */
static void do_item_unlink_q(item *it) {
    item **head, **tail;
    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];

    if (*head == it) {
        assert(it->prev == 0);
        *head = it->next;
    }
    if (*tail == it) {
        assert(it->next == 0);
        *tail = it->prev;
    }
    assert(it->next != it);
    assert(it->prev != it);

    if (it->next) it->next->prev = it->prev;
    if (it->prev) it->prev->next = it->next;
    sizes[it->slabs_clsid]--;
#ifdef EXTSTORE
    if (it->it_flags & ITEM_HDR) {
        /* Header-only item: mirror the accounting done in do_item_link_q. */
        sizes_bytes[it->slabs_clsid] -= (ITEM_ntotal(it) - it->nbytes) + sizeof(item_hdr);
    } else {
        sizes_bytes[it->slabs_clsid] -= ITEM_ntotal(it);
    }
#else
    sizes_bytes[it->slabs_clsid] -= ITEM_ntotal(it);
#endif

    return;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Locking wrapper: unlink 'it' from its LRU under the list's lock. */
static void item_unlink_q(item *it) {
    pthread_mutex_t *lock = &lru_locks[it->slabs_clsid];
    pthread_mutex_lock(lock);
    do_item_unlink_q(it);
    pthread_mutex_unlock(lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Link a freshly built item into the hash table and its LRU list.
 * Caller must hold the item lock for 'hv'.  Always returns 1. */
int do_item_link(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
    it->it_flags |= ITEM_LINKED;
    it->time = current_time;

    STATS_LOCK();
    stats_state.curr_bytes += ITEM_ntotal(it);
    stats_state.curr_items += 1;
    stats.total_items += 1;
    STATS_UNLOCK();

    /* Allocate a new CAS ID on link. */
    ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
    assoc_insert(it, hv);
    item_link_q(it);
    /* The hash table holds this reference while the item stays linked. */
    refcount_incr(it);
    item_stats_sizes_add(it);

    return 1;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Remove a linked item from the hash table and its LRU, then drop one
 * reference (the one held by the hash table).  Caller must hold the item
 * lock for 'hv'.  Safe to call on an already-unlinked item (no-op). */
void do_item_unlink(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
    if ((it->it_flags & ITEM_LINKED) != 0) {
        it->it_flags &= ~ITEM_LINKED;
        STATS_LOCK();
        stats_state.curr_bytes -= ITEM_ntotal(it);
        stats_state.curr_items -= 1;
        STATS_UNLOCK();
        item_stats_sizes_remove(it);
        assoc_delete(ITEM_key(it), it->nkey, hv);
        item_unlink_q(it);
        do_item_remove(it);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* FIXME: Is it necessary to keep this copy/pasted code? */
/* Same as do_item_unlink, but calls do_item_unlink_q directly — the caller
 * must already hold the relevant lru lock as well as the item lock. */
void do_item_unlink_nolock(item *it, const uint32_t hv) {
    MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
    if ((it->it_flags & ITEM_LINKED) != 0) {
        it->it_flags &= ~ITEM_LINKED;
        STATS_LOCK();
        stats_state.curr_bytes -= ITEM_ntotal(it);
        stats_state.curr_items -= 1;
        STATS_UNLOCK();
        item_stats_sizes_remove(it);
        assoc_delete(ITEM_key(it), it->nkey, hv);
        do_item_unlink_q(it);
        do_item_remove(it);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Release one reference to 'it'; frees the item when the count reaches 0. */
void do_item_remove(item *it) {
    MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
    assert((it->it_flags & ITEM_SLABBED) == 0);
    assert(it->refcount > 0);

    if (refcount_decr(it) == 0) {
        item_free(it);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Bump the last accessed time, or relink if we're in compat mode */
void do_item_update(item *it) {
    MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);

    /* Hits to COLD_LRU immediately move to WARM. */
    if (settings.lru_segmented) {
        assert((it->it_flags & ITEM_SLABBED) == 0);
        if ((it->it_flags & ITEM_LINKED) != 0) {
            if (ITEM_lruid(it) == COLD_LRU && (it->it_flags & ITEM_ACTIVE)) {
                it->time = current_time;
                item_unlink_q(it);
                /* Strip the LRU bits, then re-tag the item as WARM. */
                it->slabs_clsid = ITEM_clsid(it);
                it->slabs_clsid |= WARM_LRU;
                it->it_flags &= ~ITEM_ACTIVE;
                item_link_q_warm(it);
            } else {
                /* Touch the access time only; no queue movement needed. */
                it->time = current_time;
            }
        }
    } else if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
        /* Compat mode: rate-limited bump to the head of the single LRU. */
        assert((it->it_flags & ITEM_SLABBED) == 0);

        if ((it->it_flags & ITEM_LINKED) != 0) {
            it->time = current_time;
            item_unlink_q(it);
            item_link_q(it);
        }
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Replace 'it' with 'new_it' in the hash table and LRU.  Caller must hold
 * the item lock for 'hv'.  Returns do_item_link's result (always 1). */
int do_item_replace(item *it, item *new_it, const uint32_t hv) {
    MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
                           ITEM_key(new_it), new_it->nkey, new_it->nbytes);
    assert((it->it_flags & ITEM_SLABBED) == 0);

    do_item_unlink(it, hv);
    return do_item_link(new_it, hv);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/*@null@*/
|
|
Packit |
4e8bc4 |
/* This is walking the line of violating lock order, but I think it's safe.
|
|
Packit |
4e8bc4 |
* If the LRU lock is held, an item in the LRU cannot be wiped and freed.
|
|
Packit |
4e8bc4 |
* The data could possibly be overwritten, but this is only accessing the
|
|
Packit |
4e8bc4 |
* headers.
|
|
Packit |
4e8bc4 |
* It may not be the best idea to leave it like this, but for now it's safe.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
|
|
Packit |
4e8bc4 |
unsigned int memlimit = 2 * 1024 * 1024; /* 2MB max response size */
|
|
Packit |
4e8bc4 |
char *buffer;
|
|
Packit |
4e8bc4 |
unsigned int bufcurr;
|
|
Packit |
4e8bc4 |
item *it;
|
|
Packit |
4e8bc4 |
unsigned int len;
|
|
Packit |
4e8bc4 |
unsigned int shown = 0;
|
|
Packit |
4e8bc4 |
char key_temp[KEY_MAX_LENGTH + 1];
|
|
Packit |
4e8bc4 |
char temp[512];
|
|
Packit |
4e8bc4 |
unsigned int id = slabs_clsid;
|
|
Packit |
4e8bc4 |
id |= COLD_LRU;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&lru_locks[id]);
|
|
Packit |
4e8bc4 |
it = heads[id];
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
buffer = malloc((size_t)memlimit);
|
|
Packit |
4e8bc4 |
if (buffer == 0) {
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_locks[id]);
|
|
Packit |
4e8bc4 |
return NULL;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
bufcurr = 0;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
while (it != NULL && (limit == 0 || shown < limit)) {
|
|
Packit |
4e8bc4 |
assert(it->nkey <= KEY_MAX_LENGTH);
|
|
Packit |
4e8bc4 |
if (it->nbytes == 0 && it->nkey == 0) {
|
|
Packit |
4e8bc4 |
it = it->next;
|
|
Packit |
4e8bc4 |
continue;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
/* Copy the key since it may not be null-terminated in the struct */
|
|
Packit |
4e8bc4 |
strncpy(key_temp, ITEM_key(it), it->nkey);
|
|
Packit |
4e8bc4 |
key_temp[it->nkey] = 0x00; /* terminate */
|
|
Packit |
4e8bc4 |
len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %llu s]\r\n",
|
|
Packit |
4e8bc4 |
key_temp, it->nbytes - 2,
|
|
Packit |
4e8bc4 |
it->exptime == 0 ? 0 :
|
|
Packit |
4e8bc4 |
(unsigned long long)it->exptime + process_started);
|
|
Packit |
4e8bc4 |
if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */
|
|
Packit |
4e8bc4 |
break;
|
|
Packit |
4e8bc4 |
memcpy(buffer + bufcurr, temp, len);
|
|
Packit |
4e8bc4 |
bufcurr += len;
|
|
Packit |
4e8bc4 |
shown++;
|
|
Packit |
4e8bc4 |
it = it->next;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
memcpy(buffer + bufcurr, "END\r\n", 6);
|
|
Packit |
4e8bc4 |
bufcurr += 5;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
*bytes = bufcurr;
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_locks[id]);
|
|
Packit |
4e8bc4 |
return buffer;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* With refactoring of the various stats code the automover won't need a
 * custom function here.
 */
/* Snapshot per-class OOM, eviction, and tail-age numbers for the slab
 * automover.  'am' must have MAX_NUMBER_OF_SLAB_CLASSES entries. */
void fill_item_stats_automove(item_stats_automove *am) {
    int n;
    for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
        item_stats_automove *cur = &am[n];

        // outofmemory records into HOT
        int i = n | HOT_LRU;
        pthread_mutex_lock(&lru_locks[i]);
        cur->outofmemory = itemstats[i].outofmemory;
        pthread_mutex_unlock(&lru_locks[i]);

        // evictions and tail age are from COLD
        i = n | COLD_LRU;
        pthread_mutex_lock(&lru_locks[i]);
        cur->evicted = itemstats[i].evicted;
        if (tails[i]) {
            /* Age of the oldest (tail) COLD item, in seconds. */
            cur->age = current_time - tails[i]->time;
        } else {
            cur->age = 0;
        }
        pthread_mutex_unlock(&lru_locks[i]);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Emit server-wide item counters for the top-level "stats" command.
 * Sums itemstats[] across every slab class and all four sub-LRUs
 * (HOT/WARM/COLD/TEMP), taking each sub-LRU's lock while reading.
 * add_stats: output callback; c: opaque connection handle passed through. */
void item_stats_totals(ADD_STAT add_stats, void *c) {
    itemstats_t totals;
    memset(&totals, 0, sizeof(itemstats_t));
    int n;
    for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
        int x;
        int i;
        /* OR'ing the LRU type into the class id selects that class's
         * sub-LRU index within the global arrays. */
        for (x = 0; x < 4; x++) {
            i = n | lru_type_map[x];
            pthread_mutex_lock(&lru_locks[i]);
            totals.expired_unfetched += itemstats[i].expired_unfetched;
            totals.evicted_unfetched += itemstats[i].evicted_unfetched;
            totals.evicted_active += itemstats[i].evicted_active;
            totals.evicted += itemstats[i].evicted;
            totals.reclaimed += itemstats[i].reclaimed;
            totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
            totals.crawler_items_checked += itemstats[i].crawler_items_checked;
            totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
            totals.moves_to_cold += itemstats[i].moves_to_cold;
            totals.moves_to_warm += itemstats[i].moves_to_warm;
            totals.moves_within_lru += itemstats[i].moves_within_lru;
            totals.direct_reclaims += itemstats[i].direct_reclaims;
            pthread_mutex_unlock(&lru_locks[i]);
        }
    }
    APPEND_STAT("expired_unfetched", "%llu",
                (unsigned long long)totals.expired_unfetched);
    APPEND_STAT("evicted_unfetched", "%llu",
                (unsigned long long)totals.evicted_unfetched);
    /* Segmented-LRU counters are only meaningful when the LRU
     * maintainer thread is enabled. */
    if (settings.lru_maintainer_thread) {
        APPEND_STAT("evicted_active", "%llu",
                    (unsigned long long)totals.evicted_active);
    }
    APPEND_STAT("evictions", "%llu",
                (unsigned long long)totals.evicted);
    APPEND_STAT("reclaimed", "%llu",
                (unsigned long long)totals.reclaimed);
    APPEND_STAT("crawler_reclaimed", "%llu",
                (unsigned long long)totals.crawler_reclaimed);
    APPEND_STAT("crawler_items_checked", "%llu",
                (unsigned long long)totals.crawler_items_checked);
    APPEND_STAT("lrutail_reflocked", "%llu",
                (unsigned long long)totals.lrutail_reflocked);
    if (settings.lru_maintainer_thread) {
        APPEND_STAT("moves_to_cold", "%llu",
                    (unsigned long long)totals.moves_to_cold);
        APPEND_STAT("moves_to_warm", "%llu",
                    (unsigned long long)totals.moves_to_warm);
        APPEND_STAT("moves_within_lru", "%llu",
                    (unsigned long long)totals.moves_within_lru);
        APPEND_STAT("direct_reclaims", "%llu",
                    (unsigned long long)totals.direct_reclaims);
        APPEND_STAT("lru_bumps_dropped", "%llu",
                    (unsigned long long)lru_total_bumps_dropped());
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Emit per-slab-class item stats ("stats items" command output).
 * For each slab class, aggregates the four sub-LRUs under their locks,
 * then emits one "items:<class>:<name>" line per counter. Classes with
 * zero items are skipped entirely. */
void item_stats(ADD_STAT add_stats, void *c) {
    struct thread_stats thread_stats;
    /* Fold all worker threads' local stats (for lru_hits) into one copy. */
    threadlocal_stats_aggregate(&thread_stats);
    itemstats_t totals;
    int n;
    for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
        /* totals is reset per class; counters below are per-class sums. */
        memset(&totals, 0, sizeof(itemstats_t));
        int x;
        int i;
        unsigned int size = 0;       /* total item count across sub-LRUs */
        unsigned int age = 0;        /* tail age of the COLD sub-LRU */
        unsigned int age_hot = 0;    /* tail age of the HOT sub-LRU */
        unsigned int age_warm = 0;   /* tail age of the WARM sub-LRU */
        unsigned int lru_size_map[4]; /* item count per sub-LRU, indexed by x */
        const char *fmt = "items:%d:%s";
        /* key_str/val_str/klen/vlen are scratch used by the
         * APPEND_NUM_FMT_STAT macro expansion. */
        char key_str[STAT_KEY_LEN];
        char val_str[STAT_VAL_LEN];
        int klen = 0, vlen = 0;
        for (x = 0; x < 4; x++) {
            i = n | lru_type_map[x];
            pthread_mutex_lock(&lru_locks[i]);
            totals.evicted += itemstats[i].evicted;
            totals.evicted_nonzero += itemstats[i].evicted_nonzero;
            totals.outofmemory += itemstats[i].outofmemory;
            totals.tailrepairs += itemstats[i].tailrepairs;
            totals.reclaimed += itemstats[i].reclaimed;
            totals.expired_unfetched += itemstats[i].expired_unfetched;
            totals.evicted_unfetched += itemstats[i].evicted_unfetched;
            totals.evicted_active += itemstats[i].evicted_active;
            totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
            totals.crawler_items_checked += itemstats[i].crawler_items_checked;
            totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
            totals.moves_to_cold += itemstats[i].moves_to_cold;
            totals.moves_to_warm += itemstats[i].moves_to_warm;
            totals.moves_within_lru += itemstats[i].moves_within_lru;
            totals.direct_reclaims += itemstats[i].direct_reclaims;
            totals.mem_requested += sizes_bytes[i];
            size += sizes[i];
            lru_size_map[x] = sizes[i];
            /* Tail age per sub-LRU; the generic "age" stat reports COLD. */
            if (lru_type_map[x] == COLD_LRU && tails[i] != NULL) {
                age = current_time - tails[i]->time;
            } else if (lru_type_map[x] == HOT_LRU && tails[i] != NULL) {
                age_hot = current_time - tails[i]->time;
            } else if (lru_type_map[x] == WARM_LRU && tails[i] != NULL) {
                age_warm = current_time - tails[i]->time;
            }
            if (lru_type_map[x] == COLD_LRU)
                totals.evicted_time = itemstats[i].evicted_time;
            /* Each LRU type is visited exactly once per class, so plain
             * assignment (not +=) is correct for the hit counters. */
            switch (lru_type_map[x]) {
                case HOT_LRU:
                    totals.hits_to_hot = thread_stats.lru_hits[i];
                    break;
                case WARM_LRU:
                    totals.hits_to_warm = thread_stats.lru_hits[i];
                    break;
                case COLD_LRU:
                    totals.hits_to_cold = thread_stats.lru_hits[i];
                    break;
                case TEMP_LRU:
                    totals.hits_to_temp = thread_stats.lru_hits[i];
                    break;
            }
            pthread_mutex_unlock(&lru_locks[i]);
        }
        /* Skip empty classes to keep the output compact. */
        if (size == 0)
            continue;
        APPEND_NUM_FMT_STAT(fmt, n, "number", "%u", size);
        if (settings.lru_maintainer_thread) {
            APPEND_NUM_FMT_STAT(fmt, n, "number_hot", "%u", lru_size_map[0]);
            APPEND_NUM_FMT_STAT(fmt, n, "number_warm", "%u", lru_size_map[1]);
            APPEND_NUM_FMT_STAT(fmt, n, "number_cold", "%u", lru_size_map[2]);
            if (settings.temp_lru) {
                APPEND_NUM_FMT_STAT(fmt, n, "number_temp", "%u", lru_size_map[3]);
            }
            APPEND_NUM_FMT_STAT(fmt, n, "age_hot", "%u", age_hot);
            APPEND_NUM_FMT_STAT(fmt, n, "age_warm", "%u", age_warm);
        }
        APPEND_NUM_FMT_STAT(fmt, n, "age", "%u", age);
        APPEND_NUM_FMT_STAT(fmt, n, "mem_requested", "%llu", (unsigned long long)totals.mem_requested);
        APPEND_NUM_FMT_STAT(fmt, n, "evicted",
                            "%llu", (unsigned long long)totals.evicted);
        APPEND_NUM_FMT_STAT(fmt, n, "evicted_nonzero",
                            "%llu", (unsigned long long)totals.evicted_nonzero);
        APPEND_NUM_FMT_STAT(fmt, n, "evicted_time",
                            "%u", totals.evicted_time);
        APPEND_NUM_FMT_STAT(fmt, n, "outofmemory",
                            "%llu", (unsigned long long)totals.outofmemory);
        APPEND_NUM_FMT_STAT(fmt, n, "tailrepairs",
                            "%llu", (unsigned long long)totals.tailrepairs);
        APPEND_NUM_FMT_STAT(fmt, n, "reclaimed",
                            "%llu", (unsigned long long)totals.reclaimed);
        APPEND_NUM_FMT_STAT(fmt, n, "expired_unfetched",
                            "%llu", (unsigned long long)totals.expired_unfetched);
        APPEND_NUM_FMT_STAT(fmt, n, "evicted_unfetched",
                            "%llu", (unsigned long long)totals.evicted_unfetched);
        if (settings.lru_maintainer_thread) {
            APPEND_NUM_FMT_STAT(fmt, n, "evicted_active",
                                "%llu", (unsigned long long)totals.evicted_active);
        }
        APPEND_NUM_FMT_STAT(fmt, n, "crawler_reclaimed",
                            "%llu", (unsigned long long)totals.crawler_reclaimed);
        APPEND_NUM_FMT_STAT(fmt, n, "crawler_items_checked",
                            "%llu", (unsigned long long)totals.crawler_items_checked);
        APPEND_NUM_FMT_STAT(fmt, n, "lrutail_reflocked",
                            "%llu", (unsigned long long)totals.lrutail_reflocked);
        if (settings.lru_maintainer_thread) {
            APPEND_NUM_FMT_STAT(fmt, n, "moves_to_cold",
                                "%llu", (unsigned long long)totals.moves_to_cold);
            APPEND_NUM_FMT_STAT(fmt, n, "moves_to_warm",
                                "%llu", (unsigned long long)totals.moves_to_warm);
            APPEND_NUM_FMT_STAT(fmt, n, "moves_within_lru",
                                "%llu", (unsigned long long)totals.moves_within_lru);
            APPEND_NUM_FMT_STAT(fmt, n, "direct_reclaims",
                                "%llu", (unsigned long long)totals.direct_reclaims);
            APPEND_NUM_FMT_STAT(fmt, n, "hits_to_hot",
                                "%llu", (unsigned long long)totals.hits_to_hot);

            APPEND_NUM_FMT_STAT(fmt, n, "hits_to_warm",
                                "%llu", (unsigned long long)totals.hits_to_warm);

            APPEND_NUM_FMT_STAT(fmt, n, "hits_to_cold",
                                "%llu", (unsigned long long)totals.hits_to_cold);

            APPEND_NUM_FMT_STAT(fmt, n, "hits_to_temp",
                                "%llu", (unsigned long long)totals.hits_to_temp);

        }
    }

    /* getting here means both ascii and binary terminators fit */
    add_stats(NULL, 0, NULL, 0, c);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
bool item_stats_sizes_status(void) {
|
|
Packit |
4e8bc4 |
bool ret = false;
|
|
Packit |
4e8bc4 |
mutex_lock(&stats_sizes_lock);
|
|
Packit |
4e8bc4 |
if (stats_sizes_hist != NULL)
|
|
Packit |
4e8bc4 |
ret = true;
|
|
Packit |
4e8bc4 |
mutex_unlock(&stats_sizes_lock);
|
|
Packit |
4e8bc4 |
return ret;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
void item_stats_sizes_init(void) {
|
|
Packit |
4e8bc4 |
if (stats_sizes_hist != NULL)
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
stats_sizes_buckets = settings.item_size_max / 32 + 1;
|
|
Packit |
4e8bc4 |
stats_sizes_hist = calloc(stats_sizes_buckets, sizeof(int));
|
|
Packit |
4e8bc4 |
stats_sizes_cas_min = (settings.use_cas) ? get_cas_id() : 0;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Turn on the size histogram and report the result via add_stats.
 * Requires CAS support (the histogram uses CAS ids to ignore items
 * created before it was enabled). Idempotent: already-enabled simply
 * reports "enabled" again. */
void item_stats_sizes_enable(ADD_STAT add_stats, void *c) {
    mutex_lock(&stats_sizes_lock);
    if (!settings.use_cas) {
        APPEND_STAT("sizes_status", "error", "");
        APPEND_STAT("sizes_error", "cas_support_disabled", "");
    } else {
        if (stats_sizes_hist == NULL) {
            item_stats_sizes_init();
        }
        /* init leaves the pointer NULL on allocation failure */
        if (stats_sizes_hist != NULL) {
            APPEND_STAT("sizes_status", "enabled", "");
        } else {
            APPEND_STAT("sizes_status", "error", "");
            APPEND_STAT("sizes_error", "no_memory", "");
        }
    }
    mutex_unlock(&stats_sizes_lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Turn off the size histogram, releasing its memory, and report
 * "disabled". Safe to call when already disabled. */
void item_stats_sizes_disable(ADD_STAT add_stats, void *c) {
    mutex_lock(&stats_sizes_lock);
    /* free(NULL) is a no-op, so no guard is needed */
    free(stats_sizes_hist);
    stats_sizes_hist = NULL;
    APPEND_STAT("sizes_status", "disabled", "");
    mutex_unlock(&stats_sizes_lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Account a newly linked item in the size histogram. Items whose CAS id
 * predates histogram creation are skipped, as is everything when the
 * histogram is disabled. Bucket = ceil(total size / 32). */
void item_stats_sizes_add(item *it) {
    if (stats_sizes_hist == NULL)
        return;
    if (stats_sizes_cas_min > ITEM_get_cas(it))
        return;
    const int ntotal = ITEM_ntotal(it);
    int bucket = ntotal / 32;
    if (ntotal % 32 != 0) {
        bucket++;
    }
    /* oversized items (header overhead past item_size_max) are dropped */
    if (bucket < stats_sizes_buckets) {
        stats_sizes_hist[bucket]++;
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* I think there's no way for this to be accurate without using the CAS value.
|
|
Packit |
4e8bc4 |
* Since items getting their time value bumped will pass this validation.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
void item_stats_sizes_remove(item *it) {
|
|
Packit |
4e8bc4 |
if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
int ntotal = ITEM_ntotal(it);
|
|
Packit |
4e8bc4 |
int bucket = ntotal / 32;
|
|
Packit |
4e8bc4 |
if ((ntotal % 32) != 0) bucket++;
|
|
Packit |
4e8bc4 |
if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]--;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/** dumps out a list of objects of each size, with granularity of 32 bytes */
|
|
Packit |
4e8bc4 |
/*@null@*/
|
|
Packit |
4e8bc4 |
/* Locks are correct based on a technicality. Holds LRU lock while doing the
|
|
Packit |
4e8bc4 |
* work, so items can't go invalid, and it's only looking at header sizes
|
|
Packit |
4e8bc4 |
* which don't change.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
void item_stats_sizes(ADD_STAT add_stats, void *c) {
|
|
Packit |
4e8bc4 |
mutex_lock(&stats_sizes_lock);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
if (stats_sizes_hist != NULL) {
|
|
Packit |
4e8bc4 |
int i;
|
|
Packit |
4e8bc4 |
for (i = 0; i < stats_sizes_buckets; i++) {
|
|
Packit |
4e8bc4 |
if (stats_sizes_hist[i] != 0) {
|
|
Packit |
4e8bc4 |
char key[12];
|
|
Packit |
4e8bc4 |
snprintf(key, sizeof(key), "%d", i * 32);
|
|
Packit |
4e8bc4 |
APPEND_STAT(key, "%u", stats_sizes_hist[i]);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
} else {
|
|
Packit |
4e8bc4 |
APPEND_STAT("sizes_status", "disabled", "");
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
add_stats(NULL, 0, NULL, 0, c);
|
|
Packit |
4e8bc4 |
mutex_unlock(&stats_sizes_lock);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/** wrapper around assoc_find which does the lazy expiration logic */
/* Looks up key in the hash table and takes a reference on any item found.
 * Flushed or expired items are lazily unlinked here and NULL is returned.
 * do_update: when true, bump the item's LRU/activity state (do_item_bump).
 * Caller must hold the item lock for hv and release the returned reference.
 */
item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, conn *c, const bool do_update) {
    item *it = assoc_find(key, nkey, hv);
    if (it != NULL) {
        refcount_incr(it);
        /* Optimization for slab reassignment. prevents popular items from
         * jamming in busy wait. Can only do this here to satisfy lock order
         * of item_lock, slabs_lock. */
        /* This was made unsafe by removal of the cache_lock:
         * slab_rebalance_signal and slab_rebal.* are modified in a separate
         * thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
         * NULL (0), but slab_end is still equal to some value, this would end
         * up unlinking every item fetched.
         * This is either an acceptable loss, or if slab_rebalance_signal is
         * true, slab_start/slab_end should be put behind the slabs_lock.
         * Which would cause a huge potential slowdown.
         * Could also use a specific lock for slab_rebal.* and
         * slab_rebalance_signal (shorter lock?)
         */
        /*if (slab_rebalance_signal &&
            ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
            do_item_unlink(it, hv);
            do_item_remove(it);
            it = NULL;
        }*/
    }
    /* was_found: 0 = miss, 1 = hit, 2 = nuked by flush, 3 = nuked by expiry.
     * Only consumed by the fetch logger below. */
    int was_found = 0;

    if (settings.verbose > 2) {
        int ii;
        if (it == NULL) {
            fprintf(stderr, "> NOT FOUND ");
        } else {
            fprintf(stderr, "> FOUND KEY ");
        }
        /* key may not be NUL-terminated; print it byte by byte */
        for (ii = 0; ii < nkey; ++ii) {
            fprintf(stderr, "%c", key[ii]);
        }
    }

    if (it != NULL) {
        was_found = 1;
        if (item_is_flushed(it)) {
            /* item predates the last flush_all: unlink and drop our ref */
            do_item_unlink(it, hv);
            STORAGE_delete(c->thread->storage, it);
            do_item_remove(it);
            it = NULL;
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.get_flushed++;
            pthread_mutex_unlock(&c->thread->stats.mutex);
            if (settings.verbose > 2) {
                fprintf(stderr, " -nuked by flush");
            }
            was_found = 2;
        } else if (it->exptime != 0 && it->exptime <= current_time) {
            /* lazily expired: unlink and drop our ref */
            do_item_unlink(it, hv);
            STORAGE_delete(c->thread->storage, it);
            do_item_remove(it);
            it = NULL;
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.get_expired++;
            pthread_mutex_unlock(&c->thread->stats.mutex);
            if (settings.verbose > 2) {
                fprintf(stderr, " -nuked by expire");
            }
            was_found = 3;
        } else {
            if (do_update) {
                do_item_bump(c, it, hv);
            }
            DEBUG_REFCNT(it, '+');
        }
    }

    if (settings.verbose > 2)
        fprintf(stderr, "\n");
    /* For now this is in addition to the above verbose logging. */
    LOGGER_LOG(c->thread->l, LOG_FETCHERS, LOGGER_ITEM_GET, NULL, was_found, key, nkey,
               (it) ? ITEM_clsid(it) : 0, c->sfd);

    return it;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// Requires lock held for item.
// Split out of do_item_get() to allow mget functions to look through header
// data before losing state modified via the bump function.
void do_item_bump(conn *c, item *it, const uint32_t hv) {
    /* We update the hit markers only during fetches.
     * An item needs to be hit twice overall to be considered
     * ACTIVE, but only needs a single hit to maintain activity
     * afterward.
     * FETCHED tells if an item has ever been active.
     */
    if (settings.lru_segmented) {
        if ((it->it_flags & ITEM_ACTIVE) == 0) {
            if ((it->it_flags & ITEM_FETCHED) == 0) {
                /* first hit: only mark as fetched */
                it->it_flags |= ITEM_FETCHED;
            } else {
                /* second hit: promote to ACTIVE */
                it->it_flags |= ITEM_ACTIVE;
                if (ITEM_lruid(it) != COLD_LRU) {
                    it->time = current_time; // only need to bump time.
                } else if (!lru_bump_async(c->thread->lru_bump_buf, it, hv)) {
                    // add flag before async bump to avoid race.
                    /* bump buffer was full: roll back the ACTIVE flag */
                    it->it_flags &= ~ITEM_ACTIVE;
                }
            }
        }
    } else {
        /* classic (non-segmented) LRU: mark fetched and relink to head */
        it->it_flags |= ITEM_FETCHED;
        do_item_update(it);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Fetch an item (with LRU bump) and, when found, overwrite its
 * expiration time. Returns the item with a reference held — caller
 * releases it — or NULL on miss/expiry. */
item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
                    const uint32_t hv, conn *c) {
    item *found = do_item_get(key, nkey, hv, c, DO_UPDATE);
    if (found != NULL)
        found->exptime = exptime;
    return found;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/*** LRU MAINTENANCE THREAD ***/
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Returns number of items remove, expired, or evicted.
 * Callable from worker threads or the LRU maintainer thread */
/* Walks up to 5 items from the tail of one sub-LRU (orig_id | cur_lru),
 * reclaiming expired/flushed items and shuffling or evicting the rest:
 *   HOT/WARM: rescue ACTIVE items, demote over-limit/over-age to COLD.
 *   COLD: evict (LRU_PULL_EVICT), hand back (LRU_PULL_RETURN_ITEM),
 *         or promote ACTIVE items to WARM.
 *   TEMP: reclaim expired only.
 * total_bytes: memory used, for the hot/warm percentage limits.
 * max_age: WARM items older than this are demoted regardless of size.
 * ret_it: receives item+hash when LRU_PULL_RETURN_ITEM is set; the item
 * is returned referenced with its hash-bucket trylock still held. */
int lru_pull_tail(const int orig_id, const int cur_lru,
        const uint64_t total_bytes, const uint8_t flags, const rel_time_t max_age,
        struct lru_pull_tail_return *ret_it) {
    item *it = NULL;
    int id = orig_id;
    int removed = 0;
    if (id == 0)
        return 0;

    int tries = 5;
    item *search;
    item *next_it;
    void *hold_lock = NULL;
    unsigned int move_to_lru = 0; /* nonzero: relink `it` into this LRU after the loop */
    uint64_t limit = 0;

    id |= cur_lru;
    pthread_mutex_lock(&lru_locks[id]);
    search = tails[id];
    /* We walk up *only* for locked items, and if bottom is expired. */
    for (; tries > 0 && search != NULL; tries--, search=next_it) {
        /* we might relink search mid-loop, so search->prev isn't reliable */
        next_it = search->prev;
        /* crawler markers are fake items: zero key/value, it_flags == 1 */
        if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
            /* We are a crawler, ignore it. */
            if (flags & LRU_PULL_CRAWL_BLOCKS) {
                pthread_mutex_unlock(&lru_locks[id]);
                return 0;
            }
            tries++;
            continue;
        }
        uint32_t hv = hash(ITEM_key(search), search->nkey);
        /* Attempt to hash item lock the "search" item. If locked, no
         * other callers can incr the refcount. Also skip ourselves. */
        if ((hold_lock = item_trylock(hv)) == NULL)
            continue;
        /* Now see if the item is refcount locked */
        if (refcount_incr(search) != 2) {
            /* Note pathological case with ref'ed items in tail.
             * Can still unlink the item, but it won't be reusable yet */
            itemstats[id].lrutail_reflocked++;
            /* In case of refcount leaks, enable for quick workaround. */
            /* WARNING: This can cause terrible corruption */
            if (settings.tail_repair_time &&
                    search->time + settings.tail_repair_time < current_time) {
                itemstats[id].tailrepairs++;
                search->refcount = 1;
                /* This will call item_remove -> item_free since refcnt is 1 */
                STORAGE_delete(ext_storage, search);
                do_item_unlink_nolock(search, hv);
                item_trylock_unlock(hold_lock);
                continue;
            }
        }

        /* Expired or flushed */
        if ((search->exptime != 0 && search->exptime < current_time)
            || item_is_flushed(search)) {
            itemstats[id].reclaimed++;
            if ((search->it_flags & ITEM_FETCHED) == 0) {
                itemstats[id].expired_unfetched++;
            }
            /* refcnt 2 -> 1 */
            do_item_unlink_nolock(search, hv);
            STORAGE_delete(ext_storage, search);
            /* refcnt 1 -> 0 -> item_free */
            do_item_remove(search);
            item_trylock_unlock(hold_lock);
            removed++;

            /* If all we're finding are expired, can keep going */
            continue;
        }

        /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU.
         * If we're COLD_LRU, send to WARM_LRU unless we need to evict
         */
        switch (cur_lru) {
            case HOT_LRU:
                limit = total_bytes * settings.hot_lru_pct / 100;
                /* fallthrough: HOT shares the WARM logic below, with
                 * limit already set to the hot percentage. */
            case WARM_LRU:
                if (limit == 0)
                    limit = total_bytes * settings.warm_lru_pct / 100;
                /* Rescue ACTIVE items aggressively */
                if ((search->it_flags & ITEM_ACTIVE) != 0) {
                    search->it_flags &= ~ITEM_ACTIVE;
                    removed++;
                    if (cur_lru == WARM_LRU) {
                        /* re-link to the head of the same WARM LRU */
                        itemstats[id].moves_within_lru++;
                        do_item_unlink_q(search);
                        do_item_link_q(search);
                        do_item_remove(search);
                        item_trylock_unlock(hold_lock);
                    } else {
                        /* Active HOT_LRU items flow to WARM */
                        itemstats[id].moves_to_warm++;
                        move_to_lru = WARM_LRU;
                        do_item_unlink_q(search);
                        it = search;
                    }
                } else if (sizes_bytes[id] > limit ||
                           current_time - search->time > max_age) {
                    itemstats[id].moves_to_cold++;
                    move_to_lru = COLD_LRU;
                    do_item_unlink_q(search);
                    it = search;
                    removed++;
                    break;
                } else {
                    /* Don't want to move to COLD, not active, bail out */
                    it = search;
                }
                break;
            case COLD_LRU:
                it = search; /* No matter what, we're stopping */
                if (flags & LRU_PULL_EVICT) {
                    if (settings.evict_to_free == 0) {
                        /* Don't think we need a counter for this. It'll OOM. */
                        break;
                    }
                    itemstats[id].evicted++;
                    itemstats[id].evicted_time = current_time - search->time;
                    if (search->exptime != 0)
                        itemstats[id].evicted_nonzero++;
                    if ((search->it_flags & ITEM_FETCHED) == 0) {
                        itemstats[id].evicted_unfetched++;
                    }
                    if ((search->it_flags & ITEM_ACTIVE)) {
                        itemstats[id].evicted_active++;
                    }
                    LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EVICTION, search);
                    STORAGE_delete(ext_storage, search);
                    do_item_unlink_nolock(search, hv);
                    removed++;
                    if (settings.slab_automove == 2) {
                        /* aggressive automove: feed the freed class a page */
                        slabs_reassign(-1, orig_id);
                    }
                } else if (flags & LRU_PULL_RETURN_ITEM) {
                    /* Keep a reference to this item and return it. */
                    ret_it->it = it;
                    ret_it->hv = hv;
                } else if ((search->it_flags & ITEM_ACTIVE) != 0
                        && settings.lru_segmented) {
                    itemstats[id].moves_to_warm++;
                    search->it_flags &= ~ITEM_ACTIVE;
                    move_to_lru = WARM_LRU;
                    do_item_unlink_q(search);
                    removed++;
                }
                break;
            case TEMP_LRU:
                it = search; /* Kill the loop. Parent only interested in reclaims */
                break;
        }
        if (it != NULL)
            break;
    }

    pthread_mutex_unlock(&lru_locks[id]);

    /* If the loop selected an item, hold_lock for its hash bucket is
     * still held here; it's released below unless the item is being
     * returned to the caller. */
    if (it != NULL) {
        if (move_to_lru) {
            it->slabs_clsid = ITEM_clsid(it);
            it->slabs_clsid |= move_to_lru;
            item_link_q(it);
        }
        if ((flags & LRU_PULL_RETURN_ITEM) == 0) {
            do_item_remove(it);
            item_trylock_unlock(hold_lock);
        }
    }

    return removed;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* TODO: Third place this code needs to be deduped */
|
|
Packit |
4e8bc4 |
static void lru_bump_buf_link_q(lru_bump_buf *b) {
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&bump_buf_lock);
|
|
Packit |
4e8bc4 |
assert(b != bump_buf_head);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
b->prev = 0;
|
|
Packit |
4e8bc4 |
b->next = bump_buf_head;
|
|
Packit |
4e8bc4 |
if (b->next) b->next->prev = b;
|
|
Packit |
4e8bc4 |
bump_buf_head = b;
|
|
Packit |
4e8bc4 |
if (bump_buf_tail == 0) bump_buf_tail = b;
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&bump_buf_lock);
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
void *item_lru_bump_buf_create(void) {
|
|
Packit |
4e8bc4 |
lru_bump_buf *b = calloc(1, sizeof(lru_bump_buf));
|
|
Packit |
4e8bc4 |
if (b == NULL) {
|
|
Packit |
4e8bc4 |
return NULL;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
b->buf = bipbuf_new(sizeof(lru_bump_entry) * LRU_BUMP_BUF_SIZE);
|
|
Packit |
4e8bc4 |
if (b->buf == NULL) {
|
|
Packit |
4e8bc4 |
free(b);
|
|
Packit |
4e8bc4 |
return NULL;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
pthread_mutex_init(&b->mutex, NULL);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
lru_bump_buf_link_q(b);
|
|
Packit |
4e8bc4 |
return b;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Queue an asynchronous LRU bump for item `it` (hash value `hv`) into
 * buffer `b`. Takes a reference on the item; the reference is released
 * here on failure, otherwise by the maintainer when it drains the queue.
 * Returns true if the entry was queued, false if it was dropped. */
static bool lru_bump_async(lru_bump_buf *b, item *it, uint32_t hv) {
    bool queued = false;
    refcount_incr(it);
    pthread_mutex_lock(&b->mutex);
    lru_bump_entry *be = (lru_bump_entry *) bipbuf_request(b->buf, sizeof(lru_bump_entry));
    if (be != NULL) {
        be->it = it;
        be->hv = hv;
        /* bipbuf_push returns 0 on failure to commit the reserved space. */
        queued = (bipbuf_push(b->buf, sizeof(lru_bump_entry)) != 0);
    }
    if (!queued) {
        /* Buffer full (or reservation failed): count the drop and undo
         * the reference we took above. */
        b->dropped++;
        refcount_decr(it);
    }
    pthread_mutex_unlock(&b->mutex);
    return queued;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* TODO: Might be worth a micro-optimization of having bump buffers link
 * themselves back into the central queue when queue goes from zero to
 * non-zero, then remove from list if zero more than N times.
 * If very few hits on cold this would avoid extra memory barriers from LRU
 * maintainer thread. If many hits, they'll just stay in the list.
 */
/* Drain every worker's bump buffer: for each queued entry, re-touch the
 * item (do_item_update) and drop the reference taken by lru_bump_async.
 * Returns true if any entry was processed.
 * NOTE(review): entries are read after releasing b->mutex — presumably
 * safe because this maintainer thread is the only consumer of the
 * bipbuffer; confirm before restructuring. */
static bool lru_maintainer_bumps(void) {
    lru_bump_buf *b;
    lru_bump_entry *be;
    unsigned int size;   /* bytes of queued entries returned by peek */
    unsigned int todo;   /* bytes remaining to process */
    bool bumped = false;
    pthread_mutex_lock(&bump_buf_lock);
    for (b = bump_buf_head; b != NULL; b=b->next) {
        /* Snapshot the readable region under the buffer's own lock. */
        pthread_mutex_lock(&b->mutex);
        be = (lru_bump_entry *) bipbuf_peek_all(b->buf, &size);
        pthread_mutex_unlock(&b->mutex);

        if (be == NULL) {
            continue;
        }
        todo = size;
        bumped = true;

        while (todo) {
            /* Lock the item's hash bucket, bump it in the LRU, and
             * release the queue's reference. */
            item_lock(be->hv);
            do_item_update(be->it);
            do_item_remove(be->it);
            item_unlock(be->hv);
            be++;
            todo -= sizeof(lru_bump_entry);
        }

        /* Consume the processed bytes from the bipbuffer. */
        pthread_mutex_lock(&b->mutex);
        be = (lru_bump_entry *) bipbuf_poll(b->buf, size);
        pthread_mutex_unlock(&b->mutex);
    }
    pthread_mutex_unlock(&bump_buf_lock);
    return bumped;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
static uint64_t lru_total_bumps_dropped(void) {
|
|
Packit |
4e8bc4 |
uint64_t total = 0;
|
|
Packit |
4e8bc4 |
lru_bump_buf *b;
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&bump_buf_lock);
|
|
Packit |
4e8bc4 |
for (b = bump_buf_head; b != NULL; b=b->next) {
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&b->mutex);
|
|
Packit |
4e8bc4 |
total += b->dropped;
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&b->mutex);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&bump_buf_lock);
|
|
Packit |
4e8bc4 |
return total;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Loop up to N times:
 * If too many items are in HOT_LRU, push to COLD_LRU
 * If too many items are in WARM_LRU, push to COLD_LRU
 * If too many items are in COLD_LRU, poke COLD_LRU tail
 * 1000 loops with 1ms min sleep gives us under 1m items shifted/sec. The
 * locks can't handle much more than that. Leaving a TODO for how to
 * autoadjust in the future.
 */
/* Rebalance the HOT/WARM/COLD (and optionally TEMP) sub-LRUs of one slab
 * class. Returns the number of rounds in which at least one item moved,
 * which the caller uses to adapt its per-class sleep backoff. */
static int lru_maintainer_juggle(const int slabs_clsid) {
    int i;
    int did_moves = 0;
    uint64_t total_bytes = 0;
    unsigned int chunks_perslab = 0;
    //unsigned int chunks_free = 0;
    /* TODO: if free_chunks below high watermark, increase aggressiveness */
    slabs_available_chunks(slabs_clsid, NULL,
            &chunks_perslab);
    if (settings.temp_lru) {
        /* Only looking for reclaims. Run before we size the LRU. */
        for (i = 0; i < 500; i++) {
            /* lru_pull_tail <= 0 means nothing left to reclaim this round. */
            if (lru_pull_tail(slabs_clsid, TEMP_LRU, 0, 0, 0, NULL) <= 0) {
                break;
            } else {
                did_moves++;
            }
        }
    }

    rel_time_t cold_age = 0;
    rel_time_t hot_age = 0;
    rel_time_t warm_age = 0;
    /* If LRU is in flat mode, force items to drain into COLD via max age of 0 */
    if (settings.lru_segmented) {
        /* Age of COLD's tail bounds how long items may sit in HOT/WARM. */
        pthread_mutex_lock(&lru_locks[slabs_clsid|COLD_LRU]);
        if (tails[slabs_clsid|COLD_LRU]) {
            cold_age = current_time - tails[slabs_clsid|COLD_LRU]->time;
        }
        // Also build up total_bytes for the classes.
        total_bytes += sizes_bytes[slabs_clsid|COLD_LRU];
        pthread_mutex_unlock(&lru_locks[slabs_clsid|COLD_LRU]);

        hot_age = cold_age * settings.hot_max_factor;
        warm_age = cold_age * settings.warm_max_factor;

        // total_bytes doesn't have to be exact. cache it for the juggles.
        pthread_mutex_lock(&lru_locks[slabs_clsid|HOT_LRU]);
        total_bytes += sizes_bytes[slabs_clsid|HOT_LRU];
        pthread_mutex_unlock(&lru_locks[slabs_clsid|HOT_LRU]);

        pthread_mutex_lock(&lru_locks[slabs_clsid|WARM_LRU]);
        total_bytes += sizes_bytes[slabs_clsid|WARM_LRU];
        pthread_mutex_unlock(&lru_locks[slabs_clsid|WARM_LRU]);
    }

    /* Juggle HOT/WARM up to N times */
    for (i = 0; i < 500; i++) {
        int do_more = 0;
        if (lru_pull_tail(slabs_clsid, HOT_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, hot_age, NULL) ||
            lru_pull_tail(slabs_clsid, WARM_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, warm_age, NULL)) {
            do_more++;
        }
        if (settings.lru_segmented) {
            do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, 0, NULL);
        }
        /* Stop early once a full round moves nothing. */
        if (do_more == 0)
            break;
        did_moves++;
    }
    return did_moves;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Will crawl all slab classes a minimum of once per hour */
/* Parenthesized so the macro expands safely inside larger expressions
 * (e.g. division or multiplication against the wait cap); the bare
 * `60 * 60` form mis-expands under operators of equal precedence. */
#define MAX_MAINTCRAWL_WAIT (60 * 60)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Hoping user input will improve this function. This is all a wild guess.
 * Operation: Kicks crawler for each slab id. Crawlers take some statistics as
 * to items with nonzero expirations. It then buckets how many items will
 * expire per minute for the next hour.
 * This function checks the results of a run, and if it things more than 1% of
 * expirable objects are ready to go, kick the crawler again to reap.
 * It will also kick the crawler once per minute regardless, waiting a minute
 * longer for each time it has no work to do, up to an hour wait time.
 * The latter is to avoid newly started daemons from waiting too long before
 * retrying a crawl.
 */
static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, logger *l) {
    int i;
    /* Static: per-LRU schedule persists across calls from the maintainer. */
    static rel_time_t next_crawls[POWER_LARGEST];
    static rel_time_t next_crawl_wait[POWER_LARGEST];
    uint8_t todo[POWER_LARGEST];   /* which LRU ids to crawl this round */
    memset(todo, 0, sizeof(uint8_t) * POWER_LARGEST);
    bool do_run = false;
    unsigned int tocrawl_limit = 0;

    // TODO: If not segmented LRU, skip non-cold
    for (i = POWER_SMALLEST; i < POWER_LARGEST; i++) {
        crawlerstats_t *s = &cdata->crawlerstats[i];
        /* We've not successfully kicked off a crawl yet. */
        if (s->run_complete) {
            char *lru_name = "na";
            pthread_mutex_lock(&cdata->lock);
            int x;
            /* Should we crawl again? */
            uint64_t possible_reclaims = s->seen - s->noexp;
            uint64_t available_reclaims = 0;
            /* Need to think we can free at least 1% of the items before
             * crawling. */
            /* FIXME: Configurable? */
            uint64_t low_watermark = (possible_reclaims / 100) + 1;
            rel_time_t since_run = current_time - s->end_time;
            /* Don't bother if the payoff is too low. */
            /* Walk the per-minute expiry histogram; shrink or grow the
             * wait depending on how soon the watermark is crossed. */
            for (x = 0; x < 60; x++) {
                available_reclaims += s->histo[x];
                if (available_reclaims > low_watermark) {
                    if (next_crawl_wait[i] < (x * 60)) {
                        next_crawl_wait[i] += 60;
                    } else if (next_crawl_wait[i] >= 60) {
                        next_crawl_wait[i] -= 60;
                    }
                    break;
                }
            }

            /* Nothing expiring within the hour: back off another minute. */
            if (available_reclaims == 0) {
                next_crawl_wait[i] += 60;
            }

            if (next_crawl_wait[i] > MAX_MAINTCRAWL_WAIT) {
                next_crawl_wait[i] = MAX_MAINTCRAWL_WAIT;
            }

            next_crawls[i] = current_time + next_crawl_wait[i] + 5;
            switch (GET_LRU(i)) {
                case HOT_LRU:
                    lru_name = "hot";
                    break;
                case WARM_LRU:
                    lru_name = "warm";
                    break;
                case COLD_LRU:
                    lru_name = "cold";
                    break;
                case TEMP_LRU:
                    lru_name = "temp";
                    break;
            }
            LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_CRAWLER_STATUS, NULL,
                    CLEAR_LRU(i),
                    lru_name,
                    (unsigned long long)low_watermark,
                    (unsigned long long)available_reclaims,
                    (unsigned int)since_run,
                    next_crawls[i] - current_time,
                    s->end_time - s->start_time,
                    s->seen,
                    s->reclaimed);
            // Got our calculation, avoid running until next actual run.
            s->run_complete = false;
            pthread_mutex_unlock(&cdata->lock);
        }
        if (current_time > next_crawls[i]) {
            /* Size the crawl budget to the largest participating LRU. */
            pthread_mutex_lock(&lru_locks[i]);
            if (sizes[i] > tocrawl_limit) {
                tocrawl_limit = sizes[i];
            }
            pthread_mutex_unlock(&lru_locks[i]);
            todo[i] = 1;
            do_run = true;
            next_crawls[i] = current_time + 5; // minimum retry wait.
        }
    }
    if (do_run) {
        if (settings.lru_crawler_tocrawl && settings.lru_crawler_tocrawl < tocrawl_limit) {
            tocrawl_limit = settings.lru_crawler_tocrawl;
        }
        lru_crawler_start(todo, tocrawl_limit, CRAWLER_AUTOEXPIRE, cdata, NULL, 0);
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Default slab-page automover: callback table used by the LRU maintainer
 * thread when no external storage backend is active. */
slab_automove_reg_t slab_automove_default = {
    .init = slab_automove_init,
    .free = slab_automove_free,
    .run = slab_automove_run
};
|
|
Packit |
4e8bc4 |
#ifdef EXTSTORE
|
|
Packit |
4e8bc4 |
slab_automove_reg_t slab_automove_extstore = {
|
|
Packit |
4e8bc4 |
.init = slab_automove_extstore_init,
|
|
Packit |
4e8bc4 |
.free = slab_automove_extstore_free,
|
|
Packit |
4e8bc4 |
.run = slab_automove_extstore_run
|
|
Packit |
4e8bc4 |
};
|
|
Packit |
4e8bc4 |
#endif
|
|
Packit |
4e8bc4 |
/* Thread id of the background LRU maintainer; set by
 * start_lru_maintainer_thread() and joined in stop_lru_maintainer_thread(). */
static pthread_t lru_maintainer_tid;

/* Bounds (microseconds) for the maintainer's adaptive per-loop sleep. */
#define MAX_LRU_MAINTAINER_SLEEP 1000000
#define MIN_LRU_MAINTAINER_SLEEP 1000
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Main loop of the background LRU maintainer thread.
 * Responsibilities per iteration: juggle each slab class's sub-LRUs with
 * an adaptive per-class backoff, drain async LRU bump buffers, kick the
 * expiry crawler (at most once per second), and drive the slab-page
 * automover. Holds lru_maintainer_lock except while sleeping, so
 * lru_maintainer_pause() can freeze the loop.
 * arg: optional extstore handle; non-NULL selects the extstore automover. */
static void *lru_maintainer_thread(void *arg) {
    slab_automove_reg_t *sam = &slab_automove_default;
#ifdef EXTSTORE
    void *storage = arg;
    if (storage != NULL)
        sam = &slab_automove_extstore;
#endif
    int i;
    useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
    useconds_t last_sleep = MIN_LRU_MAINTAINER_SLEEP;
    rel_time_t last_crawler_check = 0;
    rel_time_t last_automove_check = 0;
    /* Per-class countdowns until the next juggle, and their backoffs. */
    useconds_t next_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
    useconds_t backoff_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
    struct crawler_expired_data *cdata =
        calloc(1, sizeof(struct crawler_expired_data));
    if (cdata == NULL) {
        fprintf(stderr, "Failed to allocate crawler data for LRU maintainer thread\n");
        abort();
    }
    pthread_mutex_init(&cdata->lock, NULL);
    cdata->crawl_complete = true; // kick off the crawler.
    logger *l = logger_create();
    if (l == NULL) {
        fprintf(stderr, "Failed to allocate logger for LRU maintainer thread\n");
        abort();
    }

    double last_ratio = settings.slab_automove_ratio;
    void *am = sam->init(&settings);

    pthread_mutex_lock(&lru_maintainer_lock);
    if (settings.verbose > 2)
        fprintf(stderr, "Starting LRU maintainer background thread\n");
    while (do_run_lru_maintainer_thread) {
        /* Drop the lock while sleeping so pause/stop can interleave. */
        pthread_mutex_unlock(&lru_maintainer_lock);
        if (to_sleep)
            usleep(to_sleep);
        pthread_mutex_lock(&lru_maintainer_lock);
        /* A sleep of zero counts as a minimum of a 1ms wait */
        last_sleep = to_sleep > 1000 ? to_sleep : 1000;
        to_sleep = MAX_LRU_MAINTAINER_SLEEP;

        STATS_LOCK();
        stats.lru_maintainer_juggles++;
        STATS_UNLOCK();

        /* Each slab class gets its own sleep to avoid hammering locks */
        for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
            /* Credit the time actually slept against this class's timer. */
            next_juggles[i] = next_juggles[i] > last_sleep ? next_juggles[i] - last_sleep : 0;

            if (next_juggles[i] > 0) {
                // Sleep the thread just for the minimum amount (or not at all)
                if (next_juggles[i] < to_sleep)
                    to_sleep = next_juggles[i];
                continue;
            }

            int did_moves = lru_maintainer_juggle(i);
            if (did_moves == 0) {
                /* Idle class: grow backoff by 12.5% per idle round. */
                if (backoff_juggles[i] != 0) {
                    backoff_juggles[i] += backoff_juggles[i] / 8;
                } else {
                    backoff_juggles[i] = MIN_LRU_MAINTAINER_SLEEP;
                }
                if (backoff_juggles[i] > MAX_LRU_MAINTAINER_SLEEP)
                    backoff_juggles[i] = MAX_LRU_MAINTAINER_SLEEP;
            } else if (backoff_juggles[i] > 0) {
                /* Busy class: halve the backoff toward zero. */
                backoff_juggles[i] /= 2;
                if (backoff_juggles[i] < MIN_LRU_MAINTAINER_SLEEP) {
                    backoff_juggles[i] = 0;
                }
            }
            next_juggles[i] = backoff_juggles[i];
            if (next_juggles[i] < to_sleep)
                to_sleep = next_juggles[i];
        }

        /* Minimize the sleep if we had async LRU bumps to process */
        if (settings.lru_segmented && lru_maintainer_bumps() && to_sleep > 1000) {
            to_sleep = 1000;
        }

        /* Once per second at most */
        if (settings.lru_crawler && last_crawler_check != current_time) {
            lru_maintainer_crawler_check(cdata, l);
            last_crawler_check = current_time;
        }

        if (settings.slab_automove == 1 && last_automove_check != current_time) {
            /* Re-init the automover if its ratio setting changed at runtime. */
            if (last_ratio != settings.slab_automove_ratio) {
                sam->free(am);
                am = sam->init(&settings);
                last_ratio = settings.slab_automove_ratio;
            }
            int src, dst;
            sam->run(am, &src, &dst);
            if (src != -1 && dst != -1) {
                slabs_reassign(src, dst);
                LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL,
                        src, dst);
            }
            // dst == 0 means reclaim to global pool, be more aggressive
            if (dst != 0) {
                last_automove_check = current_time;
            } else if (dst == 0) {
                // also ensure we minimize the thread sleep
                to_sleep = 1000;
            }
        }
    }
    pthread_mutex_unlock(&lru_maintainer_lock);
    sam->free(am);
    // LRU crawler *must* be stopped.
    free(cdata);
    if (settings.verbose > 2)
        fprintf(stderr, "LRU maintainer thread stopping\n");

    return NULL;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
int stop_lru_maintainer_thread(void) {
|
|
Packit |
4e8bc4 |
int ret;
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&lru_maintainer_lock);
|
|
Packit |
4e8bc4 |
/* LRU thread is a sleep loop, will die on its own */
|
|
Packit |
4e8bc4 |
do_run_lru_maintainer_thread = 0;
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_maintainer_lock);
|
|
Packit |
4e8bc4 |
if ((ret = pthread_join(lru_maintainer_tid, NULL)) != 0) {
|
|
Packit |
4e8bc4 |
fprintf(stderr, "Failed to stop LRU maintainer thread: %s\n", strerror(ret));
|
|
Packit |
4e8bc4 |
return -1;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
settings.lru_maintainer_thread = false;
|
|
Packit |
4e8bc4 |
return 0;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
int start_lru_maintainer_thread(void *arg) {
|
|
Packit |
4e8bc4 |
int ret;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
pthread_mutex_lock(&lru_maintainer_lock);
|
|
Packit |
4e8bc4 |
do_run_lru_maintainer_thread = 1;
|
|
Packit |
4e8bc4 |
settings.lru_maintainer_thread = true;
|
|
Packit |
4e8bc4 |
if ((ret = pthread_create(&lru_maintainer_tid, NULL,
|
|
Packit |
4e8bc4 |
lru_maintainer_thread, arg)) != 0) {
|
|
Packit |
4e8bc4 |
fprintf(stderr, "Can't create LRU maintainer thread: %s\n",
|
|
Packit |
4e8bc4 |
strerror(ret));
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_maintainer_lock);
|
|
Packit |
4e8bc4 |
return -1;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
pthread_mutex_unlock(&lru_maintainer_lock);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
return 0;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* If we hold this lock, crawler can't wake up or move */
/* Freeze the maintainer thread: it reacquires this lock at the top of
 * every loop iteration, so holding it blocks further maintenance until
 * lru_maintainer_resume() is called. */
void lru_maintainer_pause(void) {
    pthread_mutex_lock(&lru_maintainer_lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Release the lock taken by lru_maintainer_pause(), letting the
 * maintainer thread continue its loop. */
void lru_maintainer_resume(void) {
    pthread_mutex_unlock(&lru_maintainer_lock);
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* One-time setup for the LRU maintainer subsystem: marks it initialized.
 * Always returns 0 (success). */
int init_lru_maintainer(void) {
    lru_maintainer_initialized = 1;
    return 0;
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Tail linkers and crawler for the LRU crawler. */
|
|
Packit |
4e8bc4 |
void do_item_linktail_q(item *it) { /* item is the new tail */
|
|
Packit |
4e8bc4 |
item **head, **tail;
|
|
Packit |
4e8bc4 |
assert(it->it_flags == 1);
|
|
Packit |
4e8bc4 |
assert(it->nbytes == 0);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
head = &heads[it->slabs_clsid];
|
|
Packit |
4e8bc4 |
tail = &tails[it->slabs_clsid];
|
|
Packit |
4e8bc4 |
//assert(*tail != 0);
|
|
Packit |
4e8bc4 |
assert(it != *tail);
|
|
Packit |
4e8bc4 |
assert((*head && *tail) || (*head == 0 && *tail == 0));
|
|
Packit |
4e8bc4 |
it->prev = *tail;
|
|
Packit |
4e8bc4 |
it->next = 0;
|
|
Packit |
4e8bc4 |
if (it->prev) {
|
|
Packit |
4e8bc4 |
assert(it->prev->next == 0);
|
|
Packit |
4e8bc4 |
it->prev->next = it;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
*tail = it;
|
|
Packit |
4e8bc4 |
if (*head == 0) *head = it;
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* Remove a crawler pseudo-item from its class's LRU list, fixing up
 * head/tail pointers as needed. Caller holds the appropriate lru lock. */
void do_item_unlinktail_q(item *it) {
    item **head = &heads[it->slabs_clsid];
    item **tail = &tails[it->slabs_clsid];

    if (*head == it) {
        assert(it->prev == 0);
        *head = it->next;
    }
    if (*tail == it) {
        assert(it->next == 0);
        *tail = it->prev;
    }
    assert(it->next != it);
    assert(it->prev != it);

    /* Stitch neighbors together around the removed node. */
    if (it->next != 0) {
        it->next->prev = it->prev;
    }
    if (it->prev != 0) {
        it->prev->next = it->next;
    }
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* This is too convoluted, but it's a difficult shuffle. Try to rewrite it
 * more clearly. */
/* Advance a crawler pseudo-item one position toward the head of its
 * class's LRU by swapping it with its prev neighbor.
 * Returns the item now behind the crawler (it->next), or NULL when the
 * crawler reached the head and was popped off the list.
 * Caller holds the appropriate lru lock. */
item *do_item_crawl_q(item *it) {
    item **head, **tail;
    /* Crawler pseudo-items are flagged it_flags == 1 and carry no data. */
    assert(it->it_flags == 1);
    assert(it->nbytes == 0);
    head = &heads[it->slabs_clsid];
    tail = &tails[it->slabs_clsid];

    /* We've hit the head, pop off */
    if (it->prev == 0) {
        assert(*head == it);
        if (it->next) {
            *head = it->next;
            assert(it->next->prev == it);
            it->next->prev = 0;
        }
        return NULL; /* Done */
    }

    /* Swing ourselves in front of the next item */
    /* NB: If there is a prev, we can't be the head */
    assert(it->prev != it);
    if (it->prev) {
        if (*head == it->prev) {
            /* Prev was the head, now we're the head */
            *head = it;
        }
        if (*tail == it) {
            /* We are the tail, now they are the tail */
            *tail = it->prev;
        }
        assert(it->next != it);
        /* Detach ourselves from between prev and next. */
        if (it->next) {
            assert(it->prev->next == it);
            it->prev->next = it->next;
            it->next->prev = it->prev;
        } else {
            /* Tail. Move this above? */
            it->prev->next = 0;
        }
        /* prev->prev's next is it->prev */
        /* Re-insert one slot closer to the head: old prev becomes next. */
        it->next = it->prev;
        it->prev = it->next->prev;
        it->next->prev = it;
        /* New it->prev now, if we're not at the head. */
        if (it->prev) {
            it->prev->next = it;
        }
    }
    assert(it->next != it);
    assert(it->prev != it);

    return it->next; /* success */
}
|