Blame slabs.c

Packit Service 584ef9
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
Packit Service 584ef9
/*
Packit Service 584ef9
 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
Packit Service 584ef9
 * and are divided into chunks. The chunk sizes start off at the size of the
Packit Service 584ef9
 * "item" structure plus space for a small key and value. They increase by
Packit Service 584ef9
 * a multiplier factor from there, up to half the maximum slab size. The last
Packit Service 584ef9
 * slab size is always 1MB, since that's the maximum item size allowed by the
Packit Service 584ef9
 * memcached protocol.
Packit Service 584ef9
 */
Packit Service 584ef9
#include "memcached.h"
Packit Service 584ef9
#include <sys/mman.h>
Packit Service 584ef9
#include <sys/stat.h>
Packit Service 584ef9
#include <sys/socket.h>
Packit Service 584ef9
#include <sys/resource.h>
Packit Service 584ef9
#include <fcntl.h>
Packit Service 584ef9
#include <netinet/in.h>
Packit Service 584ef9
#include <errno.h>
Packit Service 584ef9
#include <stdlib.h>
Packit Service 584ef9
#include <stdio.h>
Packit Service 584ef9
#include <string.h>
Packit Service 584ef9
#include <signal.h>
Packit Service 584ef9
#include <assert.h>
Packit Service 584ef9
#include <pthread.h>
Packit Service 584ef9
Packit Service 584ef9
//#define DEBUG_SLAB_MOVER
Packit Service 584ef9
/* powers-of-N allocation structures */
Packit Service 584ef9
Packit Service 584ef9
/* Bookkeeping record kept for each slab class. */
typedef struct {
    unsigned int size;      /* size of one item chunk in this class */
    unsigned int perslab;   /* number of item chunks per slab page */

    void *slots;            /* head of the list of free item pointers */
    unsigned int sl_curr;   /* how many free items are on that list */

    unsigned int slabs;     /* number of slab pages allocated to this class */

    void **slab_list;       /* array of slab page pointers */
    unsigned int list_size; /* allocated length of slab_list, in entries */
} slabclass_t;
Packit Service 584ef9
Packit Service 584ef9
/* One descriptor per slab class, indexed by class id. */
static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
/* Id of the largest slab class in use; assigned by slabs_init(). */
static int power_largest;

/* Memory accounting: the configured limit and the bytes handed out so far. */
static size_t mem_limit = 0;
static size_t mem_malloced = 0;
/* Set once the memory limit has been hit. Used as a hint to decide when to
 * early-wake the LRU maintenance thread. */
static bool mem_limit_reached = false;

/* Bump-allocation state for an optional single large memory region:
 * its start, the next unused byte, and the bytes still available. */
static void *mem_base = NULL;
static void *mem_current = NULL;
static size_t mem_avail = 0;
#ifdef EXTSTORE
/* Opaque pointer installed through slabs_set_storage(). */
static void *storage  = NULL;
#endif

/**
 * Access to the slab allocator is protected by slabs_lock.
 */
static pthread_mutex_t slabs_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t slabs_rebalance_lock = PTHREAD_MUTEX_INITIALIZER;
Packit Service 584ef9
Packit Service 584ef9
/*
Packit Service 584ef9
 * Forward Declarations
Packit Service 584ef9
 */
Packit Service 584ef9
static void *memory_allocate(size_t size);
static int grow_slab_list (const unsigned int id);
static int do_slabs_newslab(const unsigned int id);
static void do_slabs_free(void *ptr, const size_t size, unsigned int id);

/* Preallocates one slab page per slab class, for at most maxslabs classes
   (called from slabs_init on start-up), so users don't get confusing
   out-of-memory errors when they do have free (in-slab) space, but no
   space to make new slabs.  Exits the process if a page cannot be
   obtained.  */
static void slabs_preallocate (const unsigned int maxslabs);
Packit Service 584ef9
#ifdef EXTSTORE
Packit Service 584ef9
void slabs_set_storage(void *arg) {
Packit Service 584ef9
    storage = arg;
Packit Service 584ef9
}
Packit Service 584ef9
#endif
Packit Service 584ef9
/*
Packit Service 584ef9
 * Figures out which slab class (chunk size) is required to store an item of
Packit Service 584ef9
 * a given size.
Packit Service 584ef9
 *
Packit Service 584ef9
 * Given object size, return id to use when allocating/freeing memory for object
Packit Service 584ef9
 * 0 means error: can't store such a large object
Packit Service 584ef9
 */
Packit Service 584ef9
Packit Service 584ef9
unsigned int slabs_clsid(const size_t size) {
Packit Service 584ef9
    int res = POWER_SMALLEST;
Packit Service 584ef9
Packit Service 584ef9
    if (size == 0 || size > settings.item_size_max)
Packit Service 584ef9
        return 0;
Packit Service 584ef9
    while (size > slabclass[res].size)
Packit Service 584ef9
        if (res++ == power_largest)     /* won't fit in the biggest slab */
Packit Service 584ef9
            return power_largest;
Packit Service 584ef9
    return res;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
unsigned int slabs_size(const int clsid) {
Packit Service 584ef9
    return slabclass[clsid].size;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
// TODO: could this work with the restartable memory?
Packit Service 584ef9
// Docs say hugepages only work with private shm allocs.
Packit Service 584ef9
/* Function split out for better error path handling */
Packit Service 584ef9
/*
 * Obtains one region of `limit` bytes, asking for large pages where the
 * platform branch supports it.
 *
 *  - Linux with MADV_HUGEPAGE: reads the huge page size from /proc/meminfo,
 *    allocates with posix_memalign() at that alignment and applies
 *    madvise(MADV_HUGEPAGE).
 *  - FreeBSD: anonymous shared mmap() with super-page alignment flags.
 *  - anything else: plain malloc().
 *
 * Returns the region, or NULL if any step failed.
 */
static void * alloc_large_chunk(const size_t limit)
{
    void *chunk = NULL;
#if defined(__linux__) && defined(MADV_HUGEPAGE)
    size_t hp_bytes = 0;
    int rc;

    /* Look up the huge page size */
    FILE *meminfo = fopen("/proc/meminfo", "r");
    if (meminfo != NULL) {
        char line[64];

        while (fgets(line, sizeof(line), meminfo) != NULL) {
            if (strncmp(line, "Hugepagesize:", 13) != 0) {
                continue;
            }
            rc = sscanf(line + 13, "%zu\n", &hp_bytes);

            /* meminfo reports the huge page size in KiB */
            hp_bytes <<= 10;
        }
        fclose(meminfo);
    }

    if (hp_bytes == 0) {
        fprintf(stderr, "Failed to get supported huge page size\n");
        return NULL;
    }

    if (settings.verbose > 1) {
        fprintf(stderr, "huge page size: %zu\n", hp_bytes);
    }

    /* This works because glibc simply uses mmap when the alignment is
     * above a certain limit. */
    rc = posix_memalign(&chunk, hp_bytes, limit);
    if (rc != 0) {
        fprintf(stderr, "Failed to get aligned memory chunk: %d\n", rc);
        return NULL;
    }

    rc = madvise(chunk, limit, MADV_HUGEPAGE);
    if (rc < 0) {
        fprintf(stderr, "Failed to set transparent hugepage hint: %d\n", rc);
        free(chunk);
        chunk = NULL;
    }
#elif defined(__FreeBSD__)
    size_t align = (sizeof(size_t) * 8 - (__builtin_clzl(4095)));
    chunk = mmap(NULL, limit, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON | MAP_ALIGNED(align) | MAP_ALIGNED_SUPER, -1, 0);
    if (chunk == MAP_FAILED) {
        fprintf(stderr, "Failed to set super pages\n");
        chunk = NULL;
    }
#else
    chunk = malloc(limit);
#endif
    return chunk;
}
Packit Service 584ef9
Packit Service 584ef9
unsigned int slabs_fixup(char *chunk, const int border) {
Packit Service 584ef9
    slabclass_t *p;
Packit Service 584ef9
    item *it = (item *)chunk;
Packit Service 584ef9
    int id = ITEM_clsid(it);
Packit Service 584ef9
Packit Service 584ef9
    // memory isn't used yet. shunt to global pool.
Packit Service 584ef9
    // (which must be 0)
Packit Service 584ef9
    if (id == 0) {
Packit Service 584ef9
        //assert(border == 0);
Packit Service 584ef9
        p = &slabclass[0];
Packit Service 584ef9
        grow_slab_list(0);
Packit Service 584ef9
        p->slab_list[p->slabs++] = (char*)chunk;
Packit Service 584ef9
        return -1;
Packit Service 584ef9
    }
Packit Service 584ef9
    p = &slabclass[id];
Packit Service 584ef9
Packit Service 584ef9
    // if we're on a page border, add the slab to slab class
Packit Service 584ef9
    if (border == 0) {
Packit Service 584ef9
        grow_slab_list(id);
Packit Service 584ef9
        p->slab_list[p->slabs++] = chunk;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    // increase free count if ITEM_SLABBED
Packit Service 584ef9
    if (it->it_flags == ITEM_SLABBED) {
Packit Service 584ef9
        // if ITEM_SLABBED re-stack on freelist.
Packit Service 584ef9
        // don't have to run pointer fixups.
Packit Service 584ef9
        it->prev = 0;
Packit Service 584ef9
        it->next = p->slots;
Packit Service 584ef9
        if (it->next) it->next->prev = it;
Packit Service 584ef9
        p->slots = it;
Packit Service 584ef9
Packit Service 584ef9
        p->sl_curr++;
Packit Service 584ef9
        //fprintf(stderr, "replacing into freelist\n");
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    return p->size;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/**
Packit Service 584ef9
 * Determines the chunk sizes and initializes the slab class descriptors
Packit Service 584ef9
 * accordingly.
Packit Service 584ef9
 */
Packit Service 584ef9
void slabs_init(const size_t limit, const double factor, const bool prealloc, const uint32_t *slab_sizes, void *mem_base_external, bool reuse_mem) {
Packit Service 584ef9
    int i = POWER_SMALLEST - 1;
Packit Service 584ef9
    unsigned int size = sizeof(item) + settings.chunk_size;
Packit Service 584ef9
Packit Service 584ef9
    /* Some platforms use runtime transparent hugepages. If for any reason
Packit Service 584ef9
     * the initial allocation fails, the required settings do not persist
Packit Service 584ef9
     * for remaining allocations. As such it makes little sense to do slab
Packit Service 584ef9
     * preallocation. */
Packit Service 584ef9
    bool __attribute__ ((unused)) do_slab_prealloc = false;
Packit Service 584ef9
Packit Service 584ef9
    mem_limit = limit;
Packit Service 584ef9
Packit Service 584ef9
    if (prealloc && mem_base_external == NULL) {
Packit Service 584ef9
        mem_base = alloc_large_chunk(mem_limit);
Packit Service 584ef9
        if (mem_base) {
Packit Service 584ef9
            do_slab_prealloc = true;
Packit Service 584ef9
            mem_current = mem_base;
Packit Service 584ef9
            mem_avail = mem_limit;
Packit Service 584ef9
        } else {
Packit Service 584ef9
            fprintf(stderr, "Warning: Failed to allocate requested memory in"
Packit Service 584ef9
                    " one large chunk.\nWill allocate in smaller chunks\n");
Packit Service 584ef9
        }
Packit Service 584ef9
    } else if (prealloc && mem_base_external != NULL) {
Packit Service 584ef9
        // Can't (yet) mix hugepages with mmap allocations, so separate the
Packit Service 584ef9
        // logic from above. Reusable memory also force-preallocates memory
Packit Service 584ef9
        // pages into the global pool, which requires turning mem_* variables.
Packit Service 584ef9
        do_slab_prealloc = true;
Packit Service 584ef9
        mem_base = mem_base_external;
Packit Service 584ef9
        // _current shouldn't be used in this case, but we set it to where it
Packit Service 584ef9
        // should be anyway.
Packit Service 584ef9
        if (reuse_mem) {
Packit Service 584ef9
            mem_current = ((char*)mem_base) + mem_limit;
Packit Service 584ef9
            mem_avail = 0;
Packit Service 584ef9
        } else {
Packit Service 584ef9
            mem_current = mem_base;
Packit Service 584ef9
            mem_avail = mem_limit;
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    memset(slabclass, 0, sizeof(slabclass));
Packit Service 584ef9
Packit Service 584ef9
    while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1) {
Packit Service 584ef9
        if (slab_sizes != NULL) {
Packit Service 584ef9
            if (slab_sizes[i-1] == 0)
Packit Service 584ef9
                break;
Packit Service 584ef9
            size = slab_sizes[i-1];
Packit Service 584ef9
        } else if (size >= settings.slab_chunk_size_max / factor) {
Packit Service 584ef9
            break;
Packit Service 584ef9
        }
Packit Service 584ef9
        /* Make sure items are always n-byte aligned */
Packit Service 584ef9
        if (size % CHUNK_ALIGN_BYTES)
Packit Service 584ef9
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
Packit Service 584ef9
Packit Service 584ef9
        slabclass[i].size = size;
Packit Service 584ef9
        slabclass[i].perslab = settings.slab_page_size / slabclass[i].size;
Packit Service 584ef9
        if (slab_sizes == NULL)
Packit Service 584ef9
            size *= factor;
Packit Service 584ef9
        if (settings.verbose > 1) {
Packit Service 584ef9
            fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
Packit Service 584ef9
                    i, slabclass[i].size, slabclass[i].perslab);
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    power_largest = i;
Packit Service 584ef9
    slabclass[power_largest].size = settings.slab_chunk_size_max;
Packit Service 584ef9
    slabclass[power_largest].perslab = settings.slab_page_size / settings.slab_chunk_size_max;
Packit Service 584ef9
    if (settings.verbose > 1) {
Packit Service 584ef9
        fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
Packit Service 584ef9
                i, slabclass[i].size, slabclass[i].perslab);
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    /* for the test suite:  faking of how much we've already malloc'd */
Packit Service 584ef9
    {
Packit Service 584ef9
        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
Packit Service 584ef9
        if (t_initial_malloc) {
Packit Service 584ef9
            mem_malloced = (size_t)atol(t_initial_malloc);
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if (do_slab_prealloc) {
Packit Service 584ef9
        if (!reuse_mem) {
Packit Service 584ef9
            slabs_preallocate(power_largest);
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
void slabs_prefill_global(void) {
Packit Service 584ef9
    void *ptr;
Packit Service 584ef9
    slabclass_t *p = &slabclass[0];
Packit Service 584ef9
    int len = settings.slab_page_size;
Packit Service 584ef9
Packit Service 584ef9
    while (mem_malloced < mem_limit
Packit Service 584ef9
            && (ptr = memory_allocate(len)) != NULL) {
Packit Service 584ef9
        grow_slab_list(0);
Packit Service 584ef9
        p->slab_list[p->slabs++] = ptr;
Packit Service 584ef9
    }
Packit Service 584ef9
    mem_limit_reached = true;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static void slabs_preallocate (const unsigned int maxslabs) {
Packit Service 584ef9
    int i;
Packit Service 584ef9
    unsigned int prealloc = 0;
Packit Service 584ef9
Packit Service 584ef9
    /* pre-allocate a 1MB slab in every size class so people don't get
Packit Service 584ef9
       confused by non-intuitive "SERVER_ERROR out of memory"
Packit Service 584ef9
       messages.  this is the most common question on the mailing
Packit Service 584ef9
       list.  if you really don't want this, you can rebuild without
Packit Service 584ef9
       these three lines.  */
Packit Service 584ef9
Packit Service 584ef9
    for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
Packit Service 584ef9
        if (++prealloc > maxslabs)
Packit Service 584ef9
            break;
Packit Service 584ef9
        if (do_slabs_newslab(i) == 0) {
Packit Service 584ef9
            fprintf(stderr, "Error while preallocating slab memory!\n"
Packit Service 584ef9
                "If using -L or other prealloc options, max memory must be "
Packit Service 584ef9
                "at least %d megabytes.\n", power_largest);
Packit Service 584ef9
            exit(1);
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static int grow_slab_list (const unsigned int id) {
Packit Service 584ef9
    slabclass_t *p = &slabclass[id];
Packit Service 584ef9
    if (p->slabs == p->list_size) {
Packit Service 584ef9
        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
Packit Service 584ef9
        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
Packit Service 584ef9
        if (new_list == 0) return 0;
Packit Service 584ef9
        p->list_size = new_size;
Packit Service 584ef9
        p->slab_list = new_list;
Packit Service 584ef9
    }
Packit Service 584ef9
    return 1;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
Packit Service 584ef9
    slabclass_t *p = &slabclass[id];
Packit Service 584ef9
    int x;
Packit Service 584ef9
    for (x = 0; x < p->perslab; x++) {
Packit Service 584ef9
        do_slabs_free(ptr, 0, id);
Packit Service 584ef9
        ptr += p->size;
Packit Service 584ef9
    }
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Fast FIFO queue */
Packit Service 584ef9
static void *get_page_from_global_pool(void) {
Packit Service 584ef9
    slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL];
Packit Service 584ef9
    if (p->slabs < 1) {
Packit Service 584ef9
        return NULL;
Packit Service 584ef9
    }
Packit Service 584ef9
    char *ret = p->slab_list[p->slabs - 1];
Packit Service 584ef9
    p->slabs--;
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static int do_slabs_newslab(const unsigned int id) {
Packit Service 584ef9
    slabclass_t *p = &slabclass[id];
Packit Service 584ef9
    slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
Packit Service 584ef9
    int len = (settings.slab_reassign || settings.slab_chunk_size_max != settings.slab_page_size)
Packit Service 584ef9
        ? settings.slab_page_size
Packit Service 584ef9
        : p->size * p->perslab;
Packit Service 584ef9
    char *ptr;
Packit Service 584ef9
Packit Service 584ef9
    if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
Packit Service 584ef9
         && g->slabs == 0)) {
Packit Service 584ef9
        mem_limit_reached = true;
Packit Service 584ef9
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
Packit Service 584ef9
        return 0;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if ((grow_slab_list(id) == 0) ||
Packit Service 584ef9
        (((ptr = get_page_from_global_pool()) == NULL) &&
Packit Service 584ef9
        ((ptr = memory_allocate((size_t)len)) == 0))) {
Packit Service 584ef9
Packit Service 584ef9
        MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
Packit Service 584ef9
        return 0;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
#if !defined(__FreeBSD__)
Packit Service 584ef9
    memset(ptr, 0, (size_t)len);
Packit Service 584ef9
#endif
Packit Service 584ef9
    split_slab_page_into_freelist(ptr, id);
Packit Service 584ef9
Packit Service 584ef9
    p->slab_list[p->slabs++] = ptr;
Packit Service 584ef9
    MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
Packit Service 584ef9
Packit Service 584ef9
    return 1;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/*@null@*/
Packit Service 584ef9
static void *do_slabs_alloc(const size_t size, unsigned int id,
Packit Service 584ef9
        unsigned int flags) {
Packit Service 584ef9
    slabclass_t *p;
Packit Service 584ef9
    void *ret = NULL;
Packit Service 584ef9
    item *it = NULL;
Packit Service 584ef9
Packit Service 584ef9
    if (id < POWER_SMALLEST || id > power_largest) {
Packit Service 584ef9
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
Packit Service 584ef9
        return NULL;
Packit Service 584ef9
    }
Packit Service 584ef9
    p = &slabclass[id];
Packit Service 584ef9
    assert(p->sl_curr == 0 || (((item *)p->slots)->it_flags & ITEM_SLABBED));
Packit Service 584ef9
Packit Service 584ef9
    assert(size <= p->size);
Packit Service 584ef9
    /* fail unless we have space at the end of a recently allocated page,
Packit Service 584ef9
       we have something on our freelist, or we could allocate a new page */
Packit Service 584ef9
    if (p->sl_curr == 0 && flags != SLABS_ALLOC_NO_NEWPAGE) {
Packit Service 584ef9
        do_slabs_newslab(id);
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if (p->sl_curr != 0) {
Packit Service 584ef9
        /* return off our freelist */
Packit Service 584ef9
        it = (item *)p->slots;
Packit Service 584ef9
        p->slots = it->next;
Packit Service 584ef9
        if (it->next) it->next->prev = 0;
Packit Service 584ef9
        /* Kill flag and initialize refcount here for lock safety in slab
Packit Service 584ef9
         * mover's freeness detection. */
Packit Service 584ef9
        it->it_flags &= ~ITEM_SLABBED;
Packit Service 584ef9
        it->refcount = 1;
Packit Service 584ef9
        p->sl_curr--;
Packit Service 584ef9
        ret = (void *)it;
Packit Service 584ef9
    } else {
Packit Service 584ef9
        ret = NULL;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if (ret) {
Packit Service 584ef9
        MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
Packit Service 584ef9
    } else {
Packit Service 584ef9
        MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/*
 * Returns a chunked item to the free lists.
 *
 * The header item goes onto the free list of the class recorded in its
 * first chunk's orig_clsid. Every chunk linked after that first chunk is
 * pushed onto the free list of the class named by its own slabs_clsid.
 * `size` is not used.
 */
static void do_slabs_free_chunked(item *it, const size_t size) {
    /* must be located before it_flags is overwritten below */
    item_chunk *cur = (item_chunk *) ITEM_schunk(it);
    item_chunk *following;
    slabclass_t *cls;

    it->it_flags = ITEM_SLABBED;
    it->prev = 0;
    // the header object's original class id is stored in its first chunk.
    cls = &slabclass[cur->orig_clsid];

    // step to the first attached chunk, if there is one.
    cur = cur->next;
    if (cur) {
        cur->prev = 0;
    }

    // push the header object onto its class's freelist.
    it->prev = 0;
    it->next = cls->slots;
    if (it->next) {
        it->next->prev = it;
    }
    cls->slots = it;
    cls->sl_curr++;

    // push each attached chunk onto the freelist of its own class.
    while (cur) {
        assert(cur->it_flags == ITEM_CHUNK);
        cur->it_flags = ITEM_SLABBED;
        cls = &slabclass[cur->slabs_clsid];
        following = cur->next;

        cur->prev = 0;
        cur->next = cls->slots;
        if (cur->next) {
            cur->next->prev = cur;
        }
        cls->slots = cur;
        cls->sl_curr++;

        cur = following;
    }
}
Packit Service 584ef9
Packit Service 584ef9
Packit Service 584ef9
static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
Packit Service 584ef9
    slabclass_t *p;
Packit Service 584ef9
    item *it;
Packit Service 584ef9
Packit Service 584ef9
    assert(id >= POWER_SMALLEST && id <= power_largest);
Packit Service 584ef9
    if (id < POWER_SMALLEST || id > power_largest)
Packit Service 584ef9
        return;
Packit Service 584ef9
Packit Service 584ef9
    MEMCACHED_SLABS_FREE(size, id, ptr);
Packit Service 584ef9
    p = &slabclass[id];
Packit Service 584ef9
Packit Service 584ef9
    it = (item *)ptr;
Packit Service 584ef9
    if ((it->it_flags & ITEM_CHUNKED) == 0) {
Packit Service 584ef9
        it->it_flags = ITEM_SLABBED;
Packit Service 584ef9
        it->slabs_clsid = id;
Packit Service 584ef9
        it->prev = 0;
Packit Service 584ef9
        it->next = p->slots;
Packit Service 584ef9
        if (it->next) it->next->prev = it;
Packit Service 584ef9
        p->slots = it;
Packit Service 584ef9
Packit Service 584ef9
        p->sl_curr++;
Packit Service 584ef9
    } else {
Packit Service 584ef9
        do_slabs_free_chunked(it, size);
Packit Service 584ef9
    }
Packit Service 584ef9
    return;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* With refactoring of the various stats code the automover won't need a
Packit Service 584ef9
 * custom function here.
Packit Service 584ef9
 */
Packit Service 584ef9
/*
 * Copies chunk size, chunks per page, free chunk count and page count of
 * every slab class into am[0 .. MAX_NUMBER_OF_SLAB_CLASSES-1], holding
 * slabs_lock for the duration of the copy.
 */
void fill_slab_stats_automove(slab_stats_automove *am) {
    int clsid;

    pthread_mutex_lock(&slabs_lock);
    for (clsid = 0; clsid < MAX_NUMBER_OF_SLAB_CLASSES; clsid++) {
        const slabclass_t *src = &slabclass[clsid];

        am[clsid].chunks_per_page = src->perslab;
        am[clsid].free_chunks = src->sl_curr;
        am[clsid].total_pages = src->slabs;
        am[clsid].chunk_size = src->size;
    }
    pthread_mutex_unlock(&slabs_lock);
}
Packit Service 584ef9
Packit Service 584ef9
/* TODO: slabs_available_chunks should grow up to encompass this.
Packit Service 584ef9
 * mem_flag is redundant with the other function.
Packit Service 584ef9
 */
Packit Service 584ef9
unsigned int global_page_pool_size(bool *mem_flag) {
Packit Service 584ef9
    unsigned int ret = 0;
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    if (mem_flag != NULL)
Packit Service 584ef9
        *mem_flag = mem_malloced >= mem_limit ? true : false;
Packit Service 584ef9
    ret = slabclass[SLAB_GLOBAL_PAGE_POOL].slabs;
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/*@null@*/
Packit Service 584ef9
/*
 * Emits statistics through `add_stats` for every slab class between
 * POWER_SMALLEST and power_largest that owns at least one page, followed by
 * the "active_slabs" and "total_malloced" totals and an empty terminating
 * entry.
 *
 * add_stats: callback receiving each stat.
 * c:         opaque pointer passed through to the callback.
 *
 * The locals key_str, val_str, klen and vlen keep these exact names in case
 * the APPEND_* macros refer to them.
 */
static void do_slabs_stats(ADD_STAT add_stats, void *c) {
    /* Get the per-thread stats which contain some interesting aggregates */
    struct thread_stats thread_stats;
    int total = 0;
    int i;

    threadlocal_stats_aggregate(&thread_stats);

    for (i = POWER_SMALLEST; i <= power_largest; i++) {
        slabclass_t *p = &slabclass[i];

        /* classes without pages are not reported */
        if (p->slabs == 0) {
            continue;
        }

        uint32_t slabs = p->slabs;
        uint32_t perslab = p->perslab;

        char key_str[STAT_KEY_LEN];
        char val_str[STAT_VAL_LEN];
        int klen = 0, vlen = 0;

        APPEND_NUM_STAT(i, "chunk_size", "%u", p->size);
        APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab);
        APPEND_NUM_STAT(i, "total_pages", "%u", slabs);
        APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab);
        APPEND_NUM_STAT(i, "used_chunks", "%u",
                        slabs*perslab - p->sl_curr);
        APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr);
        /* Stat is dead, but displaying zero instead of removing it. */
        APPEND_NUM_STAT(i, "free_chunks_end", "%u", 0);
        APPEND_NUM_STAT(i, "get_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].get_hits);
        APPEND_NUM_STAT(i, "cmd_set", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].set_cmds);
        APPEND_NUM_STAT(i, "delete_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].delete_hits);
        APPEND_NUM_STAT(i, "incr_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].incr_hits);
        APPEND_NUM_STAT(i, "decr_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].decr_hits);
        APPEND_NUM_STAT(i, "cas_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].cas_hits);
        APPEND_NUM_STAT(i, "cas_badval", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].cas_badval);
        APPEND_NUM_STAT(i, "touch_hits", "%llu",
                (unsigned long long)thread_stats.slab_stats[i].touch_hits);
        total++;
    }

    /* add overall slab stats and append terminator */
    APPEND_STAT("active_slabs", "%d", total);
    APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced);
    add_stats(NULL, 0, NULL, 0, c);
}
Packit Service 584ef9
Packit Service 584ef9
static void *memory_allocate(size_t size) {
Packit Service 584ef9
    void *ret;
Packit Service 584ef9
Packit Service 584ef9
    if (mem_base == NULL) {
Packit Service 584ef9
        /* We are not using a preallocated large memory chunk */
Packit Service 584ef9
        ret = malloc(size);
Packit Service 584ef9
    } else {
Packit Service 584ef9
        ret = mem_current;
Packit Service 584ef9
Packit Service 584ef9
        if (size > mem_avail) {
Packit Service 584ef9
            return NULL;
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        /* mem_current pointer _must_ be aligned!!! */
Packit Service 584ef9
        if (size % CHUNK_ALIGN_BYTES) {
Packit Service 584ef9
            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        mem_current = ((char*)mem_current) + size;
Packit Service 584ef9
        if (size < mem_avail) {
Packit Service 584ef9
            mem_avail -= size;
Packit Service 584ef9
        } else {
Packit Service 584ef9
            mem_avail = 0;
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
    mem_malloced += size;
Packit Service 584ef9
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Must only be used if all pages are item_size_max */
Packit Service 584ef9
static void memory_release() {
Packit Service 584ef9
    void *p = NULL;
Packit Service 584ef9
    if (mem_base != NULL)
Packit Service 584ef9
        return;
Packit Service 584ef9
Packit Service 584ef9
    if (!settings.slab_reassign)
Packit Service 584ef9
        return;
Packit Service 584ef9
Packit Service 584ef9
    while (mem_malloced > mem_limit &&
Packit Service 584ef9
            (p = get_page_from_global_pool()) != NULL) {
Packit Service 584ef9
        free(p);
Packit Service 584ef9
        mem_malloced -= settings.slab_page_size;
Packit Service 584ef9
    }
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
void *slabs_alloc(size_t size, unsigned int id,
Packit Service 584ef9
        unsigned int flags) {
Packit Service 584ef9
    void *ret;
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    ret = do_slabs_alloc(size, id, flags);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
void slabs_free(void *ptr, size_t size, unsigned int id) {
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    do_slabs_free(ptr, size, id);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Locked front end for do_slabs_stats(): emits the slab statistics through
 * `add_stats` while slabs_lock is held. */
void slabs_stats(ADD_STAT add_stats, void *c)
{
    pthread_mutex_lock(&slabs_lock);
    do_slabs_stats(add_stats, c);
    pthread_mutex_unlock(&slabs_lock);
}
Packit Service 584ef9
Packit Service 584ef9
static bool do_slabs_adjust_mem_limit(size_t new_mem_limit) {
Packit Service 584ef9
    /* Cannot adjust memory limit at runtime if prealloc'ed */
Packit Service 584ef9
    if (mem_base != NULL)
Packit Service 584ef9
        return false;
Packit Service 584ef9
    settings.maxbytes = new_mem_limit;
Packit Service 584ef9
    mem_limit = new_mem_limit;
Packit Service 584ef9
    mem_limit_reached = false; /* Will reset on next alloc */
Packit Service 584ef9
    memory_release(); /* free what might already be in the global pool */
Packit Service 584ef9
    return true;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
bool slabs_adjust_mem_limit(size_t new_mem_limit) {
Packit Service 584ef9
    bool ret;
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    ret = do_slabs_adjust_mem_limit(new_mem_limit);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
Packit Service 584ef9
        unsigned int *chunks_perslab) {
Packit Service 584ef9
    unsigned int ret;
Packit Service 584ef9
    slabclass_t *p;
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    p = &slabclass[id];
Packit Service 584ef9
    ret = p->sl_curr;
Packit Service 584ef9
    if (mem_flag != NULL)
Packit Service 584ef9
        *mem_flag = mem_malloced >= mem_limit ? true : false;
Packit Service 584ef9
    if (chunks_perslab != NULL)
Packit Service 584ef9
        *chunks_perslab = p->perslab;
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* The slabber system could avoid needing to understand much, if anything,
Packit Service 584ef9
 * about items if callbacks were strategically used. Due to how the slab mover
Packit Service 584ef9
 * works, certain flag bits can only be adjusted while holding the slabs lock.
Packit Service 584ef9
 * Using these functions, isolate sections of code needing this and turn them
Packit Service 584ef9
 * into callbacks when an interface becomes more obvious.
Packit Service 584ef9
 */
Packit Service 584ef9
void slabs_mlock(void) {
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
void slabs_munlock(void) {
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Statically initialised condition variable for the slab rebalancer.
 * NOTE(review): its waiters/signallers are outside this part of the file. */
static pthread_cond_t slab_rebalance_cond = PTHREAD_COND_INITIALIZER;

/* Starts out as 1; presumably the keep-running flag polled by the rebalance
 * thread -- confirm against the thread's main loop. */
static volatile int do_run_slab_rebalance_thread = 1;
Packit Service 584ef9
Packit Service 584ef9
static int slab_rebalance_start(void) {
Packit Service 584ef9
    slabclass_t *s_cls;
Packit Service 584ef9
    int no_go = 0;
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
Packit Service 584ef9
    if (slab_rebal.s_clsid < SLAB_GLOBAL_PAGE_POOL ||
Packit Service 584ef9
        slab_rebal.s_clsid > power_largest  ||
Packit Service 584ef9
        slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
Packit Service 584ef9
        slab_rebal.d_clsid > power_largest  ||
Packit Service 584ef9
        slab_rebal.s_clsid == slab_rebal.d_clsid)
Packit Service 584ef9
        no_go = -2;
Packit Service 584ef9
Packit Service 584ef9
    s_cls = &slabclass[slab_rebal.s_clsid];
Packit Service 584ef9
Packit Service 584ef9
    if (!grow_slab_list(slab_rebal.d_clsid)) {
Packit Service 584ef9
        no_go = -1;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if (s_cls->slabs < 2)
Packit Service 584ef9
        no_go = -3;
Packit Service 584ef9
Packit Service 584ef9
    if (no_go != 0) {
Packit Service 584ef9
        pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
        return no_go; /* Should use a wrapper function... */
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    /* Always kill the first available slab page as it is most likely to
Packit Service 584ef9
     * contain the oldest items
Packit Service 584ef9
     */
Packit Service 584ef9
    slab_rebal.slab_start = s_cls->slab_list[0];
Packit Service 584ef9
    slab_rebal.slab_end   = (char *)slab_rebal.slab_start +
Packit Service 584ef9
        (s_cls->size * s_cls->perslab);
Packit Service 584ef9
    slab_rebal.slab_pos   = slab_rebal.slab_start;
Packit Service 584ef9
    slab_rebal.done       = 0;
Packit Service 584ef9
    // Don't need to do chunk move work if page is in global pool.
Packit Service 584ef9
    if (slab_rebal.s_clsid == SLAB_GLOBAL_PAGE_POOL) {
Packit Service 584ef9
        slab_rebal.done = 1;
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    // Bit-vector to keep track of completed chunks
Packit Service 584ef9
    slab_rebal.completed = (uint8_t*)calloc(s_cls->perslab,sizeof(uint8_t));
Packit Service 584ef9
Packit Service 584ef9
    slab_rebalance_signal = 2;
Packit Service 584ef9
Packit Service 584ef9
    if (settings.verbose > 1) {
Packit Service 584ef9
        fprintf(stderr, "Started a slab rebalance\n");
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
Packit Service 584ef9
    STATS_LOCK();
Packit Service 584ef9
    stats_state.slab_reassign_running = true;
Packit Service 584ef9
    STATS_UNLOCK();
Packit Service 584ef9
Packit Service 584ef9
    return 0;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* CALLED WITH slabs_lock HELD */
Packit Service 584ef9
static void *slab_rebalance_alloc(const size_t size, unsigned int id) {
Packit Service 584ef9
    slabclass_t *s_cls;
Packit Service 584ef9
    s_cls = &slabclass[slab_rebal.s_clsid];
Packit Service 584ef9
    int x;
Packit Service 584ef9
    item *new_it = NULL;
Packit Service 584ef9
Packit Service 584ef9
    for (x = 0; x < s_cls->perslab; x++) {
Packit Service 584ef9
        new_it = do_slabs_alloc(size, id, SLABS_ALLOC_NO_NEWPAGE);
Packit Service 584ef9
        /* check that memory isn't within the range to clear */
Packit Service 584ef9
        if (new_it == NULL) {
Packit Service 584ef9
            break;
Packit Service 584ef9
        }
Packit Service 584ef9
        if ((void *)new_it >= slab_rebal.slab_start
Packit Service 584ef9
            && (void *)new_it < slab_rebal.slab_end) {
Packit Service 584ef9
            /* Pulled something we intend to free. Mark it as freed since
Packit Service 584ef9
             * we've already done the work of unlinking it from the freelist.
Packit Service 584ef9
             */
Packit Service 584ef9
            new_it->refcount = 0;
Packit Service 584ef9
            new_it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
            memcpy(ITEM_key(new_it), "deadbeef", 8);
Packit Service 584ef9
#endif
Packit Service 584ef9
            new_it = NULL;
Packit Service 584ef9
            slab_rebal.inline_reclaim++;
Packit Service 584ef9
        } else {
Packit Service 584ef9
            break;
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
    return new_it;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* CALLED WITH slabs_lock HELD */
Packit Service 584ef9
/* detaches item/chunk from freelist. */
Packit Service 584ef9
static void slab_rebalance_cut_free(slabclass_t *s_cls, item *it) {
Packit Service 584ef9
    /* Ensure this was on the freelist and nothing else. */
Packit Service 584ef9
    assert(it->it_flags == ITEM_SLABBED);
Packit Service 584ef9
    if (s_cls->slots == it) {
Packit Service 584ef9
        s_cls->slots = it->next;
Packit Service 584ef9
    }
Packit Service 584ef9
    if (it->next) it->next->prev = it->prev;
Packit Service 584ef9
    if (it->prev) it->prev->next = it->next;
Packit Service 584ef9
    s_cls->sl_curr--;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* How slab_rebalance_move() classified the chunk it just looked at. */
enum move_status {
    MOVE_PASS = 0,      /* nothing to do for this chunk */
    MOVE_FROM_SLAB,     /* chunk was cut out of the freelist */
    MOVE_FROM_LRU,      /* linked item, exclusively held by the mover */
    MOVE_BUSY,          /* could not be cleared on this pass */
    MOVE_LOCKED         /* item lock could not be taken */
};

/* Once slab_rebal.busy_loops exceeds this, linked items that are still
 * referenced elsewhere get unlinked by the mover. */
#define SLAB_MOVE_MAX_LOOPS 1000
Packit Service 584ef9
Packit Service 584ef9
/* refcount == 0 is safe since nobody can incr while item_lock is held.
Packit Service 584ef9
 * refcount != 0 is impossible since flags/etc can be modified in other
Packit Service 584ef9
 * threads. instead, note we found a busy one and bail. logic in do_item_get
Packit Service 584ef9
 * will prevent busy items from continuing to be busy
Packit Service 584ef9
 * NOTE: This is checking it_flags outside of an item lock. I believe this
Packit Service 584ef9
 * works since it_flags is 8 bits, and we're only ever comparing a single bit
Packit Service 584ef9
 * regardless. ITEM_SLABBED bit will always be correct since we're holding the
Packit Service 584ef9
 * lock which modifies that bit. ITEM_LINKED won't exist if we're between an
Packit Service 584ef9
 * item having ITEM_SLABBED removed, and the key hasn't been added to the item
Packit Service 584ef9
 * yet. The memory barrier from the slabs lock should order the key write and the
Packit Service 584ef9
 * flags to the item?
Packit Service 584ef9
 * If ITEM_LINKED did exist and was just removed, but we still see it, that's
Packit Service 584ef9
 * still safe since it will have a valid key, which we then lock, and then
Packit Service 584ef9
 * recheck everything.
Packit Service 584ef9
 * This may not be safe on all platforms; If not, slabs_alloc() will need to
Packit Service 584ef9
 * seed the item key while holding slabs_lock.
Packit Service 584ef9
 */
Packit Service 584ef9
/*
 * Process the single chunk at slab_rebal.slab_pos in the page being
 * evacuated, then advance slab_pos by one chunk of the source class.
 *
 * A chunk not yet marked in slab_rebal.completed is classified under
 * slabs_lock (see enum move_status) and then handled: free chunks are cut
 * from the freelist, linked items held only by us are copied to a fresh
 * chunk or unlinked, and anything else is counted as busy.
 *
 * When slab_pos reaches slab_end the scan either restarts from slab_start
 * (some chunk was busy during this pass; busy_loops is bumped) or
 * slab_rebal.done is incremented.
 *
 * Returns non-zero if the chunk could not be finished on this call.
 */
static int slab_rebalance_move(void) {
Packit Service 584ef9
    slabclass_t *s_cls;
Packit Service 584ef9
    int was_busy = 0;
Packit Service 584ef9
    int refcount = 0;
Packit Service 584ef9
    uint32_t hv;
Packit Service 584ef9
    void *hold_lock;
Packit Service 584ef9
    enum move_status status = MOVE_PASS;
Packit Service 584ef9
Packit Service 584ef9
    s_cls = &slabclass[slab_rebal.s_clsid];
Packit Service 584ef9
    // the offset to check if completed or not
    /* i.e. the index of the current chunk within the page, used to index
     * slab_rebal.completed. */
Packit Service 584ef9
    int offset = ((char*)slab_rebal.slab_pos-(char*)slab_rebal.slab_start)/(s_cls->size);
Packit Service 584ef9
Packit Service 584ef9
    // skip acquiring the slabs lock for items we've already fully processed.
Packit Service 584ef9
    if (slab_rebal.completed[offset] == 0) {
Packit Service 584ef9
        pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
        hv = 0;
Packit Service 584ef9
        hold_lock = NULL;
Packit Service 584ef9
        item *it = slab_rebal.slab_pos;
Packit Service 584ef9
Packit Service 584ef9
        item_chunk *ch = NULL;
Packit Service 584ef9
        status = MOVE_PASS;
Packit Service 584ef9
Packit Service 584ef9
        if (it->it_flags & ITEM_CHUNK) {
Packit Service 584ef9
            /* This chunk is a chained part of a larger item. */
Packit Service 584ef9
            ch = (item_chunk *) it;
Packit Service 584ef9
            /* Instead, we use the head chunk to find the item and effectively
Packit Service 584ef9
             * lock the entire structure. If a chunk has ITEM_CHUNK flag, its
Packit Service 584ef9
             * head cannot be slabbed, so the normal routine is safe. */
Packit Service 584ef9
            it = ch->head;
Packit Service 584ef9
            assert(it->it_flags & ITEM_CHUNKED);
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
Packit Service 584ef9
         * the chunk for move. Only these two flags should exist.
Packit Service 584ef9
         */
Packit Service 584ef9
        if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
Packit Service 584ef9
            /* ITEM_SLABBED can only be added/removed under the slabs_lock */
Packit Service 584ef9
            if (it->it_flags & ITEM_SLABBED) {
Packit Service 584ef9
                assert(ch == NULL);
Packit Service 584ef9
                slab_rebalance_cut_free(s_cls, it);
Packit Service 584ef9
                status = MOVE_FROM_SLAB;
Packit Service 584ef9
            } else if ((it->it_flags & ITEM_LINKED) != 0) {
Packit Service 584ef9
                /* If it doesn't have ITEM_SLABBED, the item could be in any
Packit Service 584ef9
                 * state on its way to being freed or written to. If no
Packit Service 584ef9
                 * ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
Packit Service 584ef9
                 * and have the key written to it already.
Packit Service 584ef9
                 */
Packit Service 584ef9
                hv = hash(ITEM_key(it), it->nkey);
Packit Service 584ef9
                if ((hold_lock = item_trylock(hv)) == NULL) {
Packit Service 584ef9
                    status = MOVE_LOCKED;
Packit Service 584ef9
                } else {
Packit Service 584ef9
                    bool is_linked = (it->it_flags & ITEM_LINKED);
Packit Service 584ef9
                    refcount = refcount_incr(it);
Packit Service 584ef9
                    if (refcount == 2) { /* item is linked but not busy */
Packit Service 584ef9
                        /* Double check ITEM_LINKED flag here, since we're
Packit Service 584ef9
                         * past a memory barrier from the mutex. */
Packit Service 584ef9
                        if (is_linked) {
Packit Service 584ef9
                            status = MOVE_FROM_LRU;
Packit Service 584ef9
                        } else {
Packit Service 584ef9
                            /* refcount == 1 + !ITEM_LINKED means the item is being
Packit Service 584ef9
                             * uploaded to, or was just unlinked but hasn't been freed
Packit Service 584ef9
                             * yet. Let it bleed off on its own and try again later */
Packit Service 584ef9
                            status = MOVE_BUSY;
Packit Service 584ef9
                        }
Packit Service 584ef9
                    } else if (refcount > 2 && is_linked) {
Packit Service 584ef9
                        // TODO: Mark items for delete/rescue and process
Packit Service 584ef9
                        // outside of the main loop.
Packit Service 584ef9
                        if (slab_rebal.busy_loops > SLAB_MOVE_MAX_LOOPS) {
Packit Service 584ef9
                            slab_rebal.busy_deletes++;
Packit Service 584ef9
                            // Only safe to hold slabs lock because refcount
Packit Service 584ef9
                            // can't drop to 0 until we release item lock.
Packit Service 584ef9
                            STORAGE_delete(storage, it);
Packit Service 584ef9
                            pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
                            do_item_unlink(it, hv);
Packit Service 584ef9
                            pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
                        }
Packit Service 584ef9
                        status = MOVE_BUSY;
Packit Service 584ef9
                    } else {
Packit Service 584ef9
                        if (settings.verbose > 2) {
Packit Service 584ef9
                            fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
Packit Service 584ef9
                                it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
Packit Service 584ef9
                        }
Packit Service 584ef9
                        status = MOVE_BUSY;
Packit Service 584ef9
                    }
Packit Service 584ef9
                    /* Item lock must be held while modifying refcount */
                    /* For MOVE_FROM_LRU the extra reference and the item
                     * lock are kept; the switch below releases the lock. */
Packit Service 584ef9
                    if (status == MOVE_BUSY) {
Packit Service 584ef9
                        refcount_decr(it);
Packit Service 584ef9
                        item_trylock_unlock(hold_lock);
Packit Service 584ef9
                    }
Packit Service 584ef9
                }
Packit Service 584ef9
            } else {
Packit Service 584ef9
                /* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
Packit Service 584ef9
                 * busy and wait for item to complete its upload. */
Packit Service 584ef9
                status = MOVE_BUSY;
Packit Service 584ef9
            }
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        /* Act on the classification made above. slabs_lock is held on entry
         * to the switch and again when it is left. */
        int save_item = 0;
Packit Service 584ef9
        item *new_it = NULL;
Packit Service 584ef9
        size_t ntotal = 0;
Packit Service 584ef9
        switch (status) {
Packit Service 584ef9
            case MOVE_FROM_LRU:
Packit Service 584ef9
                /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
Packit Service 584ef9
                 * We only need to hold the slabs_lock while initially looking
Packit Service 584ef9
                 * at an item, and at this point we have an exclusive refcount
Packit Service 584ef9
                 * (2) + the item is locked. Drop slabs lock, drop item to
Packit Service 584ef9
                 * refcount 1 (just our own, then fall through and wipe it
Packit Service 584ef9
                 */
Packit Service 584ef9
                /* Check if expired or flushed */
Packit Service 584ef9
                ntotal = ITEM_ntotal(it);
Packit Service 584ef9
#ifdef EXTSTORE
Packit Service 584ef9
                if (it->it_flags & ITEM_HDR) {
Packit Service 584ef9
                    ntotal = (ntotal - it->nbytes) + sizeof(item_hdr);
Packit Service 584ef9
                }
Packit Service 584ef9
#endif
Packit Service 584ef9
                /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
Packit Service 584ef9
                if (ch == NULL && (it->it_flags & ITEM_CHUNKED)) {
Packit Service 584ef9
                    /* Chunked should be identical to non-chunked, except we need
Packit Service 584ef9
                     * to swap out ntotal for the head-chunk-total. */
Packit Service 584ef9
                    ntotal = s_cls->size;
Packit Service 584ef9
                }
Packit Service 584ef9
                /* Decide whether to rescue the data: expired/flushed items
                 * are dropped, otherwise a replacement chunk is requested
                 * and the item is only saved if one was obtained. */
                if ((it->exptime != 0 && it->exptime < current_time)
Packit Service 584ef9
                    || item_is_flushed(it)) {
Packit Service 584ef9
                    /* Expired, don't save. */
Packit Service 584ef9
                    save_item = 0;
Packit Service 584ef9
                } else if (ch == NULL &&
Packit Service 584ef9
                        (new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
Packit Service 584ef9
                    /* Not a chunk of an item, and nomem. */
Packit Service 584ef9
                    save_item = 0;
Packit Service 584ef9
                    slab_rebal.evictions_nomem++;
Packit Service 584ef9
                } else if (ch != NULL &&
Packit Service 584ef9
                        (new_it = slab_rebalance_alloc(s_cls->size, slab_rebal.s_clsid)) == NULL) {
Packit Service 584ef9
                    /* Is a chunk of an item, and nomem. */
Packit Service 584ef9
                    save_item = 0;
Packit Service 584ef9
                    slab_rebal.evictions_nomem++;
Packit Service 584ef9
                } else {
Packit Service 584ef9
                    /* Was whatever it was, and we have memory for it. */
Packit Service 584ef9
                    save_item = 1;
Packit Service 584ef9
                }
Packit Service 584ef9
                pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
                if (save_item) {
Packit Service 584ef9
                    if (ch == NULL) {
Packit Service 584ef9
                        assert((new_it->it_flags & ITEM_CHUNKED) == 0);
Packit Service 584ef9
                        /* if free memory, memcpy. clear prev/next/h_bucket */
Packit Service 584ef9
                        memcpy(new_it, it, ntotal);
Packit Service 584ef9
                        new_it->prev = 0;
Packit Service 584ef9
                        new_it->next = 0;
Packit Service 584ef9
                        new_it->h_next = 0;
Packit Service 584ef9
                        /* These are definitely required. else fails assert */
Packit Service 584ef9
                        new_it->it_flags &= ~ITEM_LINKED;
Packit Service 584ef9
                        new_it->refcount = 0;
Packit Service 584ef9
                        do_item_replace(it, new_it, hv);
Packit Service 584ef9
                        /* Need to walk the chunks and repoint head  */
Packit Service 584ef9
                        if (new_it->it_flags & ITEM_CHUNKED) {
Packit Service 584ef9
                            item_chunk *fch = (item_chunk *) ITEM_schunk(new_it);
Packit Service 584ef9
                            /* NOTE(review): dereferences fch->next without a
                             * NULL check -- assumes a chunked item always has
                             * a second chunk; confirm. */
                            fch->next->prev = fch;
Packit Service 584ef9
                            while (fch) {
Packit Service 584ef9
                                fch->head = new_it;
Packit Service 584ef9
                                fch = fch->next;
Packit Service 584ef9
                            }
Packit Service 584ef9
                        }
Packit Service 584ef9
                        it->refcount = 0;
Packit Service 584ef9
                        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
                        memcpy(ITEM_key(it), "deadbeef", 8);
Packit Service 584ef9
#endif
Packit Service 584ef9
                        slab_rebal.rescues++;
Packit Service 584ef9
                    } else {
Packit Service 584ef9
                        item_chunk *nch = (item_chunk *) new_it;
Packit Service 584ef9
                        /* Chunks always have head chunk (the main it) */
Packit Service 584ef9
                        ch->prev->next = nch;
Packit Service 584ef9
                        if (ch->next)
Packit Service 584ef9
                            ch->next->prev = nch;
Packit Service 584ef9
                        memcpy(nch, ch, ch->used + sizeof(item_chunk));
Packit Service 584ef9
                        ch->refcount = 0;
Packit Service 584ef9
                        ch->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
                        slab_rebal.chunk_rescues++;
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
                        memcpy(ITEM_key((item *)ch), "deadbeef", 8);
Packit Service 584ef9
#endif
Packit Service 584ef9
                        /* Drop the reference taken on the head item above. */
                        refcount_decr(it);
Packit Service 584ef9
                    }
Packit Service 584ef9
                    slab_rebal.completed[offset] = 1;
Packit Service 584ef9
                } else {
Packit Service 584ef9
                    /* unlink and mark as done if it's not
Packit Service 584ef9
                     * a chunked item as they require more book-keeping) */
Packit Service 584ef9
                    STORAGE_delete(storage, it);
Packit Service 584ef9
                    if (!ch && (it->it_flags & ITEM_CHUNKED) == 0) {
Packit Service 584ef9
                        do_item_unlink(it, hv);
Packit Service 584ef9
                        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
                        it->refcount = 0;
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
                        memcpy(ITEM_key(it), "deadbeef", 8);
Packit Service 584ef9
#endif
Packit Service 584ef9
                        slab_rebal.completed[offset] = 1;
Packit Service 584ef9
                    } else {
Packit Service 584ef9
                        /* Chunked item, or a chunk of one: unlink the whole
                         * item and hand it to slabs_free(). The chunk is not
                         * marked completed; it is counted as busy so the
                         * page gets another pass. */
                        ntotal = ITEM_ntotal(it);
Packit Service 584ef9
                        do_item_unlink(it, hv);
Packit Service 584ef9
                        slabs_free(it, ntotal, slab_rebal.s_clsid);
Packit Service 584ef9
                        /* Swing around again later to remove it from the freelist. */
Packit Service 584ef9
                        slab_rebal.busy_items++;
Packit Service 584ef9
                        was_busy++;
Packit Service 584ef9
                    }
Packit Service 584ef9
Packit Service 584ef9
                }
Packit Service 584ef9
                item_trylock_unlock(hold_lock);
Packit Service 584ef9
                pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
                /* Always remove the ntotal, as we added it in during
Packit Service 584ef9
                 * do_slabs_alloc() when copying the item.
Packit Service 584ef9
                 */
Packit Service 584ef9
                /* NOTE(review): no statement follows the comment above; it
                 * looks stale -- confirm before relying on it. */
                break;
Packit Service 584ef9
            case MOVE_FROM_SLAB:
Packit Service 584ef9
                /* Chunk was already cut from the freelist above; just mark
                 * it as cleared. */
                slab_rebal.completed[offset] = 1;
Packit Service 584ef9
                it->refcount = 0;
Packit Service 584ef9
                it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
                memcpy(ITEM_key(it), "deadbeef", 8);
Packit Service 584ef9
#endif
Packit Service 584ef9
                break;
Packit Service 584ef9
            case MOVE_BUSY:
Packit Service 584ef9
            case MOVE_LOCKED:
Packit Service 584ef9
                slab_rebal.busy_items++;
Packit Service 584ef9
                was_busy++;
Packit Service 584ef9
                break;
Packit Service 584ef9
            case MOVE_PASS:
Packit Service 584ef9
                break;
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    // Note: slab_rebal.* is occasionally protected under slabs_lock, but
Packit Service 584ef9
    // the mover thread is the only user while active: so it's only necessary
Packit Service 584ef9
    // for start/stop synchronization.
Packit Service 584ef9
    slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
Packit Service 584ef9
Packit Service 584ef9
    if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
Packit Service 584ef9
        /* Some items were busy, start again from the top */
Packit Service 584ef9
        if (slab_rebal.busy_items) {
Packit Service 584ef9
            slab_rebal.slab_pos = slab_rebal.slab_start;
Packit Service 584ef9
            STATS_LOCK();
Packit Service 584ef9
            stats.slab_reassign_busy_items += slab_rebal.busy_items;
Packit Service 584ef9
            STATS_UNLOCK();
Packit Service 584ef9
            slab_rebal.busy_items = 0;
Packit Service 584ef9
            slab_rebal.busy_loops++;
Packit Service 584ef9
        } else {
Packit Service 584ef9
            /* Whole page scanned with nothing busy: flag the move as done. */
            slab_rebal.done++;
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    return was_busy;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static void slab_rebalance_finish(void) {
Packit Service 584ef9
    slabclass_t *s_cls;
Packit Service 584ef9
    slabclass_t *d_cls;
Packit Service 584ef9
    int x;
Packit Service 584ef9
    uint32_t rescues;
Packit Service 584ef9
    uint32_t evictions_nomem;
Packit Service 584ef9
    uint32_t inline_reclaim;
Packit Service 584ef9
    uint32_t chunk_rescues;
Packit Service 584ef9
    uint32_t busy_deletes;
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
Packit Service 584ef9
    s_cls = &slabclass[slab_rebal.s_clsid];
Packit Service 584ef9
    d_cls = &slabclass[slab_rebal.d_clsid];
Packit Service 584ef9
Packit Service 584ef9
#ifdef DEBUG_SLAB_MOVER
Packit Service 584ef9
    /* If the algorithm is broken, live items can sneak in. */
Packit Service 584ef9
    slab_rebal.slab_pos = slab_rebal.slab_start;
Packit Service 584ef9
    while (1) {
Packit Service 584ef9
        item *it = slab_rebal.slab_pos;
Packit Service 584ef9
        assert(it->it_flags == (ITEM_SLABBED|ITEM_FETCHED));
Packit Service 584ef9
        assert(memcmp(ITEM_key(it), "deadbeef", 8) == 0);
Packit Service 584ef9
        it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
Packit Service 584ef9
        slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
Packit Service 584ef9
        if (slab_rebal.slab_pos >= slab_rebal.slab_end)
Packit Service 584ef9
            break;
Packit Service 584ef9
    }
Packit Service 584ef9
#endif
Packit Service 584ef9
Packit Service 584ef9
    /* At this point the stolen slab is completely clear.
Packit Service 584ef9
     * We always kill the "first"/"oldest" slab page in the slab_list, so
Packit Service 584ef9
     * shuffle the page list backwards and decrement.
Packit Service 584ef9
     */
Packit Service 584ef9
    s_cls->slabs--;
Packit Service 584ef9
    for (x = 0; x < s_cls->slabs; x++) {
Packit Service 584ef9
        s_cls->slab_list[x] = s_cls->slab_list[x+1];
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
Packit Service 584ef9
    /* Don't need to split the page into chunks if we're just storing it */
Packit Service 584ef9
    if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
Packit Service 584ef9
        memset(slab_rebal.slab_start, 0, (size_t)settings.slab_page_size);
Packit Service 584ef9
        split_slab_page_into_freelist(slab_rebal.slab_start,
Packit Service 584ef9
            slab_rebal.d_clsid);
Packit Service 584ef9
    } else if (slab_rebal.d_clsid == SLAB_GLOBAL_PAGE_POOL) {
Packit Service 584ef9
        /* memset just enough to signal restart handler to skip */
Packit Service 584ef9
        memset(slab_rebal.slab_start, 0, sizeof(item));
Packit Service 584ef9
        /* mem_malloc'ed might be higher than mem_limit. */
Packit Service 584ef9
        mem_limit_reached = false;
Packit Service 584ef9
        memory_release();
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    slab_rebal.busy_loops = 0;
Packit Service 584ef9
    slab_rebal.done       = 0;
Packit Service 584ef9
    slab_rebal.s_clsid    = 0;
Packit Service 584ef9
    slab_rebal.d_clsid    = 0;
Packit Service 584ef9
    slab_rebal.slab_start = NULL;
Packit Service 584ef9
    slab_rebal.slab_end   = NULL;
Packit Service 584ef9
    slab_rebal.slab_pos   = NULL;
Packit Service 584ef9
    evictions_nomem    = slab_rebal.evictions_nomem;
Packit Service 584ef9
    inline_reclaim = slab_rebal.inline_reclaim;
Packit Service 584ef9
    rescues   = slab_rebal.rescues;
Packit Service 584ef9
    chunk_rescues = slab_rebal.chunk_rescues;
Packit Service 584ef9
    busy_deletes = slab_rebal.busy_deletes;
Packit Service 584ef9
    slab_rebal.evictions_nomem    = 0;
Packit Service 584ef9
    slab_rebal.inline_reclaim = 0;
Packit Service 584ef9
    slab_rebal.rescues  = 0;
Packit Service 584ef9
    slab_rebal.chunk_rescues = 0;
Packit Service 584ef9
    slab_rebal.busy_deletes = 0;
Packit Service 584ef9
Packit Service 584ef9
    slab_rebalance_signal = 0;
Packit Service 584ef9
Packit Service 584ef9
    free(slab_rebal.completed);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
Packit Service 584ef9
    STATS_LOCK();
Packit Service 584ef9
    stats.slabs_moved++;
Packit Service 584ef9
    stats.slab_reassign_rescues += rescues;
Packit Service 584ef9
    stats.slab_reassign_evictions_nomem += evictions_nomem;
Packit Service 584ef9
    stats.slab_reassign_inline_reclaim += inline_reclaim;
Packit Service 584ef9
    stats.slab_reassign_chunk_rescues += chunk_rescues;
Packit Service 584ef9
    stats.slab_reassign_busy_deletes += busy_deletes;
Packit Service 584ef9
    stats_state.slab_reassign_running = false;
Packit Service 584ef9
    STATS_UNLOCK();
Packit Service 584ef9
Packit Service 584ef9
    if (settings.verbose > 1) {
Packit Service 584ef9
        fprintf(stderr, "finished a slab move\n");
Packit Service 584ef9
    }
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Slab mover thread.
Packit Service 584ef9
 * Sits waiting for a condition to jump off and shovel some memory about
Packit Service 584ef9
 */
Packit Service 584ef9
static void *slab_rebalance_thread(void *arg) {
Packit Service 584ef9
    int was_busy = 0;
Packit Service 584ef9
    int backoff_timer = 1;
Packit Service 584ef9
    int backoff_max = 1000;
Packit Service 584ef9
    /* So we first pass into cond_wait with the mutex held */
Packit Service 584ef9
    mutex_lock(&slabs_rebalance_lock);
Packit Service 584ef9
Packit Service 584ef9
    /* Must finish moving page before stopping */
Packit Service 584ef9
    while (slab_rebalance_signal || do_run_slab_rebalance_thread) {
Packit Service 584ef9
        if (slab_rebalance_signal == 1) {
Packit Service 584ef9
            if (slab_rebalance_start() < 0) {
Packit Service 584ef9
                /* Handle errors with more specificity as required. */
Packit Service 584ef9
                slab_rebalance_signal = 0;
Packit Service 584ef9
            }
Packit Service 584ef9
Packit Service 584ef9
            was_busy = 0;
Packit Service 584ef9
        } else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) {
Packit Service 584ef9
            was_busy = slab_rebalance_move();
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        if (slab_rebal.done) {
Packit Service 584ef9
            slab_rebalance_finish();
Packit Service 584ef9
        } else if (was_busy) {
Packit Service 584ef9
            /* Stuck waiting for some items to unlock, so slow down a bit
Packit Service 584ef9
             * to give them a chance to free up */
Packit Service 584ef9
            usleep(backoff_timer);
Packit Service 584ef9
            backoff_timer = backoff_timer * 2;
Packit Service 584ef9
            if (backoff_timer > backoff_max)
Packit Service 584ef9
                backoff_timer = backoff_max;
Packit Service 584ef9
        }
Packit Service 584ef9
Packit Service 584ef9
        if (slab_rebalance_signal == 0) {
Packit Service 584ef9
            /* always hold this lock while we're running */
Packit Service 584ef9
            pthread_cond_wait(&slab_rebalance_cond, &slabs_rebalance_lock);
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    // TODO: cancel in-flight slab page move
Packit Service 584ef9
    mutex_unlock(&slabs_rebalance_lock);
Packit Service 584ef9
    return NULL;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Iterate at most once through the slab classes and pick a "random" source.
Packit Service 584ef9
 * I like this better than calling rand() since rand() is slow enough that we
Packit Service 584ef9
 * can just check all of the classes once instead.
Packit Service 584ef9
 */
Packit Service 584ef9
static int slabs_reassign_pick_any(int dst) {
Packit Service 584ef9
    static int cur = POWER_SMALLEST - 1;
Packit Service 584ef9
    int tries = power_largest - POWER_SMALLEST + 1;
Packit Service 584ef9
    for (; tries > 0; tries--) {
Packit Service 584ef9
        cur++;
Packit Service 584ef9
        if (cur > power_largest)
Packit Service 584ef9
            cur = POWER_SMALLEST;
Packit Service 584ef9
        if (cur == dst)
Packit Service 584ef9
            continue;
Packit Service 584ef9
        if (slabclass[cur].slabs > 1) {
Packit Service 584ef9
            return cur;
Packit Service 584ef9
        }
Packit Service 584ef9
    }
Packit Service 584ef9
    return -1;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
static enum reassign_result_type do_slabs_reassign(int src, int dst) {
Packit Service 584ef9
    bool nospare = false;
Packit Service 584ef9
    if (slab_rebalance_signal != 0)
Packit Service 584ef9
        return REASSIGN_RUNNING;
Packit Service 584ef9
Packit Service 584ef9
    if (src == dst)
Packit Service 584ef9
        return REASSIGN_SRC_DST_SAME;
Packit Service 584ef9
Packit Service 584ef9
    /* Special indicator to choose ourselves. */
Packit Service 584ef9
    if (src == -1) {
Packit Service 584ef9
        src = slabs_reassign_pick_any(dst);
Packit Service 584ef9
        /* TODO: If we end up back at -1, return a new error type */
Packit Service 584ef9
    }
Packit Service 584ef9
Packit Service 584ef9
    if (src < SLAB_GLOBAL_PAGE_POOL || src > power_largest ||
Packit Service 584ef9
        dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
Packit Service 584ef9
        return REASSIGN_BADCLASS;
Packit Service 584ef9
Packit Service 584ef9
    pthread_mutex_lock(&slabs_lock);
Packit Service 584ef9
    if (slabclass[src].slabs < 2)
Packit Service 584ef9
        nospare = true;
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_lock);
Packit Service 584ef9
    if (nospare)
Packit Service 584ef9
        return REASSIGN_NOSPARE;
Packit Service 584ef9
Packit Service 584ef9
    slab_rebal.s_clsid = src;
Packit Service 584ef9
    slab_rebal.d_clsid = dst;
Packit Service 584ef9
Packit Service 584ef9
    slab_rebalance_signal = 1;
Packit Service 584ef9
    pthread_cond_signal(&slab_rebalance_cond);
Packit Service 584ef9
Packit Service 584ef9
    return REASSIGN_OK;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
enum reassign_result_type slabs_reassign(int src, int dst) {
Packit Service 584ef9
    enum reassign_result_type ret;
Packit Service 584ef9
    if (pthread_mutex_trylock(&slabs_rebalance_lock) != 0) {
Packit Service 584ef9
        return REASSIGN_RUNNING;
Packit Service 584ef9
    }
Packit Service 584ef9
    ret = do_slabs_reassign(src, dst);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_rebalance_lock);
Packit Service 584ef9
    return ret;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* If we hold this lock, rebalancer can't wake up or move */
Packit Service 584ef9
void slabs_rebalancer_pause(void) {
Packit Service 584ef9
    pthread_mutex_lock(&slabs_rebalance_lock);
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
void slabs_rebalancer_resume(void) {
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_rebalance_lock);
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* Thread handle of the slab rebalance thread: filled in by pthread_create()
 * in start_slab_maintenance_thread() and joined in
 * stop_slab_maintenance_thread(). */
static pthread_t rebalance_tid;
Packit Service 584ef9
Packit Service 584ef9
int start_slab_maintenance_thread(void) {
Packit Service 584ef9
    int ret;
Packit Service 584ef9
    slab_rebalance_signal = 0;
Packit Service 584ef9
    slab_rebal.slab_start = NULL;
Packit Service 584ef9
Packit Service 584ef9
    if ((ret = pthread_create(&rebalance_tid, NULL,
Packit Service 584ef9
                              slab_rebalance_thread, NULL)) != 0) {
Packit Service 584ef9
        fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
Packit Service 584ef9
        return -1;
Packit Service 584ef9
    }
Packit Service 584ef9
    return 0;
Packit Service 584ef9
}
Packit Service 584ef9
Packit Service 584ef9
/* The maintenance thread is on a sleep/loop cycle, so it should join after a
Packit Service 584ef9
 * short wait */
Packit Service 584ef9
void stop_slab_maintenance_thread(void) {
Packit Service 584ef9
    mutex_lock(&slabs_rebalance_lock);
Packit Service 584ef9
    do_run_slab_rebalance_thread = 0;
Packit Service 584ef9
    pthread_cond_signal(&slab_rebalance_cond);
Packit Service 584ef9
    pthread_mutex_unlock(&slabs_rebalance_lock);
Packit Service 584ef9
Packit Service 584ef9
    /* Wait for the maintenance thread to stop */
Packit Service 584ef9
    pthread_join(rebalance_tid, NULL);
Packit Service 584ef9
}