|
Packit |
4e8bc4 |
/* Copyright 2017 Facebook.
|
|
Packit |
4e8bc4 |
*
|
|
Packit |
4e8bc4 |
* Use and distribution licensed under the BSD license. See
|
|
Packit |
4e8bc4 |
* the LICENSE file for full text.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
Packit |
4e8bc4 |
#include "memcached.h"
|
|
Packit |
4e8bc4 |
#include "slab_automove_extstore.h"
|
|
Packit |
4e8bc4 |
#include <stdlib.h>
|
|
Packit |
4e8bc4 |
#include <string.h>
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
#define MIN_PAGES_FOR_SOURCE 2
|
|
Packit |
4e8bc4 |
#define MIN_PAGES_FOR_RECLAIM 2.5
|
|
Packit |
4e8bc4 |
#define MIN_PAGES_FREE 1.5
|
|
Packit |
4e8bc4 |
#define MEMCHECK_PERIOD 60
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
struct window_data {
|
|
Packit |
4e8bc4 |
uint64_t age;
|
|
Packit |
4e8bc4 |
uint64_t dirty;
|
|
Packit |
4e8bc4 |
uint64_t evicted;
|
|
Packit |
4e8bc4 |
unsigned int excess_free;
|
|
Packit |
4e8bc4 |
unsigned int relaxed;
|
|
Packit |
4e8bc4 |
};
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
struct window_global {
|
|
Packit |
4e8bc4 |
uint32_t pool_low;
|
|
Packit |
4e8bc4 |
uint32_t pool_high;
|
|
Packit |
4e8bc4 |
};
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
typedef struct {
|
|
Packit |
4e8bc4 |
struct window_data *window_data;
|
|
Packit |
4e8bc4 |
struct window_global *window_global;
|
|
Packit |
4e8bc4 |
struct settings *settings;
|
|
Packit |
4e8bc4 |
uint32_t window_size;
|
|
Packit |
4e8bc4 |
uint32_t window_cur;
|
|
Packit |
4e8bc4 |
uint32_t item_size;
|
|
Packit |
4e8bc4 |
rel_time_t last_memcheck_run;
|
|
Packit |
4e8bc4 |
double max_age_ratio;
|
|
Packit |
4e8bc4 |
double free_ratio;
|
|
Packit |
4e8bc4 |
bool pool_filled_once;
|
|
Packit |
4e8bc4 |
unsigned int free_mem[MAX_NUMBER_OF_SLAB_CLASSES];
|
|
Packit |
4e8bc4 |
item_stats_automove iam_before[MAX_NUMBER_OF_SLAB_CLASSES];
|
|
Packit |
4e8bc4 |
item_stats_automove iam_after[MAX_NUMBER_OF_SLAB_CLASSES];
|
|
Packit |
4e8bc4 |
slab_stats_automove sam_before[MAX_NUMBER_OF_SLAB_CLASSES];
|
|
Packit |
4e8bc4 |
slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES];
|
|
Packit |
4e8bc4 |
} slab_automove;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
void *slab_automove_extstore_init(struct settings *settings) {
|
|
Packit |
4e8bc4 |
uint32_t window_size = settings->slab_automove_window;
|
|
Packit |
4e8bc4 |
double max_age_ratio = settings->slab_automove_ratio;
|
|
Packit |
4e8bc4 |
slab_automove *a = calloc(1, sizeof(slab_automove));
|
|
Packit |
4e8bc4 |
if (a == NULL)
|
|
Packit |
4e8bc4 |
return NULL;
|
|
Packit |
4e8bc4 |
a->window_data = calloc(window_size * MAX_NUMBER_OF_SLAB_CLASSES, sizeof(struct window_data));
|
|
Packit |
4e8bc4 |
a->window_global = calloc(window_size, sizeof(struct window_global));
|
|
Packit |
4e8bc4 |
a->window_size = window_size;
|
|
Packit |
4e8bc4 |
a->max_age_ratio = max_age_ratio;
|
|
Packit |
4e8bc4 |
a->free_ratio = settings->slab_automove_freeratio;
|
|
Packit |
4e8bc4 |
a->item_size = settings->ext_item_size;
|
|
Packit |
4e8bc4 |
a->last_memcheck_run = 0;
|
|
Packit |
4e8bc4 |
a->settings = settings;
|
|
Packit |
4e8bc4 |
a->pool_filled_once = false;
|
|
Packit |
4e8bc4 |
if (a->window_data == NULL || a->window_global == NULL) {
|
|
Packit |
4e8bc4 |
if (a->window_data)
|
|
Packit |
4e8bc4 |
free(a->window_data);
|
|
Packit |
4e8bc4 |
if (a->window_global)
|
|
Packit |
4e8bc4 |
free(a->window_global);
|
|
Packit |
4e8bc4 |
free(a);
|
|
Packit |
4e8bc4 |
return NULL;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// do a dry run to fill the before structs
|
|
Packit |
4e8bc4 |
fill_item_stats_automove(a->iam_before);
|
|
Packit |
4e8bc4 |
fill_slab_stats_automove(a->sam_before);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
return (void *)a;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
void slab_automove_extstore_free(void *arg) {
|
|
Packit |
4e8bc4 |
slab_automove *a = (slab_automove *)arg;
|
|
Packit |
4e8bc4 |
free(a->window_data);
|
|
Packit |
4e8bc4 |
free(a->window_global);
|
|
Packit |
4e8bc4 |
free(a);
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
static void window_sum(struct window_data *wd, struct window_data *w,
|
|
Packit |
4e8bc4 |
uint32_t size) {
|
|
Packit |
4e8bc4 |
for (int x = 0; x < size; x++) {
|
|
Packit |
4e8bc4 |
struct window_data *d = &wd[x];
|
|
Packit |
4e8bc4 |
w->age += d->age;
|
|
Packit |
4e8bc4 |
w->dirty += d->dirty;
|
|
Packit |
4e8bc4 |
w->evicted += d->evicted;
|
|
Packit |
4e8bc4 |
w->excess_free += d->excess_free;
|
|
Packit |
4e8bc4 |
w->relaxed += d->relaxed;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* This could potentially merge with above */
|
|
Packit |
4e8bc4 |
static void window_global_sum(struct window_global *wg,
|
|
Packit |
4e8bc4 |
struct window_global *w, uint32_t size) {
|
|
Packit |
4e8bc4 |
for (int x = 0; x < size; x++) {
|
|
Packit |
4e8bc4 |
struct window_global *d = &wg[x];
|
|
Packit |
4e8bc4 |
w->pool_high += d->pool_high;
|
|
Packit |
4e8bc4 |
w->pool_low += d->pool_low;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
static void global_pool_check(slab_automove *a) {
|
|
Packit |
4e8bc4 |
bool mem_limit_reached;
|
|
Packit |
4e8bc4 |
uint32_t free = a->free_mem[0];
|
|
Packit |
4e8bc4 |
struct window_global *wg = &a->window_global[a->window_cur % a->window_size];
|
|
Packit |
4e8bc4 |
unsigned int count = global_page_pool_size(&mem_limit_reached);
|
|
Packit |
4e8bc4 |
memset(wg, 0, sizeof(struct window_global));
|
|
Packit |
4e8bc4 |
if (!mem_limit_reached)
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
if (count < free / 2) {
|
|
Packit |
4e8bc4 |
wg->pool_low = 1;
|
|
Packit |
4e8bc4 |
a->pool_filled_once = true;
|
|
Packit |
4e8bc4 |
} else if (count > free) {
|
|
Packit |
4e8bc4 |
wg->pool_high = 1;
|
|
Packit |
4e8bc4 |
} else {
|
|
Packit |
4e8bc4 |
a->pool_filled_once = true;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
/* A percentage of memory is configured to be held "free" as buffers for the
|
|
Packit |
4e8bc4 |
* external storage system.
|
|
Packit |
4e8bc4 |
* % of global memory is desired in the global page pool
|
|
Packit |
4e8bc4 |
* each slab class has a % of free chunks desired based on how much memory is
|
|
Packit |
4e8bc4 |
* currently in the class. This allows time for extstore to flush data when
|
|
Packit |
4e8bc4 |
* spikes or waves of set data arrive.
|
|
Packit |
4e8bc4 |
* The global page pool reserve acts as a secondary buffer for any slab class,
|
|
Packit |
4e8bc4 |
* which helps absorb shifts in which class is active.
|
|
Packit |
4e8bc4 |
*/
|
|
Packit |
4e8bc4 |
static void memcheck(slab_automove *a) {
|
|
Packit |
4e8bc4 |
unsigned int total_pages = 0;
|
|
Packit |
4e8bc4 |
if (current_time < a->last_memcheck_run + MEMCHECK_PERIOD)
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
a->last_memcheck_run = current_time;
|
|
Packit |
4e8bc4 |
for (int n = 1; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
|
|
Packit |
4e8bc4 |
slab_stats_automove *sam = &a->sam_after[n];
|
|
Packit |
4e8bc4 |
total_pages += sam->total_pages;
|
|
Packit |
4e8bc4 |
unsigned int hold_free = (sam->total_pages * sam->chunks_per_page)
|
|
Packit |
4e8bc4 |
* a->free_ratio;
|
|
Packit |
4e8bc4 |
if (sam->chunks_per_page * MIN_PAGES_FREE > hold_free)
|
|
Packit |
4e8bc4 |
hold_free = sam->chunks_per_page * MIN_PAGES_FREE;
|
|
Packit |
4e8bc4 |
a->free_mem[n] = hold_free;
|
|
Packit |
4e8bc4 |
if (a->settings->ext_free_memchunks[n] != hold_free && a->pool_filled_once) {
|
|
Packit |
4e8bc4 |
a->settings->ext_free_memchunks[n] = hold_free;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
// remember to add what remains in global pool.
|
|
Packit |
4e8bc4 |
total_pages += a->sam_after[0].total_pages;
|
|
Packit |
4e8bc4 |
a->free_mem[0] = total_pages * a->free_ratio;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
static struct window_data *get_window_data(slab_automove *a, int class) {
|
|
Packit |
4e8bc4 |
int w_offset = class * a->window_size;
|
|
Packit |
4e8bc4 |
return &a->window_data[w_offset + (a->window_cur % a->window_size)];
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
void slab_automove_extstore_run(void *arg, int *src, int *dst) {
|
|
Packit |
4e8bc4 |
slab_automove *a = (slab_automove *)arg;
|
|
Packit |
4e8bc4 |
int n;
|
|
Packit |
4e8bc4 |
struct window_data w_sum;
|
|
Packit |
4e8bc4 |
int oldest = -1;
|
|
Packit |
4e8bc4 |
uint64_t oldest_age = 0;
|
|
Packit |
4e8bc4 |
int youngest = -1;
|
|
Packit |
4e8bc4 |
uint64_t youngest_age = ~0;
|
|
Packit |
4e8bc4 |
bool too_free = false;
|
|
Packit |
4e8bc4 |
*src = -1;
|
|
Packit |
4e8bc4 |
*dst = -1;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
global_pool_check(a);
|
|
Packit |
4e8bc4 |
struct window_global wg_sum;
|
|
Packit |
4e8bc4 |
memset(&wg_sum, 0, sizeof(struct window_global));
|
|
Packit |
4e8bc4 |
window_global_sum(a->window_global, &wg_sum, a->window_size);
|
|
Packit |
4e8bc4 |
// fill after structs
|
|
Packit |
4e8bc4 |
fill_item_stats_automove(a->iam_after);
|
|
Packit |
4e8bc4 |
fill_slab_stats_automove(a->sam_after);
|
|
Packit |
4e8bc4 |
a->window_cur++;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
memcheck(a);
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// iterate slabs
|
|
Packit |
4e8bc4 |
for (n = POWER_SMALLEST; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
|
|
Packit |
4e8bc4 |
bool small_slab = a->sam_before[n].chunk_size < a->item_size
|
|
Packit |
4e8bc4 |
? true : false;
|
|
Packit |
4e8bc4 |
bool free_enough = false;
|
|
Packit |
4e8bc4 |
struct window_data *wd = get_window_data(a, n);
|
|
Packit |
4e8bc4 |
// summarize the window-up-to-now.
|
|
Packit |
4e8bc4 |
memset(&w_sum, 0, sizeof(struct window_data));
|
|
Packit |
4e8bc4 |
int w_offset = n * a->window_size;
|
|
Packit |
4e8bc4 |
window_sum(&a->window_data[w_offset], &w_sum, a->window_size);
|
|
Packit |
4e8bc4 |
memset(wd, 0, sizeof(struct window_data));
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// if page delta, oom, or evicted delta, mark window dirty
|
|
Packit |
4e8bc4 |
// classes marked dirty cannot donate memory back to global pool.
|
|
Packit |
4e8bc4 |
if (a->iam_after[n].evicted - a->iam_before[n].evicted > 0 ||
|
|
Packit |
4e8bc4 |
a->iam_after[n].outofmemory - a->iam_before[n].outofmemory > 0) {
|
|
Packit |
4e8bc4 |
wd->evicted = 1;
|
|
Packit |
4e8bc4 |
wd->dirty = 1;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
if (a->sam_after[n].total_pages - a->sam_before[n].total_pages > 0) {
|
|
Packit |
4e8bc4 |
wd->dirty = 1;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
// Mark excess free if we're over the free mem limit for too long.
|
|
Packit |
4e8bc4 |
// "free_enough" means it is either wobbling, recently received a new
|
|
Packit |
4e8bc4 |
// page of memory, or the crawler is freeing memory.
|
|
Packit |
4e8bc4 |
if (a->sam_after[n].free_chunks > a->free_mem[n]) {
|
|
Packit |
4e8bc4 |
free_enough = true;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
// double the free requirements means we may have memory we can
|
|
Packit |
4e8bc4 |
// reclaim to global, if it stays this way for the whole window.
|
|
Packit |
4e8bc4 |
if (a->sam_after[n].free_chunks > (a->free_mem[n] * 2) && a->free_mem[n] > 0) {
|
|
Packit |
4e8bc4 |
wd->excess_free = 1;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// set age into window
|
|
Packit |
4e8bc4 |
wd->age = a->iam_after[n].age;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// grab age as average of window total
|
|
Packit |
4e8bc4 |
uint64_t age = w_sum.age / a->window_size;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// if > N free chunks and not dirty, reclaim memory
|
|
Packit |
4e8bc4 |
// small slab classes aren't age balanced and rely more on global
|
|
Packit |
4e8bc4 |
// pool. reclaim them more aggressively.
|
|
Packit |
4e8bc4 |
if (a->sam_after[n].free_chunks > a->sam_after[n].chunks_per_page * MIN_PAGES_FOR_RECLAIM
|
|
Packit |
4e8bc4 |
&& w_sum.dirty == 0) {
|
|
Packit |
4e8bc4 |
if (small_slab) {
|
|
Packit |
4e8bc4 |
*src = n;
|
|
Packit |
4e8bc4 |
*dst = 0;
|
|
Packit |
4e8bc4 |
too_free = true;
|
|
Packit |
4e8bc4 |
} else if (!small_slab && w_sum.excess_free >= a->window_size) {
|
|
Packit |
4e8bc4 |
// If large slab and free chunks haven't decreased for a full
|
|
Packit |
4e8bc4 |
// window, reclaim pages.
|
|
Packit |
4e8bc4 |
*src = n;
|
|
Packit |
4e8bc4 |
*dst = 0;
|
|
Packit |
4e8bc4 |
too_free = true;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
if (!small_slab) {
|
|
Packit |
4e8bc4 |
// if oldest and have enough pages, is oldest
|
|
Packit |
4e8bc4 |
if (age > oldest_age
|
|
Packit |
4e8bc4 |
&& a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
|
|
Packit |
4e8bc4 |
oldest = n;
|
|
Packit |
4e8bc4 |
oldest_age = age;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
// don't count as youngest if it hasn't been using new chunks.
|
|
Packit |
4e8bc4 |
// (if it was relaxed recently, and is currently "free enough")
|
|
Packit |
4e8bc4 |
if (age < youngest_age && a->sam_after[n].total_pages != 0
|
|
Packit |
4e8bc4 |
&& w_sum.excess_free < a->window_size
|
|
Packit |
4e8bc4 |
&& !(w_sum.relaxed && free_enough)) {
|
|
Packit |
4e8bc4 |
youngest = n;
|
|
Packit |
4e8bc4 |
youngest_age = age;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
memcpy(a->iam_before, a->iam_after,
|
|
Packit |
4e8bc4 |
sizeof(item_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
|
|
Packit |
4e8bc4 |
memcpy(a->sam_before, a->sam_after,
|
|
Packit |
4e8bc4 |
sizeof(slab_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
|
|
Packit |
4e8bc4 |
// only make decisions if window has filled once.
|
|
Packit |
4e8bc4 |
if (a->window_cur < a->window_size)
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
if (wg_sum.pool_high >= a->window_size && !wg_sum.pool_low && youngest != -1) {
|
|
Packit |
4e8bc4 |
if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest]) {
|
|
Packit |
4e8bc4 |
*src = 0;
|
|
Packit |
4e8bc4 |
*dst = youngest;
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
struct window_data *wd = get_window_data(a, youngest);
|
|
Packit |
4e8bc4 |
// "relaxing" here and below allows us to skip classes which will
|
|
Packit |
4e8bc4 |
// never grow or are growing slowly, more quickly finding other
|
|
Packit |
4e8bc4 |
// classes which violate the age ratio.
|
|
Packit |
4e8bc4 |
wd->relaxed = 1;
|
|
Packit |
4e8bc4 |
} else if (!too_free && wg_sum.pool_low && oldest != -1) {
|
|
Packit |
4e8bc4 |
*src = oldest;
|
|
Packit |
4e8bc4 |
*dst = 0;
|
|
Packit |
4e8bc4 |
} else if (!too_free && youngest != -1 && oldest != -1 && youngest != oldest) {
|
|
Packit |
4e8bc4 |
// if we have a youngest and oldest, and oldest is outside the ratio.
|
|
Packit |
4e8bc4 |
if (youngest_age < ((double)oldest_age * a->max_age_ratio)) {
|
|
Packit |
4e8bc4 |
struct window_data *wd = get_window_data(a, youngest);
|
|
Packit |
4e8bc4 |
wd->relaxed = 1;
|
|
Packit |
4e8bc4 |
// only actually assign more memory if it's absorbed what it has
|
|
Packit |
4e8bc4 |
if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest]) {
|
|
Packit |
4e8bc4 |
*src = 0;
|
|
Packit |
4e8bc4 |
*dst = youngest;
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
}
|
|
Packit |
4e8bc4 |
return;
|
|
Packit |
4e8bc4 |
}
|