#!/usr/bin/python3
# Copyright 2017 Facebook.
# Licensed under the same terms as memcached itself.

import argparse
import socket
import sys
import re
import traceback
from time import sleep, time

# Command-line interface. Every option has a default, so the script can be
# started without arguments against a local memcached.
parser = argparse.ArgumentParser(description="daemon for rebalancing slabs")
parser.add_argument("--host", help="host to connect to",
                    default="localhost:11211", metavar="HOST:PORT")
parser.add_argument("-s", "--sleep", help="seconds between runs",
                    type=int, default=1)
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-a", "--automove", action="store_true", default=False,
                    help="enable automatic page rebalancing")
parser.add_argument("-w", "--window", type=int, default=30,
                    help="rolling window size for decision history")
parser.add_argument("-r", "--ratio", type=float, default=0.8,
                    help="ratio limiting distance between low/high class ages")
parser.add_argument("-f", "--free", type=float, default=0.005,
                    help="free chunks/pages buffer ratio")
parser.add_argument("-z", "--size", type=int, default=512,
                    help="item size cutoff for storage")

args = parser.parse_args()

# Split on the LAST colon only so that hosts which themselves contain colons
# (e.g. IPv6 literals) still yield a usable HOST/PORT pair, and report a
# malformed value as a usage error instead of an unpacking traceback.
host, _sep, port = args.host.rpartition(':')
if not host or not port:
    parser.error("--host must be given as HOST:PORT")
# Allow the bracketed IPv6 form "[::1]:11211".
host = host.strip('[]')

# A class needs more than this many pages to be used as a page source.
MIN_PAGES_FOR_SOURCE = 2
# A class with more than this many pages worth of free chunks is a
# candidate for handing a page back to the global pool.
MIN_PAGES_FOR_RECLAIM = 2.5
# Minimum number of pages worth of free chunks to hold in each class.
MIN_PAGES_FREE = 1.5
# Seconds between recalculations of the free-memory targets.
MEMCHECK_PERIOD = 60


def window_check(history, sid, key):
    """Sum the value stored under `key` for slab class `sid` across windows.

    history: dict whose 'w' entry is the list of per-run window dicts.
    sid:     slab class id used as the key inside each window.
    key:     name of the per-class marker or value to add up.

    Windows that have no entry for `sid`, or whose entry has no truthy
    value for `key`, contribute nothing. Returns the accumulated total.
    """
    total = 0
    for window in history['w']:
        entry = window.get(sid)
        if not entry:
            continue
        value = entry.get(key)
        if value:
            total += value
    return total


def window_key_check(history, key):
    """Sum the window-level value stored under `key` across all windows.

    Unlike window_check() this reads `key` directly from each window dict
    rather than from a per-slab-class entry. Missing or falsy values count
    as zero. Returns the accumulated total.
    """
    return sum(window.get(key) or 0 for window in history['w'])


def determine_move(history, stats, diffs, memfree):
    """ Figure out if a page move is in order.

    - Use as much memory as possible to hold items, reducing the load on
      flash.
    - tries to keep the global free page pool inbetween poolmin/poolmax.
    - avoids flapping as much as possible:
      - only pull pages off of a class if it hasn't recently evicted or allocated pages.
      - only pull pages off if a sufficient number of free chunks are available.
      - if global pool is below minimum remove pages from oldest large class.
      - if global pool is above maximum, move pages to youngest large class.
    - extstore manages a desired number of free chunks in each slab class.
    - automover adjusts above limits once per minute based on current sizes.
    - if youngest is below the age ratio limit of oldest, move a page to it.

    history: dict with a 'w' list of per-run window dicts; a new window is
             appended (and the oldest dropped) on every call.
    stats:   general stats dict; 'slab_global_page_pool' is read from it.
    diffs:   per-slab-class dicts as produced by diff_stats().
    memfree: free-chunk targets keyed by slab class id, with key 0 holding
             the target size of the global page pool.

    Returns a (source, destination) tuple of slab class ids, where 0 stands
    for the global page pool, or (-1, -1) when no move should be made or
    the history does not yet span a full window.
    """
    # rotate windows
    history['w'].append({})
    if len(history['w']) > args.window:
        history['w'].pop(0)
    w = history['w'][-1]
    window_count = len(history['w'])

    oldest = (-1, 0)
    youngest = (-1, sys.maxsize)
    too_free = False
    decision = (-1, -1)

    global_pool = int(stats['slab_global_page_pool'])
    if global_pool < memfree[0] / 2:
        w['slab_pool_low'] = 1
    if global_pool > memfree[0]:
        w['slab_pool_high'] = 1
    if args.verbose:
        print("global pool: [{}]".format(stats['slab_global_page_pool']))

    pool_low = window_key_check(history, 'slab_pool_low')
    for sid, slab in diffs.items():
        # Only balance larger slab classes
        small_slab = slab['chunk_size'] < args.size

        w[sid] = {}
        if 'evicted_d' not in slab or 'total_pages_d' not in slab:
            continue

        # A class that showed up after the last memfree check has no target
        # yet; treat its target as 0 instead of failing with a KeyError.
        hold_free = memfree.get(sid, 0)

        # mark this window as dirty if total pages increases or evictions
        # happened
        if slab['total_pages_d'] > 0:
            w[sid]['dirty'] = 1
        if slab['evicted_d'] > 0:
            w[sid]['dirty'] = 1
            w[sid]['ev'] = 1
        free_enough = slab['free_chunks'] > hold_free
        if hold_free > 0 and slab['free_chunks'] > hold_free * 2:
            w[sid]['excess_free'] = 1
        w[sid]['age'] = slab['age']
        # Average age of this class over the recorded windows.
        age = window_check(history, sid, 'age') / window_count
        excess = window_check(history, sid, 'excess_free')

        # if > 2.5 pages free, and not dirty, reassign to global page pool
        reclaimable = slab['free_chunks'] > \
            slab['chunks_per_page'] * MIN_PAGES_FOR_RECLAIM
        if reclaimable and not too_free:
            dirt = window_check(history, sid, 'dirty')
            # Small slabs don't hang on to free memory forever; large ones
            # must also have had excess free chunks in every window.
            if dirt == 0 and (small_slab or excess >= window_count):
                decision = (sid, 0)
                too_free = True

        # are we the oldest slab class? (and a valid target)
        # don't consider for young if we've recently given it unused memory
        if not small_slab:
            if age > oldest[1] and slab['total_pages'] > MIN_PAGES_FOR_SOURCE:
                oldest = (sid, age)
            if age < youngest[1] and slab['total_pages'] > 0 \
                    and excess < window_count \
                    and not (window_check(history, sid, 'relaxed') and free_enough):
                youngest = (sid, age)

    if w.get('slab_pool_high') and youngest[0] != -1:
        # if global pool is too high, feed youngest large class.
        # Look at the youngest class itself, not whichever class the loop
        # above happened to visit last.
        young = diffs[youngest[0]]
        if young['free_chunks'] <= memfree.get(youngest[0], 0):
            decision = (0, youngest[0])
        w[youngest[0]]['relaxed'] = 1
    elif not too_free and pool_low and oldest[0] != -1:
        # if pool is too low, take from oldest large class.
        if args.verbose:
            print("oldest:  [class: {}] [age: {:.2f}]".format(int(oldest[0]), oldest[1]))
        decision = (oldest[0], 0)
    elif not too_free and youngest[0] != -1 and oldest[0] != -1 and youngest[0] != oldest[0]:
        # youngest is outside of the tolerance ratio, move a page around.
        if args.verbose:
            print("old:   [class: {}] [age: {:.2f}]\nyoung: [class: {}] [age: {:.2f}]".format(
                int(oldest[0]), oldest[1], int(youngest[0]), youngest[1]))

        young = diffs[youngest[0]]
        if youngest[1] < oldest[1] * args.ratio:
            w[youngest[0]]['relaxed'] = 1
            if young['free_chunks'] <= memfree.get(youngest[0], 0):
                decision = (0, youngest[0])

    # Only act once the history covers a full window.
    if len(history['w']) >= args.window:
        return decision
    return (-1, -1)


def run_move(s, decision):
    """Send a "slabs reassign" command for `decision` over connection `s`.

    s:        file-like connection object offering write() and readline().
    decision: (source, destination) pair of slab class ids.

    Reads one response line; it is printed only in verbose mode.
    """
    s.write("slabs reassign {} {}\r\n".format(decision[0], decision[1]))
    line = s.readline().rstrip()
    if args.verbose:
        print("move result:", line)


def diff_stats(before, after):
    """ fills out "diffs" as deltas between before/after,
    and "totals" as the sum of all slab classes.
    "_d" postfix to keys means the delta between before/after.
    non-postfix keys are total as of 'after's reading.

    before, after: dicts mapping slab id -> {stat name: string value}.

    Only slab classes present (and non-empty) in both readings are
    reported, and only keys present in both readings get a "_d" entry.
    Values that are not plain unsigned integers in both readings are left
    as their original strings with a delta of 0.

    Returns a (diffs, totals) tuple.
    """
    numeric = re.compile(r"\d+")
    diffs = {}
    totals = {}
    for slabid, sa in after.items():
        sb = before.get(slabid)
        if not (sb and sa):
            continue
        slab = sa.copy()
        for k, raw_after in sa.items():
            if k not in sb:
                continue
            delta_key = k + '_d'
            totals.setdefault(k, 0)
            totals.setdefault(delta_key, 0)
            slab.setdefault(delta_key, 0)
            # Require both readings to be numeric so a malformed "before"
            # value cannot abort the run halfway through an update.
            if numeric.fullmatch(raw_after) and numeric.fullmatch(sb[k]):
                now = int(raw_after)
                delta = now - int(sb[k])
                totals[k] += now
                totals[delta_key] += delta
                slab[k] = now
                slab[delta_key] = delta
        slab['slab'] = slabid
        diffs[slabid] = slab
    return (diffs, totals)


def read_slab_stats(s):
    """Read "stats items" and "stats slabs" and merge them per slab class.

    s: file-like connection object offering write() and readline().

    Returns a dict mapping slab id (string) -> {stat name: string value}.
    Lines that do not carry a slab class id are ignored.

    Raises ConnectionError if the connection is closed before the "END"
    line arrives; without this an EOF would make the read loop spin forever.
    """
    stat_line = re.compile(r"^STAT (?:items:)?(\d+):(\S+) (\S+)")
    slabs = {}
    for statcmd in ['items', 'slabs']:
        s.write("stats " + statcmd + "\r\n")
        while True:
            raw = s.readline()
            if not raw:
                # readline() returns an empty string only at EOF.
                raise ConnectionError(
                    "connection closed while reading stats " + statcmd)
            line = raw.rstrip()
            if line.startswith("END"):
                break

            m = stat_line.match(line)
            if m:
                (slab, var, val) = m.groups()
                slabs.setdefault(slab, {})[var] = val
    return slabs


# HACK: the main loop looks at 'evictions' being nonzero to indicate memory
# filled at some point.
def read_stats(s):
    """Read the general "stats" output into a dict.

    s: file-like connection object offering write() and readline().

    Returns a dict mapping stat name -> string value. Lines that are not
    of the form "STAT <name> <value>" are ignored.

    Raises ConnectionError if the connection is closed before the "END"
    line arrives; without this an EOF would make the read loop spin forever.
    """
    stat_line = re.compile(r"^STAT (\S+) (\S+)")
    stats = {}
    s.write("stats\r\n")
    while True:
        raw = s.readline()
        if not raw:
            # readline() returns an empty string only at EOF.
            raise ConnectionError("connection closed while reading stats")
        line = raw.rstrip()
        if line.startswith("END"):
            break

        m = stat_line.match(line)
        if m:
            (key, val) = m.groups()
            stats[key] = val
    return stats


def pct(num, divisor):
    """Return num / divisor, or 0 when divisor is zero or otherwise falsy.

    Note that the result is a plain ratio; it is not multiplied by 100.
    """
    if not divisor:
        return 0
    return num / divisor


def show_detail(diffs, totals):
    """ just a pretty printer for some extra data

    diffs:  per-slab-class dicts as produced by diff_stats().
    totals: summed stats across slab classes as produced by diff_stats().

    Prints one header line followed by one line per slab class that has an
    'evicted_d' entry. The values printed next to '%' come straight from
    pct(), which returns a ratio rather than a value scaled to 100.
    """
    print("\n  {:2s}: {:8s} (pct  ) {:10s} (pct    ) {:6s} (pct)   {:6s}".format('sb',
                'evicted', 'items', 'pages', 'age'))

    for sid, slab in diffs.items():
        # Classes without item stats have nothing to report.
        if 'evicted_d' not in slab:
            continue
        print("  {:2d}: {:8d} ({:.2f}%) {:10d} ({:.4f}%) {:6d} ({:.2f}%) {:6d}".format(
              int(sid), slab['evicted_d'], pct(slab['evicted_d'], totals['evicted_d']),
              slab['number'], pct(slab['number'], totals['number']),
              slab['total_pages'], pct(slab['total_pages'],
              totals['total_pages']),
              slab['age']))


def memfree_check(s, diffs, totals):
    """Recompute the free-memory targets and push them to the server.

    s:      file-like connection object offering write() and readline().
    diffs:  per-slab-class dicts as produced by diff_stats().
    totals: summed stats across slab classes as produced by diff_stats().

    For every slab class an "extstore free_memchunks" command is sent with
    the number of chunks to keep free; one response line is read and
    discarded per command.

    Returns a dict mapping slab id -> free chunk target, plus key 0 holding
    the number of pages to leave in the global pool.
    """
    info = {}
    # manage about this many free chunks in each slab class.
    for sid, slab in diffs.items():
        # Slab ids arrive as strings, so compare numerically; class 0 is
        # the global pool and is handled separately below.
        if int(sid) == 0:
            continue
        total_chunks = slab['used_chunks'] + slab['free_chunks']
        # Hold a minimum of 1.5 pages so page moves are unlikely to lose items.
        hold_free = max(int(total_chunks * args.free),
                        int(slab['chunks_per_page'] * MIN_PAGES_FREE))
        info[sid] = hold_free
        # TODO: only adjust if different?
        s.write("extstore free_memchunks {} {}\r\n".format(sid, hold_free))
        s.readline()

    # how many pages to leave in the global pool.
    info[0] = int(totals['total_pages'] * args.free)
    return info


# Main daemon loop: (re)connect, disable the server's built-in automover,
# then poll stats and decide on page moves until the connection fails.
stats_pre = {}
history = {'w': [{}]}
memfree = {0: 2}
last_memfree_check = 0
while True:
    try:
        with socket.create_connection((host, port), 5) as c:
            s = c.makefile(mode="rw", buffering=1)
            s.write("slabs automove 0\r\n")
            print(s.readline().rstrip())
            while True:
                stats_post = read_slab_stats(s)
                stats = read_stats(s)
                (diffs, totals) = diff_stats(stats_pre, stats_post)
                #if args.verbose:
                #    show_detail(diffs, totals)

                # Reset on every pass so a run without evictions neither hits
                # an unbound name nor reuses the previous run's decision.
                decision = (-1, -1)
                moved = False
                if int(stats['evictions']) > 0:
                    if (last_memfree_check < time() - MEMCHECK_PERIOD) and totals.get('total_pages'):
                        memfree = memfree_check(s, diffs, totals)
                        last_memfree_check = time()
                    decision = determine_move(history, stats, diffs, memfree)
                    # NOTE(review): decisions whose source is 0 (the global
                    # pool) never pass this check and so are never executed
                    # here - confirm that this is intentional.
                    if int(decision[0]) > 0 and int(decision[1]) >= 0:
                        print("moving page from, to:", decision)
                        if args.automove:
                            run_move(s, decision)
                            moved = True

                # Minimize sleeping if we just moved a page to global pool.
                # Improves responsiveness during flushes/quick changes.
                # Only do so when a move was really issued; otherwise a dry
                # run would poll the server in a tight loop.
                if moved and decision[1] == 0:
                    continue
                sleep(args.sleep)
                stats_pre = stats_post
    except Exception as err:
        # Deliberately broad: any failure drops the connection state and
        # retries. KeyboardInterrupt/SystemExit are not caught, so the
        # daemon can still be stopped.
        print("disconnected:", type(err), err)
        traceback.print_exc()
        stats_pre = {}
        history = {'w': [{}]}
        sleep(args.sleep)