|
Packit |
4e8bc4 |
#!/usr/bin/python3
|
|
Packit |
4e8bc4 |
# Copyright 2017 Facebook.
|
|
Packit |
4e8bc4 |
# Licensed under the same terms as memcached itself.
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
import argparse
|
|
Packit |
4e8bc4 |
import socket
|
|
Packit |
4e8bc4 |
import sys
|
|
Packit |
4e8bc4 |
import re
|
|
Packit |
4e8bc4 |
import traceback
|
|
Packit |
4e8bc4 |
from time import sleep, time
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
# Command-line interface.  Numeric options carry numeric defaults so the
# declared type and the default agree without relying on argparse converting
# string defaults.
parser = argparse.ArgumentParser(description="daemon for rebalancing slabs")
parser.add_argument("--host", help="host to connect to",
                    default="localhost:11211", metavar="HOST:PORT")
parser.add_argument("-s", "--sleep", help="seconds between runs",
                    type=int, default=1)
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-a", "--automove", action="store_true", default=False,
                    help="enable automatic page rebalancing")
parser.add_argument("-w", "--window", type=int, default=30,
                    help="rolling window size for decision history")
parser.add_argument("-r", "--ratio", type=float, default=0.8,
                    help="ratio limiting distance between low/high class ages")
parser.add_argument("-f", "--free", type=float, default=0.005,
                    help="free chunks/pages buffer ratio")
parser.add_argument("-z", "--size", type=int, default=512,
                    help="item size cutoff for storage")

args = parser.parse_args()

# Split on the LAST colon only, so a bracketed IPv6 literal such as
# "[::1]:11211" keeps its address intact, and fall back to the default port
# when none is given instead of failing on tuple unpacking.
host, _sep, port = args.host.rpartition(':')
if not _sep:
    host, port = args.host, ""
host = host.strip("[]")
port = port or "11211"
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
# A large slab class must own more than this many pages before it may be
# picked as the "oldest" class, i.e. the one pages are taken from.
MIN_PAGES_FOR_SOURCE = 2
# A class becomes a reclaim candidate once its free chunks exceed this many
# pages' worth of chunks.
MIN_PAGES_FOR_RECLAIM = 2.5
# Lower bound, in pages' worth of chunks, for the free-chunk target of a class.
MIN_PAGES_FREE = 1.5
# Period, in seconds, for refreshing the per-class free-memory targets.
MEMCHECK_PERIOD = 60
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def window_check(history, sid, key):
    """Sum one per-slab value across every window in the history.

    Args:
        history: dict whose 'w' entry is the list of window dicts.
        sid: slab class id used as the key inside each window.
        key: name of the per-slab value to add up.

    Returns:
        The sum of all truthy ``window[sid][key]`` values; 0 when none exist.
    """
    per_window = (window.get(sid) for window in history['w'])
    values = (entry.get(key) for entry in per_window if entry)
    return sum(value for value in values if value)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def window_key_check(history, key):
    """Sum a window-level value across every window in the history.

    Args:
        history: dict whose 'w' entry is the list of window dicts.
        key: top-level key looked up directly in each window.

    Returns:
        The sum of all truthy ``window[key]`` values; 0 when none exist.
    """
    found = (window.get(key) for window in history['w'])
    return sum(value for value in found if value)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def determine_move(history, stats, diffs, memfree):
    """Figure out if a page move is in order.

    - Use as much memory as possible to hold items, reducing the load on
      flash.
    - tries to keep the global free page pool in between poolmin/poolmax.
    - avoids flapping as much as possible:
      - only pull pages off of a class if it hasn't recently evicted or
        allocated pages.
      - only pull pages off if a sufficient number of free chunks are
        available.
      - if global pool is below minimum remove pages from oldest large class.
      - if global pool is above maximum, move pages to youngest large class.
    - extstore manages a desired number of free chunks in each slab class.
    - automover adjusts above limits once per minute based on current sizes.
    - if youngest is below the age ratio limit of oldest, move a page to it.

    Args:
        history: dict whose 'w' entry is the rolling list of window dicts.
            A fresh window is appended on every call and the oldest one is
            dropped once more than ``args.window`` are held.
        stats: general stats mapping; only 'slab_global_page_pool' is read.
        diffs: per-slab-class dicts keyed by slab id (see diff_stats()).
        memfree: free-chunk target per slab id, plus the global page pool
            target under key 0.

    Returns:
        A ``(source, destination)`` tuple of slab class ids, where 0 stands
        for the global page pool.  ``(-1, -1)`` means "no move"; it is also
        returned until the history holds a full ``args.window`` windows.
    """
    windows = history['w']
    # rotate windows
    windows.append({})
    if len(windows) > args.window:
        windows.pop(0)
    w = windows[-1]
    # The window list is not modified again below, so its length is fixed.
    window_count = len(windows)

    oldest = (-1, 0)
    youngest = (-1, sys.maxsize)
    too_free = False
    decision = (-1, -1)

    global_pool = int(stats['slab_global_page_pool'])
    if global_pool < memfree[0] / 2:
        w['slab_pool_low'] = 1
    if global_pool > memfree[0]:
        w['slab_pool_high'] = 1
    if args.verbose:
        print("global pool: [{}]".format(stats['slab_global_page_pool']))

    pool_low = window_key_check(history, 'slab_pool_low')
    for sid, slab in diffs.items():
        # Only balance larger slab classes
        small_slab = slab['chunk_size'] < args.size
        free_enough = False

        w[sid] = {}
        if 'evicted_d' not in slab or 'total_pages_d' not in slab:
            continue
        # mark this window as dirty if total pages increases or evictions
        # happened
        if slab['total_pages_d'] > 0:
            w[sid]['dirty'] = 1
        if slab['evicted_d'] > 0:
            w[sid]['dirty'] = 1
            w[sid]['ev'] = 1
        if slab['free_chunks'] > memfree[sid]:
            free_enough = True
        if memfree[sid] > 0 and slab['free_chunks'] > (memfree[sid] * 2):
            w[sid]['excess_free'] = 1
        w[sid]['age'] = slab['age']
        # Average age of this class over the retained windows.
        age = window_check(history, sid, 'age') / window_count

        # if > 2.5 pages free, and not dirty, reassign to global page pool
        reclaimable = slab['free_chunks'] > slab['chunks_per_page'] * MIN_PAGES_FOR_RECLAIM
        if reclaimable and not too_free:
            dirt = window_check(history, sid, 'dirty')
            excess = window_check(history, sid, 'excess_free')
            if small_slab and dirt == 0:
                # If we're a small slab, don't hang on to free memory forever.
                decision = (sid, 0)
                too_free = True
            elif not small_slab and dirt == 0 and excess >= window_count:
                decision = (sid, 0)
                too_free = True

        # are we the oldest slab class? (and a valid target)
        # don't consider for young if we've recently given it unused memory
        if not small_slab:
            if age > oldest[1] and slab['total_pages'] > MIN_PAGES_FOR_SOURCE:
                oldest = (sid, age)
            if age < youngest[1] and slab['total_pages'] > 0 \
                    and window_check(history, sid, 'excess_free') < window_count \
                    and not (window_check(history, sid, 'relaxed') and free_enough):
                youngest = (sid, age)

    if w.get('slab_pool_high') and youngest[0] != -1:
        # if global pool is too high, feed youngest large class.
        # Inspect the youngest class itself; the loop variable above still
        # points at whichever class happened to be iterated last.
        if diffs[youngest[0]]['free_chunks'] <= memfree[youngest[0]]:
            decision = (0, youngest[0])
        w[youngest[0]]['relaxed'] = 1
    elif not too_free and pool_low and oldest[0] != -1:
        # if pool is too low, take from oldest large class.
        if args.verbose:
            print("oldest: [class: {}] [age: {:.2f}]".format(int(oldest[0]), oldest[1]))
        decision = (oldest[0], 0)
    elif not too_free and youngest[0] != -1 and oldest[0] != -1 and youngest[0] != oldest[0]:
        # youngest is outside of the tolerance ratio, move a page around.
        if args.verbose:
            print("old: [class: {}] [age: {:.2f}]\nyoung: [class: {}] [age: {:.2f}]".format(
                int(oldest[0]), oldest[1], int(youngest[0]), youngest[1]))

        young_slab = diffs[youngest[0]]
        if youngest[1] < oldest[1] * args.ratio:
            w[youngest[0]]['relaxed'] = 1
            if young_slab['free_chunks'] <= memfree[youngest[0]]:
                decision = (0, youngest[0])

    # Hold back every decision until a full window of history exists.
    if len(history['w']) >= args.window:
        return decision
    return (-1, -1)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def run_move(s, decision):
    """Send a "slabs reassign" command for the given decision.

    Args:
        s: text stream connected to the server (needs write/readline).
        decision: (source, destination) pair of slab class ids.

    One reply line is read back; it is printed only in verbose mode.
    """
    source, destination = decision[0], decision[1]
    s.write("slabs reassign {} {}\r\n".format(source, destination))
    reply = s.readline().rstrip()
    if args.verbose:
        print("move result:", reply)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def diff_stats(before, after):
    """Build per-slab deltas and cross-slab totals from two stat snapshots.

    Keys with a "_d" postfix hold the change between ``before`` and
    ``after``; keys without it hold the value as of ``after``.  Only slab
    classes present (and non-empty) in both snapshots are reported, and only
    stats present in both are diffed.  Values made purely of digits are
    converted to int; anything else keeps its raw value with a delta of 0.

    Args:
        before: earlier snapshot, ``{slab_id: {stat_name: str_value}}``.
        after: later snapshot with the same layout.

    Returns:
        ``(diffs, totals)``: ``diffs`` maps each slab id to its stat dict
        (plus a 'slab' entry holding the id), ``totals`` sums every stat and
        every delta over all reported slab classes.
    """
    diffs = {}
    totals = {}
    for slabid, current in after.items():
        previous = before.get(slabid)
        if not (previous and current):
            continue
        slab = current.copy()
        for stat, raw in current.items():
            if stat not in previous:
                continue
            delta_key = stat + '_d'
            if stat not in totals:
                totals[stat] = 0
                totals[delta_key] = 0
            if delta_key not in slab:
                slab[delta_key] = 0
            if not re.search(r"^\d+$", raw):
                continue
            value = int(raw)
            delta = value - int(previous[stat])
            totals[stat] += value
            slab[stat] = value
            slab[delta_key] = delta
            totals[delta_key] += delta
        slab['slab'] = slabid
        diffs[slabid] = slab
    return (diffs, totals)
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def read_slab_stats(s):
    """Collect per-slab-class values from "stats items" and "stats slabs".

    Args:
        s: text stream connected to the server (needs write/readline).

    Returns:
        ``{slab_id: {stat_name: value}}`` with ids and values kept as the
        strings received.  Lines that carry no slab id are ignored.

    Raises:
        ConnectionError: the stream reached end-of-file before "END".
    """
    stat_line = re.compile(r"^STAT (?:items:)?(\d+):(\S+) (\S+)")
    slabs = {}
    for statcmd in ('items', 'slabs'):
        s.write("stats " + statcmd + "\r\n")
        while True:
            raw = s.readline()
            if not raw:
                # readline() yields '' forever once the peer has closed the
                # connection; bail out rather than spin waiting for "END".
                raise ConnectionError(
                    "connection closed while reading 'stats " + statcmd + "'")
            line = raw.rstrip()
            if line.startswith("END"):
                break

            m = stat_line.match(line)
            if m:
                slab, var, val = m.groups()
                slabs.setdefault(slab, {})[var] = val
    return slabs
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
# HACK: let's look at 'evictions' being nonzero to indicate memory filled at some point.
|
|
Packit |
4e8bc4 |
def read_stats(s):
    """Fetch the general "stats" output as a flat mapping.

    Args:
        s: text stream connected to the server (needs write/readline).

    Returns:
        ``{stat_name: value}`` with values kept as the strings received.

    Raises:
        ConnectionError: the stream reached end-of-file before "END".
    """
    stat_line = re.compile(r"^STAT (\S+) (\S+)")
    stats = {}
    s.write("stats\r\n")
    while True:
        raw = s.readline()
        if not raw:
            # readline() yields '' forever once the peer has closed the
            # connection; bail out rather than spin waiting for "END".
            raise ConnectionError("connection closed while reading 'stats'")
        line = raw.rstrip()
        if line.startswith("END"):
            break

        m = stat_line.match(line)
        if m:
            key, val = m.groups()
            stats[key] = val
    return stats
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def pct(num, divisor):
    """Return ``num / divisor``, or 0 when the divisor is falsy.

    The result is the plain quotient; it is not multiplied by 100.
    """
    return (num / divisor) if divisor else 0
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def show_detail(diffs, totals):
    """Pretty-print a per-slab table of evictions, items, pages and age.

    Slab classes without an 'evicted_d' entry are skipped.  Each share
    column is computed with pct() against the matching entry in ``totals``.
    """
    header = "\n {:2s}: {:8s} (pct ) {:10s} (pct ) {:6s} (pct) {:6s}"
    row = " {:2d}: {:8d} ({:.2f}%) {:10d} ({:.4f}%) {:6d} ({:.2f}%) {:6d}"

    print(header.format('sb', 'evicted', 'items', 'pages', 'age'))

    for sid, slab in diffs.items():
        if 'evicted_d' not in slab:
            continue
        columns = (
            int(sid),
            slab['evicted_d'], pct(slab['evicted_d'], totals['evicted_d']),
            slab['number'], pct(slab['number'], totals['number']),
            slab['total_pages'], pct(slab['total_pages'], totals['total_pages']),
            slab['age'],
        )
        print(row.format(*columns))
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
def memfree_check(s, diffs, totals):
    """Compute free-memory targets and push the per-class ones to the server.

    For every slab class in ``diffs`` an "extstore free_memchunks" command is
    written to ``s`` and one reply line is read and discarded.

    Args:
        s: text stream connected to the server (needs write/readline).
        diffs: per-slab-class dicts; 'used_chunks', 'free_chunks' and
            'chunks_per_page' are read from each.
        totals: cross-slab totals; 'total_pages' is read.

    Returns:
        dict mapping each slab id to its free-chunk target, plus key 0
        holding the number of pages to leave in the global pool.
    """
    targets = {}
    # manage about this many free chunks in each slab class.
    for sid, slab in diffs.items():
        if sid == 0:
            continue
        all_chunks = slab['used_chunks'] + slab['free_chunks']
        # Hold a minimum of 1.5 pages so page moves are unlikely to lose items.
        floor_chunks = int(slab['chunks_per_page'] * MIN_PAGES_FREE)
        hold_free = max(int(all_chunks * args.free), floor_chunks)
        targets[sid] = hold_free
        # TODO: only adjust if different?
        s.write("extstore free_memchunks {} {}\r\n".format(sid, hold_free))
        s.readline()

    # how many pages to leave in the global pool.
    targets[0] = int(totals['total_pages'] * args.free)
    return targets
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
|
|
Packit |
4e8bc4 |
# Daemon loop: (re)connect, switch off the server's built-in automover, then
# poll stats and decide on page moves until the connection fails.
stats_pre = {}
history = {'w': [{}]}
memfree = {0: 2}
last_memfree_check = 0
while True:
    try:
        with socket.create_connection((host, port), 5) as c, \
                c.makefile(mode="rw", buffering=1) as s:
            s.write("slabs automove 0\r\n")
            print(s.readline().rstrip())
            while True:
                stats_post = read_slab_stats(s)
                stats = read_stats(s)
                (diffs, totals) = diff_stats(stats_pre, stats_post)
                # Start every pass with "no move" so the check further down
                # never reads an unset or stale decision when no evictions
                # have been recorded yet.
                decision = (-1, -1)
                if int(stats['evictions']) > 0:
                    if (last_memfree_check < time() - MEMCHECK_PERIOD) \
                            and totals.get('total_pages'):
                        memfree = memfree_check(s, diffs, totals)
                        last_memfree_check = time()
                    decision = determine_move(history, stats, diffs, memfree)
                    if int(decision[0]) > 0 and int(decision[1]) >= 0:
                        print("moving page from, to:", decision)
                        if args.automove:
                            run_move(s, decision)

                # Minimize sleeping if we just moved a page to global pool.
                # Improves responsiveness during flushes/quick changes.
                if decision[1] == 0:
                    continue
                sleep(args.sleep)
                stats_pre = stats_post
    except Exception as err:
        # Only ordinary errors trigger a reconnect; KeyboardInterrupt and
        # SystemExit propagate so the daemon can actually be stopped.
        print("disconnected:", type(err), err)
        traceback.print_exc()
        stats_pre = {}
        history = {'w': [{}]}
        sleep(args.sleep)
|
|
Packit |
4e8bc4 |
|