/*
* Copyright 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* btree_map.c -- textbook implementation of btree /w preemptive splitting
*/
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include "btree_map.h"
TOID_DECLARE(struct tree_map_node, BTREE_MAP_TYPE_OFFSET + 1);
#define BTREE_ORDER 8 /* can't be odd */
#define BTREE_MIN ((BTREE_ORDER / 2) - 1) /* min number of keys per node */
struct tree_map_node_item {
uint64_t key;
PMEMoid value;
};
struct tree_map_node {
int n; /* number of occupied slots */
struct tree_map_node_item items[BTREE_ORDER - 1];
TOID(struct tree_map_node) slots[BTREE_ORDER];
};
struct btree_map {
TOID(struct tree_map_node) root;
};
/*
* set_empty_item -- (internal) sets null to the item
*/
static void
set_empty_item(struct tree_map_node_item *item)
{
item->key = 0;
item->value = OID_NULL;
}
/*
* btree_map_create -- allocates a new btree instance
*/
int
btree_map_create(PMEMobjpool *pop, TOID(struct btree_map) *map, void *arg)
{
int ret = 0;
TX_BEGIN(pop) {
pmemobj_tx_add_range_direct(map, sizeof(*map));
*map = TX_ZNEW(struct btree_map);
} TX_ONABORT {
ret = 1;
} TX_END
return ret;
}
/*
* btree_map_clear_node -- (internal) removes all elements from the node
*/
static void
btree_map_clear_node(TOID(struct tree_map_node) node)
{
for (int i = 0; i < D_RO(node)->n; ++i) {
btree_map_clear_node(D_RO(node)->slots[i]);
}
TX_FREE(node);
}
/*
* btree_map_clear -- removes all elements from the map
*/
int
btree_map_clear(PMEMobjpool *pop, TOID(struct btree_map) map)
{
int ret = 0;
TX_BEGIN(pop) {
btree_map_clear_node(D_RO(map)->root);
TX_ADD_FIELD(map, root);
D_RW(map)->root = TOID_NULL(struct tree_map_node);
} TX_ONABORT {
ret = 1;
} TX_END
return ret;
}
/*
* btree_map_destroy -- cleanups and frees btree instance
*/
int
btree_map_destroy(PMEMobjpool *pop, TOID(struct btree_map) *map)
{
int ret = 0;
TX_BEGIN(pop) {
btree_map_clear(pop, *map);
pmemobj_tx_add_range_direct(map, sizeof(*map));
TX_FREE(*map);
*map = TOID_NULL(struct btree_map);
} TX_ONABORT {
ret = 1;
} TX_END
return ret;
}
/*
* btree_map_insert_item_at -- (internal) inserts an item at position
*/
static void
btree_map_insert_item_at(TOID(struct tree_map_node) node, int pos,
struct tree_map_node_item item)
{
D_RW(node)->items[pos] = item;
D_RW(node)->n += 1;
}
/*
* btree_map_insert_empty -- (internal) inserts an item into an empty node
*/
static void
btree_map_insert_empty(TOID(struct btree_map) map,
struct tree_map_node_item item)
{
TX_ADD_FIELD(map, root);
D_RW(map)->root = TX_ZNEW(struct tree_map_node);
btree_map_insert_item_at(D_RO(map)->root, 0, item);
}
/*
* btree_map_insert_node -- (internal) inserts and makes space for new node
*/
static void
btree_map_insert_node(TOID(struct tree_map_node) node, int p,
struct tree_map_node_item item,
TOID(struct tree_map_node) left, TOID(struct tree_map_node) right)
{
TX_ADD(node);
if (D_RO(node)->items[p].key != 0) { /* move all existing data */
memmove(&D_RW(node)->items[p + 1], &D_RW(node)->items[p],
sizeof(struct tree_map_node_item) * ((BTREE_ORDER - 2 - p)));
memmove(&D_RW(node)->slots[p + 1], &D_RW(node)->slots[p],
sizeof(TOID(struct tree_map_node)) * ((BTREE_ORDER - 1 - p)));
}
D_RW(node)->slots[p] = left;
D_RW(node)->slots[p + 1] = right;
btree_map_insert_item_at(node, p, item);
}
/*
* btree_map_create_split_node -- (internal) splits a node into two
*/
static TOID(struct tree_map_node)
btree_map_create_split_node(TOID(struct tree_map_node) node,
struct tree_map_node_item *m)
{
TOID(struct tree_map_node) right = TX_ZNEW(struct tree_map_node);
int c = (BTREE_ORDER / 2);
*m = D_RO(node)->items[c - 1]; /* select median item */
TX_ADD(node);
set_empty_item(&D_RW(node)->items[c - 1]);
/* move everything right side of median to the new node */
for (int i = c; i < BTREE_ORDER; ++i) {
if (i != BTREE_ORDER - 1) {
D_RW(right)->items[D_RW(right)->n++] =
D_RO(node)->items[i];
set_empty_item(&D_RW(node)->items[i]);
}
D_RW(right)->slots[i - c] = D_RO(node)->slots[i];
D_RW(node)->slots[i] = TOID_NULL(struct tree_map_node);
}
D_RW(node)->n = c - 1;
return right;
}
/*
* btree_map_find_dest_node -- (internal) finds a place to insert the new key at
*/
static TOID(struct tree_map_node)
btree_map_find_dest_node(TOID(struct btree_map) map,
TOID(struct tree_map_node) n, TOID(struct tree_map_node) parent,
uint64_t key, int *p)
{
if (D_RO(n)->n == BTREE_ORDER - 1) { /* node is full, perform a split */
struct tree_map_node_item m;
TOID(struct tree_map_node) right =
btree_map_create_split_node(n, &m);
if (!TOID_IS_NULL(parent)) {
btree_map_insert_node(parent, *p, m, n, right);
if (key > m.key) /* select node to continue search */
n = right;
} else { /* replacing root node, the tree grows in height */
TOID(struct tree_map_node) up =
TX_ZNEW(struct tree_map_node);
D_RW(up)->n = 1;
D_RW(up)->items[0] = m;
D_RW(up)->slots[0] = n;
D_RW(up)->slots[1] = right;
TX_ADD_FIELD(map, root);
D_RW(map)->root = up;
n = up;
}
}
int i;
for (i = 0; i < BTREE_ORDER - 1; ++i) {
*p = i;
/*
* The key either fits somewhere in the middle or at the
* right edge of the node.
*/
if (D_RO(n)->n == i || D_RO(n)->items[i].key > key) {
return TOID_IS_NULL(D_RO(n)->slots[i]) ? n :
btree_map_find_dest_node(map,
D_RO(n)->slots[i], n, key, p);
}
}
/*
* The key is bigger than the last node element, go one level deeper
* in the rightmost child.
*/
return btree_map_find_dest_node(map, D_RO(n)->slots[i], n, key, p);
}
/*
* btree_map_insert_item -- (internal) inserts and makes space for new item
*/
static void
btree_map_insert_item(TOID(struct tree_map_node) node, int p,
struct tree_map_node_item item)
{
TX_ADD(node);
if (D_RO(node)->items[p].key != 0) {
memmove(&D_RW(node)->items[p + 1], &D_RW(node)->items[p],
sizeof(struct tree_map_node_item) * ((BTREE_ORDER - 2 - p)));
}
btree_map_insert_item_at(node, p, item);
}
/*
* btree_map_is_empty -- checks whether the tree map is empty
*/
int
btree_map_is_empty(PMEMobjpool *pop, TOID(struct btree_map) map)
{
return TOID_IS_NULL(D_RO(map)->root) || D_RO(D_RO(map)->root)->n == 0;
}
/*
* btree_map_insert -- inserts a new key-value pair into the map
*/
int
btree_map_insert(PMEMobjpool *pop, TOID(struct btree_map) map,
uint64_t key, PMEMoid value)
{
struct tree_map_node_item item = {key, value};
TX_BEGIN(pop) {
if (btree_map_is_empty(pop, map)) {
btree_map_insert_empty(map, item);
} else {
int p; /* position at the dest node to insert */
TOID(struct tree_map_node) parent =
TOID_NULL(struct tree_map_node);
TOID(struct tree_map_node) dest =
btree_map_find_dest_node(map, D_RW(map)->root,
parent, key, &p);
btree_map_insert_item(dest, p, item);
}
} TX_END
return 0;
}
/*
* btree_map_rotate_right -- (internal) takes one element from right sibling
*/
static void
btree_map_rotate_right(TOID(struct tree_map_node) rsb,
TOID(struct tree_map_node) node,
TOID(struct tree_map_node) parent, int p)
{
/* move the separator from parent to the deficient node */
struct tree_map_node_item sep = D_RO(parent)->items[p];
btree_map_insert_item(node, D_RO(node)->n, sep);
/* the first element of the right sibling is the new separator */
TX_ADD_FIELD(parent, items[p]);
D_RW(parent)->items[p] = D_RO(rsb)->items[0];
/* the nodes are not necessarily leafs, so copy also the slot */
TX_ADD_FIELD(node, slots[D_RO(node)->n]);
D_RW(node)->slots[D_RO(node)->n] = D_RO(rsb)->slots[0];
TX_ADD(rsb);
D_RW(rsb)->n -= 1; /* it loses one element, but still > min */
/* move all existing elements back by one array slot */
memmove(D_RW(rsb)->items, D_RO(rsb)->items + 1,
sizeof(struct tree_map_node_item) * (D_RO(rsb)->n));
memmove(D_RW(rsb)->slots, D_RO(rsb)->slots + 1,
sizeof(TOID(struct tree_map_node)) * (D_RO(rsb)->n + 1));
}
/*
* btree_map_rotate_left -- (internal) takes one element from left sibling
*/
static void
btree_map_rotate_left(TOID(struct tree_map_node) lsb,
TOID(struct tree_map_node) node,
TOID(struct tree_map_node) parent, int p)
{
/* move the separator from parent to the deficient node */
struct tree_map_node_item sep = D_RO(parent)->items[p - 1];
btree_map_insert_item(node, 0, sep);
/* the last element of the left sibling is the new separator */
TX_ADD_FIELD(parent, items[p - 1]);
D_RW(parent)->items[p - 1] = D_RO(lsb)->items[D_RO(lsb)->n - 1];
/* rotate the node children */
memmove(D_RW(node)->slots + 1, D_RO(node)->slots,
sizeof(TOID(struct tree_map_node)) * (D_RO(node)->n));
/* the nodes are not necessarily leafs, so copy also the slot */
D_RW(node)->slots[0] = D_RO(lsb)->slots[D_RO(lsb)->n];
TX_ADD_FIELD(lsb, n);
D_RW(lsb)->n -= 1; /* it loses one element, but still > min */
}
/*
* btree_map_merge -- (internal) merges node and right sibling
*/
static void
btree_map_merge(TOID(struct btree_map) map, TOID(struct tree_map_node) rn,
TOID(struct tree_map_node) node,
TOID(struct tree_map_node) parent, int p)
{
struct tree_map_node_item sep = D_RO(parent)->items[p];
TX_ADD(node);
/* add separator to the deficient node */
D_RW(node)->items[D_RW(node)->n++] = sep;
/* copy right sibling data to node */
memcpy(&D_RW(node)->items[D_RO(node)->n], D_RO(rn)->items,
sizeof(struct tree_map_node_item) * D_RO(rn)->n);
memcpy(&D_RW(node)->slots[D_RO(node)->n], D_RO(rn)->slots,
sizeof(TOID(struct tree_map_node)) * (D_RO(rn)->n + 1));
D_RW(node)->n += D_RO(rn)->n;
TX_FREE(rn); /* right node is now empty */
TX_ADD(parent);
D_RW(parent)->n -= 1;
/* move everything to the right of the separator by one array slot */
memmove(D_RW(parent)->items + p, D_RW(parent)->items + p + 1,
sizeof(struct tree_map_node_item) * (D_RO(parent)->n - p));
memmove(D_RW(parent)->slots + p + 1, D_RW(parent)->slots + p + 2,
sizeof(TOID(struct tree_map_node)) * (D_RO(parent)->n - p + 1));
/* if the parent is empty then the tree shrinks in height */
if (D_RO(parent)->n == 0 && TOID_EQUALS(parent, D_RO(map)->root)) {
TX_ADD(map);
TX_FREE(D_RO(map)->root);
D_RW(map)->root = node;
}
}
/*
* btree_map_rebalance -- (internal) performs tree rebalance
*/
static void
btree_map_rebalance(TOID(struct btree_map) map, TOID(struct tree_map_node) node,
TOID(struct tree_map_node) parent, int p)
{
TOID(struct tree_map_node) rsb = p >= D_RO(parent)->n ?
TOID_NULL(struct tree_map_node) : D_RO(parent)->slots[p + 1];
TOID(struct tree_map_node) lsb = p == 0 ?
TOID_NULL(struct tree_map_node) : D_RO(parent)->slots[p - 1];
if (!TOID_IS_NULL(rsb) && D_RO(rsb)->n > BTREE_MIN)
btree_map_rotate_right(rsb, node, parent, p);
else if (!TOID_IS_NULL(lsb) && D_RO(lsb)->n > BTREE_MIN)
btree_map_rotate_left(lsb, node, parent, p);
else if (TOID_IS_NULL(rsb)) /* always merge with rightmost node */
btree_map_merge(map, node, lsb, parent, p - 1);
else
btree_map_merge(map, rsb, node, parent, p);
}
/*
* btree_map_get_leftmost_leaf -- (internal) searches for the successor
*/
static TOID(struct tree_map_node)
btree_map_get_leftmost_leaf(TOID(struct btree_map) map,
TOID(struct tree_map_node) n, TOID(struct tree_map_node) *p)
{
if (TOID_IS_NULL(D_RO(n)->slots[0]))
return n;
*p = n;
return btree_map_get_leftmost_leaf(map, D_RO(n)->slots[0], p);
}
/*
* btree_map_remove_from_node -- (internal) removes element from node
*/
static void
btree_map_remove_from_node(TOID(struct btree_map) map,
TOID(struct tree_map_node) node,
TOID(struct tree_map_node) parent, int p)
{
if (TOID_IS_NULL(D_RO(node)->slots[0])) { /* leaf */
TX_ADD(node);
if (D_RO(node)->n == 1 || p == BTREE_ORDER - 2) {
set_empty_item(&D_RW(node)->items[p]);
} else if (D_RO(node)->n != 1) {
memmove(&D_RW(node)->items[p],
&D_RW(node)->items[p + 1],
sizeof(struct tree_map_node_item) *
(D_RO(node)->n - p));
}
D_RW(node)->n -= 1;
return;
}
/* can't delete from non-leaf nodes, remove successor */
TOID(struct tree_map_node) rchild = D_RW(node)->slots[p + 1];
TOID(struct tree_map_node) lp = node;
TOID(struct tree_map_node) lm =
btree_map_get_leftmost_leaf(map, rchild, &lp);
TX_ADD_FIELD(node, items[p]);
D_RW(node)->items[p] = D_RO(lm)->items[0];
btree_map_remove_from_node(map, lm, lp, 0);
if (D_RO(lm)->n < BTREE_MIN) /* right child can be deficient now */
btree_map_rebalance(map, lm, lp,
TOID_EQUALS(lp, node) ? p + 1 : 0);
}
#define NODE_CONTAINS_ITEM(_n, _i, _k)\
((_i) != D_RO(_n)->n && D_RO(_n)->items[_i].key == (_k))
#define NODE_CHILD_CAN_CONTAIN_ITEM(_n, _i, _k)\
((_i) == D_RO(_n)->n || D_RO(_n)->items[_i].key > (_k)) &&\
!TOID_IS_NULL(D_RO(_n)->slots[_i])
/*
* btree_map_remove_item -- (internal) removes item from node
*/
static PMEMoid
btree_map_remove_item(TOID(struct btree_map) map,
TOID(struct tree_map_node) node, TOID(struct tree_map_node) parent,
uint64_t key, int p)
{
PMEMoid ret = OID_NULL;
for (int i = 0; i <= D_RO(node)->n; ++i) {
if (NODE_CONTAINS_ITEM(node, i, key)) {
ret = D_RO(node)->items[i].value;
btree_map_remove_from_node(map, node, parent, i);
break;
} else if (NODE_CHILD_CAN_CONTAIN_ITEM(node, i, key)) {
ret = btree_map_remove_item(map, D_RO(node)->slots[i],
node, key, i);
break;
}
}
/* check for deficient nodes walking up */
if (!TOID_IS_NULL(parent) && D_RO(node)->n < BTREE_MIN)
btree_map_rebalance(map, node, parent, p);
return ret;
}
/*
* btree_map_remove -- removes key-value pair from the map
*/
PMEMoid
btree_map_remove(PMEMobjpool *pop, TOID(struct btree_map) map, uint64_t key)
{
PMEMoid ret = OID_NULL;
TX_BEGIN(pop) {
ret = btree_map_remove_item(map, D_RW(map)->root,
TOID_NULL(struct tree_map_node), key, 0);
} TX_END
return ret;
}
/*
* btree_map_get_in_node -- (internal) searches for a value in the node
*/
static PMEMoid
btree_map_get_in_node(TOID(struct tree_map_node) node, uint64_t key)
{
for (int i = 0; i <= D_RO(node)->n; ++i) {
if (NODE_CONTAINS_ITEM(node, i, key))
return D_RO(node)->items[i].value;
else if (NODE_CHILD_CAN_CONTAIN_ITEM(node, i, key))
return btree_map_get_in_node(D_RO(node)->slots[i], key);
}
return OID_NULL;
}
/*
* btree_map_get -- searches for a value of the key
*/
PMEMoid
btree_map_get(PMEMobjpool *pop, TOID(struct btree_map) map, uint64_t key)
{
if (TOID_IS_NULL(D_RO(map)->root))
return OID_NULL;
return btree_map_get_in_node(D_RO(map)->root, key);
}
/*
* btree_map_lookup_in_node -- (internal) searches for key if exists
*/
static int
btree_map_lookup_in_node(TOID(struct tree_map_node) node, uint64_t key)
{
for (int i = 0; i <= D_RO(node)->n; ++i) {
if (NODE_CONTAINS_ITEM(node, i, key))
return 1;
else if (NODE_CHILD_CAN_CONTAIN_ITEM(node, i, key))
return btree_map_lookup_in_node(
D_RO(node)->slots[i], key);
}
return 0;
}
/*
* btree_map_lookup -- searches if key exists
*/
int
btree_map_lookup(PMEMobjpool *pop, TOID(struct btree_map) map, uint64_t key)
{
if (TOID_IS_NULL(D_RO(map)->root))
return 0;
return btree_map_lookup_in_node(D_RO(map)->root, key);
}
/*
* btree_map_foreach_node -- (internal) recursively traverses tree
*/
static int
btree_map_foreach_node(const TOID(struct tree_map_node) p,
int (*cb)(uint64_t key, PMEMoid, void *arg), void *arg)
{
if (TOID_IS_NULL(p))
return 0;
for (int i = 0; i <= D_RO(p)->n; ++i) {
if (btree_map_foreach_node(D_RO(p)->slots[i], cb, arg) != 0)
return 1;
if (i != D_RO(p)->n && D_RO(p)->items[i].key != 0) {
if (cb(D_RO(p)->items[i].key, D_RO(p)->items[i].value,
arg) != 0)
return 1;
}
}
return 0;
}
/*
* btree_map_foreach -- initiates recursive traversal
*/
int
btree_map_foreach(PMEMobjpool *pop, TOID(struct btree_map) map,
int (*cb)(uint64_t key, PMEMoid value, void *arg), void *arg)
{
return btree_map_foreach_node(D_RO(map)->root, cb, arg);
}
/*
* ctree_map_check -- check if given persistent object is a tree map
*/
int
btree_map_check(PMEMobjpool *pop, TOID(struct btree_map) map)
{
return TOID_IS_NULL(map) || !TOID_VALID(map);
}
/*
* btree_map_insert_new -- allocates a new object and inserts it into the tree
*/
int
btree_map_insert_new(PMEMobjpool *pop, TOID(struct btree_map) map,
uint64_t key, size_t size, unsigned type_num,
void (*constructor)(PMEMobjpool *pop, void *ptr, void *arg),
void *arg)
{
int ret = 0;
TX_BEGIN(pop) {
PMEMoid n = pmemobj_tx_alloc(size, type_num);
constructor(pop, pmemobj_direct(n), arg);
btree_map_insert(pop, map, key, n);
} TX_ONABORT {
ret = 1;
} TX_END
return ret;
}
/*
* btree_map_remove_free -- removes and frees an object from the tree
*/
int
btree_map_remove_free(PMEMobjpool *pop, TOID(struct btree_map) map,
uint64_t key)
{
int ret = 0;
TX_BEGIN(pop) {
PMEMoid val = btree_map_remove(pop, map, key);
pmemobj_tx_free(val);
} TX_ONABORT {
ret = 1;
} TX_END
return ret;
}