/*
* Copyright 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* lane.c -- lane implementation
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <inttypes.h>
#include <errno.h>
#include <limits.h>
#include <sched.h>
#include "libpmemobj.h"
#include "cuckoo.h"
#include "lane.h"
#include "out.h"
#include "util.h"
#include "obj.h"
#include "os_thread.h"
#include "valgrind_internal.h"
#include "memops.h"
#include "palloc.h"
#include "tx.h"
static os_tls_key_t Lane_info_key;
static __thread struct cuckoo *Lane_info_ht;
static __thread struct lane_info *Lane_info_records;
static __thread struct lane_info *Lane_info_cache;
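/*
 * Each thread keeps a private cuckoo hash table mapping a pool's
 * uuid_lo to its lane_info record, a doubly-linked list of all such
 * records (walked on cleanup), and a one-element cache holding the
 * most recently used record.
 */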
/*
* lane_info_create -- (internal) constructor for thread shared data
*/
static inline void
lane_info_create(void)
{
Lane_info_ht = cuckoo_new();
if (Lane_info_ht == NULL)
FATAL("cuckoo_new");
}
/*
* lane_info_delete -- (internal) deletes lane info hash table
*/
static inline void
lane_info_delete(void)
{
if (unlikely(Lane_info_ht == NULL))
return;
cuckoo_delete(Lane_info_ht);
struct lane_info *record;
struct lane_info *head = Lane_info_records;
while (head != NULL) {
record = head;
head = head->next;
Free(record);
}
Lane_info_ht = NULL;
Lane_info_records = NULL;
Lane_info_cache = NULL;
}
/*
* lane_info_ht_boot -- (internal) creates the lane info hash table and
* stores it under the thread-local key
*/
static inline void
lane_info_ht_boot(void)
{
lane_info_create();
int result = os_tls_set(Lane_info_key, Lane_info_ht);
if (result != 0) {
errno = result;
FATAL("!os_tls_set");
}
}
/*
* lane_info_ht_destroy -- (internal) destructor for thread shared data
*/
static inline void
lane_info_ht_destroy(void *ht)
{
lane_info_delete();
}
/*
* lane_info_boot -- creates the thread-local key under which each thread
* stores its lane info hash table
*/
void
lane_info_boot(void)
{
int result = os_tls_key_create(&Lane_info_key, lane_info_ht_destroy);
if (result != 0) {
errno = result;
FATAL("!os_tls_key_create");
}
}
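/*
 * The destructor registered above runs at thread exit and tears down
 * that thread's hash table and records via lane_info_delete().
 */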
/*
* lane_info_destroy -- destroys the lane info hash table and its TLS key
*/
void
lane_info_destroy(void)
{
lane_info_delete();
(void) os_tls_key_delete(Lane_info_key);
}
/*
* lane_info_cleanup -- (internal) removes the record of the pool being deleted
*/
static inline void
lane_info_cleanup(PMEMobjpool *pop)
{
if (unlikely(Lane_info_ht == NULL))
return;
struct lane_info *info = cuckoo_remove(Lane_info_ht, pop->uuid_lo);
if (likely(info != NULL)) {
if (info->prev)
info->prev->next = info->next;
if (info->next)
info->next->prev = info->prev;
if (Lane_info_cache == info)
Lane_info_cache = NULL;
if (Lane_info_records == info)
Lane_info_records = info->next;
Free(info);
}
}
/*
* lane_get_layout -- (internal) computes the runtime address of the lane layout
*/
static struct lane_layout *
lane_get_layout(PMEMobjpool *pop, uint64_t lane_idx)
{
return (void *)((char *)pop + pop->lanes_offset +
sizeof(struct lane_layout) * lane_idx);
}
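/*
 * The lane layouts form a contiguous array inside the pool:
 *
 *	(char *)pop + pop->lanes_offset:
 *		[lane 0][lane 1] ... [lane nlanes - 1]
 *
 * with each lane_layout holding three fixed-size ulogs: the internal
 * redo log, the external redo log and the undo log.
 */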
/*
* lane_ulog_constructor -- (internal) constructor of a ulog extension
*/
static int
lane_ulog_constructor(void *base, void *ptr, size_t usable_size, void *arg)
{
PMEMobjpool *pop = base;
const struct pmem_ops *p_ops = &pop->p_ops;
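/*
 * Only the cacheline-aligned part of the allocation, minus the ulog
 * header, is usable as the log's data capacity.
 */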
size_t capacity = ALIGN_DOWN(usable_size - sizeof(struct ulog),
CACHELINE_SIZE);
ulog_construct(OBJ_PTR_TO_OFF(base, ptr), capacity, 1, p_ops);
return 0;
}
/*
* lane_undo_extend -- allocates a new undo log
*/
static int
lane_undo_extend(void *base, uint64_t *undo)
{
PMEMobjpool *pop = base;
struct tx_parameters *params = pop->tx_params;
size_t s = SIZEOF_ALIGNED_ULOG(params->cache_size);
return pmalloc_construct(base, undo, s, lane_ulog_constructor, NULL,
0, OBJ_INTERNAL_OBJECT_MASK, 0);
}
/*
* lane_redo_extend -- allocates a new redo log
*/
static int
lane_redo_extend(void *base, uint64_t *redo)
{
size_t s = SIZEOF_ALIGNED_ULOG(LANE_REDO_EXTERNAL_SIZE);
return pmalloc_construct(base, redo, s, lane_ulog_constructor, NULL,
0, OBJ_INTERNAL_OBJECT_MASK, 0);
}
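/*
 * Both extend routines are handed to operation_new() as callbacks and
 * run when the corresponding log overflows; pfree serves as the
 * matching free callback for the allocated extensions.
 */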
/*
* lane_init -- (internal) initializes a single lane's runtime state
*/
static int
lane_init(PMEMobjpool *pop, struct lane *lane, struct lane_layout *layout)
{
ASSERTne(lane, NULL);
lane->layout = layout;
lane->internal = operation_new((struct ulog *)&layout->internal,
LANE_REDO_INTERNAL_SIZE,
NULL, NULL, &pop->p_ops,
LOG_TYPE_REDO);
if (lane->internal == NULL)
goto error_internal_new;
lane->external = operation_new((struct ulog *)&layout->external,
LANE_REDO_EXTERNAL_SIZE,
lane_redo_extend, (ulog_free_fn)pfree, &pop->p_ops,
LOG_TYPE_REDO);
if (lane->external == NULL)
goto error_external_new;
lane->undo = operation_new((struct ulog *)&layout->undo,
LANE_UNDO_SIZE,
lane_undo_extend, (ulog_free_fn)pfree, &pop->p_ops,
LOG_TYPE_UNDO);
if (lane->undo == NULL)
goto error_undo_new;
return 0;
error_undo_new:
operation_delete(lane->external);
error_external_new:
operation_delete(lane->internal);
error_internal_new:
return -1;
}
/*
* lane_destroy -- (internal) cleans up a single lane's runtime state
*/
static void
lane_destroy(PMEMobjpool *pop, struct lane *lane)
{
operation_delete(lane->undo);
operation_delete(lane->internal);
operation_delete(lane->external);
}
/*
* lane_boot -- initializes all lanes
*/
int
lane_boot(PMEMobjpool *pop)
{
int err = 0;
pop->lanes_desc.lane = Malloc(sizeof(struct lane) * pop->nlanes);
if (pop->lanes_desc.lane == NULL) {
err = ENOMEM;
ERR("!Malloc of volatile lanes");
goto error_lanes_malloc;
}
pop->lanes_desc.next_lane_idx = 0;
pop->lanes_desc.lane_locks =
Zalloc(sizeof(*pop->lanes_desc.lane_locks) * pop->nlanes);
if (pop->lanes_desc.lane_locks == NULL) {
err = ENOMEM;
ERR("!Zalloc for lane locks");
goto error_locks_malloc;
}
/* add lanes to pmemcheck ignored list */
VALGRIND_ADD_TO_GLOBAL_TX_IGNORE((char *)pop + pop->lanes_offset,
(sizeof(struct lane_layout) * pop->nlanes));
uint64_t i;
for (i = 0; i < pop->nlanes; ++i) {
struct lane_layout *layout = lane_get_layout(pop, i);
if ((err = lane_init(pop, &pop->lanes_desc.lane[i], layout))) {
ERR("!lane_init");
goto error_lane_init;
}
}
return 0;
error_lane_init:
for (; i >= 1; --i)
lane_destroy(pop, &pop->lanes_desc.lane[i - 1]);
Free(pop->lanes_desc.lane_locks);
pop->lanes_desc.lane_locks = NULL;
error_locks_malloc:
Free(pop->lanes_desc.lane);
pop->lanes_desc.lane = NULL;
error_lanes_malloc:
return err;
}
/*
* lane_init_data -- initializes ulogs for all the lanes
*/
void
lane_init_data(PMEMobjpool *pop)
{
struct lane_layout *layout;
for (uint64_t i = 0; i < pop->nlanes; ++i) {
layout = lane_get_layout(pop, i);
ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->internal),
LANE_REDO_INTERNAL_SIZE, 0, &pop->p_ops);
ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->external),
LANE_REDO_EXTERNAL_SIZE, 0, &pop->p_ops);
ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->undo),
LANE_UNDO_SIZE, 0, &pop->p_ops);
}
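/*
 * The ulogs above were constructed without flushing (third argument
 * 0); instead, the entire lane array is persisted below in a single
 * relaxed call.
 */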
layout = lane_get_layout(pop, 0);
pmemops_xpersist(&pop->p_ops, layout,
pop->nlanes * sizeof(struct lane_layout),
PMEMOBJ_F_RELAXED);
}
/*
* lane_cleanup -- destroys all lanes
*/
void
lane_cleanup(PMEMobjpool *pop)
{
for (uint64_t i = 0; i < pop->nlanes; ++i)
lane_destroy(pop, &pop->lanes_desc.lane[i]);
Free(pop->lanes_desc.lane);
pop->lanes_desc.lane = NULL;
Free(pop->lanes_desc.lane_locks);
pop->lanes_desc.lane_locks = NULL;
lane_info_cleanup(pop);
}
/*
* lane_recover_and_section_boot -- performs initialization and recovery of all
* lanes
*/
int
lane_recover_and_section_boot(PMEMobjpool *pop)
{
COMPILE_ERROR_ON(SIZEOF_ULOG(LANE_UNDO_SIZE) +
SIZEOF_ULOG(LANE_REDO_EXTERNAL_SIZE) +
SIZEOF_ULOG(LANE_REDO_INTERNAL_SIZE) != LANE_TOTAL_SIZE);
int err = 0;
uint64_t i; /* lane index */
struct lane_layout *layout;
/*
* First we need to recover the internal/external redo logs so that the
* allocator state is consistent before we boot it.
*/
for (i = 0; i < pop->nlanes; ++i) {
layout = lane_get_layout(pop, i);
ulog_recover((struct ulog *)&layout->internal,
OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops);
ulog_recover((struct ulog *)&layout->external,
OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops);
}
if ((err = pmalloc_boot(pop)) != 0)
return err;
/*
* Undo logs must be processed after the heap is initialized, since
* undo recovery might require deallocating the chained ulog extensions.
*/
for (i = 0; i < pop->nlanes; ++i) {
layout = lane_get_layout(pop, i);
struct ulog *undo = (struct ulog *)&layout->undo;
struct operation_context *ctx = operation_new(
undo,
LANE_UNDO_SIZE,
lane_undo_extend, (ulog_free_fn)pfree, &pop->p_ops,
LOG_TYPE_UNDO);
if (ctx == NULL) {
/* don't return 0 here -- that would mask the failure */
err = errno ? errno : ENOMEM;
LOG(2, "undo recovery failed %" PRIu64 " %d",
i, err);
return err;
}
operation_resume(ctx);
operation_process(ctx);
operation_finish(ctx);
operation_delete(ctx);
}
return 0;
}
/*
* lane_section_cleanup -- performs runtime cleanup of all lanes
*/
int
lane_section_cleanup(PMEMobjpool *pop)
{
return pmalloc_cleanup(pop);
}
/*
* lane_check -- performs check of all lanes
*/
int
lane_check(PMEMobjpool *pop)
{
int err = 0;
uint64_t j; /* lane index */
struct lane_layout *layout;
for (j = 0; j < pop->nlanes; ++j) {
layout = lane_get_layout(pop, j);
if ((err = ulog_check((struct ulog *)&layout->internal,
OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops)) != 0) {
LOG(2, "lane %" PRIu64 " internal redo failed: %d",
j, err);
return err;
}
}
return 0;
}
/*
* get_lane -- (internal) acquires a free lane, sweeping the lane locks
* with compare-and-swap; the thread prefers its sticky "primary" lane,
* and after LANE_PRIMARY_ATTEMPTS failed grabs of the primary, the next
* lane it does acquire becomes the new primary
*/
static inline void
get_lane(uint64_t *locks, struct lane_info *info, uint64_t nlocks)
{
info->lane_idx = info->primary;
while (1) {
do {
info->lane_idx %= nlocks;
if (likely(util_bool_compare_and_swap64(
&locks[info->lane_idx], 0, 1))) {
if (info->lane_idx == info->primary) {
info->primary_attempts =
LANE_PRIMARY_ATTEMPTS;
} else if (info->primary_attempts == 0) {
info->primary = info->lane_idx;
info->primary_attempts =
LANE_PRIMARY_ATTEMPTS;
}
return;
}
if (info->lane_idx == info->primary &&
info->primary_attempts > 0) {
info->primary_attempts--;
}
++info->lane_idx;
} while (info->lane_idx < nlocks);
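/* every lane in this sweep was busy -- yield the CPU and retry */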
sched_yield();
}
}
/*
* get_lane_info_record -- (internal) returns the lane info record for the
* given pool, allocating and registering a new one on first use
*/
static inline struct lane_info *
get_lane_info_record(PMEMobjpool *pop)
{
if (likely(Lane_info_cache != NULL &&
Lane_info_cache->pop_uuid_lo == pop->uuid_lo)) {
return Lane_info_cache;
}
if (unlikely(Lane_info_ht == NULL)) {
lane_info_ht_boot();
}
struct lane_info *info = cuckoo_get(Lane_info_ht, pop->uuid_lo);
if (unlikely(info == NULL)) {
info = Malloc(sizeof(struct lane_info));
if (unlikely(info == NULL)) {
FATAL("Malloc");
}
info->pop_uuid_lo = pop->uuid_lo;
info->lane_idx = UINT64_MAX;
info->nest_count = 0;
info->next = Lane_info_records;
info->prev = NULL;
info->primary = 0;
info->primary_attempts = LANE_PRIMARY_ATTEMPTS;
if (Lane_info_records) {
Lane_info_records->prev = info;
}
Lane_info_records = info;
if (unlikely(cuckoo_insert(
Lane_info_ht, pop->uuid_lo, info) != 0)) {
FATAL("cuckoo_insert");
}
}
Lane_info_cache = info;
return info;
}
/*
* lane_hold -- grabs a per-thread lane in a round-robin fashion
*/
unsigned
lane_hold(PMEMobjpool *pop, struct lane **lanep)
{
/*
* Before runtime lane initialization all remote operations are
* executed using RLANE_DEFAULT.
*/
if (unlikely(!pop->lanes_desc.runtime_nlanes)) {
ASSERT(pop->has_remote_replicas);
if (lanep != NULL)
FATAL("cannot obtain section before lane's init");
return RLANE_DEFAULT;
}
struct lane_info *lane = get_lane_info_record(pop);
while (unlikely(lane->lane_idx == UINT64_MAX)) {
/* initial wrap to next CL */
lane->primary = lane->lane_idx = util_fetch_and_add32(
&pop->lanes_desc.next_lane_idx, LANE_JUMP);
} /* handles wraparound */
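/*
 * The "CL" above is a cacheline: LANE_JUMP spaces consecutive threads'
 * primary lanes a cacheline apart in the lane_locks array, which
 * reduces false sharing on the lock words.
 */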
uint64_t *llocks = pop->lanes_desc.lane_locks;
/* grab next free lane from lanes available at runtime */
if (!lane->nest_count++) {
get_lane(llocks, lane, pop->lanes_desc.runtime_nlanes);
}
struct lane *l = &pop->lanes_desc.lane[lane->lane_idx];
/* reinitialize lane's content only if in outermost hold */
if (lanep && lane->nest_count == 1) {
VALGRIND_ANNOTATE_NEW_MEMORY(l, sizeof(*l));
VALGRIND_ANNOTATE_NEW_MEMORY(l->layout, sizeof(*l->layout));
operation_init(l->external);
operation_init(l->internal);
operation_init(l->undo);
}
if (lanep)
*lanep = l;
return (unsigned)lane->lane_idx;
}
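/*
 * Typical usage (a sketch -- pool setup and error handling elided):
 *
 *	struct lane *lane;
 *	lane_hold(pop, &lane);
 *	... append entries to lane->external or lane->undo through
 *	    the operation_* API ...
 *	lane_release(pop);
 *
 * Nested holds on the same thread reuse the already-acquired lane;
 * only the outermost lane_release() unlocks it.
 */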
/*
* lane_attach -- attaches the lane with the given index to the current thread
*/
void
lane_attach(PMEMobjpool *pop, unsigned lane)
{
struct lane_info *info = get_lane_info_record(pop);
info->nest_count = 1;
info->lane_idx = lane;
}
/*
* lane_detach -- detaches the currently held lane from the current thread
*/
unsigned
lane_detach(PMEMobjpool *pop)
{
struct lane_info *lane = get_lane_info_record(pop);
lane->nest_count -= 1;
ASSERTeq(lane->nest_count, 0);
return (unsigned)lane->lane_idx;
}
/*
* lane_release -- drops the per-thread lane
*/
void
lane_release(PMEMobjpool *pop)
{
if (unlikely(!pop->lanes_desc.runtime_nlanes)) {
ASSERT(pop->has_remote_replicas);
return;
}
struct lane_info *lane = get_lane_info_record(pop);
ASSERTne(lane, NULL);
ASSERTne(lane->lane_idx, UINT64_MAX);
if (unlikely(lane->nest_count == 0)) {
FATAL("lane_release");
} else if (--(lane->nest_count) == 0) {
if (unlikely(!util_bool_compare_and_swap64(
&pop->lanes_desc.lane_locks[lane->lane_idx],
1, 0))) {
FATAL("util_bool_compare_and_swap64");
}
}
}