/*
 * Copyright (C) 2014 - 2019 Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <hbwmalloc.h>
#include <memkind.h>
#include <memkind/internal/memkind_private.h>
#include <memkind/internal/memkind_hbw.h>
#include <memkind/internal/memkind_log.h>

#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <errno.h>
#include <numa.h>
#include <numaif.h>
#include <unistd.h>
#include <stdint.h>

static hbw_policy_t hbw_policy_g = HBW_POLICY_PREFERRED;
static pthread_once_t hbw_policy_once_g = PTHREAD_ONCE_INIT;
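
/*
 * pthread_once() callbacks: the allocation policy is latched exactly once,
 * the first time hbw_set_policy() runs or a kind is chosen.
 */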

static void hbw_policy_bind_init(void)
{
    hbw_policy_g = HBW_POLICY_BIND;
}

static void hbw_policy_bind_all_init(void)
{
    hbw_policy_g = HBW_POLICY_BIND_ALL;
}

static void hbw_policy_preferred_init(void)
{
    hbw_policy_g = HBW_POLICY_PREFERRED;
}

static void hbw_policy_interleave_init(void)
{
    hbw_policy_g = HBW_POLICY_INTERLEAVE;
}

// This function is intended to be called once per page size.
// The kind should be obtained through hbw_get_kind(), defined below.
static memkind_t hbw_choose_kind(hbw_pagesize_t pagesize)
{
    memkind_t result = NULL;

    hbw_set_policy(hbw_policy_g);

    int policy = hbw_get_policy();

    // The PREFERRED policy has separate handling because it can fall back
    // to non-HBW kinds when HBW memory is absent.
    if (policy != HBW_POLICY_PREFERRED) {
        switch (pagesize) {
            case HBW_PAGESIZE_2MB:
                if(policy == HBW_POLICY_BIND_ALL) {
                    result = MEMKIND_HBW_ALL_HUGETLB;
                } else {
                    result = MEMKIND_HBW_HUGETLB;
                }
                break;
            case HBW_PAGESIZE_1GB:
            case HBW_PAGESIZE_1GB_STRICT:
                result = MEMKIND_HBW_GBTLB;
                break;
            default:
                if (policy == HBW_POLICY_BIND) {
                    result = MEMKIND_HBW;
                } else if (policy == HBW_POLICY_BIND_ALL) {
                    result = MEMKIND_HBW_ALL;
                } else {
                    result = MEMKIND_HBW_INTERLEAVE;
                }
                break;
        }
    } else if (memkind_check_available(MEMKIND_HBW) == 0) {
        switch (pagesize) {
            case HBW_PAGESIZE_2MB:
                result = MEMKIND_HBW_PREFERRED_HUGETLB;
                break;
            case HBW_PAGESIZE_1GB:
            case HBW_PAGESIZE_1GB_STRICT:
                result = MEMKIND_HBW_PREFERRED_GBTLB;
                break;
            default:
                result = MEMKIND_HBW_PREFERRED;
                break;
        }
    } else {
        switch (pagesize) {
            case HBW_PAGESIZE_2MB:
                result = MEMKIND_HUGETLB;
                break;
            case HBW_PAGESIZE_1GB:
            case HBW_PAGESIZE_1GB_STRICT:
                result = MEMKIND_GBTLB;
                break;
            default:
                result = MEMKIND_DEFAULT;
                break;
        }
    }
    return result;
}

static memkind_t pagesize_kind[HBW_PAGESIZE_MAX_VALUE];
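/*
 * Lazily resolve and cache the kind for a given page size; after the first
 * lookup the cached value is returned unchanged.
 */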
static inline memkind_t hbw_get_kind(hbw_pagesize_t pagesize)
{
    if(pagesize_kind[pagesize] == NULL) {
        pagesize_kind[pagesize] = hbw_choose_kind(pagesize);
    }
    return pagesize_kind[pagesize];
}


MEMKIND_EXPORT hbw_policy_t hbw_get_policy(void)
{
    return hbw_policy_g;
}

MEMKIND_EXPORT int hbw_set_policy(hbw_policy_t mode)
{
    switch(mode) {
        case HBW_POLICY_PREFERRED:
            pthread_once(&hbw_policy_once_g, hbw_policy_preferred_init);
            break;
        case HBW_POLICY_BIND:
            pthread_once(&hbw_policy_once_g, hbw_policy_bind_init);
            break;
        case HBW_POLICY_BIND_ALL:
            pthread_once(&hbw_policy_once_g, hbw_policy_bind_all_init);
            break;
        case HBW_POLICY_INTERLEAVE:
            pthread_once(&hbw_policy_once_g, hbw_policy_interleave_init);
            break;
        default:
            return EINVAL;
    }

    if (mode != hbw_policy_g) {
        return EPERM;
    }

    return 0;
}
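
/*
 * Example (a minimal sketch, not part of the library): the policy can be set
 * only once, before the first hbw allocation; later attempts return EPERM.
 *
 *     if (hbw_set_policy(HBW_POLICY_BIND) != 0) {
 *         // policy was already latched by an earlier call or allocation
 *     }
 *     void *buf = hbw_malloc(1024);
 *     ...
 *     hbw_free(buf);
 */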

MEMKIND_EXPORT int hbw_check_available(void)
{
    return  (memkind_check_available(MEMKIND_HBW) == 0) ? 0 : ENODEV;
}
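
/*
 * Touch a page by reading a byte and writing the same value back, so the page
 * is faulted in; the volatile pointer keeps the compiler from optimizing the
 * access away.
 */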

static inline void hbw_touch_page(void *addr)
{
    volatile char *temp_ptr = (volatile char *) addr;
    char value = temp_ptr[0];
    temp_ptr[0] = value;
}
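
/*
 * Walk the range [addr, addr + size) page by page, query the NUMA node
 * backing each page with move_pages(), and check the node against the HBW
 * nodemask. Returns 0 when the whole range is backed by HBW memory.
 */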

MEMKIND_EXPORT int hbw_verify_memory_region(void *addr, size_t size, int flags)
{
    /*
     * Fail with EINVAL if addr is NULL, size is zero, or flags have an
     * unsupported bit set.
     */
    if (addr == NULL || size == 0 || flags & ~HBW_TOUCH_PAGES) {
        return EINVAL;
    }

    /*
     * 4KB is the smallest page size; when the region is backed by larger
     * pages, each page is verified more than once.
     */
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t page_mask = ~(page_size-1);

    /*
     * The block size should be a power of two to enable compiler
     * optimizations.
     */
    const unsigned block_size = 64;

    char *end = (char *)addr + size;
    char *aligned_beg = (char *)((uintptr_t)addr & page_mask);
    nodemask_t nodemask;
    struct bitmask expected_nodemask = {NUMA_NUM_NODES, nodemask.n};

    memkind_hbw_all_get_mbind_nodemask(NULL, expected_nodemask.maskp,
                                       expected_nodemask.size);

    while(aligned_beg < end) {
        int nodes[block_size];
        void *pages[block_size];
        int i = 0, page_count = 0;
        char *iter_end = aligned_beg + block_size*page_size;

        if (iter_end > end) {
            iter_end = end;
        }

        while (aligned_beg < iter_end) {
            if (flags & HBW_TOUCH_PAGES) {
                hbw_touch_page(aligned_beg);
            }
            pages[page_count++] = aligned_beg;
            aligned_beg += page_size;
        }

        if (move_pages(0, page_count, pages, NULL, nodes, MPOL_MF_MOVE)) {
            return EFAULT;
        }

        for (i = 0; i < page_count; i++) {
            /*
             * A negative value in nodes[i] indicates that move_pages() could
             * not establish the page location, e.g. the address does not
             * point to a valid virtual mapping.
             */
            if(nodes[i] < 0) {
                return -1;
            }
            /*
             * If nodes[i] is not present in expected_nodemask, the physical
             * memory backing the page is not HBW.
             */
            if (!numa_bitmask_isbitset(&expected_nodemask, nodes[i])) {
                return -1;
            }
        }
    }

    return 0;
}
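
/*
 * Example (a minimal sketch): confirm that a buffer really landed in HBW.
 *
 *     void *p = hbw_malloc(len);
 *     if (p && hbw_verify_memory_region(p, len, HBW_TOUCH_PAGES) == 0) {
 *         // every page backing [p, p + len) resides on an HBW node
 *     }
 */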

MEMKIND_EXPORT void *hbw_malloc(size_t size)
{
    return memkind_malloc(hbw_get_kind(HBW_PAGESIZE_4KB), size);
}

MEMKIND_EXPORT void *hbw_calloc(size_t num, size_t size)
{
    return memkind_calloc(hbw_get_kind(HBW_PAGESIZE_4KB), num, size);
}

MEMKIND_EXPORT int hbw_posix_memalign(void **memptr, size_t alignment,
                                      size_t size)
{
    return memkind_posix_memalign(hbw_get_kind(HBW_PAGESIZE_4KB), memptr, alignment,
                                  size);
}

MEMKIND_EXPORT int hbw_posix_memalign_psize(void **memptr, size_t alignment,
                                            size_t size,
                                            hbw_pagesize_t pagesize)
{
    if (pagesize == HBW_PAGESIZE_1GB_STRICT && size % (1 << 30)) {
        return EINVAL;
    }

    if((pagesize == HBW_PAGESIZE_2MB ||
        pagesize == HBW_PAGESIZE_1GB_STRICT ||
        pagesize == HBW_PAGESIZE_1GB) &&
       hbw_get_policy() == HBW_POLICY_INTERLEAVE) {

        log_err("HBW_POLICY_INTERLEAVE is unsupported with used page size!");
        return EINVAL;
    }

    return memkind_posix_memalign(hbw_get_kind(pagesize), memptr, alignment, size);
}
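
/*
 * Example (a minimal sketch, assuming 2MB huge pages are configured on the
 * system):
 *
 *     void *p = NULL;
 *     int err = hbw_posix_memalign_psize(&p, 64, 4 << 20, HBW_PAGESIZE_2MB);
 */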

MEMKIND_EXPORT void *hbw_realloc(void *ptr, size_t size)
{
    return memkind_realloc(hbw_get_kind(HBW_PAGESIZE_4KB), ptr, size);
}

MEMKIND_EXPORT void hbw_free(void *ptr)
{
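    // Passing a NULL kind lets memkind_free() detect the kind that owns ptr.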
    memkind_free(0, ptr);
}

MEMKIND_EXPORT size_t hbw_malloc_usable_size(void *ptr)
{
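    // As with hbw_free(), a NULL kind tells memkind to detect the kind from ptr.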
    return memkind_malloc_usable_size(0, ptr);
}