/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
 * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#include "knem_md.h"
#include "knem_io.h"

/*
 * NOTE(review): the four angle-bracket header names were lost when this file
 * was extracted. The names below are reconstructed from the identifiers used
 * in this file (logging, page size, UCM events) - confirm against the tree.
 */
#include <ucs/arch/cpu.h>
#include <ucs/debug/log.h>
#include <ucs/sys/sys.h>
#include <ucm/api/ucm.h>


/* Configuration table of the KNEM memory domain: generic MD options, the
 * ternary "RCACHE" switch and the registration cache options. */
static ucs_config_field_t uct_knem_md_config_table[] = {
    {"", "", NULL,
     ucs_offsetof(uct_knem_md_config_t, super),
     UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},

    {"RCACHE", "try", "Enable using memory registration cache",
     ucs_offsetof(uct_knem_md_config_t, rcache_enable),
     UCS_CONFIG_TYPE_TERNARY},

    {"", "", NULL,
     ucs_offsetof(uct_knem_md_config_t, rcache),
     UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)},

    {NULL}
};

/**
 * Fill in the attributes of the KNEM memory domain.
 *
 * @param [in]  uct_md   Memory domain; must be a uct_knem_md_t.
 * @param [out] md_attr  Attributes structure to fill.
 *
 * @return Always UCS_OK.
 */
ucs_status_t uct_knem_md_query(uct_md_h uct_md, uct_md_attr_t *md_attr)
{
    uct_knem_md_t *md = ucs_derived_of(uct_md, uct_knem_md_t);

    md_attr->rkey_packed_size     = sizeof(uct_knem_key_t);
    md_attr->cap.flags            = UCT_MD_FLAG_REG |
                                    UCT_MD_FLAG_NEED_RKEY;
    md_attr->cap.reg_mem_types    = UCS_MEMORY_TYPES_CPU_ACCESSIBLE;
    md_attr->cap.access_mem_type  = UCS_MEMORY_TYPE_HOST;
    md_attr->cap.detect_mem_types = 0;
    md_attr->cap.max_alloc        = 0;
    md_attr->cap.max_reg          = ULONG_MAX;
    md_attr->reg_cost             = md->reg_cost;

    /* Set every bit of the CPU mask */
    memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus));
    return UCS_OK;
}

/**
 * Report the KNEM memory domain resource if the KNEM device is usable.
 *
 * Opens /dev/knem, issues KNEM_CMD_GET_INFO and compares the reported ABI with
 * KNEM_ABI_VERSION. The device is closed again before returning in all cases.
 *
 * @param [in]  component        Component being queried.
 * @param [out] resources_p      Filled by the uct_md_query_*_md_resource
 *                               helper that is called.
 * @param [out] num_resources_p  Filled by the same helper.
 *
 * @return Result of uct_md_query_single_md_resource() when the device passed
 *         all checks, otherwise result of uct_md_query_empty_md_resource().
 */
static ucs_status_t
uct_knem_query_md_resources(uct_component_t *component,
                            uct_md_resource_desc_t **resources_p,
                            unsigned *num_resources_p)
{
    int fd;
    int rc;
    struct knem_cmd_info info;

    memset(&info, 0, sizeof(struct knem_cmd_info));

    fd = open("/dev/knem", O_RDWR);
    if (fd < 0) {
        ucs_debug("could not open the KNEM device file at /dev/knem: %m. Disabling knem resource");
        goto out_empty;
    }

    rc = ioctl(fd, KNEM_CMD_GET_INFO, &info);
    if (rc < 0) {
        ucs_debug("KNEM get info failed. not using knem, err = %d %m", rc);
        goto out_empty_close_fd;
    }

    if (KNEM_ABI_VERSION != info.abi) {
        ucs_error("KNEM ABI mismatch: KNEM_ABI_VERSION: %d, Driver binary interface version: %d",
                  KNEM_ABI_VERSION, info.abi);
        goto out_empty_close_fd;
    }

    /* We have to close it since it is not clear
     * if it will be selected in future */
    close(fd);
    return uct_md_query_single_md_resource(component, resources_p,
                                           num_resources_p);

out_empty_close_fd:
    close(fd);
out_empty:
    return uct_md_query_empty_md_resource(resources_p, num_resources_p);
}

/**
 * Destroy a KNEM memory domain: destroy its registration cache (if one was
 * created), close the KNEM device descriptor and release the structure.
 *
 * @param [in] md  Memory domain; must be a uct_knem_md_t.
 */
static void uct_knem_md_close(uct_md_h md)
{
    uct_knem_md_t *knem_md = ucs_derived_of(md, uct_knem_md_t);

    if (knem_md->rcache != NULL) {
        ucs_rcache_destroy(knem_md->rcache);
    }

    close(knem_md->knem_fd);
    ucs_free(knem_md);
}

/**
 * Create a KNEM region covering [address, address + length) and store its
 * cookie and base address in @a key.
 *
 * @param [in]  md       Memory domain; must be a uct_knem_md_t.
 * @param [in]  address  Start of the buffer to register.
 * @param [in]  length   Length of the buffer in bytes.
 * @param [in]  flags    Not used by this function.
 * @param [in]  silent   If non-zero, an ioctl failure is not logged.
 * @param [out] key      Receives the region cookie and the address.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if the ioctl failed.
 */
static ucs_status_t uct_knem_mem_reg_internal(uct_md_h md, void *address,
                                              size_t length, unsigned flags,
                                              unsigned silent,
                                              uct_knem_key_t *key)
{
    int rc;
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    uct_knem_md_t *knem_md = ucs_derived_of(md, uct_knem_md_t);
    int knem_fd            = knem_md->knem_fd;

    ucs_assert_always(knem_fd > -1);

    /* Describe the buffer as a single-entry iovec */
    knem_iov[0].base = (uintptr_t) address;
    knem_iov[0].len  = length;

    memset(&create, 0, sizeof(struct knem_cmd_create_region));
    create.iovec_array = (uintptr_t) &knem_iov[0];
    create.iovec_nr    = 1;
    create.flags       = 0;
    create.protection  = PROT_READ | PROT_WRITE;

    rc = ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &create);
    if (rc < 0) {
        if (!silent) {
            /* Do not report the error in silent mode: we are called from
             * rcache internals, and rcache will try to register the memory
             * again with more accurate data */
            ucs_error("KNEM create region failed: %m");
        }
        return UCS_ERR_IO_ERROR;
    }

    ucs_assert_always(create.cookie != 0);
    key->cookie  = create.cookie;
    key->address = (uintptr_t)address;

    return UCS_OK;
}

/**
 * Register memory without the registration cache.
 *
 * Allocates a uct_knem_key_t and registers the buffer through
 * uct_knem_mem_reg_internal(). The key is released again if registration
 * fails.
 *
 * @param [in]  md       Memory domain.
 * @param [in]  address  Start of the buffer to register.
 * @param [in]  length   Length of the buffer in bytes.
 * @param [in]  flags    Passed through to uct_knem_mem_reg_internal().
 * @param [out] memh_p   On success, set to the allocated key.
 *
 * @return UCS_OK, UCS_ERR_NO_MEMORY if the key could not be allocated, or the
 *         status returned by uct_knem_mem_reg_internal().
 */
static ucs_status_t uct_knem_mem_reg(uct_md_h md, void *address, size_t length,
                                     unsigned flags, uct_mem_h *memh_p)
{
    uct_knem_key_t *key;
    ucs_status_t status;

    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
    if (NULL == key) {
        ucs_error("Failed to allocate memory for uct_knem_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    status = uct_knem_mem_reg_internal(md, address, length, flags, 0, key);
    if (status == UCS_OK) {
        *memh_p = key;
    } else {
        ucs_free(key);
    }

    return status;
}

/**
 * Destroy the KNEM region identified by key->cookie.
 *
 * An ioctl failure is logged but not propagated.
 *
 * @param [in] md   Memory domain; must be a uct_knem_md_t.
 * @param [in] key  Key with a non-zero cookie and address.
 *
 * @return Always UCS_OK.
 */
static ucs_status_t uct_knem_mem_dereg_internal(uct_md_h md,
                                                uct_knem_key_t *key)
{
    int rc;
    uct_knem_md_t *knem_md = ucs_derived_of(md, uct_knem_md_t);
    int knem_fd            = knem_md->knem_fd;

    ucs_assert_always(knem_fd > -1);
    ucs_assert_always(key->cookie != 0);
    ucs_assert_always(key->address != 0);

    rc = ioctl(knem_fd, KNEM_CMD_DESTROY_REGION, &key->cookie);
    if (rc < 0) {
        ucs_error("KNEM destroy region failed, err = %m");
    }

    return UCS_OK;
}

/**
 * Deregister memory registered by uct_knem_mem_reg() and release its key.
 *
 * @param [in] md    Memory domain.
 * @param [in] memh  Memory handle; a uct_knem_key_t pointer.
 *
 * @return Status of uct_knem_mem_dereg_internal().
 */
static ucs_status_t uct_knem_mem_dereg(uct_md_h md, uct_mem_h memh)
{
    uct_knem_key_t *key = (uct_knem_key_t *)memh;
    ucs_status_t status;

    status = uct_knem_mem_dereg_internal(md, key);
    if (status == UCS_OK) {
        ucs_free(key);
    }

    return status;
}

/**
 * Copy the cookie and address of a memory handle into a packed rkey buffer.
 *
 * @param [in]  md           Memory domain (not used).
 * @param [in]  memh         Memory handle; a uct_knem_key_t pointer.
 * @param [out] rkey_buffer  Buffer written as a uct_knem_key_t.
 *
 * @return Always UCS_OK.
 */
static ucs_status_t uct_knem_rkey_pack(uct_md_h md, uct_mem_h memh,
                                       void *rkey_buffer)
{
    uct_knem_key_t *packed = (uct_knem_key_t*)rkey_buffer;
    uct_knem_key_t *key    = (uct_knem_key_t *)memh;

    packed->cookie  = (uint64_t)key->cookie;
    packed->address = (uintptr_t)key->address;
    ucs_trace("packed rkey: cookie 0x%"PRIx64" address %"PRIxPTR,
              key->cookie, key->address);
    return UCS_OK;
}

/**
 * Unpack a remote key into a newly allocated uct_knem_key_t.
 *
 * @param [in]  component    Component (not used).
 * @param [in]  rkey_buffer  Packed key, read as a uct_knem_key_t.
 * @param [out] rkey_p       Set to the address of the allocated key.
 * @param [out] handle_p     Set to NULL.
 *
 * @return UCS_OK, or UCS_ERR_NO_MEMORY if the key could not be allocated.
 */
static ucs_status_t uct_knem_rkey_unpack(uct_component_t *component,
                                         const void *rkey_buffer,
                                         uct_rkey_t *rkey_p, void **handle_p)
{
    /* The packed buffer is only read, so keep it const-qualified */
    const uct_knem_key_t *packed = (const uct_knem_key_t *)rkey_buffer;
    uct_knem_key_t *key;

    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
    if (NULL == key) {
        ucs_error("Failed to allocate memory for uct_knem_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    key->cookie  = packed->cookie;
    key->address = packed->address;
    *handle_p    = NULL;
    *rkey_p      = (uintptr_t)key;
    ucs_trace("unpacked rkey: key %p cookie 0x%"PRIx64" address %"PRIxPTR,
              key, key->cookie, key->address);
    return UCS_OK;
}

/**
 * Release a key allocated by uct_knem_rkey_unpack().
 *
 * @param [in] component  Component (not used).
 * @param [in] rkey       Address of the key to free.
 * @param [in] handle     Expected to be NULL.
 *
 * @return Always UCS_OK.
 */
static ucs_status_t uct_knem_rkey_release(uct_component_t *component,
                                          uct_rkey_t rkey, void *handle)
{
    ucs_assert(NULL == handle);
    ucs_free((void *)rkey);
    return UCS_OK;
}

/* Memory domain operations used when the registration cache is not in use */
static uct_md_ops_t md_ops = {
    .close              = uct_knem_md_close,
    .query              = uct_knem_md_query,
    .mkey_pack          = uct_knem_rkey_pack,
    .mem_reg            = uct_knem_mem_reg,
    .mem_dereg          = uct_knem_mem_dereg,
    .detect_memory_type = ucs_empty_function_return_unsupported,
};

/* Map a memory handle (pointer to the 'key' member) back to the rcache region
 * that contains it. */
static inline uct_knem_rcache_region_t*
uct_knem_rcache_region_from_memh(uct_mem_h memh)
{
    return ucs_container_of(memh, uct_knem_rcache_region_t, key);
}

/**
 * Register memory through the registration cache.
 *
 * @param [in]  uct_md   Memory domain; must be a uct_knem_md_t.
 * @param [in]  address  Start of the buffer to register.
 * @param [in]  length   Length of the buffer in bytes.
 * @param [in]  flags    Its address is passed to ucs_rcache_get() as the
 *                       registration callback argument.
 * @param [out] memh_p   On success, set to the key embedded in the region.
 *
 * @return UCS_OK, or the error returned by ucs_rcache_get().
 */
static ucs_status_t uct_knem_mem_rcache_reg(uct_md_h uct_md, void *address,
                                            size_t length, unsigned flags,
                                            uct_mem_h *memh_p)
{
    uct_knem_md_t *md = ucs_derived_of(uct_md, uct_knem_md_t);
    ucs_rcache_region_t *rregion;
    ucs_status_t status;

    status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE,
                            &flags, &rregion);
    if (status != UCS_OK) {
        return status;
    }

    ucs_assert(rregion->refcount > 0);
    *memh_p = &ucs_derived_of(rregion, uct_knem_rcache_region_t)->key;
    return UCS_OK;
}

/**
 * Return a region obtained by uct_knem_mem_rcache_reg() to the cache.
 *
 * @param [in] uct_md  Memory domain; must be a uct_knem_md_t.
 * @param [in] memh    Memory handle returned by uct_knem_mem_rcache_reg().
 *
 * @return Always UCS_OK.
 */
static ucs_status_t uct_knem_mem_rcache_dereg(uct_md_h uct_md, uct_mem_h memh)
{
    uct_knem_md_t *md                = ucs_derived_of(uct_md, uct_knem_md_t);
    uct_knem_rcache_region_t *region = uct_knem_rcache_region_from_memh(memh);

    ucs_rcache_region_put(md->rcache, &region->super);
    return UCS_OK;
}

/* Memory domain operations used when the registration cache is in use */
static uct_md_ops_t uct_knem_md_rcache_ops = {
    .close              = uct_knem_md_close,
    .query              = uct_knem_md_query,
    .mkey_pack          = uct_knem_rkey_pack,
    .mem_reg            = uct_knem_mem_rcache_reg,
    .mem_dereg          = uct_knem_mem_rcache_dereg,
    .detect_memory_type = ucs_empty_function_return_unsupported,
};

/**
 * Registration cache callback: register the address range of a region.
 *
 * @param [in] context               The uct_knem_md_t owning the cache.
 * @param [in] rcache                Registration cache (not used).
 * @param [in] arg                   Pointer to the registration flags given to
 *                                   ucs_rcache_get().
 * @param [in] rregion               Region to register.
 * @param [in] rcache_mem_reg_flags  If UCS_RCACHE_MEM_REG_HIDE_ERRORS is set,
 *                                   a registration failure is not logged.
 *
 * @return Status of uct_knem_mem_reg_internal().
 */
static ucs_status_t
uct_knem_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcache, void *arg,
                           ucs_rcache_region_t *rregion,
                           uint16_t rcache_mem_reg_flags)
{
    uct_knem_rcache_region_t *region =
                    ucs_derived_of(rregion, uct_knem_rcache_region_t);
    uct_knem_md_t *md                = context;
    /* uct_knem_mem_rcache_reg() passes the address of an 'unsigned' */
    unsigned *flags                  = arg;

    return uct_knem_mem_reg_internal(&md->super,
                                     (void*)region->super.super.start,
                                     region->super.super.end -
                                     region->super.super.start,
                                     *flags,
                                     rcache_mem_reg_flags &
                                     UCS_RCACHE_MEM_REG_HIDE_ERRORS,
                                     &region->key);
}

/**
 * Registration cache callback: destroy the KNEM region of a cached region.
 *
 * @param [in] context  The uct_knem_md_t owning the cache.
 * @param [in] rcache   Registration cache (not used).
 * @param [in] rregion  Region to deregister.
 */
static void uct_knem_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache,
                                         ucs_rcache_region_t *rregion)
{
    uct_knem_rcache_region_t *region =
                    ucs_derived_of(rregion, uct_knem_rcache_region_t);
    uct_knem_md_t *md                = context;

    uct_knem_mem_dereg_internal(&md->super, &region->key);
}

/**
 * Registration cache callback: format the cookie and address of a region.
 *
 * @param [in]  context  Not used.
 * @param [in]  rcache   Registration cache (not used).
 * @param [in]  rregion  Region to describe.
 * @param [out] buf      Destination buffer.
 * @param [in]  max      Size of @a buf.
 */
static void uct_knem_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
                                           ucs_rcache_region_t *rregion,
                                           char *buf, size_t max)
{
    uct_knem_rcache_region_t *region =
                    ucs_derived_of(rregion, uct_knem_rcache_region_t);
    uct_knem_key_t *key              = &region->key;

    snprintf(buf, max, "cookie %"PRIu64" addr %p", key->cookie,
             (void*)key->address);
}

/* Callbacks handed to the registration cache */
static ucs_rcache_ops_t uct_knem_rcache_ops = {
    .mem_reg     = uct_knem_rcache_mem_reg_cb,
    .mem_dereg   = uct_knem_rcache_mem_dereg_cb,
    .dump_region = uct_knem_rcache_dump_region_cb
};

/**
 * Open the KNEM memory domain.
 *
 * Allocates the domain, opens /dev/knem and, unless the RCACHE option is
 * UCS_NO, tries to create a registration cache. If cache creation fails the
 * open fails only when RCACHE is UCS_YES; otherwise the domain keeps using the
 * non-cached operations.
 *
 * @param [in]  component      Component (not used).
 * @param [in]  md_name        Memory domain name (not used).
 * @param [in]  uct_md_config  Configuration; must be a uct_knem_md_config_t.
 * @param [out] md_p           On success, set to the new memory domain.
 *
 * @return UCS_OK, UCS_ERR_NO_MEMORY if the domain could not be allocated,
 *         UCS_ERR_IO_ERROR if /dev/knem could not be opened, or the
 *         ucs_rcache_create() status when the cache is mandatory.
 */
static ucs_status_t
uct_knem_md_open(uct_component_t *component, const char *md_name,
                 const uct_md_config_t *uct_md_config, uct_md_h *md_p)
{
    const uct_knem_md_config_t *md_config =
                    ucs_derived_of(uct_md_config, uct_knem_md_config_t);
    uct_knem_md_t *knem_md;
    ucs_rcache_params_t rcache_params;
    ucs_status_t status;

    knem_md = ucs_malloc(sizeof(uct_knem_md_t), "uct_knem_md_t");
    if (NULL == knem_md) {
        ucs_error("Failed to allocate memory for uct_knem_md_t");
        return UCS_ERR_NO_MEMORY;
    }

    knem_md->super.ops         = &md_ops;
    knem_md->super.component   = &uct_knem_component;
    knem_md->reg_cost.overhead = 1200.0e-9;
    knem_md->reg_cost.growth   = 0.007e-9;
    knem_md->rcache            = NULL;

    knem_md->knem_fd = open("/dev/knem", O_RDWR);
    if (knem_md->knem_fd < 0) {
        ucs_error("Could not open the KNEM device file at /dev/knem: %m.");
        /* Allocated with ucs_malloc(), so it must be released with
         * ucs_free() rather than plain free() */
        ucs_free(knem_md);
        return UCS_ERR_IO_ERROR;
    }

    if (md_config->rcache_enable != UCS_NO) {
        rcache_params.region_struct_size = sizeof(uct_knem_rcache_region_t);
        rcache_params.alignment          = md_config->rcache.alignment;
        rcache_params.max_alignment      = ucs_get_page_size();
        rcache_params.ucm_events         = UCM_EVENT_VM_UNMAPPED;
        rcache_params.ucm_event_priority = md_config->rcache.event_prio;
        rcache_params.context            = knem_md;
        rcache_params.ops                = &uct_knem_rcache_ops;
        status = ucs_rcache_create(&rcache_params, "knem rcache device",
                                   ucs_stats_get_root(), &knem_md->rcache);
        if (status == UCS_OK) {
            knem_md->super.ops         = &uct_knem_md_rcache_ops;
            knem_md->reg_cost.overhead = md_config->rcache.overhead;
            knem_md->reg_cost.growth   = 0; /* It's close enough to 0 */
        } else {
            ucs_assert(knem_md->rcache == NULL);
            if (md_config->rcache_enable == UCS_YES) {
                ucs_error("Failed to create registration cache: %s",
                          ucs_status_string(status));
                /* Closes the device descriptor and frees knem_md */
                uct_knem_md_close(&knem_md->super);
                return status;
            } else {
                ucs_debug("Could not create registration cache: %s",
                          ucs_status_string(status));
            }
        }
    }

    *md_p = (uct_md_h)knem_md;
    return UCS_OK;
}

/* KNEM component descriptor */
uct_component_t uct_knem_component = {
    .query_md_resources = uct_knem_query_md_resources,
    .md_open            = uct_knem_md_open,
    .cm_open            = ucs_empty_function_return_unsupported,
    .rkey_unpack        = uct_knem_rkey_unpack,
    .rkey_ptr           = ucs_empty_function_return_unsupported,
    .rkey_release       = uct_knem_rkey_release,
    .name               = "knem",
    .md_config          = {
        .name           = "KNEM memory domain",
        .prefix         = "KNEM_",
        .table          = uct_knem_md_config_table,
        .size           = sizeof(uct_knem_md_config_t),
    },
    .cm_config          = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY,
    .tl_list            = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_knem_component),
    .flags              = 0
};
UCT_COMPONENT_REGISTER(&uct_knem_component);