/*
 * Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "utils/bullseye.h" /* BULLSEYE_EXCLUDE_BLOCK_START/END macros used below */
#include "vma/dev/ring_allocation_logic.h"

#define MODULE_NAME "ral"
#undef  MODULE_HDR_INFO
#define MODULE_HDR_INFO MODULE_NAME "%s:%d:%s() "
#undef  __INFO__
#define __INFO__ m_tostr.c_str()

#define ral_logpanic   __log_info_panic
#define ral_logerr     __log_info_err
#define ral_logwarn    __log_info_warn
#define ral_loginfo    __log_info_info
#define ral_logdbg     __log_info_dbg
#define ral_logfunc    __log_info_func
#define ral_logfuncall __log_info_funcall

ring_allocation_logic::ring_allocation_logic():
    m_ring_migration_ratio(0), m_source(-1),
    m_migration_try_count(0), m_migration_candidate(0),
    m_active(true), m_res_key()
{
}

ring_allocation_logic::ring_allocation_logic(ring_logic_t allocation_logic,
                                             int ring_migration_ratio, source_t source,
                                             resource_allocation_key &ring_profile):
    m_tostr("base"), m_ring_migration_ratio(ring_migration_ratio),
    m_source(source), m_migration_try_count(ring_migration_ratio)
{
    // A default (per-interface) profile that carries no explicit ring
    // profile key falls back to the allocation logic requested by the caller.
    if (ring_profile.get_ring_alloc_logic() == RING_LOGIC_PER_INTERFACE &&
        ring_profile.get_ring_profile_key() < START_RING_INDEX) {
        ring_profile.set_ring_alloc_logic(allocation_logic);
    }
    m_res_key = resource_allocation_key(ring_profile);
    m_migration_candidate = 0;
    m_res_key.set_user_id_key(calc_res_key_by_logic());
    m_active = true;
}

/**
 * @return the key that is part of a unique id in the rings map
 */
uint64_t ring_allocation_logic::calc_res_key_by_logic()
{
    uint64_t res_key = 0;

    switch (m_res_key.get_ring_alloc_logic()) {
    case RING_LOGIC_PER_INTERFACE:
        res_key = 0;
        if (safe_mce_sys().tcp_ctl_thread > CTL_THREAD_DISABLE)
            res_key = 1;
        break;
    case RING_LOGIC_PER_IP:
        res_key = m_source.m_ip;
        break;
    case RING_LOGIC_PER_SOCKET:
        res_key = m_source.m_fd;
        break;
    case RING_LOGIC_PER_USER_ID:
        res_key = m_res_key.get_user_id_key();
        break;
    case RING_LOGIC_PER_THREAD:
        res_key = pthread_self();
        break;
    case RING_LOGIC_PER_CORE:
    case RING_LOGIC_PER_CORE_ATTACH_THREADS:
        res_key = sched_getcpu();
        break;
    BULLSEYE_EXCLUDE_BLOCK_START
    default:
        // not supposed to get here
        ral_logdbg("invalid ring logic = %d", m_res_key.get_ring_alloc_logic());
        break;
    BULLSEYE_EXCLUDE_BLOCK_END
    }

    return res_key;
}
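/*
 * Illustration (not part of the original file; `profile` and `fd` below are
 * assumed to exist in the caller's scope): the user-id key computed above is
 * what partitions sockets across rings. Under RING_LOGIC_PER_THREAD, sockets
 * created from different threads get pthread_self() as their key and land on
 * different rings; under RING_LOGIC_PER_INTERFACE they all share key 0 (or 1
 * when a TCP control thread is enabled). A minimal sketch:
 *
 *     ring_allocation_logic ral(RING_LOGIC_PER_THREAD, 0, fd, profile);
 *     // ral now carries pthread_self() as its user-id key, so ring
 *     // lookups in the rings map are segregated per calling thread.
 */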
resource_allocation_key *ring_allocation_logic::create_new_key(in_addr_t addr,
                                                               int suggested_cpu /* = NO_CPU */)
{
    if (m_res_key.get_ring_alloc_logic() == RING_LOGIC_PER_CORE_ATTACH_THREADS) {
        pthread_t tid = pthread_self();
        int cpu = g_cpu_manager.reserve_cpu_for_thread(tid, suggested_cpu);
        if (cpu >= 0) {
            m_res_key.set_user_id_key(cpu);
            return &m_res_key;
        }
    }

    if (m_res_key.get_ring_alloc_logic() == RING_LOGIC_PER_IP) {
        m_source.m_ip = addr;
    }

    m_res_key.set_user_id_key(calc_res_key_by_logic());
    return &m_res_key;
}

/*
 * Return true if ring migration is recommended.
 */
bool ring_allocation_logic::should_migrate_ring()
{
    ral_logfuncall("currently accessed from thread=%lu, cpu=%d",
                   pthread_self(), sched_getcpu());

    if (false == m_active) {
        return false;
    }

    int count_max = m_ring_migration_ratio;
    if (m_migration_candidate) {
        // a migration candidate must stay stable for
        // CANDIDATE_STABILITY_ROUNDS consecutive checks before we migrate
        count_max = CANDIDATE_STABILITY_ROUNDS;
        uint64_t new_id = calc_res_key_by_logic();
        if (m_migration_candidate != new_id) {
            m_migration_candidate = 0;
            m_migration_try_count = 0;
            return false;
        }
    }

    if (m_migration_try_count < count_max) {
        m_migration_try_count++;
        return false;
    } else {
        m_migration_try_count = 0;
    }

    if (!m_migration_candidate) {
        // save the allocation key currently in use
        // (no need to save the profile or the allocation logic)
        uint64_t curr_id = m_res_key.get_user_id_key();
        // calculate the new key
        uint64_t new_id = calc_res_key_by_logic();
        if (new_id == curr_id || g_n_internal_thread_id == curr_id) {
            return false;
        }
        m_migration_candidate = new_id;
        return false;
    }

    ral_logdbg("migrating from ring of id=%s to ring of id=%lu",
               m_res_key.to_str(), m_migration_candidate);
    m_migration_candidate = 0;
    return true;
}

cpu_manager g_cpu_manager;
__thread int g_n_thread_cpu_core = NO_CPU;

cpu_manager::cpu_manager()
{
    reset();
}

void cpu_manager::reset()
{
    memset(m_cpu_thread_count, 0, sizeof(m_cpu_thread_count));
}

int cpu_manager::reserve_cpu_for_thread(pthread_t tid, int suggested_cpu /* = NO_CPU */)
{
    lock();
    int cpu = g_n_thread_cpu_core;
    if (cpu != NO_CPU) { // this thread already has a reserved cpu
        unlock();
        return cpu;
    }

    cpu_set_t cpu_set;
    CPU_ZERO(&cpu_set);
    int ret = pthread_getaffinity_np(tid, sizeof(cpu_set_t), &cpu_set);
    if (ret) {
        unlock();
        __log_err("pthread_getaffinity_np failed for tid=%lu, ret=%d (errno=%d %m)",
                  tid, ret, errno);
        return -1;
    }

    int avail_cpus = CPU_COUNT(&cpu_set);
    if (avail_cpus == 0) {
        unlock();
        __log_err("no cpu available for tid=%lu", tid);
        return -1;
    }

    if (avail_cpus == 1) { // the affinity mask already holds a single cpu
        for (cpu = 0; cpu < MAX_CPU && !CPU_ISSET(cpu, &cpu_set); cpu++) {}
    } else { // need to choose one cpu to attach to
        // pick the allowed cpu with the fewest threads reserved so far
        int min_cpu_count = -1;
        for (int i = 0, j = 0; i < MAX_CPU && j < avail_cpus; i++) {
            if (!CPU_ISSET(i, &cpu_set)) continue;
            j++;
            if (min_cpu_count < 0 || m_cpu_thread_count[i] < min_cpu_count) {
                min_cpu_count = m_cpu_thread_count[i];
                cpu = i;
            }
        }
        // prefer the caller's suggestion if it is allowed and not
        // noticeably more loaded than the least-loaded cpu
        if (suggested_cpu >= 0 &&
            CPU_ISSET(suggested_cpu, &cpu_set) &&
            m_cpu_thread_count[suggested_cpu] <= min_cpu_count + 1) {
            cpu = suggested_cpu;
        }
        CPU_ZERO(&cpu_set);
        CPU_SET(cpu, &cpu_set);
        __log_dbg("attach tid=%lu running on cpu=%d to cpu=%d",
                  tid, sched_getcpu(), cpu);
        ret = pthread_setaffinity_np(tid, sizeof(cpu_set_t), &cpu_set);
        if (ret) {
            unlock();
            __log_err("pthread_setaffinity_np failed for tid=%lu to cpu=%d, ret=%d (errno=%d %m)",
                      tid, cpu, ret, errno);
            return -1;
        }
    }

    g_n_thread_cpu_core = cpu;
    if (cpu > NO_CPU && cpu < MAX_CPU)
        m_cpu_thread_count[cpu]++;
    unlock();
    return cpu;
}
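/*
 * Usage sketch (illustrative only; rebuild_ring() and m_bound_addr are
 * hypothetical names, not part of this file). A socket owner can poll
 * should_migrate_ring() periodically; it reports true only after the newly
 * computed key has stayed stable for CANDIDATE_STABILITY_ROUNDS consecutive
 * checks, so a thread that briefly bounces between cores does not trigger a
 * ring rebuild:
 *
 *     if (m_ring_alloc_logic.should_migrate_ring()) {
 *         resource_allocation_key *key =
 *                 m_ring_alloc_logic.create_new_key(m_bound_addr);
 *         rebuild_ring(key); // detach from the old ring, attach to the new one
 *     }
 */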