/** * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. * * See file LICENSE for terms. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "ucx_info.h" #include #include #include #include #include #include static const char* cpu_model_names[] = { [UCS_CPU_MODEL_UNKNOWN] = "unknown", [UCS_CPU_MODEL_INTEL_IVYBRIDGE] = "IvyBridge", [UCS_CPU_MODEL_INTEL_SANDYBRIDGE] = "SandyBridge", [UCS_CPU_MODEL_INTEL_NEHALEM] = "Nehalem", [UCS_CPU_MODEL_INTEL_WESTMERE] = "Westmere", [UCS_CPU_MODEL_INTEL_HASWELL] = "Haswell", [UCS_CPU_MODEL_INTEL_BROADWELL] = "Broadwell", [UCS_CPU_MODEL_INTEL_SKYLAKE] = "Skylake", [UCS_CPU_MODEL_ARM_AARCH64] = "ARM 64-bit", [UCS_CPU_MODEL_AMD_NAPLES] = "Naples", [UCS_CPU_MODEL_AMD_ROME] = "Rome" }; static const char* cpu_vendor_names[] = { [UCS_CPU_VENDOR_UNKNOWN] = "unknown", [UCS_CPU_VENDOR_INTEL] = "Intel", [UCS_CPU_VENDOR_AMD] = "AMD", [UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM", [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC" }; static double measure_memcpy_bandwidth(size_t size) { ucs_time_t start_time, end_time; void *src, *dst; double result = 0.0; int iter; src = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (src == MAP_FAILED) { goto out; } dst = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (dst == MAP_FAILED) { goto out_unmap_src; } memset(dst, 0, size); memset(src, 0, size); memcpy(dst, src, size); iter = 0; start_time = ucs_get_time(); do { ucs_memcpy_relaxed(dst, src, size); end_time = ucs_get_time(); ++iter; } while (end_time < start_time + ucs_time_from_sec(0.5)); result = size * iter / ucs_time_to_sec(end_time - start_time); munmap(dst, size); out_unmap_src: munmap(src, size); out: return result; } void print_sys_info() { size_t size; printf("# Timer frequency: %.3f MHz\n", ucs_get_cpu_clocks_per_sec() / 1e6); printf("# CPU vendor: %s\n", cpu_vendor_names[ucs_arch_get_cpu_vendor()]); printf("# CPU model: %s\n", cpu_model_names[ucs_arch_get_cpu_model()]); ucs_arch_print_memcpy_limits(&ucs_global_opts.arch); printf("# Memcpy bandwidth:\n"); for (size = 4096; size <= 256 * UCS_MBYTE; size *= 2) { printf("# %10zu bytes: %.3f MB/s\n", size, measure_memcpy_bandwidth(size) / UCS_MBYTE); } }