/*
* Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
/* To try to get all declarations duplicated below. */
#define _WIN32_WINNT 0x0601
#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <windows.h>
#ifndef HAVE_KAFFINITY
typedef ULONG_PTR KAFFINITY, *PKAFFINITY;
#endif
#ifndef HAVE_PROCESSOR_CACHE_TYPE
typedef enum _PROCESSOR_CACHE_TYPE {
CacheUnified,
CacheInstruction,
CacheData,
CacheTrace
} PROCESSOR_CACHE_TYPE;
#endif
#ifndef CACHE_FULLY_ASSOCIATIVE
#define CACHE_FULLY_ASSOCIATIVE 0xFF
#endif
#ifndef MAXIMUM_PROC_PER_GROUP /* missing in MinGW */
#define MAXIMUM_PROC_PER_GROUP 64
#endif
#ifndef HAVE_CACHE_DESCRIPTOR
typedef struct _CACHE_DESCRIPTOR {
BYTE Level;
BYTE Associativity;
WORD LineSize;
DWORD Size; /* in bytes */
PROCESSOR_CACHE_TYPE Type;
} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR;
#endif
#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP
typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
RelationProcessorCore,
RelationNumaNode,
RelationCache,
RelationProcessorPackage,
RelationGroup,
RelationAll = 0xffff
} LOGICAL_PROCESSOR_RELATIONSHIP;
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
# ifndef HAVE_RELATIONPROCESSORPACKAGE
# define RelationProcessorPackage 3
# define RelationGroup 4
# define RelationAll 0xffff
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
ULONG_PTR ProcessorMask;
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
_ANONYMOUS_UNION
union {
struct {
BYTE flags;
} ProcessorCore;
struct {
DWORD NodeNumber;
} NumaNode;
CACHE_DESCRIPTOR Cache;
ULONGLONG Reserved[2];
} DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
#endif
/* Extended interface, for group support */
#ifndef HAVE_GROUP_AFFINITY
typedef struct _GROUP_AFFINITY {
KAFFINITY Mask;
WORD Group;
WORD Reserved[3];
} GROUP_AFFINITY, *PGROUP_AFFINITY;
#endif
#ifndef HAVE_PROCESSOR_RELATIONSHIP
typedef struct _PROCESSOR_RELATIONSHIP {
BYTE Flags;
BYTE Reserved[21];
WORD GroupCount;
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
#endif
#ifndef HAVE_NUMA_NODE_RELATIONSHIP
typedef struct _NUMA_NODE_RELATIONSHIP {
DWORD NodeNumber;
BYTE Reserved[20];
GROUP_AFFINITY GroupMask;
} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
#endif
#ifndef HAVE_CACHE_RELATIONSHIP
typedef struct _CACHE_RELATIONSHIP {
BYTE Level;
BYTE Associativity;
WORD LineSize;
DWORD CacheSize;
PROCESSOR_CACHE_TYPE Type;
BYTE Reserved[20];
GROUP_AFFINITY GroupMask;
} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
#endif
#ifndef HAVE_PROCESSOR_GROUP_INFO
typedef struct _PROCESSOR_GROUP_INFO {
BYTE MaximumProcessorCount;
BYTE ActiveProcessorCount;
BYTE Reserved[38];
KAFFINITY ActiveProcessorMask;
} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
#endif
#ifndef HAVE_GROUP_RELATIONSHIP
typedef struct _GROUP_RELATIONSHIP {
WORD MaximumGroupCount;
WORD ActiveGroupCount;
ULONGLONG Reserved[2];
PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
#endif
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
DWORD Size;
_ANONYMOUS_UNION
union {
PROCESSOR_RELATIONSHIP Processor;
NUMA_NODE_RELATIONSHIP NumaNode;
CACHE_RELATIONSHIP Cache;
GROUP_RELATIONSHIP Group;
/* Odd: no member to tell the cpu mask of the package... */
} DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
#endif
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
typedef union _PSAPI_WORKING_SET_EX_BLOCK {
ULONG_PTR Flags;
struct {
unsigned Valid :1;
unsigned ShareCount :3;
unsigned Win32Protection :11;
unsigned Shared :1;
unsigned Node :6;
unsigned Locked :1;
unsigned LargePage :1;
};
} PSAPI_WORKING_SET_EX_BLOCK;
#endif
#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION
typedef struct _PSAPI_WORKING_SET_EX_INFORMATION {
PVOID VirtualAddress;
PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes;
} PSAPI_WORKING_SET_EX_INFORMATION;
#endif
#ifndef HAVE_PROCESSOR_NUMBER
typedef struct _PROCESSOR_NUMBER {
WORD Group;
BYTE Number;
BYTE Reserved;
} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
#endif
/* Function pointers */
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void);
static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc;
static unsigned long nr_processor_groups = 1;
static unsigned long max_numanode_index = 0;
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD);
static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc;
typedef DWORD (WINAPI *PFN_GETCURRENTPROCESSORNUMBER)(void);
static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc;
typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength);
static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc;
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
static PFN_SETTHREADGROUPAFFINITY SetThreadGroupAffinityProc;
typedef BOOL (WINAPI *PFN_GETTHREADGROUPAFFINITY)(HANDLE hThread, PGROUP_AFFINITY GroupAffinity);
static PFN_GETTHREADGROUPAFFINITY GetThreadGroupAffinityProc;
typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODE)(UCHAR Node, PULONGLONG AvailableBytes);
static PFN_GETNUMAAVAILABLEMEMORYNODE GetNumaAvailableMemoryNodeProc;
typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODEEX)(USHORT Node, PULONGLONG AvailableBytes);
static PFN_GETNUMAAVAILABLEMEMORYNODEEX GetNumaAvailableMemoryNodeExProc;
typedef LPVOID (WINAPI *PFN_VIRTUALALLOCEXNUMA)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred);
static PFN_VIRTUALALLOCEXNUMA VirtualAllocExNumaProc;
typedef BOOL (WINAPI *PFN_VIRTUALFREEEX)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType);
static PFN_VIRTUALFREEEX VirtualFreeExProc;
typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb);
static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc;
static void hwloc_win_get_function_ptrs(void)
{
HMODULE kernel32;
kernel32 = LoadLibrary("kernel32.dll");
if (kernel32) {
GetActiveProcessorGroupCountProc =
(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
GetActiveProcessorCountProc =
(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
GetLogicalProcessorInformationProc =
(PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation");
GetCurrentProcessorNumberProc =
(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
GetCurrentProcessorNumberExProc =
(PFN_GETCURRENTPROCESSORNUMBEREX) GetProcAddress(kernel32, "GetCurrentProcessorNumberEx");
SetThreadGroupAffinityProc =
(PFN_SETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "SetThreadGroupAffinity");
GetThreadGroupAffinityProc =
(PFN_GETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "GetThreadGroupAffinity");
GetNumaAvailableMemoryNodeProc =
(PFN_GETNUMAAVAILABLEMEMORYNODE) GetProcAddress(kernel32, "GetNumaAvailableMemoryNode");
GetNumaAvailableMemoryNodeExProc =
(PFN_GETNUMAAVAILABLEMEMORYNODEEX) GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx");
GetLogicalProcessorInformationExProc =
(PFN_GETLOGICALPROCESSORINFORMATIONEX)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
QueryWorkingSetExProc =
(PFN_QUERYWORKINGSETEX) GetProcAddress(kernel32, "K32QueryWorkingSetEx");
VirtualAllocExNumaProc =
(PFN_VIRTUALALLOCEXNUMA) GetProcAddress(kernel32, "VirtualAllocExNuma");
VirtualFreeExProc =
(PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx");
}
if (GetActiveProcessorGroupCountProc)
nr_processor_groups = GetActiveProcessorGroupCountProc();
if (!QueryWorkingSetExProc) {
HMODULE psapi = LoadLibrary("psapi.dll");
if (psapi)
QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
}
}
/*
* ULONG_PTR and DWORD_PTR are 64/32bits depending on the arch
* while bitmaps use unsigned long (always 32bits)
*/
static void hwloc_bitmap_from_ULONG_PTR(hwloc_bitmap_t set, ULONG_PTR mask)
{
#if SIZEOF_VOID_P == 8
hwloc_bitmap_from_ulong(set, mask & 0xffffffff);
hwloc_bitmap_set_ith_ulong(set, 1, mask >> 32);
#else
hwloc_bitmap_from_ulong(set, mask);
#endif
}
static void hwloc_bitmap_from_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask)
{
#if SIZEOF_VOID_P == 8
hwloc_bitmap_from_ith_ulong(set, 2*i, mask & 0xffffffff);
hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32);
#else
hwloc_bitmap_from_ith_ulong(set, i, mask);
#endif
}
static void hwloc_bitmap_set_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask)
{
#if SIZEOF_VOID_P == 8
hwloc_bitmap_set_ith_ulong(set, 2*i, mask & 0xffffffff);
hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32);
#else
hwloc_bitmap_set_ith_ulong(set, i, mask);
#endif
}
static ULONG_PTR hwloc_bitmap_to_ULONG_PTR(hwloc_const_bitmap_t set)
{
#if SIZEOF_VOID_P == 8
ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 1);
up <<= 32;
up |= hwloc_bitmap_to_ulong(set);
return up;
#else
return hwloc_bitmap_to_ulong(set);
#endif
}
static ULONG_PTR hwloc_bitmap_to_ith_ULONG_PTR(hwloc_const_bitmap_t set, unsigned i)
{
#if SIZEOF_VOID_P == 8
ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 2*i+1);
up <<= 32;
up |= hwloc_bitmap_to_ith_ulong(set, 2*i);
return up;
#else
return hwloc_bitmap_to_ith_ulong(set, i);
#endif
}
/* convert set into index+mask if all set bits are in the same ULONG.
* otherwise return -1.
*/
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
unsigned first_ulp, last_ulp;
if (hwloc_bitmap_weight(set) == -1)
return -1;
first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
if (first_ulp != last_ulp)
return -1;
*mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
*index = first_ulp;
return 0;
}
/**************************************************************
* hwloc PU numbering with respect to Windows processor groups
*
* Everywhere below we reserve 64 physical indexes per processor groups because that's
* the maximum (MAXIMUM_PROC_PER_GROUP). Windows may actually use less bits than that
* in some groups (either to avoid splitting NUMA nodes across groups, or because of OS
* tweaks such as "bcdedit /set groupsize 8") but we keep some unused indexes for simplicity.
* That means PU physical indexes and cpusets may be non-contigous.
* That also means hwloc_fallback_nbprocessors() below must return the last PU index + 1
* instead the actual number of processors.
*/
/********************
* last_cpu_location
*/
static int
hwloc_win_get_thisthread_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
{
assert(GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1));
if (nr_processor_groups > 1 || !GetCurrentProcessorNumberProc) {
PROCESSOR_NUMBER num;
GetCurrentProcessorNumberExProc(&num);
hwloc_bitmap_from_ith_ULONG_PTR(set, num.Group, ((ULONG_PTR)1) << num.Number);
return 0;
}
hwloc_bitmap_from_ith_ULONG_PTR(set, 0, ((ULONG_PTR)1) << GetCurrentProcessorNumberProc());
return 0;
}
/* TODO: hwloc_win_get_thisproc_last_cpu_location() using
* CreateToolhelp32Snapshot(), Thread32First/Next()
* th.th32OwnerProcessID == GetCurrentProcessId() for filtering within process
* OpenThread(THREAD_SET_INFORMATION|THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID) to get a handle.
*/
/******************************
* set cpu/membind for threads
*/
/* TODO: SetThreadIdealProcessor{,Ex} */
static int
hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
{
DWORD_PTR mask;
unsigned group;
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
if (hwloc_bitmap_to_single_ULONG_PTR(hwloc_set, &group, &mask) < 0) {
errno = ENOSYS;
return -1;
}
assert(nr_processor_groups == 1 || SetThreadGroupAffinityProc);
if (nr_processor_groups > 1) {
GROUP_AFFINITY aff;
memset(&aff, 0, sizeof(aff)); /* we get Invalid Parameter error if Reserved field isn't cleared */
aff.Group = group;
aff.Mask = mask;
if (!SetThreadGroupAffinityProc(thread, &aff, NULL))
return -1;
} else {
/* SetThreadAffinityMask() only changes the mask inside the current processor group */
/* The resulting binding is always strict */
if (!SetThreadAffinityMask(thread, mask))
return -1;
}
return 0;
}
static int
hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_win_set_thread_cpubind(topology, GetCurrentThread(), hwloc_set, flags);
}
static int
hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int ret;
hwloc_const_cpuset_t cpuset;
hwloc_cpuset_t _cpuset = NULL;
if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
|| flags & HWLOC_MEMBIND_NOCPUBIND) {
errno = ENOSYS;
return -1;
}
if (policy == HWLOC_MEMBIND_DEFAULT) {
cpuset = hwloc_topology_get_complete_cpuset(topology);
} else {
cpuset = _cpuset = hwloc_bitmap_alloc();
hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset);
}
ret = hwloc_win_set_thisthread_cpubind(topology, cpuset,
(flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
hwloc_bitmap_free(_cpuset);
return ret;
}
/******************************
* get cpu/membind for threads
*/
static int
hwloc_win_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
{
GROUP_AFFINITY aff;
assert(GetThreadGroupAffinityProc);
if (!GetThreadGroupAffinityProc(thread, &aff))
return -1;
hwloc_bitmap_from_ith_ULONG_PTR(set, aff.Group, aff.Mask);
return 0;
}
static int
hwloc_win_get_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
{
return hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), set, flags);
}
static int
hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
int ret;
hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
ret = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpuset, flags);
if (!ret) {
*policy = HWLOC_MEMBIND_BIND;
hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
}
hwloc_bitmap_free(cpuset);
return ret;
}
/********************************
* set cpu/membind for processes
*/
static int
hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags)
{
DWORD_PTR mask;
assert(nr_processor_groups == 1);
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
/* TODO: SetThreadGroupAffinity() for all threads doesn't enforce the whole process affinity,
* maybe because of process-specific resource locality */
/* TODO: if we are in a single group (check with GetProcessGroupAffinity()),
* SetProcessAffinityMask() changes the binding within that same group.
*/
/* TODO: NtSetInformationProcess() works very well for binding to any mask in a single group,
* but it's an internal routine.
*/
/* TODO: checks whether hwloc-bind.c needs to pass INHERIT_PARENT_AFFINITY to CreateProcess() instead of execvp(). */
/* The resulting binding is always strict */
mask = hwloc_bitmap_to_ULONG_PTR(hwloc_set);
if (!SetProcessAffinityMask(proc, mask))
return -1;
return 0;
}
static int
hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
return hwloc_win_set_proc_cpubind(topology, GetCurrentProcess(), hwloc_set, flags);
}
static int
hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
int ret;
hwloc_const_cpuset_t cpuset;
hwloc_cpuset_t _cpuset = NULL;
if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
|| flags & HWLOC_MEMBIND_NOCPUBIND) {
errno = ENOSYS;
return -1;
}
if (policy == HWLOC_MEMBIND_DEFAULT) {
cpuset = hwloc_topology_get_complete_cpuset(topology);
} else {
cpuset = _cpuset = hwloc_bitmap_alloc();
hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset);
}
ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset,
(flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
hwloc_bitmap_free(_cpuset);
return ret;
}
static int
hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
return hwloc_win_set_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags);
}
/********************************
* get cpu/membind for processes
*/
static int
hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags)
{
DWORD_PTR proc_mask, sys_mask;
assert(nr_processor_groups == 1);
if (flags & HWLOC_CPUBIND_NOMEMBIND) {
errno = ENOSYS;
return -1;
}
/* TODO: if we are in a single group (check with GetProcessGroupAffinity()),
* GetProcessAffinityMask() gives the mask within that group.
*/
/* TODO: if we are in multiple groups, GetProcessGroupAffinity() gives their IDs,
* but we don't know their masks.
*/
/* TODO: GetThreadGroupAffinity() for all threads can be smaller than the whole process affinity,
* maybe because of process-specific resource locality.
*/
if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask))
return -1;
hwloc_bitmap_from_ULONG_PTR(hwloc_set, proc_mask);
return 0;
}
static int
hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
int ret;
hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset,
(flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
if (!ret) {
*policy = HWLOC_MEMBIND_BIND;
hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
}
hwloc_bitmap_free(cpuset);
return ret;
}
static int
hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
return hwloc_win_get_proc_cpubind(topology, GetCurrentProcess(), hwloc_cpuset, flags);
}
static int
hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
return hwloc_win_get_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags);
}
/************************
* membind alloc/free
*/
static void *
hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) {
return VirtualAlloc(NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE);
}
static void *
hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) {
int node;
switch (policy) {
case HWLOC_MEMBIND_DEFAULT:
case HWLOC_MEMBIND_BIND:
break;
default:
errno = ENOSYS;
return hwloc_alloc_or_fail(topology, len, flags);
}
if (flags & HWLOC_MEMBIND_STRICT) {
errno = ENOSYS;
return NULL;
}
if (policy == HWLOC_MEMBIND_DEFAULT
|| hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology)))
return hwloc_win_alloc(topology, len);
if (hwloc_bitmap_weight(nodeset) != 1) {
/* Not a single node, can't do this */
errno = EXDEV;
return hwloc_alloc_or_fail(topology, len, flags);
}
node = hwloc_bitmap_first(nodeset);
return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
}
static int
hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) {
if (!addr)
return 0;
if (!VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE))
return -1;
return 0;
}
/**********************
* membind for areas
*/
static int
hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags __hwloc_attribute_unused)
{
SYSTEM_INFO SystemInfo;
DWORD page_size;
uintptr_t start;
unsigned nb;
PSAPI_WORKING_SET_EX_INFORMATION *pv;
unsigned i;
GetSystemInfo(&SystemInfo);
page_size = SystemInfo.dwPageSize;
start = (((uintptr_t) addr) / page_size) * page_size;
nb = (unsigned)((((uintptr_t) addr + len - start) + page_size - 1) / page_size);
if (!nb)
nb = 1;
pv = calloc(nb, sizeof(*pv));
if (!pv)
return -1;
for (i = 0; i < nb; i++)
pv[i].VirtualAddress = (void*) (start + i * page_size);
if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) {
free(pv);
return -1;
}
for (i = 0; i < nb; i++) {
if (pv[i].VirtualAttributes.Valid)
hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node);
}
free(pv);
return 0;
}
/*************************
* discovery
*/
static int
hwloc_look_windows(struct hwloc_backend *backend)
{
struct hwloc_topology *topology = backend->topology;
hwloc_bitmap_t groups_pu_set = NULL;
SYSTEM_INFO SystemInfo;
DWORD length;
int gotnuma = 0;
int gotnumamemory = 0;
if (topology->levels[0][0]->cpuset)
/* somebody discovered things */
return -1;
hwloc_alloc_root_sets(topology->levels[0][0]);
GetSystemInfo(&SystemInfo);
if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo;
unsigned id;
unsigned i;
struct hwloc_obj *obj;
hwloc_obj_type_t type;
length = 0;
procInfo = NULL;
while (1) {
if (GetLogicalProcessorInformationProc(procInfo, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -1;
tmpprocInfo = realloc(procInfo, length);
if (!tmpprocInfo) {
free(procInfo);
goto out;
}
procInfo = tmpprocInfo;
}
assert(!length || procInfo);
for (i = 0; i < length / sizeof(*procInfo); i++) {
/* Ignore unknown caches */
if (procInfo->Relationship == RelationCache
&& procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
continue;
id = HWLOC_UNKNOWN_INDEX;
switch (procInfo[i].Relationship) {
case RelationNumaNode:
type = HWLOC_OBJ_NUMANODE;
id = procInfo[i].NumaNode.NodeNumber;
gotnuma++;
if (id > max_numanode_index)
max_numanode_index = id;
break;
case RelationProcessorPackage:
type = HWLOC_OBJ_PACKAGE;
break;
case RelationCache:
type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1;
break;
case RelationProcessorCore:
type = HWLOC_OBJ_CORE;
break;
case RelationGroup:
default:
type = HWLOC_OBJ_GROUP;
break;
}
if (!hwloc_filter_check_keep_object_type(topology, type))
continue;
obj = hwloc_alloc_setup_object(topology, type, id);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_debug("%s#%u mask %llx\n", hwloc_obj_type_string(type), id, (unsigned long long) procInfo[i].ProcessorMask);
/* ProcessorMask is a ULONG_PTR */
hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask);
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
switch (type) {
case HWLOC_OBJ_NUMANODE:
{
ULONGLONG avail;
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, id);
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) {
obj->attr->numanode.local_memory = avail;
gotnumamemory++;
}
obj->attr->numanode.page_types_len = 2;
obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types));
memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types));
obj->attr->numanode.page_types_len = 1;
obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize;
#if HAVE_DECL__SC_LARGE_PAGESIZE
obj->attr->numanode.page_types_len++;
obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
break;
}
case HWLOC_OBJ_L1CACHE:
case HWLOC_OBJ_L2CACHE:
case HWLOC_OBJ_L3CACHE:
case HWLOC_OBJ_L4CACHE:
case HWLOC_OBJ_L5CACHE:
case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE:
obj->attr->cache.size = procInfo[i].Cache.Size;
obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ;
obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
obj->attr->cache.depth = procInfo[i].Cache.Level;
switch (procInfo->Cache.Type) {
case CacheUnified:
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
break;
case CacheData:
obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
break;
case CacheInstruction:
obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
break;
default:
hwloc_free_unlinked_object(obj);
continue;
}
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
break;
default:
break;
}
hwloc_insert_object_by_cpuset(topology, obj);
}
free(procInfo);
}
if (GetLogicalProcessorInformationExProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo;
unsigned id;
struct hwloc_obj *obj;
hwloc_obj_type_t type;
length = 0;
procInfoTotal = NULL;
while (1) {
if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -1;
tmpprocInfoTotal = realloc(procInfoTotal, length);
if (!tmpprocInfoTotal) {
free(procInfoTotal);
goto out;
}
procInfoTotal = tmpprocInfoTotal;
}
for (procInfo = procInfoTotal;
(void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
unsigned num, i;
GROUP_AFFINITY *GroupMask;
/* Ignore unknown caches */
if (procInfo->Relationship == RelationCache
&& procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
continue;
id = HWLOC_UNKNOWN_INDEX;
switch (procInfo->Relationship) {
case RelationNumaNode:
type = HWLOC_OBJ_NUMANODE;
num = 1;
GroupMask = &procInfo->NumaNode.GroupMask;
id = procInfo->NumaNode.NodeNumber;
gotnuma++;
if (id > max_numanode_index)
max_numanode_index = id;
break;
case RelationProcessorPackage:
type = HWLOC_OBJ_PACKAGE;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationCache:
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
num = 1;
GroupMask = &procInfo->Cache.GroupMask;
break;
case RelationProcessorCore:
type = HWLOC_OBJ_CORE;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationGroup:
/* So strange an interface... */
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
KAFFINITY mask;
hwloc_bitmap_t set;
set = hwloc_bitmap_alloc();
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
hwloc_debug("group %u %d cpus mask %lx\n", id,
procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask);
/* KAFFINITY is ULONG_PTR */
hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
/* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
* ULONG_PTR is 32bits, so half the group is invisible?
* maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned?
*/
hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set);
/* save the set of PUs so that we can create them at the end */
if (!groups_pu_set)
groups_pu_set = hwloc_bitmap_alloc();
hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
obj->cpuset = set;
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
hwloc_insert_object_by_cpuset(topology, obj);
} else
hwloc_bitmap_free(set);
}
continue;
default:
/* Don't know how to get the mask. */
hwloc_debug("unknown relation %d\n", procInfo->Relationship);
continue;
}
if (!hwloc_filter_check_keep_object_type(topology, type))
continue;
obj = hwloc_alloc_setup_object(topology, type, id);
obj->cpuset = hwloc_bitmap_alloc();
for (i = 0; i < num; i++) {
hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_obj_type_string(type), id, i, GroupMask[i].Group, GroupMask[i].Mask);
/* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */
hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask);
/* FIXME: scale id to id*8/sizeof(ULONG_PTR) as above? */
}
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
switch (type) {
case HWLOC_OBJ_NUMANODE:
{
ULONGLONG avail;
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, id);
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) {
obj->attr->numanode.local_memory = avail;
gotnumamemory++;
}
obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types));
memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types));
obj->attr->numanode.page_types_len = 1;
obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize;
#if HAVE_DECL__SC_LARGE_PAGESIZE
obj->attr->numanode.page_types_len++;
obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
break;
}
case HWLOC_OBJ_L1CACHE:
case HWLOC_OBJ_L2CACHE:
case HWLOC_OBJ_L3CACHE:
case HWLOC_OBJ_L4CACHE:
case HWLOC_OBJ_L5CACHE:
case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE:
obj->attr->cache.size = procInfo->Cache.CacheSize;
obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo->Cache.Associativity ;
obj->attr->cache.linesize = procInfo->Cache.LineSize;
obj->attr->cache.depth = procInfo->Cache.Level;
switch (procInfo->Cache.Type) {
case CacheUnified:
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
break;
case CacheData:
obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
break;
case CacheInstruction:
obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
break;
default:
hwloc_free_unlinked_object(obj);
continue;
}
break;
default:
break;
}
hwloc_insert_object_by_cpuset(topology, obj);
}
free(procInfoTotal);
}
topology->support.discovery->pu = 1;
topology->support.discovery->numa = gotnuma;
topology->support.discovery->numa_memory = gotnumamemory;
if (groups_pu_set) {
/* the system supports multiple Groups.
* PU indexes may be discontiguous, especially if Groups contain less than 64 procs.
*/
hwloc_obj_t obj;
unsigned idx;
hwloc_bitmap_foreach_begin(idx, groups_pu_set) {
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_only(obj->cpuset, idx);
hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
idx, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
} hwloc_bitmap_foreach_end();
hwloc_bitmap_free(groups_pu_set);
} else {
/* no processor groups */
SYSTEM_INFO sysinfo;
hwloc_obj_t obj;
unsigned idx;
GetSystemInfo(&sysinfo);
for(idx=0; idx<32; idx++)
if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_only(obj->cpuset, idx);
hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
idx, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
}
}
out:
hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows");
hwloc_add_uname_info(topology, NULL);
return 0;
}
void
hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks,
struct hwloc_topology_support *support)
{
if (GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1))
hooks->get_thisthread_last_cpu_location = hwloc_win_get_thisthread_last_cpu_location;
if (nr_processor_groups == 1) {
hooks->set_proc_cpubind = hwloc_win_set_proc_cpubind;
hooks->get_proc_cpubind = hwloc_win_get_proc_cpubind;
hooks->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind;
hooks->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind;
hooks->set_proc_membind = hwloc_win_set_proc_membind;
hooks->get_proc_membind = hwloc_win_get_proc_membind;
hooks->set_thisproc_membind = hwloc_win_set_thisproc_membind;
hooks->get_thisproc_membind = hwloc_win_get_thisproc_membind;
}
if (nr_processor_groups == 1 || SetThreadGroupAffinityProc) {
hooks->set_thread_cpubind = hwloc_win_set_thread_cpubind;
hooks->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind;
hooks->set_thisthread_membind = hwloc_win_set_thisthread_membind;
}
if (GetThreadGroupAffinityProc) {
hooks->get_thread_cpubind = hwloc_win_get_thread_cpubind;
hooks->get_thisthread_cpubind = hwloc_win_get_thisthread_cpubind;
hooks->get_thisthread_membind = hwloc_win_get_thisthread_membind;
}
if (VirtualAllocExNumaProc) {
hooks->alloc_membind = hwloc_win_alloc_membind;
hooks->alloc = hwloc_win_alloc;
hooks->free_membind = hwloc_win_free_membind;
support->membind->bind_membind = 1;
}
if (QueryWorkingSetExProc && max_numanode_index <= 63 /* PSAPI_WORKING_SET_EX_BLOCK.Node is 6 bits only */)
hooks->get_area_memlocation = hwloc_win_get_area_memlocation;
}
static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused)
{
hwloc_win_get_function_ptrs();
return 0;
}
static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused)
{
}
static struct hwloc_backend *
hwloc_windows_component_instantiate(struct hwloc_disc_component *component,
const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused)
{
struct hwloc_backend *backend;
backend = hwloc_backend_alloc(component);
if (!backend)
return NULL;
backend->discover = hwloc_look_windows;
return backend;
}
static struct hwloc_disc_component hwloc_windows_disc_component = {
HWLOC_DISC_COMPONENT_TYPE_CPU,
"windows",
HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
hwloc_windows_component_instantiate,
50,
1,
NULL
};
const struct hwloc_component hwloc_windows_component = {
HWLOC_COMPONENT_ABI,
hwloc_windows_component_init, hwloc_windows_component_finalize,
HWLOC_COMPONENT_TYPE_DISC,
0,
&hwloc_windows_disc_component
};
int
hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) {
int n;
SYSTEM_INFO sysinfo;
/* by default, ignore groups (return only the number in the current group) */
GetSystemInfo(&sysinfo);
n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */
if (nr_processor_groups > 1) {
/* assume n-1 groups are complete, since that's how we store things in cpusets */
if (GetActiveProcessorCountProc)
n = MAXIMUM_PROC_PER_GROUP*(nr_processor_groups-1)
+ GetActiveProcessorCountProc((WORD)nr_processor_groups-1);
else
n = MAXIMUM_PROC_PER_GROUP*nr_processor_groups;
}
return n;
}