/*
* Copyright (c) 2015, NVIDIA CORPORATION.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* unaltered in all copies or substantial portions of the Materials.
* Any additions, deletions, or changes to the original source files
* must be clearly indicated in accompanying documentation.
*
* If only executable code is distributed, then the accompanying
* documentation must state that "this software is based in part on the
* work of the Khronos Group."
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "glvnd_genentry.h"
#include "utils_misc.h"
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <assert.h>
#if defined(USE_X86_ASM) || \
defined(USE_X86_64_ASM) || \
defined(USE_ARMV7_ASM) || \
defined(USE_AARCH64_ASM) || \
defined(USE_PPC64LE_ASM)
# define USE_ASM 1
#else
# define USE_ASM 0
#endif
#if defined(__GNUC__) && USE_ASM
/// The maximum number of entrypoints that we can generate.
#define GENERATED_ENTRYPOINT_MAX 4096
/// The size of each generated entrypoint.
static const int STUB_ENTRY_SIZE = 32;
#if defined(USE_X86_ASM)
/// A template used to generate an entrypoint.
static unsigned char STUB_TEMPLATE[] =
{
0xe9, 0x78, 0x56, 0x34, 0x12, // jmp 0x12345678
};
static const int DISPATCH_FUNC_OFFSET = 1;
static const int DISPATCH_FUNC_OFFSET_REL = 5;
#elif defined(USE_X86_64_ASM)
// For x86_64, the offset from the entrypoint to the dispatch function might be
// more than 2^31, and there's no JMP instruction that takes a 64-bit offset.
// Note that the same stub also works for an x32 build. In that case, though, a
// pointer is only 32 bits, so we have to make sure we expand it a 64-bit value
// when we patch it in SetDispatchFuncPointer.
static unsigned char STUB_TEMPLATE[] =
{
0x48, 0xb8, 0xbd, 0xac, 0xcd, 0xab, 0x78, 0x56, 0x34, 0x12, // movabs 0x12345678abcdacbd,%rax
0xff, 0xe0, // jmp *%rax
};
static const int DISPATCH_FUNC_OFFSET = 2;
#elif defined(USE_ARMV7_ASM)
// Thumb bytecode
static const uint16_t STUB_TEMPLATE[] =
{
// ldr ip, 1f
0xf8df, 0xc004,
// bx ip
0x4760,
// nop
0xbf00,
// Offset that needs to be patched
// 1:
0x0000, 0x0000,
};
static const int DISPATCH_FUNC_OFFSET = 8;
#elif defined(USE_AARCH64_ASM)
static const uint32_t STUB_TEMPLATE[] =
{
// ldr x16, 1f
0x58000070,
// br x16
0xd61f0200,
// nop
0xd503201f,
// Offset that needs to be patched
// 1:
0x00000000, 0x00000000,
};
static const int DISPATCH_FUNC_OFFSET = 12;
#elif defined(USE_PPC64LE_ASM)
static uint32_t STUB_TEMPLATE[] =
{
// NOTE!!! NOTE!!! NOTE!!!
// This data is endian-reversed from the code you would see in an assembly
// listing!
// 1000:
0xE98C0010, // ld 12, 9000f-1000b(12)
0x7D8903A6, // mtctr 12
0x4E800420, // bctr
0x60000000, // nop
// 9000:
0, 0 // .quad 0
};
static const int DISPATCH_FUNC_OFFSET = sizeof(STUB_TEMPLATE) - 8;
#else
#error "Can't happen -- not implemented"
#endif
typedef struct GLVNDGenEntrypointRec
{
/// The name of the function.
char *procName;
/// The generated entrypoint function, mapped as read/write.
uint8_t *entrypointWrite;
/// The generated entrypoint function, mapped as read/exec.
GLVNDentrypointStub entrypointExec;
/// Set to 1 if we've assigned a dispatch function to this entrypoint.
int assigned;
} GLVNDGenEntrypoint;
/**
* Allocates memory for all of the entrypoint functions.
*
* \return Zero on success, non-zero on failure.
*/
static int InitEntrypoints(void);
/**
* Generates a new entrypoint.
*
* \param entry The entrypoint structure to fill in.
* \param index The index of the dispatch function.
*/
static void GenerateEntrypointFunc(GLVNDGenEntrypoint *entry, int index);
/**
* A default function plugged into the entrypoints. This is called if no vendor
* library has supplied a dispatch function.
*/
static void *DefaultDispatchFunc(void);
/**
* Patches an entrypoint to assign a dispatch function to it.
*/
static void SetDispatchFuncPointer(GLVNDGenEntrypoint *entry,
GLVNDentrypointStub dispatch);
static GLVNDGenEntrypoint entrypoints[GENERATED_ENTRYPOINT_MAX] = {};
static uint8_t *entrypointBufferWrite = NULL;
static uint8_t *entrypointBufferExec = NULL;
static int entrypointCount = 0;
GLVNDentrypointStub glvndGenerateEntrypoint(const char *procName)
{
int i;
if (InitEntrypoints() != 0) {
return NULL;
}
for (i=0; i<entrypointCount; i++) {
if (strcmp(procName, entrypoints[i].procName) == 0) {
// We already generated this function, so return it.
return entrypoints[i].entrypointExec;
}
}
if (entrypointCount < GENERATED_ENTRYPOINT_MAX) {
GLVNDGenEntrypoint *entry = &entrypoints[entrypointCount];
entry->procName = strdup(procName);
if (entry->procName == NULL) {
return NULL;
}
entry->assigned = 0;
GenerateEntrypointFunc(entry, entrypointCount);
entrypointCount++;
return entry->entrypointExec;
}
return NULL;
}
void glvndUpdateEntrypoints(GLVNDentrypointUpdateCallback callback, void *param)
{
int i;
for (i=0; i<entrypointCount; i++) {
if (!entrypoints[i].assigned) {
GLVNDentrypointStub addr = callback(entrypoints[i].procName, param);
if (addr != NULL) {
SetDispatchFuncPointer(&entrypoints[i], addr);
entrypoints[i].assigned = 1;
}
}
}
}
void glvndFreeEntrypoints(void)
{
int i;
for (i=0; i<entrypointCount; i++) {
free(entrypoints[i].procName);
entrypoints[i].procName = NULL;
entrypoints[i].entrypointWrite = NULL;
entrypoints[i].entrypointExec = NULL;
entrypoints[i].assigned = 0;
}
entrypointCount = 0;
if (entrypointBufferExec != NULL) {
FreeExecPages(STUB_ENTRY_SIZE * GENERATED_ENTRYPOINT_MAX,
entrypointBufferWrite, entrypointBufferExec);
entrypointBufferWrite = NULL;
entrypointBufferExec = NULL;
}
}
int InitEntrypoints(void)
{
if (entrypointBufferExec == NULL) {
void *writeBuf, *execBuf;
if (AllocExecPages(STUB_ENTRY_SIZE * GENERATED_ENTRYPOINT_MAX,
&writeBuf, &execBuf) != 0) {
return -1;
}
entrypointBufferWrite = (uint8_t *) writeBuf;
entrypointBufferExec = (uint8_t *) execBuf;
}
return 0;
}
void GenerateEntrypointFunc(GLVNDGenEntrypoint *entry, int index)
{
entry->entrypointWrite = entrypointBufferWrite + (index * STUB_ENTRY_SIZE);
entry->entrypointExec = (GLVNDentrypointStub)
(entrypointBufferExec + (index * STUB_ENTRY_SIZE));
assert(STUB_ENTRY_SIZE >= sizeof(STUB_TEMPLATE));
// Copy the template into our buffer.
memcpy(entry->entrypointWrite, STUB_TEMPLATE, sizeof(STUB_TEMPLATE));
#if defined(USE_ARMV7_ASM)
// Add 1 to the base address to force Thumb mode when jumping to the stub
entry->entrypointExec = (GLVNDentrypointStub)((char *)entry->entrypointExec + 1);
#endif
// Assign DefaultDispatchFunc as the dispatch function.
SetDispatchFuncPointer(entry, (GLVNDentrypointStub) DefaultDispatchFunc);
}
void SetDispatchFuncPointer(GLVNDGenEntrypoint *entry,
GLVNDentrypointStub dispatch)
{
uint8_t *code = entry->entrypointWrite;
#if defined(USE_X86_ASM)
// For x86, we use a JMP instruction with a PC-relative offset. Figure out
// the offset from the generated entrypoint to the dispatch function.
intptr_t offset = ((intptr_t) dispatch) - ((intptr_t) entry->entrypointExec) - DISPATCH_FUNC_OFFSET_REL;
*((intptr_t *) (code + DISPATCH_FUNC_OFFSET)) = offset;
#elif defined(USE_X86_64_ASM)
// For x86_64, we have to use a movabs instruction, which needs the
// absolute address of the dispatch function. On an x32 build, pointers are
// 32 bits long, but the stub still uses a 64-bit address, so we cast it to
// a uint64_t value to make sure that we write a 64-bit value in both
// cases.
*((uint64_t *) (code + DISPATCH_FUNC_OFFSET)) = (uint64_t) ((uintptr_t) dispatch);
#elif defined(USE_ARMV7_ASM)
*((uint32_t *)(code + DISPATCH_FUNC_OFFSET)) = (uint32_t)dispatch;
// Make sure the base address has the Thumb mode bit
assert((uintptr_t)entry->entrypointExec & (uintptr_t)0x1);
// See http://community.arm.com/groups/processors/blog/2010/02/17/caches-and-self-modifying-code
__builtin___clear_cache((char *)entry->entrypointExec - 1,
(char *)entry->entrypointExec - 1 + sizeof(STUB_TEMPLATE));
#elif defined(USE_AARCH64_ASM)
*((uintptr_t *)(code + DISPATCH_FUNC_OFFSET)) = (uintptr_t)dispatch;
// See http://community.arm.com/groups/processors/blog/2010/02/17/caches-and-self-modifying-code
__builtin___clear_cache((char *)entry->entrypointExec,
(char *)entry->entrypointExec + sizeof(STUB_TEMPLATE));
#elif defined(USE_PPC64LE_ASM)
// For PPC64LE, we need to patch in an absolute address.
*((uintptr_t *)(code + DISPATCH_FUNC_OFFSET)) = (uintptr_t)dispatch;
// This sequence is from the PowerISA Version 2.07B book.
// It may be a bigger hammer than we need, but it works;
// note that the __builtin___clear_cache intrinsic for
// PPC does not seem to generate any code.
__asm__ __volatile__(
" dcbst 0, %0\n\t"
" sync\n\t"
" icbi 0, %0\n\t"
" isync\n"
: : "r" (code)
);
#else
#error "Can't happen -- not implemented"
#endif
}
void *DefaultDispatchFunc(void)
{
// Print a warning message?
return NULL;
}
#else // defined(__GNUC__) && USE_ASM
GLVNDentrypointStub glvndGenerateEntrypoint(const char *procName)
{
return NULL;
}
void glvndFreeEntrypoints(void)
{
}
void glvndUpdateEntrypoints(GLVNDentrypointUpdateCallback callback, void *param)
{
}
#endif // defined(__GNUC__) && USE_ASM