Blob Blame History Raw
/*
  SPDX-License-Identifier: GPL-2.0-only

  Copyright (C) 2019 Facebook

  Derived from ctf_encoder.c, which is:

  Copyright (C) Arnaldo Carvalho de Melo <acme@redhat.com>
  Copyright (C) Red Hat Inc
 */

#include "dwarves.h"
#include "libbtf.h"
#include "lib/bpf/include/uapi/linux/btf.h"
#include "lib/bpf/src/libbpf.h"
#include "hash.h"
#include "elf_symtab.h"
#include "btf_encoder.h"

#include <ctype.h> /* for isalpha() and isalnum() */
#include <stdlib.h> /* for qsort() and bsearch() */
#include <inttypes.h>

/*
 * This corresponds to the same macro defined in
 * include/linux/kallsyms.h
 */
#define KSYM_NAME_LEN 128

/*
 * Location of the ftrace mcount table as found in the ELF symbol table.
 * Filled in by collect_symbol() and consumed by filter_functions().
 */
struct funcs_layout {
	/* value of the __start_mcount_loc symbol */
	unsigned long mcount_start;
	/* value of the __stop_mcount_loc symbol */
	unsigned long mcount_stop;
	/* section index (st_shndx) of the __start_mcount_loc symbol */
	unsigned long mcount_sec_idx;
};

/* One function symbol collected from the ELF symbol table. */
struct elf_function {
	/* symbol name, as returned by elf_sym__name() */
	const char	*name;
	/* symbol value, as returned by elf_sym__value() */
	unsigned long	 addr;
	/* set once a BTF function has been emitted for this symbol */
	bool		 generated;
};

/*
 * Growable table of collected function symbols:
 *   functions       - the array itself (NULL when empty)
 *   functions_alloc - number of entries the array has room for
 *   functions_cnt   - number of entries currently in use
 */
static struct elf_function *functions;
static int functions_alloc;
static int functions_cnt;

/*
 * qsort()/bsearch() comparator for struct elf_function entries.
 * Entries are ordered by symbol name, using strcmp() semantics.
 */
static int functions_cmp(const void *_a, const void *_b)
{
	const struct elf_function *lhs = _a, *rhs = _b;
	const char *lhs_name = lhs->name;
	const char *rhs_name = rhs->name;

	return strcmp(lhs_name, rhs_name);
}

/* Free the collected function table and reset its counters to "empty". */
static void delete_functions(void)
{
	free(functions);
	functions = NULL;
	functions_cnt = 0;
	functions_alloc = 0;
}

/*
 * Fallback max(), only defined when no earlier header provided one.
 * The larger argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef max
#define max(x, y) ((x) < (y) ? (y) : (x))
#endif

/*
 * Record @sym in the global 'functions' table if it is a function symbol
 * (STT_FUNC) with a non-zero value; every other symbol is skipped.
 *
 * The table grows by a factor of 1.5 (1000 entries minimum) whenever it
 * is full.
 *
 * Returns 0 on success (including skipped symbols) and -1 when the table
 * could not be grown.  On failure the table and its counters are left
 * untouched; releasing it is up to the caller (cu__encode_btf() calls
 * delete_functions() in its error path).
 */
static int collect_function(struct btf_elf *btfe, GElf_Sym *sym)
{
	if (elf_sym__type(sym) != STT_FUNC)
		return 0;
	if (!elf_sym__value(sym))
		return 0;

	if (functions_cnt == functions_alloc) {
		int new_alloc = max(1000, functions_alloc * 3 / 2);
		struct elf_function *new;

		new = realloc(functions, new_alloc * sizeof(*functions));
		if (!new)
			return -1;

		/*
		 * Commit the new capacity only once the buffer really has
		 * that size, so the bookkeeping never overstates it.
		 */
		functions = new;
		functions_alloc = new_alloc;
	}

	functions[functions_cnt].name = elf_sym__name(sym, btfe->symtab);
	functions[functions_cnt].addr = elf_sym__value(sym);
	functions[functions_cnt].generated = false;
	functions_cnt++;
	return 0;
}

/*
 * qsort()/bsearch() comparator for 'unsigned long' addresses.
 * Returns -1, 0 or 1 when *_a is below, equal to or above *_b.
 */
static int addrs_cmp(const void *_a, const void *_b)
{
	unsigned long lhs = *(const unsigned long *)_a;
	unsigned long rhs = *(const unsigned long *)_b;

	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}

static int filter_functions(struct btf_elf *btfe, struct funcs_layout *fl)
{
	unsigned long *addrs, count, offset, i;
	int functions_valid = 0;
	Elf_Data *data;
	GElf_Shdr shdr;
	Elf_Scn *sec;

	/*
	 * Find mcount addressed marked by __start_mcount_loc
	 * and __stop_mcount_loc symbols and load them into
	 * sorted array.
	 */
	sec = elf_getscn(btfe->elf, fl->mcount_sec_idx);
	if (!sec || !gelf_getshdr(sec, &shdr)) {
		fprintf(stderr, "Failed to get section(%lu) header.\n",
			fl->mcount_sec_idx);
		return -1;
	}

	offset = fl->mcount_start - shdr.sh_addr;
	count  = (fl->mcount_stop - fl->mcount_start) / 8;

	data = elf_getdata(sec, 0);
	if (!data) {
		fprintf(stderr, "Failed to get section(%lu) data.\n",
			fl->mcount_sec_idx);
		return -1;
	}

	addrs = malloc(count * sizeof(addrs[0]));
	if (!addrs) {
		fprintf(stderr, "Failed to allocate memory for ftrace addresses.\n");
		return -1;
	}

	memcpy(addrs, data->d_buf + offset, count * sizeof(addrs[0]));
	qsort(addrs, count, sizeof(addrs[0]), addrs_cmp);

	/*
	 * Let's got through all collected functions and filter
	 * out those that are not in ftrace.
	 */
	for (i = 0; i < functions_cnt; i++) {
		struct elf_function *func = &functions[i];

		/* Make sure function is within ftrace addresses. */
		if (bsearch(&func->addr, addrs, count, sizeof(addrs[0]), addrs_cmp)) {
			/*
			 * We iterate over sorted array, so we can easily skip
			 * not valid item and move following valid field into
			 * its place, and still keep the 'new' array sorted.
			 */
			if (i != functions_valid)
				functions[functions_valid] = functions[i];
			functions_valid++;
		}
	}

	functions_cnt = functions_valid;
	free(addrs);
	return 0;
}

static struct elf_function *find_function(const struct btf_elf *btfe,
					  const char *name)
{
	struct elf_function key = { .name = name };

	return bsearch(&key, functions, functions_cnt, sizeof(functions[0]),
		       functions_cmp);
}

/*
 * Tell whether @c may appear in a BTF name.
 *
 * '_' and '.' are accepted anywhere.  Otherwise the first character
 * (@first == true) must be alphabetic and later characters must be
 * alphanumeric, as classified by <ctype.h>.
 */
static bool btf_name_char_ok(char c, bool first)
{
	/*
	 * The <ctype.h> classifiers are only defined for values that fit
	 * in an unsigned char (or EOF); plain char may be negative.
	 */
	const unsigned char uc = (unsigned char)c;

	if (c == '_' || c == '.')
		return true;

	return first ? isalpha(uc) != 0 : isalnum(uc) != 0;
}

/*
 * Check whether the given name is valid in vmlinux btf.
 *
 * A valid name starts with a character accepted by
 * btf_name_char_ok(c, true), continues only with characters accepted by
 * btf_name_char_ok(c, false), and is at most KSYM_NAME_LEN characters
 * long.  The empty string is rejected.
 */
static bool btf_name_valid(const char *p)
{
	size_t i;

	if (!btf_name_char_ok(p[0], true))
		return false;

	/*
	 * Bound the scan with an index rather than a precomputed end
	 * pointer, so no pointer is ever formed beyond the string.
	 */
	for (i = 1; i < KSYM_NAME_LEN && p[i]; i++) {
		if (!btf_name_char_ok(p[i], false))
			return false;
	}

	/* still inside the string here means the name is too long */
	return p[i] == '\0';
}

/*
 * Report a symbol that cannot be encoded, on stderr.
 *
 * Without @force this prints an error plus a hint about
 * '--btf_encode_force'.  With @force it prints a warning, and only when
 * @verbose is non-zero.
 */
static void dump_invalid_symbol(const char *msg, const char *sym,
				int verbose, bool force)
{
	if (!force) {
		fprintf(stderr, "PAHOLE: Error: %s (sym: '%s').\n", msg, sym);
		fprintf(stderr, "PAHOLE: Error: Use '--btf_encode_force' to ignore such symbols and force emit the btf.\n");
		return;
	}

	if (!verbose)
		return;

	fprintf(stderr, "PAHOLE: Warning: %s, ignored (sym: '%s').\n",
		msg, sym);
}

/*
 * Debug-format loader in use, defined outside this file.  This file only
 * uses its strings__ptr() operation to turn name references into C strings.
 */
extern struct debug_fmt_ops *dwarves__active_loader;

/*
 * Verify that the id BTF assigned to @tag is the one expected from its
 * position in the CU, i.e. @core_id + @type_id_off.
 *
 * Returns 0 when the ids agree; otherwise reports the mismatch on stderr
 * and returns -1.
 */
static int tag__check_id_drift(const struct tag *tag,
			       uint32_t core_id, uint32_t btf_type_id,
			       uint32_t type_id_off)
{
	const uint32_t expected_id = core_id + type_id_off;

	if (btf_type_id == expected_id)
		return 0;

	fprintf(stderr,
		"%s: %s id drift, core_id: %u, btf_type_id: %u, type_id_off: %u\n",
		__func__, dwarf_tag_name(tag->tag),
		core_id, btf_type_id, type_id_off);
	return -1;
}

/*
 * Encode a struct/class/union @tag and all its data members into @btfe.
 *
 * DW_TAG_union_type becomes BTF_KIND_UNION, everything else
 * BTF_KIND_STRUCT.  Member type references are shifted by @type_id_off.
 *
 * Returns the id reported by btf_elf__add_struct() (negative on failure),
 * or -1 when adding a member fails.
 */
static int32_t structure_type__encode(struct btf_elf *btfe, struct cu *cu, struct tag *tag, uint32_t type_id_off)
{
	struct type *type = tag__type(tag);
	uint8_t kind = BTF_KIND_STRUCT;
	struct class_member *member;
	const char *struct_name;
	int32_t type_id;

	if (tag->tag == DW_TAG_union_type)
		kind = BTF_KIND_UNION;

	struct_name = dwarves__active_loader->strings__ptr(cu, type->namespace.name);
	type_id = btf_elf__add_struct(btfe, kind, struct_name, type->size);
	if (type_id < 0)
		return type_id;

	type__for_each_data_member(type, member) {
		/*
		 * dwarf_loader already uses DWARF's recommended bit offset
		 * addressing, which is what BTF wants, so bit_offset is
		 * passed through unchanged.
		 */
		const char *member_name =
			dwarves__active_loader->strings__ptr(cu, member->name);

		if (btf_elf__add_member(btfe, member_name, type_id_off + member->tag.type,
					member->bitfield_size, member->bit_offset) != 0)
			return -1;
	}

	return type_id;
}

/*
 * Total number of elements of the array described by @tag: the product
 * of the entry counts of all its dimensions (1 when it has none).
 */
static uint32_t array_type__nelems(struct tag *tag)
{
	struct array_type *array = tag__array_type(tag);
	uint32_t total = 1;
	int dim;

	/* unsigned multiplication: the order of the factors does not matter */
	for (dim = 0; dim < array->dimensions; dim++)
		total *= array->nr_entries[dim];

	return total;
}

/*
 * Encode the enumeration @tag and all of its enumerators into @btfe.
 *
 * Returns the id reported by btf_elf__add_enum() (negative on failure),
 * or -1 when adding an enumerator fails.
 */
static int32_t enumeration_type__encode(struct btf_elf *btfe, struct cu *cu, struct tag *tag)
{
	struct type *etype = tag__type(tag);
	struct enumerator *entry;
	const char *enum_name;
	int32_t type_id;

	enum_name = dwarves__active_loader->strings__ptr(cu, etype->namespace.name);
	type_id = btf_elf__add_enum(btfe, enum_name, etype->size);
	if (type_id < 0)
		return type_id;

	type__for_each_enumerator(etype, entry) {
		const char *entry_name =
			dwarves__active_loader->strings__ptr(cu, entry->name);

		if (btf_elf__add_enum_val(btfe, entry_name, entry->value) != 0)
			return -1;
	}

	return type_id;
}

/*
 * Set by tag__encode_btf() when an array type is encoded, since arrays
 * reference an index type.  Cleared by cu__encode_btf() when a new file
 * is started.
 */
static bool need_index_type;

/*
 * Encode a single type @tag of @cu into @btfe.
 *
 * References to other types are shifted by @type_id_off, except for type
 * 0, which stays 0 because it stands for "void".  Arrays use
 * @array_index_id as their index type and flag need_index_type.
 * @core_id is not used here.
 *
 * Returns the value of the btf_elf__add_*() / *_encode() helper used for
 * the tag (callers treat a negative value as failure), or -1 for an
 * unsupported tag.
 */
static int tag__encode_btf(struct cu *cu, struct tag *tag, uint32_t core_id, struct btf_elf *btfe,
			   uint32_t array_index_id, uint32_t type_id_off)
{
	uint32_t ref_type_id = 0;
	const char *name = NULL;
	uint16_t ref_kind;

	/* single out type 0 as it represents special type "void" */
	if (tag->type != 0)
		ref_type_id = type_id_off + tag->type;

	switch (tag->tag) {
	/* plain reference kinds share the single call after the switch */
	case DW_TAG_const_type:
		ref_kind = BTF_KIND_CONST;
		break;
	case DW_TAG_pointer_type:
		ref_kind = BTF_KIND_PTR;
		break;
	case DW_TAG_restrict_type:
		ref_kind = BTF_KIND_RESTRICT;
		break;
	case DW_TAG_volatile_type:
		ref_kind = BTF_KIND_VOLATILE;
		break;
	case DW_TAG_typedef:
		name = dwarves__active_loader->strings__ptr(cu, tag__namespace(tag)->name);
		ref_kind = BTF_KIND_TYPEDEF;
		break;
	case DW_TAG_base_type:
		name = dwarves__active_loader->strings__ptr(cu, tag__base_type(tag)->name);
		return btf_elf__add_base_type(btfe, tag__base_type(tag), name);
	case DW_TAG_structure_type:
	case DW_TAG_union_type:
	case DW_TAG_class_type:
		name = dwarves__active_loader->strings__ptr(cu, tag__namespace(tag)->name);
		if (!tag__type(tag)->declaration)
			return structure_type__encode(btfe, cu, tag, type_id_off);
		/* declaration only: emit a forward reference */
		return btf_elf__add_ref_type(btfe, BTF_KIND_FWD, 0, name, tag->tag == DW_TAG_union_type);
	case DW_TAG_array_type:
		/* TODO: Encode one dimension at a time. */
		need_index_type = true;
		return btf_elf__add_array(btfe, ref_type_id, array_index_id, array_type__nelems(tag));
	case DW_TAG_enumeration_type:
		return enumeration_type__encode(btfe, cu, tag);
	case DW_TAG_subroutine_type:
		return btf_elf__add_func_proto(btfe, cu, tag__ftype(tag), type_id_off);
	default:
		fprintf(stderr, "Unsupported DW_TAG_%s(0x%x)\n",
			dwarf_tag_name(tag->tag), tag->tag);
		return -1;
	}

	return btf_elf__add_ref_type(btfe, ref_kind, ref_type_id, name, false);
}

/*
 * Encoder state for the file currently being processed:
 *   btfe           - BTF being accumulated; NULL when no file is open
 *   array_index_id - type id handed to array types as their index type
 *   has_index_type - whether the type behind array_index_id exists yet
 */
static struct btf_elf *btfe;
static uint32_t array_index_id;
static bool has_index_type;

/*
 * Finish the BTF accumulated in 'btfe' and hand it to btf_elf__encode().
 *
 * When per-CPU variable section info was collected, the matching DATASEC
 * type is added first.  The function table and 'btfe' are always released
 * afterwards, whether or not encoding succeeded.
 *
 * Returns the result of btf_elf__encode(), or -1 when the DATASEC could
 * not be added (in which case nothing is encoded).
 */
int btf_encoder__encode(void)
{
	int err = 0;

	/*
	 * NOTE(review): assumes btf_elf__add_datasec_type() reports failure
	 * with a negative value, like the other btf_elf__add_*() helpers
	 * used in this file - confirm against libbtf.h.
	 */
	if (gobuffer__size(&btfe->percpu_secinfo) != 0 &&
	    btf_elf__add_datasec_type(btfe, PERCPU_SECTION, &btfe->percpu_secinfo) < 0) {
		fprintf(stderr, "error: failed to encode the per-CPU DATASEC\n");
		err = -1;
	}

	/* do not emit BTF that silently lacks its DATASEC */
	if (!err)
		err = btf_elf__encode(btfe, 0);

	delete_functions();
	btf_elf__delete(btfe);
	btfe = NULL;

	return err;
}

/* Capacity of the percpu_vars[] table; collecting more symbols is an error. */
#define MAX_PERCPU_VAR_CNT 4096

/* One per-CPU variable symbol collected from the ELF symbol table. */
struct var_info {
	/* symbol value; also the sort/search key of percpu_vars[] */
	uint64_t addr;
	/* symbol size */
	uint32_t sz;
	/* symbol name, as returned by elf_sym__name() */
	const char *name;
};

/*
 * Per-CPU variables found in the symbol table, sorted by address once
 * collect_symbols() is done; percpu_var_cnt is the number of used slots.
 */
static struct var_info percpu_vars[MAX_PERCPU_VAR_CNT];
static int percpu_var_cnt;

/*
 * qsort()/bsearch() comparator for struct var_info entries.
 * Entries are ordered by address; the result is -1, 0 or 1.
 */
static int percpu_var_cmp(const void *_a, const void *_b)
{
	const uint64_t lhs = ((const struct var_info *)_a)->addr;
	const uint64_t rhs = ((const struct var_info *)_b)->addr;

	if (lhs < rhs)
		return -1;

	return lhs > rhs ? 1 : 0;
}

static bool percpu_var_exists(uint64_t addr, uint32_t *sz, const char **name)
{
	const struct var_info *p;
	struct var_info key = { .addr = addr };

	p = bsearch(&key, percpu_vars, percpu_var_cnt,
		    sizeof(percpu_vars[0]), percpu_var_cmp);

	if (!p)
		return false;

	*sz = p->sz;
	*name = p->name;
	return true;
}

/*
 * Record @sym in percpu_vars[] if it is an object symbol (STT_OBJECT)
 * that lives in the per-CPU section, has a non-zero address and size,
 * and has a name acceptable to btf_name_valid().
 *
 * Returns 0 when the symbol was stored or skipped.  Returns -1 when the
 * name is invalid and btf_elf__force is not set, or when percpu_vars[]
 * is already full.
 */
static int collect_percpu_var(struct btf_elf *btfe, GElf_Sym *sym)
{
	const char *sym_name;
	uint64_t addr;
	uint32_t size;

	/* compare a symbol's shndx to determine if it's a percpu variable */
	if (elf_sym__section(sym) != btfe->percpu_shndx)
		return 0;
	if (elf_sym__type(sym) != STT_OBJECT)
		return 0;

	addr = elf_sym__value(sym);
	/*
	 * Store only those symbols that have allocated space in the percpu section.
	 * This excludes the following three types of symbols:
	 *
	 *  1. __ADDRESSABLE(sym), which are forcibly emitted as symbols.
	 *  2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
	 *  3. __exitcall(fn), functions which are labeled as exit calls.
	 *
	 * In addition, the variables defined using DEFINE_PERCPU_FIRST are
	 * also not included, which currently includes:
	 *
	 *  1. fixed_percpu_data
	 */
	if (!addr)
		return 0;

	size = elf_sym__size(sym);
	if (!size)
		return 0; /* ignore zero-sized symbols */

	sym_name = elf_sym__name(sym, btfe->symtab);
	if (!btf_name_valid(sym_name)) {
		dump_invalid_symbol("Found symbol of invalid name when encoding btf",
				    sym_name, btf_elf__verbose, btf_elf__force);
		if (btf_elf__force)
			return 0;
		return -1;
	}

	/* addr is a uint64_t, so it needs the <inttypes.h> format macro */
	if (btf_elf__verbose)
		printf("Found per-CPU symbol '%s' at address 0x%" PRIx64 "\n", sym_name, addr);

	if (percpu_var_cnt == MAX_PERCPU_VAR_CNT) {
		fprintf(stderr, "Reached the limit of per-CPU variables: %d\n",
			MAX_PERCPU_VAR_CNT);
		return -1;
	}
	percpu_vars[percpu_var_cnt].addr = addr;
	percpu_vars[percpu_var_cnt].sz = size;
	percpu_vars[percpu_var_cnt].name = sym_name;
	percpu_var_cnt++;

	return 0;
}

/*
 * Remember the ftrace table boundaries in @fl when @sym is one of the
 * __start_mcount_loc / __stop_mcount_loc symbols.
 *
 * A boundary is only looked for while its field in @fl is still zero.
 * Names are resolved through the file-level 'btfe' symbol table.
 */
static void collect_symbol(GElf_Sym *sym, struct funcs_layout *fl)
{
	if (fl->mcount_start == 0) {
		const char *sym_name = elf_sym__name(sym, btfe->symtab);

		if (strcmp("__start_mcount_loc", sym_name) == 0) {
			fl->mcount_start = sym->st_value;
			fl->mcount_sec_idx = sym->st_shndx;
		}
	}

	if (fl->mcount_stop == 0) {
		const char *sym_name = elf_sym__name(sym, btfe->symtab);

		if (strcmp("__stop_mcount_loc", sym_name) == 0)
			fl->mcount_stop = sym->st_value;
	}
}

/* Returns 1 when both mcount boundaries in @fl are non-zero, else 0. */
static int has_all_symbols(struct funcs_layout *fl)
{
	if (!fl->mcount_start)
		return 0;

	return fl->mcount_stop != 0;
}

static int collect_symbols(struct btf_elf *btfe, bool collect_percpu_vars)
{
	struct funcs_layout fl = { };
	uint32_t core_id;
	GElf_Sym sym;

	/* cache variables' addresses, preparing for searching in symtab. */
	percpu_var_cnt = 0;

	/* search within symtab for percpu variables */
	elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
		if (collect_percpu_vars && collect_percpu_var(btfe, &sym))
			return -1;
		if (collect_function(btfe, &sym))
			return -1;
		collect_symbol(&sym, &fl);
	}

	if (collect_percpu_vars) {
		if (percpu_var_cnt)
			qsort(percpu_vars, percpu_var_cnt, sizeof(percpu_vars[0]), percpu_var_cmp);

		if (btf_elf__verbose)
			printf("Found %d per-CPU variables!\n", percpu_var_cnt);
	}

	if (functions_cnt && has_all_symbols(&fl)) {
		qsort(functions, functions_cnt, sizeof(functions[0]), functions_cmp);
		if (filter_functions(btfe, &fl)) {
			fprintf(stderr, "Failed to filter dwarf functions\n");
			return -1;
		}
		if (btf_elf__verbose)
			printf("Found %d functions!\n", functions_cnt);
	} else {
		if (btf_elf__verbose)
			printf("ftrace symbols not detected, falling back to DWARF data\n");
		delete_functions();
	}

	return 0;
}

/*
 * Returns true when every parameter of @ftype has a name, i.e. the
 * loader's strings__ptr() yields a non-NULL string for each of them.
 * A prototype without parameters yields true.
 */
static bool has_arg_names(struct cu *cu, struct ftype *ftype)
{
	struct parameter *param;

	ftype__for_each_parameter(ftype, param) {
		if (dwarves__active_loader->strings__ptr(cu, param->name) == NULL)
			return false;
	}

	return true;
}

/*
 * Encode one compile unit into the BTF accumulated for cu->filename.
 *
 * @cu:                 compile unit to encode
 * @verbose:            non-zero to print progress on stdout
 * @force:              skip, rather than fail on, symbols/variables that
 *                      cannot be encoded
 * @skip_encoding_vars: do not collect or encode per-CPU variables
 *
 * When @cu belongs to a different file than the BTF in progress, the
 * previous file is finished with btf_encoder__encode() first.  A new
 * btf_elf is then created on demand and the file's symbols are collected.
 * Types are encoded first, then functions, then per-CPU variables.
 *
 * Returns 0 on success and non-zero on failure.  On failure the function
 * table and the btf_elf in progress are released.
 */
int cu__encode_btf(struct cu *cu, int verbose, bool force,
		   bool skip_encoding_vars)
{
	uint32_t type_id_off;
	uint32_t core_id;
	struct variable *var;
	struct function *fn;
	struct tag *pos;
	int err = 0;

	btf_elf__verbose = verbose;
	btf_elf__force = force;

	if (btfe && strcmp(btfe->filename, cu->filename)) {
		err = btf_encoder__encode();
		if (err)
			goto out;

		/* Finished one file, add one empty line */
		if (verbose)
			printf("\n");
	}

	if (!btfe) {
		btfe = btf_elf__new(cu->filename, cu->elf, base_btf);
		if (!btfe)
			return -1;

		err = collect_symbols(btfe, !skip_encoding_vars);
		if (err)
			goto out;

		has_index_type = false;
		need_index_type = false;
		array_index_id = 0;

		if (verbose)
			printf("File %s:\n", btfe->filename);
	}

	/* ids of this CU's types are shifted by the types already encoded */
	type_id_off = btf__get_nr_types(btfe->btf);

	if (!has_index_type) {
		/* cu__find_base_type_by_name() takes "type_id_t *id" */
		type_id_t id;
		if (cu__find_base_type_by_name(cu, "int", &id)) {
			has_index_type = true;
			array_index_id = type_id_off + id;
		} else {
			/*
			 * No "int" in this CU: reserve the id right after
			 * this CU's types for the synthetic index type that
			 * is added below if an array needs it.
			 */
			has_index_type = false;
			array_index_id = type_id_off + cu->types_table.nr_entries;
		}
	}

	cu__for_each_type(cu, core_id, pos) {
		int32_t btf_type_id = tag__encode_btf(cu, pos, core_id, btfe, array_index_id, type_id_off);

		if (btf_type_id < 0 ||
		    tag__check_id_drift(pos, core_id, btf_type_id, type_id_off)) {
			err = -1;
			goto out;
		}
	}

	if (need_index_type && !has_index_type) {
		struct base_type bt = {};

		bt.name = 0;
		bt.bit_size = 32;
		/* arrays already reference this id, so it must really exist */
		if (btf_elf__add_base_type(btfe, &bt, "__ARRAY_SIZE_TYPE__") < 0) {
			fprintf(stderr, "error: failed to encode the array index type\n");
			err = -1;
			goto out;
		}
		has_index_type = true;
	}

	cu__for_each_function(cu, core_id, fn) {
		int btf_fnproto_id, btf_fn_id;
		const char *name;

		/*
		 * Skip functions that:
		 *   - are marked as declarations
		 *   - do not have full argument names
		 *   - are not in ftrace list (if it's available)
		 *   - are not external (in case ftrace filter is not available)
		 */
		if (fn->declaration)
			continue;
		if (!has_arg_names(cu, &fn->proto))
			continue;
		if (functions_cnt) {
			struct elf_function *func;

			func = find_function(btfe, function__name(fn, cu));
			if (!func || func->generated)
				continue;
			/* emit each ELF function only once across all CUs */
			func->generated = true;
		} else {
			if (!fn->external)
				continue;
		}

		btf_fnproto_id = btf_elf__add_func_proto(btfe, cu, &fn->proto, type_id_off);
		name = dwarves__active_loader->strings__ptr(cu, fn->name);
		btf_fn_id = btf_elf__add_ref_type(btfe, BTF_KIND_FUNC, btf_fnproto_id, name, false);
		if (btf_fnproto_id < 0 || btf_fn_id < 0) {
			err = -1;
			/* errors go to stderr, like everywhere else in this file */
			fprintf(stderr, "error: failed to encode function '%s'\n", function__name(fn, cu));
			goto out;
		}
	}

	if (skip_encoding_vars)
		goto out;

	if (btfe->percpu_shndx == 0 || !btfe->symtab)
		goto out;

	if (verbose)
		printf("search cu '%s' for percpu global variables.\n", cu->name);

	cu__for_each_variable(cu, core_id, pos) {
		uint32_t size, type, linkage, offset;
		const char *name;
		uint64_t addr;
		int id;

		var = tag__variable(pos);
		if (var->declaration && !var->spec)
			continue;
		/* percpu variables are allocated in global space */
		if (variable__scope(var) != VSCOPE_GLOBAL && !var->spec)
			continue;

		/* addr has to be recorded before we follow spec */
		addr = var->ip.addr;
		if (var->spec)
			var = var->spec;

		if (!percpu_var_exists(addr, &size, &name))
			continue; /* not a per-CPU variable */

		if (var->ip.tag.type == 0) {
			fprintf(stderr, "error: found variable '%s' in CU '%s' that has void type\n",
				name, cu->name);
			if (force)
				continue;
			err = -1;
			break;
		}

		type = var->ip.tag.type + type_id_off;
		linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;

		/* addr is a uint64_t, so it needs the <inttypes.h> format macro */
		if (btf_elf__verbose) {
			printf("Variable '%s' from CU '%s' at address 0x%" PRIx64 " encoded\n",
			       name, cu->name, addr);
		}

		/* add a BTF_KIND_VAR in btfe->types */
		id = btf_elf__add_var_type(btfe, type, name, linkage);
		if (id < 0) {
			err = -1;
			fprintf(stderr, "error: failed to encode variable '%s' at addr 0x%" PRIx64 "\n",
			        name, addr);
			break;
		}

		/*
		 * add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
		 * btfe->types later when we add BTF_VAR_DATASEC.
		 */
		offset = addr - btfe->percpu_base_addr;
		id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo, id, offset, size);
		if (id < 0) {
			err = -1;
			fprintf(stderr, "error: failed to encode section info for variable '%s' at addr 0x%" PRIx64 "\n",
			        name, addr);
			break;
		}
	}

out:
	if (err) {
		delete_functions();
		btf_elf__delete(btfe);
		btfe = NULL;
	}
	return err;
}