From 28f42460c75d9acb87a34cad4164863e8fe2f3aa Mon Sep 17 00:00:00 2001
From: Packit Service <user-cont-team+packit-service@redhat.com>
Date: Dec 09 2020 17:45:00 +0000
Subject: kabi-dw-0 base


---

diff --git a/Makefile b/Makefile
index 84d4e91..8a9125b 100644
--- a/Makefile
+++ b/Makefile
@@ -18,9 +18,12 @@ SRCS=generate.c ksymtab.c utils.c main.c stack.c objects.c hash.c list.c
 SRCS += compare.c show.c
 
 CC?=gcc
-CFLAGS+=-Wall -O2 --std=gnu99 -D_GNU_SOURCE -c
+CFLAGS+=-Wall --std=gnu99 -D_GNU_SOURCE -c
 LDFLAGS+=-ldw -lelf
 
+CFLAGS_RELEASE+=-O2
+CFLAGS_DEBUG+=-O0 -g3 -DDEBUG -Wextra -pedantic
+
 YACC=bison
 YACCFLAGS=-d -t
 
@@ -48,14 +51,15 @@ ifeq (,$(findstring -lelf,$(LDFLAGS)))
 override LDFLAGS+=-lelf
 endif
 
+all: CFLAGS+=$(CFLAGS_RELEASE)
 all: $(PROG)
 
-debug: CFLAGS+=-g -DDEBUG
+debug: CFLAGS+=$(CFLAGS_DEBUG)
 debug: LDFLAGS:=$(LDFLAGS)
 debug: FLEXFLAGS+=-d
 debug: $(PROG)
 
-asan-debug: CFLAGS+=-g -DDEBUG -fsanitize=address
+asan-debug: CFLAGS+=$(CFLAGS_DEBUG) -fsanitize=address
 asan-debug: LDFLAGS:=-lasan $(LDFLAGS)
 asan-debug: FLEXFLAGS+=-d
 asan-debug: $(PROG)
diff --git a/README.md b/README.md
index 7e43187..e53ca11 100644
--- a/README.md
+++ b/README.md
@@ -49,10 +49,10 @@ Compare the two type dumps:
 
 ## Motivation
 
-Traditionaly Unix System V had a stable ABI to allow external modules to work with the OS kernel without a recompilation called Device Driver Interface.
+Traditionally, Unix System V had a stable ABI, called the Device Driver Interface, that allowed external modules to work with the OS kernel without recompilation.
 Linux however never developed such stable kernel ABI. Therefore it's vital to monitor all kernel interfaces used by the external module for change, and if such change happens, the module needs to be recompiled.
 
-Linux has an option (CONFIG\_MODVERSIONS) to generate a checksum identifing all exported symbols thourhg the EXPORT\_SYMBOL() macro. But these checksum are not sufficient to actually identify the scope of the change. For example changing a couple of unused padding bits in a structure to a new field won't break any external modules, but such change changes the chekcsum of any function which receives such structure through its arguments.
+Linux has an option (CONFIG\_MODVERSIONS) to generate a checksum identifying all symbols exported through the EXPORT\_SYMBOL() macro. But these checksums are not sufficient to actually identify the scope of the change. For example, changing a couple of unused padding bits in a structure to a new field won't break any external modules, but such a change alters the checksum of any function which receives such a structure through its arguments.
 
 ## Installation
 
diff --git a/compare.c b/compare.c
index 2d12a09..183fb23 100644
--- a/compare.c
+++ b/compare.c
@@ -99,10 +99,11 @@ static void print_node_list(const char *s, const char *prefix,
  * the case, but a referred symbol has changed, we return
  * CMP_REFFILE; If that's not the case, but the offset has
  * changed, we return CMP_OFFSET. So the current order is
- * CMP_DIFF > CMP_REFFILE > CMP_OFFSET > CMP_ALIGNMENT"
+ * CMP_DIFF > CMP_REFFILE > CMP_OFFSET > CMP_ALIGNMENT > CMP_BYTE_SIZE
  * In case of alignment, if the structure alignment has changed,
  * only that is reported. If not, then the fields are checked and
  * the all the different fields are reported.
+ * The same is true of byte size changes.
  */
 
 typedef enum {
@@ -111,9 +112,11 @@ typedef enum {
 	CMP_DIFF,	/* Nodes are differents */
 	CMP_REFFILE,	/* A refered symbol has changed */
 	CMP_ALIGNMENT,  /* An alignment has changed */
+	CMP_BYTE_SIZE,  /* Byte size has changed */
 } cmp_ret_t;
 
-static int compare_two_files(char *filename, char *newfile, bool follow);
+static int compare_two_files(const char *filename, const char *newfile,
+			     bool follow);
 
 static int cmp_node_reffile(obj_t *o1, obj_t *o2)
 {
@@ -184,6 +187,9 @@ static int _cmp_nodes(obj_t *o1, obj_t *o2, bool search)
 	if (o1->alignment != o2->alignment)
 		return CMP_ALIGNMENT;
 
+	if (o1->byte_size != o2->byte_size)
+		return CMP_BYTE_SIZE;
+
 	return CMP_SAME;
 }
 
@@ -336,6 +342,14 @@ static void message_alignment_value(unsigned v, FILE *stream)
 		fprintf(stream, "%u", v);
 }
 
+static void message_byte_size_value(unsigned int v, FILE *stream)
+{
+	if (v != 0)
+		fprintf(stream, "%u", v);
+	else	/* a zero size is shown as a placeholder */
+		fputs("<undefined>", stream);
+}
+
 static void message_alignment(obj_t *o1, obj_t *o2, FILE *stream)
 {
 	char *part_str;
@@ -355,6 +369,17 @@ static void message_alignment(obj_t *o1, obj_t *o2, FILE *stream)
 	fprintf(stream, "\n");
 }
 
+/* Tell stream how the byte size changed between o1 and o2. */
+static void message_byte_size(obj_t *o1, obj_t *o2, FILE *stream)
+{
+	fprintf(stream, "The byte size of symbol '%s' has changed from ",
+		o1->name);
+	message_byte_size_value(o1->byte_size, stream);
+	fputs(" to ", stream);
+	message_byte_size_value(o2->byte_size, stream);
+	fputc('\n', stream);
+}
+
 static int _compare_tree(obj_t *o1, obj_t *o2, FILE *stream)
 {
 	obj_list_t *list1 = NULL, *list2 = NULL;
@@ -375,6 +400,9 @@ static int _compare_tree(obj_t *o1, obj_t *o2, FILE *stream)
 		} else if (tmp == CMP_ALIGNMENT) {
 			message_alignment(o1, o2, stream);
 			ret = COMP_CONT;
+		} else if (tmp == CMP_BYTE_SIZE) {
+			message_byte_size(o1, o2, stream);
+			ret = COMP_CONT;
 		}
 
 		if (ret == COMP_DIFF)
@@ -463,7 +491,7 @@ static int compare_tree(obj_t *o1, obj_t *o2, FILE *stream)
 	return _compare_tree(o1, o2, stream);
 }
 
-static bool push_file(char *filename)
+static bool push_file(const char *filename)
 {
 	int i, sz = compare_config.flistsz;
 	int cnt = compare_config.flistcnt;
@@ -545,12 +573,14 @@ static void compare_usage()
  *           don't print anything and exit immediately if follow
  *           option isn't set.
  */
-static int compare_two_files(char *filename, char *newfile, bool follow)
+static int compare_two_files(const char *filename, const char *newfile,
+			     bool follow)
 {
 	obj_t *root1, *root2;
 	char *old_dir = compare_config.old_dir;
 	char *new_dir = compare_config.new_dir;
-	char *path1, *path2, *s = NULL, *filename2;
+	char *path1, *path2, *s = NULL;
+	const char *filename2;
 	FILE *file1, *file2, *stream;
 	struct stat fstat;
 	size_t sz;
diff --git a/generate.c b/generate.c
index 8d4af66..f589cf2 100644
--- a/generate.c
+++ b/generate.c
@@ -24,6 +24,7 @@
 #include <inttypes.h>
 #include <ctype.h>
 #include <libelf.h>
+#include <gelf.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
@@ -48,6 +49,7 @@
 #include "hash.h"
 #include "objects.h"
 #include "list.h"
+#include "record.h"
 
 #define	EMPTY_NAME	"(NULL)"
 #define PROCESSED_SIZE 1024
@@ -85,11 +87,17 @@ struct cu_ctx {
 	Dwarf_Die *cu_die;
 	stack_t *stack; /* Current stack of symbol we're parsing */
 	struct set *processed; /* Set of processed types for this CU */
+	unsigned char dw_version : 6;
+	unsigned char elf_endian : 2;
+
+	struct hash *cu_db;
 };
 
 struct file_ctx {
 	generate_config_t *conf;
 	struct ksymtab *ksymtab; /* ksymtab of the current kernel module */
+	unsigned char dw_version : 6;
+	unsigned char elf_endian : 2;
 };
 
 struct dwarf_type {
@@ -105,81 +113,15 @@ struct dwarf_type {
 	{ 0, NULL }
 };
 
-
-/*
- * Structure of the database record:
- *
- * key: record key, usually includes path the file, where the type is
- *      defined (may include pseudo path, like <declaration>);
- *
- * version: type's version, used when we need to add another type of the same
- *	    name. It may happend, for example, when because of defines the same
- *          structure has changed for different compilation units.
- *
- *          It is not for the case, when the same structure defined in
- *	    different files -- it will have different keys, since it includes
- *	    the path;
- *
- * ref_count: reference counter, needed since the ownership is shared with the
- *            internal database;
- *
- * base_file: base part of the key (without version), used to generate the
- *            unique key for the new version;
- *
- * cu: compilation unit, where the type for the record defined;
- *
- * origin: "File <file>:<line>" string, describing the source, where the type
- *         for the record defined;
- *
- * stack: stack of types to reach this one.
- *         Ex.: on the toplevel
- *              struct A {
- *                        struct B fieldA;
- *              }
- *         in another file:
- *              struct B {
- *                        basetype fieldB;
- *              }
- *         the "struct B" description will contain key of the "struct A"
- *         description record in the stack;
- *
- * obj: pointer to the abstract type object, representing the toplevel type of
- *      the record.
- *
- * link: name of weak link alisas for the weak aliases.
- *
- * free: type specific function to free the record
- *       (there are normal, weak and assembly records).
- *
- * dump: type specific function for record output.
- *
- * dependents: objects that reference this record.
- */
-struct record {
-	char *key;
-	int version;
-	int ref_count;
-	char *base_file;
-	char *cu;
-	char *origin;
-	stack_t *stack;
-	obj_t *obj;
-	char *link;
-	void (*free)(struct record *);
-	void (*dump)(struct record *, FILE *);
-
-	struct list dependents;
-};
-
-void record_update_dependents(struct record *record)
+static void record_redirect_dependents(struct record *rec_dst,
+				       struct record *rec_src)
 {
 	struct list_node *iter;
 
-	LIST_FOR_EACH(&record->dependents, iter) {
+	LIST_FOR_EACH(&rec_src->dependents, iter) {
 		obj_t *obj = list_node_data(iter);
 
-		free(obj->base_type);
-		obj->base_type = safe_strdup(record->key);
+		obj->ref_record = rec_dst;
 	}
 }
 
@@ -218,6 +160,8 @@ static bool is_declaration(Dwarf_Die *die)
 	if (!dwarf_hasattr(die, DW_AT_declaration))
 		return false;
 	(void) dwarf_attr(die, DW_AT_declaration, &attr);
+	if (dwarf_hasform(&attr, DW_FORM_flag))
+		return attr.valp != NULL;
 	if (!dwarf_hasform(&attr, DW_FORM_flag_present))
 		return false;
 	return true;
@@ -335,17 +279,6 @@ static char *get_symbol_file(Dwarf_Die *die, Dwarf_Die *cu_die)
 	}
 
 	/*
-	 * DW_AT_declaration don't have DW_AT_decl_file.
-	 * Pretend like it's in other, non existent file.
-	 */
-	if (is_declaration(die)) {
-		safe_asprintf(&file_name, DECLARATION_PATH "/%s%s.txt",
-		    file_prefix, name);
-
-		return file_name;
-	}
-
-	/*
 	 * Following types can be anonymous, eg. used directly as variable type
 	 * in the declaration. We don't create new file for them if that's
 	 * the case, embed them directly in the current file.
@@ -364,7 +297,7 @@ static char *get_symbol_file(Dwarf_Die *die, Dwarf_Die *cu_die)
 	/* We don't expect our name to be empty now */
 	assert(name != NULL);
 
-	safe_asprintf(&file_name, "%s%s.txt", file_prefix, name);
+	safe_asprintf(&file_name, "%s%s", file_prefix, name);
 
 	return file_name;
 }
@@ -382,6 +315,8 @@ static int is_external(Dwarf_Die *die)
 
 	if (dwarf_hasattr(die, DW_AT_external)) {
 		dwarf_attr(die, DW_AT_external, &attr);
+		if (dwarf_hasform(&attr, DW_FORM_flag))
+			return attr.valp != NULL;
 		if (!dwarf_hasform(&attr, DW_FORM_flag_present))
 			return false;
 		return true;
@@ -396,17 +331,204 @@ static int is_external(Dwarf_Die *die)
 	return is_external(&spec_die);
 }
 
-static obj_t *die_read_alignment(Dwarf_Die *die, obj_t *obj)
+/*
+ * Fold the expression of @attr into *@value from the operations' inline
+ * operands alone; return the last atom seen, 0xff if there is none.
+ */
+static uint8_t die_attr_eval_op(Dwarf_Attribute *attr, Dwarf_Word *value)
+{
+	size_t op_idx, op_cnt = 0;
+	uint8_t loc_expr_type = 0;
+	Dwarf_Op *loc_expr_oper = NULL;
+
+	/* treat a failed lookup like an empty expression */
+	if (dwarf_getlocation(attr, &loc_expr_oper, &op_cnt) != 0)
+		op_cnt = 0;
+	if (op_cnt == 0)
+		loc_expr_type = -1;
+
+	for (op_idx = 0; op_idx < op_cnt; ++op_idx) {
+		const Dwarf_Op *op = &loc_expr_oper[op_idx];
+		const Dwarf_Word a = op->number, b = op->number2;
+
+		loc_expr_type = op->atom;
+		switch (op->atom) {
+		/* supported 0-ary operations */
+		case DW_OP_const1u: /* unsigned 1-byte constant */
+		case DW_OP_const1s: /* signed   1-byte constant */
+		case DW_OP_const2u: /* unsigned 2-byte constant */
+		case DW_OP_const2s: /* signed   2-byte constant */
+		case DW_OP_skip:    /* signed   2-byte constant */
+		case DW_OP_const4u: /* unsigned 4-byte constant */
+		case DW_OP_const4s: /* signed   4-byte constant */
+		case DW_OP_const8u: /* unsigned 8-byte constant */
+		case DW_OP_const8s: /* signed   8-byte constant */
+		case DW_OP_constu:  /* unsigned LEB128 constant */
+		case DW_OP_consts:  /* signed   LEB128 constant */
+		case DW_OP_plus_uconst: /* unsigned LEB128 addend */
+			*value = a;
+			break;
+
+		/* supported 1-ary operations */
+		case DW_OP_abs:
+			*value = (int64_t)a < 0 ? -a : a;
+			break;
+		case DW_OP_neg:
+			*value = -a;
+			break;
+		case DW_OP_not:
+			*value = ~a;
+			break;
+
+		/* supported 2-ary operations */
+		case DW_OP_and:
+			*value = a & b;
+			break;
+		case DW_OP_or:
+			*value = a | b;
+			break;
+		case DW_OP_xor:
+			*value = a ^ b;
+			break;
+		case DW_OP_plus:
+			*value = a + b;
+			break;
+		case DW_OP_minus:
+			*value = a - b;
+			break;
+		case DW_OP_mul:
+			*value = a * b;
+			break;
+		case DW_OP_div:
+		case DW_OP_mod:
+			if (b == 0)
+				fail("DWARF expression divides by zero\n");
+			*value = op->atom == DW_OP_div ? a / b : a % b;
+			break;
+
+		/* sink */
+		default:
+			printf("Unsupported Dwarf operation %x.\n", op->atom);
+			break;
+		}
+	}
+
+	return loc_expr_type;
+}
+
+static Dwarf_Word die_get_attr(Dwarf_Die *die, Dwarf_Half ar_attr)
 {
+	int attr_form;
+	Dwarf_Word value = 0;
 	Dwarf_Attribute attr;
-	Dwarf_Word value;
 
-	if (dwarf_attr(die, DW_AT_alignment, &attr) == NULL)
-		goto out;
+	if (!dwarf_hasattr(die, ar_attr))
+		return value;
+
+	if (dwarf_attr(die, ar_attr, &attr) == NULL)
+		return value;
+
+	attr_form = dwarf_whatform(&attr);
+
+	switch (attr_form) {
+	case DW_FORM_data1:
+	case DW_FORM_data2:
+	case DW_FORM_data4:
+	case DW_FORM_data8:
+	case DW_FORM_sec_offset:
+	case DW_FORM_sdata:
+	case DW_FORM_udata:
+	case DW_FORM_rnglistx:
+	case DW_FORM_loclistx:
+	case DW_FORM_implicit_const:
+	case DW_FORM_GNU_addr_index:
+	case DW_FORM_addrx:
+	case DW_FORM_addrx1:
+	case DW_FORM_addrx2:
+	case DW_FORM_addrx3:
+	case DW_FORM_addrx4:
+		if (dwarf_formudata(&attr, &value) == -1)
+			fail("Unable to get DWARF data for %s:0x%x:0x%x\n",
+			     dwarf_diename(die), attr_form, ar_attr);
+		break;
+	case DW_FORM_block:
+	case DW_FORM_block1:
+	case DW_FORM_block2:
+	case DW_FORM_block4:
+		die_attr_eval_op(&attr, &value);
+		break;
+	default:
+		fail("Unsupported DWARF form 0x%x for DIE %s, type 0x%x\n",
+		     attr_form, dwarf_diename(die), ar_attr);
+		break;
+	}
 
-	dwarf_formudata(&attr, &value);
-	obj->alignment = value;
-out:
+	return value;
+}
+
+/*
+ * Return the size of @die in bytes.  A size already stored in @obj wins;
+ * otherwise DW_AT_byte_size and DW_AT_bit_size are consulted.  The result
+ * is 0 when none of them provides a value.
+ */
+static unsigned int die_get_byte_size(Dwarf_Die *die, obj_t *obj)
+{
+	unsigned int from_bytes;
+	unsigned int from_bits;
+
+	/*
+	 * Bitfields must never get here: their storage is likely not a
+	 * whole number of CHAR_BIT units, so a byte size does not apply.
+	 */
+	assert(obj->is_bitfield == 0);
+
+	if (obj->byte_size > 0)
+		return obj->byte_size;
+
+	/*
+	 * DWARF may specify any subset of {DW_AT_byte_size, DW_AT_bit_size}
+	 * for a DIE, so both are read and then reconciled.
+	 */
+	from_bytes = die_get_attr(die, DW_AT_byte_size);
+	from_bits = die_get_attr(die, DW_AT_bit_size);
+
+	assert(from_bits % CHAR_BIT == 0);
+
+	from_bits = from_bits / CHAR_BIT;
+
+	if (from_bytes > 0 && from_bits > 0 && from_bytes != from_bits)
+		fail("DIE %s: DW_AT_byte_size and DW_AT_bit_size differ\n",
+		     dwarf_diename(die));
+
+	return from_bytes > 0 ? from_bytes : from_bits;
+}
+
+/*
+ * Set @obj's byte size to the first non-zero size found on its ->ptr chain,
+ * scaled by the ->index values met on the way when @die is an array type.
+ */
+static obj_t *die_read_byte_size(Dwarf_Die *die, obj_t *obj)
+{
+	unsigned int multiplier = 1;
+	obj_t *cur;
+
+	for (cur = obj; cur != NULL; cur = cur->ptr) {
+		unsigned int size = die_get_byte_size(die, cur);
+
+		if (cur->index && dwarf_tag(die) == DW_TAG_array_type)
+			multiplier *= cur->index;
+		if (size > 0) {
+			obj->byte_size = size * multiplier;
+			break;
+		}
+	}
+
+	return obj;
+}
+
+static obj_t *die_read_alignment(Dwarf_Die *die, obj_t *obj)
+{
+	obj->alignment = die_get_attr(die, DW_AT_alignment);
 	return obj;
 }
 
@@ -446,6 +568,25 @@ static void set_free(struct set *set)
 	hash_free(h);
 }
 
+/* Check r1 against r2; keys already seen are tracked in @processed. */
+bool record_same_declarations(struct record *r1, struct record *r2,
+			      struct set *processed)
+{
+	if (r1 == r2)
+		return true;
+
+	/* they are not the same record, so only one is a declaration */
+	if (record_is_declaration(r1) || record_is_declaration(r2))
+		return false;
+
+	if (!set_contains(processed, r1->key)) {
+		set_add(processed, r1->key);
+		return obj_same_declarations(r1->obj, r2->obj, processed);
+	}
+
+	return true;	/* skipping already processed record */
+}
+
 static struct record *record_alloc(void)
 {
 	struct record *rec;
@@ -459,8 +600,6 @@ static void record_free_regular(struct record *rec)
 	void *data;
 	struct list_node *iter;
 
-	free(rec->base_file);
-	free(rec->origin);
 	if (rec->cu)
 		free(rec->cu);
 
@@ -485,8 +624,6 @@ static void record_free_weak(struct record *rec)
 
 static void record_free(struct record *rec)
 {
-	free(rec->key);
-
 	if (rec->free)
 		rec->free(rec);
 	free(rec);
@@ -512,7 +649,7 @@ static struct record *record_new_regular(const char *key)
 	struct record *rec;
 
 	rec = record_alloc();
-	rec->key = safe_strdup(key);
+	rec->key = global_string_get_copy(key);
 	rec->stack = stack_init();
 	rec->free = record_free_regular;
 	rec->dump = record_dump_regular;
@@ -528,7 +665,7 @@ static struct record *record_new_assembly(const char *key)
 	struct record *rec;
 
 	rec = record_alloc();
-	rec->key = safe_strdup(key);
+	rec->key = global_string_get_copy(key);
 
 	/*
 	 * The symbol not necessary belongs to an assembly function,
@@ -551,7 +688,7 @@ static struct record *record_new_weak(const char *key, const char *link)
 	struct record *rec;
 
 	rec = record_alloc();
-	rec->key = safe_strdup(key);
+	rec->key = global_string_get_copy(key);
 	rec->link = safe_strdup(link);
 
 	rec->free = record_free_weak;
@@ -567,6 +704,18 @@ static obj_t *record_obj(struct record *rec)
 	return rec->obj;
 }
 
+/* New empty-key record with src's origin and src's object self-merged. */
+static struct record *record_copy(struct record *src)
+{
+	struct record *copy = record_new_regular("");
+	obj_t *orig_obj = record_obj(src);
+
+	copy->origin = src->origin;
+	copy->obj = obj_merge(orig_obj, orig_obj, MERGE_FLAG_DECL_MERGE);
+	obj_fill_parent(copy->obj);
+	return copy;
+}
+
 static obj_t *record_obj_exchange(struct record *rec, obj_t *o)
 {
 	obj_t *old;
@@ -613,11 +762,14 @@ static void record_add_origin(struct record *rec,
 {
 	char *dec_file;
 	long dec_line;
+	char *origin;
 
 	dec_file = get_file(cu_die, die);
 	dec_line = get_line(cu_die, die);
 
-	safe_asprintf(&rec->origin, "File: %s:%lu\n", dec_file, dec_line);
+	safe_asprintf(&origin, "File: %s:%lu\n", dec_file, dec_line);
+	rec->origin = global_string_get_move(origin);
+
 	free(dec_file);
 }
 
@@ -666,22 +818,7 @@ done:
 
 static void record_set_version(struct record *rec, int version)
 {
-	char *base_file = rec->base_file;
-	char *key = NULL;
-
-	if (version == 0)
-		return;
-
-	if (rec->version == 0) {
-		base_file = safe_strdup(rec->key);
-		/* Remove .txt ending */
-		base_file[strlen(base_file) - 4] = '\0';
-		rec->base_file = base_file;
-	}
 	rec->version = version;
-	safe_asprintf(&key, "%s-%i.txt", base_file, rec->version);
-	free(rec->key);
-	rec->key = key;
 }
 
 static void record_close(struct record *rec, obj_t *obj)
@@ -721,6 +858,8 @@ static void record_dump_regular(struct record *rec, FILE *f)
 	record_stack_dump_and_clear(rec, f);
 
 	fprintf(f, "Symbol:\n");
+	if (rec->obj->byte_size != 0)
+		fprintf(f, "Byte size %u\n", rec->obj->byte_size);
 	if (rec->obj->alignment != 0)
 		fprintf(f, "Alignment %u\n", rec->obj->alignment);
 
@@ -756,7 +895,13 @@ static void record_dump(struct record *rec, const char *dir)
 	FILE *f;
 	char *slash;
 
-	snprintf(path, sizeof(path), "%s/%s", dir, rec->key);
+	if (rec->version == 0) {
+		snprintf(path, sizeof(path),
+			 "%s/%s.txt", dir, rec->key);
+	} else {
+		snprintf(path, sizeof(path),
+			 "%s/%s-%i.txt", dir, rec->key, rec->version);
+	}
 
 	slash = strrchr(path, '/');
 	assert(slash != NULL);
@@ -785,7 +930,7 @@ static void list_record_free(void *value)
  */
 static bool record_merge(struct record *rec_dst,
 			 struct record *rec_src,
-			 bool merge_decl)
+			 unsigned int flags)
 {
 	const char *s1;
 	const char *s2;
@@ -796,13 +941,13 @@ static bool record_merge(struct record *rec_dst,
 	s1 = record_origin(rec_dst);
 	s2 = record_origin(rec_src);
 
-	if (!safe_streq(s1, s2))
+	if (s1 != s2)
 		return false;
 
 	o1 = record_obj(rec_dst);
 	o2 = record_obj(rec_src);
 
-	o = obj_merge(o1, o2, merge_decl);
+	o = obj_merge(o1, o2, flags);
 	if (o == NULL)
 		return false;
 
@@ -813,6 +958,283 @@ static bool record_merge(struct record *rec_dst,
 	return true;
 }
 
+struct record_list {
+	struct record *decl_dummy;
+	/*
+	 * Nodes with data members set to NULL are unavailable,
+	 * due to their data being moved.
+	 */
+	struct list *records;
+	struct list *postponed;
+};
+
+/* Allocate the record list for key together with its declaration dummy. */
+static struct record_list *record_list_new(const char *key)
+{
+	struct record_list *rl = safe_zmalloc(sizeof(*rl));
+	char *decl_key;
+
+	safe_asprintf(&decl_key, "%s/%s", DECLARATION_PATH, key);
+	rl->decl_dummy = record_new_regular(decl_key);
+	free(decl_key);
+	rl->decl_dummy->version = RECORD_VERSION_DECLARATION;
+	rl->records = list_new(list_record_free);
+	rl->postponed = list_new(NULL);
+
+	return rl;
+}
+
+static void record_list_free(struct record_list *rec_list)
+{
+	assert(list_len(rec_list->postponed) == 0);
+
+	record_free(rec_list->decl_dummy);
+	list_free(rec_list->records);
+	list_free(rec_list->postponed);
+	free(rec_list);
+}
+
+static inline void record_list_node_make_unavailable(struct list_node *node)
+{
+	node->data = NULL;
+}
+
+static inline bool record_list_node_is_available(struct list_node *node)
+{
+	return node->data != NULL;
+}
+
+static inline struct list *record_list_records(struct record_list *rec_list)
+{
+	return rec_list->records;
+}
+
+static inline struct record *record_list_decl_dummy(struct record_list *rec_list)
+{
+	return rec_list->decl_dummy;
+}
+
+static void record_list_restore_postponed(struct record_list *rec_list)
+{
+	list_concat(rec_list->records, rec_list->postponed);
+}
+
+/* Return the record list stored under key, creating it when missing. */
+static struct record_list *record_db_lookup_or_init(struct record_db *db,
+					       const char *key)
+{
+	struct hash *table = (struct hash *)db;
+	struct record_list *found = hash_find(table, key);
+
+	if (found != NULL)
+		return found;
+
+	found = record_list_new(key);
+	hash_add(table, global_string_get_copy(key), found);
+
+	return found;
+}
+
+struct merging_ctx {
+	/*
+	 * records found since recursion entry;
+	 * used for infinite loop detection
+	 */
+	struct set *current_records;
+	/*
+	 * records found since manual reset;
+	 * newly found records are merged with records in the hash
+	 */
+	struct hash *accumulated_records;
+
+
+	unsigned int flags;
+
+	bool use_copies; /* use copies of records instead of actual record */
+	bool merged;
+};
+
+static int record_merge_walk_record(struct record *followed,
+				    struct merging_ctx *ctx);
+static int record_merge_walk_object(obj_t *obj, void *arg)
+{
+	if (obj->type != __type_reffile)
+		return CB_CONT;
+
+	return record_merge_walk_record(obj->ref_record, arg);
+}
+
+static int record_merge_walk_record(struct record *followed,
+				    struct merging_ctx *ctx)
+{
+	struct record *record_dst;
+	bool clean_up = false;
+
+	if (record_is_declaration(followed))
+		return CB_CONT;
+
+	if (set_contains(ctx->current_records, followed->key))
+		return CB_CONT;
+	set_add(ctx->current_records, followed->key);
+
+	record_dst = hash_find(ctx->accumulated_records, followed->key);
+
+	if (record_dst == NULL) {
+		/* first of this key found */
+		if (ctx->use_copies)
+			record_dst = record_copy(followed);
+		else
+			record_dst = followed;
+		hash_add(ctx->accumulated_records, followed->key, record_dst);
+	} else {
+		if (record_dst == followed)
+			return CB_CONT;
+
+		if (!record_merge(record_dst, followed, ctx->flags))
+			return CB_FAIL;
+
+		ctx->merged = true;
+		if (!ctx->use_copies) {
+			record_redirect_dependents(record_dst, followed);
+			list_concat(&record_dst->dependents,
+				    &followed->dependents);
+
+			record_list_node_make_unavailable(followed->list_node);
+			clean_up = true;
+		}
+	}
+
+	int status = obj_walk_tree(followed->obj,
+				   record_merge_walk_object, ctx);
+
+	if (clean_up)
+		record_put(followed);
+
+	return status;
+}
+
+static int record_merge_walk(struct record *starting_rec,
+			     struct merging_ctx *ctx)
+{
+	int result;
+
+	ctx->current_records = set_init(PROCESSED_SIZE);
+	result = record_merge_walk_record(starting_rec, ctx);
+	set_free(ctx->current_records);
+
+	return result != CB_FAIL;
+}
+
+static bool record_merge_many_sub(struct list *list,
+				  unsigned int flags, bool use_copies)
+{
+	void (*free_fun)(void *);
+	struct merging_ctx ctx;
+	struct list_node *iter;
+	bool result = false;
+
+	if (use_copies)
+		free_fun = (void (*)(void *))record_free;
+	else
+		free_fun = NULL;
+
+	ctx.flags = flags;
+	ctx.current_records = NULL;
+	ctx.merged = false;
+
+	/* walk the records, merging those that share a key in the hash */
+	ctx.use_copies = use_copies;
+	ctx.accumulated_records = hash_new(PROCESSED_SIZE, free_fun);
+
+	LIST_FOR_EACH(list, iter) {
+		result = record_merge_walk(list_node_data(iter), &ctx);
+
+		if (result == false && use_copies)
+			break;	/* a run on copies stops at the first failure */
+	}
+	hash_free(ctx.accumulated_records);
+
+	return result && ctx.merged;	/* last walk passed and a merge happened */
+}
+
+/*
+ * Merge the records of list: a trial pass on copies decides whether the
+ * merge can succeed, then a second pass merges the records themselves.
+ */
+static bool record_merge_many(struct list *list, unsigned int flags)
+{
+	/* trial pass: operates on copies of the records */
+	bool feasible = record_merge_many_sub(list, flags, true);
+
+	if (!feasible)
+		return false;
+
+	/* real pass: merges the records themselves */
+	return record_merge_many_sub(list, flags, false);
+}
+
+/*
+ * Drop nodes whose record is gone (NULL data) and move records that failed
+ * more than FAILED_LIMIT times over to the postponed list.
+ */
+static void record_list_clean_up(struct record_list *rec_list)
+{
+	const unsigned int FAILED_LIMIT = 10;
+	struct list_node *node, *nxt;
+
+	for (node = rec_list->records->first; node != NULL; node = nxt) {
+		struct record *rec = list_node_data(node);
+
+		nxt = node->next;	/* node may be deleted below */
+
+		if (rec != NULL && rec->failed <= FAILED_LIMIT)
+			continue;
+
+		list_del(node);
+		if (rec != NULL)
+			rec->list_node = list_add(rec_list->postponed, rec);
+	}
+}
+
+/*
+ * Try to merge record_src into record_dst; a NULL record_dst is a failure.
+ * record_dst->failed counts failed attempts in a row and is reset once a
+ * merge succeeds.  Returns true when the records were merged.
+ */
+static bool record_merge_pair(struct record *record_dst,
+			      struct record *record_src)
+{
+	bool merged;
+	struct set *processed;
+	struct list to_merge;
+
+	if (record_dst == NULL)
+		return false;
+
+	processed = set_init(PROCESSED_SIZE);
+	merged = record_same_declarations(record_dst, record_src, processed);
+	set_free(processed);
+	if (!merged) {
+		record_dst->failed++;
+		return false;
+	}
+
+	list_init(&to_merge, NULL);
+	list_add(&to_merge, record_dst);
+	list_add(&to_merge, record_src);
+
+	merged = record_merge_many(&to_merge,
+				   MERGE_FLAG_VER_IGNORE | MERGE_FLAG_DECL_EQ);
+	list_clear(&to_merge);
+
+	if (merged)
+		record_dst->failed = 0;	/* continue with next unmerged */
+	else
+		record_dst->failed++;
+
+	return merged;
+}
+
 static char *record_db_add(struct record_db *db, struct record *rec)
 {
 	/*
@@ -837,39 +1259,114 @@ static char *record_db_add(struct record_db *db, struct record *rec)
 	 * referencing records that we couldn't merge wouldn't get merged.
 	 */
 
-	struct hash *hash = (struct hash *)db;
 	struct record *tmp_rec;
-	struct list *list;
+	struct record_list *rec_list;
 	struct list_node *iter;
+	int records_amount;
 
-	list = hash_find(hash, rec->key);
-	if (list == NULL) {
-		list = list_new(list_record_free);
-
-		hash_add(hash, rec->key, list);
-	}
+	rec_list = record_db_lookup_or_init(db, rec->key);
 
-	LIST_FOR_EACH(list, iter) {
+	LIST_FOR_EACH(record_list_records(rec_list), iter) {
 		tmp_rec = list_node_data(iter);
 
-		if (record_merge(tmp_rec, rec, NO_MERGE_DECL)) {
+		if (record_merge(tmp_rec, rec, MERGE_DEFAULT)) {
+			record_redirect_dependents(tmp_rec, rec);
 			list_concat(&tmp_rec->dependents, &rec->dependents);
 			return safe_strdup(tmp_rec->key);
 		}
 	}
 
+	records_amount = list_len(record_list_records(rec_list));
+
 	record_get(rec);
-	record_set_version(rec, list->len);
-	list_add(list, rec);
+	record_set_version(rec, records_amount);
+	record_redirect_dependents(rec, rec);
+	rec->list_node = list_add(record_list_records(rec_list), rec);
 
 	return safe_strdup(rec->key);
 }
 
+static void record_db_add_cu(struct record_db *db, struct hash *cu_db)
+{
+	struct list unmerged_list;
+	struct hash_iter iter;
+	const void *val;
+	bool merged;
+	struct list_node *unmerged_iter;
+	struct list_node *merger_iter;
+
+	/*
+	 * Use list instead of hash map,
+	 * since nodes are going to be gradually removed.
+	 */
+	list_init(&unmerged_list, NULL);
+	hash_iter_init((struct hash *)cu_db, &iter);
+	while (hash_iter_next(&iter, NULL, &val)) {
+		struct record *rec = (struct record *)val;
+
+		rec->list_node = list_add(&unmerged_list, rec);
+	}
+
+	/* try to merge, as long as at least one record was merged */
+	do {
+		merged = false;
+
+		LIST_FOR_EACH(&unmerged_list, unmerged_iter) {
+			struct record *unmerged_record
+				= list_node_data(unmerged_iter);
+			struct record_list *rec_list;
+			struct list *records;
+			const char *key;
+
+			if (!record_list_node_is_available(unmerged_iter)) {
+				/* already merged */
+				continue;
+			}
+
+			key = unmerged_record->key;
+			rec_list = record_db_lookup_or_init(db, key);
+			records = record_list_records(rec_list);
+
+			LIST_FOR_EACH(records, merger_iter) {
+				struct record *merger
+					= list_node_data(merger_iter);
+
+				if (record_merge_pair(merger,
+						      unmerged_record)) {
+					merged = true;
+					break;
+				}
+			}
+
+			record_list_clean_up(rec_list);
+		}
+	} while (merged);
+
+	/* add the rest that was not merged */
+	LIST_FOR_EACH(&unmerged_list, unmerged_iter) {
+		struct record *unmerged_record = list_node_data(unmerged_iter);
+		struct record_list *rec_list;
+		struct list *records;
+
+		if (!record_list_node_is_available(unmerged_iter)) {
+			/* already merged */
+			continue;
+		}
+
+		rec_list = record_db_lookup_or_init(db, unmerged_record->key);
+		records = record_list_records(rec_list);
+
+		unmerged_record->list_node
+			= list_add(records, unmerged_record);
+	}
+	list_clear(&unmerged_list);
+}
+
 static void hash_list_free(void *value)
 {
-	struct list *list = value;
+	struct record_list *rec_list = value;
 
-	list_free(list);
+	record_list_free(rec_list);
 }
 
 static struct record_db *record_db_init(void)
@@ -889,12 +1386,26 @@ static void record_db_dump(struct record_db *_db, char *dir)
 	const void *v;
 	struct hash *db = (struct hash *)_db;
 
+	/* set correct versions */
 	hash_iter_init(db, &iter);
 	while (hash_iter_next(&iter, NULL, &v)) {
-		struct list *list = (struct list *)v;
 		struct list_node *iter;
+		struct record_list *rec_list = (struct record_list *)v;
+		int ver = 0;
+
+		LIST_FOR_EACH(record_list_records(rec_list), iter) {
+			struct record *record = list_node_data(iter);
 
-		LIST_FOR_EACH(list, iter) {
+			record_set_version(record, ver++);
+		}
+	}
+
+	hash_iter_init(db, &iter);
+	while (hash_iter_next(&iter, NULL, &v)) {
+		struct record_list *rec_list = (struct record_list *)v;
+		struct list_node *iter;
+
+		LIST_FOR_EACH(record_list_records(rec_list), iter) {
 			struct record *rec = list_node_data(iter);
 
 			record_dump(rec, dir);
@@ -924,6 +1435,10 @@ static obj_t *print_die_type(struct cu_ctx *ctx,
 		fail("dwarf_formref_die() failed for %s\n",
 		    dwarf_diename(die));
 
+	if (dwarf_hasattr(&type_die, DW_AT_endianity))
+		fail("DIE %s has non-standard endianity\n",
+		     dwarf_diename(&type_die));
+
 	/* Print the type of the die */
 	return print_die(ctx, rec, &type_die);
 }
@@ -933,39 +1448,62 @@ static obj_t *print_die_struct_member(struct cu_ctx *ctx,
 				      Dwarf_Die *die,
 				      const char *name)
 {
-	Dwarf_Attribute attr;
-	Dwarf_Word value;
 	obj_t *type;
 	obj_t *obj;
-
-	if (dwarf_attr(die, DW_AT_data_member_location, &attr) == NULL)
-		fail("Offset of member %s missing!\n", name);
-
-	(void) dwarf_formudata(&attr, &value);
+	Dwarf_Half dw_attr_bit_offset;
+	unsigned int bit_offset = 0;
 
 	type = print_die_type(ctx, rec, die);
 	obj = obj_struct_member_new_add(safe_strdup(name), type);
-	obj->offset = value;
+	die_read_alignment(die, obj);
 
-	if (dwarf_hasattr(die, DW_AT_bit_offset)) {
-		Dwarf_Word offset, size;
+	/*
+	 * DWARF attribute specifying offset varies depending on DWARF version.
+	 * DW_AT_data_member_location is not guaranteed to be emitted; a fall-
+	 * back attribute DW_AT_data_bit_offset (present in DWARF v4 and later)
+	 * is used when not encountered.
+	 */
+	if (dwarf_hasattr(die, DW_AT_data_member_location))
+		obj->offset = die_get_attr(die, DW_AT_data_member_location);
+	else if (dwarf_hasattr(die, DW_AT_data_bit_offset))
+		obj->offset = die_get_attr(die, DW_AT_data_bit_offset)/CHAR_BIT;
 
-		if (!dwarf_hasattr(die, DW_AT_bit_size))
-			fail("Missing expected bit size attribute in %s!\n",
-			    name);
+	/*
+	 * DWARF attribute specifying bit-offset. Note that DW_AT_bit_offset
+	 * is endian-sensitive, whereas DW_AT_data_bit_offset is not.
+	 * Presence of this attribute indicates that we're dealing with
+	 * bit-field.
+	 */
+	if (dwarf_hasattr(die, DW_AT_bit_offset))
+		dw_attr_bit_offset = DW_AT_bit_offset;
+	else if (dwarf_hasattr(die, DW_AT_data_bit_offset))
+		dw_attr_bit_offset = DW_AT_data_bit_offset;
+	else
+		goto out;
 
-		if (dwarf_attr(die, DW_AT_bit_offset, &attr) == NULL)
-			fail("Bit offset of member %s missing!\n", name);
-		(void) dwarf_formudata(&attr, &offset);
-		if (dwarf_attr(die, DW_AT_bit_size, &attr) == NULL)
-			fail("Bit size of member %s missing!\n", name);
-		(void) dwarf_formudata(&attr, &size);
+	/*
+	 * Bit-field section; offset, first and last bits are converted to
+	 * DWARF5-compliant (endian-oblivious) format; last_bit is inclusive.
+	 */
+	obj->is_bitfield = 1;
 
-		obj->is_bitfield = 1;
-		obj->first_bit = offset;
-		obj->last_bit = offset + size - 1;
+	if (dwarf_hasattr(die, DW_AT_data_bit_offset)) {
+		bit_offset = die_get_attr(die, dw_attr_bit_offset);
+	} else if (ctx->elf_endian == ELFDATA2MSB) {
+		bit_offset = die_get_attr(die, dw_attr_bit_offset) \
+			   + obj->offset*CHAR_BIT;
+	} else {
+		bit_offset = die_get_attr(die, DW_AT_byte_size) * CHAR_BIT \
+			   + obj->offset * CHAR_BIT \
+			   - die_get_attr(die, DW_AT_bit_offset) \
+			   - die_get_attr(die, DW_AT_bit_size);
 	}
-	die_read_alignment(die, obj);
+
+	obj->offset = bit_offset / CHAR_BIT;
+	obj->first_bit = bit_offset % CHAR_BIT;
+	obj->last_bit  = die_get_attr(die, DW_AT_bit_size) + obj->first_bit - 1;
+
+out:
 	return obj;
 }
 
@@ -1271,12 +1809,12 @@ static obj_t *print_die_tag(struct cu_ctx *ctx,
 	case DW_TAG_volatile_type:
 		obj = print_die_type(ctx, rec, die);
 		obj = obj_qualifier_new_add(obj);
-		obj->base_type = safe_strdup("volatile");
+		obj->base_type = global_string_get_copy("volatile");
 		break;
 	case DW_TAG_const_type:
 		obj = print_die_type(ctx, rec, die);
 		obj = obj_qualifier_new_add(obj);
-		obj->base_type = safe_strdup("const");
+		obj->base_type = global_string_get_copy("const");
 		break;
 	case DW_TAG_array_type:
 		obj = print_die_array_type(ctx, rec, die);
@@ -1290,6 +1828,10 @@ static obj_t *print_die_tag(struct cu_ctx *ctx,
 		break;
 	}
 	}
+
+	if (tag != DW_TAG_subprogram && tag != DW_TAG_subroutine_type)
+		obj = die_read_byte_size(die, obj);
+
 	obj = die_read_alignment(die, obj);
 	return obj;
 }
@@ -1300,10 +1842,10 @@ static obj_t *print_die(struct cu_ctx *ctx,
 {
 	char *file;
 	struct record *rec;
-	char *old_file;
 	obj_t *obj;
 	obj_t *ref_obj;
 	generate_config_t *conf = ctx->conf;
+	struct hash *cu_db = (struct hash *)ctx->cu_db;
 
 	/*
 	 * Sigh. The type of some fields (eg. struct member as a pointer to
@@ -1326,9 +1868,25 @@ static obj_t *print_die(struct cu_ctx *ctx,
 
 	/* else handle new record */
 	rec = record_start(ctx, die, file);
-	if (rec == NULL)
+	if (rec == NULL) {
 		/* declaration or already processed */
+		struct record_list *rec_list
+			= record_db_lookup_or_init(conf->db, file);
+
+		if (is_declaration(die)) {
+			ref_obj->ref_record = record_list_decl_dummy(rec_list);
+		} else {
+			struct record *processed = hash_find(cu_db, file);
+
+			ref_obj->depend_rec_node
+				= list_add(&processed->dependents, ref_obj);
+			ref_obj->ref_record = processed;
+		}
+
 		goto out;
+	}
+
+	hash_add(cu_db, rec->key, rec);
 
 	if (conf->gen_extra)
 		stack_push(ctx->stack, safe_strdup(file));
@@ -1338,16 +1896,12 @@ static obj_t *print_die(struct cu_ctx *ctx,
 
 	record_close(rec, obj);
 
-	old_file = file;
 	ref_obj->depend_rec_node = list_add(&rec->dependents, ref_obj);
-	/* if it creates new version, key/file name can change */
-	file = record_db_add(conf->db, rec);
-	record_put(rec);
-	/* record_db_add() returns allocated string */
-	free(old_file);
+	ref_obj->ref_record = rec;
 
 out:
-	ref_obj->base_type = file;
+	free(file);
+
 	return ref_obj;
 }
 
@@ -1460,6 +2014,8 @@ static void process_cu_die(Dwarf_Die *cu_die, struct file_ctx *fctx)
 			cu_printed = true;
 		}
 
+		ctx.dw_version = fctx->dw_version;
+		ctx.elf_endian = fctx->elf_endian;
 		ctx.conf = conf;
 		ctx.cu_die = cu_die;
 
@@ -1468,8 +2024,12 @@ static void process_cu_die(Dwarf_Die *cu_die, struct file_ctx *fctx)
 		/* And a set of all processed symbols */
 		ctx.processed = set_init(PROCESSED_SIZE);
 
+		ctx.cu_db = hash_new(PROCESSED_SIZE, NULL);
+
 		/* Print both the CU DIE and symbol DIE */
 		ref = print_die(&ctx, NULL, &child_die);
+		record_db_add_cu(conf->db, ctx.cu_db);
+
 		obj_free(ref);
 
 		/* And clear the stack again */
@@ -1478,6 +2038,8 @@ static void process_cu_die(Dwarf_Die *cu_die, struct file_ctx *fctx)
 
 		stack_destroy(ctx.stack);
 		set_free(ctx.processed);
+
+		hash_free((struct hash *)ctx.cu_db);
 	} while (dwarf_siblingof(&child_die, &child_die) == 0);
 }
 
@@ -1503,7 +2065,9 @@ static int dwflmod_generate_cb(Dwfl_Module *dwflmod, void **userdata,
 
 	while (dwarf_next_unit(dbg, off, &off, &hsize, &version, &abbrev,
 	    &addresssize, &offsetsize, NULL, &type_offset) == 0) {
-		if (version < 2 || version > 4)
+		fctx->dw_version = version;
+
+		if (version < 2 || version > 5)
 			fail("Unsupported dwarf version: %d\n", version);
 
 		/* CU is followed by a single DIE */
@@ -1554,7 +2118,7 @@ static void generate_assembly_record(generate_config_t *conf, const char *key)
 	if (conf->verbose)
 		printf("Generating assembly record for %s\n", key);
 
-	safe_asprintf(&name, "asm--%s.txt", key);
+	safe_asprintf(&name, "asm--%s", key);
 
 	rec = record_new_assembly(name);
 	new_key = record_db_add(conf->db, rec);
@@ -1578,7 +2142,7 @@ static bool try_generate_alias(generate_config_t *conf, struct ksym *ksym)
 		printf("Generating weak record %s -> %s\n",
 		       key, link);
 
-	safe_asprintf(&name, "weak--%s.txt", key);
+	safe_asprintf(&name, "weak--%s", key);
 
 	rec = record_new_weak(name, link);
 	new_key = record_db_add(conf->db, rec);
@@ -1657,6 +2221,8 @@ static void merge_aliases(struct ksymtab *ksymtab,
 
 static walk_rv_t process_symbol_file(char *path, void *arg)
 {
+	unsigned int endianness;
+	struct elf_data *elf;
 	struct file_ctx fctx;
 	generate_config_t *conf = (generate_config_t *)arg;
 	struct ksymtab *ksymtab;
@@ -1665,8 +2231,16 @@ static walk_rv_t process_symbol_file(char *path, void *arg)
 
 	/* We want to process only .ko kernel modules and vmlinux itself */
 	if (!safe_strendswith(path, ".ko") &&
-	    !safe_strendswith(path, "/vmlinux"))
-		return ret;
+	    !safe_strendswith(path, "/vmlinux")) {
+		if (conf->kernel_dir) {
+			if (conf->verbose)
+				printf("Skip non-object file %s\n", path);
+			return ret;
+		} else {
+			if (conf->verbose)
+				printf("Force processing file %s\n", path);
+		}
+	}
 
 	/*
 	 * Don't look into RHEL build cache directories.
@@ -1676,18 +2250,31 @@ static walk_rv_t process_symbol_file(char *path, void *arg)
 			return WALK_SKIP;
 	}
 
-	ksymtab = ksymtab_read(path, &aliases);
+	elf = elf_open(path);
+	if (elf == NULL) {
+		if (conf->verbose)
+			printf("Skip %s (unable to process ELF file)\n",
+			       path);
+		goto out;
+	}
+
+	if (elf_get_endianness(elf, &endianness) > 0)
+		goto clean_elf;
+
+	if (elf_get_exported(elf, &ksymtab, &aliases) > 0)
+		goto clean_elf;
 
 	if (ksymtab_len(ksymtab) == 0) {
 		if (conf->verbose)
 			printf("Skip %s (no exported symbols)\n", path);
-		goto out;
+		goto clean_ksymtab;
 	}
 
 	merge_aliases(ksymtab, conf->symbols, aliases);
 
 	fctx.conf = conf;
 	fctx.ksymtab = ksymtab;
+	fctx.elf_endian = endianness;
 
 	if (conf->verbose)
 		printf("Processing %s\n", path);
@@ -1697,9 +2284,14 @@ static walk_rv_t process_symbol_file(char *path, void *arg)
 
 	if (is_all_done(conf))
 		ret = WALK_STOP;
-out:
+clean_ksymtab:
 	ksymtab_free(aliases);
 	ksymtab_free(ksymtab);
+clean_elf:
+	elf_close(elf);
+	free(elf->ehdr);
+	free(elf);
+out:
 	return ret;
 }
 
@@ -1712,92 +2304,236 @@ static void print_not_found(struct ksym *ksym, void *ctx)
 	printf("%s not found!\n", s);
 }
 
-struct record *record_copy(struct record *src)
+ /*
+  * Creates a string describing given record.
+  * The burden of freeing the string falls on the caller.
+  */
+static char *record_get_digest(struct record *rec)
 {
-	struct record *res = record_new_regular("");
-	obj_t *o1 = record_obj(src);
 
-	res->obj = obj_merge(o1, o1, MERGE_DECL);
-	obj_fill_parent(res->obj);
-	res->origin = safe_strdup(src->origin);
-	res->base_file = NULL;
+	/*
+	 * TODO: this approach is far from perfect, there could be more
+	 * information about members as part of the key, but for now this works
+	 * good enough.
+	 */
+	char *key;
+	obj_t *obj = rec->obj;
+	const char *origin = rec->origin ? rec->origin : "";
+	int member_count = 0;
+
+	if (!obj)
+		return safe_strdup(origin);
+
+	if (obj->member_list) {
+		for (obj_list_t *member = obj->member_list->first;
+		     member != NULL;
+		     member = member->next) {
+			member_count++;
+		}
+	}
 
-	return res;
+	safe_asprintf(&key,
+		      "%s.%zu.%zu.%zu.%zu.%zu.%i",
+		      origin,
+		      obj->alignment, obj->is_bitfield,
+		      obj->first_bit, obj->last_bit,
+		      obj->offset,
+		      member_count
+		);
+
+	return key;
 }
 
-bool record_list_can_merge(struct list *rec_list)
+struct digest_equivalence_list {
+	char *key;
+	struct list *records;
+};
+
+static void digest_equivalence_list_free(struct digest_equivalence_list *arg)
+{
+	free(arg->key);
+	list_free(arg->records);
+	free(arg);
+}
+
+static struct hash *split_record_list(struct list *input)
 {
-	bool result = true;
-	struct record *merger;
+	struct hash *result
+		= hash_new(16, (void (*)(void *))digest_equivalence_list_free);
+	void *temp;
 	struct list_node *iter;
 
-	if (list_len(rec_list) <= 1) {
-		/* only one record -> nothing to merge */
-		return false;
-	}
+	LIST_FOR_EACH(input, iter) {
+		struct record *rec = list_node_data(iter);
+		char *key;
+		struct digest_equivalence_list *eq_list;
 
-	merger = record_copy(list_node_data(rec_list->first));
-	LIST_FOR_EACH(rec_list, iter) {
-		struct record *record = list_node_data(iter);
+		if (!record_list_node_is_available(iter))
+			continue;
 
-		if (!record_merge(merger, record, MERGE_DECL)) {
-			result = false;
-			break;
+		key = record_get_digest(rec);
+		eq_list = hash_find(result, key);
+		if (eq_list == NULL) {
+			eq_list = safe_zmalloc(sizeof(*eq_list));
+			eq_list->key = key;
+			eq_list->records = list_new(NULL);
+
+			hash_add(result, eq_list->key, eq_list);
+		} else {
+			free(key);
 		}
+
+		rec->list_node = list_add(eq_list->records, rec);
 	}
-	record_put(merger);
+
+	/* clear the input list but do not free the data */
+	temp = input->free;
+	input->free = NULL;
+	list_clear(input);
+	input->free = temp;
 
 	return result;
 }
 
-void record_list_merge(struct list *rec_list)
+static bool record_list_split_and_merge(struct record_list *rec_list)
 {
-	struct record *first = rec_list->first->data;
-	struct list_node *next = rec_list->first->next;
-	struct list_node *curr;
+	struct list *list = rec_list->records;
+	struct hash *split = split_record_list(list);
+	const void *val;
+	bool merged = false;
+	struct hash_iter split_iter;
 
-	struct record *record;
 
-	while (next != NULL) {
-		curr = next;
-		next = next->next;
+	/* try to merge digest_equivalence_lists */
+	hash_iter_init(split, &split_iter);
+	while (hash_iter_next(&split_iter, NULL, &val)) {
+		struct digest_equivalence_list *eq_list
+			= (struct digest_equivalence_list *)val;
+
+		if (list_len(eq_list->records) < 2) {
+			/* skipping lists with nothing to merge */
+			continue;
+		}
+
+		if (record_merge_many(eq_list->records,
+				      MERGE_FLAG_VER_IGNORE |
+				      MERGE_FLAG_DECL_MERGE)) {
+			merged = true;
+		}
+	}
 
-		record = curr->data;
 
-		list_concat(&first->dependents, &record->dependents);
-		record_merge(first, record, MERGE_DECL);
-		record_put(record);
+	/* concat back together */
+	hash_iter_init(split, &split_iter);
+	while (hash_iter_next(&split_iter, NULL, &val)) {
+		struct digest_equivalence_list *eq_list
+			= (struct digest_equivalence_list *)val;
 
-		free(curr);
+		list_concat(list, eq_list->records);
 	}
 
-	record_update_dependents(first);
-	rec_list->first->next = NULL;
-	rec_list->last = rec_list->first;
-	rec_list->len = 1;
+	hash_free(split);
+
+	return merged;
+}
+
+bool record_db_merge_pairs(struct hash *hash)
+{
+	struct hash_iter iter;
+	const void *val;
+	bool merged = false;
+
+	/*
+	 * Try to merge as pairs.
+	 *
+	 * Since not every combination was tried while loading CUs, we
+	 * can try to merge them now, after merging some of them as
+	 * groups and decreasing their count.
+	 *
+	 * Should only be trying to merge them once, since trying more times
+	 * would be useless.
+	 */
+	hash_iter_init(hash, &iter);
+	while (hash_iter_next(&iter, NULL, &val)) {
+		struct record_list *rec_list
+			= (struct record_list *)val;
+		struct list_node *unsuc_iter;
+		struct list *con_list = record_list_records(rec_list);
+
+		LIST_FOR_EACH(con_list, unsuc_iter) {
+			struct record *unsuc = unsuc_iter->data;
+			struct list_node *con_iter;
+
+			if (unsuc == NULL)
+				continue;
+
+			for (con_iter = unsuc_iter->next;
+			     con_iter != NULL;
+			     con_iter = con_iter->next) {
+				struct record *con_rec = con_iter->data;
+
+				if (!record_list_node_is_available(con_iter))
+					continue;
+
+				if (record_merge_pair(unsuc, con_rec))
+					merged = true;
+			}
+		}
+	}
+
+	return merged;
 }
 
 void record_db_merge(struct record_db *db)
 {
+	bool first = true;
+
 	struct hash *hash = (struct hash *)db;
 	bool merged;
 	struct hash_iter iter;
-	const char *key;
 	const void *val;
 
+	hash_iter_init(hash, &iter);
+	while (hash_iter_next(&iter, NULL, &val)) {
+		struct record_list *rec_list = (struct record_list *)val;
+
+		record_list_restore_postponed(rec_list);
+	}
+
 	do {
+		/* merge as groups */
 		merged = false;
 
 		hash_iter_init(hash, &iter);
-		while (hash_iter_next(&iter, &key, &val)) {
-			struct list *list = (struct list *)val;
+		while (hash_iter_next(&iter, NULL, &val)) {
+			struct record_list *rec_list
+				= (struct record_list *)val;
+
+			if (rec_list == NULL)
+				continue;
 
-			if (list != NULL && record_list_can_merge(list)) {
-				record_list_merge(list);
+			if (record_list_split_and_merge(rec_list))
 				merged = true;
-			}
 		}
+
+
+		/* merge as pairs, once */
+		if (!first)
+			continue;
+		first = false;
+
+		if (record_db_merge_pairs(hash))
+			merged = true;
+
 	} while (merged);
+
+	hash_iter_init(hash, &iter);
+	while (hash_iter_next(&iter, NULL, &val)) {
+		struct record_list *rec_list = (struct record_list *)val;
+
+		record_list_clean_up(rec_list);
+		record_list_restore_postponed(rec_list);
+	}
 }
 
 /*
@@ -1813,7 +2549,7 @@ static void generate_symbol_defs(generate_config_t *conf)
 		    strerror(errno));
 
 	/* Lets walk the normal modules */
-	printf("Generating symbol defs from %s...\n", conf->kernel_dir);
+	printf("Generating symbol defs from %s\n", conf->kernel_dir);
 
 	conf->db = record_db_init();
 
@@ -1821,7 +2557,7 @@ static void generate_symbol_defs(generate_config_t *conf)
 		walk_dir(conf->kernel_dir, false, process_symbol_file, conf);
 	} else if (S_ISREG(st.st_mode)) {
 		char *path = conf->kernel_dir;
-		conf->kernel_dir = "";
+		conf->kernel_dir = NULL;
 		process_symbol_file(path, conf);
 	} else {
 		fail("Not a file or directory: %s\n", conf->kernel_dir);
diff --git a/ksymtab.c b/ksymtab.c
index a24a4ae..2309fc0 100644
--- a/ksymtab.c
+++ b/ksymtab.c
@@ -45,14 +45,6 @@
 
 #define KSYMTAB_SIZE 8192
 
-struct ksymtab_elf {
-	Elf *elf;
-	size_t shstrndx;
-	const char *strtab;
-	size_t strtab_size;
-	int fd;
-};
-
 struct ksymtab {
 	struct hash *hash;
 	size_t mark_count;
@@ -60,13 +52,12 @@ struct ksymtab {
 
 struct ksym;
 
-static int ksymtab_elf_get_section(struct ksymtab_elf *ke,
-				   const char *section,
-				   const char **d_data,
-				   size_t *size)
+static int elf_get_section(Elf *elf,
+			   size_t shstrndx,
+			   const char *section,
+			   const char **d_data,
+			   size_t *size)
 {
-	Elf *elf = ke->elf;
-	size_t shstrndx = ke->shstrndx;
 	Elf_Scn *scn;
 	GElf_Shdr shdr;
 	char *name;
@@ -119,16 +110,14 @@ static int ksymtab_elf_get_section(struct ksymtab_elf *ke,
 	return 0;
 }
 
-static struct ksymtab_elf *ksymtab_elf_open(const char *filename)
+struct elf_data *elf_open(const char *filename)
 {
 	Elf *elf;
 	int fd;
 	int class;
-	GElf_Ehdr ehdr;
+	GElf_Ehdr *ehdr;
 	size_t shstrndx;
-	const char *strtab;
-	size_t strtab_size;
-	struct ksymtab_elf *ke = NULL;
+	struct elf_data *data = NULL;
 
 	if (elf_version(EV_CURRENT) == EV_NONE)
 		fail("elf_version() failed: %s\n", elf_errmsg(-1));
@@ -136,7 +125,7 @@ static struct ksymtab_elf *ksymtab_elf_open(const char *filename)
 	fd = open(filename, O_RDONLY, 0);
 	if (fd < 0)
 		fail("Failed to open file %s: %s\n", filename,
-		    strerror(errno));
+		     strerror(errno));
 
 	elf = elf_begin(fd, ELF_C_READ, NULL);
 	if (elf == NULL)
@@ -144,55 +133,58 @@ static struct ksymtab_elf *ksymtab_elf_open(const char *filename)
 
 	if (elf_kind(elf) != ELF_K_ELF) {
 		printf("Doesn't look like an ELF file, ignoring: %s\n",
-		    filename);
+		       filename);
+		(void) elf_end(elf);
+		(void) close(fd);
 		goto out;
 	}
 
-	if (gelf_getehdr(elf, &ehdr) == NULL)
+	ehdr = safe_zmalloc(sizeof(*ehdr));
+
+	if (gelf_getehdr(elf, ehdr) == NULL)
 		fail("getehdr() failed: %s\n", elf_errmsg(-1));
 
 	class = gelf_getclass(elf);
 	if (class != ELFCLASS64) {
 		printf("Unsupported elf class of %s: %d\n", filename, class);
+		free(ehdr);
+		(void) elf_end(elf);
+		(void) close(fd);
 		goto out;
 	}
 
+	/*
+	 * Get section index of the string table associated with the section
+	 * headers in the ELF file.
+	 * Required by elf_get_section calls.
+	 */
 	if (elf_getshdrstrndx(elf, &shstrndx) != 0)
-		fail("elf_getshdrstrndx() failed: %s\n", elf_errmsg(-1));
-
-	ke = safe_zmalloc(sizeof(*ke));
-	ke->elf = elf;
-	ke->fd = fd;
-	ke->shstrndx = shstrndx;
-
-	if (ksymtab_elf_get_section(ke, STRTAB, &strtab, &strtab_size) < 0) {
-		free(ke);
-		goto out;
-	}
+		fail("elf_getshdrstrndx() failed: %s\n", elf_errmsg(-1));
 
-	ke->strtab = strtab;
-	ke->strtab_size = strtab_size;
-	return ke;
+	data = safe_zmalloc(sizeof(*data));
 
+	data->fd = fd;
+	data->elf = elf;
+	data->ehdr = ehdr;
+	data->shstrndx = shstrndx;
 out:
-	(void) elf_end(elf);
-	(void) close(fd);
-	return NULL;
+	return data;
 }
 
-static void ksymtab_elf_close(struct ksymtab_elf *ke)
+void elf_close(struct elf_data *ed)
 {
-	(void) elf_end(ke->elf);
-	(void) close(ke->fd);
-	free(ke);
+	if (ed == NULL)
+		return;
+	(void) elf_end(ed->elf);
+	(void) close(ed->fd);
 }
 
-static void ksymtab_elf_for_each_global_sym(struct ksymtab_elf *ke,
-					    void (*fn)(const char *name,
-						       uint64_t value,
-						       int binding,
-						       void *ctx),
-					    void *ctx)
+static void elf_for_each_global_sym(struct elf_data *ed,
+				    void (*fn)(const char *name,
+					       uint64_t value,
+					       int binding,
+					       void *ctx),
+				    void *ctx)
 {
 	const Elf64_Sym *end;
 	Elf64_Sym *sym;
@@ -201,7 +193,7 @@ static void ksymtab_elf_for_each_global_sym(struct ksymtab_elf *ke,
 	const char *data;
 	size_t size;
 
-	if (ksymtab_elf_get_section(ke, SYMTAB, &data, &size) < 0)
+	if (elf_get_section(ed->elf, ed->shstrndx, SYMTAB, &data, &size) < 0)
 		return;
 
 	sym = (Elf64_Sym *)data;
@@ -218,11 +210,11 @@ static void ksymtab_elf_for_each_global_sym(struct ksymtab_elf *ke,
 		if (sym->st_name == 0)
 			continue;
 
-		if (sym->st_name > ke->strtab_size)
+		if (sym->st_name >= ed->strtab_size)
 			fail("Symbol name index %d out of range %ld\n",
-			    sym->st_name, ke->strtab_size);
+			    sym->st_name, ed->strtab_size);
 
-		name = ke->strtab + sym->st_name;
+		name = ed->strtab + sym->st_name;
 		if (name == NULL)
 			fail("Could not find symbol name\n");
 
@@ -494,7 +486,7 @@ static struct ksymtab *ksymtab_weaks_to_aliases(struct ksymtab *weaks,
  * It will work correctly for one alias only.
  */
 static struct ksymtab *ksymtab_find_aliases(struct ksymtab *ksymtab,
-					    struct ksymtab_elf *elf)
+					    struct elf_data *elf)
 {
 	struct ksymtab *aliases;
 	struct ksymtab *weaks;
@@ -522,7 +514,7 @@ static struct ksymtab *ksymtab_find_aliases(struct ksymtab *ksymtab,
 	 *    suitable weak symbol list;
 	 * 2) for all weak symbols find its alias with the mapping.
 	 */
-	ksymtab_elf_for_each_global_sym(elf, weak_filter, &ctx);
+	elf_for_each_global_sym(elf, weak_filter, &ctx);
 	aliases = ksymtab_weaks_to_aliases(weaks, map);
 
 	hash_free(map);
@@ -531,32 +523,59 @@ static struct ksymtab *ksymtab_find_aliases(struct ksymtab *ksymtab,
 	return aliases;
 }
 
+int elf_get_endianness(struct elf_data *data, unsigned int *endianness)
+{
+	if (data->ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
+	     data->ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+		printf("Unsupported ELF endianness (EI_DATA) found: %d.\n",
+		     data->ehdr->e_ident[EI_DATA]);
+		return 1;
+	}
+
+	*endianness = data->ehdr->e_ident[EI_DATA];
+	return 0;
+}
+
+static inline int elf_get_strtab(struct elf_data *data)
+{
+	const char *strtab;
+	size_t strtab_size;
+
+	if (elf_get_section(data->elf, data->shstrndx, STRTAB, &strtab,
+			   &strtab_size) < 0) {
+		return 1;
+	}
+
+	data->strtab = strtab;
+	data->strtab_size = strtab_size;
+
+	return 0;
+}
+
 /*
  * Build list of exported symbols, ie. read seciton __ksymtab_strings,
  * analyze symbol table and create table of aliases -- list of global symbols,
  * which have the same addresses, as weak symbols,
  * mentioned by __ksymtab_strings
  */
-struct ksymtab *ksymtab_read(char *filename, struct ksymtab **aliases)
+int elf_get_exported(struct elf_data *data, struct ksymtab **ksymtab,
+		     struct ksymtab **aliases)
 {
-	struct ksymtab_elf *elf;
-	const char *data;
-	size_t size;
-	struct ksymtab *res = NULL;
-
-	assert(aliases != NULL);
+	const char *ksymtab_raw;
+	size_t ksymtab_sz;
 
-	elf = ksymtab_elf_open(filename);
-	if (elf == NULL)
-		return NULL;
+	if (elf_get_strtab(data) > 0)
+		return 1;
 
-	if (ksymtab_elf_get_section(elf, KSYMTAB_STRINGS, &data, &size) < 0)
-		goto done;
+	if (elf_get_section(data->elf, data->shstrndx, KSYMTAB_STRINGS,
+			   &ksymtab_raw, &ksymtab_sz) < 0)
+		return 1;
 
-	res = parse_ksymtab_strings(data, size);
-	*aliases = ksymtab_find_aliases(res, elf);
+	*ksymtab = parse_ksymtab_strings(ksymtab_raw, ksymtab_sz);
+	*aliases = ksymtab_find_aliases(*ksymtab, data);
 
-done:
-	ksymtab_elf_close(elf);
-	return res;
+	return 0;
 }
+
+
+
diff --git a/ksymtab.h b/ksymtab.h
index 84b5326..72dc968 100644
--- a/ksymtab.h
+++ b/ksymtab.h
@@ -20,6 +20,15 @@
 
 #include <stdint.h>
 
+struct elf_data {
+	Elf *elf;
+	GElf_Ehdr *ehdr;
+	size_t shstrndx;
+	const char *strtab;
+	size_t strtab_size;
+	int fd;
+};
+
 struct ksymtab;
 struct ksym {
 	uint64_t value;
@@ -57,7 +66,11 @@ static inline void ksymtab_ksym_set_link(struct ksym *ksym, const char *link)
 }
 
 extern void ksymtab_free(struct ksymtab *);
-extern struct ksymtab *ksymtab_read(char *, struct ksymtab **);
+extern struct elf_data *elf_open(const char *);
+extern int elf_get_exported(struct elf_data *, struct ksymtab **,
+			    struct ksymtab **);
+extern void elf_close(struct elf_data *);
+extern int elf_get_endianness(struct elf_data *, unsigned int *);
 extern struct ksym *ksymtab_find(struct ksymtab *, const char *);
 extern size_t ksymtab_len(struct ksymtab *);
 extern struct ksymtab *ksymtab_new(size_t);
diff --git a/main.c b/main.c
index d73e2b8..90f9427 100644
--- a/main.c
+++ b/main.c
@@ -33,6 +33,7 @@
 #include "generate.h"
 #include "compare.h"
 #include "show.h"
+#include "utils.h"
 
 static char *progname;
 
@@ -57,6 +58,8 @@ int main(int argc, char **argv)
 
 	argv++; argc--;
 
+	global_string_keeper_init();
+
 	if (strcmp(argv[0], "generate") == 0)
 		generate(argc, argv);
 	else if (strcmp(argv[0], "compare") == 0)
@@ -66,5 +69,7 @@ int main(int argc, char **argv)
 	else
 		usage();
 
+	global_string_keeper_free();
+
 	return ret;
 }
diff --git a/objects.c b/objects.c
index f76bab1..d81556c 100644
--- a/objects.c
+++ b/objects.c
@@ -38,6 +38,7 @@
 #include "objects.h"
 #include "utils.h"
 #include "main.h"
+#include "record.h"
 
 /* Indentation offset for c-style and tree debug outputs */
 #define C_INDENT_OFFSET   8
@@ -93,7 +94,7 @@ obj_t *obj_new(obj_types type, char *name)
 	obj_t *new = safe_zmalloc(sizeof(obj_t));
 
 	new->type = type;
-	new->name = name;
+	new->name = global_string_get_move(name);
 
 	return new;
 }
@@ -137,11 +138,6 @@ static void _obj_free(obj_t *o, obj_t *skip)
 		o->depend_rec_node = NULL;
 	}
 
-	if (o->name)
-		free(o->name);
-	if (o->base_type)
-		free(o->base_type);
-
 	_obj_list_free(o->member_list, skip);
 
 	if (o->ptr)
@@ -209,7 +205,7 @@ obj_t *obj_basetype_new(char *base_type)
 {
 	obj_t *new = obj_new(__type_base, NULL);
 
-	new->base_type = base_type;
+	new->base_type = global_string_get_move(base_type);
 
 	return new;
 }
@@ -366,11 +362,12 @@ static char *prefix_str_free(char **s, char *p)
 	return _prefix_str(s, p, false, true);
 }
 
-static char *prefix_str_space(char **s, char *p)
+static char *prefix_str_space(char **s, const char *p)
 {
 	if (!p)
 		return *s;
-	return _prefix_str(s, p, true, false);
+	/* freep is false so we can pass const char * */
+	return _prefix_str(s, (char *)p, true, false);
 }
 
 /*
@@ -403,11 +400,13 @@ static char *_postfix_str(char **s, char *p, bool space, bool freep)
 	return *s;
 }
 
-static char *postfix_str(char **s, char *p)
+static char *postfix_str(char **s, const char *p)
 {
 	if (!p)
 		return *s;
-	return _postfix_str(s, p, false, false);
+
+	/* freep is false so we can pass const char * */
+	return _postfix_str(s, (char *)p, false, false);
 }
 
 static char *postfix_str_free(char **s, char *p)
@@ -482,7 +481,8 @@ static pp_t print_func(obj_t *o, int depth, const char *prefix)
 	pp_t ret = {NULL, NULL}, return_type;
 	obj_list_t *list = NULL;
 	obj_t *next = o->ptr;
-	char *s, *name, *margin;
+	char *s, *margin;
+	const char *name;
 
 	return_type = _print_tree(next, depth, false, prefix);
 	ret.prefix = return_type.prefix;
@@ -552,7 +552,7 @@ static pp_t print_varlike(obj_t *o, int depth, const char *prefix)
 		safe_asprintf(&s, "%s:%i",
 			      o->name, o->last_bit - o->first_bit + 1);
 	else
-		s = o->name;
+		s = (char *)o->name;
 
 	ret = _print_tree(o->ptr, depth, false, prefix);
 
@@ -916,8 +916,7 @@ static int hide_kabi_cb(obj_t *o, void *args)
 		if (!strncmp(o->name, "rh_reserved_", 12) &&
 		    strncmp(o->name, "rh_reserved_ptrs", 16)) {
 			char *tmp = strdup(o->name+12);
-			free(o->name);
-			o->name = tmp;
+			o->name = global_string_get_move(tmp);
 		}
 	}
 
@@ -997,13 +996,10 @@ int obj_hide_kabi(obj_t *root, bool show_new_field)
 
 static bool obj_is_declaration(obj_t *obj)
 {
-	size_t len;
-
-	if (obj->base_type == NULL)
+	if (obj->type != __type_reffile || obj->ref_record == NULL)
 		return false;
 
-	len = strlen(DECLARATION_PATH);
-	return strncmp(obj->base_type, DECLARATION_PATH, len) == 0;
+	return record_is_declaration(obj->ref_record);
 }
 
 static bool obj_is_kabi_hide(obj_t *obj)
@@ -1014,16 +1010,28 @@ static bool obj_is_kabi_hide(obj_t *obj)
 	return strncmp(obj->name, RH_KABI_HIDE, RH_KABI_HIDE_LEN) == 0;
 }
 
-static bool obj_eq(obj_t *o1, obj_t *o2)
+bool obj_eq(obj_t *o1, obj_t *o2, bool ignore_versions)
 {
+	if (o1->type != o2->type)
+		return false;
+
+	if (o1->type == __type_reffile) {
+		if (ignore_versions) {
+			return record_get_key(o1->ref_record) ==
+				record_get_key(o2->ref_record);
+		}
+
+		return o1->ref_record == o2->ref_record;
+	}
+
 	/* borrow parts from cmp_nodes */
-	if ((o1->type != o2->type) ||
-	    !safe_streq(o1->name, o2->name) ||
+	if ((o1->name != o2->name) ||
 	    ((o1->ptr == NULL) != (o2->ptr == NULL)) ||
 	    (has_constant(o1) && (o1->constant != o2->constant)) ||
 	    (has_index(o1) && (o1->index != o2->index)) ||
 	    (is_bitfield(o1) != is_bitfield(o2)) ||
-	    (o1->alignment != o2->alignment))
+	    (o1->alignment != o2->alignment) ||
+	    (o1->byte_size != o2->byte_size))
 		return false;
 
 	/* just compare bitfields */
@@ -1036,7 +1044,7 @@ static bool obj_eq(obj_t *o1, obj_t *o2)
 	    (o2->member_list == NULL))
 		return false;
 
-	if (!safe_streq(o1->base_type, o2->base_type))
+	if (o1->base_type != o2->base_type)
 		return false;
 
 	return true;
@@ -1049,10 +1057,6 @@ static obj_t *obj_copy(obj_t *o1)
 	o = safe_zmalloc(sizeof(*o));
 	*o = *o1;
 
-	o->type = o1->type;
-	o->name = safe_strdup_or_null(o1->name);
-	o->base_type = safe_strdup_or_null(o1->base_type);
-
 	o->ptr = NULL;
 	o->member_list = NULL;
 
@@ -1062,10 +1066,11 @@ static obj_t *obj_copy(obj_t *o1)
 	return o;
 }
 
-obj_t *obj_merge(obj_t *o1, obj_t *o2, bool merge_decl);
+obj_t *obj_merge(obj_t *o1, obj_t *o2, unsigned int flags);
 
 static obj_list_head_t *obj_members_merge(obj_list_head_t *list1,
-		obj_list_head_t *list2, bool merge_decl)
+					  obj_list_head_t *list2,
+					  unsigned int flags)
 {
 	obj_list_head_t *res = NULL;
 	obj_list_t *l1;
@@ -1079,7 +1084,7 @@ static obj_list_head_t *obj_members_merge(obj_list_head_t *list1,
 	l2 = list2->first;
 
 	while (l1 && l2) {
-		o = obj_merge(l1->member, l2->member, merge_decl);
+		o = obj_merge(l1->member, l2->member, flags);
 		if (o == NULL)
 			goto cleanup;
 
@@ -1102,7 +1107,39 @@ cleanup:
 	return NULL;
 }
 
-obj_t *obj_merge(obj_t *o1, obj_t *o2, bool merge_decl)
+static inline bool obj_can_merge_two_lines(obj_t *o1, obj_t *o2,
+					   unsigned int flags)
+{
+	/*
+	 * We cannot merge two lines if:
+	 *  - their states of being declarations are not equivalent,
+	 *    and we require them to be
+	 */
+	if (flags & MERGE_FLAG_DECL_EQ &&
+	    (obj_is_declaration(o1) != obj_is_declaration(o2)))
+		return false;
+
+	/*
+	 * We can merge the two lines if:
+	 *  - they are the same, or
+	 *  - they are both RH_KABI_HIDE, or
+	 *  - at least one of them is a declaration,
+	 *    and we can merge declarations
+	 */
+	if (obj_eq(o1, o2, flags & MERGE_FLAG_VER_IGNORE))
+		return true;
+
+	if (obj_is_kabi_hide(o1) && obj_is_kabi_hide(o2))
+		return true;
+
+	if (flags & MERGE_FLAG_DECL_MERGE &&
+	    (obj_is_declaration(o1) || obj_is_declaration(o2)))
+		return true;
+
+	return false;
+}
+
+obj_t *obj_merge(obj_t *o1, obj_t *o2, unsigned int flags)
 {
 	obj_t *merged_ptr;
 	obj_list_head_t *merged_members;
@@ -1111,25 +1148,16 @@ obj_t *obj_merge(obj_t *o1, obj_t *o2, bool merge_decl)
 	if (o1 == NULL || o2 == NULL)
 		return NULL;
 
-	/*
-	 * We can merge the two lines if:
-	 *  - they are the same, or
-	 *  - they are both RH_KABI_HIDE, or
-	 *  - at least one of them is a declaration
-	 */
-	if ((!obj_eq(o1, o2)) &&
-	    (!obj_is_kabi_hide(o1) || !obj_is_kabi_hide(o2)) &&
-	    !(obj_is_declaration(o1) && merge_decl) &&
-	    !(obj_is_declaration(o2) && merge_decl))
+	if (!obj_can_merge_two_lines(o1, o2, flags))
 		goto no_merge;
 
-	merged_ptr = obj_merge(o1->ptr, o2->ptr, merge_decl);
+	merged_ptr = obj_merge(o1->ptr, o2->ptr, flags);
 	if (o1->ptr && !merged_ptr)
 		goto no_merge_ptr;
 
 	merged_members = obj_members_merge(o1->member_list,
 					   o2->member_list,
-					   merge_decl);
+					   flags);
 	if (o1->member_list && !merged_members)
 		goto no_merge_members;
 
@@ -1156,7 +1184,12 @@ no_merge:
 
 static void dump_reffile(obj_t *o, FILE *f)
 {
-	fprintf(f, "@\"%s\"\n", o->base_type);
+	int version = record_get_version(o->ref_record);
+
+	fprintf(f, "@\"%s", record_get_key(o->ref_record));
+	if (version > 0)
+		fprintf(f, "-%i", version);
+	fprintf(f, ".txt\"\n");
 }
 
 static void _dump_members(obj_t *o, FILE *f, void (*dumper)(obj_t *, FILE *))
@@ -1265,7 +1298,7 @@ static void dump_qualifier(obj_t *o, FILE *f)
 
 static void dump_base(obj_t *o, FILE *f)
 {
-	char *type = o->base_type;
+	const char *type = o->base_type;
 
 	/* variable args (...) is a special base case */
 	if (type[0] == '.')
@@ -1316,3 +1349,60 @@ void obj_dump(obj_t *o, FILE *f)
 
 	dumpers[o->type].dumper(o, f);
 }
+
+bool obj_same_declarations(obj_t *o1, obj_t *o2,
+			   struct set *processed)
+{
+	const int ignore_versions = true;
+	obj_list_t *list1;
+	obj_list_t *list2;
+
+	if (o1 == o2)
+		return true;
+
+	if (!obj_eq(o1, o2, ignore_versions))
+		return false;
+
+	if (o1->type != o2->type ||
+	    (o1->ptr == NULL) != (o2->ptr == NULL) ||
+	    (o1->member_list == NULL) != (o2->member_list == NULL)) {
+		return false;
+	}
+
+
+	if (o1->type == __type_reffile &&
+	    !record_same_declarations(o1->ref_record, o2->ref_record,
+				      processed)) {
+		return false;
+	}
+
+	if (o1->ptr &&
+	    !obj_same_declarations(o1->ptr, o2->ptr, processed)) {
+		return false;
+	}
+
+	if (o1->member_list) {
+		list1 = o1->member_list->first;
+		list2 = o2->member_list->first;
+
+		while (list1) {
+			if (list2 == NULL)
+				return false;
+
+			if (!obj_same_declarations(list1->member,
+						   list2->member,
+						   processed))
+				return false;
+
+			list1 = list1->next;
+			list2 = list2->next;
+		}
+
+		if (list1 != list2) {
+			/* different member_list lengths */
+			return false;
+		}
+	}
+
+	return true;
+}
diff --git a/objects.h b/objects.h
index 76336b4..c205c98 100644
--- a/objects.h
+++ b/objects.h
@@ -26,6 +26,7 @@
 #include <stdio.h>
 
 #include "list.h"
+#include "utils.h"
 
 #ifdef DEBUG
 #define debug(args...) do { printf(args); } while (0)
@@ -33,8 +34,14 @@
 #define debug(args...)
 #endif
 
-#define MERGE_DECL true
-#define NO_MERGE_DECL false
+struct set;
+
+enum merge_flag {
+	MERGE_DEFAULT = 0,
+	MERGE_FLAG_DECL_MERGE = 1 << 0,
+	MERGE_FLAG_VER_IGNORE = 1 << 1,
+	MERGE_FLAG_DECL_EQ = 1 << 2,
+};
 
 typedef enum {
 	__type_reffile,
@@ -71,7 +78,11 @@ typedef struct obj_list_head {
  *
  * type:	type of the symbol (such as struct, function, pointer, base
  *		type...)
+ * is_bitfield:	(var) It's a bitfield
+ * first_bit, last_bit:	(var) bit range within the offset.
  * name:	name of the symbol
+ * ref_record:	(reffile) pointer to the referenced record (only while
+ *              generating records, otherwise base_type with string is used)
  * base_type:	(base type) the type of the symbol,
  *		(qualifier) the type qualifier (const or volatile)
  *		(reffile) path to the file
@@ -86,8 +97,6 @@ typedef struct obj_list_head {
  * index:	(array) index of array
  * link:	(weak) weak alias link
  * offset:	(var) offset of a struct member
- * is_bitfield: (var) It's a bitfield
- * first_bit, last_bit: (var) bit range within the offset.
  * depend_rec_node:	(reffile) node from dependents field of record where
  *			this obj references.
  *
@@ -96,19 +105,21 @@ typedef struct obj_list_head {
  */
 typedef struct obj {
 	obj_types type;
-	char *name;
-	char *base_type;
+	unsigned char is_bitfield, first_bit, last_bit;
+	union {
+		const char *name;
+		struct record *ref_record;
+	};
+	const char *base_type;
 	unsigned alignment;
+	unsigned int byte_size;
 	obj_list_head_t *member_list;
 	struct obj *ptr, *parent;
 	union {
 		unsigned long constant;
 		unsigned long index;
 		char *link;
-		struct {
-			unsigned long offset;
-			unsigned char is_bitfield, first_bit, last_bit;
-		};
+		unsigned long offset;
 		struct list_node *depend_rec_node;
 	};
 } obj_t;
@@ -230,7 +241,11 @@ int obj_walk_tree3(obj_t *o, cb_t cb_pre, cb_t cb_in, cb_t cb_post,
 int obj_hide_kabi(obj_t *root, bool show_new_field);
 
 obj_t *obj_parse(FILE *file, char *fn);
-obj_t *obj_merge(obj_t *o1, obj_t *o2, bool merge_decl);
+obj_t *obj_merge(obj_t *o1, obj_t *o2, unsigned int flags);
 void obj_dump(obj_t *o, FILE *f);
 
+bool obj_eq(obj_t *o1, obj_t *o2, bool ignore_versions);
+
+bool obj_same_declarations(obj_t *o1, obj_t *o2, struct set *processed);
+
 #endif
diff --git a/parser.h b/parser.h
index 00fe07d..cee2056 100644
--- a/parser.h
+++ b/parser.h
@@ -30,4 +30,4 @@ int fileno(FILE *stream);
 
 int yylex();
 int yyerror(obj_t **root, char *s);
-FILE *yyin;
+extern FILE *yyin;
diff --git a/parser.y b/parser.y
index 8075d68..2446057 100644
--- a/parser.y
+++ b/parser.y
@@ -67,7 +67,7 @@
 %type <obj> kabi_dw_file symbol
 %type <obj> asm_symbol weak_symbol
 %type <list> elt_list arg_list enum_list struct_list
-%type <ul> alignment
+%type <ul> alignment byte_size
 
 %parse-param {obj_t **root}
 
@@ -142,6 +142,17 @@ symbol:
 		$$ = $declaration;
 		$$->alignment = $alignment;
 	}
+	| byte_size declaration NEWLINE
+	{
+		$$ = $declaration;
+		$$->byte_size = $byte_size;
+	}
+	| byte_size alignment declaration NEWLINE
+	{
+		$$ = $declaration;
+		$$->byte_size = $byte_size;
+		$$->alignment = $alignment;
+	}
 
 alignment:
         IDENTIFIER CONSTANT NEWLINE
@@ -150,6 +161,14 @@ alignment:
 		$$ = $CONSTANT;
 	}
 
+byte_size:
+        IDENTIFIER IDENTIFIER CONSTANT NEWLINE
+	{
+		check_and_free_keyword($1, "Byte");
+		check_and_free_keyword($2, "size");
+		$$ = $CONSTANT;
+	}
+
 /* Possible types are struct union enum func typedef and var */
 declaration:
 	struct_type
diff --git a/record.h b/record.h
new file mode 100644
index 0000000..7e05d17
--- /dev/null
+++ b/record.h
@@ -0,0 +1,100 @@
+#ifndef RECORD_H_
+#define RECORD_H_
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "list.h"
+#include "stack.h"
+#include "objects.h"
+
+#define RECORD_VERSION_DECLARATION -1
+
+/*
+ * Structure of the database record:
+ *
+ * key: record key, usually includes the path to the file where the type is
+ *      defined (may include a pseudo path, like <declaration>);
+ *      does not contain the version or the .txt suffix.
+ *
+ * version: type's version, used when we need to add another type of the same
+ *	    name. It may happen, for example, when, because of defines, the
+ *          same structure differs between compilation units.
+ *
+ *          It is not used when the same structure is defined in
+ *	    different files -- those records have different keys, since the
+ *	    key includes the path;
+ *
+ * ref_count: reference counter, needed since the ownership is shared with the
+ *            internal database;
+ *
+ * cu: compilation unit where the type for the record is defined;
+ *
+ * origin: "File <file>:<line>" string describing the source where the type
+ *         for the record is defined;
+ *
+ * stack: stack of types to reach this one.
+ *         Ex.: on the toplevel
+ *              struct A {
+ *                        struct B fieldA;
+ *              }
+ *         in another file:
+ *              struct B {
+ *                        basetype fieldB;
+ *              }
+ *         the "struct B" description will contain key of the "struct A"
+ *         description record in the stack;
+ *
+ * obj: pointer to the abstract type object, representing the toplevel type of
+ *      the record.
+ *
+ * link: name of the weak link alias, for weak aliases.
+ *
+ * free: type specific function to free the record
+ *       (there are normal, weak and assembly records).
+ *
+ * dump: type specific function for record output.
+ *
+ * dependents: objects that reference this record.
+ *
+ * list_node: node containing the record; a record only belongs to one list
+ *            at a time (usually record_list.records)
+ *
+ * failed: number of times the record could not be used for merging
+ */
+struct record {
+	const char *key;
+	int version;
+	int ref_count;
+	char *cu;
+	const char *origin;
+	stack_t *stack;
+	obj_t *obj;
+	char *link;
+	void (*free)(struct record *);
+	void (*dump)(struct record *, FILE *);
+
+	struct list dependents;
+	struct list_node *list_node;
+	unsigned int failed;
+};
+
+static inline const char *record_get_key(struct record *record)
+{
+	return record->key;
+}
+
+static inline int record_get_version(struct record *record)
+{
+	return record->version;
+}
+
+static inline bool record_is_declaration(struct record *record)
+{
+	return record->version == RECORD_VERSION_DECLARATION;
+}
+
+bool record_same_declarations(struct record *r1, struct record *r2,
+			      struct set *processed);
+
+#endif /* RECORD_H_ */
diff --git a/utils.c b/utils.c
index 2cd87b3..75eef74 100644
--- a/utils.c
+++ b/utils.c
@@ -33,6 +33,7 @@
 
 #include "main.h"
 #include "utils.h"
+#include "hash.h"
 
 /*
  * Sort function for scandir.
@@ -353,10 +354,11 @@ char *path_normalize(char *path)
 /* Removes the two dashes at the end of the prefix */
 #define IS_PREFIX(s, prefix) !strncmp(s, prefix, strlen(prefix) - 2)
 
-static void split_filename(char *filename, char **prefix,
+static void split_filename(const char *filename, char **prefix,
 			   char **name, int *version)
 {
-	char *base = basename(filename);
+	/* GNU version of basename never modifies its argument */
+	char *base = basename((char *)filename);
 
 	version = 0;
 
@@ -371,7 +373,7 @@ static void split_filename(char *filename, char **prefix,
  *
  * It allocates the string which must be freed by the caller.
  */
-char *filenametotype(char *filename)
+char *filenametotype(const char *filename)
 {
 	char *prefix = NULL, *name = NULL, *type = NULL;
 	int version = 0;
@@ -399,7 +401,7 @@ char *filenametotype(char *filename)
  *
  * It allocates the string which must be freed by the caller.
  */
-char *filenametosymbol(char *filename)
+char *filenametosymbol(const char *filename)
 {
 	char *prefix = NULL, *name = NULL;
 	int version = 0;
@@ -409,3 +411,49 @@ char *filenametosymbol(char *filename)
 
 	return name;
 }
+
+struct hash *global_string_keeper;
+
+void global_string_keeper_init(void)
+{
+	global_string_keeper = hash_new(1 << 20, free);
+}
+
+void global_string_keeper_free(void)
+{
+	hash_free(global_string_keeper);
+}
+
+const char *global_string_get_copy(const char *string)
+{
+	const char *result;
+
+	if (string == NULL)
+		return NULL;
+
+	result = hash_find(global_string_keeper, string);
+	if (result == NULL) {
+		result = safe_strdup(string);
+		hash_add(global_string_keeper, result, result);
+	}
+
+	return result;
+}
+
+const char *global_string_get_move(char *string)
+{
+	const char *result;
+
+	if (string == NULL)
+		return NULL;
+
+	result = hash_find(global_string_keeper, string);
+	if (result == NULL) {
+		result = string;
+		hash_add(global_string_keeper, result, result);
+	} else {
+		free(string);
+	}
+
+	return result;
+}
diff --git a/utils.h b/utils.h
index 3d3babb..0f9bb7a 100644
--- a/utils.h
+++ b/utils.h
@@ -152,7 +152,12 @@ extern int check_is_directory(char *);
 extern void rec_mkdir(char *);
 extern void safe_rename(const char *, const char *);
 extern char *path_normalize(char *);
-extern char *filenametotype(char *);
-extern char *filenametosymbol(char *);
+extern char *filenametotype(const char *);
+extern char *filenametosymbol(const char *);
+
+extern void global_string_keeper_init(void);
+extern void global_string_keeper_free(void);
+extern const char *global_string_get_copy(const char *string);
+extern const char *global_string_get_move(char *string);
 
 #endif /* UTILS_H */