Blame rpmio/rpmstrpool.c

2ff057
#include "system.h"
2ff057
#include <stdio.h>
2ff057
#include <stdlib.h>
2ff057
#include <rpm/rpmstring.h>
2ff057
#include <rpm/rpmstrpool.h>
2ff057
#include "debug.h"
2ff057
2ff057
#define STRDATA_CHUNKS 1024
2ff057
#define STRDATA_CHUNK 65536
2ff057
#define STROFFS_CHUNK 2048
2ff057
/* XXX this is ridiculously small... */
2ff057
#define STRHASH_INITSIZE 1024
2ff057
2ff057
static int pool_debug = 0;
2ff057
2ff057
typedef struct poolHash_s * poolHash;
2ff057
typedef struct poolHashBucket_s poolHashBucket;
2ff057
2ff057
struct poolHashBucket_s {
2ff057
    rpmsid keyid;
2ff057
};
2ff057
2ff057
struct poolHash_s {
2ff057
    int numBuckets;
2ff057
    poolHashBucket * buckets;
2ff057
    int keyCount;
2ff057
};
2ff057
2ff057
struct rpmstrPool_s {
2ff057
    const char ** offs;		/* pointers into data area */
2ff057
    rpmsid offs_size;		/* largest offset index */;
2ff057
    rpmsid offs_alloced;	/* offsets allocation size */
2ff057
2ff057
    char ** chunks;		/* memory chunks for storing the strings */
2ff057
    size_t chunks_size;		/* current chunk */
2ff057
    size_t chunks_allocated;	/* allocated size of the chunks array */
2ff057
    size_t chunk_allocated;	/* size of the current chunk */
2ff057
    size_t chunk_used;		/* usage of the current chunk */
2ff057
2ff057
    poolHash hash;		/* string -> sid hash table */
2ff057
    int frozen;			/* are new id additions allowed? */
2ff057
    int nrefs;			/* refcount */
2ff057
};
2ff057
2ff057
/* calculate hash and string length on at once */
2ff057
static inline unsigned int rstrlenhash(const char * str, size_t * len)
2ff057
{
2ff057
    /* Jenkins One-at-a-time hash */
2ff057
    unsigned int hash = 0xe4721b68;
2ff057
    const char * s = str;
2ff057
2ff057
    while (*s != '\0') {
2ff057
      hash += *s;
2ff057
      hash += (hash << 10);
2ff057
      hash ^= (hash >> 6);
2ff057
      s++;
2ff057
    }
2ff057
    hash += (hash << 3);
2ff057
    hash ^= (hash >> 11);
2ff057
    hash += (hash << 15);
2ff057
2ff057
    if (len)
2ff057
	*len = (s - str);
2ff057
2ff057
    return hash;
2ff057
}
2ff057
2ff057
static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len)
2ff057
{
2ff057
    /* Jenkins One-at-a-time hash */
2ff057
    unsigned int hash = 0xe4721b68;
2ff057
    const char * s = str;
2ff057
2ff057
    while (*s != '\0' && n-- > 0) {
2ff057
      hash += *s;
2ff057
      hash += (hash << 10);
2ff057
      hash ^= (hash >> 6);
2ff057
      s++;
2ff057
    }
2ff057
    hash += (hash << 3);
2ff057
    hash ^= (hash >> 11);
2ff057
    hash += (hash << 15);
2ff057
2ff057
    if (len)
2ff057
	*len = (s - str);
2ff057
2ff057
    return hash;
2ff057
}
2ff057
2ff057
static inline unsigned int rstrnhash(const char * string, size_t n)
2ff057
{
2ff057
    return rstrnlenhash(string, n, NULL);
2ff057
}
2ff057
2ff057
unsigned int rstrhash(const char * string)
2ff057
{
2ff057
    return rstrlenhash(string, NULL);
2ff057
}
2ff057
2ff057
static inline unsigned int hashbucket(unsigned int hash, unsigned int number)
2ff057
{
2ff057
    return hash + number*number;
2ff057
}
2ff057
2ff057
static poolHash poolHashCreate(int numBuckets)
2ff057
{
2ff057
    poolHash ht;
2ff057
2ff057
    ht = xmalloc(sizeof(*ht));
2ff057
    ht->numBuckets = numBuckets;
2ff057
    ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
2ff057
    ht->keyCount = 0;
2ff057
    return ht;
2ff057
}
2ff057
2ff057
static void poolHashResize(rpmstrPool pool, int numBuckets)
2ff057
{
2ff057
    poolHash ht = pool->hash;
2ff057
    poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
2ff057
2ff057
    for (int i=0; i<ht->numBuckets; i++) {
2ff057
        if (!ht->buckets[i].keyid) continue;
2ff057
        unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid));
2ff057
        for (unsigned int j=0;;j++) {
2ff057
            unsigned int hash = hashbucket(keyHash, j) % numBuckets;
2ff057
            if (!buckets[hash].keyid) {
2ff057
                buckets[hash].keyid = ht->buckets[i].keyid;
2ff057
                break;
2ff057
            }
2ff057
        }
2ff057
    }
2ff057
    free((void *)(ht->buckets));
2ff057
    ht->buckets = buckets;
2ff057
    ht->numBuckets = numBuckets;
2ff057
}
2ff057
2ff057
static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid)
2ff057
{
2ff057
    poolHash ht = pool->hash;
2ff057
2ff057
    /* keep load factor between 0.25 and 0.5 */
2ff057
    if (2*(ht->keyCount) > ht->numBuckets) {
2ff057
        poolHashResize(pool, ht->numBuckets * 2);
2ff057
    }
2ff057
2ff057
    for (unsigned int i=0;;i++) {
2ff057
        unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
2ff057
        if (!ht->buckets[hash].keyid) {
2ff057
            ht->buckets[hash].keyid = keyid;
2ff057
            ht->keyCount++;
2ff057
            break;
2ff057
        } else if (!strcmp(rpmstrPoolStr(pool, ht->buckets[hash].keyid), key)) {
2ff057
            return;
2ff057
        }
2ff057
    }
2ff057
}
2ff057
2ff057
static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid)
2ff057
{
2ff057
    poolHashAddHEntry(pool, key, rstrhash(key), keyid);
2ff057
}
2ff057
2ff057
static void poolHashEmpty( poolHash ht)
2ff057
{
2ff057
    unsigned int i;
2ff057
2ff057
    if (ht->keyCount == 0) return;
2ff057
2ff057
    for (i = 0; i < ht->numBuckets; i++) {
2ff057
	ht->buckets[i].keyid = 0;
2ff057
    }
2ff057
    ht->keyCount = 0;
2ff057
}
2ff057
2ff057
static poolHash poolHashFree(poolHash ht)
2ff057
{
2ff057
    if (ht==NULL)
2ff057
        return ht;
2ff057
    poolHashEmpty(ht);
2ff057
    ht->buckets = _free(ht->buckets);
2ff057
    ht = _free(ht);
2ff057
2ff057
    return NULL;
2ff057
}
2ff057
2ff057
static void poolHashPrintStats(rpmstrPool pool)
2ff057
{
2ff057
    poolHash ht = pool->hash;
2ff057
    int i;
2ff057
    int collisions = 0;
2ff057
    int maxcollisions = 0;
2ff057
2ff057
    for (i=0; i<ht->numBuckets; i++) {
2ff057
        unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid));
2ff057
        for (unsigned int j=0;;j++) {
2ff057
            unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
2ff057
            if (hash==i) {
2ff057
                collisions += j;
2ff057
                maxcollisions = maxcollisions > j ? maxcollisions : j;
2ff057
                break;
2ff057
            }
2ff057
        }
2ff057
    }
2ff057
    fprintf(stderr, "Hashsize: %i\n", ht->numBuckets);
2ff057
    fprintf(stderr, "Keys: %i\n", ht->keyCount);
2ff057
    fprintf(stderr, "Collisions: %i\n", collisions);
2ff057
    fprintf(stderr, "Max collisions: %i\n", maxcollisions);
2ff057
}
2ff057
2ff057
static void rpmstrPoolRehash(rpmstrPool pool)
2ff057
{
2ff057
    int sizehint;
2ff057
2ff057
    if (pool->offs_size < STRHASH_INITSIZE)
2ff057
	sizehint = STRHASH_INITSIZE;
2ff057
    else
2ff057
	sizehint = pool->offs_size * 2;
2ff057
2ff057
    if (pool->hash)
2ff057
	pool->hash = poolHashFree(pool->hash);
2ff057
2ff057
    pool->hash = poolHashCreate(sizehint);
2ff057
    for (int i = 1; i <= pool->offs_size; i++)
2ff057
	poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i);
2ff057
}
2ff057
2ff057
rpmstrPool rpmstrPoolCreate(void)
2ff057
{
2ff057
    rpmstrPool pool = xcalloc(1, sizeof(*pool));
2ff057
2ff057
    pool->offs_alloced = STROFFS_CHUNK;
2ff057
    pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs));
2ff057
2ff057
    pool->chunks_allocated = STRDATA_CHUNKS;
2ff057
    pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks));
2ff057
    pool->chunks_size = 1;
2ff057
    pool->chunk_allocated = STRDATA_CHUNK;
2ff057
    pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated);
2ff057
    pool->offs[1] = pool->chunks[pool->chunks_size];
2ff057
2ff057
    rpmstrPoolRehash(pool);
2ff057
    pool->nrefs = 1;
2ff057
    return pool;
2ff057
}
2ff057
2ff057
rpmstrPool rpmstrPoolFree(rpmstrPool pool)
2ff057
{
2ff057
    if (pool) {
2ff057
	if (pool->nrefs > 1) {
2ff057
	    pool->nrefs--;
2ff057
	} else {
2ff057
	    if (pool_debug)
2ff057
		poolHashPrintStats(pool);
2ff057
	    poolHashFree(pool->hash);
2ff057
	    free(pool->offs);
2ff057
	    for (int i=1;i<=pool->chunks_size;i++) {
2ff057
		pool->chunks[i] = _free(pool->chunks[i]);
2ff057
	    }
2ff057
	    free(pool->chunks);
2ff057
	    free(pool);
2ff057
	}
2ff057
    }
2ff057
    return NULL;
2ff057
}
2ff057
2ff057
rpmstrPool rpmstrPoolLink(rpmstrPool pool)
2ff057
{
2ff057
    if (pool)
2ff057
	pool->nrefs++;
2ff057
    return pool;
2ff057
}
2ff057
2ff057
void rpmstrPoolFreeze(rpmstrPool pool, int keephash)
2ff057
{
2ff057
    if (pool && !pool->frozen) {
2ff057
	if (!keephash) {
2ff057
	    pool->hash = poolHashFree(pool->hash);
2ff057
	}
2ff057
	pool->offs_alloced = pool->offs_size + 2; /* space for end marker */
2ff057
	pool->offs = xrealloc(pool->offs,
2ff057
			      pool->offs_alloced * sizeof(*pool->offs));
2ff057
	pool->frozen = 1;
2ff057
    }
2ff057
}
2ff057
2ff057
void rpmstrPoolUnfreeze(rpmstrPool pool)
2ff057
{
2ff057
    if (pool) {
2ff057
	if (pool->hash == NULL) {
2ff057
	    rpmstrPoolRehash(pool);
2ff057
	}
2ff057
	pool->frozen = 0;
2ff057
    }
2ff057
}
2ff057
2ff057
static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash)
2ff057
{
2ff057
    char *t = NULL;
2ff057
    size_t ssize = slen + 1;
2ff057
2ff057
    pool->offs_size += 1;
2ff057
    if (pool->offs_alloced <= pool->offs_size) {
2ff057
	pool->offs_alloced += STROFFS_CHUNK;
2ff057
	pool->offs = xrealloc(pool->offs,
2ff057
			      pool->offs_alloced * sizeof(*pool->offs));
2ff057
    }
2ff057
2ff057
    /* Do we need a new chunk to store the string? */
2ff057
    if (ssize > pool->chunk_allocated - pool->chunk_used) {
2ff057
	pool->chunks_size += 1;
2ff057
	/* Grow chunks array if needed */
2ff057
	if (pool->chunks_size >= pool->chunks_allocated) {
2ff057
	    pool->chunks_allocated += pool->chunks_allocated;
2ff057
	    pool->chunks = xrealloc(pool->chunks,
2ff057
				pool->chunks_allocated * sizeof(*pool->chunks));
2ff057
	}
2ff057
2ff057
	/* Ensure the string fits in the new chunk we're about to allocate */
2ff057
	if (ssize > pool->chunk_allocated) {
2ff057
	    pool->chunk_allocated = 2 * ssize;
2ff057
	}
2ff057
2ff057
	pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated);
2ff057
	pool->chunk_used = 0;
2ff057
    }
2ff057
2ff057
    /* Copy the string into current chunk, ensure termination */
2ff057
    t = memcpy(pool->chunks[pool->chunks_size] + pool->chunk_used, s, slen);
2ff057
    t[slen] = '\0';
2ff057
    pool->chunk_used += ssize;
2ff057
2ff057
    /* Actually add the string to the pool */
2ff057
    pool->offs[pool->offs_size] = t;
2ff057
    poolHashAddHEntry(pool, t, hash, pool->offs_size);
2ff057
2ff057
    return pool->offs_size;
2ff057
}
2ff057
2ff057
static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen,
2ff057
			    unsigned int keyHash)
2ff057
{
2ff057
    poolHash ht = pool->hash;
2ff057
    const char * s;
2ff057
2ff057
2ff057
    for (unsigned int i=0;; i++) {
2ff057
        unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
2ff057
2ff057
        if (!ht->buckets[hash].keyid) {
2ff057
            return 0;
2ff057
        }
2ff057
2ff057
	s = rpmstrPoolStr(pool, ht->buckets[hash].keyid);
2ff057
	/* pool string could be longer than keylen, require exact matche */
2ff057
	if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0')
2ff057
	    return ht->buckets[hash].keyid;
2ff057
    }
2ff057
}
2ff057
2ff057
static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen,
2ff057
			     unsigned int hash, int create)
2ff057
{
2ff057
    rpmsid sid = 0;
2ff057
2ff057
    if (pool && pool->hash) {
2ff057
	sid = rpmstrPoolGet(pool, s, slen, hash);
2ff057
	if (sid == 0 && create && !pool->frozen)
2ff057
	    sid = rpmstrPoolPut(pool, s, slen, hash);
2ff057
    }
2ff057
    return sid;
2ff057
}
2ff057
2ff057
rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create)
2ff057
{
2ff057
    rpmsid sid = 0;
2ff057
2ff057
    if (s != NULL) {
2ff057
	unsigned int hash = rstrnhash(s, slen);
2ff057
	sid = strn2id(pool, s, slen, hash, create);
2ff057
    }
2ff057
    return sid;
2ff057
}
2ff057
2ff057
rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create)
2ff057
{
2ff057
    rpmsid sid = 0;
2ff057
2ff057
    if (s != NULL) {
2ff057
	size_t slen;
2ff057
	unsigned int hash = rstrlenhash(s, &slen);
2ff057
	sid = strn2id(pool, s, slen, hash, create);
2ff057
    }
2ff057
    return sid;
2ff057
}
2ff057
2ff057
const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid)
2ff057
{
2ff057
    const char *s = NULL;
2ff057
    if (pool && sid > 0 && sid <= pool->offs_size)
2ff057
	s = pool->offs[sid];
2ff057
    return s;
2ff057
}
2ff057
2ff057
size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid)
2ff057
{
2ff057
    size_t slen = 0;
2ff057
    if (pool && sid > 0 && sid <= pool->offs_size) {
2ff057
	slen = strlen(pool->offs[sid]);
2ff057
    }
2ff057
    return slen;
2ff057
}
2ff057
2ff057
int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA,
2ff057
		    rpmstrPool poolB, rpmsid sidB)
2ff057
{
2ff057
    if (poolA == poolB)
2ff057
	return (sidA == sidB);
2ff057
    else
2ff057
	return rstreq(rpmstrPoolStr(poolA, sidA), rpmstrPoolStr(poolB, sidB));
2ff057
}
2ff057
2ff057
rpmsid rpmstrPoolNumStr(rpmstrPool pool)
2ff057
{
2ff057
    return (pool != NULL) ? pool->offs_size : 0;
2ff057
}