|
Packit Service |
9e77c8 |
#include <sys/types.h> /* for 'open' */
|
|
Packit Service |
9e77c8 |
#include <sys/stat.h> /* for 'open' */
|
|
Packit Service |
9e77c8 |
#include <fcntl.h> /* for 'open' */
|
|
Packit Service |
9e77c8 |
#include <stdlib.h> /* for 'malloc' */
|
|
Packit Service |
9e77c8 |
#include <stdio.h> /* for 'printf' */
|
|
Packit Service |
9e77c8 |
#include <unistd.h> /* for 'read' */
|
|
Packit Service |
9e77c8 |
#include <errno.h> /* for 'sterror' */
|
|
Packit Service |
9e77c8 |
#include <sys/time.h> /* for 'gettimeofday' */
|
|
Packit Service |
9e77c8 |
#include "uthash.h"
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
#undef uthash_noexpand_fyi
|
|
Packit Service |
9e77c8 |
#define uthash_noexpand_fyi(t) die()
|
|
Packit Service |
9e77c8 |
#define UNALIGNED_KEYS 0
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
void die() {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"expansion inhibited\n");
|
|
Packit Service |
9e77c8 |
exit(-1);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* Windows doesn't have gettimeofday. While Cygwin and some
|
|
Packit Service |
9e77c8 |
* versions of MinGW supply one, it is very coarse. This substitute
|
|
Packit Service |
9e77c8 |
* gives much more accurate elapsed times under Windows. */
|
|
Packit Service |
9e77c8 |
#if (( defined __CYGWIN__ ) || ( defined __MINGW32__ ))
|
|
Packit Service |
9e77c8 |
#include <windows.h>
|
|
Packit Service |
9e77c8 |
void win_gettimeofday(struct timeval* p, void* tz /* IGNORED */) {
|
|
Packit Service |
9e77c8 |
LARGE_INTEGER q;
|
|
Packit Service |
9e77c8 |
static long long freq;
|
|
Packit Service |
9e77c8 |
static long long cyg_timer;
|
|
Packit Service |
9e77c8 |
QueryPerformanceFrequency(&q);
|
|
Packit Service |
9e77c8 |
freq = q.QuadPart;
|
|
Packit Service |
9e77c8 |
QueryPerformanceCounter(&q);
|
|
Packit Service |
9e77c8 |
cyg_timer = q.QuadPart;
|
|
Packit Service |
9e77c8 |
p->tv_sec = (long)(cyg_timer / freq);
|
|
Packit Service |
9e77c8 |
p->tv_usec = (long)(((cyg_timer % freq) * 1000000) / freq);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
#define gettimeofday win_gettimeofday
|
|
Packit Service |
9e77c8 |
#define MODE (O_RDONLY|O_BINARY)
|
|
Packit Service |
9e77c8 |
#else
|
|
Packit Service |
9e77c8 |
#define MODE (O_RDONLY)
|
|
Packit Service |
9e77c8 |
#endif
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
#ifndef timersub
|
|
Packit Service |
9e77c8 |
#define timersub(a, b, result) \
|
|
Packit Service |
9e77c8 |
do { \
|
|
Packit Service |
9e77c8 |
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
|
|
Packit Service |
9e77c8 |
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
|
|
Packit Service |
9e77c8 |
if ((result)->tv_usec < 0) { \
|
|
Packit Service |
9e77c8 |
--(result)->tv_sec; \
|
|
Packit Service |
9e77c8 |
(result)->tv_usec += 1000000; \
|
|
Packit Service |
9e77c8 |
} \
|
|
Packit Service |
9e77c8 |
} while (0)
|
|
Packit Service |
9e77c8 |
#endif
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
typedef struct stat_key {
|
|
Packit Service |
9e77c8 |
char *key;
|
|
Packit Service |
9e77c8 |
unsigned len;
|
|
Packit Service |
9e77c8 |
UT_hash_handle hh, hh2;
|
|
Packit Service |
9e77c8 |
} stat_key;
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
#define CHAIN_0 0
|
|
Packit Service |
9e77c8 |
#define CHAIN_5 1
|
|
Packit Service |
9e77c8 |
#define CHAIN_10 2
|
|
Packit Service |
9e77c8 |
#define CHAIN_20 3
|
|
Packit Service |
9e77c8 |
#define CHAIN_100 4
|
|
Packit Service |
9e77c8 |
#define CHAIN_MAX 5
|
|
Packit Service |
9e77c8 |
void hash_chain_len_histogram(UT_hash_table *tbl) {
|
|
Packit Service |
9e77c8 |
unsigned i, bkt_hist[CHAIN_MAX+1];
|
|
Packit Service |
9e77c8 |
double pct = 100.0/tbl->num_buckets;
|
|
Packit Service |
9e77c8 |
memset(bkt_hist,0,sizeof(bkt_hist));
|
|
Packit Service |
9e77c8 |
for(i=0; i < tbl->num_buckets; i++) {
|
|
Packit Service |
9e77c8 |
unsigned count = tbl->buckets[i].count;
|
|
Packit Service |
9e77c8 |
if (count == 0) bkt_hist[CHAIN_0]++;
|
|
Packit Service |
9e77c8 |
else if (count < 5) bkt_hist[CHAIN_5]++;
|
|
Packit Service |
9e77c8 |
else if (count < 10) bkt_hist[CHAIN_10]++;
|
|
Packit Service |
9e77c8 |
else if (count < 20) bkt_hist[CHAIN_20]++;
|
|
Packit Service |
9e77c8 |
else if (count < 100) bkt_hist[CHAIN_100]++;
|
|
Packit Service |
9e77c8 |
else bkt_hist[CHAIN_MAX]++;
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with 0 items: %.1f%%\n", bkt_hist[CHAIN_0 ]*pct);
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with < 5 items: %.1f%%\n", bkt_hist[CHAIN_5 ]*pct);
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with < 10 items: %.1f%%\n", bkt_hist[CHAIN_10]*pct);
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with < 20 items: %.1f%%\n", bkt_hist[CHAIN_20]*pct);
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with < 100 items: %.1f%%\n", bkt_hist[CHAIN_100]*pct);
|
|
Packit Service |
9e77c8 |
fprintf(stderr, "Buckets with > 100 items: %.1f%%\n", bkt_hist[CHAIN_MAX]*pct);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
int main(int argc, char *argv[]) {
|
|
Packit Service |
9e77c8 |
int dups=0, rc, fd, done=0, err=0, want, i=0, padding=0, v=1, percent=100;
|
|
Packit Service |
9e77c8 |
unsigned keylen, max_keylen=0, verbose=0;
|
|
Packit Service |
9e77c8 |
const char *filename = "/dev/stdin";
|
|
Packit Service |
9e77c8 |
char *dst;
|
|
Packit Service |
9e77c8 |
stat_key *keyt, *keytmp, *keys=NULL, *keys2=NULL;
|
|
Packit Service |
9e77c8 |
struct timeval start_tm, end_tm, elapsed_tm, elapsed_tm2, elapsed_tm3;
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if ((argc >= 3) && !strcmp(argv[1],"-p")) {percent = atoi(argv[2]); v = 3;}
|
|
Packit Service |
9e77c8 |
if ((argc >= v) && !strcmp(argv[v],"-v")) {verbose=1; v++;}
|
|
Packit Service |
9e77c8 |
if (argc >= v) filename=argv[v];
|
|
Packit Service |
9e77c8 |
fd=open(filename,MODE);
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if ( fd == -1 ) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"open failed %s: %s\n", filename, strerror(errno));
|
|
Packit Service |
9e77c8 |
return -1;
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
for(i=0; !done; i++) {
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
want = sizeof(int);
|
|
Packit Service |
9e77c8 |
dst = (char*)&keylen;
|
|
Packit Service |
9e77c8 |
readmore1:
|
|
Packit Service |
9e77c8 |
rc = read(fd,dst,want);
|
|
Packit Service |
9e77c8 |
if (rc != want) {
|
|
Packit Service |
9e77c8 |
if (rc == 0) done=1;
|
|
Packit Service |
9e77c8 |
else if (rc == -1) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"read failed: %s\n", strerror(errno));
|
|
Packit Service |
9e77c8 |
err=1;
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
else if (rc > 0) { want -= rc; dst += rc; goto readmore1; }
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if (done || err) break;
|
|
Packit Service |
9e77c8 |
if (keylen > max_keylen) max_keylen=keylen;
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if ( (keyt = (stat_key*)malloc(sizeof(stat_key))) == NULL) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"out of memory\n");
|
|
Packit Service |
9e77c8 |
exit(-1);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* read key */
|
|
Packit Service |
9e77c8 |
#ifdef UNALIGNED_KEYS
|
|
Packit Service |
9e77c8 |
padding = i%8;
|
|
Packit Service |
9e77c8 |
#endif
|
|
Packit Service |
9e77c8 |
if ( (keyt->key = (char*)malloc(padding+keylen)) == NULL) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"out of memory\n");
|
|
Packit Service |
9e77c8 |
exit(-1);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
keyt->key += padding; /* forcibly alter the alignment of key */
|
|
Packit Service |
9e77c8 |
keyt->len = keylen;
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
want = keylen;
|
|
Packit Service |
9e77c8 |
dst = keyt->key;
|
|
Packit Service |
9e77c8 |
readmore2:
|
|
Packit Service |
9e77c8 |
rc = read(fd,dst,want);
|
|
Packit Service |
9e77c8 |
if (rc != want) {
|
|
Packit Service |
9e77c8 |
if (rc == -1) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"read failed: %s\n", strerror(errno));
|
|
Packit Service |
9e77c8 |
err=1;
|
|
Packit Service |
9e77c8 |
} else if (rc == 0) {
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"incomplete file\n");
|
|
Packit Service |
9e77c8 |
err=1;
|
|
Packit Service |
9e77c8 |
} else if (rc >= 0) { want -= rc; dst += rc; goto readmore2; }
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
if (err) break;
|
|
Packit Service |
9e77c8 |
/* if percent was set to something less than 100%, skip some keys*/
|
|
Packit Service |
9e77c8 |
if (((rand()*1.0) / RAND_MAX) > ((percent*1.0)/100)) {
|
|
Packit Service |
9e77c8 |
free(keyt->key-padding);
|
|
Packit Service |
9e77c8 |
free(keyt);
|
|
Packit Service |
9e77c8 |
continue;
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* eliminate dups */
|
|
Packit Service |
9e77c8 |
HASH_FIND(hh,keys,keyt->key,keylen,keytmp);
|
|
Packit Service |
9e77c8 |
if (keytmp) {
|
|
Packit Service |
9e77c8 |
dups++;
|
|
Packit Service |
9e77c8 |
free(keyt->key - padding);
|
|
Packit Service |
9e77c8 |
free(keyt);
|
|
Packit Service |
9e77c8 |
} else {
|
|
Packit Service |
9e77c8 |
HASH_ADD_KEYPTR(hh,keys,keyt->key,keylen,keyt);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if (verbose) {
|
|
Packit Service |
9e77c8 |
unsigned key_count = HASH_COUNT(keys);
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"max key length: %u\n", max_keylen);
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"number unique keys: %u\n", key_count);
|
|
Packit Service |
9e77c8 |
fprintf(stderr,"keystats memory: %u\n",
|
|
Packit Service |
9e77c8 |
(unsigned)((sizeof(stat_key)+max_keylen)*key_count));
|
|
Packit Service |
9e77c8 |
hash_chain_len_histogram(keys->hh.tbl);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* add all keys to a new hash, so we can measure add time w/o malloc */
|
|
Packit Service |
9e77c8 |
gettimeofday(&start_tm,NULL);
|
|
Packit Service |
9e77c8 |
for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) {
|
|
Packit Service |
9e77c8 |
HASH_ADD_KEYPTR(hh2,keys2,keyt->key,keyt->len,keyt);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
gettimeofday(&end_tm,NULL);
|
|
Packit Service |
9e77c8 |
timersub(&end_tm, &start_tm, &elapsed_tm);
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* now look up all keys in the new hash, again measuring elapsed time */
|
|
Packit Service |
9e77c8 |
gettimeofday(&start_tm,NULL);
|
|
Packit Service |
9e77c8 |
for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) {
|
|
Packit Service |
9e77c8 |
HASH_FIND(hh2,keys2,keyt->key,keyt->len,keytmp);
|
|
Packit Service |
9e77c8 |
if (!keytmp) fprintf(stderr,"internal error, key not found\n");
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
gettimeofday(&end_tm,NULL);
|
|
Packit Service |
9e77c8 |
timersub(&end_tm, &start_tm, &elapsed_tm2);
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
/* now delete all items in the new hash, measuring elapsed time */
|
|
Packit Service |
9e77c8 |
gettimeofday(&start_tm,NULL);
|
|
Packit Service |
9e77c8 |
while (keys2) {
|
|
Packit Service |
9e77c8 |
keytmp = keys2;
|
|
Packit Service |
9e77c8 |
HASH_DELETE(hh2,keys2,keytmp);
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
gettimeofday(&end_tm,NULL);
|
|
Packit Service |
9e77c8 |
timersub(&end_tm, &start_tm, &elapsed_tm3);
|
|
Packit Service |
9e77c8 |
|
|
Packit Service |
9e77c8 |
if (!err) {
|
|
Packit Service |
9e77c8 |
printf("%.3f,%d,%d,%d,%s,%ld,%ld,%ld\n",
|
|
Packit Service |
9e77c8 |
1-(1.0*keys->hh.tbl->nonideal_items/keys->hh.tbl->num_items),
|
|
Packit Service |
9e77c8 |
keys->hh.tbl->num_items,
|
|
Packit Service |
9e77c8 |
keys->hh.tbl->num_buckets,
|
|
Packit Service |
9e77c8 |
dups,
|
|
Packit Service |
9e77c8 |
(keys->hh.tbl->noexpand ? "nx" : "ok"),
|
|
Packit Service |
9e77c8 |
(elapsed_tm.tv_sec * 1000000) + elapsed_tm.tv_usec,
|
|
Packit Service |
9e77c8 |
(elapsed_tm2.tv_sec * 1000000) + elapsed_tm2.tv_usec,
|
|
Packit Service |
9e77c8 |
(elapsed_tm3.tv_sec * 1000000) + elapsed_tm3.tv_usec );
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
return 0;
|
|
Packit Service |
9e77c8 |
}
|
|
Packit Service |
9e77c8 |
|