/*
 * extensible hashing routines
 *
 * the original v7 code seems to use the dir file as a bitmap, organised as
 * bytes, and that looks portable, but it stored lengths as shorts
 * in native order in the pag file, which was a problem.  i've fixed this
 * by storing lengths to disk in big-endian order and reading them
 * back from big-endian order.
 */
#include <u.h>
#include <libc.h>
#include "dbm.h"

enum {
	PBLKSIZ	= 512,
	DBLKSIZ	= 8192,

	BYTESIZ = 8,
};

typedef vlong Offset;

static long bitno, maxbno;
static long blkno;
static long hmask;

static char pagbuf[PBLKSIZ];
static char dirbuf[DBLKSIZ];
static int dirf, pagf;

static int	additem(char buf[PBLKSIZ], datum item);
static long	calchash(datum item);
static void	chkblk(char buf[PBLKSIZ]);
static void	clrbuf(char *cp, int n);
static int	cmpdatum(datum d1, datum d2);
static void	dbm_access(long hash);
static void	delitem(char buf[PBLKSIZ], int n);
static datum	firsthash(long hash);
static long	forder(datum key);
static int	getbit(void);
static long	hashinc(long hash);
static datum	makdatum(char buf[PBLKSIZ], int n);
static int	setbit(void);

static int
dbmopen(char *file, char *ext)
{
	int fd;

	strcpy(pagbuf, file);
	strcat(pagbuf, ext);
	fd = open(pagbuf, ORDWR);
	if (fd < 0)
		fd = open(pagbuf, OREAD);
	return fd;
}

int
dbminit(char *file)
{
	Dir *stp;

	pagf = dbmopen(file, ".pag");
	dirf = dbmopen(file, ".dir");
	if(pagf < 0 || dirf < 0) {
		close(pagf);
		close(dirf);
		return -1;
	}
	stp = dirfstat(dirf);
	maxbno = stp->length*BYTESIZ - 1;
	free(stp);
	return 0;
}

void
dbmclose(void)
{
	close(pagf);
	close(dirf);
	pagf = dirf = -1;
	maxbno = 0;
}

/* big-endian get and put for shorts */

#define begets(p)	(((uchar *)(p))[0]<<BYTESIZ | ((uchar *)(p))[1])
#define beputs(p, s) (((char *)(p))[0] = (s)>>BYTESIZ, ((char *)(p))[1] = (s))

// static short
// begets(void *p)
// {
// 	uchar *up = (uchar *)p;
// 
// 	return up[0]<<BYTESIZ | up[1];
// }
// 
// static char *
// beputs(void *vp, short s)
// {
// 	char *p = (char *)vp;
// 
// 	*p++ = s >> BYTESIZ;
// 	*p++ = s;
// 	return p;
// }

static long
forder(datum key)
{
	long hash;

	hash = calchash(key);
	for(hmask=0;; hmask=(hmask<<1)+1) {
		blkno = hash & hmask;
		bitno = blkno + hmask;
		if(getbit() == 0)
			break;
	}
	return blkno;
}

datum
fetch(datum key)
{
	int i;
	datum item;

	dbm_access(calchash(key));
	for(i=0;; i+=2) {
		item = makdatum(pagbuf, i);
		if(item.dptr == nil)
			return item;
		if(cmpdatum(key, item) == 0) {
			item = makdatum(pagbuf, i+1);
			if(item.dptr == nil)
				fprint(2, "dbm: items not in pairs\n");
			return item;
		}
	}
}

int
delete(datum key)
{
	int i;
	datum item;

	dbm_access(calchash(key));
	for(i=0;; i+=2) {
		item = makdatum(pagbuf, i);
		if(item.dptr == nil)
			return -1;
		if(cmpdatum(key, item) == 0) {
			delitem(pagbuf, i);
			delitem(pagbuf, i);
			break;
		}
	}
	seek(pagf, (Offset)blkno*PBLKSIZ, 0);
	if (write(pagf, pagbuf, PBLKSIZ) != PBLKSIZ)
		return -1;
	return 0;
}

int
store(datum key, datum dat)
{
	int i;
	datum item;
	char ovfbuf[PBLKSIZ];

	for (;;) {
		dbm_access(calchash(key));
		for(i=0;; i+=2) {
			item = makdatum(pagbuf, i);
			if(item.dptr == nil)
				break;
			if(cmpdatum(key, item) == 0) {
				delitem(pagbuf, i);
				delitem(pagbuf, i);
				break;
			}
		}
		i = additem(pagbuf, key);
		if(i >= 0)
			if(additem(pagbuf, dat) < 0)
				delitem(pagbuf, i);
			else {
				seek(pagf, (Offset)blkno*PBLKSIZ, 0);
				if (write(pagf, pagbuf, PBLKSIZ) != PBLKSIZ)
					return -1;
				return 0;
			}

		/* split */
		if(key.dsize+dat.dsize+2*sizeof(short) >= PBLKSIZ) {
			fprint(2, "dbm: entry too big\n");
			return -1;
		}
		clrbuf(ovfbuf, PBLKSIZ);
		for(i=0;;) {
			item = makdatum(pagbuf, i);
			if(item.dptr == nil)
				break;
			if(calchash(item) & (hmask+1)) {
				additem(ovfbuf, item);
				delitem(pagbuf, i);
				item = makdatum(pagbuf, i);
				if(item.dptr == nil) {
					fprint(2, "dbm: split not paired\n");
					break;
				}
				additem(ovfbuf, item);
				delitem(pagbuf, i);
				continue;
			}
			i += 2;
		}
		seek(pagf, (Offset)blkno*PBLKSIZ, 0);
		if (write(pagf, pagbuf, PBLKSIZ) != PBLKSIZ)
			return -1;
		seek(pagf, ((Offset)blkno+hmask+1)*PBLKSIZ, 0);
		if (write(pagf, ovfbuf, PBLKSIZ) != PBLKSIZ)
			return -1;
		setbit();
	}
}

datum
firstkey(void)
{
	return firsthash(0L);
}

datum
nextkey(datum key)
{
	int i, f;
	datum item, bitem;
	long hash;

	hash = calchash(key);
	dbm_access(hash);
	f = 1;
	for(i=0;; i+=2) {
		item = makdatum(pagbuf, i);
		if(item.dptr == nil)
			break;
		if(cmpdatum(key, item) <= 0)
			continue;
		if(f || cmpdatum(bitem, item) < 0) {
			bitem = item;
			f = 0;
		}
	}
	if(f == 0)
		return bitem;
	hash = hashinc(hash);
	if(hash == 0)
		return item;
	return firsthash(hash);
}

static datum
firsthash(long hash)
{
	int i;
	datum item, bitem;

	for (;;) {
		dbm_access(hash);
		bitem = makdatum(pagbuf, 0);
		for(i=2;; i+=2) {
			item = makdatum(pagbuf, i);
			if(item.dptr == nil)
				break;
			if(cmpdatum(bitem, item) < 0)
				bitem = item;
		}
		if(bitem.dptr != nil)
			return bitem;
		hash = hashinc(hash);
		if(hash == 0)
			return item;
	}
}

static void
dbm_access(long hash)
{
	static long oldb = -1;

	for(hmask=0;; hmask=(hmask<<1)+1) {
		blkno = hash & hmask;
		bitno = blkno + hmask;
		if(getbit() == 0)
			break;
	}
	if(blkno != oldb) {
		clrbuf(pagbuf, PBLKSIZ);
		seek(pagf, blkno*PBLKSIZ, 0);
		if (read(pagf, pagbuf, PBLKSIZ) != PBLKSIZ)
			fprint(2, "dbm: short pag read\n");
		chkblk(pagbuf);
		oldb = blkno;
	}
}

static int
getbit(void)
{
	long bn;
	int b, i, n;
	static int oldb = -1;

	if(bitno > maxbno)
		return 0;
	n =  bitno % BYTESIZ;
	bn = bitno / BYTESIZ;
	i = bn % DBLKSIZ;
	b = bn / DBLKSIZ;
	if(b != oldb) {
		clrbuf(dirbuf, DBLKSIZ);
		seek(dirf, (Offset)b*DBLKSIZ, 0);
		/* short reads are okay */
		if (read(dirf, dirbuf, DBLKSIZ) < 0)
			fprint(2, "dbm: read error\n");
		oldb = b;
	}
	if(dirbuf[i] & (1<<n))
		return 1;
	return 0;
}

static int
setbit(void)
{
	long bn;
	int i, n, b;

	if(bitno > maxbno) {
		maxbno = bitno;
		getbit();
	}
	n =  bitno % BYTESIZ;
	bn = bitno / BYTESIZ;
	i = bn % DBLKSIZ;
	b = bn / DBLKSIZ;
	dirbuf[i] |= 1<<n;
	seek(dirf, (Offset)b*DBLKSIZ, 0);
	if (write(dirf, dirbuf, DBLKSIZ) != DBLKSIZ)
		return -1;
	return 0;
}

static void
clrbuf(char *cp, int n)
{
	memset(cp, 0, n);
}

static datum
makdatum(char buf[PBLKSIZ], int n)
{
	int t;
	short *sp;
	datum item;

	sp = (short *)buf;
	if(n < 0 || n >= begets(&sp[0])) {
		item.dptr = nil;
		item.dsize = 0;
		return item;
	}
	t = PBLKSIZ;
	if(n > 0)
		t = begets(&sp[n+1-1]);
	item.dptr = buf + begets(&sp[n+1]);
	item.dsize = t -  begets(&sp[n+1]);
	return item;
}

static int
cmpdatum(datum d1, datum d2)
{
	int n;

	n = d1.dsize;
	if(n != d2.dsize)
		return n - d2.dsize;
	if(n <= 0)
		return 0;
	return memcmp(d1.dptr, d2.dptr, n);
}

static int hitab[16] = {
	61, 57, 53, 49, 45, 41, 37, 33,
	29, 25, 21, 17, 13,  9,  5,  1,
};
static long hltab[64] = {
	06100151277L,06106161736L,06452611562L,05001724107L,
	02614772546L,04120731531L,04665262210L,07347467531L,
	06735253126L,06042345173L,03072226605L,01464164730L,
	03247435524L,07652510057L,01546775256L,05714532133L,
	06173260402L,07517101630L,02431460343L,01743245566L,
	00261675137L,02433103631L,03421772437L,04447707466L,
	04435620103L,03757017115L,03641531772L,06767633246L,
	02673230344L,00260612216L,04133454451L,00615531516L,
	06137717526L,02574116560L,02304023373L,07061702261L,
	05153031405L,05322056705L,07401116734L,06552375715L,
	06165233473L,05311063631L,01212221723L,01052267235L,
	06000615237L,01075222665L,06330216006L,04402355630L,
	01451177262L,02000133436L,06025467062L,07121076461L,
	03123433522L,01010635225L,01716177066L,05161746527L,
	01736635071L,06243505026L,03637211610L,01756474365L,
	04723077174L,03642763134L,05750130273L,03655541561L,
};

static long
hashinc(long hash)
{
	long bit;

	hash &= hmask;
	bit = hmask+1;
	for(;;) {
		bit >>= 1;
		if(bit == 0)
			return 0L;
		if((hash&bit) == 0)
			return hash|bit;
		hash &= ~bit;
	}
	return 0;
}

static long
calchash(datum item)
{
	int i, j, f, hashi;
	long hashl;

	hashl = 0;
	hashi = 0;
	for(i=0; i<item.dsize; i++) {
		f = item.dptr[i];
		for(j=0; j<BYTESIZ; j+=4) {
			hashi += hitab[f&017];
			hashl += hltab[hashi&077];
			f >>= 4;
		}
	}
	return hashl;
}

/* functions that change the pag (data) file */

static void
delitem(char buf[PBLKSIZ], int n)
{
	short *sp;
	int i1, i2, i3, b;

	sp = (short *)buf;
	if(n < 0 || n >= begets(&sp[0])) {
		fprint(2, "dbm: bad delitem\n");
		abort();
	}
	i1 = begets(&sp[n+1]);
	i2 = PBLKSIZ;
	if(n > 0)
		i2 = begets(&sp[n+1-1]);
	i3 = begets(&sp[begets(buf)+1-1]);
	if(i2 > i1) {
		b = i1 - i3;
		if (b > 0) {
			memmove(&buf[i2-1], &buf[i1-1], b);
			memset(&buf[i3], 0, b);
			i1 -= b;
			i2 -= b;
		}
	}
	i2 -= i1;
	for(i1 = n+1; i1 < begets(&sp[0]); i1++)
		beputs(&sp[i1+1-1], begets(&sp[i1+1]) + i2);
		
	beputs(&sp[0], begets(&sp[0]) - 1);
	beputs(&sp[begets(&sp[0])+1], 0);
}

static int
additem(char buf[PBLKSIZ], datum item)
{
	short *sp;
	int i1, i2;

	sp = (short *)buf;
	i1 = PBLKSIZ;
	if(begets(&sp[0]) > 0)
		i1 = begets(&sp[begets(&sp[0])+1-1]);
	i1 -= item.dsize;
	i2 = (begets(&sp[0])+2) * sizeof(short);
	if(i1 <= i2)
		return -1;
	beputs(&sp[begets(&sp[0])+1], i1);
	memmove(&buf[i1], item.dptr, item.dsize);
	beputs(&sp[0], begets(&sp[0]) + 1);
	return begets(&sp[0]) - 1;
}

static void
badblk(char buf[PBLKSIZ])
{
	fprint(2, "dbm: bad block\n");
	abort();
	clrbuf(buf, PBLKSIZ);
}

static void
chkblk(char buf[PBLKSIZ])
{
	short *sp;
	int t, i, len;

	sp = (short *)buf;
	t = PBLKSIZ;
	for(i=0; i < begets(&sp[0]); i++) {
		len = begets(&sp[i+1]);
		if(len > t) {
			badblk(buf);
			return;
		}
		t = len;
	}
	if(t < (begets(&sp[0])+1)*sizeof(short))
		badblk(buf);
}
