Add binary fraction tree index.
The old index code is still supported too. Buildindex and checkindex need to be revisited, though they should be easy to adapt.
parent 9ffbb5adca
commit 333c1dccc2

11 changed files with 239 additions and 44 deletions
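The diff below implements what the message describes: the top bits of a score's 32-bit hash are read as a binary fraction, a bucket of depth d owns every key that shares its top d bits, and a full bucket splits into two depth d+1 buckets (see the new KEY macro and splitiblock further down). The fragment here is only a hedged illustration of that bucket numbering, not code from the commit; prefix2bucket and childbuckets are made-up names, and u32int is the Plan 9 typedef for an unsigned 32-bit integer.

#define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)	/* top d bits of a 32-bit hash, as in the patch */

/* bucket number (maxdepth bits wide) that owns the depth-d prefix of key */
u32int
prefix2bucket(u32int key, int d, int maxdepth)
{
	if(d == 0)
		key = 0;
	else if(d != 32)
		key &= ~((1<<(32-d))-1);	/* clear all but the top d bits */
	return key >> (32-maxdepth);	/* keep only maxdepth bits */
}

/* when a depth-d bucket overflows, its entries are redistributed into these two depth d+1 buckets */
void
childbuckets(u32int key, int d, int maxdepth, u32int *b0, u32int *b1)
{
	*b0 = prefix2bucket(key, d, maxdepth);	/* same prefix, next bit 0 */
	*b1 = *b0 | (1<<(maxdepth-(d+1)));	/* same prefix, next bit 1 */
}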
@@ -468,18 +468,16 @@ ReadErr:
 int
 wbarena(Arena *arena)
 {
-	ZBlock *b;
+	DBlock *b;
 	int bad;
 
-	b = alloczblock(arena->blocksize, 1);
-	if(b == nil){
+	if((b = getdblock(arena->part, arena->base + arena->size, 0)) == nil){
 		logerr(EAdmin, "can't write arena trailer: %r");
-		///ZZZ add error message?
 		return -1;
 	}
-	bad = okarena(arena)<0 || packarena(arena, b->data)<0 ||
-		writepart(arena->part, arena->base + arena->size, b->data, arena->blocksize)<0;
-	freezblock(b);
+	dirtydblock(b, DirtyArenaTrailer);
+	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
+	putdblock(b);
 	if(bad)
 		return -1;
 	return 0;
@@ -502,6 +500,10 @@ wbarenahead(Arena *arena)
 		///ZZZ add error message?
 		return -1;
 	}
+	/*
+	 * this writepart is okay because it only happens
+	 * during initialization.
+	 */
 	bad = packarenahead(&head, b->data)<0 ||
 		writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0;
 	freezblock(b);
@@ -582,6 +584,7 @@ okarena(Arena *arena)
 static CIBlock*
 getcib(Arena *arena, int clump, int writing, CIBlock *rock)
 {
+	int read;
 	CIBlock *cib;
 	u32int block, off;
 
@@ -613,7 +616,12 @@ getcib(Arena *arena, int clump, int writing, CIBlock *rock)
 
 	cib->block = block;
 	cib->offset = off;
-	cib->data = getdblock(arena->part, arena->base + arena->size - (block + 1) * arena->blocksize, arena->blocksize);
+
+	read = 1;
+	if(writing && off == 0 && clump == arena->clumps-1)
+		read = 0;
+
+	cib->data = getdblock(arena->part, arena->base + arena->size - (block + 1) * arena->blocksize, read);
 	if(cib->data == nil)
 		return nil;
 	return cib;
@@ -46,7 +46,7 @@ if(0)print("storeclump %08x %p\n", mainindex->arenas[0], &cl);
 	scorecp(cl.info.score, sc);
 
 if(0)print("whackblock %08x %p\n", mainindex->arenas[0], &cl);
-	dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
+	dsize=0; // dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
 if(0)print("whackedblock %08x %p\n", mainindex->arenas[0], &cl);
 	if(dsize > 0 && dsize < size){
 		cl.encoding = ClumpECompress;
@@ -124,6 +124,7 @@ enum
 	DirtyIndex,
 	DirtyIndexBitmap,
 	DirtyArenaCib,
+	DirtyArenaTrailer,
 	DirtyMax,
 
 	VentiZZZZZZZZ
@@ -355,6 +356,10 @@ struct Clump
  * <field name="tabsize" val="s->tabsize" type="U32int"/>
  * <field name="buckets" val="s->buckets" type="U32int"/>
  * <field name="buckdiv" val="s->div" type="U32int"/>
+ * <field name="bitblocks" val="s->div" type="U32int"/>
+ * <field name="maxdepth" val="s->div" type="U32int"/>
+ * <field name="bitkeylog" val="s->div" type="U32int"/>
+ * <field name="bitkeymask" val="s->div" type="U32int"/>
  * <array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
  * <array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
  * <array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
@@ -495,6 +500,7 @@ struct Stats
 	long	indexreads;	/* index from disk */
 	long	indexwreads;	/* for writing a new entry */
 	long	indexareads;	/* for allocating an overflow block */
+	long	indexsplits;	/* index block splits */
 	long	diskwrites;	/* total disk writes */
 	long	diskreads;	/* total disk reads */
 	vlong	diskbwrites;	/* total disk bytes written */
@@ -230,10 +230,38 @@ dirtydblock(DBlock *b, int dirty)
 	int odirty;
 	Part *p;
 
-	rlock(&dcache.dirtylock);
 	assert(b->ref != 0);
-	assert(b->dirtying == 0);
-	b->dirtying = 1;
+	/*
+	 * Because we use an rlock to keep track of how
+	 * many blocks are being dirtied (and a wlock to
+	 * stop dirtying), we cannot try to dirty two blocks
+	 * at the same time in the same thread -- if the
+	 * wlock happens between them, we get a deadlock.
+	 *
+	 * The only place in the code where we need to
+	 * dirty multiple blocks at once is in splitiblock, when
+	 * we split an index block. The new block has a dirty
+	 * flag of DirtyIndexSplit already, because it has to get
+	 * written to disk before the old block (DirtyIndex).
+	 * So when we see the DirtyIndexSplit block, we don't
+	 * acquire the rlock (and we don't set dirtying, so putdblock
+	 * won't drop the rlock). This kludginess means that
+	 * the caller will have to be sure to putdblock on the
+	 * new block before putdblock on the old block.
+	 */
+	if(dirty == DirtyIndexSplit){
+		/* caller should have another dirty block */
+		assert(!canwlock(&dcache.dirtylock));
+		/* this block should be clean */
+		assert(b->dirtying == 0);
+		assert(b->dirty == 0);
+	}else{
+		rlock(&dcache.dirtylock);
+		assert(b->dirtying == 0);
+		b->dirtying = 1;
+	}
 
 	qlock(&stats.lock);
 	if(b->dirty)
@@ -247,12 +275,13 @@ dirtydblock(DBlock *b, int dirty)
 	 * blocks. Only clean blocks ever get marked DirtyIndexSplit,
 	 * though, so we don't need the opposite conjunction here.
 	 */
+	odirty = b->dirty;
 	if(b->dirty)
 		assert(b->dirty == dirty
 			|| (b->dirty==DirtyIndexSplit && dirty==DirtyIndex));
+	else
+		b->dirty = dirty;
 
-	odirty = b->dirty;
-	b->dirty = dirty;
 	p = b->part;
 	if(p->writechan == nil){
 		fprint(2, "allocate write proc for part %s\n", p->name);
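The comment added above spells out the locking convention around dcache.dirtylock. What follows is only a toy restatement of that convention, not the code in this commit: the names (Block, flushlock, markdirty, putblock, flushall) are invented and the real dcache tracks much more state. The point is just the rlock/wlock pairing and why a DirtyIndexSplit block must not take the rlock a second time.

#include <u.h>
#include <libc.h>

typedef struct Block Block;
struct Block
{
	int	dirty;		/* which write stage the block belongs to */
	int	dirtying;	/* 1 while the block holds flushlock for reading */
};

RWLock flushlock;	/* stands in for dcache.dirtylock */

void
markdirty(Block *b, int dirty, int issplit)
{
	if(issplit){
		/* the caller already holds the rlock for the block being split;
		 * taking it again here could deadlock against a flusher's wlock,
		 * so just check (as the patch does) that a flush cannot start */
		assert(!canwlock(&flushlock));
	}else{
		rlock(&flushlock);	/* held until putblock */
		b->dirtying = 1;
	}
	b->dirty = dirty;
}

void
putblock(Block *b)
{
	if(b->dirtying){
		b->dirtying = 0;
		runlock(&flushlock);
	}
}

void
flushall(void)
{
	wlock(&flushlock);	/* waits for in-flight dirtiers, blocks new ones */
	/* write out blocks stage by stage: split blocks before the blocks they split from */
	wunlock(&flushlock);
}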
@@ -304,6 +333,8 @@ bumpdblock(void)
 			break;
 	}
 
+	fprint(2, "bump %s at %llud\n", b->part->name, b->addr);
+
 	/*
 	 * unchain the block
 	 */
@@ -541,6 +572,7 @@ static void
 flushproc(void *v)
 {
 	int i, j, n;
+	ulong t0;
 	DBlock *b, **write;
 
 	USED(v);
@@ -551,7 +583,8 @@ flushproc(void *v)
 		rsleep(&dcache.flush);
 		qunlock(&dcache.lock);
 
-		fprint(2, "flushing dcache\n");
+		fprint(2, "flushing dcache: t=0 ms\n");
+		t0 = nsec()/1000;
 
 		/*
 		 * Because we don't record any dependencies at all, we must write out
@@ -568,10 +601,10 @@ flushproc(void *v)
 		 * finishes with them.
 		 */
 
-		fprint(2, "flushproc: wlock\n");
+		fprint(2, "flushproc: wlock at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		wlock(&dcache.dirtylock);
 
-		fprint(2, "flushproc: build list\n");
+		fprint(2, "flushproc: build list at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		write = dcache.write;
 		n = 0;
 		for(i=0; i<dcache.nblocks; i++){
@@ -583,12 +616,15 @@ flushproc(void *v)
 		qsort(write, n, sizeof(write[0]), writeblockcmp);
 
 		/* Write each stage of blocks out. */
+		fprint(2, "flushproc: write blocks at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		i = 0;
-		for(j=1; j<DirtyMax; j++)
+		for(j=1; j<DirtyMax; j++){
+			fprint(2, "flushproc: flush dirty %d at t=%lud\n", j, (ulong)(nsec()/1000) - t0);
 			i += parallelwrites(write+i, write+n, j);
+		}
 		assert(i == n);
 
-		fprint(2, "flushproc: update dirty bits\n");
+		fprint(2, "flushproc: update dirty bits at t=%lud\n", (ulong)(nsec()/1000) - t0);
 		qlock(&dcache.lock);
 		for(i=0; i<n; i++){
 			b = write[i];
@@ -602,6 +638,8 @@ flushproc(void *v)
 		qunlock(&dcache.lock);
 		wunlock(&dcache.dirtylock);
 
+		fprint(2, "flushproc: done at t=%lud\n", (ulong)(nsec()/1000) - t0);
+
 		qlock(&stats.lock);
 		stats.dcacheflushes++;
 		stats.dcacheflushwrites += n;
@@ -66,9 +66,6 @@ threadmain(int argc, char *argv[])
 		for(i = 0; i < ix->nsects; i++)
 			n += ix->sects[i]->blocks;
 
-		if(ix->div < 100)
-			sysfatal("index divisor too coarse: use bigger block size");
-
 		fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
 	}
 	amap = MKNZ(AMap, narenas);
@@ -105,6 +102,8 @@ threadmain(int argc, char *argv[])
 	}
 	fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
 		ix->name, n, addr - IndexBase);
+	fprint(2, "\tbitblocks=%d maxdepth=%d buckets=%d\n",
+		ix->bitblocks, ix->maxdepth, ix->buckets);
 
 	ix->amap = amap;
 	ix->arenas = arenas;
@@ -258,6 +258,7 @@ estats(HConnect *c)
 	hprint(hout, "index disk reads=%,ld\n", stats.indexreads);
 	hprint(hout, "index disk reads for modify=%,ld\n", stats.indexwreads);
 	hprint(hout, "index disk reads for allocation=%,ld\n", stats.indexareads);
+	hprint(hout, "index block splits=%,ld\n", stats.indexsplits);
 
 	hprint(hout, "index cache lookups=%,ld\n", stats.iclookups);
 	hprint(hout, "index cache hits=%,ld %d%%\n", stats.ichits,
@@ -277,7 +278,8 @@ estats(HConnect *c)
 
 	hprint(hout, "disk cache flushes=%,ld\n", stats.dcacheflushes);
 	hprint(hout, "disk cache flush writes=%,ld (%,ld per flush)\n",
-		stats.dcacheflushwrites, stats.dcacheflushwrites/stats.dcacheflushes);
+		stats.dcacheflushwrites,
+		stats.dcacheflushwrites/(stats.dcacheflushes ? stats.dcacheflushes : 1));
 
 	hprint(hout, "disk writes=%,ld\n", stats.diskwrites);
 	hprint(hout, "disk bytes written=%,lld\n", stats.diskbwrites);
@@ -368,7 +370,7 @@ dindex(HConnect *c)
 		ix->name, ix->version, ix->blocksize, ix->tabsize);
 	hprint(hout, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
 	for(i = 0; i < ix->nsects; i++)
-		hprint(hout, "\tsect=%s for buckets [%lld,%lld)\n", ix->smap[i].name, ix->smap[i].start, ix->smap[i].stop);
+		hprint(hout, "\tsect=%s for buckets [%lld,%lld) buckmax=%d\n", ix->smap[i].name, ix->smap[i].start, ix->smap[i].stop, ix->sects[i]->buckmax);
 	for(i = 0; i < ix->narenas; i++){
 		if(ix->arenas[i] != nil && ix->arenas[i]->clumps != 0){
 			hprint(hout, "arena=%s at index [%lld,%lld)\n\t", ix->amap[i].name, ix->amap[i].start, ix->amap[i].stop);
@@ -116,6 +116,9 @@ static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
 static int	okibucket(IBucket *ib, ISect *is);
 static int	initindex1(Index*);
 static ISect	*initisect1(ISect *is);
+static int	splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
 
+#define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
+
 //static QLock	indexlock;	//ZZZ
 
@@ -763,6 +766,7 @@ storeientry(Index *ix, IEntry *ie)
 	stats.indexwreads++;
 	qunlock(&stats.lock);
 
+top:
 	b = loadibucket(ix, ie->score, &is, &buck, &ib);
 	if(b == nil)
 		return -1;
@@ -789,6 +793,18 @@ storeientry(Index *ix, IEntry *ie)
 			goto out;
 		}
 
+	/* block is full -- not supposed to happen in V1 */
+	if(ix->version == IndexVersion1){
+		seterr(EAdmin, "index bucket %ud on %s is full\n", buck, is->part->name);
+		ok = -1;
+		goto out;
+	}
+
+	/* in V2, split the block and try again; splitiblock puts b */
+	if(splitiblock(ix, b, is, buck, &ib) < 0)
+		return -1;
+	goto top;
+
 out:
 	putdblock(b);
 	return ok;
@@ -797,7 +813,7 @@ out:
 static int
 writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b)
 {
-	assert(b->dirty == DirtyIndex);
+	assert(b->dirty == DirtyIndex || b->dirty == DirtyIndexSplit);
 
 	if(buck >= is->blocks){
 		seterr(EAdmin, "index write out of bounds: %d >= %d\n",
@@ -919,7 +935,7 @@ indexsect0(Index *ix, u32int buck)
  * load the index block at bucket #buck
  */
 static DBlock*
-loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib)
+loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int read)
 {
 	ISect *is;
 	DBlock *b;
@@ -931,12 +947,15 @@ loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib)
 	}
 
 	buck -= is->start;
-	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), 1)) == nil)
+	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), read)) == nil)
 		return nil;
 
-	*pis = is;
-	*pbuck = buck;
-	unpackibucket(ib, b->data);
+	if(pis)
+		*pis = is;
+	if(pbuck)
+		*pbuck = buck;
+	if(ib)
+		unpackibucket(ib, b->data);
 	return b;
 }
 
@@ -955,14 +974,16 @@ indexsect1(Index *ix, u8int *score)
 static DBlock*
 loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
 {
-	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib);
+	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, 1);
 }
 
 static u32int
 keytobuck(Index *ix, u32int key, int d)
 {
-	/* clear all but top d bits */
-	if(d != 32)
+	/* clear all but top d bits; can't depend on boundary case shifts */
+	if(d == 0)
+		key = 0;
+	else if(d != 32)
 		key &= ~((1<<(32-d))-1);
 
 	/* truncate to maxdepth bits */
@@ -981,27 +1002,33 @@ static int
 bitmapop(Index *ix, u32int key, int d, int set)
 {
 	DBlock *b;
-	ISect *is;
-	IBucket ib;
-	u32int buck;
 	int inuse;
+	u32int key1, buck1;
 
 	if(d >= ix->maxdepth)
 		return 0;
 
 
 	/* construct .xxx1 in bucket number format */
-	key = keytobuck(ix, key, d) | (1<<(ix->maxdepth-d-1));
+	key1 = key | (1<<(32-d-1));
+	buck1 = keytobuck(ix, key1, d+1);
 
-	/* check whether key (now the bucket number for .xxx1) is in use */
-	if((b = loadibucket0(ix, key >> ix->bitkeylog, &is, &buck, &ib)) == nil){
+	if(0) fprint(2, "key %d/%0*ub key1 %d/%0*ub buck1 %08ux\n",
+		d, d, KEY(key, d), d+1, d+1, KEY(key1, d+1), buck1);
+
+	/* check whether buck1 is in use */
+
+	if((b = loadibucket0(ix, buck1 >> ix->bitkeylog, nil, nil, nil, 1)) == nil){
 		seterr(ECorrupt, "cannot load in-use bitmap block");
+		fprint(2, "loadibucket: %r\n");
 		return -1;
 	}
-	inuse = ((u32int*)b->data)[(key & ix->bitkeymask)>>5] & (1<<(key&31));
+	if(0) fprint(2, "buck1 %08ux bitkeymask %08ux bitkeylog %d\n", buck1, ix->bitkeymask, ix->bitkeylog);
+	buck1 &= ix->bitkeymask;
+	inuse = ((u32int*)b->data)[buck1>>5] & (1<<(buck1&31));
 	if(set && !inuse){
 		dirtydblock(b, DirtyIndexBitmap);
-		((u32int*)b->data)[(key & ix->bitkeymask)>>5] |= (1<<(key&31));
+		((u32int*)b->data)[buck1>>5] |= (1<<(buck1&31));
 	}
 	putdblock(b);
 	return inuse;
@@ -1073,13 +1100,15 @@ loadibucket2(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
 		return nil;
 	}
 
-	if((b = loadibucket0(ix, keytobuck(ix, key, d), pis, pbuck, ib)) == nil)
+	if((b = loadibucket0(ix, keytobuck(ix, key, d), pis, pbuck, ib, 1)) == nil)
 		return nil;
 
 	if(ib->depth == d)
 		return b;
 
 	if(ib->depth < d){
+		fprint(2, "loaded block %ud for %d/%0*ub got %d/%0*ub\n",
+			*pbuck, d, d, KEY(key,d), ib->depth, ib->depth, KEY(key, ib->depth));
 		seterr(EBug, "index block has smaller depth than expected -- cannot happen");
 		putdblock(b);
 		return nil;
@@ -1087,8 +1116,12 @@ loadibucket2(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
 
 	/*
 	 * ib->depth > d, meaning the bitmap was out of date.
-	 * fix the bitmap and try again.
+	 * fix the bitmap and try again. if we actually updated
+	 * the bitmap in splitiblock, this would only happen if
+	 * venti crashed at an inopportune moment. but this way
+	 * the code gets tested more.
 	 */
+	if(0) fprint(2, "update bitmap: %d/%0*ub is split\n", d, d, KEY(key,d));
 	putdblock(b);
 	if(marksplit(ix, key, d) < 0)
 		return nil;
@@ -1097,6 +1130,110 @@ loadibucket2(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
 	return nil;
 }
 
+static int
+splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib)
+{
+	int i, d;
+	u8int *score;
+	u32int buck0, buck1, key0, key1, key, dmask;
+	DBlock *b0, *b1;
+	IBucket ib0, ib1;
+
+	if(ib->depth == ix->maxdepth){
+		if(0) fprint(2, "depth=%d == maxdepth\n", ib->depth);
+		seterr(EAdmin, "index bucket %ud on %s is full\n", buck, is->part->name);
+		putdblock(b);
+		return -1;
+	}
+
+	buck = is->start+buck - ix->bitblocks;
+	d = ib->depth+1;
+	buck0 = buck;
+	buck1 = buck0 | (1<<(ix->maxdepth-d));
+	if(ix->maxdepth == 32){
+		key0 = buck0;
+		key1 = buck1;
+	}else{
+		key0 = buck0 << (32-ix->maxdepth);
+		key1 = buck1 << (32-ix->maxdepth);
+	}
+	buck0 += ix->bitblocks;
+	buck1 += ix->bitblocks;
+	USED(buck0);
+	USED(key1);
+
+	if(d == 32)
+		dmask = TWID32;
+	else
+		dmask = TWID32 ^ ((1<<(32-d))-1);
+
+	/*
+	 * Since we hold the lock for b, the bitmap
+	 * thinks buck1 doesn't exist, and the bit
+	 * for buck1 can't be updated without first
+	 * locking and splitting b, it's safe to try to
+	 * acquire the lock on buck1 without dropping b.
+	 * No one else will go after it too.
+	 *
+	 * Also, none of the rest of the code ever locks
+	 * more than one block at a time, so it's okay if
+	 * we do.
+	 */
+	if((b1 = loadibucket0(ix, buck1, nil, nil, &ib1, 0)) == nil){
+		putdblock(b);
+		return -1;
+	}
+	b0 = b;
+	ib0 = *ib;
+
+	/*
+	 * Funny locking going on here -- see dirtydblock.
+	 * We must putdblock(b1) before putdblock(b0).
+	 */
+	dirtydblock(b0, DirtyIndex);
+	dirtydblock(b1, DirtyIndexSplit);
+
+	/*
+	 * Split the block contents.
+	 * The block is already sorted, so it's pretty easy:
+	 * the first part stays, the second part goes to b1.
+	 */
+	ib0.n = 0;
+	ib0.depth = d;
+	ib1.n = 0;
+	ib1.depth = d;
+	for(i=0; i<ib->n; i++){
+		score = ib->data+i*IEntrySize;
+		key = hashbits(score, 32);
+		if((key&dmask) != key0)
+			break;
+	}
+	ib0.n = i;
+	ib1.n = ib->n - ib0.n;
+	memmove(ib1.data, ib0.data+ib0.n*IEntrySize, ib1.n*IEntrySize);
+	if(0) fprint(2, "splitiblock %d in %d/%0*ub => %d in %d/%0*ub + %d in %d/%0*ub\n",
+		ib->n, d-1, d-1, key0>>(32-d+1),
+		ib0.n, d, d, key0>>(32-d),
+		ib1.n, d, d, key1>>(32-d));
+
+	packibucket(&ib0, b0->data);
+	packibucket(&ib1, b1->data);
+
+	/* order matters! see comment above. */
+	putdblock(b1);
+	putdblock(b0);
+
+	/*
+	 * let the recovery code take care of updating the bitmap.
+	 */
+
+	qlock(&stats.lock);
+	stats.indexsplits++;
+	qunlock(&stats.lock);
+
+	return 0;
+}
+
 int
 indexsect(Index *ix, u8int *score)
 {
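To make the split in splitiblock concrete, here is a small made-up example (not data from the commit): take maxdepth=8 and a full bucket of depth 2 owning the prefix 10, i.e. tree bucket 10000000 = 0x80. The new depth is d=3, so buck1 = 0x80 | 1<<(8-3) = 0xa0, the bucket for prefix 101. Entries whose hash starts 100 keep their place in b0; the tail of the sorted block, the entries starting 101, moves to b1. b1 is marked DirtyIndexSplit and b0 DirtyIndex, so the flusher writes the new block before the old one, which is the ordering the dirtydblock comment earlier in this commit requires.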
@@ -102,6 +102,9 @@ flushqueue(void)
 	int i;
 	LumpQueue *q;
 
+	if(!lumpqs)
+		return;
+
 	qlock(&glk);
 	gen++;
 	qunlock(&glk);
@@ -2,7 +2,7 @@
 #include "dat.h"
 #include "fns.h"
 
-#define trace 0
+#define trace 1
 
 u32int maxblocksize;
 int readonly;
@@ -44,6 +44,7 @@ printstats(void)
 	fprint(2, "index disk reads=%,ld\n", stats.indexreads);
 	fprint(2, "index disk reads for modify=%,ld\n", stats.indexwreads);
 	fprint(2, "index disk reads for allocation=%,ld\n", stats.indexareads);
+	fprint(2, "index block splits=%,ld\n", stats.indexsplits);
 
 	fprint(2, "index cache lookups=%,ld\n", stats.iclookups);
 	fprint(2, "index cache hits=%,ld %d%%\n", stats.ichits,
@@ -128,6 +128,7 @@ syncarena(Arena *arena, u32int n, int zok, int fix)
 			fprint(2, "can't flush arena directory cache: %r");
 			err |= SyncFixErr;
 		}
+		flushdcache();
 	}
 
 	if(used != arena->used