venti: new icache

This commit is contained in:
Russ Cox 2007-09-25 09:47:31 -04:00
parent 25a4e89fa9
commit 7a400ee957
17 changed files with 854 additions and 468 deletions

View file

@ -45,6 +45,16 @@ initicachewrite(void)
vtproc(delaykickroundproc, &iwrite.round);
}
/*
 * Map an index entry to an address inside index section is.
 * The bucket number is hashbits(ie->score, 32) divided by ix->div;
 * the result is is->blockbase plus that bucket's distance from
 * is->start, shifted left by is->blocklog.
 */
static u64int
ie2diskaddr(Index *ix, ISect *is, IEntry *ie)
{
	u64int buck;

	buck = hashbits(ie->score, 32) / ix->div;
	return is->blockbase + ((buck - is->start) << is->blocklog);
}
static IEntry*
nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
{
@ -55,13 +65,13 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
bsize = 1<<is->blocklog;
iefirst = *pie;
addr = is->blockbase + ((u64int)(hashbits(iefirst->score, 32) / ix->div - is->start) << is->blocklog);
addr = ie2diskaddr(ix, is, iefirst);
nbuf = 0;
for(l=&iefirst->nextdirty; (ie=*l)!=nil; l=&(*l)->nextdirty){
naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){
naddr = ie2diskaddr(ix, is, ie);
if(naddr - addr >= Bufsize)
break;
nbuf = naddr-addr;
nbuf = naddr - addr;
}
nbuf += bsize;
@ -75,7 +85,7 @@ nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
int err, h, bsize, t;
int err, i, werr, h, bsize, t;
u32int lo, hi;
u64int addr, naddr;
uint nbuf, off;
@ -89,29 +99,32 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
else
hi = is->stop * ix->div - 1;
trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);
trace(TraceProc, "icachewritesect enter %ud %ud %llud",
lo, hi, iwrite.as.aa);
iedirty = icachedirty(lo, hi, iwrite.as.aa);
iedirty = iesort(iedirty);
bsize = 1<<is->blocklog;
bsize = 1 << is->blocklog;
err = 0;
while(iedirty){
disksched();
while((t=icachesleeptime) == SleepForever){
while((t = icachesleeptime) == SleepForever){
sleep(1000);
disksched();
}
if(t < minicachesleeptime)
t = minicachesleeptime;
sleep(t);
if(t > 0)
sleep(t);
trace(TraceProc, "icachewritesect nextchunk");
chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux",
addr, nbuf);
if(readpart(is->part, addr, buf, nbuf) < 0){
/* XXX more details here */
fprint(2, "icachewriteproc readpart: %r\n");
fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
"readpart: %r\n", argv0, is->part->name, addr);
err = -1;
continue;
}
@ -120,31 +133,34 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
addstat(StatIsectRead, 1);
for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
again:
naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
again:
naddr = ie2diskaddr(ix, is, ie);
off = naddr - addr;
if(off+bsize > nbuf){
fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n",
addr, nbuf, addr+nbuf, naddr);
fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud "
"addr+nbuf=0x%llux naddr=0x%llux\n",
argv0, addr, nbuf, addr+nbuf, naddr);
assert(off+bsize <= nbuf);
}
unpackibucket(&ib, buf+off, is->bucketmagic);
if(okibucket(&ib, is) < 0){
fprint(2, "bad bucket XXX\n");
fprint(2, "%s: bad bucket XXX\n", argv0);
goto skipit;
}
trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
trace(TraceProc, "icachewritesect add %V at 0x%llux",
ie->score, naddr);
h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
if(h & 1){
h ^= 1;
packientry(ie, &ib.data[h]);
}else if(ib.n < is->buckmax){
memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
memmove(&ib.data[h + IEntrySize], &ib.data[h],
ib.n*IEntrySize - h);
ib.n++;
packientry(ie, &ib.data[h]);
}else{
fprint(2, "bucket overflow XXX\n");
skipit:
fprint(2, "%s: bucket overflow XXX\n", argv0);
skipit:
err = -1;
*l = ie->nextdirty;
ie = *l;
@ -154,33 +170,29 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
break;
}
packibucket(&ib, buf+off, is->bucketmagic);
/* XXX
* This is not quite right - it's good that we
* update the cached block (if any) here, but
* since the block doesn't get written until writepart
* below, we also need to make sure that the cache
* doesn't load the stale block before we write it to
* disk below. We could lock the disk cache during
* the writepart, but that's pretty annoying.
* Another possibility would be never to cache
* index partition blocks. The hit rate on those is
* minuscule anyway.
*/
if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
memmove(b->data, buf+off, bsize);
putdblock(b);
}
}
diskaccess(1);
trace(TraceProc, "icachewritesect writepart", addr, nbuf);
if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0){
/* XXX more details here */
fprint(2, "icachewriteproc writepart: %r\n");
werr = 0;
if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
werr = -1;
for(i=0; i<nbuf; i+=bsize){
if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
memmove(b->data, buf+i, bsize);
putdblock(b);
}
}
if(werr < 0){
fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
"writepart: %r\n", argv0, is->part->name, addr);
err = -1;
continue;
}
addstat(StatIsectWriteBytes, nbuf);
addstat(StatIsectWrite, 1);
icacheclean(chunk);