358 lines
7.3 KiB
C
358 lines
7.3 KiB
C
/*
 * Write the dirty icache entries to disk.  Random seeks are
 * so expensive that it makes sense to wait until we have
 * a lot and then just make a sequential pass over the disk.
 */
|
|
#include "stdinc.h"
|
|
#include "dat.h"
|
|
#include "fns.h"
|
|
|
|
static void icachewriteproc(void*);
|
|
static void icachewritecoord(void*);
|
|
static IEntry *iesort(IEntry*);
|
|
|
|
/*
 * Pause, in milliseconds, that icachewritesect takes before each chunk
 * it writes.  While the value equals SleepForever the writer keeps
 * sleeping and re-checking instead of writing.
 */
int icachesleeptime = 1000;

/* Lower bound applied to icachesleeptime before icachewritesect sleeps. */
int minicachesleeptime = 0;
|
|
|
|
/*
 * Bufsize bounds the span of disk addresses that nextchunk groups into
 * one chunk, and sizes the I/O buffer each icachewriteproc allocates.
 */
enum { Bufsize = 8*1024*1024 };
|
|
|
|
typedef struct IWrite IWrite;
|
|
struct IWrite
|
|
{
|
|
Round round;
|
|
AState as;
|
|
};
|
|
|
|
static IWrite iwrite;
|
|
|
|
void
|
|
initicachewrite(void)
|
|
{
|
|
int i;
|
|
Index *ix;
|
|
|
|
initround(&iwrite.round, "icache", 120*60*1000);
|
|
ix = mainindex;
|
|
for(i=0; i<ix->nsects; i++){
|
|
ix->sects[i]->writechan = chancreate(sizeof(ulong), 1);
|
|
ix->sects[i]->writedonechan = chancreate(sizeof(ulong), 1);
|
|
vtproc(icachewriteproc, ix->sects[i]);
|
|
}
|
|
vtproc(icachewritecoord, nil);
|
|
vtproc(delaykickroundproc, &iwrite.round);
|
|
}
|
|
|
|
static u64int
|
|
ie2diskaddr(Index *ix, ISect *is, IEntry *ie)
|
|
{
|
|
u64int bucket, addr;
|
|
|
|
bucket = hashbits(ie->score, 32)/ix->div;
|
|
addr = is->blockbase + ((bucket - is->start) << is->blocklog);
|
|
return addr;
|
|
}
|
|
|
|
static IEntry*
|
|
nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
|
|
{
|
|
u64int addr, naddr;
|
|
uint nbuf;
|
|
int bsize;
|
|
IEntry *iefirst, *ie, **l;
|
|
|
|
bsize = 1<<is->blocklog;
|
|
iefirst = *pie;
|
|
addr = ie2diskaddr(ix, is, iefirst);
|
|
nbuf = 0;
|
|
for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){
|
|
naddr = ie2diskaddr(ix, is, ie);
|
|
if(naddr - addr >= Bufsize)
|
|
break;
|
|
nbuf = naddr - addr;
|
|
}
|
|
nbuf += bsize;
|
|
|
|
*l = nil;
|
|
*pie = ie;
|
|
*paddr = addr;
|
|
*pnbuf = nbuf;
|
|
return iefirst;
|
|
}
|
|
|
|
static int
|
|
icachewritesect(Index *ix, ISect *is, u8int *buf)
|
|
{
|
|
int err, i, werr, h, bsize, t;
|
|
u32int lo, hi;
|
|
u64int addr, naddr;
|
|
uint nbuf, off;
|
|
DBlock *b;
|
|
IBucket ib;
|
|
IEntry *ie, *iedirty, **l, *chunk;
|
|
|
|
lo = is->start * ix->div;
|
|
if(TWID32/ix->div < is->stop)
|
|
hi = TWID32;
|
|
else
|
|
hi = is->stop * ix->div - 1;
|
|
|
|
trace(TraceProc, "icachewritesect enter %ud %ud %llud",
|
|
lo, hi, iwrite.as.aa);
|
|
|
|
iedirty = icachedirty(lo, hi, iwrite.as.aa);
|
|
iedirty = iesort(iedirty);
|
|
bsize = 1 << is->blocklog;
|
|
err = 0;
|
|
|
|
while(iedirty){
|
|
disksched();
|
|
while((t = icachesleeptime) == SleepForever){
|
|
sleep(1000);
|
|
disksched();
|
|
}
|
|
if(t < minicachesleeptime)
|
|
t = minicachesleeptime;
|
|
if(t > 0)
|
|
sleep(t);
|
|
trace(TraceProc, "icachewritesect nextchunk");
|
|
chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
|
|
|
|
trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux",
|
|
addr, nbuf);
|
|
if(readpart(is->part, addr, buf, nbuf) < 0){
|
|
fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
|
|
"readpart: %r\n", argv0, is->part->name, addr);
|
|
err = -1;
|
|
continue;
|
|
}
|
|
trace(TraceProc, "icachewritesect updatebuf");
|
|
addstat(StatIsectReadBytes, nbuf);
|
|
addstat(StatIsectRead, 1);
|
|
|
|
for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
|
|
again:
|
|
naddr = ie2diskaddr(ix, is, ie);
|
|
off = naddr - addr;
|
|
if(off+bsize > nbuf){
|
|
fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud "
|
|
"addr+nbuf=0x%llux naddr=0x%llux\n",
|
|
argv0, addr, nbuf, addr+nbuf, naddr);
|
|
assert(off+bsize <= nbuf);
|
|
}
|
|
unpackibucket(&ib, buf+off, is->bucketmagic);
|
|
if(okibucket(&ib, is) < 0){
|
|
fprint(2, "%s: bad bucket XXX\n", argv0);
|
|
goto skipit;
|
|
}
|
|
trace(TraceProc, "icachewritesect add %V at 0x%llux",
|
|
ie->score, naddr);
|
|
h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
|
|
if(h & 1){
|
|
h ^= 1;
|
|
packientry(ie, &ib.data[h]);
|
|
}else if(ib.n < is->buckmax){
|
|
memmove(&ib.data[h + IEntrySize], &ib.data[h],
|
|
ib.n*IEntrySize - h);
|
|
ib.n++;
|
|
packientry(ie, &ib.data[h]);
|
|
}else{
|
|
fprint(2, "%s: bucket overflow XXX\n", argv0);
|
|
skipit:
|
|
err = -1;
|
|
*l = ie->nextdirty;
|
|
ie = *l;
|
|
if(ie)
|
|
goto again;
|
|
else
|
|
break;
|
|
}
|
|
packibucket(&ib, buf+off, is->bucketmagic);
|
|
}
|
|
|
|
diskaccess(1);
|
|
|
|
trace(TraceProc, "icachewritesect writepart", addr, nbuf);
|
|
werr = 0;
|
|
if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
|
|
werr = -1;
|
|
|
|
for(i=0; i<nbuf; i+=bsize){
|
|
if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
|
|
memmove(b->data, buf+i, bsize);
|
|
putdblock(b);
|
|
}
|
|
}
|
|
|
|
if(werr < 0){
|
|
fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
|
|
"writepart: %r\n", argv0, is->part->name, addr);
|
|
err = -1;
|
|
continue;
|
|
}
|
|
|
|
addstat(StatIsectWriteBytes, nbuf);
|
|
addstat(StatIsectWrite, 1);
|
|
icacheclean(chunk);
|
|
}
|
|
|
|
trace(TraceProc, "icachewritesect done");
|
|
return err;
|
|
}
|
|
|
|
static void
|
|
icachewriteproc(void *v)
|
|
{
|
|
int ret;
|
|
uint bsize;
|
|
ISect *is;
|
|
Index *ix;
|
|
u8int *buf;
|
|
|
|
ix = mainindex;
|
|
is = v;
|
|
threadsetname("icachewriteproc:%s", is->part->name);
|
|
|
|
bsize = 1<<is->blocklog;
|
|
buf = emalloc(Bufsize+bsize);
|
|
buf = (u8int*)(((uintptr)buf+bsize-1)&~(uintptr)(bsize-1));
|
|
|
|
for(;;){
|
|
trace(TraceProc, "icachewriteproc recv");
|
|
recv(is->writechan, 0);
|
|
trace(TraceWork, "start");
|
|
ret = icachewritesect(ix, is, buf);
|
|
trace(TraceProc, "icachewriteproc send");
|
|
trace(TraceWork, "finish");
|
|
sendul(is->writedonechan, ret);
|
|
}
|
|
}
|
|
|
|
static void
|
|
icachewritecoord(void *v)
|
|
{
|
|
int i, err;
|
|
Index *ix;
|
|
AState as;
|
|
|
|
USED(v);
|
|
|
|
threadsetname("icachewritecoord");
|
|
|
|
ix = mainindex;
|
|
iwrite.as = icachestate();
|
|
|
|
for(;;){
|
|
trace(TraceProc, "icachewritecoord sleep");
|
|
waitforkick(&iwrite.round);
|
|
trace(TraceWork, "start");
|
|
as = icachestate();
|
|
if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
|
|
/* will not be able to do anything more than last flush - kick disk */
|
|
trace(TraceProc, "icachewritecoord kick dcache");
|
|
kickdcache();
|
|
trace(TraceProc, "icachewritecoord kicked dcache");
|
|
goto SkipWork; /* won't do anything; don't bother rewriting bloom filter */
|
|
}
|
|
iwrite.as = as;
|
|
|
|
trace(TraceProc, "icachewritecoord start flush");
|
|
if(iwrite.as.arena){
|
|
for(i=0; i<ix->nsects; i++)
|
|
send(ix->sects[i]->writechan, 0);
|
|
if(ix->bloom)
|
|
send(ix->bloom->writechan, 0);
|
|
|
|
err = 0;
|
|
for(i=0; i<ix->nsects; i++)
|
|
err |= recvul(ix->sects[i]->writedonechan);
|
|
if(ix->bloom)
|
|
err |= recvul(ix->bloom->writedonechan);
|
|
|
|
trace(TraceProc, "icachewritecoord donewrite err=%d", err);
|
|
if(err == 0){
|
|
setatailstate(&iwrite.as);
|
|
}
|
|
}
|
|
SkipWork:
|
|
icacheclean(nil); /* wake up anyone waiting */
|
|
trace(TraceWork, "finish");
|
|
addstat(StatIcacheFlush, 1);
|
|
}
|
|
}
|
|
|
|
void
|
|
flushicache(void)
|
|
{
|
|
trace(TraceProc, "flushicache enter");
|
|
kickround(&iwrite.round, 1);
|
|
trace(TraceProc, "flushicache exit");
|
|
}
|
|
|
|
void
|
|
kickicache(void)
|
|
{
|
|
kickround(&iwrite.round, 0);
|
|
}
|
|
|
|
void
|
|
delaykickicache(void)
|
|
{
|
|
delaykickround(&iwrite.round);
|
|
}
|
|
|
|
/*
 * Merge sort of a nextdirty-linked list of entries into ascending
 * scorecmp order.  Entries that compare equal keep their relative
 * order.  The list is relinked in place; returns the new head.
 */
static IEntry*
iesort(IEntry *ie)
{
	IEntry *slow, *fast, *left, *right, *head, **tail;

	/* lists of zero or one entry are already sorted */
	if(ie == nil || ie->nextdirty == nil)
		return ie;

	/*
	 * Split: fast starts two nodes ahead and moves two links per
	 * step, so when it runs off the end slow is the last node of
	 * the first half.
	 */
	slow = ie;
	fast = ie->nextdirty->nextdirty;
	while(fast != nil){
		slow = slow->nextdirty;
		fast = fast->nextdirty;
		if(fast != nil)
			fast = fast->nextdirty;
	}
	right = slow->nextdirty;
	slow->nextdirty = nil;

	/* sort each half */
	left = iesort(ie);
	right = iesort(right);

	/* merge, preferring the left half on ties */
	head = nil;
	tail = &head;
	while(left != nil && right != nil){
		if(scorecmp(left->score, right->score) > 0){
			*tail = right;
			tail = &right->nextdirty;
			right = right->nextdirty;
		}else{
			*tail = left;
			tail = &left->nextdirty;
			left = left->nextdirty;
		}
	}
	/* whichever half remains is already sorted and nil-terminated */
	*tail = left != nil ? left : right;
	return head;
}
|
|
|