checkpoint

This commit is contained in:
rsc 2007-04-27 17:52:24 +00:00
parent 9ec61f3ede
commit 7e4524011b
10 changed files with 97 additions and 50 deletions

View file

@ -295,7 +295,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
if(arena->memstats.sealed
|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
if(!arena->memstats.sealed){
trace(0, "seal memstats %s", arena->name);
logerr(EOk, "seal memstats %s", arena->name);
arena->memstats.sealed = 1;
as.arena = arena;
as.aa = start+aa;
@ -422,16 +422,10 @@ setatailstate(AState *as)
return;
}
/*
* Walk backward until we find the last time these were in sync.
*/
for(j=i; --j>=0; ){
for(j=0; j<=i; j++){
a = ix->arenas[j];
if(atailcmp(&a->diskstats, &a->memstats) == 0)
break;
}
for(j++; j<=i; j++){
a = ix->arenas[j];
continue;
qlock(&a->lock);
osealed = a->diskstats.sealed;
if(j == i)

View file

@ -26,7 +26,6 @@ bloominit(Bloom *b, vlong vsize, u8int *data)
if(unpackbloomhead(b, data) < 0)
return -1;
fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
b->bitmask = (b->size<<3) - 1;
b->data = data;
return 0;
@ -54,7 +53,19 @@ readbloom(Part *p)
*/
if(bloominit(b, 0, buf) < 0){
vtfree(b);
freepart(p);
return nil;
}else{
/*
* default block size is system page size.
* the bloom filter is usually very big.
* bump the block size up to speed i/o.
*/
if(p->blocksize < (1<<20)){
p->blocksize = 1<<20;
if(p->blocksize > p->size)
p->blocksize = p->size;
}
}
b->part = p;
b->data = nil;

View file

@ -119,10 +119,11 @@ threadmain(int argc, char *argv[])
fprint(2, "%T read index\n");
isectdonechan = chancreate(sizeof(void*), 0);
for(i=0; i<ix->nsects; i++){
if(shouldprocess(ix->sects[i]))
if(shouldprocess(ix->sects[i])){
ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0);
vtproc(isectproc, ix->sects[i]);
}
}
for(i=0; i<nisect; i++)
if(isect[i])

View file

@ -715,6 +715,7 @@ static int
parallelwrites(DBlock **b, DBlock **eb, int dirty)
{
DBlock **p, **q;
Part *part;
for(p=b; p<eb && (*p)->dirty == dirty; p++){
assert(b<=p && p<eb);
@ -726,6 +727,17 @@ parallelwrites(DBlock **b, DBlock **eb, int dirty)
recvp((*p)->writedonechan);
}
/*
* Flush the partitions that have been written to.
*/
part = nil;
for(p=b; p<q; p++){
if(part != (*p)->part){
part = (*p)->part;
flushpart(part);
}
}
return p-b;
}

View file

@ -181,6 +181,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
err = -1;
continue;
}
flushpart(is->part);
addstat(StatIsectWriteBytes, nbuf);
addstat(StatIsectWrite, 1);
icacheclean(chunk);

View file

@ -259,8 +259,14 @@ newindex(char *name, ISect **sects, int n)
blocksize = sects[0]->blocksize;
tabsize = sects[0]->tabsize;
for(i = 0; i < n; i++){
if(sects[i]->start != 0 || sects[i]->stop != 0
|| sects[i]->index[0] != '\0'){
/*
* allow index, start, and stop to be set if index is correct
* and start and stop are what we would have picked.
* this allows calling fmtindex to reformat the index after
* replacing a bad index section with a freshly formatted one.
* start and stop are checked below.
*/
if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
return nil;
}
@ -318,6 +324,13 @@ newindex(char *name, ISect **sects, int n)
stop = start + sects[i]->blocks - xb / n;
if(i == n - 1)
stop = ub;
if(sects[i]->start != 0 || sects[i]->stop != 0)
if(sects[i]->start != start || sects[i]->stop != stop){
seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
return nil;
}
sects[i]->start = start;
sects[i]->stop = stop;
namecp(sects[i]->index, name);
@ -367,8 +380,6 @@ initisect(Part *part)
seterr(EAdmin, "can't read index section header: %r");
return nil;
}
print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n",
part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]);
is = MKZ(ISect);
if(is == nil){

View file

@ -16,6 +16,16 @@
#include "dat.h"
#include "fns.h"
/* TODO for linux:
don't use O_DIRECT.
use
posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
after block is read and also use
posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
to disable readahead on the index partition.
bump block size of bloom filter higher.
*/
u32int maxblocksize;
int readonly;
@ -156,6 +166,11 @@ initpart(char *name, int mode)
return part;
}
/*
 * flushpart: hook that callers invoke on a partition after
 * writing to it.  The body is empty, so a call currently
 * has no effect and part is unused.
 * NOTE(review): presumably a placeholder for forcing written
 * data out to the underlying device -- confirm the intended
 * semantics before relying on it for durability.
 */
void
flushpart(Part *part)
{
}
void
freepart(Part *part)
{
@ -397,11 +412,7 @@ rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
* Try to fix things up and continue.
*/
rerrstr(err, sizeof err);
if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){
if(sdreset(part) >= 0)
reopen(part);
continue;
}else if(strstr(err, "partition has changed")){
if(strstr(err, "i/o timeout") || strstr(err, "i/o error") || strstr(err, "partition has changed")){
reopen(part);
continue;
}
@ -583,9 +594,15 @@ reopen(Part *part)
fprint(2, "reopen %s\n", part->filename);
if((fd = open(part->filename, ORDWR)) < 0){
if(access(part->filename, AEXIST) < 0){
sdreset(part);
fd = open(part->filename, ORDWR);
}
if(fd < 0){
fprint(2, "reopen %s: %r\n", part->filename);
return -1;
}
}
if(fd != part->fd){
dup(fd, part->fd);
close(fd);

View file

@ -94,9 +94,9 @@ logit(int severity, char *fmt, va_list args)
return nil;
if(severity != EOk){
if(argv0 == nil)
fprint(2, "%s: err %d: %s\n", argv0, severity, s);
fprint(2, "%T %s: err %d: %s\n", argv0, severity, s);
else
fprint(2, "err %d: %s\n", severity, s);
fprint(2, "%T err %d: %s\n", severity, s);
}
return s;
}

View file

@ -92,10 +92,10 @@ threadmain(int argc, char *argv[])
}
#endif
trace(TraceQuiet, "venti started");
fprint(2, "venti: ");
ventifmtinstall();
trace(TraceQuiet, "venti started");
fprint(2, "%T venti: ");
if(configfile == nil)
configfile = "venti.conf";

View file

@ -59,7 +59,7 @@ verifyarena(char *name, vlong len)
u32int bs;
u8int score[VtScoreSize];
fprint(2, "verify %s\n", name);
fprint(2, "%T verify %s\n", name);
memset(&arena, 0, sizeof arena);
memset(&s, 0, sizeof s);
@ -68,20 +68,20 @@ verifyarena(char *name, vlong len)
* read a little bit, which will include the header
*/
if(readblock(data, HeadSize) < 0){
fprint(2, "%s: reading header: %r\n", name);
fprint(2, "%T %s: reading header: %r\n", name);
return;
}
sha1(data, HeadSize, nil, &s);
if(unpackarenahead(&head, data) < 0){
fprint(2, "%s: corrupt arena header: %r\n", name);
fprint(2, "%T %s: corrupt arena header: %r\n", name);
return;
}
if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
fprint(2, "%s: warning: unknown arena version %d\n", name, head.version);
fprint(2, "%T %s: warning: unknown arena version %d\n", name, head.version);
if(len != 0 && len != head.size)
fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len);
fprint(2, "%T %s: warning: unexpected length %lld != %lld\n", name, head.size, len);
if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
fprint(2, "%s: warning: unexpected name %s\n", name, head.name);
fprint(2, "%T %s: warning: unexpected name %s\n", name, head.name);
/*
* now we know how much to read
@ -93,7 +93,7 @@ verifyarena(char *name, vlong len)
if(n + bs > e)
bs = e - n;
if(readblock(data, bs) < 0){
fprint(2, "%s: read data: %r\n", name);
fprint(2, "%T %s: read data: %r\n", name);
return;
}
sha1(data, bs, nil, &s);
@ -107,7 +107,7 @@ verifyarena(char *name, vlong len)
*/
bs = head.blocksize;
if(readblock(data, bs) < 0){
fprint(2, "%s: read last block: %r\n", name);
fprint(2, "%T %s: read last block: %r\n", name);
return;
}
sha1(data, bs-VtScoreSize, nil, &s);
@ -119,18 +119,18 @@ verifyarena(char *name, vlong len)
*/
arena.blocksize = head.blocksize;
if(unpackarena(&arena, data) < 0){
fprint(2, "%s: corrupt arena trailer: %r\n", name);
fprint(2, "%T %s: corrupt arena trailer: %r\n", name);
return;
}
scorecp(arena.score, &data[arena.blocksize - VtScoreSize]);
if(namecmp(arena.name, head.name) != 0){
fprint(2, "%s: wrong name in trailer: %s vs. %s\n",
fprint(2, "%T %s: wrong name in trailer: %s vs. %s\n",
name, head.name, arena.name);
return;
}
if(arena.version != head.version){
fprint(2, "%s: wrong version in trailer: %d vs. %d\n",
fprint(2, "%T %s: wrong version in trailer: %d vs. %d\n",
name, head.version, arena.version);
return;
}
@ -140,11 +140,11 @@ verifyarena(char *name, vlong len)
* check for no checksum or the same
*/
if(scorecmp(score, arena.score) == 0)
fprint(2, "%s: verified score\n", name);
fprint(2, "%T %s: verified score\n", name);
else if(scorecmp(zeroscore, arena.score) == 0)
fprint(2, "%s: unsealed\n", name);
fprint(2, "%T %s: unsealed\n", name);
else{
fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n",
fprint(2, "%T %s: mismatch checksum - found=%V calculated=%V\n",
name, arena.score, score);
return;
}
@ -207,7 +207,7 @@ threadmain(int argc, char *argv[])
sysfatal("read arena part header: %r");
if(unpackarenapart(&ap, data) < 0)
sysfatal("corrupted arena part header: %r");
fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n",
fprint(2, "%T # arena part version=%d blocksize=%d arenabase=%d\n",
ap.version, ap.blocksize, ap.arenabase);
ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
ap.tabsize = ap.arenabase - ap.tabbase;
@ -222,21 +222,21 @@ threadmain(int argc, char *argv[])
p++;
for(i=0; i<nline; i++){
if(p == nil){
fprint(2, "warning: unexpected arena table end\n");
fprint(2, "%T warning: unexpected arena table end\n");
break;
}
q = strchr(p, '\n');
if(q)
*q++ = 0;
if(strlen(p) >= sizeof line){
fprint(2, "warning: long arena table line: %s\n", p);
fprint(2, "%T warning: long arena table line: %s\n", p);
p = q;
continue;
}
strcpy(line, p);
memset(f, 0, sizeof f);
if(tokenize(line, f, nelem(f)) < 3){
fprint(2, "warning: bad arena table line: %s\n", p);
fprint(2, "%T warning: bad arena table line: %s\n", p);
p = q;
continue;
}
@ -245,17 +245,17 @@ threadmain(int argc, char *argv[])
start = strtoull(f[1], 0, 0);
stop = strtoull(f[2], 0, 0);
if(stop <= start){
fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start);
fprint(2, "%T %s: bad start,stop %lld,%lld\n", f[0], stop, start);
continue;
}
if(seek(fd, start, 0) < 0)
fprint(2, "%s: seek to start: %r\n", f[0]);
fprint(2, "%T %s: seek to start: %r\n", f[0]);
verifyarena(f[0], stop - start);
}
}
for(i=1; i<argc; i++)
if(argv[i] != 0)
fprint(2, "%s: did not find arena\n", argv[i]);
fprint(2, "%T %s: did not find arena\n", argv[i]);
threadexitsall(nil);
}