checkpoint

This commit is contained in:
rsc 2007-04-27 17:52:24 +00:00
parent 9ec61f3ede
commit 7e4524011b
10 changed files with 97 additions and 50 deletions

View file

@ -295,7 +295,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
if(arena->memstats.sealed if(arena->memstats.sealed
|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){ || aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
if(!arena->memstats.sealed){ if(!arena->memstats.sealed){
trace(0, "seal memstats %s", arena->name); logerr(EOk, "seal memstats %s", arena->name);
arena->memstats.sealed = 1; arena->memstats.sealed = 1;
as.arena = arena; as.arena = arena;
as.aa = start+aa; as.aa = start+aa;
@ -422,16 +422,10 @@ setatailstate(AState *as)
return; return;
} }
/* for(j=0; j<=i; j++){
* Walk backward until we find the last time these were in sync.
*/
for(j=i; --j>=0; ){
a = ix->arenas[j]; a = ix->arenas[j];
if(atailcmp(&a->diskstats, &a->memstats) == 0) if(atailcmp(&a->diskstats, &a->memstats) == 0)
break; continue;
}
for(j++; j<=i; j++){
a = ix->arenas[j];
qlock(&a->lock); qlock(&a->lock);
osealed = a->diskstats.sealed; osealed = a->diskstats.sealed;
if(j == i) if(j == i)

View file

@ -26,7 +26,6 @@ bloominit(Bloom *b, vlong vsize, u8int *data)
if(unpackbloomhead(b, data) < 0) if(unpackbloomhead(b, data) < 0)
return -1; return -1;
fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
b->bitmask = (b->size<<3) - 1; b->bitmask = (b->size<<3) - 1;
b->data = data; b->data = data;
return 0; return 0;
@ -54,7 +53,19 @@ readbloom(Part *p)
*/ */
if(bloominit(b, 0, buf) < 0){ if(bloominit(b, 0, buf) < 0){
vtfree(b); vtfree(b);
freepart(p);
return nil; return nil;
}else{
/*
* default block size is system page size.
* the bloom filter is usually very big.
* bump the block size up to speed i/o.
*/
if(p->blocksize < (1<<20)){
p->blocksize = 1<<20;
if(p->blocksize > p->size)
p->blocksize = p->size;
}
} }
b->part = p; b->part = p;
b->data = nil; b->data = nil;

View file

@ -119,9 +119,10 @@ threadmain(int argc, char *argv[])
fprint(2, "%T read index\n"); fprint(2, "%T read index\n");
isectdonechan = chancreate(sizeof(void*), 0); isectdonechan = chancreate(sizeof(void*), 0);
for(i=0; i<ix->nsects; i++){ for(i=0; i<ix->nsects; i++){
if(shouldprocess(ix->sects[i])) if(shouldprocess(ix->sects[i])){
ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0); ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0);
vtproc(isectproc, ix->sects[i]); vtproc(isectproc, ix->sects[i]);
}
} }
for(i=0; i<nisect; i++) for(i=0; i<nisect; i++)

View file

@ -715,6 +715,7 @@ static int
parallelwrites(DBlock **b, DBlock **eb, int dirty) parallelwrites(DBlock **b, DBlock **eb, int dirty)
{ {
DBlock **p, **q; DBlock **p, **q;
Part *part;
for(p=b; p<eb && (*p)->dirty == dirty; p++){ for(p=b; p<eb && (*p)->dirty == dirty; p++){
assert(b<=p && p<eb); assert(b<=p && p<eb);
@ -725,6 +726,17 @@ parallelwrites(DBlock **b, DBlock **eb, int dirty)
assert(b<=p && p<eb); assert(b<=p && p<eb);
recvp((*p)->writedonechan); recvp((*p)->writedonechan);
} }
/*
* Flush the partitions that have been written to.
*/
part = nil;
for(p=b; p<q; p++){
if(part != (*p)->part){
part = (*p)->part;
flushpart(part);
}
}
return p-b; return p-b;
} }

View file

@ -181,6 +181,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf)
err = -1; err = -1;
continue; continue;
} }
flushpart(is->part);
addstat(StatIsectWriteBytes, nbuf); addstat(StatIsectWriteBytes, nbuf);
addstat(StatIsectWrite, 1); addstat(StatIsectWrite, 1);
icacheclean(chunk); icacheclean(chunk);

View file

@ -259,8 +259,14 @@ newindex(char *name, ISect **sects, int n)
blocksize = sects[0]->blocksize; blocksize = sects[0]->blocksize;
tabsize = sects[0]->tabsize; tabsize = sects[0]->tabsize;
for(i = 0; i < n; i++){ for(i = 0; i < n; i++){
if(sects[i]->start != 0 || sects[i]->stop != 0 /*
|| sects[i]->index[0] != '\0'){ * allow index, start, and stop to be set if index is correct
* and start and stop are what we would have picked.
* this allows calling fmtindex to reformat the index after
* replacing a bad index section with a freshly formatted one.
* start and stop are checked below.
*/
if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
seterr(EOk, "creating new index using non-empty section %s", sects[i]->name); seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
return nil; return nil;
} }
@ -318,6 +324,13 @@ newindex(char *name, ISect **sects, int n)
stop = start + sects[i]->blocks - xb / n; stop = start + sects[i]->blocks - xb / n;
if(i == n - 1) if(i == n - 1)
stop = ub; stop = ub;
if(sects[i]->start != 0 || sects[i]->stop != 0)
if(sects[i]->start != start || sects[i]->stop != stop){
seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
return nil;
}
sects[i]->start = start; sects[i]->start = start;
sects[i]->stop = stop; sects[i]->stop = stop;
namecp(sects[i]->index, name); namecp(sects[i]->index, name);
@ -367,8 +380,6 @@ initisect(Part *part)
seterr(EAdmin, "can't read index section header: %r"); seterr(EAdmin, "can't read index section header: %r");
return nil; return nil;
} }
print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n",
part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]);
is = MKZ(ISect); is = MKZ(ISect);
if(is == nil){ if(is == nil){

View file

@ -16,6 +16,16 @@
#include "dat.h" #include "dat.h"
#include "fns.h" #include "fns.h"
/* TODO for linux:
don't use O_DIRECT.
use
posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
after block is read and also use
posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
to disable readahead on the index partition.
bump block size of bloom filter higher.
*/
u32int maxblocksize; u32int maxblocksize;
int readonly; int readonly;
@ -156,6 +166,11 @@ initpart(char *name, int mode)
return part; return part;
} }
/*
 * flushpart: currently an empty stub.  It ignores part and
 * returns immediately without doing any work.
 * NOTE(review): presumably a placeholder for pushing written
 * data on the partition out to stable storage -- confirm the
 * intended semantics before relying on it for durability.
 */
void
flushpart(Part *part)
{
}
void void
freepart(Part *part) freepart(Part *part)
{ {
@ -397,11 +412,7 @@ rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
* Try to fix things up and continue. * Try to fix things up and continue.
*/ */
rerrstr(err, sizeof err); rerrstr(err, sizeof err);
if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){ if(strstr(err, "i/o timeout") || strstr(err, "i/o error") || strstr(err, "partition has changed")){
if(sdreset(part) >= 0)
reopen(part);
continue;
}else if(strstr(err, "partition has changed")){
reopen(part); reopen(part);
continue; continue;
} }
@ -583,9 +594,15 @@ reopen(Part *part)
fprint(2, "reopen %s\n", part->filename); fprint(2, "reopen %s\n", part->filename);
if((fd = open(part->filename, ORDWR)) < 0){ if((fd = open(part->filename, ORDWR)) < 0){
fprint(2, "reopen %s: %r\n", part->filename); if(access(part->filename, AEXIST) < 0){
return -1; sdreset(part);
} fd = open(part->filename, ORDWR);
}
if(fd < 0){
fprint(2, "reopen %s: %r\n", part->filename);
return -1;
}
}
if(fd != part->fd){ if(fd != part->fd){
dup(fd, part->fd); dup(fd, part->fd);
close(fd); close(fd);

View file

@ -94,9 +94,9 @@ logit(int severity, char *fmt, va_list args)
return nil; return nil;
if(severity != EOk){ if(severity != EOk){
if(argv0 == nil) if(argv0 == nil)
fprint(2, "%s: err %d: %s\n", argv0, severity, s); fprint(2, "%T %s: err %d: %s\n", argv0, severity, s);
else else
fprint(2, "err %d: %s\n", severity, s); fprint(2, "%T err %d: %s\n", severity, s);
} }
return s; return s;
} }

View file

@ -92,10 +92,10 @@ threadmain(int argc, char *argv[])
} }
#endif #endif
trace(TraceQuiet, "venti started");
fprint(2, "venti: ");
ventifmtinstall(); ventifmtinstall();
trace(TraceQuiet, "venti started");
fprint(2, "%T venti: ");
if(configfile == nil) if(configfile == nil)
configfile = "venti.conf"; configfile = "venti.conf";

View file

@ -59,7 +59,7 @@ verifyarena(char *name, vlong len)
u32int bs; u32int bs;
u8int score[VtScoreSize]; u8int score[VtScoreSize];
fprint(2, "verify %s\n", name); fprint(2, "%T verify %s\n", name);
memset(&arena, 0, sizeof arena); memset(&arena, 0, sizeof arena);
memset(&s, 0, sizeof s); memset(&s, 0, sizeof s);
@ -68,20 +68,20 @@ verifyarena(char *name, vlong len)
* read a little bit, which will include the header * read a little bit, which will include the header
*/ */
if(readblock(data, HeadSize) < 0){ if(readblock(data, HeadSize) < 0){
fprint(2, "%s: reading header: %r\n", name); fprint(2, "%T %s: reading header: %r\n", name);
return; return;
} }
sha1(data, HeadSize, nil, &s); sha1(data, HeadSize, nil, &s);
if(unpackarenahead(&head, data) < 0){ if(unpackarenahead(&head, data) < 0){
fprint(2, "%s: corrupt arena header: %r\n", name); fprint(2, "%T %s: corrupt arena header: %r\n", name);
return; return;
} }
if(head.version != ArenaVersion4 && head.version != ArenaVersion5) if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
fprint(2, "%s: warning: unknown arena version %d\n", name, head.version); fprint(2, "%T %s: warning: unknown arena version %d\n", name, head.version);
if(len != 0 && len != head.size) if(len != 0 && len != head.size)
fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len); fprint(2, "%T %s: warning: unexpected length %lld != %lld\n", name, head.size, len);
if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0) if(strcmp(name, "<stdin>") != 0 && strcmp(head.name, name) != 0)
fprint(2, "%s: warning: unexpected name %s\n", name, head.name); fprint(2, "%T %s: warning: unexpected name %s\n", name, head.name);
/* /*
* now we know how much to read * now we know how much to read
@ -93,7 +93,7 @@ verifyarena(char *name, vlong len)
if(n + bs > e) if(n + bs > e)
bs = e - n; bs = e - n;
if(readblock(data, bs) < 0){ if(readblock(data, bs) < 0){
fprint(2, "%s: read data: %r\n", name); fprint(2, "%T %s: read data: %r\n", name);
return; return;
} }
sha1(data, bs, nil, &s); sha1(data, bs, nil, &s);
@ -107,7 +107,7 @@ verifyarena(char *name, vlong len)
*/ */
bs = head.blocksize; bs = head.blocksize;
if(readblock(data, bs) < 0){ if(readblock(data, bs) < 0){
fprint(2, "%s: read last block: %r\n", name); fprint(2, "%T %s: read last block: %r\n", name);
return; return;
} }
sha1(data, bs-VtScoreSize, nil, &s); sha1(data, bs-VtScoreSize, nil, &s);
@ -119,18 +119,18 @@ verifyarena(char *name, vlong len)
*/ */
arena.blocksize = head.blocksize; arena.blocksize = head.blocksize;
if(unpackarena(&arena, data) < 0){ if(unpackarena(&arena, data) < 0){
fprint(2, "%s: corrupt arena trailer: %r\n", name); fprint(2, "%T %s: corrupt arena trailer: %r\n", name);
return; return;
} }
scorecp(arena.score, &data[arena.blocksize - VtScoreSize]); scorecp(arena.score, &data[arena.blocksize - VtScoreSize]);
if(namecmp(arena.name, head.name) != 0){ if(namecmp(arena.name, head.name) != 0){
fprint(2, "%s: wrong name in trailer: %s vs. %s\n", fprint(2, "%T %s: wrong name in trailer: %s vs. %s\n",
name, head.name, arena.name); name, head.name, arena.name);
return; return;
} }
if(arena.version != head.version){ if(arena.version != head.version){
fprint(2, "%s: wrong version in trailer: %d vs. %d\n", fprint(2, "%T %s: wrong version in trailer: %d vs. %d\n",
name, head.version, arena.version); name, head.version, arena.version);
return; return;
} }
@ -140,11 +140,11 @@ verifyarena(char *name, vlong len)
* check for no checksum or the same * check for no checksum or the same
*/ */
if(scorecmp(score, arena.score) == 0) if(scorecmp(score, arena.score) == 0)
fprint(2, "%s: verified score\n", name); fprint(2, "%T %s: verified score\n", name);
else if(scorecmp(zeroscore, arena.score) == 0) else if(scorecmp(zeroscore, arena.score) == 0)
fprint(2, "%s: unsealed\n", name); fprint(2, "%T %s: unsealed\n", name);
else{ else{
fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n", fprint(2, "%T %s: mismatch checksum - found=%V calculated=%V\n",
name, arena.score, score); name, arena.score, score);
return; return;
} }
@ -207,7 +207,7 @@ threadmain(int argc, char *argv[])
sysfatal("read arena part header: %r"); sysfatal("read arena part header: %r");
if(unpackarenapart(&ap, data) < 0) if(unpackarenapart(&ap, data) < 0)
sysfatal("corrupted arena part header: %r"); sysfatal("corrupted arena part header: %r");
fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n", fprint(2, "%T # arena part version=%d blocksize=%d arenabase=%d\n",
ap.version, ap.blocksize, ap.arenabase); ap.version, ap.blocksize, ap.arenabase);
ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1); ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1);
ap.tabsize = ap.arenabase - ap.tabbase; ap.tabsize = ap.arenabase - ap.tabbase;
@ -222,21 +222,21 @@ threadmain(int argc, char *argv[])
p++; p++;
for(i=0; i<nline; i++){ for(i=0; i<nline; i++){
if(p == nil){ if(p == nil){
fprint(2, "warning: unexpected arena table end\n"); fprint(2, "%T warning: unexpected arena table end\n");
break; break;
} }
q = strchr(p, '\n'); q = strchr(p, '\n');
if(q) if(q)
*q++ = 0; *q++ = 0;
if(strlen(p) >= sizeof line){ if(strlen(p) >= sizeof line){
fprint(2, "warning: long arena table line: %s\n", p); fprint(2, "%T warning: long arena table line: %s\n", p);
p = q; p = q;
continue; continue;
} }
strcpy(line, p); strcpy(line, p);
memset(f, 0, sizeof f); memset(f, 0, sizeof f);
if(tokenize(line, f, nelem(f)) < 3){ if(tokenize(line, f, nelem(f)) < 3){
fprint(2, "warning: bad arena table line: %s\n", p); fprint(2, "%T warning: bad arena table line: %s\n", p);
p = q; p = q;
continue; continue;
} }
@ -245,17 +245,17 @@ threadmain(int argc, char *argv[])
start = strtoull(f[1], 0, 0); start = strtoull(f[1], 0, 0);
stop = strtoull(f[2], 0, 0); stop = strtoull(f[2], 0, 0);
if(stop <= start){ if(stop <= start){
fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start); fprint(2, "%T %s: bad start,stop %lld,%lld\n", f[0], stop, start);
continue; continue;
} }
if(seek(fd, start, 0) < 0) if(seek(fd, start, 0) < 0)
fprint(2, "%s: seek to start: %r\n", f[0]); fprint(2, "%T %s: seek to start: %r\n", f[0]);
verifyarena(f[0], stop - start); verifyarena(f[0], stop - start);
} }
} }
for(i=1; i<argc; i++) for(i=1; i<argc; i++)
if(argv[i] != 0) if(argv[i] != 0)
fprint(2, "%s: did not find arena\n", argv[i]); fprint(2, "%T %s: did not find arena\n", argv[i]);
threadexitsall(nil); threadexitsall(nil);
} }