Initial stab at Venti.
This commit is contained in:
parent
4fbfdd7acd
commit
7a4ee46d25
52 changed files with 9527 additions and 0 deletions
641
src/cmd/venti/arena.c
Normal file
641
src/cmd/venti/arena.c
Normal file
|
|
@ -0,0 +1,641 @@
|
|||
#include "stdinc.h"
|
||||
#include "dat.h"
|
||||
#include "fns.h"
|
||||
|
||||
typedef struct ASum ASum;
|
||||
|
||||
struct ASum
|
||||
{
|
||||
Arena *arena;
|
||||
ASum *next;
|
||||
};
|
||||
|
||||
static void sealarena(Arena *arena);
|
||||
static int okarena(Arena *arena);
|
||||
static int loadarena(Arena *arena);
|
||||
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
|
||||
static void putcib(Arena *arena, CIBlock *cib);
|
||||
static void sumproc(void *);
|
||||
|
||||
static QLock sumlock;
|
||||
static Rendez sumwait;
|
||||
static ASum *sumq;
|
||||
|
||||
int
|
||||
initarenasum(void)
|
||||
{
|
||||
sumwait.l = &sumlock;
|
||||
|
||||
if(vtproc(sumproc, nil) < 0){
|
||||
seterr(EOk, "can't start arena checksum slave: %r");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* make an Arena, and initialize it based upon the disk header and trailer.
|
||||
*/
|
||||
Arena*
|
||||
initarena(Part *part, u64int base, u64int size, u32int blocksize)
|
||||
{
|
||||
Arena *arena;
|
||||
|
||||
arena = MKZ(Arena);
|
||||
arena->part = part;
|
||||
arena->blocksize = blocksize;
|
||||
arena->clumpmax = arena->blocksize / ClumpInfoSize;
|
||||
arena->base = base + blocksize;
|
||||
arena->size = size - 2 * blocksize;
|
||||
|
||||
if(loadarena(arena) < 0){
|
||||
seterr(ECorrupt, "arena header or trailer corrupted");
|
||||
freearena(arena);
|
||||
return nil;
|
||||
}
|
||||
if(okarena(arena) < 0){
|
||||
freearena(arena);
|
||||
return nil;
|
||||
}
|
||||
|
||||
if(arena->sealed && scorecmp(zeroscore, arena->score)==0)
|
||||
backsumarena(arena);
|
||||
|
||||
return arena;
|
||||
}
|
||||
|
||||
void
|
||||
freearena(Arena *arena)
|
||||
{
|
||||
if(arena == nil)
|
||||
return;
|
||||
if(arena->cib.data != nil){
|
||||
putdblock(arena->cib.data);
|
||||
arena->cib.data = nil;
|
||||
}
|
||||
free(arena);
|
||||
}
|
||||
|
||||
Arena*
|
||||
newarena(Part *part, char *name, u64int base, u64int size, u32int blocksize)
|
||||
{
|
||||
Arena *arena;
|
||||
|
||||
if(nameok(name) < 0){
|
||||
seterr(EOk, "illegal arena name", name);
|
||||
return nil;
|
||||
}
|
||||
arena = MKZ(Arena);
|
||||
arena->part = part;
|
||||
arena->version = ArenaVersion;
|
||||
arena->blocksize = blocksize;
|
||||
arena->clumpmax = arena->blocksize / ClumpInfoSize;
|
||||
arena->base = base + blocksize;
|
||||
arena->size = size - 2 * blocksize;
|
||||
|
||||
namecp(arena->name, name);
|
||||
|
||||
if(wbarena(arena)<0 || wbarenahead(arena)<0){
|
||||
freearena(arena);
|
||||
return nil;
|
||||
}
|
||||
|
||||
return arena;
|
||||
}
|
||||
|
||||
int
|
||||
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
|
||||
{
|
||||
CIBlock *cib, r;
|
||||
|
||||
cib = getcib(arena, clump, 0, &r);
|
||||
if(cib == nil)
|
||||
return -1;
|
||||
unpackclumpinfo(ci, &cib->data->data[cib->offset]);
|
||||
putcib(arena, cib);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
|
||||
{
|
||||
CIBlock *cib, r;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < n; i++){
|
||||
cib = getcib(arena, clump + i, 0, &r);
|
||||
if(cib == nil)
|
||||
break;
|
||||
unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
|
||||
putcib(arena, cib);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* write directory information for one clump
|
||||
* must be called the arena locked
|
||||
*/
|
||||
int
|
||||
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
|
||||
{
|
||||
CIBlock *cib, r;
|
||||
|
||||
cib = getcib(arena, clump, 1, &r);
|
||||
if(cib == nil)
|
||||
return -1;
|
||||
packclumpinfo(ci, &cib->data->data[cib->offset]);
|
||||
putcib(arena, cib);
|
||||
return 0;
|
||||
}
|
||||
|
||||
u64int
|
||||
arenadirsize(Arena *arena, u32int clumps)
|
||||
{
|
||||
return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
|
||||
}
|
||||
|
||||
/*
|
||||
* read a clump of data
|
||||
* n is a hint of the size of the data, not including the header
|
||||
* make sure it won't run off the end, then return the number of bytes actually read
|
||||
*/
|
||||
u32int
|
||||
readarena(Arena *arena, u64int aa, u8int *buf, long n)
|
||||
{
|
||||
DBlock *b;
|
||||
u64int a;
|
||||
u32int blocksize, off, m;
|
||||
long nn;
|
||||
|
||||
if(n == 0)
|
||||
return -1;
|
||||
|
||||
qlock(&arena->lock);
|
||||
a = arena->size - arenadirsize(arena, arena->clumps);
|
||||
qunlock(&arena->lock);
|
||||
if(aa >= a){
|
||||
seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->clumps, aa, a, arena->size - arenadirsize(arena, arena->clumps - 1));
|
||||
return -1;
|
||||
}
|
||||
if(aa + n > a)
|
||||
n = a - aa;
|
||||
|
||||
blocksize = arena->blocksize;
|
||||
a = arena->base + aa;
|
||||
off = a & (blocksize - 1);
|
||||
a -= off;
|
||||
nn = 0;
|
||||
for(;;){
|
||||
b = getdblock(arena->part, a, 1);
|
||||
if(b == nil)
|
||||
return -1;
|
||||
m = blocksize - off;
|
||||
if(m > n - nn)
|
||||
m = n - nn;
|
||||
memmove(&buf[nn], &b->data[off], m);
|
||||
putdblock(b);
|
||||
nn += m;
|
||||
if(nn == n)
|
||||
break;
|
||||
off = 0;
|
||||
a += blocksize;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* write some data to the clump section at a given offset
|
||||
* used to fix up corrupted arenas.
|
||||
*/
|
||||
u32int
|
||||
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
|
||||
{
|
||||
DBlock *b;
|
||||
u64int a;
|
||||
u32int blocksize, off, m;
|
||||
long nn;
|
||||
int ok;
|
||||
|
||||
if(n == 0)
|
||||
return -1;
|
||||
|
||||
qlock(&arena->lock);
|
||||
a = arena->size - arenadirsize(arena, arena->clumps);
|
||||
if(aa >= a || aa + n > a){
|
||||
qunlock(&arena->lock);
|
||||
seterr(EOk, "writing beyond arena clump storage");
|
||||
return -1;
|
||||
}
|
||||
|
||||
blocksize = arena->blocksize;
|
||||
a = arena->base + aa;
|
||||
off = a & (blocksize - 1);
|
||||
a -= off;
|
||||
nn = 0;
|
||||
for(;;){
|
||||
b = getdblock(arena->part, a, off != 0 || off + n < blocksize);
|
||||
if(b == nil){
|
||||
qunlock(&arena->lock);
|
||||
return -1;
|
||||
}
|
||||
m = blocksize - off;
|
||||
if(m > n - nn)
|
||||
m = n - nn;
|
||||
memmove(&b->data[off], &clbuf[nn], m);
|
||||
ok = writepart(arena->part, a, b->data, blocksize);
|
||||
putdblock(b);
|
||||
if(ok < 0){
|
||||
qunlock(&arena->lock);
|
||||
return -1;
|
||||
}
|
||||
nn += m;
|
||||
if(nn == n)
|
||||
break;
|
||||
off = 0;
|
||||
a += blocksize;
|
||||
}
|
||||
qunlock(&arena->lock);
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate space for the clump and write it,
|
||||
* updating the arena directory
|
||||
ZZZ question: should this distinguish between an arena
|
||||
filling up and real errors writing the clump?
|
||||
*/
|
||||
u64int
|
||||
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
|
||||
{
|
||||
DBlock *b;
|
||||
u64int a, aa;
|
||||
u32int clump, n, nn, m, off, blocksize;
|
||||
int ok;
|
||||
|
||||
n = c->info.size + ClumpSize;
|
||||
qlock(&arena->lock);
|
||||
aa = arena->used;
|
||||
if(arena->sealed
|
||||
|| aa + n + U32Size + arenadirsize(arena, arena->clumps + 1) > arena->size){
|
||||
if(!arena->sealed)
|
||||
sealarena(arena);
|
||||
qunlock(&arena->lock);
|
||||
return TWID64;
|
||||
}
|
||||
if(packclump(c, &clbuf[0]) < 0){
|
||||
qunlock(&arena->lock);
|
||||
return TWID64;
|
||||
}
|
||||
|
||||
/*
|
||||
* write the data out one block at a time
|
||||
*/
|
||||
blocksize = arena->blocksize;
|
||||
a = arena->base + aa;
|
||||
off = a & (blocksize - 1);
|
||||
a -= off;
|
||||
nn = 0;
|
||||
for(;;){
|
||||
b = getdblock(arena->part, a, off != 0);
|
||||
if(b == nil){
|
||||
qunlock(&arena->lock);
|
||||
return TWID64;
|
||||
}
|
||||
m = blocksize - off;
|
||||
if(m > n - nn)
|
||||
m = n - nn;
|
||||
memmove(&b->data[off], &clbuf[nn], m);
|
||||
print("writing\n");
|
||||
ok = writepart(arena->part, a, b->data, blocksize);
|
||||
putdblock(b);
|
||||
if(ok < 0){
|
||||
qunlock(&arena->lock);
|
||||
return TWID64;
|
||||
}
|
||||
nn += m;
|
||||
if(nn == n)
|
||||
break;
|
||||
off = 0;
|
||||
a += blocksize;
|
||||
}
|
||||
|
||||
arena->used += c->info.size + ClumpSize;
|
||||
arena->uncsize += c->info.uncsize;
|
||||
if(c->info.size < c->info.uncsize)
|
||||
arena->cclumps++;
|
||||
|
||||
clump = arena->clumps++;
|
||||
if(arena->clumps == 0)
|
||||
sysfatal("clumps wrapped\n");
|
||||
arena->wtime = now();
|
||||
if(arena->ctime == 0)
|
||||
arena->ctime = arena->wtime;
|
||||
|
||||
writeclumpinfo(arena, clump, &c->info);
|
||||
//ZZZ make this an enum param
|
||||
if((clump & 0x1ff) == 0x1ff){
|
||||
flushciblocks(arena);
|
||||
wbarena(arena);
|
||||
}
|
||||
|
||||
qunlock(&arena->lock);
|
||||
return aa;
|
||||
}
|
||||
|
||||
/*
|
||||
* once sealed, an arena never has any data added to it.
|
||||
* it should only be changed to fix errors.
|
||||
* this also syncs the clump directory.
|
||||
*/
|
||||
static void
|
||||
sealarena(Arena *arena)
|
||||
{
|
||||
flushciblocks(arena);
|
||||
arena->sealed = 1;
|
||||
wbarena(arena);
|
||||
backsumarena(arena);
|
||||
}
|
||||
|
||||
void
|
||||
backsumarena(Arena *arena)
|
||||
{
|
||||
ASum *as;
|
||||
|
||||
as = MK(ASum);
|
||||
if(as == nil)
|
||||
return;
|
||||
qlock(&sumlock);
|
||||
as->arena = arena;
|
||||
as->next = sumq;
|
||||
sumq = as;
|
||||
rwakeup(&sumwait);
|
||||
qunlock(&sumlock);
|
||||
}
|
||||
|
||||
static void
|
||||
sumproc(void *unused)
|
||||
{
|
||||
ASum *as;
|
||||
Arena *arena;
|
||||
|
||||
USED(unused);
|
||||
|
||||
for(;;){
|
||||
qlock(&sumlock);
|
||||
while(sumq == nil)
|
||||
rsleep(&sumwait);
|
||||
as = sumq;
|
||||
sumq = as->next;
|
||||
qunlock(&sumlock);
|
||||
arena = as->arena;
|
||||
free(as);
|
||||
|
||||
sumarena(arena);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
sumarena(Arena *arena)
|
||||
{
|
||||
ZBlock *b;
|
||||
DigestState s;
|
||||
u64int a, e;
|
||||
u32int bs;
|
||||
u8int score[VtScoreSize];
|
||||
|
||||
bs = MaxIoSize;
|
||||
if(bs < arena->blocksize)
|
||||
bs = arena->blocksize;
|
||||
|
||||
/*
|
||||
* read & sum all blocks except the last one
|
||||
*/
|
||||
memset(&s, 0, sizeof s);
|
||||
b = alloczblock(bs, 0);
|
||||
e = arena->base + arena->size;
|
||||
for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
|
||||
if(a + bs > e)
|
||||
bs = arena->blocksize;
|
||||
if(readpart(arena->part, a, b->data, bs) < 0)
|
||||
goto ReadErr;
|
||||
sha1(b->data, bs, nil, &s);
|
||||
}
|
||||
|
||||
/*
|
||||
* the last one is special, since it may already have the checksum included
|
||||
*/
|
||||
bs = arena->blocksize;
|
||||
if(readpart(arena->part, e, b->data, bs) < 0){
|
||||
ReadErr:
|
||||
logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
|
||||
freezblock(b);
|
||||
return;
|
||||
}
|
||||
|
||||
sha1(b->data, bs-VtScoreSize, nil, &s);
|
||||
sha1(zeroscore, VtScoreSize, nil, &s);
|
||||
sha1(nil, 0, score, &s);
|
||||
|
||||
/*
|
||||
* check for no checksum or the same
|
||||
*/
|
||||
if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
|
||||
if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
|
||||
logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
|
||||
arena->name, &b->data[bs - VtScoreSize], score);
|
||||
scorecp(&b->data[bs - VtScoreSize], score);
|
||||
if(writepart(arena->part, e, b->data, bs) < 0)
|
||||
logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
|
||||
}
|
||||
freezblock(b);
|
||||
|
||||
qlock(&arena->lock);
|
||||
scorecp(arena->score, score);
|
||||
qunlock(&arena->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* write the arena trailer block to the partition
|
||||
*/
|
||||
int
|
||||
wbarena(Arena *arena)
|
||||
{
|
||||
ZBlock *b;
|
||||
int bad;
|
||||
|
||||
b = alloczblock(arena->blocksize, 1);
|
||||
if(b == nil){
|
||||
logerr(EAdmin, "can't write arena trailer: %r");
|
||||
///ZZZ add error message?
|
||||
return -1;
|
||||
}
|
||||
bad = okarena(arena)<0 || packarena(arena, b->data)<0 ||
|
||||
writepart(arena->part, arena->base + arena->size, b->data, arena->blocksize)<0;
|
||||
freezblock(b);
|
||||
if(bad)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
wbarenahead(Arena *arena)
|
||||
{
|
||||
ZBlock *b;
|
||||
ArenaHead head;
|
||||
int bad;
|
||||
|
||||
namecp(head.name, arena->name);
|
||||
head.version = arena->version;
|
||||
head.size = arena->size + 2 * arena->blocksize;
|
||||
head.blocksize = arena->blocksize;
|
||||
b = alloczblock(arena->blocksize, 1);
|
||||
if(b == nil){
|
||||
logerr(EAdmin, "can't write arena header: %r");
|
||||
///ZZZ add error message?
|
||||
return -1;
|
||||
}
|
||||
bad = packarenahead(&head, b->data)<0 ||
|
||||
writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0;
|
||||
freezblock(b);
|
||||
if(bad)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* read the arena header and trailer blocks from disk
|
||||
*/
|
||||
static int
|
||||
loadarena(Arena *arena)
|
||||
{
|
||||
ArenaHead head;
|
||||
ZBlock *b;
|
||||
|
||||
b = alloczblock(arena->blocksize, 0);
|
||||
if(b == nil)
|
||||
return -1;
|
||||
if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
|
||||
freezblock(b);
|
||||
return -1;
|
||||
}
|
||||
if(unpackarena(arena, b->data) < 0){
|
||||
freezblock(b);
|
||||
return -1;
|
||||
}
|
||||
if(arena->version != ArenaVersion){
|
||||
seterr(EAdmin, "unknown arena version %d", arena->version);
|
||||
freezblock(b);
|
||||
return -1;
|
||||
}
|
||||
scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
|
||||
|
||||
if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
|
||||
logerr(EAdmin, "can't read arena header: %r");
|
||||
freezblock(b);
|
||||
return 0;
|
||||
}
|
||||
if(unpackarenahead(&head, b->data) < 0)
|
||||
logerr(ECorrupt, "corrupted arena header: %r");
|
||||
else if(namecmp(arena->name, head.name)!=0
|
||||
|| arena->version != head.version
|
||||
|| arena->blocksize != head.blocksize
|
||||
|| arena->size + 2 * arena->blocksize != head.size)
|
||||
logerr(ECorrupt, "arena header inconsistent with arena data");
|
||||
freezblock(b);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
okarena(Arena *arena)
|
||||
{
|
||||
u64int dsize;
|
||||
int ok;
|
||||
|
||||
ok = 0;
|
||||
dsize = arenadirsize(arena, arena->clumps);
|
||||
if(arena->used + dsize > arena->size){
|
||||
seterr(ECorrupt, "arena used > size");
|
||||
ok = -1;
|
||||
}
|
||||
|
||||
if(arena->cclumps > arena->clumps)
|
||||
logerr(ECorrupt, "arena has more compressed clumps than total clumps");
|
||||
|
||||
if(arena->uncsize + arena->clumps * ClumpSize + arena->blocksize < arena->used)
|
||||
logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->uncsize, arena->clumps, arena->used);
|
||||
|
||||
if(arena->ctime > arena->wtime)
|
||||
logerr(ECorrupt, "arena creation time after last write time");
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
static CIBlock*
|
||||
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
|
||||
{
|
||||
CIBlock *cib;
|
||||
u32int block, off;
|
||||
|
||||
if(clump >= arena->clumps){
|
||||
seterr(EOk, "clump directory access out of range");
|
||||
return nil;
|
||||
}
|
||||
block = clump / arena->clumpmax;
|
||||
off = (clump - block * arena->clumpmax) * ClumpInfoSize;
|
||||
|
||||
if(arena->cib.block == block
|
||||
&& arena->cib.data != nil){
|
||||
arena->cib.offset = off;
|
||||
return &arena->cib;
|
||||
}
|
||||
|
||||
if(writing){
|
||||
flushciblocks(arena);
|
||||
cib = &arena->cib;
|
||||
}else
|
||||
cib = rock;
|
||||
|
||||
qlock(&stats.lock);
|
||||
stats.cireads++;
|
||||
qunlock(&stats.lock);
|
||||
|
||||
cib->block = block;
|
||||
cib->offset = off;
|
||||
cib->data = getdblock(arena->part, arena->base + arena->size - (block + 1) * arena->blocksize, arena->blocksize);
|
||||
if(cib->data == nil)
|
||||
return nil;
|
||||
return cib;
|
||||
}
|
||||
|
||||
static void
|
||||
putcib(Arena *arena, CIBlock *cib)
|
||||
{
|
||||
if(cib != &arena->cib){
|
||||
putdblock(cib->data);
|
||||
cib->data = nil;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* must be called with arena locked
|
||||
*/
|
||||
int
|
||||
flushciblocks(Arena *arena)
|
||||
{
|
||||
int ok;
|
||||
|
||||
if(arena->cib.data == nil)
|
||||
return 0;
|
||||
qlock(&stats.lock);
|
||||
stats.ciwrites++;
|
||||
qunlock(&stats.lock);
|
||||
ok = writepart(arena->part, arena->base + arena->size - (arena->cib.block + 1) * arena->blocksize, arena->cib.data->data, arena->blocksize);
|
||||
|
||||
if(ok < 0)
|
||||
seterr(EAdmin, "failed writing arena directory block");
|
||||
putdblock(arena->cib.data);
|
||||
arena->cib.data = nil;
|
||||
return ok;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue