Commit fdcd015e authored by Jeromy

move first data block into top level dag node

parent 78454884
......@@ -25,7 +25,8 @@ func NewDagFromReader(r io.Reader) (*dag.Node, error) {
func NewDagFromReaderWithSplitter(r io.Reader, spl BlockSplitter) (*dag.Node, error) {
blkChan := spl.Split(r)
root := &dag.Node{Data: dag.FilePBData()}
first := <-blkChan
root := &dag.Node{Data: dag.FilePBData(first)}
for blk := range blkChan {
child := &dag.Node{Data: dag.WrapData(blk)}
......
......@@ -82,3 +82,19 @@ func arrComp(a, b []byte) error {
func TestMaybeRabinConsistency(t *testing.T) {
testFileConsistency(t, NewMaybeRabin(4096), 256*4096)
}
// TestRabinBlockSize splits 1 MiB read from rand.Reader with a MaybeRabin
// splitter built for a 4096-byte average and prints the average size of the
// blocks it produced.
func TestRabinBlockSize(t *testing.T) {
	buf := new(bytes.Buffer)
	nbytes := 1024 * 1024
	// A short or failed read would make the reported average meaningless,
	// so stop here instead of continuing with a partially filled buffer.
	if _, err := io.CopyN(buf, rand.Reader, int64(nbytes)); err != nil {
		t.Fatal(err)
	}

	rab := NewMaybeRabin(4096)
	blkch := rab.Split(buf)

	var blocks [][]byte
	for b := range blkch {
		blocks = append(blocks, b)
	}
	// Guard the division below: an empty result would otherwise panic with
	// an integer divide by zero instead of failing the test cleanly.
	if len(blocks) == 0 {
		t.Fatal("splitter produced no blocks")
	}

	fmt.Printf("Avg block size: %d\n", nbytes/len(blocks))
}
......@@ -9,8 +9,10 @@ import (
)
// MaybeRabin holds the parameters of the rolling-hash block splitter.
//
// NewMaybeRabin sets MinBlockSize to half of the requested average block size
// and MaxBlockSize to three times that half.
type MaybeRabin struct {
	// mask is the bit pattern the rolling hash is compared against when
	// deciding whether the current position is a block boundary.
	mask int
	// windowSize is the rolling-hash window length; NewMaybeRabin sets it to 16.
	windowSize int
	// MinBlockSize is the length a block must exceed before a hash match is
	// allowed to end it.
	MinBlockSize int
	// MaxBlockSize is the length at which a block is ended even without a
	// hash match.
	MaxBlockSize int
}
func NewMaybeRabin(avgBlkSize int) *MaybeRabin {
......@@ -18,6 +20,8 @@ func NewMaybeRabin(avgBlkSize int) *MaybeRabin {
rb := new(MaybeRabin)
rb.mask = (1 << blkbits) - 1
rb.windowSize = 16 // probably a good number...
rb.MinBlockSize = avgBlkSize / 2
rb.MaxBlockSize = (avgBlkSize / 2) * 3
return rb
}
......@@ -70,7 +74,8 @@ func (mr *MaybeRabin) Split(r io.Reader) chan []byte {
outval := push(i, b)
blkbuf.WriteByte(b)
rollingHash = (rollingHash*a + int(b) - an*outval) % MOD
if rollingHash&mr.mask == mr.mask {
if (rollingHash&mr.mask == mr.mask && blkbuf.Len() > mr.MinBlockSize) ||
blkbuf.Len() >= mr.MaxBlockSize {
out <- dup(blkbuf.Bytes())
blkbuf.Reset()
}
......
......@@ -34,6 +34,7 @@ func NewDagReader(n *Node, serv *DAGService) (io.Reader, error) {
node: n,
thisData: pb.GetData(),
serv: serv,
buf: bytes.NewBuffer(pb.GetData()),
}, nil
case PBData_Raw:
return bytes.NewBuffer(pb.GetData()), nil
......
......@@ -157,10 +157,11 @@ func (n *DAGService) Get(k u.Key) (*Node, error) {
return Decoded(b.Data)
}
func FilePBData() []byte {
func FilePBData(data []byte) []byte {
pbfile := new(PBData)
typ := PBData_File
pbfile.Type = &typ
pbfile.Data = data
data, err := proto.Marshal(pbfile)
if err != nil {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment