diff --git a/benchmark_test.go b/benchmark_test.go new file mode 100644 index 0000000000000000000000000000000000000000..5069b06536e15084cb7ae68d2d52792719ed4b30 --- /dev/null +++ b/benchmark_test.go @@ -0,0 +1,59 @@ +package chunk + +import ( + "bytes" + "io" + "math/rand" + "testing" +) + +type newSplitter func(io.Reader) Splitter + +type bencSpec struct { + size int + name string +} + +var bSizes = []bencSpec{ + {1 << 10, "1K"}, + {1 << 20, "1M"}, + {16 << 20, "16M"}, + {100 << 20, "100M"}, +} + +func benchmarkChunker(b *testing.B, ns newSplitter) { + for _, s := range bSizes { + s := s + b.Run(s.name, func(b *testing.B) { + benchmarkChunkerSize(b, ns, s.size) + }) + } +} + +func benchmarkChunkerSize(b *testing.B, ns newSplitter, size int) { + rng := rand.New(rand.NewSource(1)) + data := make([]byte, size) + rng.Read(data) + + b.SetBytes(int64(size)) + b.ReportAllocs() + b.ResetTimer() + + var res uint64 + + for i := 0; i < b.N; i++ { + r := ns(bytes.NewReader(data)) + + for { + chunk, err := r.NextBytes() + if err != nil { + if err == io.EOF { + break + } + b.Fatal(err) + } + res = res + uint64(len(chunk)) + } + } + Res = Res + res +} diff --git a/buzhash.go b/buzhash.go index edfc4c03df6f955fcfee5e76faf9d6699254a5ba..099b723b183c3cf2d6ddf4d3b197bebfb1b59cad 100644 --- a/buzhash.go +++ b/buzhash.go @@ -28,6 +28,10 @@ func NewBuzhash(r io.Reader) *Buzhash { } } +func (b *Buzhash) Reader() io.Reader { + return b.r +} + func (b *Buzhash) NextBytes() ([]byte, error) { if b.err != nil { return nil, b.err diff --git a/buzhash_test.go b/buzhash_test.go index 8e2b166326532a39a31b5ba66c9b5c15896ab794..6e59d6be8c98c337ffed7133ea1ae6aedae33473 100644 --- a/buzhash_test.go +++ b/buzhash_test.go @@ -41,37 +41,16 @@ func TestBuzhashChunking(t *testing.T) { } func TestBuzhashChunkReuse(t *testing.T) { - newBuzhash := func(r io.Reader) cher { + newBuzhash := func(r io.Reader) Splitter { return NewBuzhash(r) } testReuse(t, newBuzhash) } -func BenchmarkBuzhash(b *testing.B) { - data := make([]byte, 1<<10) - util.NewTimeSeededRand().Read(data) - - b.SetBytes(int64(len(data))) - b.ReportAllocs() - b.ResetTimer() - - var res uint64 - - for i := 0; i < b.N; i++ { - r := NewBuzhash(bytes.NewReader(data)) - - for { - chunk, err := r.NextBytes() - if err != nil { - if err == io.EOF { - break - } - b.Fatal(err) - } - res = res + uint64(len(chunk)) - } - } - Res = Res + res +func BenchmarkBuzhash2(b *testing.B) { + benchmarkChunker(b, func(r io.Reader) Splitter { + return NewBuzhash(r) + }) } func TestBuzhashBitsHash(t *testing.T) { diff --git a/rabin_test.go b/rabin_test.go index 9eb8c669390e1b22604f386c2383b67f6cc4cdab..857e97c2ca1ef528d75ed69970e1b73449feb2ba 100644 --- a/rabin_test.go +++ b/rabin_test.go @@ -39,13 +39,7 @@ func TestRabinChunking(t *testing.T) { } } -type cher interface { - NextBytes() ([]byte, error) -} - -type newChunker func(io.Reader) cher - -func chunkData(t *testing.T, newC newChunker, data []byte) map[string]blocks.Block { +func chunkData(t *testing.T, newC newSplitter, data []byte) map[string]blocks.Block { r := newC(bytes.NewReader(data)) blkmap := make(map[string]blocks.Block) @@ -66,7 +60,7 @@ func chunkData(t *testing.T, newC newChunker, data []byte) map[string]blocks.Blo return blkmap } -func testReuse(t *testing.T, cr newChunker) { +func testReuse(t *testing.T, cr newSplitter) { data := make([]byte, 1024*1024*16) util.NewTimeSeededRand().Read(data) @@ -87,7 +81,7 @@ func testReuse(t *testing.T, cr newChunker) { } func TestRabinChunkReuse(t *testing.T) { - newRabin := func(r io.Reader) cher { + newRabin := func(r io.Reader) Splitter { return NewRabin(r, 256*1024) } testReuse(t, newRabin) @@ -96,29 +90,7 @@ func TestRabinChunkReuse(t *testing.T) { var Res uint64 func BenchmarkRabin(b *testing.B) { - const size = 1 << 10 - data := make([]byte, size) - util.NewTimeSeededRand().Read(data) - - b.SetBytes(size) - b.ReportAllocs() - b.ResetTimer() - - var res uint64 - - for i := 0; i < b.N; i++ { - r := NewRabin(bytes.NewReader(data), 1024*256) - - for { - chunk, err := r.NextBytes() - if err != nil { - if err == io.EOF { - break - } - b.Fatal(err) - } - res = res + uint64(len(chunk)) - } - } - Res = Res + res + benchmarkChunker(b, func(r io.Reader) Splitter { + return NewRabin(r, 256<<10) + }) } diff --git a/splitting_test.go b/splitting_test.go index f2de774421487b82c71a5b4e8ea38d0aaf446cf1..d6498fcbedcf7246531775ba2d5b324fb79d5770 100644 --- a/splitting_test.go +++ b/splitting_test.go @@ -6,7 +6,6 @@ import ( "testing" u "github.com/ipfs/go-ipfs-util" - util "github.com/ipfs/go-ipfs-util" ) func randBuf(t *testing.T, size int) []byte { @@ -121,29 +120,7 @@ func (s *clipReader) Read(buf []byte) (int, error) { } func BenchmarkDefault(b *testing.B) { - const size = 1 << 10 - data := make([]byte, size) - util.NewTimeSeededRand().Read(data) - - b.SetBytes(size) - b.ReportAllocs() - b.ResetTimer() - - var res uint64 - - for i := 0; i < b.N; i++ { - r := DefaultSplitter(bytes.NewReader(data)) - - for { - chunk, err := r.NextBytes() - if err != nil { - if err == io.EOF { - break - } - b.Fatal(err) - } - res = res + uint64(len(chunk)) - } - } - Res = Res + res + benchmarkChunker(b, func(r io.Reader) Splitter { + return DefaultSplitter(r) + }) }