Merge branch 'port-2021-05-02' into 'master'

Port 2021 05 02 See merge request dms3/public/go-dms3-chunker!1

Merge branch 'port-2021-05-02' into 'master'
Port 2021 05 02 See merge request dms3/public/go-dms3-chunker!1
e9e855d2 · tavit ohanian · 3215bee4 · 4cd40fd1 · e9e855d2 · e9e855d2
Commit e9e855d2 authored Jun 19, 2021 by tavit ohanian
22 changed files
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
+blank_issues_enabled: false
+contact_links:
+ - name: Getting Help on IPFS
+   url: https://ipfs.io/help
+   about: All information about how and where to get help on IPFS.
+ - name: IPFS Official Forum
+   url: https://discuss.ipfs.io
+   about: Please post general questions, support requests, and discussions here.
--- a/.github/ISSUE_TEMPLATE/open_an_issue.md
+++ b/.github/ISSUE_TEMPLATE/open_an_issue.md
+---
+name: Open an issue
+about: Only for actionable issues relevant to this repository.
+title: ''
+labels: need/triage
+assignees: ''
+
+---
+<!--
+Hello! To ensure this issue is correctly addressed as soon as possible by the IPFS team, please try to make sure:
+
+- This issue is relevant to this repository's topic or codebase.
+
+- A clear description is provided. It should include as much relevant information as possible and a clear scope for the issue to be actionable.
+
+FOR GENERAL DISCUSSION, HELP OR QUESTIONS, please see the options at https://ipfs.io/help or head directly to https://discuss.ipfs.io.
+
+(you can delete this section after reading)
+-->
--- a/.github/config.yml
+++ b/.github/config.yml
+# Configuration for welcome - https://github.com/behaviorbot/welcome
+
+# Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome
+# Comment to be posted on first-time issues
+newIssueWelcomeComment: >
+  Thank you for submitting your first issue to this repository! A maintainer
+  will be here shortly to triage and review.
+
+  In the meantime, please double-check that you have provided all the
+  necessary information to make this process easy! Any information that can
+  help save additional round trips is useful! We currently aim to give
+  initial feedback within **two business days**. If this does not happen, feel
+  free to leave a comment.
+
+  Please keep an eye on how this issue will be labeled, as labels give an
+  overview of priorities, assignments and additional actions requested by the
+  maintainers:
+
+    - "Priority" labels will show how urgent this is for the team.
+    - "Status" labels will show if this is ready to be worked on, blocked, or in progress.
+    - "Need" labels will indicate if additional input or analysis is required.
+
+  Finally, remember to use https://discuss.ipfs.io if you just need general
+  support.
+
+# Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome
+# Comment to be posted on PRs from first-time contributors in your repository
+newPRWelcomeComment: >
+  Thank you for submitting this PR!
+
+  A maintainer will be here shortly to review it.
+
+  We are super grateful, but we are also overloaded! Help us by making sure
+  that:
+
+    * The context for this PR is clear, with relevant discussion, decisions
+      and stakeholders linked/mentioned.
+
+    * Your contribution itself is clear (code comments, self-review for the
+      rest) and in its best form. Follow the [code contribution
+      guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines)
+      if they apply.
+
+  Getting other community members to do a review would be great help too on
+  complex PRs (you can ask in the chats/forums). If you are unsure about
+  something, just leave us a comment.
+
+  Next steps:
+
+    * A maintainer will triage and assign priority to this PR, commenting on
+      any missing things and potentially assigning a reviewer for high
+      priority items.
+
+    * The PR gets reviewed, discussed and approved as needed.
+
+    * The PR is merged by maintainers when it has been approved and comments addressed.
+
+  We currently aim to provide initial feedback/triaging within **two business
+  days**. Please keep an eye on any labelling actions, as these will indicate
+  priorities and status of your contribution.
+
+  We are very grateful for your contribution!
+
+
+# Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge
+# Comment to be posted on pull requests merged by a first-time user
+# Currently disabled
+#firstPRMergeComment: ""
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
+# File managed by web3-bot. DO NOT EDIT.
+# See https://github.com/protocol/.github/ for details.
+
+# Automatically merge pull requests opened by web3-bot, as soon as (and only if) all tests pass.
+# This reduces the friction associated with updating with our workflows.
+
+on: [ pull_request ]
+
+jobs:
+  automerge:
+    if: github.event.pull_request.user.login == 'web3-bot'
+    runs-on: ubuntu-latest
+    steps:
+    - name: Wait on tests
+      uses: lewagon/wait-on-check-action@bafe56a6863672c681c3cf671f5e10b20abf2eaa # v0.2
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        wait-interval: 10
+        running-workflow-name: 'automerge' # the name of this job
+    - name: Merge PR
+      uses: pascalgn/automerge-action@741c311a47881be9625932b0a0de1b0937aab1ae # v0.13.1
+      env:
+        GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+        MERGE_LABELS: ""
+        MERGE_METHOD: "squash"
+        MERGE_DELETE_BRANCH: true
--- a/.github/workflows/go-check.yml
+++ b/.github/workflows/go-check.yml
+# File managed by web3-bot. DO NOT EDIT.
+# See https://github.com/protocol/.github/ for details.
+
+on: [push, pull_request]
+
+jobs:
+  unit:
+    runs-on: ubuntu-latest
+    name: Go checks
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-go@v2
+        with:
+          go-version: "1.16.x"
+      - name: Install staticcheck
+        run: go install honnef.co/go/tools/cmd/staticcheck@be534f007836a777104a15f2456cd1fffd3ddee8 # v2020.2.2
+      - name: Check that go.mod is tidy
+        run: |
+          go mod tidy
+          if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then
+            echo "go.sum was added by go mod tidy"
+            exit 1
+          fi
+          git diff --exit-code -- go.sum go.mod
+      - name: gofmt
+        if: ${{ success() || failure() }} # run this step even if the previous one failed
+        run: |
+          out=$(gofmt -s -l .)
+          if [[ -n "$out" ]]; then
+            echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}'
+            exit 1
+          fi
+      - name: go vet
+        if: ${{ success() || failure() }} # run this step even if the previous one failed
+        run: go vet ./...
+      - name: staticcheck
+        if: ${{ success() || failure() }} # run this step even if the previous one failed
+        run: |
+          set -o pipefail
+          staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g'
+
--- a/.github/workflows/go-test.yml
+++ b/.github/workflows/go-test.yml
+# File managed by web3-bot. DO NOT EDIT.
+# See https://github.com/protocol/.github/ for details.
+
+on: [push, pull_request]
+
+jobs:
+  unit:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ "ubuntu", "windows", "macos" ]
+        go: [ "1.15.x", "1.16.x" ]
+    runs-on: ${{ matrix.os }}-latest
+    name: Unit tests (${{ matrix.os}}, Go ${{ matrix.go }})
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-go@v2
+        with:
+          go-version: ${{ matrix.go }}
+      - name: Go information
+        run: |
+          go version
+          go env
+      - name: Run tests
+        run: go test -v -coverprofile coverage.txt ./...
+      - name: Run tests (32 bit)
+        if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX.
+        env:
+          GOARCH: 386
+        run: go test -v ./...
+      - name: Run tests with race detector
+        if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow
+        run: go test -v -race ./...
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@967e2b38a85a62bd61be5529ada27ebc109948c2 # v1.4.1
+        with:
+          file: coverage.txt
+          env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }}
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+stages:
+  - build
+  - test
+
+variables:
+  BUILD_DIR: "/tmp/$CI_CONCURRENT_PROJECT_ID"
+
+before_script:
+  - mkdir -p $BUILD_DIR/src
+  - cd $BUILD_DIR/src
+  - if [ -d $CI_PROJECT_DIR ]
+  - then
+  -    echo "soft link $CI_PROJECT_DIR exists"
+  - else
+  -    echo "creating soft link $CI_PROJECT_DIR"
+  -    ln -s $CI_PROJECT_DIR
+  - fi
+  - cd $CI_PROJECT_DIR
+
+build:
+  stage: build
+  tags:
+    - testing
+  script:
+    - echo $CI_JOB_STAGE
+    - go build
+
+test:
+  stage: test
+  tags:
+    - testing
+  script:
+    - echo $CI_JOB_STAGE
+    - go test -cover
+  coverage: '/coverage: \d+.\d+% of statements/'
--- a/LICENSE
+++ b/LICENSE
+MIT License
+
+Copyright (c) 2018 IPFS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
 # go-dms3-chunker

+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+[![GoDoc](https://godoc.org/github.com/ipfs/go-ipfs-chunker?status.svg)](https://godoc.org/github.com/ipfs/go-ipfs-chunker)
+[![Build Status](https://travis-ci.org/ipfs/go-ipfs-chunker.svg?branch=master)](https://travis-ci.org/ipfs/go-ipfs-chunker)
+
+> go-ipfs-chunker implements data Splitters for go-ipfs.
+
+`go-ipfs-chunker` provides the `Splitter` interface. IPFS splitters read data from a reader and create "chunks". These chunks are used to build the ipfs DAGs (Merkle Tree) and are the base unit to obtain the sums that ipfs uses to address content.
+
+The package provides a `SizeSplitter` which creates chunks of equal size and it is used by default in most cases, and a `rabin` fingerprint chunker. This chunker will attempt to split data in a way that the resulting blocks are the same when the data has repetitive patterns, thus optimizing the resulting DAGs.
+
+## Lead Maintainer
+
+[Steven Allen](https://github.com/Stebalien)
+
+## Table of Contents
+
+- [Install](#install)
+- [Usage](#usage)
+- [Contribute](#contribute)
+- [License](#license)
+
+## Install
+
+`go-dms3-chunker` works like a regular Go module:
+
+```
+> go get gitlab.dms3.io/dms3/public/go-dms3-chunker
+```
+
+## Usage
+
+```
+import "gitlab.dms3.io/dms3/public/go-dms3-chunker"
+```
+
+Check the [GoDoc documentation](https://godoc.org/github.com/ipfs/go-ipfs-chunker)
+
+## Contribute
+
+PRs accepted.
+
+Small note: If editing the README, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification.
+
+## License
+
+MIT © Protocol Labs, Inc.
--- a/benchmark_test.go
+++ b/benchmark_test.go
+package chunk
+
+import (
+	"bytes"
+	"io"
+	"math/rand"
+	"testing"
+)
+
+// newSplitter builds a Splitter that reads its input from the given reader.
+// The benchmark helpers take one so the same harness can drive different
+// chunker implementations.
+type newSplitter func(io.Reader) Splitter
+
+// bencSpec describes one benchmark case: the input size in bytes and the
+// name used for the corresponding sub-benchmark.
+type bencSpec struct {
+	size int
+	name string
+}
+
+// bSizes lists the input sizes (1 KiB up to 100 MiB) that every chunker
+// benchmark is run against.
+var bSizes = []bencSpec{
+	{size: 1 << 10, name: "1K"},
+	{size: 1 << 20, name: "1M"},
+	{size: 16 << 20, name: "16M"},
+	{size: 100 << 20, name: "100M"},
+}
+
+// benchmarkChunker runs benchmarkChunkerSize once per entry of bSizes, each
+// as a named sub-benchmark, using ns to construct the splitter under test.
+func benchmarkChunker(b *testing.B, ns newSplitter) {
+	for idx := range bSizes {
+		// Copy the entry so the closure below captures this iteration's value.
+		spec := bSizes[idx]
+		b.Run(spec.name, func(sub *testing.B) {
+			benchmarkChunkerSize(sub, ns, spec.size)
+		})
+	}
+}
+
+// benchmarkChunkerSize measures how fast the splitter built by ns chunks a
+// size-byte pseudo-random payload. The payload comes from a fixed seed, so
+// every run sees the same bytes. The total number of bytes returned is added
+// to the package-level Res (declared elsewhere; presumably a sink that keeps
+// the compiler from discarding the loop).
+func benchmarkChunkerSize(b *testing.B, ns newSplitter, size int) {
+	payload := make([]byte, size)
+	rand.New(rand.NewSource(1)).Read(payload)
+
+	b.SetBytes(int64(size))
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	var total uint64
+	for iter := 0; iter < b.N; iter++ {
+		splitter := ns(bytes.NewReader(payload))
+
+		for {
+			chunk, err := splitter.NextBytes()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				b.Fatal(err)
+			}
+			total += uint64(len(chunk))
+		}
+	}
+	Res += total
+}
--- a/buzhash.go
+++ b/buzhash.go
+package chunk
+
+import (
+	"io"
+	"math/bits"
+
+	pool "github.com/libp2p/go-buffer-pool"
+)
+
+// Tuning parameters for the Buzhash chunker.
+const (
+	// buzMin is the minimum chunk size (128 KiB); only the final chunk of a
+	// stream may be shorter.
+	buzMin = 128 * 1024
+
+	// buzMax is the size of the working buffer and therefore the maximum
+	// chunk size (512 KiB).
+	buzMax = 512 * 1024
+
+	// buzMask selects the low 17 bits of the rolling hash; a chunk boundary
+	// is declared where those bits are all zero.
+	buzMask = (1 << 17) - 1
+)
+
+// Buzhash is a chunker that cuts its input where a rolling hash over a
+// 32-byte window matches buzMask, producing chunks of at least buzMin and at
+// most buzMax bytes (the final chunk may be shorter).
+type Buzhash struct {
+	// r is the reader the data is pulled from.
+	r io.Reader
+	// buf is the buzMax-byte working buffer taken from the buffer pool; it is
+	// returned to the pool and set to nil once the stream ends or fails.
+	buf []byte
+	// n is the number of bytes at the start of buf carried over from the
+	// previous NextBytes call.
+	n int
+
+	// err is the sticky error (io.EOF or a read error) returned by every
+	// call after the stream has ended.
+	err error
+}
+
+// NewBuzhash returns a Buzhash chunker that reads from r. Its buzMax-byte
+// working buffer is obtained from the buffer pool.
+func NewBuzhash(r io.Reader) *Buzhash {
+	chunker := new(Buzhash)
+	chunker.r = r
+	chunker.buf = pool.Get(buzMax)
+	return chunker
+}
+
+// Reader returns the io.Reader this chunker was constructed with.
+func (b *Buzhash) Reader() io.Reader {
+	return b.r
+}
+
+// NextBytes returns the next chunk of the input as a freshly allocated slice.
+//
+// Each call tops up the working buffer from the reader, then scans for a
+// chunk boundary starting at buzMin bytes; bytes after the boundary are kept
+// for the next call. Every chunk is between buzMin and buzMax bytes long,
+// except the last one, which holds whatever remains. Once the input is
+// exhausted it returns io.EOF; a read error is returned as-is. Either error
+// is remembered and returned by all later calls.
+func (b *Buzhash) NextBytes() ([]byte, error) {
+	// The stream already ended or failed on an earlier call.
+	if b.err != nil {
+		return nil, b.err
+	}
+
+	// Fill the buffer after the b.n bytes left over from the previous call.
+	n, err := io.ReadFull(b.r, b.buf[b.n:])
+	if err != nil {
+		if err == io.ErrUnexpectedEOF || err == io.EOF {
+			buffered := b.n + n
+			// Less than a minimum-size chunk is left: this is the tail of the
+			// stream. With buzMin or more bytes buffered we fall through and
+			// chunk normally; a later call will see EOF again.
+			if buffered < buzMin {
+				b.err = io.EOF
+				// Read nothing? Don't return an empty block.
+				if buffered == 0 {
+					pool.Put(b.buf)
+					b.buf = nil
+					return nil, b.err
+				}
+				// Copy the tail out so the pooled buffer can be released.
+				res := make([]byte, buffered)
+				copy(res, b.buf)
+
+				pool.Put(b.buf)
+				b.buf = nil
+				return res, nil
+			}
+		} else {
+			// Genuine read failure: record it and release the buffer.
+			b.err = err
+			pool.Put(b.buf)
+			b.buf = nil
+			return nil, err
+		}
+	}
+
+	// Start of the 32-byte hash window that ends exactly at offset buzMin.
+	i := buzMin - 32
+
+	var state uint32 = 0
+
+	if buzMin > len(b.buf) {
+		panic("this is impossible")
+	}
+
+	// Prime the rolling hash with the 32 bytes preceding offset buzMin.
+	for ; i < buzMin; i++ {
+		state = bits.RotateLeft32(state, 1)
+		state = state ^ bytehash[b.buf[i]]
+	}
+
+	{
+		// Largest window start for which the incoming byte buf[i+32] is still
+		// inside the buffered data.
+		max := b.n + n - 32 - 1
+
+		buf := b.buf
+		bufshf := b.buf[32:]
+		i = buzMin - 32
+		// Touch the highest index up front (presumably a bounds-check
+		// elimination hint for the loop below).
+		_ = buf[max]
+		_ = bufshf[max]
+
+		for ; i <= max; i++ {
+			// Boundary found: the masked hash bits are all zero.
+			if state&buzMask == 0 {
+				break
+			}
+			// Slide the window one byte: after 32 rotations the outgoing
+			// byte's hash is back in its original position, so XOR-ing it
+			// removes it; bufshf[i] (= buf[i+32]) is the incoming byte.
+			state = bits.RotateLeft32(state, 1) ^
+				bytehash[buf[i]] ^
+				bytehash[bufshf[i]]
+		}
+		// i is the window start; the chunk ends at the end of the window.
+		i += 32
+	}
+
+	res := make([]byte, i)
+	copy(res, b.buf)
+
+	// Move the unconsumed bytes to the front of the buffer for the next call.
+	b.n = copy(b.buf, b.buf[i:b.n+n])
+
+	return res, nil
+}
+
+// bytehash maps every byte value to the 32-bit hash mixed into the Buzhash
+// rolling state. The header of gen/main.go says that program generates this
+// table; TestBuzhashBitsHashBias checks that each bit position is set in
+// exactly 128 of the 256 entries.
+var bytehash = [256]uint32{
+	0x6236e7d5, 0x10279b0b, 0x72818182, 0xdc526514, 0x2fd41e3d, 0x777ef8c8,
+	0x83ee5285, 0x2c8f3637, 0x2f049c1a, 0x57df9791, 0x9207151f, 0x9b544818,
+	0x74eef658, 0x2028ca60, 0x0271d91a, 0x27ae587e, 0xecf9fa5f, 0x236e71cd,
+	0xf43a8a2e, 0xbb13380, 0x9e57912c, 0x89a26cdb, 0x9fcf3d71, 0xa86da6f1,
+	0x9c49f376, 0x346aecc7, 0xf094a9ee, 0xea99e9cb, 0xb01713c6, 0x88acffb,
+	0x2960a0fb, 0x344a626c, 0x7ff22a46, 0x6d7a1aa5, 0x6a714916, 0x41d454ca,
+	0x8325b830, 0xb65f563, 0x447fecca, 0xf9d0ea5e, 0xc1d9d3d4, 0xcb5ec574,
+	0x55aae902, 0x86edc0e7, 0xd3a9e33, 0xe70dc1e1, 0xe3c5f639, 0x9b43140a,
+	0xc6490ac5, 0x5e4030fb, 0x8e976dd5, 0xa87468ea, 0xf830ef6f, 0xcc1ed5a5,
+	0x611f4e78, 0xddd11905, 0xf2613904, 0x566c67b9, 0x905a5ccc, 0x7b37b3a4,
+	0x4b53898a, 0x6b8fd29d, 0xaad81575, 0x511be414, 0x3cfac1e7, 0x8029a179,
+	0xd40efeda, 0x7380e02, 0xdc9beffd, 0x2d049082, 0x99bc7831, 0xff5002a8,
+	0x21ce7646, 0x1cd049b, 0xf43994f, 0xc3c6c5a5, 0xbbda5f50, 0xec15ec7,
+	0x9adb19b6, 0xc1e80b9, 0xb9b52968, 0xae162419, 0x2542b405, 0x91a42e9d,
+	0x6be0f668, 0x6ed7a6b9, 0xbc2777b4, 0xe162ce56, 0x4266aad5, 0x60fdb704,
+	0x66f832a5, 0x9595f6ca, 0xfee83ced, 0x55228d99, 0x12bf0e28, 0x66896459,
+	0x789afda, 0x282baa8, 0x2367a343, 0x591491b0, 0x2ff1a4b1, 0x410739b6,
+	0x9b7055a0, 0x2e0eb229, 0x24fc8252, 0x3327d3df, 0xb0782669, 0x1c62e069,
+	0x7f503101, 0xf50593ae, 0xd9eb275d, 0xe00eb678, 0x5917ccde, 0x97b9660a,
+	0xdd06202d, 0xed229e22, 0xa9c735bf, 0xd6316fe6, 0x6fc72e4c, 0x206dfa2,
+	0xd6b15c5a, 0x69d87b49, 0x9c97745, 0x13445d61, 0x35a975aa, 0x859aa9b9,
+	0x65380013, 0xd1fb6391, 0xc29255fd, 0x784a3b91, 0xb9e74c26, 0x63ce4d40,
+	0xc07cbe9e, 0xe6e4529e, 0xfb3632f, 0x9438d9c9, 0x682f94a8, 0xf8fd4611,
+	0x257ec1ed, 0x475ce3d6, 0x60ee2db1, 0x2afab002, 0x2b9e4878, 0x86b340de,
+	0x1482fdca, 0xfe41b3bf, 0xd4a412b0, 0xe09db98c, 0xc1af5d53, 0x7e55e25f,
+	0xd3346b38, 0xb7a12cbd, 0x9c6827ba, 0x71f78bee, 0x8c3a0f52, 0x150491b0,
+	0xf26de912, 0x233e3a4e, 0xd309ebba, 0xa0a9e0ff, 0xca2b5921, 0xeeb9893c,
+	0x33829e88, 0x9870cc2a, 0x23c4b9d0, 0xeba32ea3, 0xbdac4d22, 0x3bc8c44c,
+	0x1e8d0397, 0xf9327735, 0x783b009f, 0xeb83742, 0x2621dc71, 0xed017d03,
+	0x5c760aa1, 0x5a69814b, 0x96e3047f, 0xa93c9cde, 0x615c86f5, 0xb4322aa5,
+	0x4225534d, 0xd2e2de3, 0xccfccc4b, 0xbac2a57, 0xf0a06d04, 0xbc78d737,
+	0xf2d1f766, 0xf5a7953c, 0xbcdfda85, 0x5213b7d5, 0xbce8a328, 0xd38f5f18,
+	0xdb094244, 0xfe571253, 0x317fa7ee, 0x4a324f43, 0x3ffc39d9, 0x51b3fa8e,
+	0x7a4bee9f, 0x78bbc682, 0x9f5c0350, 0x2fe286c, 0x245ab686, 0xed6bf7d7,
+	0xac4988a, 0x3fe010fa, 0xc65fe369, 0xa45749cb, 0x2b84e537, 0xde9ff363,
+	0x20540f9a, 0xaa8c9b34, 0x5bc476b3, 0x1d574bd7, 0x929100ad, 0x4721de4d,
+	0x27df1b05, 0x58b18546, 0xb7e76764, 0xdf904e58, 0x97af57a1, 0xbd4dc433,
+	0xa6256dfd, 0xf63998f3, 0xf1e05833, 0xe20acf26, 0xf57fd9d6, 0x90300b4d,
+	0x89df4290, 0x68d01cbc, 0xcf893ee3, 0xcc42a046, 0x778e181b, 0x67265c76,
+	0xe981a4c4, 0x82991da1, 0x708f7294, 0xe6e2ae62, 0xfc441870, 0x95e1b0b6,
+	0x445f825, 0x5a93b47f, 0x5e9cf4be, 0x84da71e7, 0x9d9582b0, 0x9bf835ef,
+	0x591f61e2, 0x43325985, 0x5d2de32e, 0x8d8fbf0f, 0x95b30f38, 0x7ad5b6e,
+	0x4e934edf, 0x3cd4990e, 0x9053e259, 0x5c41857d}
--- a/buzhash_norace_test.go
+++ b/buzhash_norace_test.go
+//+build !race
+
+package chunk
+
+import (
+	"testing"
+)
+
+func TestFuzzBuzhashChunking(t *testing.T) {
+	buf := make([]byte, 1024*1024*16)
+	for i := 0; i < 100; i++ {
+		testBuzhashChunking(t, buf)
+	}
+}
--- a/buzhash_test.go
+++ b/buzhash_test.go
+package chunk
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	util "gitlab.dms3.io/dms3/public/go-dms3-util"
+)
+
+// testBuzhashChunking overwrites buf with data read from
+// util.NewTimeSeededRand(), splits it with a Buzhash chunker and verifies
+// that:
+//
+//   - no chunk is empty,
+//   - every chunk except the last is at least buzMin bytes long,
+//   - concatenating the chunks reproduces buf exactly.
+//
+// It returns the number of chunks produced, which is 0 for an empty buf.
+func testBuzhashChunking(t *testing.T, buf []byte) (chunkCount int) {
+	n, err := util.NewTimeSeededRand().Read(buf)
+	if n < len(buf) {
+		t.Fatalf("expected %d bytes, got %d", len(buf), n)
+	}
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	r := NewBuzhash(bytes.NewReader(buf))
+
+	var chunks [][]byte
+
+	for {
+		chunk, err := r.NextBytes()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			t.Fatal(err)
+		}
+
+		chunks = append(chunks, chunk)
+	}
+
+	for i, chunk := range chunks {
+		if len(chunk) == 0 {
+			t.Fatalf("chunk %d/%d is empty", i+1, len(chunks))
+		}
+	}
+
+	// Only the final chunk may be shorter than buzMin. Guard the slice
+	// expression: with no chunks at all, chunks[:len(chunks)-1] would panic.
+	if len(chunks) > 0 {
+		for i, chunk := range chunks[:len(chunks)-1] {
+			if len(chunk) < buzMin {
+				t.Fatalf("chunk %d/%d is less than the minimum size", i+1, len(chunks))
+			}
+		}
+	}
+
+	unchunked := bytes.Join(chunks, nil)
+	if !bytes.Equal(unchunked, buf) {
+		t.Fatal("data was chunked incorrectly")
+	}
+
+	return len(chunks)
+}
+
+func TestBuzhashChunking(t *testing.T) {
+	buf := make([]byte, 1024*1024*16)
+	count := testBuzhashChunking(t, buf)
+	t.Logf("average block size: %d\n", len(buf)/count)
+}
+
+// TestBuzhashChunkReuse runs the shared testReuse check (defined elsewhere in
+// the package) against a Buzhash-backed Splitter.
+func TestBuzhashChunkReuse(t *testing.T) {
+	testReuse(t, func(src io.Reader) Splitter {
+		return NewBuzhash(src)
+	})
+}
+
+// BenchmarkBuzhash2 benchmarks the Buzhash chunker over all sizes covered by
+// benchmarkChunker.
+func BenchmarkBuzhash2(b *testing.B) {
+	mk := func(src io.Reader) Splitter {
+		return NewBuzhash(src)
+	}
+	benchmarkChunker(b, mk)
+}
+
+func TestBuzhashBitsHashBias(t *testing.T) {
+	counts := make([]byte, 32)
+	for _, h := range bytehash {
+		for i := 0; i < 32; i++ {
+			if h&1 == 1 {
+				counts[i]++
+			}
+			h = h >> 1
+		}
+	}
+	for i, c := range counts {
+		if c != 128 {
+			t.Errorf("Bit balance in position %d broken, %d ones", i, c)
+		}
+	}
+}
--- a/gen/main.go
+++ b/gen/main.go
+// This file generates bytehash LUT
+package main
+
+import (
+	"fmt"
+	"math/rand"
+)
+
+// nRounds is how many times main repeats the shuffle pass over all 32 bit
+// positions of the table.
+const nRounds = 200
+
+// main builds the 256-entry lookup table and prints it as a Go literal.
+//
+// The table starts with half of its entries all-ones and the rest zero. For
+// nRounds rounds and for each bit position, a random permutation selects
+// pairs of entries that swap that single bit. Swapping never changes how
+// many entries have a given bit set, so each bit stays set in exactly 128
+// entries. The random source uses a fixed seed, so the output is
+// reproducible.
+func main() {
+	rnd := rand.New(rand.NewSource(0))
+
+	lut := make([]uint32, 256)
+	for i := range lut[:len(lut)/2] {
+		lut[i] = ^uint32(0)
+	}
+
+	for round := 0; round < nRounds; round++ {
+		for bit := uint32(0); bit < 32; bit++ {
+			mask := uint32(1) << bit
+			for i, j := range rnd.Perm(256) {
+				// XOR-swap only the selected bit between entries i and j;
+				// flip is zero when the bits already agree or when i == j.
+				flip := (lut[i] ^ lut[j]) & mask
+				lut[i] ^= flip
+				lut[j] ^= flip
+			}
+		}
+	}
+
+	fmt.Printf("%#v", lut)
+}
--- a/go.mod
+++ b/go.mod
+module gitlab.dms3.io/dms3/public/go-dms3-chunker
+
+require (
+	gitlab.dms3.io/dms3/public/go-block-format v0.0.1
+	gitlab.dms3.io/dms3/public/go-dms3-util v0.0.1
+	gitlab.dms3.io/dms3/public/go-log v0.0.1
+	github.com/libp2p/go-buffer-pool v0.0.2
+	github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f
+)
+
+go 1.15
--- a/go.sum
+++ b/go.sum
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE=
+github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
+github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/gxed/hashland/keccakpg v0.0.1 h1:wrk3uMNaMxbXiHibbPO4S0ymqJMm41WiudyFSs7UnsU=
+github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU=
+github.com/gxed/hashland/murmur3 v0.0.1 h1:SheiaIt0sda5K+8FLz952/1iWS9zrnKsEJaOJu4ZbSc=
+github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48=
+github.com/ipfs/go-block-format v0.0.2 h1:qPDvcP19izTjU8rgo6p7gTXZlkMkF5bz5G3fqIsSCPE=
+github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY=
+github.com/ipfs/go-cid v0.0.1 h1:GBjWPktLnNyX0JiQCNFpUuUSoMw5KMyqrsejHYlILBE=
+github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM=
+github.com/ipfs/go-ipfs-util v0.0.1 h1:Wz9bL2wB2YBJqggkA4dD7oSmqB4cAnpNbGrlHJulv50=
+github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc=
+github.com/ipfs/go-log v0.0.1 h1:9XTUN/rW64BCG1YhPK9Hoy3q8nr4gOmHHBpgFdfw6Lc=
+github.com/ipfs/go-log v0.0.1/go.mod h1:kL1d2/hzSpI0thNYjiKfjanbVNU+IIGA/WnNESY9leM=
+github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/libp2p/go-buffer-pool v0.0.2 h1:QNK2iAFa8gjAe1SPz6mHSMuCcjs+X1wlHzeOSqcmlfs=
+github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM=
+github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg=
+github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
+github.com/mattn/go-isatty v0.0.5 h1:tHXDdz1cpzGaovsTB+TVB8q90WEokoVmfMqoVcrLUgw=
+github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g=
+github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ=
+github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16 h1:5W7KhL8HVF3XCFOweFD3BNESdnO8ewyYTFT2R+/b8FQ=
+github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U=
+github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771 h1:MHkK1uRtFbVqvAgvWxafZe54+5uBxLluGylDiKgdhwo=
+github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
+github.com/mr-tron/base58 v1.1.0 h1:Y51FGVJ91WBqCEabAi5OPUz38eAx8DakuAm5svLcsfQ=
+github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
+github.com/mr-tron/base58 v1.1.3 h1:v+sk57XuaCKGXpWtVBX8YJzO7hMGx4Aajh4TQbdEFdc=
+github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
+github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI=
+github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA=
+github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ89tUg4F4=
+github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM=
+github.com/multiformats/go-multibase v0.0.1 h1:PN9/v21eLywrFWdFNsFKaU04kLJzuYzmrJR+ubhT9qA=
+github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs=
+github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk=
+github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
+github.com/multiformats/go-multihash v0.0.1 h1:HHwN1K12I+XllBCrqKnhX949Orn4oawPkegHMu2vDqQ=
+github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U=
+github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc=
+github.com/multiformats/go-multihash v0.0.14 h1:QoBceQYQQtNUuf6s7wHxnE2c8bhbMqhfGzNI032se/I=
+github.com/multiformats/go-multihash v0.0.14/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc=
+github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
+github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY=
+github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
+github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg=
+github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
+github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
+github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E=
+github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8=
+github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc h1:9lDbC6Rz4bwmou+oE6Dt4Cb2BGMur5eR/GYptkKUVHo=
+github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM=
+gitlab.dms3.io/dms3/public/go-block-format v0.0.1 h1:PQ6+E7zY6kUIHET86uJTQHTTj4Z9ZNfP7w281ZdExgk=
+gitlab.dms3.io/dms3/public/go-block-format v0.0.1/go.mod h1:xlvtW/OF72rOzLa2RVWXX2Uw18qTAWTQEs/Xp7SCnuY=
+gitlab.dms3.io/dms3/public/go-cid v0.0.1 h1:qs4dtkDigcLGY/58dIZaFjKLt+orrTcmTBvtqaM3570=
+gitlab.dms3.io/dms3/public/go-cid v0.0.1/go.mod h1:GQw3gc4CSrFY+aX6M+OBQDlg0p5/eQJoRrayaZzkAOQ=
+gitlab.dms3.io/dms3/public/go-dms3-util v0.0.1 h1:Gd+kJl1Rc+ZEUb9CIS1ZctQnF9G1oruNFyxUC//QBUQ=
+gitlab.dms3.io/dms3/public/go-dms3-util v0.0.1/go.mod h1:ymlwtzTNMq8Ug+gVtPAMxXKCKTXwXJAzXS+SUihfKgo=
+gitlab.dms3.io/dms3/public/go-log v0.0.1 h1:jqz2g8pVdPW+Sy8CCo4rYfGEjktGhCBfgIb3oeY6yx8=
+gitlab.dms3.io/dms3/public/go-log v0.0.1/go.mod h1:OsyF7lVYe47r03v1ZCbrmz0byeGUWB0Y219jN1DJx3s=
+go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
+go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
+go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
+go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
+go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
+go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
+go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM=
+go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ=
+golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67 h1:ng3VDlRp5/DHpSWl02R4rM9I+8M2rhmsuLwAMmkLQWE=
+golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 h1:1wopBVtVdWnn03fZelqdXTqk7U7zPQCb+T4rbU9ZEoU=
+golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
+golang.org/x/net v0.0.0-20190227160552-c95aed5357e7 h1:C2F/nMkR/9sfUTpvR3QrjBuTdvMUC/cFajkphs1YLQo=
+golang.org/x/net v0.0.0-20190227160552-c95aed5357e7/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
--- a/parse.go
+++ b/parse.go
+package chunk
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+const (
+	// DefaultBlockSize is the chunk size that splitters produce (or aim to):
+	// 256 KiB.
+	DefaultBlockSize int64 = 1024 * 256
+
+	// ChunkSizeLimit is the maximum chunk size, in bytes, accepted by the
+	// parsers in this file. No leaf block should contain more than 1 MiB of
+	// payload data (wrapping overhead aside), which effectively mandates the
+	// maximum chunk size.
+	// See discussion at https://gitlab.dms3.io/dms3/public/go-dms3-chunker/pull/21#discussion_r369124879 for background.
+	ChunkSizeLimit int = 1048576
+)
+
+// Sentinel errors returned by FromString when it rejects a chunker
+// specification. Callers can compare against them directly.
+var (
+	// ErrRabinMin is returned when the min of a "rabin-{min}-{avg}-{max}"
+	// specification is below 16.
+	// NOTE(review): the message says "greater than 16" while the parser
+	// accepts exactly 16; confirm which of the two is intended.
+	ErrRabinMin = errors.New("rabin min must be greater than 16")
+	// ErrSize is returned when the size of a "size-{size}" specification is
+	// zero or negative.
+	ErrSize     = errors.New("chunker size must be greater than 0")
+	// ErrSizeMax is returned when a specification asks for chunks larger than
+	// ChunkSizeLimit.
+	ErrSizeMax  = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
+)
+
+// FromString returns a Splitter reading from r, selected by the chunker
+// specification string. Supported specifications are:
+//
+//   - "" or "default": the DefaultSplitter
+//   - "size-{size}": fixed-size chunks of {size} bytes
+//   - "rabin", "rabin-{avg}", "rabin-{min}-{avg}-{max}": Rabin splitters
+//   - "buzhash": the buzhash splitter
+//
+// For "size-{size}" everything after the prefix must be a single decimal
+// integer. FromString returns the strconv error when it is not, ErrSize when
+// it is not positive and ErrSizeMax when it exceeds ChunkSizeLimit. Any
+// specification that matches none of the forms above yields an
+// "unrecognized chunker option" error.
+func FromString(r io.Reader, chunker string) (Splitter, error) {
+	switch {
+	case chunker == "" || chunker == "default":
+		return DefaultSplitter(r), nil
+
+	case strings.HasPrefix(chunker, "size-"):
+		// Parse the whole remainder rather than only the field up to the next
+		// dash, so that input such as "size-10-20" is rejected instead of
+		// being silently treated as "size-10".
+		size, err := strconv.Atoi(strings.TrimPrefix(chunker, "size-"))
+		switch {
+		case err != nil:
+			return nil, err
+		case size <= 0:
+			return nil, ErrSize
+		case size > ChunkSizeLimit:
+			return nil, ErrSizeMax
+		}
+		return NewSizeSplitter(r, int64(size)), nil
+
+	case strings.HasPrefix(chunker, "rabin"):
+		return parseRabinString(r, chunker)
+
+	case chunker == "buzhash":
+		return NewBuzhash(r), nil
+
+	default:
+		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
+	}
+}
+
+// parseRabinString builds a Rabin splitter from one of the specifications
+// "rabin", "rabin-{avg}" or "rabin-{min}-{avg}-{max}". In the four-part form
+// each number may be written with its label ("min:", "avg:", "max:").
+//
+// Errors:
+//   - an "unrecognized chunker option" error when the first field is not
+//     exactly "rabin";
+//   - the strconv error for a field that is not a decimal integer;
+//   - ErrSize for a zero avg in the two-part form;
+//   - ErrRabinMin for a min below 16;
+//   - ErrSizeMax when the largest chunk would exceed ChunkSizeLimit;
+//   - a format error for mislabelled or misordered fields, or for any other
+//     number of fields.
+func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
+	parts := strings.Split(chunker, "-")
+	if parts[0] != "rabin" {
+		// The caller dispatches on the "rabin" prefix alone, so strings such
+		// as "rabinfoo" reach this point and must not be taken for "rabin".
+		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
+	}
+
+	switch len(parts) {
+	case 1:
+		return NewRabin(r, uint64(DefaultBlockSize)), nil
+	case 2:
+		avgSize, err := strconv.Atoi(parts[1])
+		if err != nil {
+			return nil, err
+		}
+		if avgSize <= 0 {
+			return nil, ErrSize
+		}
+		// NewRabin derives the maximum chunk size as avg + avg/2. Compare in
+		// integer arithmetic, and test avg on its own first so that a very
+		// large avg can neither overflow the sum nor slip through an
+		// out-of-range float conversion.
+		if avgSize > ChunkSizeLimit || avgSize+avgSize/2 > ChunkSizeLimit {
+			return nil, ErrSizeMax
+		}
+		return NewRabin(r, uint64(avgSize)), nil
+	case 4:
+		minSize, err := parseRabinField(parts[1], "min", "first")
+		if err != nil {
+			return nil, err
+		}
+		if minSize < 16 {
+			return nil, ErrRabinMin
+		}
+		avgSize, err := parseRabinField(parts[2], "avg", "second")
+		if err != nil {
+			return nil, err
+		}
+		maxSize, err := parseRabinField(parts[3], "max", "final")
+		if err != nil {
+			return nil, err
+		}
+
+		switch {
+		case minSize >= avgSize:
+			return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
+		case avgSize >= maxSize:
+			return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
+		case maxSize > ChunkSizeLimit:
+			return nil, ErrSizeMax
+		}
+
+		return NewRabinMinMax(r, uint64(minSize), uint64(avgSize), uint64(maxSize)), nil
+	default:
+		return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
+	}
+}
+
+// parseRabinField parses one field of a "rabin-{min}-{avg}-{max}"
+// specification. The field is either a bare decimal number or the number
+// preceded by "label:"; position ("first", "second", "final") is only used to
+// word the error reported for a wrong label.
+func parseRabinField(field, label, position string) (int, error) {
+	sub := strings.Split(field, ":")
+	if len(sub) > 1 && sub[0] != label {
+		return 0, errors.New(position + " label must be " + label)
+	}
+	return strconv.Atoi(sub[len(sub)-1])
+}
--- a/parse_test.go
+++ b/parse_test.go
+package chunk
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+)
+
+const (
+	// testTwoThirdsOfChunkLimit is two thirds of ChunkSizeLimit, computed in
+	// float32. TestParseRabin formats it with "%.0f" to build "rabin-{avg}"
+	// specifications on either side of the size limit.
+	testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3))
+)
+
+// TestParseRabin checks which "rabin-..." specifications FromString accepts
+// and which error it reports for the ones it rejects.
+func TestParseRabin(t *testing.T) {
+	r := bytes.NewReader(randBuf(t, 1000))
+
+	cases := []struct {
+		spec    string
+		wantErr error  // expected sentinel; nil means success unless wantMsg is set
+		wantMsg string // expected error text, for errors that have no sentinel
+	}{
+		{spec: "rabin-18-25-32"},
+		{spec: "rabin-15-23-31", wantErr: ErrRabinMin},
+		{spec: "rabin-20-20-21", wantMsg: "incorrect format: rabin-min must be smaller than rabin-avg"},
+		{spec: "rabin-19-21-21", wantMsg: "incorrect format: rabin-avg must be smaller than rabin-max"},
+		{spec: fmt.Sprintf("rabin-19-21-%d", ChunkSizeLimit)},
+		{spec: fmt.Sprintf("rabin-19-21-%d", 1+ChunkSizeLimit), wantErr: ErrSizeMax},
+		{spec: fmt.Sprintf("rabin-%.0f", testTwoThirdsOfChunkLimit)},
+		{spec: fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfChunkLimit), wantErr: ErrSizeMax},
+	}
+
+	for _, tc := range cases {
+		_, err := FromString(r, tc.spec)
+		if tc.wantMsg != "" {
+			if err == nil || err.Error() != tc.wantMsg {
+				t.Fatalf("%q: expected error %q, got: %#v", tc.spec, tc.wantMsg, err)
+			}
+			continue
+		}
+		// Always pass the error as an argument: using its text as the format
+		// string would misrender any '%' it contains.
+		if err != tc.wantErr {
+			t.Fatalf("%q: expected error %#v, got: %#v", tc.spec, tc.wantErr, err)
+		}
+	}
+}
+
+// TestParseSize checks the bounds FromString enforces on "size-{size}"
+// specifications: zero is rejected, the limit itself is accepted and one byte
+// above the limit is rejected.
+func TestParseSize(t *testing.T) {
+	r := bytes.NewReader(randBuf(t, 1000))
+
+	checks := []struct {
+		spec    string
+		want    error
+		failFmt string
+	}{
+		{"size-0", ErrSize, "Expected an 'ErrSize' error, got: %#v"},
+		{"size-32", nil, "Expected success, got: %#v"},
+		{fmt.Sprintf("size-%d", ChunkSizeLimit), nil, "Expected success, got: %#v"},
+		{fmt.Sprintf("size-%d", 1+ChunkSizeLimit), ErrSizeMax, "Expected 'ErrSizeMax', got: %#v"},
+	}
+
+	for _, c := range checks {
+		if _, err := FromString(r, c.spec); err != c.want {
+			t.Fatalf(c.failFmt, err)
+		}
+	}
+}
--- a/rabin.go
+++ b/rabin.go
+package chunk
+
+import (
+	"hash/fnv"
+	"io"
+
+	"github.com/whyrusleeping/chunker"
+)
+
+// Dms3RabinPoly is the irreducible polynomial of degree 53 used for Rabin
+// fingerprinting; NewRabinMinMax passes it to the underlying chunker.
+var Dms3RabinPoly = chunker.Pol(17437180132763653)
+
+// Rabin implements the Splitter interface and splits content with Rabin
+// fingerprints.
+type Rabin struct {
+	// r is the underlying chunker; NextBytes returns the data of its chunks.
+	r      *chunker.Chunker
+	// reader is the source reader given to the constructor; Reader returns it.
+	reader io.Reader
+}
+
+// NewRabin creates a new Rabin splitter with the given average block size.
+// The minimum block size is a third of the average and the maximum is one
+// and a half times the average (both in integer arithmetic).
+func NewRabin(r io.Reader, avgBlkSize uint64) *Rabin {
+	return NewRabinMinMax(r, avgBlkSize/3, avgBlkSize, avgBlkSize+avgBlkSize/2)
+}
+
+// NewRabinMinMax returns a new Rabin splitter reading from r which uses the
+// given min, average and max block sizes. The underlying chunker is built
+// with Dms3RabinPoly and a 32-bit FNV-1a hash.
+func NewRabinMinMax(r io.Reader, min, avg, max uint64) *Rabin {
+	splitter := &Rabin{reader: r}
+	// Note the argument order expected by chunker.New: average first.
+	splitter.r = chunker.New(r, Dms3RabinPoly, fnv.New32a(), avg, min, max)
+	return splitter
+}
+
+// NextBytes asks the underlying chunker for its next chunk and returns that
+// chunk's data. Any error from the chunker is returned unchanged together
+// with a nil slice.
+func (r *Rabin) NextBytes() ([]byte, error) {
+	next, nextErr := r.r.Next()
+	if nextErr == nil {
+		return next.Data, nil
+	}
+	return nil, nextErr
+}
+
+// Reader returns the io.Reader associated to this Splitter, i.e. the reader
+// that was passed to the constructor.
+func (r *Rabin) Reader() io.Reader {
+	return r.reader
+}
--- a/rabin_test.go
+++ b/rabin_test.go
+package chunk
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"testing"
+
+	blocks "gitlab.dms3.io/dms3/public/go-block-format"
+	util "gitlab.dms3.io/dms3/public/go-dms3-util"
+)
+
+// TestRabinChunking splits 16 MiB of random data with a Rabin splitter
+// (256 KiB average) and checks that concatenating the chunks in order
+// reproduces the input.
+func TestRabinChunking(t *testing.T) {
+	input := make([]byte, 16<<20)
+	got, readErr := util.NewTimeSeededRand().Read(input)
+	switch {
+	case got < len(input):
+		t.Fatalf("expected %d bytes, got %d", len(input), got)
+	case readErr != nil:
+		t.Fatal(readErr)
+	}
+
+	splitter := NewRabin(bytes.NewReader(input), 256<<10)
+
+	var pieces [][]byte
+	for {
+		piece, nextErr := splitter.NextBytes()
+		if nextErr == io.EOF {
+			break
+		}
+		if nextErr != nil {
+			t.Fatal(nextErr)
+		}
+		pieces = append(pieces, piece)
+	}
+
+	fmt.Printf("average block size: %d\n", len(input)/len(pieces))
+
+	if rejoined := bytes.Join(pieces, nil); !bytes.Equal(rejoined, input) {
+		fmt.Printf("%d %d\n", len(rejoined), len(input))
+		t.Fatal("data was chunked incorrectly")
+	}
+}
+
+// chunkData splits data with the splitter created by newC and returns the
+// resulting blocks keyed by the key string of their CID. Any error other than
+// io.EOF fails the test.
+func chunkData(t *testing.T, newC newSplitter, data []byte) map[string]blocks.Block {
+	splitter := newC(bytes.NewReader(data))
+	byKey := map[string]blocks.Block{}
+
+	for {
+		raw, err := splitter.NextBytes()
+		if err == io.EOF {
+			return byKey
+		}
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		blk := blocks.NewBlock(raw)
+		byKey[blk.Cid().KeyString()] = blk
+	}
+}
+
+func testReuse(t *testing.T, cr newSplitter) {
+	data := make([]byte, 1024*1024*16)
+	n, err := util.NewTimeSeededRand().Read(data)
+	if n < len(data) {
+		t.Fatalf("expected %d bytes, got %d", len(data), n)
+	}
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	ch1 := chunkData(t, cr, data[1000:])
+	ch2 := chunkData(t, cr, data)
+
+	var extra int
+	for k := range ch2 {
+		_, ok := ch1[k]
+		if !ok {
+			extra++
+		}
+	}
+
+	if extra > 2 {
+		t.Logf("too many spare chunks made: %d", extra)
+	}
+}
+
+// TestRabinChunkReuse runs testReuse with a Rabin splitter whose average
+// block size is 256 KiB.
+func TestRabinChunkReuse(t *testing.T) {
+	testReuse(t, func(r io.Reader) Splitter {
+		return NewRabin(r, 256*1024)
+	})
+}
+
+// Res is an exported package-level variable of the test package.
+// NOTE(review): presumably a sink that keeps benchmark results from being
+// optimized away; it is not referenced in this file, so confirm against the
+// benchmark helper before relying on that.
+var Res uint64
+
+// BenchmarkRabin runs the shared chunker benchmark against a Rabin splitter
+// with a 256 KiB average block size.
+func BenchmarkRabin(b *testing.B) {
+	newRabin := func(r io.Reader) Splitter {
+		return NewRabin(r, 256<<10)
+	}
+	benchmarkChunker(b, newRabin)
+}
--- a/splitting.go
+++ b/splitting.go
+// Package chunk implements streaming block splitters.
+// Splitters read data from a reader and provide byte slices (chunks).
+// The size and contents of these slices depend on the splitting method
+// used.
+package chunk
+
+import (
+	"io"
+
+	pool "github.com/libp2p/go-buffer-pool"
+	logging "gitlab.dms3.io/dms3/public/go-log"
+)
+
+// log is the package-level logger, registered under the name "chunk".
+var log = logging.Logger("chunk")
+
+// A Splitter reads bytes from a Reader and creates "chunks" (byte slices)
+// that can be used to build DAG nodes.
+type Splitter interface {
+	// Reader returns the reader the splitter takes its data from.
+	Reader() io.Reader
+	// NextBytes returns the next chunk, or an error when no chunk can be
+	// produced. The size splitter in this file reports io.EOF once its input
+	// is exhausted.
+	NextBytes() ([]byte, error)
+}
+
+// SplitterGen is a splitter generator: a function that, given a reader,
+// returns a Splitter for it.
+type SplitterGen func(r io.Reader) Splitter
+
+// DefaultSplitter returns a size-based Splitter reading from r that uses
+// DefaultBlockSize as its block size.
+func DefaultSplitter(r io.Reader) Splitter {
+	return NewSizeSplitter(r, DefaultBlockSize)
+}
+
+// SizeSplitterGen returns a SplitterGen function which will create
+// a splitter with the given size when called.
+func SizeSplitterGen(size int64) SplitterGen {
+	return func(r io.Reader) Splitter {
+		return NewSizeSplitter(r, size)
+	}
+}
+
+// Chan starts a goroutine that repeatedly calls s.NextBytes and returns two
+// channels: one that receives each chunk and one that receives the error that
+// ended the stream (for the size splitter in this file that is io.EOF at end
+// of input). Both channels are closed when the goroutine finishes.
+//
+// The chunk channel is unbuffered, so the goroutine waits until each chunk is
+// received. The error channel has room for one value, so the final error can
+// be delivered without a waiting receiver.
+func Chan(s Splitter) (<-chan []byte, <-chan error) {
+	chunks := make(chan []byte)
+	failures := make(chan error, 1)
+
+	produce := func() {
+		defer close(chunks)
+		defer close(failures)
+
+		// Keep producing chunks until the splitter reports an error.
+		for {
+			next, err := s.NextBytes()
+			if err != nil {
+				failures <- err
+				return
+			}
+			chunks <- next
+		}
+	}
+	go produce()
+
+	return chunks, failures
+}
+
+// sizeSplitterv2 is the size-based Splitter returned by NewSizeSplitter.
+type sizeSplitterv2 struct {
+	// r is the reader chunks are read from.
+	r    io.Reader
+	// size is the number of bytes requested for each chunk.
+	size uint32
+	// err, once set, is returned by every further NextBytes call; it is set
+	// to io.EOF after a short final chunk has been handed out.
+	err  error
+}
+
+// NewSizeSplitter returns a new size-based Splitter reading from r with the
+// given block size. The size is stored as a uint32, so a value outside the
+// uint32 range is truncated by the conversion.
+func NewSizeSplitter(r io.Reader, size int64) Splitter {
+	ss := new(sizeSplitterv2)
+	ss.r = r
+	ss.size = uint32(size)
+	return ss
+}
+
+// NextBytes produces the next chunk. It fills a pooled buffer of the
+// configured size from the reader:
+//
+//   - a completely filled buffer is returned as is;
+//   - a partially filled buffer (input ended mid-chunk) is copied into a
+//     slice of exactly the bytes read, the pooled buffer is released, and
+//     every later call returns io.EOF;
+//   - on any other error, including io.EOF with nothing read, the buffer is
+//     released and the error is returned.
+func (ss *sizeSplitterv2) NextBytes() ([]byte, error) {
+	if ss.err != nil {
+		return nil, ss.err
+	}
+
+	buf := pool.Get(int(ss.size))
+	n, err := io.ReadFull(ss.r, buf)
+	if err == nil {
+		return buf, nil
+	}
+	if err != io.ErrUnexpectedEOF {
+		pool.Put(buf)
+		return nil, err
+	}
+
+	// Short final chunk: remember that the stream is finished and hand out a
+	// right-sized copy so the pooled buffer can be reused.
+	ss.err = io.EOF
+	tail := make([]byte, n)
+	copy(tail, buf)
+	pool.Put(buf)
+	return tail, nil
+}
+
+// Reader returns the io.Reader associated to this Splitter, i.e. the reader
+// that was passed to NewSizeSplitter.
+func (ss *sizeSplitterv2) Reader() io.Reader {
+	return ss.r
+}
--- a/splitting_test.go
+++ b/splitting_test.go
+package chunk
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	u "gitlab.dms3.io/dms3/public/go-dms3-util"
+)
+
+// randBuf returns size bytes read from a time-seeded random source and fails
+// the test if that read reports an error.
+func randBuf(t *testing.T, size int) []byte {
+	out := make([]byte, size)
+	_, err := u.NewTimeSeededRand().Read(out)
+	if err != nil {
+		t.Fatal("failed to read enough randomness")
+	}
+	return out
+}
+
+// copyBuf returns a newly allocated slice with the same length and contents
+// as buf; the result never shares memory with buf.
+func copyBuf(buf []byte) []byte {
+	return append(make([]byte, 0, len(buf)), buf...)
+}
+
+func TestSizeSplitterOverAllocate(t *testing.T) {
+	max := 1000
+	r := bytes.NewReader(randBuf(t, max))
+	chunksize := int64(1024 * 256)
+	splitter := NewSizeSplitter(r, chunksize)
+	chunk, err := splitter.NextBytes()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cap(chunk) > len(chunk) {
+		t.Fatal("chunk capacity too large")
+	}
+}
+
+func TestSizeSplitterIsDeterministic(t *testing.T) {
+	if testing.Short() {
+		t.SkipNow()
+	}
+
+	test := func() {
+		bufR := randBuf(t, 10000000) // crank this up to satisfy yourself.
+		bufA := copyBuf(bufR)
+		bufB := copyBuf(bufR)
+
+		chunksA, _ := Chan(DefaultSplitter(bytes.NewReader(bufA)))
+		chunksB, _ := Chan(DefaultSplitter(bytes.NewReader(bufB)))
+
+		for n := 0; ; n++ {
+			a, moreA := <-chunksA
+			b, moreB := <-chunksB
+
+			if !moreA {
+				if moreB {
+					t.Fatal("A ended, B didnt.")
+				}
+				return
+			}
+
+			if !bytes.Equal(a, b) {
+				t.Fatalf("chunk %d not equal", n)
+			}
+		}
+	}
+
+	for run := 0; run < 1; run++ { // crank this up to satisfy yourself.
+		test()
+	}
+}
+
+func TestSizeSplitterFillsChunks(t *testing.T) {
+	if testing.Short() {
+		t.SkipNow()
+	}
+
+	max := 10000000
+	b := randBuf(t, max)
+	r := &clipReader{r: bytes.NewReader(b), size: 4000}
+	chunksize := int64(1024 * 256)
+	c, _ := Chan(NewSizeSplitter(r, chunksize))
+
+	sofar := 0
+	whole := make([]byte, max)
+	for chunk := range c {
+
+		bc := b[sofar : sofar+len(chunk)]
+		if !bytes.Equal(bc, chunk) {
+			t.Fatalf("chunk not correct: (sofar: %d) %d != %d, %v != %v", sofar, len(bc), len(chunk), bc[:100], chunk[:100])
+		}
+
+		copy(whole[sofar:], chunk)
+
+		sofar += len(chunk)
+		if sofar != max && len(chunk) < int(chunksize) {
+			t.Fatal("sizesplitter split at a smaller size")
+		}
+	}
+
+	if !bytes.Equal(b, whole) {
+		t.Fatal("splitter did not split right")
+	}
+}
+
+// clipReader wraps a reader and limits every Read call to at most size
+// bytes, regardless of how large the caller's buffer is.
+type clipReader struct {
+	size int
+	r    io.Reader
+}
+
+// Read forwards to the wrapped reader after clipping buf to the configured
+// size, so a single call yields smaller reads than the caller asked for.
+func (s *clipReader) Read(buf []byte) (int, error) {
+	limit := len(buf)
+	if s.size < limit {
+		limit = s.size
+	}
+	return s.r.Read(buf[:limit])
+}
+
+// BenchmarkDefault runs the shared chunker benchmark against the default
+// splitter.
+func BenchmarkDefault(b *testing.B) {
+	newDefault := func(r io.Reader) Splitter {
+		return DefaultSplitter(r)
+	}
+	benchmarkChunker(b, newDefault)
+}