Commit e9e855d2 authored by tavit ohanian's avatar tavit ohanian

Merge branch 'port-2021-05-02' into 'master'

Port 2021 05 02

See merge request dms3/public/go-dms3-chunker!1
parents 3215bee4 4cd40fd1
Pipeline #305 passed with stages
in 5 minutes and 24 seconds
blank_issues_enabled: false
contact_links:
- name: Getting Help on IPFS
url: https://ipfs.io/help
about: All information about how and where to get help on IPFS.
- name: IPFS Official Forum
url: https://discuss.ipfs.io
about: Please post general questions, support requests, and discussions here.
---
name: Open an issue
about: Only for actionable issues relevant to this repository.
title: ''
labels: need/triage
assignees: ''
---
<!--
Hello! To ensure this issue is correctly addressed as soon as possible by the IPFS team, please try to make sure:
- This issue is relevant to this repository's topic or codebase.
- A clear description is provided. It should includes as much relevant information as possible and clear scope for the issue to be actionable.
FOR GENERAL DISCUSSION, HELP OR QUESTIONS, please see the options at https://ipfs.io/help or head directly to https://discuss.ipfs.io.
(you can delete this section after reading)
-->
# Configuration for welcome - https://github.com/behaviorbot/welcome
# Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome
# Comment to be posted to on first time issues
newIssueWelcomeComment: >
Thank you for submitting your first issue to this repository! A maintainer
will be here shortly to triage and review.
In the meantime, please double-check that you have provided all the
necessary information to make this process easy! Any information that can
help save additional round trips is useful! We currently aim to give
initial feedback within **two business days**. If this does not happen, feel
free to leave a comment.
Please keep an eye on how this issue will be labeled, as labels give an
overview of priorities, assignments and additional actions requested by the
maintainers:
- "Priority" labels will show how urgent this is for the team.
- "Status" labels will show if this is ready to be worked on, blocked, or in progress.
- "Need" labels will indicate if additional input or analysis is required.
Finally, remember to use https://discuss.ipfs.io if you just need general
support.
# Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome
# Comment to be posted to on PRs from first time contributors in your repository
newPRWelcomeComment: >
Thank you for submitting this PR!
A maintainer will be here shortly to review it.
We are super grateful, but we are also overloaded! Help us by making sure
that:
* The context for this PR is clear, with relevant discussion, decisions
and stakeholders linked/mentioned.
* Your contribution itself is clear (code comments, self-review for the
rest) and in its best form. Follow the [code contribution
guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines)
if they apply.
Getting other community members to do a review would be great help too on
complex PRs (you can ask in the chats/forums). If you are unsure about
something, just leave us a comment.
Next steps:
* A maintainer will triage and assign priority to this PR, commenting on
any missing things and potentially assigning a reviewer for high
priority items.
* The PR gets reviews, discussed and approvals as needed.
* The PR is merged by maintainers when it has been approved and comments addressed.
We currently aim to provide initial feedback/triaging within **two business
days**. Please keep an eye on any labelling actions, as these will indicate
priorities and status of your contribution.
We are very grateful for your contribution!
# Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge
# Comment to be posted to on pull requests merged by a first time user
# Currently disabled
#firstPRMergeComment: ""
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
# Automatically merge pull requests opened by web3-bot, as soon as (and only if) all tests pass.
# This reduces the friction associated with updating with our workflows.
on: [ pull_request ]
jobs:
automerge:
if: github.event.pull_request.user.login == 'web3-bot'
runs-on: ubuntu-latest
steps:
- name: Wait on tests
uses: lewagon/wait-on-check-action@bafe56a6863672c681c3cf671f5e10b20abf2eaa # v0.2
with:
ref: ${{ github.event.pull_request.head.sha }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
wait-interval: 10
running-workflow-name: 'automerge' # the name of this job
- name: Merge PR
uses: pascalgn/automerge-action@741c311a47881be9625932b0a0de1b0937aab1ae # v0.13.1
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
MERGE_LABELS: ""
MERGE_METHOD: "squash"
MERGE_DELETE_BRANCH: true
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
on: [push, pull_request]
jobs:
unit:
runs-on: ubuntu-latest
name: Go checks
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: "1.16.x"
- name: Install staticcheck
run: go install honnef.co/go/tools/cmd/staticcheck@be534f007836a777104a15f2456cd1fffd3ddee8 # v2020.2.2
- name: Check that go.mod is tidy
run: |
go mod tidy
if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then
echo "go.sum was added by go mod tidy"
exit 1
fi
git diff --exit-code -- go.sum go.mod
- name: gofmt
if: ${{ success() || failure() }} # run this step even if the previous one failed
run: |
out=$(gofmt -s -l .)
if [[ -n "$out" ]]; then
echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}'
exit 1
fi
- name: go vet
if: ${{ success() || failure() }} # run this step even if the previous one failed
run: go vet ./...
- name: staticcheck
if: ${{ success() || failure() }} # run this step even if the previous one failed
run: |
set -o pipefail
staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g'
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
on: [push, pull_request]
jobs:
unit:
strategy:
fail-fast: false
matrix:
os: [ "ubuntu", "windows", "macos" ]
go: [ "1.15.x", "1.16.x" ]
runs-on: ${{ matrix.os }}-latest
name: Unit tests (${{ matrix.os}}, Go ${{ matrix.go }})
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go }}
- name: Go information
run: |
go version
go env
- name: Run tests
run: go test -v -coverprofile coverage.txt ./...
- name: Run tests (32 bit)
if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX.
env:
GOARCH: 386
run: go test -v ./...
- name: Run tests with race detector
if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow
run: go test -v -race ./...
- name: Upload coverage to Codecov
uses: codecov/codecov-action@967e2b38a85a62bd61be5529ada27ebc109948c2 # v1.4.1
with:
file: coverage.txt
env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }}
stages:
- build
- test
variables:
BUILD_DIR: "/tmp/$CI_CONCURRENT_PROJECT_ID"
before_script:
- mkdir -p $BUILD_DIR/src
- cd $BUILD_DIR/src
- if [ -d $CI_PROJECT_DIR ]
- then
- echo "soft link $CI_PROJECT_DIR exists"
- else
- echo "creating soft link $CI_PROJECT_DIR"
- ln -s $CI_PROJECT_DIR
- fi
- cd $CI_PROJECT_DIR
build:
stage: build
tags:
- testing
script:
- echo $CI_JOB_STAGE
- go build
test:
stage: test
tags:
- testing
script:
- echo $CI_JOB_STAGE
- go test -cover
coverage: '/coverage: \d+.\d+% of statements/'
MIT License
Copyright (c) 2018 IPFS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# go-dms3-chunker
[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
[![GoDoc](https://godoc.org/github.com/ipfs/go-ipfs-chunker?status.svg)](https://godoc.org/github.com/ipfs/go-ipfs-chunker)
[![Build Status](https://travis-ci.org/ipfs/go-ipfs-chunker.svg?branch=master)](https://travis-ci.org/ipfs/go-ipfs-chunker)
> go-ipfs-chunker implements data Splitters for go-ipfs.
`go-ipfs-chunker` provides the `Splitter` interface. IPFS splitters read data from a reader an create "chunks". These chunks are used to build the ipfs DAGs (Merkle Tree) and are the base unit to obtain the sums that ipfs uses to address content.
The package provides a `SizeSplitter` which creates chunks of equal size and it is used by default in most cases, and a `rabin` fingerprint chunker. This chunker will attempt to split data in a way that the resulting blocks are the same when the data has repetitive patterns, thus optimizing the resulting DAGs.
## Lead Maintainer
[Steven Allen](https://github.com/Stebalien)
## Table of Contents
- [Install](#install)
- [Usage](#usage)
- [Contribute](#contribute)
- [License](#license)
## Install
`go-ipfs-chunker` works like a regular Go module:
```
> go get github.com/ipfs/go-ipfs-chunker
```
## Usage
```
import "github.com/ipfs/go-ipfs-chunker"
```
Check the [GoDoc documentation](https://godoc.org/github.com/ipfs/go-ipfs-chunker)
## Contribute
PRs accepted.
Small note: If editing the README, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification.
## License
MIT © Protocol Labs, Inc.
package chunk
import (
"bytes"
"io"
"math/rand"
"testing"
)
type newSplitter func(io.Reader) Splitter
type bencSpec struct {
size int
name string
}
var bSizes = []bencSpec{
{1 << 10, "1K"},
{1 << 20, "1M"},
{16 << 20, "16M"},
{100 << 20, "100M"},
}
func benchmarkChunker(b *testing.B, ns newSplitter) {
for _, s := range bSizes {
s := s
b.Run(s.name, func(b *testing.B) {
benchmarkChunkerSize(b, ns, s.size)
})
}
}
func benchmarkChunkerSize(b *testing.B, ns newSplitter, size int) {
rng := rand.New(rand.NewSource(1))
data := make([]byte, size)
rng.Read(data)
b.SetBytes(int64(size))
b.ReportAllocs()
b.ResetTimer()
var res uint64
for i := 0; i < b.N; i++ {
r := ns(bytes.NewReader(data))
for {
chunk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
b.Fatal(err)
}
res = res + uint64(len(chunk))
}
}
Res = Res + res
}
package chunk
import (
"io"
"math/bits"
pool "github.com/libp2p/go-buffer-pool"
)
const (
buzMin = 128 << 10
buzMax = 512 << 10
buzMask = 1<<17 - 1
)
type Buzhash struct {
r io.Reader
buf []byte
n int
err error
}
func NewBuzhash(r io.Reader) *Buzhash {
return &Buzhash{
r: r,
buf: pool.Get(buzMax),
}
}
func (b *Buzhash) Reader() io.Reader {
return b.r
}
func (b *Buzhash) NextBytes() ([]byte, error) {
if b.err != nil {
return nil, b.err
}
n, err := io.ReadFull(b.r, b.buf[b.n:])
if err != nil {
if err == io.ErrUnexpectedEOF || err == io.EOF {
buffered := b.n + n
if buffered < buzMin {
b.err = io.EOF
// Read nothing? Don't return an empty block.
if buffered == 0 {
pool.Put(b.buf)
b.buf = nil
return nil, b.err
}
res := make([]byte, buffered)
copy(res, b.buf)
pool.Put(b.buf)
b.buf = nil
return res, nil
}
} else {
b.err = err
pool.Put(b.buf)
b.buf = nil
return nil, err
}
}
i := buzMin - 32
var state uint32 = 0
if buzMin > len(b.buf) {
panic("this is impossible")
}
for ; i < buzMin; i++ {
state = bits.RotateLeft32(state, 1)
state = state ^ bytehash[b.buf[i]]
}
{
max := b.n + n - 32 - 1
buf := b.buf
bufshf := b.buf[32:]
i = buzMin - 32
_ = buf[max]
_ = bufshf[max]
for ; i <= max; i++ {
if state&buzMask == 0 {
break
}
state = bits.RotateLeft32(state, 1) ^
bytehash[buf[i]] ^
bytehash[bufshf[i]]
}
i += 32
}
res := make([]byte, i)
copy(res, b.buf)
b.n = copy(b.buf, b.buf[i:b.n+n])
return res, nil
}
var bytehash = [256]uint32{
0x6236e7d5, 0x10279b0b, 0x72818182, 0xdc526514, 0x2fd41e3d, 0x777ef8c8,
0x83ee5285, 0x2c8f3637, 0x2f049c1a, 0x57df9791, 0x9207151f, 0x9b544818,
0x74eef658, 0x2028ca60, 0x0271d91a, 0x27ae587e, 0xecf9fa5f, 0x236e71cd,
0xf43a8a2e, 0xbb13380, 0x9e57912c, 0x89a26cdb, 0x9fcf3d71, 0xa86da6f1,
0x9c49f376, 0x346aecc7, 0xf094a9ee, 0xea99e9cb, 0xb01713c6, 0x88acffb,
0x2960a0fb, 0x344a626c, 0x7ff22a46, 0x6d7a1aa5, 0x6a714916, 0x41d454ca,
0x8325b830, 0xb65f563, 0x447fecca, 0xf9d0ea5e, 0xc1d9d3d4, 0xcb5ec574,
0x55aae902, 0x86edc0e7, 0xd3a9e33, 0xe70dc1e1, 0xe3c5f639, 0x9b43140a,
0xc6490ac5, 0x5e4030fb, 0x8e976dd5, 0xa87468ea, 0xf830ef6f, 0xcc1ed5a5,
0x611f4e78, 0xddd11905, 0xf2613904, 0x566c67b9, 0x905a5ccc, 0x7b37b3a4,
0x4b53898a, 0x6b8fd29d, 0xaad81575, 0x511be414, 0x3cfac1e7, 0x8029a179,
0xd40efeda, 0x7380e02, 0xdc9beffd, 0x2d049082, 0x99bc7831, 0xff5002a8,
0x21ce7646, 0x1cd049b, 0xf43994f, 0xc3c6c5a5, 0xbbda5f50, 0xec15ec7,
0x9adb19b6, 0xc1e80b9, 0xb9b52968, 0xae162419, 0x2542b405, 0x91a42e9d,
0x6be0f668, 0x6ed7a6b9, 0xbc2777b4, 0xe162ce56, 0x4266aad5, 0x60fdb704,
0x66f832a5, 0x9595f6ca, 0xfee83ced, 0x55228d99, 0x12bf0e28, 0x66896459,
0x789afda, 0x282baa8, 0x2367a343, 0x591491b0, 0x2ff1a4b1, 0x410739b6,
0x9b7055a0, 0x2e0eb229, 0x24fc8252, 0x3327d3df, 0xb0782669, 0x1c62e069,
0x7f503101, 0xf50593ae, 0xd9eb275d, 0xe00eb678, 0x5917ccde, 0x97b9660a,
0xdd06202d, 0xed229e22, 0xa9c735bf, 0xd6316fe6, 0x6fc72e4c, 0x206dfa2,
0xd6b15c5a, 0x69d87b49, 0x9c97745, 0x13445d61, 0x35a975aa, 0x859aa9b9,
0x65380013, 0xd1fb6391, 0xc29255fd, 0x784a3b91, 0xb9e74c26, 0x63ce4d40,
0xc07cbe9e, 0xe6e4529e, 0xfb3632f, 0x9438d9c9, 0x682f94a8, 0xf8fd4611,
0x257ec1ed, 0x475ce3d6, 0x60ee2db1, 0x2afab002, 0x2b9e4878, 0x86b340de,
0x1482fdca, 0xfe41b3bf, 0xd4a412b0, 0xe09db98c, 0xc1af5d53, 0x7e55e25f,
0xd3346b38, 0xb7a12cbd, 0x9c6827ba, 0x71f78bee, 0x8c3a0f52, 0x150491b0,
0xf26de912, 0x233e3a4e, 0xd309ebba, 0xa0a9e0ff, 0xca2b5921, 0xeeb9893c,
0x33829e88, 0x9870cc2a, 0x23c4b9d0, 0xeba32ea3, 0xbdac4d22, 0x3bc8c44c,
0x1e8d0397, 0xf9327735, 0x783b009f, 0xeb83742, 0x2621dc71, 0xed017d03,
0x5c760aa1, 0x5a69814b, 0x96e3047f, 0xa93c9cde, 0x615c86f5, 0xb4322aa5,
0x4225534d, 0xd2e2de3, 0xccfccc4b, 0xbac2a57, 0xf0a06d04, 0xbc78d737,
0xf2d1f766, 0xf5a7953c, 0xbcdfda85, 0x5213b7d5, 0xbce8a328, 0xd38f5f18,
0xdb094244, 0xfe571253, 0x317fa7ee, 0x4a324f43, 0x3ffc39d9, 0x51b3fa8e,
0x7a4bee9f, 0x78bbc682, 0x9f5c0350, 0x2fe286c, 0x245ab686, 0xed6bf7d7,
0xac4988a, 0x3fe010fa, 0xc65fe369, 0xa45749cb, 0x2b84e537, 0xde9ff363,
0x20540f9a, 0xaa8c9b34, 0x5bc476b3, 0x1d574bd7, 0x929100ad, 0x4721de4d,
0x27df1b05, 0x58b18546, 0xb7e76764, 0xdf904e58, 0x97af57a1, 0xbd4dc433,
0xa6256dfd, 0xf63998f3, 0xf1e05833, 0xe20acf26, 0xf57fd9d6, 0x90300b4d,
0x89df4290, 0x68d01cbc, 0xcf893ee3, 0xcc42a046, 0x778e181b, 0x67265c76,
0xe981a4c4, 0x82991da1, 0x708f7294, 0xe6e2ae62, 0xfc441870, 0x95e1b0b6,
0x445f825, 0x5a93b47f, 0x5e9cf4be, 0x84da71e7, 0x9d9582b0, 0x9bf835ef,
0x591f61e2, 0x43325985, 0x5d2de32e, 0x8d8fbf0f, 0x95b30f38, 0x7ad5b6e,
0x4e934edf, 0x3cd4990e, 0x9053e259, 0x5c41857d}
//+build !race
package chunk
import (
"testing"
)
func TestFuzzBuzhashChunking(t *testing.T) {
buf := make([]byte, 1024*1024*16)
for i := 0; i < 100; i++ {
testBuzhashChunking(t, buf)
}
}
package chunk
import (
"bytes"
"io"
"testing"
util "gitlab.dms3.io/dms3/public/go-dms3-util"
)
func testBuzhashChunking(t *testing.T, buf []byte) (chunkCount int) {
n, err := util.NewTimeSeededRand().Read(buf)
if n < len(buf) {
t.Fatalf("expected %d bytes, got %d", len(buf), n)
}
if err != nil {
t.Fatal(err)
}
r := NewBuzhash(bytes.NewReader(buf))
var chunks [][]byte
for {
chunk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
t.Fatal(err)
}
chunks = append(chunks, chunk)
}
chunkCount += len(chunks)
for i, chunk := range chunks {
if len(chunk) == 0 {
t.Fatalf("chunk %d/%d is empty", i+1, len(chunks))
}
}
for i, chunk := range chunks[:len(chunks)-1] {
if len(chunk) < buzMin {
t.Fatalf("chunk %d/%d is less than the minimum size", i+1, len(chunks))
}
}
unchunked := bytes.Join(chunks, nil)
if !bytes.Equal(unchunked, buf) {
t.Fatal("data was chunked incorrectly")
}
return chunkCount
}
func TestBuzhashChunking(t *testing.T) {
buf := make([]byte, 1024*1024*16)
count := testBuzhashChunking(t, buf)
t.Logf("average block size: %d\n", len(buf)/count)
}
func TestBuzhashChunkReuse(t *testing.T) {
newBuzhash := func(r io.Reader) Splitter {
return NewBuzhash(r)
}
testReuse(t, newBuzhash)
}
func BenchmarkBuzhash2(b *testing.B) {
benchmarkChunker(b, func(r io.Reader) Splitter {
return NewBuzhash(r)
})
}
func TestBuzhashBitsHashBias(t *testing.T) {
counts := make([]byte, 32)
for _, h := range bytehash {
for i := 0; i < 32; i++ {
if h&1 == 1 {
counts[i]++
}
h = h >> 1
}
}
for i, c := range counts {
if c != 128 {
t.Errorf("Bit balance in position %d broken, %d ones", i, c)
}
}
}
// This file generates bytehash LUT
package main
import (
"fmt"
"math/rand"
)
const nRounds = 200
func main() {
rnd := rand.New(rand.NewSource(0))
lut := make([]uint32, 256)
for i := 0; i < 256/2; i++ {
lut[i] = 1<<32 - 1
}
for r := 0; r < nRounds; r++ {
for b := uint32(0); b < 32; b++ {
mask := uint32(1) << b
nmask := ^mask
for i, j := range rnd.Perm(256) {
li := lut[i]
lj := lut[j]
lut[i] = li&nmask | (lj & mask)
lut[j] = lj&nmask | (li & mask)
}
}
}
fmt.Printf("%#v", lut)
}
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/gxed/hashland/keccakpg v0.0.1 h1:wrk3uMNaMxbXiHibbPO4S0ymqJMm41WiudyFSs7UnsU=
github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU=
github.com/gxed/hashland/murmur3 v0.0.1 h1:SheiaIt0sda5K+8FLz952/1iWS9zrnKsEJaOJu4ZbSc=
github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48=
github.com/ipfs/go-block-format v0.0.2 h1:qPDvcP19izTjU8rgo6p7gTXZlkMkF5bz5G3fqIsSCPE=
github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY=
github.com/ipfs/go-cid v0.0.1 h1:GBjWPktLnNyX0JiQCNFpUuUSoMw5KMyqrsejHYlILBE=
github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM=
github.com/ipfs/go-ipfs-util v0.0.1 h1:Wz9bL2wB2YBJqggkA4dD7oSmqB4cAnpNbGrlHJulv50=
github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc=
github.com/ipfs/go-log v0.0.1 h1:9XTUN/rW64BCG1YhPK9Hoy3q8nr4gOmHHBpgFdfw6Lc=
github.com/ipfs/go-log v0.0.1/go.mod h1:kL1d2/hzSpI0thNYjiKfjanbVNU+IIGA/WnNESY9leM=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/libp2p/go-buffer-pool v0.0.2 h1:QNK2iAFa8gjAe1SPz6mHSMuCcjs+X1wlHzeOSqcmlfs=
github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM=
github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-isatty v0.0.5 h1:tHXDdz1cpzGaovsTB+TVB8q90WEokoVmfMqoVcrLUgw=
github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g=
github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ=
github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16 h1:5W7KhL8HVF3XCFOweFD3BNESdnO8ewyYTFT2R+/b8FQ=
github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U=
github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771 h1:MHkK1uRtFbVqvAgvWxafZe54+5uBxLluGylDiKgdhwo=
github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
github.com/mr-tron/base58 v1.1.0 h1:Y51FGVJ91WBqCEabAi5OPUz38eAx8DakuAm5svLcsfQ=
github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
github.com/mr-tron/base58 v1.1.3 h1:v+sk57XuaCKGXpWtVBX8YJzO7hMGx4Aajh4TQbdEFdc=
github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI=
github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA=
github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ89tUg4F4=
github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM=
github.com/multiformats/go-multibase v0.0.1 h1:PN9/v21eLywrFWdFNsFKaU04kLJzuYzmrJR+ubhT9qA=
github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs=
github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk=
github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
github.com/multiformats/go-multihash v0.0.1 h1:HHwN1K12I+XllBCrqKnhX949Orn4oawPkegHMu2vDqQ=
github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U=
github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc=
github.com/multiformats/go-multihash v0.0.14 h1:QoBceQYQQtNUuf6s7wHxnE2c8bhbMqhfGzNI032se/I=
github.com/multiformats/go-multihash v0.0.14/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc=
github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY=
github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg=
github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E=
github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8=
github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc h1:9lDbC6Rz4bwmou+oE6Dt4Cb2BGMur5eR/GYptkKUVHo=
github.com/whyrusleeping/go-logging v0.0.0-20170515211332-0457bb6b88fc/go.mod h1:bopw91TMyo8J3tvftk8xmU2kPmlrt4nScJQZU2hE5EM=
gitlab.dms3.io/dms3/public/go-block-format v0.0.1 h1:PQ6+E7zY6kUIHET86uJTQHTTj4Z9ZNfP7w281ZdExgk=
gitlab.dms3.io/dms3/public/go-block-format v0.0.1/go.mod h1:xlvtW/OF72rOzLa2RVWXX2Uw18qTAWTQEs/Xp7SCnuY=
gitlab.dms3.io/dms3/public/go-cid v0.0.1 h1:qs4dtkDigcLGY/58dIZaFjKLt+orrTcmTBvtqaM3570=
gitlab.dms3.io/dms3/public/go-cid v0.0.1/go.mod h1:GQw3gc4CSrFY+aX6M+OBQDlg0p5/eQJoRrayaZzkAOQ=
gitlab.dms3.io/dms3/public/go-dms3-util v0.0.1 h1:Gd+kJl1Rc+ZEUb9CIS1ZctQnF9G1oruNFyxUC//QBUQ=
gitlab.dms3.io/dms3/public/go-dms3-util v0.0.1/go.mod h1:ymlwtzTNMq8Ug+gVtPAMxXKCKTXwXJAzXS+SUihfKgo=
gitlab.dms3.io/dms3/public/go-log v0.0.1 h1:jqz2g8pVdPW+Sy8CCo4rYfGEjktGhCBfgIb3oeY6yx8=
gitlab.dms3.io/dms3/public/go-log v0.0.1/go.mod h1:OsyF7lVYe47r03v1ZCbrmz0byeGUWB0Y219jN1DJx3s=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM=
go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ=
golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67 h1:ng3VDlRp5/DHpSWl02R4rM9I+8M2rhmsuLwAMmkLQWE=
golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 h1:1wopBVtVdWnn03fZelqdXTqk7U7zPQCb+T4rbU9ZEoU=
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/net v0.0.0-20190227160552-c95aed5357e7 h1:C2F/nMkR/9sfUTpvR3QrjBuTdvMUC/cFajkphs1YLQo=
golang.org/x/net v0.0.0-20190227160552-c95aed5357e7/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
package chunk
import (
"errors"
"fmt"
"io"
"strconv"
"strings"
)
const (
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
DefaultBlockSize int64 = 1024 * 256
// No leaf block should contain more than 1MiB of payload data ( wrapping overhead aside )
// This effectively mandates the maximum chunk size
// See discussion at https://gitlab.dms3.io/dms3/public/go-dms3-chunker/pull/21#discussion_r369124879 for background
ChunkSizeLimit int = 1048576
)
var (
ErrRabinMin = errors.New("rabin min must be greater than 16")
ErrSize = errors.New("chunker size must be greater than 0")
ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
)
// FromString returns a Splitter depending on the given string:
// it supports "default" (""), "size-{size}", "rabin", "rabin-{blocksize}",
// "rabin-{min}-{avg}-{max}" and "buzhash".
func FromString(r io.Reader, chunker string) (Splitter, error) {
switch {
case chunker == "" || chunker == "default":
return DefaultSplitter(r), nil
case strings.HasPrefix(chunker, "size-"):
sizeStr := strings.Split(chunker, "-")[1]
size, err := strconv.Atoi(sizeStr)
if err != nil {
return nil, err
} else if size <= 0 {
return nil, ErrSize
} else if size > ChunkSizeLimit {
return nil, ErrSizeMax
}
return NewSizeSplitter(r, int64(size)), nil
case strings.HasPrefix(chunker, "rabin"):
return parseRabinString(r, chunker)
case chunker == "buzhash":
return NewBuzhash(r), nil
default:
return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
}
}
func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
parts := strings.Split(chunker, "-")
switch len(parts) {
case 1:
return NewRabin(r, uint64(DefaultBlockSize)), nil
case 2:
size, err := strconv.Atoi(parts[1])
if err != nil {
return nil, err
} else if int(float32(size)*1.5) > ChunkSizeLimit { // FIXME - this will be addressed in a subsequent PR
return nil, ErrSizeMax
}
return NewRabin(r, uint64(size)), nil
case 4:
sub := strings.Split(parts[1], ":")
if len(sub) > 1 && sub[0] != "min" {
return nil, errors.New("first label must be min")
}
min, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
if min < 16 {
return nil, ErrRabinMin
}
sub = strings.Split(parts[2], ":")
if len(sub) > 1 && sub[0] != "avg" {
log.Error("sub == ", sub)
return nil, errors.New("second label must be avg")
}
avg, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
sub = strings.Split(parts[3], ":")
if len(sub) > 1 && sub[0] != "max" {
return nil, errors.New("final label must be max")
}
max, err := strconv.Atoi(sub[len(sub)-1])
if err != nil {
return nil, err
}
if min >= avg {
return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
} else if avg >= max {
return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
} else if max > ChunkSizeLimit {
return nil, ErrSizeMax
}
return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil
default:
return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
}
}
package chunk
import (
"bytes"
"fmt"
"testing"
)
const (
testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3))
)
func TestParseRabin(t *testing.T) {
r := bytes.NewReader(randBuf(t, 1000))
_, err := FromString(r, "rabin-18-25-32")
if err != nil {
t.Errorf(err.Error())
}
_, err = FromString(r, "rabin-15-23-31")
if err != ErrRabinMin {
t.Fatalf("Expected an 'ErrRabinMin' error, got: %#v", err)
}
_, err = FromString(r, "rabin-20-20-21")
if err == nil || err.Error() != "incorrect format: rabin-min must be smaller than rabin-avg" {
t.Fatalf("Expected an arg-out-of-order error, got: %#v", err)
}
_, err = FromString(r, "rabin-19-21-21")
if err == nil || err.Error() != "incorrect format: rabin-avg must be smaller than rabin-max" {
t.Fatalf("Expected an arg-out-of-order error, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", ChunkSizeLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+ChunkSizeLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfChunkLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfChunkLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
}
}
func TestParseSize(t *testing.T) {
r := bytes.NewReader(randBuf(t, 1000))
_, err := FromString(r, "size-0")
if err != ErrSize {
t.Fatalf("Expected an 'ErrSize' error, got: %#v", err)
}
_, err = FromString(r, "size-32")
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("size-%d", ChunkSizeLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("size-%d", 1+ChunkSizeLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
}
}
package chunk
import (
"hash/fnv"
"io"
"github.com/whyrusleeping/chunker"
)
// Dms3RabinPoly is the irreducible polynomial of degree 53 used by for Rabin.
var Dms3RabinPoly = chunker.Pol(17437180132763653)
// Rabin implements the Splitter interface and splits content with Rabin
// fingerprints.
type Rabin struct {
r *chunker.Chunker
reader io.Reader
}
// NewRabin creates a new Rabin splitter with the given
// average block size.
func NewRabin(r io.Reader, avgBlkSize uint64) *Rabin {
min := avgBlkSize / 3
max := avgBlkSize + (avgBlkSize / 2)
return NewRabinMinMax(r, min, avgBlkSize, max)
}
// NewRabinMinMax returns a new Rabin splitter which uses
// the given min, average and max block sizes.
func NewRabinMinMax(r io.Reader, min, avg, max uint64) *Rabin {
h := fnv.New32a()
ch := chunker.New(r, Dms3RabinPoly, h, avg, min, max)
return &Rabin{
r: ch,
reader: r,
}
}
// NextBytes reads the next bytes from the reader and returns a slice.
func (r *Rabin) NextBytes() ([]byte, error) {
ch, err := r.r.Next()
if err != nil {
return nil, err
}
return ch.Data, nil
}
// Reader returns the io.Reader associated to this Splitter.
func (r *Rabin) Reader() io.Reader {
return r.reader
}
package chunk
import (
"bytes"
"fmt"
"io"
"testing"
blocks "gitlab.dms3.io/dms3/public/go-block-format"
util "gitlab.dms3.io/dms3/public/go-dms3-util"
)
func TestRabinChunking(t *testing.T) {
data := make([]byte, 1024*1024*16)
n, err := util.NewTimeSeededRand().Read(data)
if n < len(data) {
t.Fatalf("expected %d bytes, got %d", len(data), n)
}
if err != nil {
t.Fatal(err)
}
r := NewRabin(bytes.NewReader(data), 1024*256)
var chunks [][]byte
for {
chunk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
t.Fatal(err)
}
chunks = append(chunks, chunk)
}
fmt.Printf("average block size: %d\n", len(data)/len(chunks))
unchunked := bytes.Join(chunks, nil)
if !bytes.Equal(unchunked, data) {
fmt.Printf("%d %d\n", len(unchunked), len(data))
t.Fatal("data was chunked incorrectly")
}
}
func chunkData(t *testing.T, newC newSplitter, data []byte) map[string]blocks.Block {
r := newC(bytes.NewReader(data))
blkmap := make(map[string]blocks.Block)
for {
blk, err := r.NextBytes()
if err != nil {
if err == io.EOF {
break
}
t.Fatal(err)
}
b := blocks.NewBlock(blk)
blkmap[b.Cid().KeyString()] = b
}
return blkmap
}
func testReuse(t *testing.T, cr newSplitter) {
data := make([]byte, 1024*1024*16)
n, err := util.NewTimeSeededRand().Read(data)
if n < len(data) {
t.Fatalf("expected %d bytes, got %d", len(data), n)
}
if err != nil {
t.Fatal(err)
}
ch1 := chunkData(t, cr, data[1000:])
ch2 := chunkData(t, cr, data)
var extra int
for k := range ch2 {
_, ok := ch1[k]
if !ok {
extra++
}
}
if extra > 2 {
t.Logf("too many spare chunks made: %d", extra)
}
}
func TestRabinChunkReuse(t *testing.T) {
newRabin := func(r io.Reader) Splitter {
return NewRabin(r, 256*1024)
}
testReuse(t, newRabin)
}
var Res uint64
func BenchmarkRabin(b *testing.B) {
benchmarkChunker(b, func(r io.Reader) Splitter {
return NewRabin(r, 256<<10)
})
}
// Package chunk implements streaming block splitters.
// Splitters read data from a reader and provide byte slices (chunks)
// The size and contents of these slices depend on the splitting method
// used.
package chunk
import (
"io"
pool "github.com/libp2p/go-buffer-pool"
logging "gitlab.dms3.io/dms3/public/go-log"
)
var log = logging.Logger("chunk")
// A Splitter reads bytes from a Reader and creates "chunks" (byte slices)
// that can be used to build DAG nodes.
type Splitter interface {
Reader() io.Reader
NextBytes() ([]byte, error)
}
// SplitterGen is a splitter generator, given a reader.
type SplitterGen func(r io.Reader) Splitter
// DefaultSplitter returns a SizeSplitter with the DefaultBlockSize.
func DefaultSplitter(r io.Reader) Splitter {
return NewSizeSplitter(r, DefaultBlockSize)
}
// SizeSplitterGen returns a SplitterGen function which will create
// a splitter with the given size when called.
func SizeSplitterGen(size int64) SplitterGen {
return func(r io.Reader) Splitter {
return NewSizeSplitter(r, size)
}
}
// Chan returns a channel that receives each of the chunks produced
// by a splitter, along with another one for errors.
func Chan(s Splitter) (<-chan []byte, <-chan error) {
out := make(chan []byte)
errs := make(chan error, 1)
go func() {
defer close(out)
defer close(errs)
// all-chunks loop (keep creating chunks)
for {
b, err := s.NextBytes()
if err != nil {
errs <- err
return
}
out <- b
}
}()
return out, errs
}
type sizeSplitterv2 struct {
r io.Reader
size uint32
err error
}
// NewSizeSplitter returns a new size-based Splitter with the given block size.
func NewSizeSplitter(r io.Reader, size int64) Splitter {
return &sizeSplitterv2{
r: r,
size: uint32(size),
}
}
// NextBytes produces a new chunk.
func (ss *sizeSplitterv2) NextBytes() ([]byte, error) {
if ss.err != nil {
return nil, ss.err
}
full := pool.Get(int(ss.size))
n, err := io.ReadFull(ss.r, full)
switch err {
case io.ErrUnexpectedEOF:
ss.err = io.EOF
small := make([]byte, n)
copy(small, full)
pool.Put(full)
return small, nil
case nil:
return full, nil
default:
pool.Put(full)
return nil, err
}
}
// Reader returns the io.Reader associated to this Splitter.
func (ss *sizeSplitterv2) Reader() io.Reader {
return ss.r
}
package chunk
import (
"bytes"
"io"
"testing"
u "gitlab.dms3.io/dms3/public/go-dms3-util"
)
func randBuf(t *testing.T, size int) []byte {
buf := make([]byte, size)
if _, err := u.NewTimeSeededRand().Read(buf); err != nil {
t.Fatal("failed to read enough randomness")
}
return buf
}
func copyBuf(buf []byte) []byte {
cpy := make([]byte, len(buf))
copy(cpy, buf)
return cpy
}
func TestSizeSplitterOverAllocate(t *testing.T) {
max := 1000
r := bytes.NewReader(randBuf(t, max))
chunksize := int64(1024 * 256)
splitter := NewSizeSplitter(r, chunksize)
chunk, err := splitter.NextBytes()
if err != nil {
t.Fatal(err)
}
if cap(chunk) > len(chunk) {
t.Fatal("chunk capacity too large")
}
}
func TestSizeSplitterIsDeterministic(t *testing.T) {
if testing.Short() {
t.SkipNow()
}
test := func() {
bufR := randBuf(t, 10000000) // crank this up to satisfy yourself.
bufA := copyBuf(bufR)
bufB := copyBuf(bufR)
chunksA, _ := Chan(DefaultSplitter(bytes.NewReader(bufA)))
chunksB, _ := Chan(DefaultSplitter(bytes.NewReader(bufB)))
for n := 0; ; n++ {
a, moreA := <-chunksA
b, moreB := <-chunksB
if !moreA {
if moreB {
t.Fatal("A ended, B didnt.")
}
return
}
if !bytes.Equal(a, b) {
t.Fatalf("chunk %d not equal", n)
}
}
}
for run := 0; run < 1; run++ { // crank this up to satisfy yourself.
test()
}
}
func TestSizeSplitterFillsChunks(t *testing.T) {
if testing.Short() {
t.SkipNow()
}
max := 10000000
b := randBuf(t, max)
r := &clipReader{r: bytes.NewReader(b), size: 4000}
chunksize := int64(1024 * 256)
c, _ := Chan(NewSizeSplitter(r, chunksize))
sofar := 0
whole := make([]byte, max)
for chunk := range c {
bc := b[sofar : sofar+len(chunk)]
if !bytes.Equal(bc, chunk) {
t.Fatalf("chunk not correct: (sofar: %d) %d != %d, %v != %v", sofar, len(bc), len(chunk), bc[:100], chunk[:100])
}
copy(whole[sofar:], chunk)
sofar += len(chunk)
if sofar != max && len(chunk) < int(chunksize) {
t.Fatal("sizesplitter split at a smaller size")
}
}
if !bytes.Equal(b, whole) {
t.Fatal("splitter did not split right")
}
}
type clipReader struct {
size int
r io.Reader
}
func (s *clipReader) Read(buf []byte) (int, error) {
// clip the incoming buffer to produce smaller chunks
if len(buf) > s.size {
buf = buf[:s.size]
}
return s.r.Read(buf)
}
func BenchmarkDefault(b *testing.B) {
benchmarkChunker(b, func(r io.Reader) Splitter {
return DefaultSplitter(r)
})
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment