Unverified Commit 72733e3f authored by Steven Allen's avatar Steven Allen Committed by GitHub

Merge pull request #21 from ipfs/fix_rigorous_sizing_checks

Rigorous sizing checks
parents 207cdc1c a144aabc
0.1.6: QmYmZ81dU5nnmBFy5MmktXLZpt8QCWhRJd6M1uxVF6vke8
...@@ -9,11 +9,8 @@ go: ...@@ -9,11 +9,8 @@ go:
env: env:
global: global:
- GOTFLAGS="-race" - GOTFLAGS="-race"
matrix:
- BUILD_DEPTYPE=gx
- BUILD_DEPTYPE=gomod - BUILD_DEPTYPE=gomod
# disable travis install # disable travis install
install: install:
- true - true
...@@ -24,7 +21,6 @@ script: ...@@ -24,7 +21,6 @@ script:
cache: cache:
directories: directories:
- $GOPATH/src/gx
- $GOPATH/pkg/mod - $GOPATH/pkg/mod
- $HOME/.cache/go-build - $HOME/.cache/go-build
......
# Build/test targets for the pre-Go-modules gx dependency workflow.
# NOTE(review): recipe lines must be tab-indented in the real file; the
# indentation here appears to have been lost in transit — verify on disk.

# Default target: fetch and install all dependencies.
all: deps

# Install the gx package manager and its Go import-rewriting companion tool.
gx:
go get github.com/whyrusleeping/gx
go get github.com/whyrusleeping/gx-go

# Fetch the gx-pinned dependencies and rewrite import paths to point at them.
deps: gx
gx --verbose install --global
gx-go rewrite

# Run the test suite (race detector + coverage) through gx.
test: deps
gx test -v -race -coverprofile=coverage.txt -covermode=atomic .

# Rewrite canonical import paths to gx paths (for local development).
rw:
gx-go rewrite

# Undo the gx import-path rewrite, restoring canonical paths.
rwundo:
gx-go rewrite --undo

# Publish a new gx release; imports must be un-rewritten first.
publish: rwundo
gx publish

.PHONY: all gx deps test rw rwundo publish
...@@ -31,8 +31,6 @@ The package provides a `SizeSplitter` which creates chunks of equal size and it ...@@ -31,8 +31,6 @@ The package provides a `SizeSplitter` which creates chunks of equal size and it
> go get github.com/ipfs/go-ipfs-chunker > go get github.com/ipfs/go-ipfs-chunker
``` ```
It uses [Gx](https://github.com/whyrusleeping/gx) to manage dependencies. You can use `make all` to build it with the `gx` dependencies.
## Usage ## Usage
``` ```
......
{
"author": "hsanjuan",
"bugs": {
"url": "https://github.com/ipfs/go-ipfs-chunker"
},
"gx": {
"dvcsimport": "github.com/ipfs/go-ipfs-chunker"
},
"gxDependencies": [
{
"hash": "QmbkT7eMTyXfpeyB3ZMxxcxg7XH8t6uXp49jqzz4HB7BGF",
"name": "go-log",
"version": "1.5.9"
},
{
"author": "whyrusleeping",
"hash": "QmZooytqEoUwQjv7KzH4d3xyJnyvD3AWJaCDMYt5pbCtua",
"name": "chunker",
"version": "0.0.1"
},
{
"author": "whyrusleeping",
"hash": "QmNohiVssaPw3KVLZik59DBVGTSm2dGvYT9eoXt5DQ36Yz",
"name": "go-ipfs-util",
"version": "1.2.9"
},
{
"author": "stebalien",
"hash": "QmYYLnAzR28nAQ4U5MFniLprnktu6eTFKibeNt96V21EZK",
"name": "go-block-format",
"version": "0.2.2"
},
{
"author": "Stebalien",
"hash": "QmQDvJoB6aJWN3sjr3xsgXqKCXf4jU5zdMXpDMsBkYVNqa",
"name": "go-buffer-pool",
"version": "0.1.3"
}
],
"gxVersion": "0.12.1",
"language": "go",
"license": "MIT",
"name": "go-ipfs-chunker",
"releaseCmd": "git commit -a -m \"gx publish $VERSION\"",
"version": "0.1.6"
}
...@@ -8,9 +8,20 @@ import ( ...@@ -8,9 +8,20 @@ import (
"strings" "strings"
) )
const (
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
DefaultBlockSize int64 = 1024 * 256

// ChunkSizeLimit is the hard upper bound on chunker output: no leaf block
// should contain more than 1 MiB (1048576 bytes) of payload data (wrapping
// overhead aside). This effectively mandates the maximum chunk size.
// See the discussion at
// https://github.com/ipfs/go-ipfs-chunker/pull/21#discussion_r369124879
// for background.
ChunkSizeLimit int = 1048576
)
var ( var (
ErrRabinMin = errors.New("rabin min must be greater than 16") ErrRabinMin = errors.New("rabin min must be greater than 16")
ErrSize = errors.New("chunker size muster greater than 0") ErrSize = errors.New("chunker size must be greater than 0")
ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
) )
// FromString returns a Splitter depending on the given string: // FromString returns a Splitter depending on the given string:
...@@ -28,6 +39,8 @@ func FromString(r io.Reader, chunker string) (Splitter, error) { ...@@ -28,6 +39,8 @@ func FromString(r io.Reader, chunker string) (Splitter, error) {
return nil, err return nil, err
} else if size <= 0 { } else if size <= 0 {
return nil, ErrSize return nil, ErrSize
} else if size > ChunkSizeLimit {
return nil, ErrSizeMax
} }
return NewSizeSplitter(r, int64(size)), nil return NewSizeSplitter(r, int64(size)), nil
...@@ -51,6 +64,8 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { ...@@ -51,6 +64,8 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
size, err := strconv.Atoi(parts[1]) size, err := strconv.Atoi(parts[1])
if err != nil { if err != nil {
return nil, err return nil, err
} else if int(float32(size)*1.5) > ChunkSizeLimit { // FIXME - this will be addressed in a subsequent PR
return nil, ErrSizeMax
} }
return NewRabin(r, uint64(size)), nil return NewRabin(r, uint64(size)), nil
case 4: case 4:
...@@ -84,6 +99,14 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { ...@@ -84,6 +99,14 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
return nil, err return nil, err
} }
if min >= avg {
return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
} else if avg >= max {
return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
} else if max > ChunkSizeLimit {
return nil, ErrSizeMax
}
return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil
default: default:
return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'") return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
......
...@@ -2,35 +2,79 @@ package chunk ...@@ -2,35 +2,79 @@ package chunk
import ( import (
"bytes" "bytes"
"fmt"
"testing" "testing"
) )
const (
testTwoThirdsOfChunkLimit = 2 * (float32(ChunkSizeLimit) / float32(3))
)
func TestParseRabin(t *testing.T) { func TestParseRabin(t *testing.T) {
max := 1000 r := bytes.NewReader(randBuf(t, 1000))
r := bytes.NewReader(randBuf(t, max))
chk1 := "rabin-18-25-32" _, err := FromString(r, "rabin-18-25-32")
chk2 := "rabin-15-23-31"
_, err := parseRabinString(r, chk1)
if err != nil { if err != nil {
t.Errorf(err.Error()) t.Errorf(err.Error())
} }
_, err = parseRabinString(r, chk2)
if err == ErrRabinMin { _, err = FromString(r, "rabin-15-23-31")
t.Log("it should be ErrRabinMin here.") if err != ErrRabinMin {
t.Fatalf("Expected an 'ErrRabinMin' error, got: %#v", err)
}
_, err = FromString(r, "rabin-20-20-21")
if err == nil || err.Error() != "incorrect format: rabin-min must be smaller than rabin-avg" {
t.Fatalf("Expected an arg-out-of-order error, got: %#v", err)
}
_, err = FromString(r, "rabin-19-21-21")
if err == nil || err.Error() != "incorrect format: rabin-avg must be smaller than rabin-max" {
t.Fatalf("Expected an arg-out-of-order error, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", ChunkSizeLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-19-21-%d", 1+ChunkSizeLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-%.0f", testTwoThirdsOfChunkLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("rabin-%.0f", 1+testTwoThirdsOfChunkLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
} }
} }
func TestParseSize(t *testing.T) { func TestParseSize(t *testing.T) {
max := 1000 r := bytes.NewReader(randBuf(t, 1000))
r := bytes.NewReader(randBuf(t, max))
size1 := "size-0" _, err := FromString(r, "size-0")
size2 := "size-32" if err != ErrSize {
_, err := FromString(r, size1) t.Fatalf("Expected an 'ErrSize' error, got: %#v", err)
if err == ErrSize { }
t.Log("it should be ErrSize here.")
} _, err = FromString(r, "size-32")
_, err = FromString(r, size2) if err != nil {
if err == ErrSize { t.Fatalf("Expected success, got: %#v", err)
t.Fatal(err) }
_, err = FromString(r, fmt.Sprintf("size-%d", ChunkSizeLimit))
if err != nil {
t.Fatalf("Expected success, got: %#v", err)
}
_, err = FromString(r, fmt.Sprintf("size-%d", 1+ChunkSizeLimit))
if err != ErrSizeMax {
t.Fatalf("Expected 'ErrSizeMax', got: %#v", err)
} }
} }
...@@ -13,9 +13,6 @@ import ( ...@@ -13,9 +13,6 @@ import (
var log = logging.Logger("chunk") var log = logging.Logger("chunk")
// DefaultBlockSize is the chunk size that splitters produce (or aim to).
var DefaultBlockSize int64 = 1024 * 256
// A Splitter reads bytes from a Reader and creates "chunks" (byte slices) // A Splitter reads bytes from a Reader and creates "chunks" (byte slices)
// that can be used to build DAG nodes. // that can be used to build DAG nodes.
type Splitter interface { type Splitter interface {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment