From 397f536c853dd5e6145e26c0720a736f18c2e4f2 Mon Sep 17 00:00:00 2001 From: Peter Rabbitson Date: Tue, 21 Jan 2020 16:57:58 +0100 Subject: [PATCH] Add various sanity checks for size specifications --- parse.go | 30 +++++++++++++++++++++++++++++- splitting.go | 3 --- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/parse.go b/parse.go index 5d472b7..59656bf 100644 --- a/parse.go +++ b/parse.go @@ -8,9 +8,25 @@ import ( "strings" ) +const ( + // DefaultBlockSize is the chunk size that splitters produce (or aim to). + DefaultBlockSize int64 = 1024 * 256 + + // 1 MB, on-wire block size for "datablocks ( unixfs, etc )" + // copy of https://github.com/ipfs/go-unixfs/blob/v0.2.3/importer/helpers/helpers.go#L8 + BlockSizeLimit int = 1048576 + + // in case we are using raw-leaves: this would match BlockSizeLimit, but we can't assume that + // be conservative and subtract the PB wrapping size of a full DAG-PB+UnixFS node describing 1M + // (2b(type2/file)+4b(data-field:3-byte-len-delimited)+4b(size-field:3-byte-varint))+(4b(DAG-type-1:3-byte-len-delimited)) + // FIXME - this calculation will need an update for CBOR + BlockPayloadLimit int = (BlockSizeLimit - (2 + 4 + 4 + 4)) +) + var ( ErrRabinMin = errors.New("rabin min must be greater than 16") - ErrSize = errors.New("chunker size muster greater than 0") + ErrSize = errors.New("chunker size must be greater than 0") + ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum block payload size of %d", BlockPayloadLimit) ) // FromString returns a Splitter depending on the given string: @@ -28,6 +44,8 @@ func FromString(r io.Reader, chunker string) (Splitter, error) { return nil, err } else if size <= 0 { return nil, ErrSize + } else if size > BlockPayloadLimit { + return nil, ErrSizeMax } return NewSizeSplitter(r, int64(size)), nil @@ -51,6 +69,8 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { size, err := strconv.Atoi(parts[1]) if err != nil { return nil, err + } else if 
int(float32(size)*1.5) > BlockPayloadLimit { // FIXME - there is probably a better way to bubble up this calculation from NewRabin() + return nil, ErrSizeMax } return NewRabin(r, uint64(size)), nil case 4: @@ -84,6 +104,14 @@ func parseRabinString(r io.Reader, chunker string) (Splitter, error) { return nil, err } + if min >= avg { + return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg") + } else if avg >= max { + return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max") + } else if max > BlockPayloadLimit { + return nil, ErrSizeMax + } + return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil default: return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'") diff --git a/splitting.go b/splitting.go index 2b23739..a137820 100644 --- a/splitting.go +++ b/splitting.go @@ -13,9 +13,6 @@ import ( var log = logging.Logger("chunk") -// DefaultBlockSize is the chunk size that splitters produce (or aim to). -var DefaultBlockSize int64 = 1024 * 256 - // A Splitter reads bytes from a Reader and creates "chunks" (byte slices) // that can be used to build DAG nodes. type Splitter interface { -- GitLab