package chunk

import (
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

const (
	// DefaultBlockSize is the chunk size that splitters produce (or aim to).
	DefaultBlockSize int64 = 1024 * 256

	// ChunkSizeLimit is the largest chunk size, in bytes, that the chunker
	// parameters parsed in this file are allowed to request (1 MiB).
	//
	// No leaf block should contain more than 1MiB of payload data (wrapping
	// overhead aside). This effectively mandates the maximum chunk size.
	// See discussion at https://gitlab.dms3.io/dms3/go-dms3-chunker/pull/21#discussion_r369124879 for background
	ChunkSizeLimit int = 1048576
)

var (
	// ErrRabinMin is returned when the min value of a
	// "rabin-{min}-{avg}-{max}" chunker string is below 16.
	ErrRabinMin = errors.New("rabin min must be greater than 16")

	// ErrSize is returned when the size of a "size-{size}" chunker string is
	// zero or negative.
	ErrSize = errors.New("chunker size must be greater than 0")

	// ErrSizeMax is returned when a chunker string asks for a chunk size
	// larger than ChunkSizeLimit.
	ErrSizeMax = fmt.Errorf("chunker parameters may not exceed the maximum chunk size of %d", ChunkSizeLimit)
)

// FromString returns a Splitter depending on the given string:
Jakub Sztandera's avatar
Jakub Sztandera committed
28 29
// it supports "default" (""), "size-{size}", "rabin", "rabin-{blocksize}",
// "rabin-{min}-{avg}-{max}" and "buzhash".
30 31 32
func FromString(r io.Reader, chunker string) (Splitter, error) {
	switch {
	case chunker == "" || chunker == "default":
33
		return DefaultSplitter(r), nil
34 35 36 37 38 39

	case strings.HasPrefix(chunker, "size-"):
		sizeStr := strings.Split(chunker, "-")[1]
		size, err := strconv.Atoi(sizeStr)
		if err != nil {
			return nil, err
Kejie Zhang's avatar
Kejie Zhang committed
40 41
		} else if size <= 0 {
			return nil, ErrSize
42
		} else if size > ChunkSizeLimit {
43
			return nil, ErrSizeMax
44 45 46 47 48 49
		}
		return NewSizeSplitter(r, int64(size)), nil

	case strings.HasPrefix(chunker, "rabin"):
		return parseRabinString(r, chunker)

Jakub Sztandera's avatar
Jakub Sztandera committed
50 51 52
	case chunker == "buzhash":
		return NewBuzhash(r), nil

53 54 55 56 57 58 59 60 61 62 63 64 65 66
	default:
		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)
	}
}

func parseRabinString(r io.Reader, chunker string) (Splitter, error) {
	parts := strings.Split(chunker, "-")
	switch len(parts) {
	case 1:
		return NewRabin(r, uint64(DefaultBlockSize)), nil
	case 2:
		size, err := strconv.Atoi(parts[1])
		if err != nil {
			return nil, err
67
		} else if int(float32(size)*1.5) > ChunkSizeLimit { // FIXME - this will be addressed in a subsequent PR
68
			return nil, ErrSizeMax
69 70 71 72 73 74 75 76
		}
		return NewRabin(r, uint64(size)), nil
	case 4:
		sub := strings.Split(parts[1], ":")
		if len(sub) > 1 && sub[0] != "min" {
			return nil, errors.New("first label must be min")
		}
		min, err := strconv.Atoi(sub[len(sub)-1])
Kejie Zhang's avatar
Kejie Zhang committed
77
		if err != nil {
78 79
			return nil, err
		}
80
		if min < 16 {
Kejie Zhang's avatar
Kejie Zhang committed
81
			return nil, ErrRabinMin
82
		}
83 84
		sub = strings.Split(parts[2], ":")
		if len(sub) > 1 && sub[0] != "avg" {
Kejie Zhang's avatar
Kejie Zhang committed
85
			log.Error("sub == ", sub)
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
			return nil, errors.New("second label must be avg")
		}
		avg, err := strconv.Atoi(sub[len(sub)-1])
		if err != nil {
			return nil, err
		}

		sub = strings.Split(parts[3], ":")
		if len(sub) > 1 && sub[0] != "max" {
			return nil, errors.New("final label must be max")
		}
		max, err := strconv.Atoi(sub[len(sub)-1])
		if err != nil {
			return nil, err
		}

102 103 104 105
		if min >= avg {
			return nil, errors.New("incorrect format: rabin-min must be smaller than rabin-avg")
		} else if avg >= max {
			return nil, errors.New("incorrect format: rabin-avg must be smaller than rabin-max")
106
		} else if max > ChunkSizeLimit {
107 108 109
			return nil, ErrSizeMax
		}

110 111 112 113 114
		return NewRabinMinMax(r, uint64(min), uint64(avg), uint64(max)), nil
	default:
		return nil, errors.New("incorrect format (expected 'rabin' 'rabin-[avg]' or 'rabin-[min]-[avg]-[max]'")
	}
}