splitting.go 1.58 KB
Newer Older
1 2
package importer

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
import (
	"io"

	u "github.com/jbenet/go-ipfs/util"
)

type BlockSplitter func(io.Reader) chan []byte

func SplitterBySize(n int) BlockSplitter {
	return func(r io.Reader) chan []byte {
		out := make(chan []byte)
		go func(n int) {
			defer close(out)
			for {
				chunk := make([]byte, n)
				nread, err := r.Read(chunk)
				if err != nil {
					if err == io.EOF {
						return
					}
					u.PErr("block split error: %v\n", err)
					return
				}
				if nread < n {
					chunk = chunk[:n]
				}
				out <- chunk
			}
		}(n)
		return out
	}
}
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72

// TODO: this should take a reader, not a byte array. what if we're splitting a 3TB file?
func Rabin(b []byte) [][]byte {
	var out [][]byte
	windowsize := uint64(48)
	chunk_max := 1024 * 16
	min_blk_size := 2048
	blk_beg_i := 0
	prime := uint64(61)

	var poly uint64
	var curchecksum uint64

	// Smaller than a window?  Get outa here!
	if len(b) <= int(windowsize) {
		return [][]byte{b}
	}

	i := 0
	for n := i; i < n+int(windowsize); i++ {
		cur := uint64(b[i])
		curchecksum = (curchecksum * prime) + cur
		poly = (poly * prime) + cur
	}

	for ; i < len(b); i++ {
		cur := uint64(b[i])
		curchecksum = (curchecksum * prime) + cur
		poly = (poly * prime) + cur
		curchecksum -= (uint64(b[i-1]) * prime)

		if i-blk_beg_i >= chunk_max {
			// push block
			out = append(out, b[blk_beg_i:i])
			blk_beg_i = i
		}

		// first 13 bits of polynomial are 0
73
		if poly%8192 == 0 && i-blk_beg_i >= min_blk_size {
74 75 76 77 78 79 80 81 82 83
			// push block
			out = append(out, b[blk_beg_i:i])
			blk_beg_i = i
		}
	}
	if i > blk_beg_i {
		out = append(out, b[blk_beg_i:])
	}
	return out
}