splitting.go 1.61 KB
Newer Older
1 2
package importer

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
import (
	"io"

	u "github.com/jbenet/go-ipfs/util"
)

type BlockSplitter func(io.Reader) chan []byte

func SplitterBySize(n int) BlockSplitter {
	return func(r io.Reader) chan []byte {
		out := make(chan []byte)
		go func(n int) {
			defer close(out)
			for {
				chunk := make([]byte, n)
				nread, err := r.Read(chunk)
				if err != nil {
					if err == io.EOF {
						return
					}
					u.PErr("block split error: %v\n", err)
					return
				}
				if nread < n {
27
					chunk = chunk[:nread]
28 29 30 31 32 33 34
				}
				out <- chunk
			}
		}(n)
		return out
	}
}
35 36

// TODO: this should take a reader, not a byte array. what if we're splitting a 3TB file?
Siraj Ravel's avatar
Siraj Ravel committed
37
//Rabin Fingerprinting for file chunking
38 39 40
func Rabin(b []byte) [][]byte {
	var out [][]byte
	windowsize := uint64(48)
Siraj Ravel's avatar
Siraj Ravel committed
41 42 43
	chunkMax := 1024 * 16
	minBlkSize := 2048
	blkBegI := 0
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
	prime := uint64(61)

	var poly uint64
	var curchecksum uint64

	// Smaller than a window?  Get outa here!
	if len(b) <= int(windowsize) {
		return [][]byte{b}
	}

	i := 0
	for n := i; i < n+int(windowsize); i++ {
		cur := uint64(b[i])
		curchecksum = (curchecksum * prime) + cur
		poly = (poly * prime) + cur
	}

	for ; i < len(b); i++ {
		cur := uint64(b[i])
		curchecksum = (curchecksum * prime) + cur
		poly = (poly * prime) + cur
		curchecksum -= (uint64(b[i-1]) * prime)

Siraj Ravel's avatar
Siraj Ravel committed
67
		if i-blkgBegI >= chunkMax {
68
			// push block
Siraj Ravel's avatar
Siraj Ravel committed
69 70
			out = append(out, b[blkgBegI:i])
			blkgBegI = i
71 72 73
		}

		// first 13 bits of polynomial are 0
Siraj Ravel's avatar
Siraj Ravel committed
74
		if poly%8192 == 0 && i-blkgBegI >= minBlkSize {
75
			// push block
Siraj Ravel's avatar
Siraj Ravel committed
76 77
			out = append(out, b[blkgBegI:i])
			blkgBegI = i
78 79
		}
	}
Siraj Ravel's avatar
Siraj Ravel committed
80 81
	if i > blkgBegI {
		out = append(out, b[blkgBegI:])
82 83 84
	}
	return out
}