diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json
index 44ba61885a97644a6a643979ac0d89a9186069d9..3d3b556318be94b86974b084cfbf037a625adc92 100644
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@@ -217,6 +217,11 @@
 			"ImportPath": "github.com/mtchavez/jenkins",
 			"Rev": "5a816af6ef21ef401bff5e4b7dd255d63400f497"
 		},
+		{
+			"ImportPath": "github.com/steakknife/hamming",
+			"Comment": "0.0.2-2-g9ad4a62",
+			"Rev": "9ad4a620e3d573267a083c892f2b42a39302153b"
+		},
 		{
 			"ImportPath": "github.com/syndtr/goleveldb/leveldb",
 			"Rev": "87e4e645d80ae9c537e8f2dee52b28036a5dd75e"
diff --git a/Godeps/_workspace/src/github.com/steakknife/hamming/README.md b/Godeps/_workspace/src/github.com/steakknife/hamming/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..21ba7a929c4e089475f989a92a7376c49ba18ea9
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/steakknife/hamming/README.md
@@ -0,0 +1,3 @@
+Copyright (c) 2014 Barry Allard
+
+MIT license
diff --git a/Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go b/Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
new file mode 100644
index 0000000000000000000000000000000000000000..ae23987f1c92f64555f5914e2dd7314ab325cc15
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
@@ -0,0 +1,38 @@
+package hamming
+
+// SSE4.x PopCnt is 10x slower
+// References: check out Hacker's Delight
+
+const (
+	m1  uint64 = 0x5555555555555555 //binary: 0101...
+	m2  uint64 = 0x3333333333333333 //binary: 00110011..
+	m4  uint64 = 0x0f0f0f0f0f0f0f0f //binary: 4 zeros, 4 ones ...
+	m8  uint64 = 0x00ff00ff00ff00ff //binary: 8 zeros, 8 ones ...
+	m16 uint64 = 0x0000ffff0000ffff //binary: 16 zeros, 16 ones ...
+	m32 uint64 = 0x00000000ffffffff //binary: 32 zeros, 32 ones
+	hff uint64 = 0xffffffffffffffff //binary: all ones
+	h01 uint64 = 0x0101010101010101 //the sum of 256 to the power of 0,1,2,3...
+)
+
+var table = [256]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}
+
+// hamming distance of two uint64's
+func Uint64(x, y uint64) int {
+	return CountBitsUint64(x ^ y)
+}
+
+// hamming distance of two bytes
+func Byte(x, y byte) int {
+	return CountBitsByte(x ^ y)
+}
+
+func CountBitsUint64(x uint64) int {
+	x -= (x >> 1) & m1             // put count of each 2 bits into those 2 bits
+	x = (x & m2) + ((x >> 2) & m2) // put count of each 4 bits into those 4 bits
+	x = (x + (x >> 4)) & m4        // put count of each 8 bits into those 8 bits
+	return int((x * h01) >> 56)    // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
+}
+
+func CountBitsByte(x byte) int {
+	return int(table[x])
+}
diff --git a/Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go b/Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..230596255a48d90e045f99aae9bf645803adb4b9
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
@@ -0,0 +1,88 @@
+package hamming
+
+import (
+	"testing"
+)
+
+type testCountBitsUint64Case struct {
+	x uint64
+	n int
+}
+
+type testCountBitsByteCase struct {
+	x byte
+	n int
+}
+
+var testCountBitsByteCases = []testCountBitsByteCase{
+	{0x00, 0},
+	{0x01, 1},
+	{0x02, 1},
+	{0x03, 2},
+	{0xaa, 4},
+	{0x55, 4},
+	{0x7f, 7},
+	{0xff, 8},
+}
+
+var testCountBitsUint64Cases = []testCountBitsUint64Case{
+	{0x00, 0},
+	{0x01, 1},
+	{0x02, 1},
+	{0x03, 2},
+	{0xaa, 4},
+	{0x55, 4},
+	{0x7f, 7},
+	{0xff, 8},
+	{0xffff, 16},
+	{0xffffffff, 32},
+	{0x1ffffffff, 33},
+	{0x3ffffffff, 34},
+	{0x7ffffffff, 35},
+	{0xfffffffff, 36},
+	{0x3fffffffffffffff, 62},
+	{0x7fffffffffffffff, 63},
+	{0xffffffffffffffff, 64},
+}
+
+func TestCountBitByte(t *testing.T) {
+	for _, c := range testCountBitsByteCases {
+		if actualN := CountBitsByte(c.x); actualN != c.n {
+			t.Fatal("CountBitsByte(", c.x, ") = ", actualN, " != ", c.n)
+		} else {
+			t.Log("CountBitsByte(", c.x, ") == ", c.n)
+		}
+	}
+}
+
+func TestCountBitUint64(t *testing.T) {
+	for _, c := range testCountBitsUint64Cases {
+		if actualN := CountBitsUint64(c.x); actualN != c.n {
+			t.Fatal("CountBitsUint64(", c.x, ") = ", actualN, " != ", c.n)
+		} else {
+			t.Log("CountBitsUint64(", c.x, ") == ", c.n)
+		}
+	}
+}
+
+func BenchmarkCountBitsUint64(b *testing.B) {
+	j := 0
+	for i := 0; i < b.N; i++ {
+		CountBitsUint64(testCountBitsUint64Cases[j].x)
+		j++
+		if j == len(testCountBitsUint64Cases) {
+			j = 0
+		}
+	}
+}
+
+func BenchmarkCountBitsByte(b *testing.B) {
+	j := 0
+	for i := 0; i < b.N; i++ {
+		CountBitsByte(testCountBitsByteCases[j].x)
+		j++
+		if j == len(testCountBitsByteCases) {
+			j = 0
+		}
+	}
+}
diff --git a/blocks/bloom/filter.go b/blocks/bloom/filter.go
index 2697c3eab77cb456827cd309d7b04f7c897cea6f..64a2db04286ae7643cfda9e171eff364f828ecfe 100644
--- a/blocks/bloom/filter.go
+++ b/blocks/bloom/filter.go
@@ -6,6 +6,7 @@ import (
 	"errors"
 	// Non crypto hash, because speed
 	"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
+	"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming"
 	"hash"
 )
 
@@ -13,6 +14,7 @@ type Filter interface {
 	Add([]byte)
 	Find([]byte) bool
 	Merge(Filter) (Filter, error)
+	HammingDistance(Filter) (int, error)
 }
 
 func NewFilter(size int) Filter {
@@ -100,3 +102,25 @@ func (f *filter) Merge(o Filter) (Filter, error) {
 
 	return nfilt, nil
 }
+
+// HammingDistance returns the number of bits that differ between f and o,
+// summed position by position over their filter data. It returns an error
+// if o is not a *filter or if the two filters have different lengths.
+func (f *filter) HammingDistance(o Filter) (int, error) {
+	casfil, ok := o.(*filter)
+	if !ok {
+		return 0, errors.New("Unsupported filter type")
+	}
+
+	if len(f.filter) != len(casfil.filter) {
+		return 0, errors.New("filter lengths must match!")
+	}
+
+	// Accumulate the Hamming distance of each pair of corresponding elements.
+	acc := 0
+	for i := 0; i < len(f.filter); i++ {
+		acc += hamming.Byte(f.filter[i], casfil.filter[i])
+	}
+
+	return acc, nil
+}
diff --git a/blocks/bloom/filter_test.go b/blocks/bloom/filter_test.go
index 8cdb0eddde01da61fa4f4db7c067b8f9aa2408ee..83a500e32a03d5a2ab78de7daa19d7decfa72bb3 100644
--- a/blocks/bloom/filter_test.go
+++ b/blocks/bloom/filter_test.go
@@ -78,3 +78,22 @@ func TestMerge(t *testing.T) {
 		}
 	}
 }
+
+// TestHamming checks the Hamming distance between a filter holding two
+// entries and a filter of the same size with nothing added.
+func TestHamming(t *testing.T) {
+	f1 := NewFilter(128)
+	f2 := NewFilter(128)
+
+	f1.Add([]byte("no collision"))
+	f1.Add([]byte("collision? no!"))
+
+	dist, err := f1.HammingDistance(f2)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if dist != 6 {
+		t.Fatalf("Should have 6 bit difference, got %d", dist)
+	}
+}