Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
dms3
go-dms3
Commits
2c3f9f24
Commit
2c3f9f24
authored
9 years ago
by
Kristoffer Ström
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add hamming distance calculation to bloom filters
parent
24063341
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
170 additions
and
0 deletions
+170
-0
Godeps/Godeps.json
Godeps/Godeps.json
+5
-0
Godeps/_workspace/src/github.com/steakknife/hamming/README.md
...ps/_workspace/src/github.com/steakknife/hamming/README.md
+3
-0
Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
...s/_workspace/src/github.com/steakknife/hamming/hamming.go
+38
-0
Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
...rkspace/src/github.com/steakknife/hamming/hamming_test.go
+88
-0
blocks/bloom/filter.go
blocks/bloom/filter.go
+22
-0
blocks/bloom/filter_test.go
blocks/bloom/filter_test.go
+14
-0
No files found.
Godeps/Godeps.json
View file @
2c3f9f24
...
...
@@ -217,6 +217,11 @@
"ImportPath"
:
"github.com/mtchavez/jenkins"
,
"Rev"
:
"5a816af6ef21ef401bff5e4b7dd255d63400f497"
},
{
"ImportPath"
:
"github.com/steakknife/hamming"
,
"Comment"
:
"0.0.2-2-g9ad4a62"
,
"Rev"
:
"9ad4a620e3d573267a083c892f2b42a39302153b"
},
{
"ImportPath"
:
"github.com/syndtr/goleveldb/leveldb"
,
"Rev"
:
"87e4e645d80ae9c537e8f2dee52b28036a5dd75e"
...
...
This diff is collapsed.
Click to expand it.
Godeps/_workspace/src/github.com/steakknife/hamming/README.md
0 → 100644
View file @
2c3f9f24
Copyright (c) 2014 Barry Allard
MIT license
This diff is collapsed.
Click to expand it.
Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
0 → 100644
View file @
2c3f9f24
package
hamming
// SSE4.x PopCnt is 10x slower
// References: check out Hacker's Delight
// Bit masks used by the parallel bit-counting code in this package. A mask
// named mK keeps the low K bits of every 2K-bit group of a 64-bit word.
const (
	// m1 is binary 0101...: the low bit of every 2-bit pair.
	m1 = uint64(0x5555555555555555)
	// m2 is binary 00110011...: the low 2 bits of every 4-bit group.
	m2 = uint64(0x3333333333333333)
	// m4 is 4 zeros, 4 ones, ...: the low nibble of every byte.
	m4 = uint64(0x0f0f0f0f0f0f0f0f)
	// m8 is 8 zeros, 8 ones, ...: the low byte of every 16-bit group.
	m8 = uint64(0x00ff00ff00ff00ff)
	// m16 is 16 zeros, 16 ones, ...: the low half of every 32-bit group.
	m16 = uint64(0x0000ffff0000ffff)
	// m32 is 32 zeros, 32 ones: the low half of the 64-bit word.
	m32 = uint64(0x00000000ffffffff)
	// hff has all 64 bits set.
	hff = ^uint64(0)
	// h01 has the lowest bit of every byte set, i.e. the sum of 256 to the
	// power of 0, 1, 2, ..., 7.
	h01 = uint64(0x0101010101010101)
)
// table maps every possible byte value to the number of 1 bits it contains,
// so table[b] is the population count of b.
//
// The entries are derived with the recurrence
// count(i) = count(i >> 1) + (i & 1): dropping the lowest bit of i yields a
// smaller index whose count is already filled in, and the dropped bit is
// added back.
var table = func() [256]byte {
	var counts [256]byte
	for i := 1; i < len(counts); i++ {
		counts[i] = counts[i>>1] + byte(i&1)
	}
	return counts
}()
// hamming distance of two uint64's
func
Uint64
(
x
,
y
uint64
)
int
{
return
CountBitsUint64
(
x
^
y
)
}
// hamming distance of two bytes
func
Byte
(
x
,
y
byte
)
int
{
return
CountBitsByte
(
x
^
y
)
}
func
CountBitsUint64
(
x
uint64
)
int
{
x
-=
(
x
>>
1
)
&
m1
// put count of each 2 bits into those 2 bits
x
=
(
x
&
m2
)
+
((
x
>>
2
)
&
m2
)
// put count of each 4 bits into those 4 bits
x
=
(
x
+
(
x
>>
4
))
&
m4
// put count of each 8 bits into those 8 bits
return
int
((
x
*
h01
)
>>
56
)
// returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
}
func
CountBitsByte
(
x
byte
)
int
{
return
int
(
table
[
x
])
}
This diff is collapsed.
Click to expand it.
Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
0 → 100644
View file @
2c3f9f24
package
hamming
import
(
"testing"
)
// Fixture types for the table-driven tests below: each case pairs an input x
// with n, the bit count the test expects for it.
type (
	// testCountBitsUint64Case is a single CountBitsUint64 expectation.
	testCountBitsUint64Case struct {
		x uint64
		n int
	}

	// testCountBitsByteCase is a single CountBitsByte expectation.
	testCountBitsByteCase struct {
		x byte
		n int
	}
)
var
testCountBitsByteCases
=
[]
testCountBitsByteCase
{
{
0x00
,
0
},
{
0x01
,
1
},
{
0x02
,
1
},
{
0x03
,
2
},
{
0xaa
,
4
},
{
0x55
,
4
},
{
0x7f
,
7
},
{
0xff
,
8
},
}
var
testCountBitsUint64Cases
=
[]
testCountBitsUint64Case
{
{
0x00
,
0
},
{
0x01
,
1
},
{
0x02
,
1
},
{
0x03
,
2
},
{
0xaa
,
4
},
{
0x55
,
4
},
{
0x7f
,
7
},
{
0xff
,
8
},
{
0xffff
,
16
},
{
0xffffffff
,
32
},
{
0x1ffffffff
,
33
},
{
0x3ffffffff
,
34
},
{
0x7ffffffff
,
35
},
{
0xfffffffff
,
36
},
{
0x3fffffffffffffff
,
62
},
{
0x7fffffffffffffff
,
63
},
{
0xffffffffffffffff
,
64
},
}
func
TestCountBitByte
(
t
*
testing
.
T
)
{
for
_
,
c
:=
range
testCountBitsByteCases
{
if
actualN
:=
CountBitsByte
(
c
.
x
);
actualN
!=
c
.
n
{
t
.
Fatal
(
"CountBitsByte("
,
c
.
x
,
") = "
,
actualN
,
" != "
,
c
.
n
)
}
else
{
t
.
Log
(
"CountBitsByte("
,
c
.
x
,
") == "
,
c
.
n
)
}
}
}
func
TestCountBitUint64
(
t
*
testing
.
T
)
{
for
_
,
c
:=
range
testCountBitsUint64Cases
{
if
actualN
:=
CountBitsUint64
(
c
.
x
);
actualN
!=
c
.
n
{
t
.
Fatal
(
"CountBitsUint64("
,
c
.
x
,
") = "
,
actualN
,
" != "
,
c
.
n
)
}
else
{
t
.
Log
(
"CountBitsUint64("
,
c
.
x
,
") == "
,
c
.
n
)
}
}
}
func
BenchmarkCountBitsUint64
(
b
*
testing
.
B
)
{
j
:=
0
for
i
:=
0
;
i
<
b
.
N
;
i
++
{
CountBitsUint64
(
testCountBitsUint64Cases
[
j
]
.
x
)
j
++
if
j
==
len
(
testCountBitsUint64Cases
)
{
j
=
0
}
}
}
func
BenchmarkCountBitsByte
(
b
*
testing
.
B
)
{
j
:=
0
for
i
:=
0
;
i
<
b
.
N
;
i
++
{
CountBitsByte
(
testCountBitsByteCases
[
j
]
.
x
)
j
++
if
j
==
len
(
testCountBitsByteCases
)
{
j
=
0
}
}
}
This diff is collapsed.
Click to expand it.
blocks/bloom/filter.go
View file @
2c3f9f24
...
...
@@ -6,6 +6,7 @@ import (
"errors"
// Non crypto hash, because speed
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming"
"hash"
)
...
...
@@ -13,6 +14,7 @@ type Filter interface {
Add
([]
byte
)
Find
([]
byte
)
bool
Merge
(
Filter
)
(
Filter
,
error
)
HammingDistance
(
Filter
)
(
int
,
error
)
}
func
NewFilter
(
size
int
)
Filter
{
...
...
@@ -100,3 +102,23 @@ func (f *filter) Merge(o Filter) (Filter, error) {
return
nfilt
,
nil
}
func
(
f
*
filter
)
HammingDistance
(
o
Filter
)
(
int
,
error
)
{
casfil
,
ok
:=
o
.
(
*
filter
)
if
!
ok
{
return
0
,
errors
.
New
(
"Unsupported filter type"
)
}
if
len
(
f
.
filter
)
!=
len
(
casfil
.
filter
)
{
return
0
,
errors
.
New
(
"filter lengths must match!"
)
}
acc
:=
0
// xor together
for
i
:=
0
;
i
<
len
(
f
.
filter
);
i
++
{
acc
+=
hamming
.
Byte
(
f
.
filter
[
i
],
casfil
.
filter
[
i
])
}
return
acc
,
nil
}
This diff is collapsed.
Click to expand it.
blocks/bloom/filter_test.go
View file @
2c3f9f24
...
...
@@ -78,3 +78,17 @@ func TestMerge(t *testing.T) {
}
}
}
func
TestHamming
(
t
*
testing
.
T
)
{
f1
:=
NewFilter
(
128
)
f2
:=
NewFilter
(
128
)
f1
.
Add
([]
byte
(
"no collision"
))
f1
.
Add
([]
byte
(
"collision? no!"
))
dist
,
_
:=
f1
.
HammingDistance
(
f2
)
if
dist
!=
6
{
t
.
Fatal
(
"Should have 6 bit difference"
)
}
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment