Commit d343883e authored by Petar Maymounkov

Xor Trie implementation.

parents
package xortrie
// Add adds the key q to trie, returning a new trie.
// Add is immutable/non-destructive: The original trie remains unchanged.
// Subtries that the insertion does not touch are shared with the original
// rather than copied. Insertion starts at bit offset 0 of q.
func Add(trie *XorTrie, q TrieKey) *XorTrie {
return add(0, trie, q)
}
// add returns a trie equal to trie with the key q inserted, where trie is a
// subtrie rooted at the given bit depth. The input trie is never modified;
// subtries off the insertion path are shared with the result.
//
// The leaf cases are handled before any bit of q is inspected, so q.BitAt is
// only called for depths at which a branching decision is actually needed.
func add(depth int, trie *XorTrie, q TrieKey) *XorTrie {
	if !trie.isLeaf() {
		// Internal node: rebuild this node, descending along q's bit and
		// reusing the sibling subtrie unchanged.
		dir := q.BitAt(depth)
		s := &XorTrie{}
		s.branch[dir] = add(depth+1, trie.branch[dir], q)
		s.branch[1-dir] = trie.branch[1-dir]
		return s
	}
	if trie.key == nil {
		// Empty leaf: the key simply takes its place.
		return &XorTrie{key: q}
	}
	if TrieKeyEqual(trie.key, q) {
		// Key already present: nothing to change.
		return trie
	}

	// Occupied leaf holding a different key: split it.
	dir := q.BitAt(depth)
	s := &XorTrie{}
	if dir == trie.key.BitAt(depth) {
		// Both keys continue in the same direction; keep splitting below and
		// leave an empty leaf on the other side.
		s.branch[dir] = add(depth+1, &XorTrie{key: trie.key}, q)
		s.branch[1-dir] = &XorTrie{}
	} else {
		// The keys diverge at this bit: each becomes a leaf on its own side.
		s.branch[dir] = &XorTrie{key: q}
		s.branch[1-dir] = &XorTrie{key: trie.key}
	}
	return s
}
package xortrie
import "testing"
// TestMutableAndImmutableAddSame checks that, for every sample, inserting the
// same keys with the mutating method and with the immutable function yields
// equal tries.
func TestMutableAndImmutableAddSame(t *testing.T) {
	for _, sample := range testAddSameSamples {
		mutable, immutable := NewXorTrie(), NewXorTrie()
		for i := range sample.Keys {
			key := sample.Keys[i]
			mutable.Add(key)
			immutable = Add(immutable, key)
		}
		if XorTrieEqual(mutable, immutable) {
			continue
		}
		t.Errorf("mutable trie %v differs from immutable trie %v", mutable, immutable)
	}
}
// testAddSameSample is one test case for TestMutableAndImmutableAddSame.
type testAddSameSample struct {
// Keys are inserted, in order, into both the mutable and the immutable trie.
Keys []TrieKey
}
// testAddSameSamples lists the cases run by TestMutableAndImmutableAddSame.
// The single sample below contains exactly one six-byte key.
// NOTE(review): if several one-byte keys were intended, each needs its own
// TrieKey literal — confirm.
var testAddSameSamples = []*testAddSameSample{
{Keys: []TrieKey{{1, 3, 5, 7, 11, 13}}},
}
package xortrie
// XorTrieEqual reports whether p and q have the same shape and hold equal
// keys in corresponding leaves. A leaf never equals an internal node.
func XorTrieEqual(p, q *XorTrie) bool {
	pLeaf, qLeaf := p.isLeaf(), q.isLeaf()
	if pLeaf != qLeaf {
		return false
	}
	if pLeaf {
		return TrieKeyEqual(p.key, q.key)
	}
	return XorTrieEqual(p.branch[0], q.branch[0]) &&
		XorTrieEqual(p.branch[1], q.branch[1])
}
package xortrie
// Intersect computes the intersection of the keys in p and q.
// p and q must be non-nil. The returned trie is never nil.
// Both arguments are treated as tries rooted at bit depth 0.
func Intersect(p, q *XorTrie) *XorTrie {
return intersect(0, p, q)
}
// intersect returns a new trie holding the keys present in both p and q,
// where p and q are subtries rooted at the same bit depth. The result is
// never nil; an empty intersection is an empty leaf.
func intersect(depth int, p, q *XorTrie) *XorTrie {
	switch {
	case p.isLeaf() && q.isLeaf():
		if p.isEmpty() || q.isEmpty() || !TrieKeyEqual(p.key, q.key) {
			return &XorTrie{} // empty set
		}
		return &XorTrie{key: p.key} // singleton
	case p.isLeaf():
		// p holds at most one key; look it up in the subtrie q, starting at
		// the depth q is rooted at.
		if p.isEmpty() {
			return &XorTrie{} // empty set
		}
		if _, found := q.find(depth, p.key); found {
			return &XorTrie{key: p.key}
		}
		return &XorTrie{} // empty set
	case q.isLeaf():
		// Symmetric to the previous case. The current depth must be carried
		// along: restarting at depth 0 would make the lookup test the wrong
		// bit for any subtrie below the root.
		return intersect(depth, q, p)
	default:
		// Both are internal nodes: intersect branch by branch, then collapse
		// the node if the result no longer needs to branch.
		disjointUnion := &XorTrie{
			branch: [2]*XorTrie{
				intersect(depth+1, p.branch[0], q.branch[0]),
				intersect(depth+1, p.branch[1], q.branch[1]),
			},
		}
		disjointUnion.shrink()
		return disjointUnion
	}
}
package xortrie
import "testing"
// TestIntersectRandom runs testIntersect on every entry of testIntersectSamples.
func TestIntersectRandom(t *testing.T) {
	for i := range testIntersectSamples {
		testIntersect(t, testIntersectSamples[i])
	}
}
// testIntersect builds tries from the sample's left and right key lists,
// intersects them, and compares the result with a trie built from the
// slice-based intersection of the same key lists.
func testIntersect(t *testing.T, sample *testIntersectSample) {
	build := func(keys []TrieKey) *XorTrie {
		trie := NewXorTrie()
		for _, k := range keys {
			trie.Add(k)
		}
		return trie
	}

	left := build(sample.LeftKeys)
	right := build(sample.RightKeys)
	expected := build(setIntersect(sample.LeftKeys, sample.RightKeys))

	if got := Intersect(left, right); !XorTrieEqual(expected, got) {
		t.Errorf("intersection of %v and %v: expected %v, got %v",
			sample.LeftKeys, sample.RightKeys, expected, got)
	}
}
// setIntersect returns the keys of right that equal some key of left, found
// by comparing every pair. One entry is appended per matching pair, and the
// result is a non-nil slice even when there are no matches.
func setIntersect(left, right []TrieKey) []TrieKey {
	common := make([]TrieKey, 0)
	for i := range left {
		for j := range right {
			if !TrieKeyEqual(left[i], right[j]) {
				continue
			}
			common = append(common, right[j])
		}
	}
	return common
}
// testIntersectSample is one test case for testIntersect: two key lists whose
// tries are intersected.
type testIntersectSample struct {
// LeftKeys are the keys inserted into the left trie.
LeftKeys []TrieKey
// RightKeys are the keys inserted into the right trie.
RightKeys []TrieKey
}
// testIntersectSamples lists the fixed cases run by TestIntersectRandom.
// As written, each side of every sample holds a single multi-byte key, and
// the second sample pairs a six-byte key with a three-byte key.
// NOTE(review): if lists of one-byte keys were intended, each byte needs its
// own TrieKey literal — confirm.
var testIntersectSamples = []*testIntersectSample{
{
LeftKeys: []TrieKey{{1, 2, 3}},
RightKeys: []TrieKey{{1, 3, 5}},
},
{
LeftKeys: []TrieKey{{1, 2, 3, 4, 5, 6}},
RightKeys: []TrieKey{{3, 5, 7}},
},
{
LeftKeys: []TrieKey{{23, 3, 7, 13, 17}},
RightKeys: []TrieKey{{2, 11, 17, 19, 23}},
},
}
package xortrie
import "bytes"
// TrieKey is a vector of bits backed by a Go byte slice.
// Bit i is stored in byte i/8. Within a byte, bits are numbered starting from
// the least-significant bit, so bit 0 of the key is the lowest bit of byte 0
// and bit 7 is the highest bit of byte 0.
type TrieKey []byte

// BitAt returns the bit (0 or 1) at the given bit offset.
// It panics if offset is outside the range [0, BitLen()).
func (bs TrieKey) BitAt(offset int) byte {
	return (bs[offset/8] >> (offset % 8)) & 1
}

// BitLen returns the number of bits in the key.
func (bs TrieKey) BitLen() int {
	return len(bs) * 8
}
// TrieKeyEqual reports whether x and y contain the same bytes, as decided by
// bytes.Equal.
func TrieKeyEqual(x, y TrieKey) bool {
return bytes.Equal(x, y)
}
package xortrie
// XorTrie is a trie for equal-length bit vectors, which stores values only in the leaves.
// A node is a leaf when both of its branches are nil; a node is empty when its key is nil.
// XorTrie node invariants:
// (1) Either both branches are nil, or both are non-nil.
// (2) If both branches are leaves, then they are both non-empty (have keys).
type XorTrie struct {
// branch holds the two subtries, indexed by the value (0 or 1) of the key
// bit examined at this node's depth.
branch [2]*XorTrie
// key is the key stored at this node, or nil if no key is stored here.
key TrieKey
}
// NewXorTrie returns a new trie consisting of a single empty leaf.
func NewXorTrie() *XorTrie {
return &XorTrie{}
}
// Depth returns the depth of the deepest leaf in the trie, counting the root
// as depth 0.
func (trie *XorTrie) Depth() int {
return trie.depth(0)
}
// depth returns the greatest depth of any leaf below trie, given that trie
// itself sits at the supplied depth.
func (trie *XorTrie) depth(depth int) int {
	left, right := trie.branch[0], trie.branch[1]
	if left != nil || right != nil {
		return max(left.depth(depth+1), right.depth(depth+1))
	}
	return depth
}
// max returns the larger of x and y.
func max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
// Find looks up the key q, starting at the root (bit offset 0).
// It returns the depth of the leaf that the search reached and whether that
// leaf holds q.
func (trie *XorTrie) Find(q TrieKey) (reachedDepth int, found bool) {
return trie.find(0, q)
}
// find looks up q in the subtrie rooted at trie, which sits at the given bit
// depth. It returns the depth of the leaf reached and whether that leaf holds q.
//
// The leaf check comes first so that q.BitAt is only evaluated at internal
// nodes; a leaf may sit at a depth equal to the key's bit length, where
// reading a further bit would index past the end of the key.
func (trie *XorTrie) find(depth int, q TrieKey) (reachedDepth int, found bool) {
	if trie.isLeaf() {
		return depth, trie.key != nil && TrieKeyEqual(trie.key, q)
	}
	// Internal node: by invariant both branches are non-nil.
	return trie.branch[q.BitAt(depth)].find(depth+1, q)
}
// Add adds the key q to the trie. Add mutates the trie.
// It returns the depth of the leaf where q ends up and whether q was newly
// inserted (false if q was already present).
// For a non-mutating insertion, see the package-level Add function.
func (trie *XorTrie) Add(q TrieKey) (insertedDepth int, insertedOK bool) {
return trie.add(0, q)
}
// add inserts q into the subtrie rooted at trie, which sits at the given bit
// depth, mutating it in place. It returns the depth of the leaf holding q and
// whether q was newly inserted.
//
// The leaf check comes first so that q.BitAt is only evaluated when a
// branching decision is needed; an empty leaf may sit at a depth equal to the
// key's bit length, where reading a further bit would index past the key.
func (trie *XorTrie) add(depth int, q TrieKey) (insertedDepth int, insertedOK bool) {
	if !trie.isLeaf() {
		// Internal node: by invariant both branches are non-nil.
		return trie.branch[q.BitAt(depth)].add(depth+1, q)
	}
	if trie.key == nil {
		trie.key = q
		return depth, true
	}
	if TrieKeyEqual(trie.key, q) {
		return depth, false
	}
	// Occupied leaf with a different key: turn it into an internal node and
	// push both keys one level down.
	p := trie.key
	trie.key = nil
	trie.branch[0], trie.branch[1] = &XorTrie{}, &XorTrie{}
	trie.branch[p.BitAt(depth)].add(depth+1, p)
	return trie.branch[q.BitAt(depth)].add(depth+1, q)
}
// Remove removes the key q from the trie. Remove mutates the trie.
// It returns the depth of the leaf that the search reached and whether q was
// found and removed.
// TODO: Also implement an immutable version of Remove.
func (trie *XorTrie) Remove(q TrieKey) (removedDepth int, removed bool) {
return trie.remove(0, q)
}
// remove deletes q from the subtrie rooted at trie, which sits at the given
// bit depth, mutating it in place. It returns the depth of the leaf reached
// and whether q was found and removed. After a successful removal every node
// on the path back to the root is given a chance to collapse via shrink.
//
// The leaf check comes first so that q.BitAt is only evaluated at internal
// nodes; a leaf may sit at a depth equal to the key's bit length, where
// reading a further bit would index past the end of the key.
func (trie *XorTrie) remove(depth int, q TrieKey) (reachedDepth int, removed bool) {
	if trie.isLeaf() {
		if trie.key != nil && TrieKeyEqual(q, trie.key) {
			trie.key = nil
			return depth, true
		}
		return depth, false
	}
	// Internal node: by invariant both branches are non-nil.
	d, ok := trie.branch[q.BitAt(depth)].remove(depth+1, q)
	if ok {
		trie.shrink()
	}
	return d, ok
}
// isEmpty reports whether no key is stored at this node (its key is nil).
func (trie *XorTrie) isEmpty() bool {
return trie.key == nil
}
// isLeaf reports whether this node has no children (both branches are nil).
func (trie *XorTrie) isLeaf() bool {
return trie.branch[0] == nil && trie.branch[1] == nil
}
// isEmptyLeaf reports whether this node is a leaf that stores no key.
func (trie *XorTrie) isEmptyLeaf() bool {
return trie.isEmpty() && trie.isLeaf()
}
// isNonEmptyLeaf reports whether this node is a leaf that stores a key.
func (trie *XorTrie) isNonEmptyLeaf() bool {
return !trie.isEmpty() && trie.isLeaf()
}
// shrink collapses an internal node whose two children are both leaves and
// together hold at most one key: the children are dropped and the surviving
// key, if any, is moved up into this node. In every other configuration the
// node is left untouched. Both branches must be non-nil.
func (trie *XorTrie) shrink() {
	left, right := trie.branch[0], trie.branch[1]
	if !left.isLeaf() || !right.isLeaf() {
		return
	}
	leftEmpty, rightEmpty := left.isEmpty(), right.isEmpty()
	if !leftEmpty && !rightEmpty {
		// Two keys still need this node to tell them apart.
		return
	}
	if !rightEmpty {
		trie.key = right.key
	} else if !leftEmpty {
		trie.key = left.key
	}
	trie.branch = [2]*XorTrie{}
}
package xortrie
import "testing"
// TestInsertRemove runs the insert/remove sequence twice against the same
// trie, so the second pass starts from whatever state the first pass left.
func TestInsertRemove(t *testing.T) {
	trie := NewXorTrie()
	for pass := 0; pass < 2; pass++ {
		testSeq(trie, t)
	}
}
// testSeq inserts every key of testInsertSeq into r and then removes every
// key of testRemoveSeq, checking the depth reported by each operation against
// the expected value in the table.
func testSeq(r *XorTrie, t *testing.T) {
	for i := range testInsertSeq {
		step := testInsertSeq[i]
		if got, _ := r.Add(TrieKey(step.key)); got != step.insertedDepth {
			t.Errorf("inserting expected %d, got %d", step.insertedDepth, got)
		}
	}
	for i := range testRemoveSeq {
		step := testRemoveSeq[i]
		if got, _ := r.Remove(TrieKey(step.key)); got != step.reachedDepth {
			t.Errorf("removing expected %d, got %d", step.reachedDepth, got)
		}
	}
}
// testInsertSeq is the ordered list of keys that testSeq inserts, each paired
// with the depth that Add is expected to report for it.
var testInsertSeq = []struct {
key []byte
insertedDepth int
}{
{key: []byte{0x0}, insertedDepth: 0},
{key: []byte{0x1}, insertedDepth: 1},
{key: []byte{0x8}, insertedDepth: 4},
{key: []byte{0x3}, insertedDepth: 2},
{key: []byte{0x4}, insertedDepth: 3},
}
// testRemoveSeq is the ordered list of keys that testSeq removes after the
// insertions, each paired with the depth that Remove is expected to report.
var testRemoveSeq = []struct {
key []byte
reachedDepth int
}{
{key: []byte{0x0}, reachedDepth: 4},
{key: []byte{0x8}, reachedDepth: 3},
{key: []byte{0x4}, reachedDepth: 1},
{key: []byte{0x1}, reachedDepth: 2},
{key: []byte{0x3}, reachedDepth: 0},
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment