add more docs on hamt

License: MIT Signed-off-by: Jeromy <why@ipfs.io>

add more docs on hamt
License: MIT Signed-off-by: Jeromy <why@ipfs.io>
8d4791c9 · Jeromy · Jeromy · c8af993a · 8d4791c9 · 8d4791c9
Commit 8d4791c9 authored Nov 17, 2016 by Jeromy Committed by Jeromy Mar 21, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 0 deletions

unixfs/hamt/hamt.go unixfs/hamt/hamt.go +25 -0

unixfs/hamt/util.go unixfs/hamt/util.go +2 -0

No files found.
--- a/unixfs/hamt/hamt.go
+++ b/unixfs/hamt/hamt.go
+// Package hamt implements a Hash Array Mapped Trie over ipfs merkledag nodes.
+// It is implemented mostly as described in the wikipedia article on HAMTs,
+// however the table size is variable (usually 256 in our usages) as opposed to
+// 32 as suggested in the article.  The hash function used is currently
+// Murmur3, but this value is configurable (the datastructure reports which
+// hash function its using).
+//
+// The one algorithmic change we implement that is not mentioned in the
+// wikipedia article is the collapsing of empty shards.
+// Given the following tree: ( '[' = shards, '{' = values )
+// [ 'A' ] -> [ 'B' ] -> { "ABC" }
+//    |       L-> { "ABD" }
+//    L-> { "ASDF" }
+// If we simply removed "ABC", we would end up with a tree where shard 'B' only
+// has a single child.  This causes two issues, the first, is that now we have
+// an extra lookup required to get to "ABD".  The second issue is that now we
+// have a tree that contains only "ABD", but is not the same tree that we would
+// get by simply inserting "ABD" into a new tree.  To address this, we always
+// check for empty shard nodes upon deletion and prune them to maintain a
+// consistent tree, independent of insertion order.
 package hamt

 import (
@@ -450,10 +470,15 @@ func (ds *HamtShard) modifyValue(ctx context.Context, hv *hashBits, key string,
 	}
 }

+// indexForBitPos returns the index within the collapsed array corresponding to
+// the given bit in the bitset.  The collapsed array contains only one entry
+// per bit set in the bitfield, and this function is used to map the indices.
 func (ds *HamtShard) indexForBitPos(bp int) int {
 	// TODO: an optimization could reuse the same 'mask' here and change the size
 	//       as needed. This isnt yet done as the bitset package doesnt make it easy
 	//       to do.
+
+	// make a bitmask (all bits set) 'bp' bits long
 	mask := new(big.Int).Sub(new(big.Int).Exp(big.NewInt(2), big.NewInt(int64(bp)), nil), big.NewInt(1))
 	mask.And(mask, ds.bitfield)


--- a/unixfs/hamt/util.go
+++ b/unixfs/hamt/util.go
@@ -4,6 +4,7 @@ import (
 	"math/big"
 )

+// hashBits is a helper that allows the reading of the 'next n bits' as an integer.
 type hashBits struct {
 	b        []byte
 	consumed int
@@ -13,6 +14,7 @@ func mkmask(n int) byte {
 	return (1 << uint(n)) - 1
 }

+// Next returns the next 'i' bits of the hashBits value as an integer
 func (hb *hashBits) Next(i int) int {
 	curbi := hb.consumed / 8
 	leftb := 8 - (hb.consumed % 8)