hamt.go 12.9 KB
Newer Older
Jeromy's avatar
Jeromy committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
// Package hamt implements a Hash Array Mapped Trie over ipfs merkledag nodes.
// It is implemented mostly as described in the wikipedia article on HAMTs,
// however the table size is variable (usually 256 in our usages) as opposed to
// 32 as suggested in the article.  The hash function used is currently
// Murmur3, but this value is configurable (the datastructure reports which
// hash function its using).
//
// The one algorithmic change we implement that is not mentioned in the
// wikipedia article is the collapsing of empty shards.
// Given the following tree: ( '[' = shards, '{' = values )
// [ 'A' ] -> [ 'B' ] -> { "ABC" }
//    |       L-> { "ABD" }
//    L-> { "ASDF" }
// If we simply removed "ABC", we would end up with a tree where shard 'B' only
// has a single child.  This causes two issues, the first, is that now we have
// an extra lookup required to get to "ABD".  The second issue is that now we
// have a tree that contains only "ABD", but is not the same tree that we would
// get by simply inserting "ABD" into a new tree.  To address this, we always
// check for empty shard nodes upon deletion and prune them to maintain a
// consistent tree, independent of insertion order.
21 22 23 24 25 26
package hamt

import (
	"context"
	"fmt"
	"os"
27
	"sync"
28

Jeromy's avatar
Jeromy committed
29 30 31
	bitfield "github.com/Stebalien/go-bitfield"
	cid "github.com/ipfs/go-cid"
	ipld "github.com/ipfs/go-ipld-format"
Kevin Atkinson's avatar
Kevin Atkinson committed
32 33
	dag "github.com/ipfs/go-merkledag"
	format "github.com/ipfs/go-unixfs"
Jeromy's avatar
Jeromy committed
34
	"github.com/spaolacci/murmur3"
35 36 37
)

const (
Hector Sanjuan's avatar
Hector Sanjuan committed
38
	// HashMurmur3 is the multiformats identifier for Murmur3
39 40 41
	HashMurmur3 uint64 = 0x22
)

Hector Sanjuan's avatar
Hector Sanjuan committed
42 43
// A Shard represents the HAMT. It should be initialized with NewShard().
type Shard struct {
44 45
	nd *dag.ProtoNode

Steven Allen's avatar
Steven Allen committed
46
	bitfield bitfield.Bitfield
47 48 49 50 51 52

	children []child

	tableSize    int
	tableSizeLg2 int

53
	builder  cid.Builder
54 55 56 57 58
	hashFunc uint64

	prefixPadStr string
	maxpadlen    int

59
	dserv ipld.DAGService
60 61 62 63
}

// child can either be another shard, or a leaf node value
type child interface {
64
	Link() (*ipld.Link, error)
65 66 67
	Label() string
}

Hector Sanjuan's avatar
Hector Sanjuan committed
68 69 70
// NewShard creates a new, empty HAMT shard with the given size.
func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
	ds, err := makeShard(dserv, size)
71 72 73 74
	if err != nil {
		return nil, err
	}

75 76
	ds.nd = new(dag.ProtoNode)
	ds.hashFunc = HashMurmur3
77
	return ds, nil
78 79
}

Hector Sanjuan's avatar
Hector Sanjuan committed
80
func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
Steven Allen's avatar
Steven Allen committed
81 82 83
	lg2s, err := logtwo(size)
	if err != nil {
		return nil, err
84
	}
85
	maxpadding := fmt.Sprintf("%X", size-1)
Hector Sanjuan's avatar
Hector Sanjuan committed
86
	return &Shard{
87
		tableSizeLg2: lg2s,
88 89
		prefixPadStr: fmt.Sprintf("%%0%dX", len(maxpadding)),
		maxpadlen:    len(maxpadding),
Steven Allen's avatar
Steven Allen committed
90
		bitfield:     bitfield.NewBitfield(size),
91 92
		tableSize:    size,
		dserv:        ds,
93
	}, nil
94 95
}

Steven Allen's avatar
Steven Allen committed
96
// NewHamtFromDag creates new a HAMT shard from the given DAG.
Hector Sanjuan's avatar
Hector Sanjuan committed
97
func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) {
98 99
	pbnd, ok := nd.(*dag.ProtoNode)
	if !ok {
100
		return nil, dag.ErrNotProtobuf
101 102
	}

Overbool's avatar
Overbool committed
103
	fsn, err := format.FSNodeFromBytes(pbnd.Data())
104 105 106 107
	if err != nil {
		return nil, err
	}

Overbool's avatar
Overbool committed
108

Overbool's avatar
Overbool committed
109
	if fsn.Type() != format.THAMTShard {
110 111 112
		return nil, fmt.Errorf("node was not a dir shard")
	}

Overbool's avatar
Overbool committed
113
	if fsn.HashType() != HashMurmur3 {
114 115 116
		return nil, fmt.Errorf("only murmur3 supported as hash function")
	}

Overbool's avatar
Overbool committed
117
	ds, err := makeShard(dserv, int(fsn.Fanout()))
118 119 120 121
	if err != nil {
		return nil, err
	}

122 123
	ds.nd = pbnd.Copy().(*dag.ProtoNode)
	ds.children = make([]child, len(pbnd.Links()))
Overbool's avatar
Overbool committed
124 125
	ds.bitfield.SetBytes(fsn.Data())
	ds.hashFunc = fsn.HashType()
126
	ds.builder = ds.nd.CidBuilder()
127 128 129 130

	return ds, nil
}

131 132 133
// SetCidBuilder sets the CID Builder
func (ds *Shard) SetCidBuilder(builder cid.Builder) {
	ds.builder = builder
134 135
}

136 137 138
// CidBuilder gets the CID Builder, may be nil if unset
func (ds *Shard) CidBuilder() cid.Builder {
	return ds.builder
139 140
}

141
// Node serializes the HAMT structure into a merkledag node with unixfs formatting
Hector Sanjuan's avatar
Hector Sanjuan committed
142
func (ds *Shard) Node() (ipld.Node, error) {
143
	out := new(dag.ProtoNode)
144
	out.SetCidBuilder(ds.builder)
145

Steven Allen's avatar
Steven Allen committed
146
	cindex := 0
147 148
	// TODO: optimized 'for each set bit'
	for i := 0; i < ds.tableSize; i++ {
Steven Allen's avatar
Steven Allen committed
149
		if !ds.bitfield.Bit(i) {
150 151 152 153 154
			continue
		}

		ch := ds.children[cindex]
		if ch != nil {
155
			clnk, err := ch.Link()
156 157 158 159
			if err != nil {
				return nil, err
			}

160
			err = out.AddRawLink(ds.linkNamePrefix(i)+ch.Label(), clnk)
161 162 163 164 165 166 167 168 169 170 171 172 173
			if err != nil {
				return nil, err
			}
		} else {
			// child unloaded, just copy in link with updated name
			lnk := ds.nd.Links()[cindex]
			label := lnk.Name[ds.maxpadlen:]

			err := out.AddRawLink(ds.linkNamePrefix(i)+label, lnk)
			if err != nil {
				return nil, err
			}
		}
Steven Allen's avatar
Steven Allen committed
174
		cindex++
175 176
	}

Overbool's avatar
Overbool committed
177
	data, err := format.HAMTShardData(ds.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3)
178 179 180 181 182 183
	if err != nil {
		return nil, err
	}

	out.SetData(data)

184
	err = ds.dserv.Add(context.TODO(), out)
185 186 187 188 189 190 191 192 193
	if err != nil {
		return nil, err
	}

	return out, nil
}

type shardValue struct {
	key string
194
	val *ipld.Link
195 196
}

Jeromy's avatar
Jeromy committed
197
// Link returns a link to this node
198
func (sv *shardValue) Link() (*ipld.Link, error) {
199 200 201 202 203 204 205 206 207 208 209 210 211
	return sv.val, nil
}

func (sv *shardValue) Label() string {
	return sv.key
}

func hash(val []byte) []byte {
	h := murmur3.New64()
	h.Write(val)
	return h.Sum(nil)
}

Hector Sanjuan's avatar
Hector Sanjuan committed
212
// Label for Shards is the empty string, this is used to differentiate them from
213
// value entries
Hector Sanjuan's avatar
Hector Sanjuan committed
214
func (ds *Shard) Label() string {
215 216 217 218
	return ""
}

// Set sets 'name' = nd in the HAMT
Hector Sanjuan's avatar
Hector Sanjuan committed
219
func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
220
	hv := &hashBits{b: hash([]byte(name))}
221
	err := ds.dserv.Add(ctx, nd)
222 223 224 225
	if err != nil {
		return err
	}

226
	lnk, err := ipld.MakeLink(nd)
227 228 229 230 231 232
	if err != nil {
		return err
	}
	lnk.Name = ds.linkNamePrefix(0) + name

	return ds.modifyValue(ctx, hv, name, lnk)
233 234 235
}

// Remove deletes the named entry if it exists, this operation is idempotent.
Hector Sanjuan's avatar
Hector Sanjuan committed
236
func (ds *Shard) Remove(ctx context.Context, name string) error {
237 238 239 240
	hv := &hashBits{b: hash([]byte(name))}
	return ds.modifyValue(ctx, hv, name, nil)
}

Jeromy's avatar
Jeromy committed
241
// Find searches for a child node by 'name' within this hamt
Hector Sanjuan's avatar
Hector Sanjuan committed
242
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
243 244
	hv := &hashBits{b: hash([]byte(name))}

245
	var out *ipld.Link
246 247 248 249
	err := ds.getValue(ctx, hv, name, func(sv *shardValue) error {
		out = sv.val
		return nil
	})
250 251 252
	if err != nil {
		return nil, err
	}
253

Jeromy's avatar
Jeromy committed
254
	return out, nil
255 256 257 258 259
}

// getChild returns the i'th child of this shard. If it is cached in the
// children array, it will return it from there. Otherwise, it loads the child
// node from disk.
Hector Sanjuan's avatar
Hector Sanjuan committed
260
func (ds *Shard) getChild(ctx context.Context, i int) (child, error) {
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
	if i >= len(ds.children) || i < 0 {
		return nil, fmt.Errorf("invalid index passed to getChild (likely corrupt bitfield)")
	}

	if len(ds.children) != len(ds.nd.Links()) {
		return nil, fmt.Errorf("inconsistent lengths between children array and Links array")
	}

	c := ds.children[i]
	if c != nil {
		return c, nil
	}

	return ds.loadChild(ctx, i)
}

// loadChild reads the i'th child node of this shard from disk and returns it
// as a 'child' interface
Hector Sanjuan's avatar
Hector Sanjuan committed
279
func (ds *Shard) loadChild(ctx context.Context, i int) (child, error) {
280 281 282 283 284 285 286
	lnk := ds.nd.Links()[i]
	if len(lnk.Name) < ds.maxpadlen {
		return nil, fmt.Errorf("invalid link name '%s'", lnk.Name)
	}

	var c child
	if len(lnk.Name) == ds.maxpadlen {
287 288 289 290
		nd, err := lnk.GetNode(ctx, ds.dserv)
		if err != nil {
			return nil, err
		}
291 292 293 294 295 296 297
		cds, err := NewHamtFromDag(ds.dserv, nd)
		if err != nil {
			return nil, err
		}

		c = cds
	} else {
298
		lnk2 := *lnk
299 300
		c = &shardValue{
			key: lnk.Name[ds.maxpadlen:],
301
			val: &lnk2,
302 303 304 305 306 307 308
		}
	}

	ds.children[i] = c
	return c, nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
309
func (ds *Shard) setChild(i int, c child) {
310 311 312
	ds.children[i] = c
}

Jeromy's avatar
Jeromy committed
313
// Link returns a merklelink to this shard node
Hector Sanjuan's avatar
Hector Sanjuan committed
314
func (ds *Shard) Link() (*ipld.Link, error) {
315 316 317 318 319
	nd, err := ds.Node()
	if err != nil {
		return nil, err
	}

320
	err = ds.dserv.Add(context.TODO(), nd)
321 322 323 324
	if err != nil {
		return nil, err
	}

325
	return ipld.MakeLink(nd)
326 327
}

Hector Sanjuan's avatar
Hector Sanjuan committed
328
func (ds *Shard) insertChild(idx int, key string, lnk *ipld.Link) error {
329
	if lnk == nil {
330 331 332 333
		return os.ErrNotExist
	}

	i := ds.indexForBitPos(idx)
Steven Allen's avatar
Steven Allen committed
334
	ds.bitfield.SetBit(idx)
335 336

	lnk.Name = ds.linkNamePrefix(idx) + key
337 338
	sv := &shardValue{
		key: key,
339
		val: lnk,
340 341 342
	}

	ds.children = append(ds.children[:i], append([]child{sv}, ds.children[i:]...)...)
343
	ds.nd.SetLinks(append(ds.nd.Links()[:i], append([]*ipld.Link{nil}, ds.nd.Links()[i:]...)...))
344 345 346
	return nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
347
func (ds *Shard) rmChild(i int) error {
348 349 350 351 352 353 354 355 356 357 358 359 360
	if i < 0 || i >= len(ds.children) || i >= len(ds.nd.Links()) {
		return fmt.Errorf("hamt: attempted to remove child with out of range index")
	}

	copy(ds.children[i:], ds.children[i+1:])
	ds.children = ds.children[:len(ds.children)-1]

	copy(ds.nd.Links()[i:], ds.nd.Links()[i+1:])
	ds.nd.SetLinks(ds.nd.Links()[:len(ds.nd.Links())-1])

	return nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
361
func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func(*shardValue) error) error {
362
	idx := hv.Next(ds.tableSizeLg2)
Steven Allen's avatar
Steven Allen committed
363
	if ds.bitfield.Bit(int(idx)) {
364 365 366 367 368 369 370 371
		cindex := ds.indexForBitPos(idx)

		child, err := ds.getChild(ctx, cindex)
		if err != nil {
			return err
		}

		switch child := child.(type) {
Hector Sanjuan's avatar
Hector Sanjuan committed
372
		case *Shard:
373 374 375 376 377 378 379 380 381 382 383
			return child.getValue(ctx, hv, key, cb)
		case *shardValue:
			if child.key == key {
				return cb(child)
			}
		}
	}

	return os.ErrNotExist
}

Hector Sanjuan's avatar
Hector Sanjuan committed
384 385
// EnumLinks collects all links in the Shard.
func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
386
	var links []*ipld.Link
387 388 389 390
	var setlk sync.Mutex

	getLinks := ds.makeAsyncTrieGetLinks(func(l *ipld.Link) error {
		setlk.Lock()
391
		links = append(links, l)
392
		setlk.Unlock()
393 394
		return nil
	})
395

396 397 398
	cset := cid.NewSet()

	err := dag.EnumerateChildrenAsync(ctx, getLinks, ds.nd.Cid(), cset.Visit)
399 400 401
	return links, err
}

Hector Sanjuan's avatar
Hector Sanjuan committed
402 403
// ForEachLink walks the Shard and calls the given function.
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
Jeromy's avatar
Jeromy committed
404
	return ds.walkTrie(ctx, func(sv *shardValue) error {
405
		lnk := sv.val
406 407
		lnk.Name = sv.key

408
		return f(lnk)
409 410 411
	})
}

412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
func (ds *Shard) makeAsyncTrieGetLinks(cb func(*ipld.Link) error) dag.GetLinks {

	return func(ctx context.Context, c cid.Cid) ([]*ipld.Link, error) {
		node, err := ds.dserv.Get(ctx, c)
		if err != nil {
			return nil, err
		}
		cds, err := NewHamtFromDag(ds.dserv, node)
		if err != nil {
			return nil, err
		}

		childShards := make([]*ipld.Link, 0, len(cds.children))
		for idx := range cds.children {
			lnk := cds.nd.Links()[idx]

			if len(lnk.Name) < cds.maxpadlen {
				return nil, fmt.Errorf("invalid link name '%s'", lnk.Name)
			}
			if len(lnk.Name) == cds.maxpadlen {
				childShards = append(childShards, lnk)
			} else {
				cb(lnk)
			}
		}
		return childShards, nil
	}
}

Hector Sanjuan's avatar
Hector Sanjuan committed
441
func (ds *Shard) walkTrie(ctx context.Context, cb func(*shardValue) error) error {
Steven Allen's avatar
Steven Allen committed
442
	for idx := range ds.children {
Jeromy's avatar
Jeromy committed
443
		c, err := ds.getChild(ctx, idx)
444 445 446 447 448 449
		if err != nil {
			return err
		}

		switch c := c.(type) {
		case *shardValue:
Steven Allen's avatar
Steven Allen committed
450
			if err := cb(c); err != nil {
451 452 453
				return err
			}

Hector Sanjuan's avatar
Hector Sanjuan committed
454
		case *Shard:
Steven Allen's avatar
Steven Allen committed
455
			if err := c.walkTrie(ctx, cb); err != nil {
456 457 458 459 460 461 462 463 464
				return err
			}
		default:
			return fmt.Errorf("unexpected child type: %#v", c)
		}
	}
	return nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
465
func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error {
466 467
	idx := hv.Next(ds.tableSizeLg2)

Steven Allen's avatar
Steven Allen committed
468
	if !ds.bitfield.Bit(idx) {
469 470 471 472 473 474 475 476 477 478 479
		return ds.insertChild(idx, key, val)
	}

	cindex := ds.indexForBitPos(idx)

	child, err := ds.getChild(ctx, cindex)
	if err != nil {
		return err
	}

	switch child := child.(type) {
Hector Sanjuan's avatar
Hector Sanjuan committed
480
	case *Shard:
481 482 483 484 485 486 487 488 489 490 491 492
		err := child.modifyValue(ctx, hv, key, val)
		if err != nil {
			return err
		}

		if val == nil {
			switch len(child.children) {
			case 0:
				// empty sub-shard, prune it
				// Note: this shouldnt normally ever happen
				//       in the event of another implementation creates flawed
				//       structures, this will help to normalize them.
Steven Allen's avatar
Steven Allen committed
493
				ds.bitfield.UnsetBit(idx)
494 495 496 497 498 499 500 501 502 503 504 505 506
				return ds.rmChild(cindex)
			case 1:
				nchild, ok := child.children[0].(*shardValue)
				if ok {
					// sub-shard with a single value element, collapse it
					ds.setChild(cindex, nchild)
				}
				return nil
			}
		}

		return nil
	case *shardValue:
Jeromy's avatar
Jeromy committed
507 508 509
		if child.key == key {
			// value modification
			if val == nil {
Steven Allen's avatar
Steven Allen committed
510
				ds.bitfield.UnsetBit(idx)
Jeromy's avatar
Jeromy committed
511 512
				return ds.rmChild(cindex)
			}
513 514 515

			child.val = val
			return nil
Jeromy's avatar
Jeromy committed
516
		}
517

Jeromy's avatar
Jeromy committed
518 519 520
		if val == nil {
			return os.ErrNotExist
		}
521

Jeromy's avatar
Jeromy committed
522
		// replace value with another shard, one level deeper
Hector Sanjuan's avatar
Hector Sanjuan committed
523
		ns, err := NewShard(ds.dserv, ds.tableSize)
Jeromy's avatar
Jeromy committed
524 525 526
		if err != nil {
			return err
		}
527
		ns.builder = ds.builder
Jeromy's avatar
Jeromy committed
528 529 530 531
		chhv := &hashBits{
			b:        hash([]byte(child.key)),
			consumed: hv.consumed,
		}
532

Jeromy's avatar
Jeromy committed
533 534 535 536
		err = ns.modifyValue(ctx, hv, key, val)
		if err != nil {
			return err
		}
537

Jeromy's avatar
Jeromy committed
538 539 540
		err = ns.modifyValue(ctx, chhv, child.key, child.val)
		if err != nil {
			return err
541
		}
Jeromy's avatar
Jeromy committed
542 543 544

		ds.setChild(cindex, ns)
		return nil
545 546 547 548 549
	default:
		return fmt.Errorf("unexpected type for child: %#v", child)
	}
}

Jeromy's avatar
Jeromy committed
550 551 552
// indexForBitPos returns the index within the collapsed array corresponding to
// the given bit in the bitset.  The collapsed array contains only one entry
// per bit set in the bitfield, and this function is used to map the indices.
Hector Sanjuan's avatar
Hector Sanjuan committed
553
func (ds *Shard) indexForBitPos(bp int) int {
Steven Allen's avatar
Steven Allen committed
554
	return ds.bitfield.OnesBefore(bp)
555 556 557
}

// linkNamePrefix takes in the bitfield index of an entry and returns its hex prefix
Hector Sanjuan's avatar
Hector Sanjuan committed
558
func (ds *Shard) linkNamePrefix(idx int) string {
559 560
	return fmt.Sprintf(ds.prefixPadStr, idx)
}