hamt.go 13.3 KB
Newer Older
Jeromy's avatar
Jeromy committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
// Package hamt implements a Hash Array Mapped Trie over ipfs merkledag nodes.
// It is implemented mostly as described in the wikipedia article on HAMTs,
// however the table size is variable (usually 256 in our usages) as opposed to
// 32 as suggested in the article.  The hash function used is currently
// Murmur3, but this value is configurable (the datastructure reports which
// hash function its using).
//
// The one algorithmic change we implement that is not mentioned in the
// wikipedia article is the collapsing of empty shards.
// Given the following tree: ( '[' = shards, '{' = values )
// [ 'A' ] -> [ 'B' ] -> { "ABC" }
//    |       L-> { "ABD" }
//    L-> { "ASDF" }
// If we simply removed "ABC", we would end up with a tree where shard 'B' only
// has a single child.  This causes two issues, the first, is that now we have
// an extra lookup required to get to "ABD".  The second issue is that now we
// have a tree that contains only "ABD", but is not the same tree that we would
// get by simply inserting "ABD" into a new tree.  To address this, we always
// check for empty shard nodes upon deletion and prune them to maintain a
// consistent tree, independent of insertion order.
21 22 23 24 25 26
package hamt

import (
	"context"
	"fmt"
	"os"
27
	"sync"
28

Jeromy's avatar
Jeromy committed
29 30 31
	bitfield "github.com/Stebalien/go-bitfield"
	cid "github.com/ipfs/go-cid"
	ipld "github.com/ipfs/go-ipld-format"
Kevin Atkinson's avatar
Kevin Atkinson committed
32 33
	dag "github.com/ipfs/go-merkledag"
	format "github.com/ipfs/go-unixfs"
Jeromy's avatar
Jeromy committed
34
	"github.com/spaolacci/murmur3"
35 36 37
)

const (
Hector Sanjuan's avatar
Hector Sanjuan committed
38
	// HashMurmur3 is the multiformats identifier for Murmur3
39 40 41
	HashMurmur3 uint64 = 0x22
)

42
func (ds *Shard) isValueNode() bool {
Overbool's avatar
Overbool committed
43
	return ds.key != "" && ds.val != nil
44 45
}

Hector Sanjuan's avatar
Hector Sanjuan committed
46 47
// A Shard represents the HAMT. It should be initialized with NewShard().
type Shard struct {
48 49
	nd *dag.ProtoNode

Steven Allen's avatar
Steven Allen committed
50
	bitfield bitfield.Bitfield
51

52
	children []*Shard
53 54 55 56

	tableSize    int
	tableSizeLg2 int

57
	builder  cid.Builder
58 59 60 61 62
	hashFunc uint64

	prefixPadStr string
	maxpadlen    int

63
	dserv ipld.DAGService
64

65 66 67
	// leaf node
	key string
	val *ipld.Link
68 69
}

Hector Sanjuan's avatar
Hector Sanjuan committed
70 71 72
// NewShard creates a new, empty HAMT shard with the given size.
func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
	ds, err := makeShard(dserv, size)
73 74 75 76
	if err != nil {
		return nil, err
	}

77 78
	ds.nd = new(dag.ProtoNode)
	ds.hashFunc = HashMurmur3
79
	return ds, nil
80 81
}

Hector Sanjuan's avatar
Hector Sanjuan committed
82
func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
Steven Allen's avatar
Steven Allen committed
83 84 85
	lg2s, err := logtwo(size)
	if err != nil {
		return nil, err
86
	}
87
	maxpadding := fmt.Sprintf("%X", size-1)
Hector Sanjuan's avatar
Hector Sanjuan committed
88
	return &Shard{
89
		tableSizeLg2: lg2s,
90 91
		prefixPadStr: fmt.Sprintf("%%0%dX", len(maxpadding)),
		maxpadlen:    len(maxpadding),
Steven Allen's avatar
Steven Allen committed
92
		bitfield:     bitfield.NewBitfield(size),
93 94
		tableSize:    size,
		dserv:        ds,
95
	}, nil
96 97
}

Steven Allen's avatar
Steven Allen committed
98
// NewHamtFromDag creates new a HAMT shard from the given DAG.
Hector Sanjuan's avatar
Hector Sanjuan committed
99
func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) {
100 101
	pbnd, ok := nd.(*dag.ProtoNode)
	if !ok {
102
		return nil, dag.ErrNotProtobuf
103 104
	}

Overbool's avatar
Overbool committed
105
	fsn, err := format.FSNodeFromBytes(pbnd.Data())
106 107 108 109
	if err != nil {
		return nil, err
	}

Overbool's avatar
Overbool committed
110
	if fsn.Type() != format.THAMTShard {
111 112 113
		return nil, fmt.Errorf("node was not a dir shard")
	}

Overbool's avatar
Overbool committed
114
	if fsn.HashType() != HashMurmur3 {
115 116 117
		return nil, fmt.Errorf("only murmur3 supported as hash function")
	}

Overbool's avatar
Overbool committed
118
	ds, err := makeShard(dserv, int(fsn.Fanout()))
119 120 121 122
	if err != nil {
		return nil, err
	}

123
	ds.nd = pbnd.Copy().(*dag.ProtoNode)
124
	ds.children = make([]*Shard, len(pbnd.Links()))
Overbool's avatar
Overbool committed
125 126
	ds.bitfield.SetBytes(fsn.Data())
	ds.hashFunc = fsn.HashType()
127
	ds.builder = ds.nd.CidBuilder()
128 129 130 131

	return ds, nil
}

132 133 134
// SetCidBuilder sets the CID Builder
func (ds *Shard) SetCidBuilder(builder cid.Builder) {
	ds.builder = builder
135 136
}

137 138 139
// CidBuilder gets the CID Builder, may be nil if unset
func (ds *Shard) CidBuilder() cid.Builder {
	return ds.builder
140 141
}

142
// Node serializes the HAMT structure into a merkledag node with unixfs formatting
Hector Sanjuan's avatar
Hector Sanjuan committed
143
func (ds *Shard) Node() (ipld.Node, error) {
144
	out := new(dag.ProtoNode)
145
	out.SetCidBuilder(ds.builder)
146

Steven Allen's avatar
Steven Allen committed
147
	cindex := 0
148 149
	// TODO: optimized 'for each set bit'
	for i := 0; i < ds.tableSize; i++ {
Steven Allen's avatar
Steven Allen committed
150
		if !ds.bitfield.Bit(i) {
151 152 153 154 155
			continue
		}

		ch := ds.children[cindex]
		if ch != nil {
156
			clnk, err := ch.Link()
157 158 159 160
			if err != nil {
				return nil, err
			}

161
			err = out.AddRawLink(ds.linkNamePrefix(i)+ch.key, clnk)
162 163 164 165 166 167 168 169 170 171 172 173 174
			if err != nil {
				return nil, err
			}
		} else {
			// child unloaded, just copy in link with updated name
			lnk := ds.nd.Links()[cindex]
			label := lnk.Name[ds.maxpadlen:]

			err := out.AddRawLink(ds.linkNamePrefix(i)+label, lnk)
			if err != nil {
				return nil, err
			}
		}
Steven Allen's avatar
Steven Allen committed
175
		cindex++
176 177
	}

Overbool's avatar
Overbool committed
178
	data, err := format.HAMTShardData(ds.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3)
179 180 181 182 183 184
	if err != nil {
		return nil, err
	}

	out.SetData(data)

185
	err = ds.dserv.Add(context.TODO(), out)
186 187 188 189 190 191 192
	if err != nil {
		return nil, err
	}

	return out, nil
}

193 194 195
func (ds *Shard) makeShardValue(lnk *ipld.Link) *Shard {
	lnk2 := *lnk
	s, _ := makeShard(ds.dserv, ds.tableSize)
196

197 198
	s.key = lnk.Name[ds.maxpadlen:]
	s.val = &lnk2
199

200
	return s
201 202
}

203 204 205 206 207 208 209
func hash(val []byte) []byte {
	h := murmur3.New64()
	h.Write(val)
	return h.Sum(nil)
}

// Set sets 'name' = nd in the HAMT
Hector Sanjuan's avatar
Hector Sanjuan committed
210
func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
211
	hv := &hashBits{b: hash([]byte(name))}
212
	err := ds.dserv.Add(ctx, nd)
213 214 215 216
	if err != nil {
		return err
	}

217
	lnk, err := ipld.MakeLink(nd)
218 219 220 221 222 223
	if err != nil {
		return err
	}
	lnk.Name = ds.linkNamePrefix(0) + name

	return ds.modifyValue(ctx, hv, name, lnk)
224 225 226
}

// Remove deletes the named entry if it exists, this operation is idempotent.
Hector Sanjuan's avatar
Hector Sanjuan committed
227
func (ds *Shard) Remove(ctx context.Context, name string) error {
228 229 230 231
	hv := &hashBits{b: hash([]byte(name))}
	return ds.modifyValue(ctx, hv, name, nil)
}

Jeromy's avatar
Jeromy committed
232
// Find searches for a child node by 'name' within this hamt
Hector Sanjuan's avatar
Hector Sanjuan committed
233
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
234 235
	hv := &hashBits{b: hash([]byte(name))}

236
	var out *ipld.Link
237
	err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
238 239 240
		out = sv.val
		return nil
	})
241 242 243
	if err != nil {
		return nil, err
	}
244

Jeromy's avatar
Jeromy committed
245
	return out, nil
246 247
}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
type linkType int

const (
	invalidLink linkType = iota
	shardLink
	shardValueLink
)

func (ds *Shard) childLinkType(lnk *ipld.Link) (linkType, error) {
	if len(lnk.Name) < ds.maxpadlen {
		return invalidLink, fmt.Errorf("invalid link name '%s'", lnk.Name)
	}
	if len(lnk.Name) == ds.maxpadlen {
		return shardLink, nil
	}
	return shardValueLink, nil
}

266 267 268
// getChild returns the i'th child of this shard. If it is cached in the
// children array, it will return it from there. Otherwise, it loads the child
// node from disk.
269
func (ds *Shard) getChild(ctx context.Context, i int) (*Shard, error) {
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
	if i >= len(ds.children) || i < 0 {
		return nil, fmt.Errorf("invalid index passed to getChild (likely corrupt bitfield)")
	}

	if len(ds.children) != len(ds.nd.Links()) {
		return nil, fmt.Errorf("inconsistent lengths between children array and Links array")
	}

	c := ds.children[i]
	if c != nil {
		return c, nil
	}

	return ds.loadChild(ctx, i)
}

// loadChild reads the i'th child node of this shard from disk and returns it
// as a 'child' interface
288
func (ds *Shard) loadChild(ctx context.Context, i int) (*Shard, error) {
289
	lnk := ds.nd.Links()[i]
290 291 292
	lnkLinkType, err := ds.childLinkType(lnk)
	if err != nil {
		return nil, err
293 294
	}

295
	var c *Shard
296
	if lnkLinkType == shardLink {
297 298 299 300
		nd, err := lnk.GetNode(ctx, ds.dserv)
		if err != nil {
			return nil, err
		}
301 302 303 304 305 306 307
		cds, err := NewHamtFromDag(ds.dserv, nd)
		if err != nil {
			return nil, err
		}

		c = cds
	} else {
308
		c = ds.makeShardValue(lnk)
309 310 311 312 313 314
	}

	ds.children[i] = c
	return c, nil
}

315
func (ds *Shard) setChild(i int, c *Shard) {
316 317 318
	ds.children[i] = c
}

Jeromy's avatar
Jeromy committed
319
// Link returns a merklelink to this shard node
Hector Sanjuan's avatar
Hector Sanjuan committed
320
func (ds *Shard) Link() (*ipld.Link, error) {
321 322 323 324
	if ds.isValueNode() {
		return ds.val, nil
	}

325 326 327 328 329
	nd, err := ds.Node()
	if err != nil {
		return nil, err
	}

330
	err = ds.dserv.Add(context.TODO(), nd)
331 332 333 334
	if err != nil {
		return nil, err
	}

335
	return ipld.MakeLink(nd)
336 337
}

Hector Sanjuan's avatar
Hector Sanjuan committed
338
func (ds *Shard) insertChild(idx int, key string, lnk *ipld.Link) error {
339
	if lnk == nil {
340 341 342 343
		return os.ErrNotExist
	}

	i := ds.indexForBitPos(idx)
Steven Allen's avatar
Steven Allen committed
344
	ds.bitfield.SetBit(idx)
345 346

	lnk.Name = ds.linkNamePrefix(idx) + key
347
	sv := &Shard{
348
		key: key,
349
		val: lnk,
350 351
	}

352
	ds.children = append(ds.children[:i], append([]*Shard{sv}, ds.children[i:]...)...)
353
	ds.nd.SetLinks(append(ds.nd.Links()[:i], append([]*ipld.Link{nil}, ds.nd.Links()[i:]...)...))
354 355 356
	return nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
357
func (ds *Shard) rmChild(i int) error {
358 359 360 361 362 363 364 365 366 367 368 369 370
	if i < 0 || i >= len(ds.children) || i >= len(ds.nd.Links()) {
		return fmt.Errorf("hamt: attempted to remove child with out of range index")
	}

	copy(ds.children[i:], ds.children[i+1:])
	ds.children = ds.children[:len(ds.children)-1]

	copy(ds.nd.Links()[i:], ds.nd.Links()[i+1:])
	ds.nd.SetLinks(ds.nd.Links()[:len(ds.nd.Links())-1])

	return nil
}

371
func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func(*Shard) error) error {
372
	idx := hv.Next(ds.tableSizeLg2)
Steven Allen's avatar
Steven Allen committed
373
	if ds.bitfield.Bit(int(idx)) {
374 375 376 377 378 379 380
		cindex := ds.indexForBitPos(idx)

		child, err := ds.getChild(ctx, cindex)
		if err != nil {
			return err
		}

381
		if child.isValueNode() {
382 383 384
			if child.key == key {
				return cb(child)
			}
385 386
		} else {
			return child.getValue(ctx, hv, key, cb)
387 388 389 390 391 392
		}
	}

	return os.ErrNotExist
}

Hector Sanjuan's avatar
Hector Sanjuan committed
393 394
// EnumLinks collects all links in the Shard.
func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
395
	var links []*ipld.Link
396 397
	var setlk sync.Mutex

398
	getLinks := makeAsyncTrieGetLinks(ds.dserv, func(sv *Shard) error {
399 400
		lnk := sv.val
		lnk.Name = sv.key
401
		setlk.Lock()
402
		links = append(links, lnk)
403
		setlk.Unlock()
404 405
		return nil
	})
406

407 408 409
	cset := cid.NewSet()

	err := dag.EnumerateChildrenAsync(ctx, getLinks, ds.nd.Cid(), cset.Visit)
410 411 412
	return links, err
}

Hector Sanjuan's avatar
Hector Sanjuan committed
413 414
// ForEachLink walks the Shard and calls the given function.
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
415
	return ds.walkTrie(ctx, func(sv *Shard) error {
416
		lnk := sv.val
417 418
		lnk.Name = sv.key

419
		return f(lnk)
420 421 422
	})
}

423 424 425
// makeAsyncTrieGetLinks builds a getLinks function that can be used with EnumerateChildrenAsync
// to iterate a HAMT shard. It takes an IPLD Dag Service to fetch nodes, and a call back that will get called
// on all links to leaf nodes in a HAMT tree, so they can be collected for an EnumLinks operation
426
func makeAsyncTrieGetLinks(dagService ipld.DAGService, onShardValue func(shard *Shard) error) dag.GetLinks {
427

428 429
	return func(ctx context.Context, currentCid cid.Cid) ([]*ipld.Link, error) {
		node, err := dagService.Get(ctx, currentCid)
430 431 432
		if err != nil {
			return nil, err
		}
433
		directoryShard, err := NewHamtFromDag(dagService, node)
434 435 436 437
		if err != nil {
			return nil, err
		}

438
		childShards := make([]*ipld.Link, 0, len(directoryShard.children))
439
		links := directoryShard.nd.Links()
440
		for idx := range directoryShard.children {
441
			lnk := links[idx]
442
			lnkLinkType, err := directoryShard.childLinkType(lnk)
443

444 445
			if err != nil {
				return nil, err
446
			}
447
			if lnkLinkType == shardLink {
448 449
				childShards = append(childShards, lnk)
			} else {
450
				sv := directoryShard.makeShardValue(lnk)
451 452 453 454
				err := onShardValue(sv)
				if err != nil {
					return nil, err
				}
455 456 457 458 459 460
			}
		}
		return childShards, nil
	}
}

461
func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error {
Steven Allen's avatar
Steven Allen committed
462
	for idx := range ds.children {
Jeromy's avatar
Jeromy committed
463
		c, err := ds.getChild(ctx, idx)
464 465 466 467
		if err != nil {
			return err
		}

468
		if c.isValueNode() {
Steven Allen's avatar
Steven Allen committed
469
			if err := cb(c); err != nil {
470 471
				return err
			}
472
		} else {
Steven Allen's avatar
Steven Allen committed
473
			if err := c.walkTrie(ctx, cb); err != nil {
474 475 476 477 478 479 480
				return err
			}
		}
	}
	return nil
}

Hector Sanjuan's avatar
Hector Sanjuan committed
481
func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error {
482
	idx := hv.Next(ds.tableSizeLg2)
Steven Allen's avatar
Steven Allen committed
483
	if !ds.bitfield.Bit(idx) {
484 485 486 487 488 489 490 491 492 493
		return ds.insertChild(idx, key, val)
	}

	cindex := ds.indexForBitPos(idx)

	child, err := ds.getChild(ctx, cindex)
	if err != nil {
		return err
	}

494
	if child.isValueNode() {
Jeromy's avatar
Jeromy committed
495 496 497
		if child.key == key {
			// value modification
			if val == nil {
Steven Allen's avatar
Steven Allen committed
498
				ds.bitfield.UnsetBit(idx)
Jeromy's avatar
Jeromy committed
499 500
				return ds.rmChild(cindex)
			}
501 502 503

			child.val = val
			return nil
Jeromy's avatar
Jeromy committed
504
		}
505

Jeromy's avatar
Jeromy committed
506 507 508
		if val == nil {
			return os.ErrNotExist
		}
509

Jeromy's avatar
Jeromy committed
510
		// replace value with another shard, one level deeper
Hector Sanjuan's avatar
Hector Sanjuan committed
511
		ns, err := NewShard(ds.dserv, ds.tableSize)
Jeromy's avatar
Jeromy committed
512 513 514
		if err != nil {
			return err
		}
515
		ns.builder = ds.builder
Jeromy's avatar
Jeromy committed
516 517 518 519
		chhv := &hashBits{
			b:        hash([]byte(child.key)),
			consumed: hv.consumed,
		}
520

Jeromy's avatar
Jeromy committed
521 522 523 524
		err = ns.modifyValue(ctx, hv, key, val)
		if err != nil {
			return err
		}
525

Jeromy's avatar
Jeromy committed
526 527 528
		err = ns.modifyValue(ctx, chhv, child.key, child.val)
		if err != nil {
			return err
529
		}
Jeromy's avatar
Jeromy committed
530 531 532

		ds.setChild(cindex, ns)
		return nil
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
	} else {
		err := child.modifyValue(ctx, hv, key, val)
		if err != nil {
			return err
		}

		if val == nil {
			switch len(child.children) {
			case 0:
				// empty sub-shard, prune it
				// Note: this shouldnt normally ever happen
				//       in the event of another implementation creates flawed
				//       structures, this will help to normalize them.
				ds.bitfield.UnsetBit(idx)
				return ds.rmChild(cindex)
			case 1:
				nchild := child.children[0]
				if nchild.isValueNode() {
					// sub-shard with a single value element, collapse it
					ds.setChild(cindex, nchild)
				}
				return nil
			}
		}

		return nil
559 560 561
	}
}

Jeromy's avatar
Jeromy committed
562 563 564
// indexForBitPos returns the index within the collapsed array corresponding to
// the given bit in the bitset.  The collapsed array contains only one entry
// per bit set in the bitfield, and this function is used to map the indices.
Hector Sanjuan's avatar
Hector Sanjuan committed
565
func (ds *Shard) indexForBitPos(bp int) int {
Steven Allen's avatar
Steven Allen committed
566
	return ds.bitfield.OnesBefore(bp)
567 568 569
}

// linkNamePrefix takes in the bitfield index of an entry and returns its hex prefix
Hector Sanjuan's avatar
Hector Sanjuan committed
570
func (ds *Shard) linkNamePrefix(idx int) string {
571 572
	return fmt.Sprintf(ds.prefixPadStr, idx)
}