merkledag.go 12.1 KB
Newer Older
1
// package merkledag implements the IPFS Merkle DAG datastructures.
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
2 3 4
package merkledag

import (
5
	"context"
6
	"fmt"
7
	"strings"
8
	"sync"
Jeromy's avatar
Jeromy committed
9

10
	bserv "github.com/ipfs/go-ipfs/blockservice"
11
	offline "github.com/ipfs/go-ipfs/exchange/offline"
12
	blocks "gx/ipfs/QmVA4mafxbfH5aEvNz8fyoxC6J1xhAtw88B4GerPznSZBg/go-block-format"
Jeromy's avatar
Jeromy committed
13

14 15 16
	cid "gx/ipfs/QmTprEaAA2A9bst5XH7exuyi5KzNMK3SEDNN8rBDnKWcUS/go-cid"
	node "gx/ipfs/QmYNyRZJBUYPNrLszFmrBrPJbsBh2vMsefz5gnDpB5M1P6/go-ipld-format"
	ipldcbor "gx/ipfs/QmemYymP73eVdTUUMZEiSpiHeZQKNJdT5dP2iuHssZh1sR/go-ipld-cbor"
Juan Batiz-Benet's avatar
Juan Batiz-Benet committed
17 18
)

19
var ErrNotFound = fmt.Errorf("merkledag: not found")
Jeromy's avatar
Jeromy committed
20

Jeromy's avatar
Jeromy committed
21 22
// DAGService is an IPFS Merkle DAG service.
type DAGService interface {
Jeromy's avatar
Jeromy committed
23 24 25
	Add(node.Node) (*cid.Cid, error)
	Get(context.Context, *cid.Cid) (node.Node, error)
	Remove(node.Node) error
26 27 28

	// GetDAG returns, in order, all the single leve child
	// nodes of the passed in node.
Jeromy's avatar
Jeromy committed
29
	GetMany(context.Context, []*cid.Cid) <-chan *NodeOption
30 31

	Batch() *Batch
32 33

	LinkService
Jeromy's avatar
Jeromy committed
34 35
}

36
type LinkService interface {
37 38 39 40
	// GetLinks return all links for a node.  The complete node does not
	// necessarily have to exist locally, or at all.  For example, raw
	// leaves cannot possibly have links so there is no need to look
	// at the node.
Jeromy's avatar
Jeromy committed
41
	GetLinks(context.Context, *cid.Cid) ([]*node.Link, error)
42 43

	GetOfflineLinkService() LinkService
44 45
}

46
func NewDAGService(bs bserv.BlockService) *dagService {
47
	return &dagService{Blocks: bs}
Jeromy's avatar
Jeromy committed
48 49
}

50
// dagService is an IPFS Merkle DAG service.
Juan Batiz-Benet's avatar
go lint  
Juan Batiz-Benet committed
51 52
// - the root is virtual (like a forest)
// - stores nodes' data in a BlockService
53 54
// TODO: should cache Nodes that are in memory, and be
//       able to free some of them when vm pressure is high
55
type dagService struct {
56
	Blocks bserv.BlockService
57 58
}

59
// Add adds a node to the dagService, storing the block in the BlockService
Jeromy's avatar
Jeromy committed
60
func (n *dagService) Add(nd node.Node) (*cid.Cid, error) {
61
	if n == nil { // FIXME remove this assertion. protect with constructor invariant
Jeromy's avatar
Jeromy committed
62
		return nil, fmt.Errorf("dagService is nil")
63 64
	}

65
	return n.Blocks.AddBlock(nd)
66 67
}

68
func (n *dagService) Batch() *Batch {
69 70 71 72 73 74 75 76 77
	return &Batch{
		ds:      n,
		MaxSize: 8 << 20,

		// By default, only batch up to 128 nodes at a time.
		// The current implementation of flatfs opens this many file
		// descriptors at the same time for the optimized batch write.
		MaxBlocks: 128,
	}
78 79
}

80
// Get retrieves a node from the dagService, fetching the block in the BlockService
Jeromy's avatar
Jeromy committed
81
func (n *dagService) Get(ctx context.Context, c *cid.Cid) (node.Node, error) {
82
	if n == nil {
83
		return nil, fmt.Errorf("dagService is nil")
84
	}
Jeromy's avatar
Jeromy committed
85

86 87
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
88

Jeromy's avatar
Jeromy committed
89
	b, err := n.Blocks.GetBlock(ctx, c)
90
	if err != nil {
91 92 93
		if err == bserv.ErrNotFound {
			return nil, ErrNotFound
		}
Jeromy's avatar
Jeromy committed
94
		return nil, fmt.Errorf("Failed to get block for %s: %v", c, err)
95 96
	}

97 98 99 100 101 102
	return decodeBlock(b)
}

func decodeBlock(b blocks.Block) (node.Node, error) {
	c := b.Cid()

Jeromy's avatar
Jeromy committed
103
	switch c.Type() {
Jeromy's avatar
Jeromy committed
104
	case cid.DagProtobuf:
105
		decnd, err := DecodeProtobuf(b.RawData())
Jeromy's avatar
Jeromy committed
106 107 108 109 110
		if err != nil {
			if strings.Contains(err.Error(), "Unmarshal failed") {
				return nil, fmt.Errorf("The block referred to by '%s' was not a valid merkledag node", c)
			}
			return nil, fmt.Errorf("Failed to decode Protocol Buffers: %v", err)
111
		}
112 113

		decnd.cached = b.Cid()
114
		decnd.Prefix = b.Cid().Prefix()
115 116
		return decnd, nil
	case cid.Raw:
117
		return NewRawNodeWPrefix(b.RawData(), b.Cid().Prefix())
Jeromy's avatar
Jeromy committed
118
	case cid.DagCBOR:
119
		return ipldcbor.Decode(b.RawData())
Jeromy's avatar
Jeromy committed
120
	default:
121
		return nil, fmt.Errorf("unrecognized object type: %s", c.Type())
122
	}
123
}
Jeromy's avatar
Jeromy committed
124

125 126
// GetLinks return the links for the node, the node doesn't necessarily have
// to exist locally.
Jeromy's avatar
Jeromy committed
127
func (n *dagService) GetLinks(ctx context.Context, c *cid.Cid) ([]*node.Link, error) {
128 129 130
	if c.Type() == cid.Raw {
		return nil, nil
	}
131 132 133 134
	node, err := n.Get(ctx, c)
	if err != nil {
		return nil, err
	}
135
	return node.Links(), nil
136 137
}

138
func (n *dagService) GetOfflineLinkService() LinkService {
139 140
	if n.Blocks.Exchange().IsOnline() {
		bsrv := bserv.New(n.Blocks.Blockstore(), offline.Exchange(n.Blocks.Blockstore()))
141 142 143 144 145 146
		return NewDAGService(bsrv)
	} else {
		return n
	}
}

Jeromy's avatar
Jeromy committed
147
func (n *dagService) Remove(nd node.Node) error {
148
	return n.Blocks.DeleteBlock(nd)
Jeromy's avatar
Jeromy committed
149 150
}

151 152 153
// GetLinksDirect creates a function to get the links for a node, from
// the node, bypassing the LinkService.  If the node does not exist
// locally (and can not be retrieved) an error will be returned.
Jeromy's avatar
Jeromy committed
154
func GetLinksDirect(serv node.NodeGetter) GetLinks {
155 156 157
	return func(ctx context.Context, c *cid.Cid) ([]*node.Link, error) {
		node, err := serv.Get(ctx, c)
		if err != nil {
Jeromy's avatar
Jeromy committed
158 159 160
			if err == bserv.ErrNotFound {
				err = ErrNotFound
			}
161 162 163 164 165 166
			return nil, err
		}
		return node.Links(), nil
	}
}

167 168 169 170 171 172 173 174 175 176 177 178 179
type sesGetter struct {
	bs *bserv.Session
}

func (sg *sesGetter) Get(ctx context.Context, c *cid.Cid) (node.Node, error) {
	blk, err := sg.bs.GetBlock(ctx, c)
	if err != nil {
		return nil, err
	}

	return decodeBlock(blk)
}

180
// FetchGraph fetches all nodes that are children of the given node
181
func FetchGraph(ctx context.Context, root *cid.Cid, serv DAGService) error {
182 183 184
	var ng node.NodeGetter = serv
	ds, ok := serv.(*dagService)
	if ok {
185
		ng = &sesGetter{bserv.NewSession(ctx, ds.Blocks)}
186 187
	}

188 189
	v, _ := ctx.Value("progress").(*ProgressTracker)
	if v == nil {
190
		return EnumerateChildrenAsync(ctx, GetLinksDirect(ng), root, cid.NewSet().Visit)
191 192 193 194 195 196 197 198 199 200
	}
	set := cid.NewSet()
	visit := func(c *cid.Cid) bool {
		if set.Visit(c) {
			v.Increment()
			return true
		} else {
			return false
		}
	}
201
	return EnumerateChildrenAsync(ctx, GetLinksDirect(ng), root, visit)
Jeromy's avatar
Jeromy committed
202
}
203

Jeromy's avatar
Jeromy committed
204 205
// FindLinks searches this nodes links for the given key,
// returns the indexes of any links pointing to it
Jeromy's avatar
Jeromy committed
206
func FindLinks(links []*cid.Cid, c *cid.Cid, start int) []int {
Jeromy's avatar
Jeromy committed
207
	var out []int
Jeromy's avatar
Jeromy committed
208 209
	for i, lnk_c := range links[start:] {
		if c.Equals(lnk_c) {
Jeromy's avatar
Jeromy committed
210
			out = append(out, i+start)
Jeromy's avatar
Jeromy committed
211 212
		}
	}
Jeromy's avatar
Jeromy committed
213
	return out
Jeromy's avatar
Jeromy committed
214 215
}

216
type NodeOption struct {
Jeromy's avatar
Jeromy committed
217
	Node node.Node
218 219 220
	Err  error
}

Jeromy's avatar
Jeromy committed
221
func (ds *dagService) GetMany(ctx context.Context, keys []*cid.Cid) <-chan *NodeOption {
222
	out := make(chan *NodeOption, len(keys))
223
	blocks := ds.Blocks.GetBlocks(ctx, keys)
224 225
	var count int

226 227 228 229 230 231
	go func() {
		defer close(out)
		for {
			select {
			case b, ok := <-blocks:
				if !ok {
232
					if count != len(keys) {
233
						out <- &NodeOption{Err: fmt.Errorf("failed to fetch all nodes")}
234
					}
235 236
					return
				}
Jeromy's avatar
Jeromy committed
237

238 239 240
				nd, err := decodeBlock(b)
				if err != nil {
					out <- &NodeOption{Err: err}
241 242
					return
				}
Jeromy's avatar
Jeromy committed
243

244
				out <- &NodeOption{Node: nd}
Jeromy's avatar
Jeromy committed
245 246
				count++

247
			case <-ctx.Done():
248
				out <- &NodeOption{Err: ctx.Err()}
249 250 251 252
				return
			}
		}
	}()
253
	return out
254 255
}

256
// GetDAG will fill out all of the links of the given Node.
257 258
// It returns a channel of nodes, which the caller can receive
// all the child nodes of 'root' on, in proper order.
Jeromy's avatar
Jeromy committed
259
func GetDAG(ctx context.Context, ds DAGService, root node.Node) []NodeGetter {
Jeromy's avatar
Jeromy committed
260
	var cids []*cid.Cid
261 262
	for _, lnk := range root.Links() {
		cids = append(cids, lnk.Cid)
Jeromy's avatar
Jeromy committed
263 264
	}

Jeromy's avatar
Jeromy committed
265
	return GetNodes(ctx, ds, cids)
Jeromy's avatar
Jeromy committed
266 267
}

Jeromy's avatar
Jeromy committed
268 269
// GetNodes returns an array of 'NodeGetter' promises, with each corresponding
// to the key with the same index as the passed in keys
Jeromy's avatar
Jeromy committed
270
func GetNodes(ctx context.Context, ds DAGService, keys []*cid.Cid) []NodeGetter {
271 272 273 274 275 276

	// Early out if no work to do
	if len(keys) == 0 {
		return nil
	}

Jeromy's avatar
Jeromy committed
277
	promises := make([]NodeGetter, len(keys))
rht's avatar
rht committed
278
	for i := range keys {
279
		promises[i] = newNodePromise(ctx)
Jeromy's avatar
Jeromy committed
280 281
	}

282
	dedupedKeys := dedupeKeys(keys)
283
	go func() {
284 285 286
		ctx, cancel := context.WithCancel(ctx)
		defer cancel()

287
		nodechan := ds.GetMany(ctx, dedupedKeys)
Jeromy's avatar
Jeromy committed
288

289
		for count := 0; count < len(keys); {
Jeromy's avatar
Jeromy committed
290
			select {
291
			case opt, ok := <-nodechan:
Jeromy's avatar
Jeromy committed
292
				if !ok {
Jeromy's avatar
Jeromy committed
293 294 295
					for _, p := range promises {
						p.Fail(ErrNotFound)
					}
Jeromy's avatar
Jeromy committed
296 297
					return
				}
Jeromy's avatar
Jeromy committed
298

299
				if opt.Err != nil {
300 301 302
					for _, p := range promises {
						p.Fail(opt.Err)
					}
303 304 305 306
					return
				}

				nd := opt.Node
Jeromy's avatar
Jeromy committed
307
				is := FindLinks(keys, nd.Cid(), 0)
Jeromy's avatar
Jeromy committed
308
				for _, i := range is {
309
					count++
310
					promises[i].Send(nd)
Jeromy's avatar
Jeromy committed
311 312 313
				}
			case <-ctx.Done():
				return
314 315 316
			}
		}
	}()
Jeromy's avatar
Jeromy committed
317 318 319
	return promises
}

320
// Remove duplicates from a list of keys
Jeromy's avatar
Jeromy committed
321 322 323 324
func dedupeKeys(cids []*cid.Cid) []*cid.Cid {
	set := cid.NewSet()
	for _, c := range cids {
		set.Add(c)
325
	}
Jeromy's avatar
Jeromy committed
326
	return set.Keys()
327 328
}

329
func newNodePromise(ctx context.Context) NodeGetter {
Jeromy's avatar
Jeromy committed
330
	return &nodePromise{
Jeromy's avatar
Jeromy committed
331
		recv: make(chan node.Node, 1),
Jeromy's avatar
Jeromy committed
332
		ctx:  ctx,
Jeromy's avatar
Jeromy committed
333
		err:  make(chan error, 1),
334
	}
Jeromy's avatar
Jeromy committed
335 336 337
}

type nodePromise struct {
Jeromy's avatar
Jeromy committed
338
	cache node.Node
339
	clk   sync.Mutex
Jeromy's avatar
Jeromy committed
340
	recv  chan node.Node
Jeromy's avatar
Jeromy committed
341
	ctx   context.Context
Jeromy's avatar
Jeromy committed
342
	err   chan error
Jeromy's avatar
Jeromy committed
343 344
}

Jeromy's avatar
Jeromy committed
345 346 347 348
// NodeGetter provides a promise like interface for a dag Node
// the first call to Get will block until the Node is received
// from its internal channels, subsequent calls will return the
// cached node.
Jeromy's avatar
Jeromy committed
349
type NodeGetter interface {
Jeromy's avatar
Jeromy committed
350
	Get(context.Context) (node.Node, error)
Jeromy's avatar
Jeromy committed
351
	Fail(err error)
Jeromy's avatar
Jeromy committed
352
	Send(node.Node)
Jeromy's avatar
Jeromy committed
353 354 355
}

func (np *nodePromise) Fail(err error) {
356 357 358 359 360 361 362 363 364
	np.clk.Lock()
	v := np.cache
	np.clk.Unlock()

	// if promise has a value, don't fail it
	if v != nil {
		return
	}

Jeromy's avatar
Jeromy committed
365
	np.err <- err
Jeromy's avatar
Jeromy committed
366 367
}

Jeromy's avatar
Jeromy committed
368
func (np *nodePromise) Send(nd node.Node) {
369 370
	var already bool
	np.clk.Lock()
Jeromy's avatar
Jeromy committed
371
	if np.cache != nil {
372 373 374 375 376 377 378 379 380 381 382 383
		already = true
	}
	np.cache = nd
	np.clk.Unlock()

	if already {
		panic("sending twice to the same promise is an error!")
	}

	np.recv <- nd
}

Jeromy's avatar
Jeromy committed
384
func (np *nodePromise) Get(ctx context.Context) (node.Node, error) {
385 386 387 388 389
	np.clk.Lock()
	c := np.cache
	np.clk.Unlock()
	if c != nil {
		return c, nil
Jeromy's avatar
Jeromy committed
390 391 392
	}

	select {
393 394
	case nd := <-np.recv:
		return nd, nil
Jeromy's avatar
Jeromy committed
395 396
	case <-np.ctx.Done():
		return nil, np.ctx.Err()
397 398
	case <-ctx.Done():
		return nil, ctx.Err()
Jeromy's avatar
Jeromy committed
399 400
	case err := <-np.err:
		return nil, err
Jeromy's avatar
Jeromy committed
401
	}
402
}
403 404 405 406

type Batch struct {
	ds *dagService

407 408 409 410
	blocks    []blocks.Block
	size      int
	MaxSize   int
	MaxBlocks int
411 412
}

Jeromy's avatar
Jeromy committed
413
func (t *Batch) Add(nd node.Node) (*cid.Cid, error) {
414
	t.blocks = append(t.blocks, nd)
415
	t.size += len(nd.RawData())
416
	if t.size > t.MaxSize || len(t.blocks) > t.MaxBlocks {
Jeromy's avatar
Jeromy committed
417
		return nd.Cid(), t.Commit()
418
	}
Jeromy's avatar
Jeromy committed
419
	return nd.Cid(), nil
420 421 422
}

func (t *Batch) Commit() error {
423 424
	_, err := t.ds.Blocks.AddBlocks(t.blocks)
	t.blocks = nil
425 426 427
	t.size = 0
	return err
}
428

429 430
type GetLinks func(context.Context, *cid.Cid) ([]*node.Link, error)

431 432 433
// EnumerateChildren will walk the dag below the given root node and add all
// unseen children to the passed in set.
// TODO: parallelize to avoid disk latency perf hits?
434 435 436
func EnumerateChildren(ctx context.Context, getLinks GetLinks, root *cid.Cid, visit func(*cid.Cid) bool) error {
	links, err := getLinks(ctx, root)
	if err != nil {
437 438
		return err
	}
439
	for _, lnk := range links {
440
		c := lnk.Cid
Jeromy's avatar
Jeromy committed
441
		if visit(c) {
442
			err = EnumerateChildren(ctx, getLinks, c, visit)
443 444 445 446 447 448 449
			if err != nil {
				return err
			}
		}
	}
	return nil
}
Jeromy's avatar
Jeromy committed
450

451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
type ProgressTracker struct {
	Total int
	lk    sync.Mutex
}

func (p *ProgressTracker) DeriveContext(ctx context.Context) context.Context {
	return context.WithValue(ctx, "progress", p)
}

func (p *ProgressTracker) Increment() {
	p.lk.Lock()
	defer p.lk.Unlock()
	p.Total++
}

func (p *ProgressTracker) Value() int {
	p.lk.Lock()
	defer p.lk.Unlock()
	return p.Total
}

472 473 474
// FetchGraphConcurrency is total number of concurrent fetches that
// 'fetchNodes' will start at a time
var FetchGraphConcurrency = 8
Jeromy's avatar
Jeromy committed
475

476
func EnumerateChildrenAsync(ctx context.Context, getLinks GetLinks, c *cid.Cid, visit func(*cid.Cid) bool) error {
477
	feed := make(chan *cid.Cid)
478
	out := make(chan []*node.Link)
479 480 481
	done := make(chan struct{})

	var setlk sync.Mutex
482

483 484
	errChan := make(chan error)
	fetchersCtx, cancel := context.WithCancel(ctx)
485

486
	defer cancel()
487

488 489
	for i := 0; i < FetchGraphConcurrency; i++ {
		go func() {
490
			for ic := range feed {
491
				links, err := getLinks(ctx, ic)
492 493 494
				if err != nil {
					errChan <- err
					return
Jeromy's avatar
Jeromy committed
495
				}
496

497 498 499
				setlk.Lock()
				unseen := visit(ic)
				setlk.Unlock()
500

501
				if unseen {
502
					select {
503
					case out <- links:
504
					case <-fetchersCtx.Done():
505 506 507
						return
					}
				}
Jeromy's avatar
Jeromy committed
508
				select {
509
				case done <- struct{}{}:
510
				case <-fetchersCtx.Done():
Jeromy's avatar
Jeromy committed
511 512
				}
			}
513
		}()
Jeromy's avatar
Jeromy committed
514
	}
515
	defer close(feed)
Jeromy's avatar
Jeromy committed
516

517
	send := feed
518
	var todobuffer []*cid.Cid
519
	var inProgress int
Jeromy's avatar
Jeromy committed
520

521
	next := c
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
	for {
		select {
		case send <- next:
			inProgress++
			if len(todobuffer) > 0 {
				next = todobuffer[0]
				todobuffer = todobuffer[1:]
			} else {
				next = nil
				send = nil
			}
		case <-done:
			inProgress--
			if inProgress == 0 && next == nil {
				return nil
			}
538 539
		case links := <-out:
			for _, lnk := range links {
540 541 542 543 544 545
				if next == nil {
					next = lnk.Cid
					send = feed
				} else {
					todobuffer = append(todobuffer, lnk.Cid)
				}
Jeromy's avatar
Jeromy committed
546
			}
547 548
		case err := <-errChan:
			return err
549

550
		case <-ctx.Done():
551
			return ctx.Err()
552
		}
Jeromy's avatar
Jeromy committed
553
	}
554

Jeromy's avatar
Jeromy committed
555
}