// Package balanced provides methods to build balanced DAGs, which are
// general-purpose DAGs in which all leaves (nodes representing chunks of data)
// are at the same distance from the root. Each node can have at most a fixed
// maximum number of children; to be able to store more leaf data nodes,
// balanced DAGs are extended by increasing their depth (and so having more
// intermediary nodes).
//
// Internal nodes are always represented by UnixFS nodes (of type `File`)
// encoded inside DAG nodes (see the `go-unixfs` package for details of
// UnixFS). In contrast, leaf nodes with data have multiple possible
// representations: UnixFS nodes as above, raw nodes with just the file data
// (no format), and Filestore nodes (which directly link to the file on disk
// using a format stored on a raw node; see the `go-ipfs/filestore` package
// for details of Filestore).
//
// If the entire file fits into just one node, it will be formatted as a
// (single) leaf node (without a parent) with the possible representations
// already mentioned. This is the only scenario where the root can be of a
// type different from a UnixFS node.
//
//                                                  +-------------+
//                                                  |   Root 4    |
//                                                  +-------------+
//                                                         |
//                               +-------------------------+-------------------------+
//                               |                                                   |
//                        +-------------+                                     +-------------+
//                        |   Node 2    |                                     |   Node 5    |
//                        +-------------+                                     +-------------+
//                               |                                                   |
//                +--------------+--------------+                      +-------------+
//                |                             |                      |
//         +-------------+               +-------------+        +-------------+
//         |   Node 1    |               |   Node 3    |        |   Node 6    |
//         +-------------+               +-------------+        +-------------+
//                |                             |                      |
//         +------+------+               +------+------+               +------+
//         |             |               |             |               |
//    +=========+   +=========+     +=========+   +=========+     +=========+
//    | Chunk 1 |   | Chunk 2 |     | Chunk 3 |   | Chunk 4 |     | Chunk 5 |
//    +=========+   +=========+     +=========+   +=========+     +=========+
//
package balanced

import (
	"errors"

	ft "github.com/ipfs/go-unixfs"
	h "github.com/ipfs/go-unixfs/importer/helpers"

	ipld "github.com/ipfs/go-ipld-format"
)

// Layout builds a balanced DAG layout. In a balanced DAG of depth 1, leaf
// nodes with data are added to a single `root` until the maximum number of
// links is reached. Then, to continue adding more data leaf nodes, a `newRoot`
// is created pointing to the old `root` (which will now become an intermediary
// node), increasing the depth of the DAG to 2. This increases the maximum
// number of data leaf nodes the DAG can hold (`Maxlinks() ^ depth`). The
// `fillNodeRec` function will add more intermediary child nodes to `newRoot`
// (which already has `root` as a child) that in turn will have leaf nodes with
// data added to them. After that process is completed (the maximum number of
// links is reached), `fillNodeRec` will return and the loop will be repeated:
// the `newRoot` just created will become the old `root`, and a new root will
// be created again to increase the depth of the DAG. The process is repeated
// until there is no more data to add (i.e. the DagBuilderHelper's Done()
// function returns true).
//
// The nodes are filled recursively, so the DAG is built from the bottom up.
// Leaf nodes are created first using the chunked file data and its size. The
// size is then bubbled up to the parent (internal) node, which aggregates all
// the sizes of its children and bubbles that combined size up to its parent,
// and so on up to the root. This way, a balanced DAG acts like a B-tree when
// seeking to a byte offset in the file the graph represents: each internal
// node uses the file sizes of its children as an index when seeking.
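//
// For example (an illustrative calculation; the numbers are defaults of the
// helpers and chunker packages, not something this package fixes): with
// `Maxlinks()` returning 174 links per node and 256 KiB chunks, a DAG of
// depth 1 can hold up to 174 chunks (about 43.5 MiB of file data), a DAG of
// depth 2 up to 174^2 = 30,276 chunks (about 7.4 GiB), and so on.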
//
// `Layout` creates a root and hands it off to be filled:
//
//              +-------------+
//              |   Root 1    |
//              +-------------+
//                     |
//        ( fillNodeRec fills in the )
//        ( chunks on the root.      )
//                     |
//              +------+------+
//              |             |
//         + - - - - +   + - - - - +
//         | Chunk 1 |   | Chunk 2 |
//         + - - - - +   + - - - - +
//
//                      ↓
// When the root is full but there's more data...
//                      ↓
//
//              +-------------+
//              |   Root 1    |
//              +-------------+
//                     |
//              +------+------+
//              |             |
//         +=========+   +=========+   + - - - - +
//         | Chunk 1 |   | Chunk 2 |   | Chunk 3 |
//         +=========+   +=========+   + - - - - +
//
//                      ↓
// ...Layout's job is to create a new root.
//                      ↓
//
//                    +-------------+
//                    |   Root 2    |
//                    +-------------+
//                           |
//               +----------+ - - - - - +
//               |                      |
//        +-------------+  ( fillNodeRec creates the )
//        |   Node 1    |  ( branch that connects    )
//        +-------------+  ( "Root 2" to "Chunk 3."  )
//               |                      |
//        +------+------+               + - - - - +
//        |             |               |
//     +=========+   +=========+     + - - - - +
//     | Chunk 1 |   | Chunk 2 |     | Chunk 3 |
//     +=========+   +=========+     + - - - - +
//
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
	if db.Done() {
		// No data, return just an empty node.
		root, err := db.NewLeafNode(nil)
		if err != nil {
			return nil, err
		}
		// This works without Filestore support (`ProcessFileStore`).
		// TODO: Why? Is there a test case missing?

		return root, db.Add(root)
	}

	// The first `root` will be a single leaf node with data (corner case);
	// after that, subsequent `root` nodes will always be internal nodes
	// (with a depth > 0) that can be handled by the loop.
	root, fileSize, err := db.NewLeafDataNode()
	if err != nil {
		return nil, err
	}

	// Each time a DAG of a certain `depth` is filled (because it has
	// reached its maximum capacity of `db.Maxlinks()` per node), extend it
	// by making it a sub-DAG of a bigger DAG with `depth+1`.
	for depth := 1; !db.Done(); depth++ {

		// Add the old `root` as a child of the `newRoot`.
		newRoot := db.NewFSNodeOverDag(ft.TFile)
		err = newRoot.AddChild(root, fileSize, db)
		if err != nil {
			return nil, err
		}

		// Fill the `newRoot` (that already has the old `root` as a child)
		// and make it the current `root` for the next iteration (when
		// it will become "old").
		root, fileSize, err = fillNodeRec(db, newRoot, depth)
		if err != nil {
			return nil, err
		}
	}

	return root, db.Add(root)
}

// fillNodeRec will "fill" the given internal (non-leaf) `node` with data by
// adding child nodes to it, either leaf data nodes (if `depth` is 1) or more
// internal nodes with a higher depth (calling itself recursively on them
// until *they* are filled with data). The data to fill the node with is
// provided by the DagBuilderHelper.
//
// `node` represents the (sub-)DAG root that is being filled. If it is `nil`
// (the recursive case), a new node is created. If `fillNodeRec` has been
// called from `Layout` (see diagram below), `node` points to the new root
// (the one that increases the depth of the DAG) and already has a child (the
// old root). New children will be added to this new root, and those children
// will in turn be filled (calling `fillNodeRec` recursively).
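//
// For example, continuing the `Layout` diagram above: filling "Root 2"
// corresponds to the call `fillNodeRec(db, newRoot, 2)`; since `depth` is
// greater than 1, the loop below recurses with `fillNodeRec(db, nil, 1)`,
// which creates the new internal node, fills it with "Chunk 3" (and any
// further available chunks, up to `Maxlinks()`), commits it, and returns it
// together with its file size so that this level can attach it with
// `AddChild`.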
//
//                       +-------------+
//                       |   `node`    |
//                       |  (new root) |
//                       +-------------+
//                              |
//                +-------------+ - - - - - - - + - - - - - - - - - - +
//                |                             |                     |
//         +-------------+                + - - - - - +         + - - - - - +
//         | (old root)  |                | new child |         |           |
//         +-------------+                + - - - - - +         + - - - - - +
//                |                             |                     |
//         +------+------+               + - - -+- - - +
//         |             |               |             |
//    +=========+   +=========+     + - - - - +   + - - - - +
//    | Chunk 1 |   | Chunk 2 |     | Chunk 3 |   | Chunk 4 |
//    +=========+   +=========+     + - - - - +   + - - - - +
//
// The `node` to be filled uses the `FSNodeOverDag` abstraction, which allows
// adding child nodes without packing/unpacking the UnixFS layer node (it
// keeps an internal `ft.FSNode` cache).
//
// It returns the `ipld.Node` representation of the passed `node` filled with
// children and the `nodeFileSize` with the total size of the file chunk
// (leaf) nodes stored under this node (parent nodes store this to enable
// efficient seeking through the DAG when reading data later).
//
// warning: **children** pinned indirectly, but input node IS NOT pinned.
func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (filledNode ipld.Node, nodeFileSize uint64, err error) {
	if depth < 1 {
		return nil, 0, errors.New("attempt to fillNode at depth < 1")
	}

	if node == nil {
		node = db.NewFSNodeOverDag(ft.TFile)
	}

	// Child node created on every iteration to add to parent `node`.
	// It can be a leaf node or another internal node.
	var childNode ipld.Node
	// File size from the child node needed to update the `FSNode`
	// in `node` when adding the child.
	var childFileSize uint64

	// While we have room and there is data available to be added.
	for node.NumChildren() < db.Maxlinks() && !db.Done() {

		if depth == 1 {
			// Base case: add leaf node with data.
			childNode, childFileSize, err = db.NewLeafDataNode()
			if err != nil {
				return nil, 0, err
			}
		} else {
			// Recursion case: create an internal node that will in turn
			// keep descending in the DAG and adding child nodes to it.
			childNode, childFileSize, err = fillNodeRec(db, nil, depth-1)
			if err != nil {
				return nil, 0, err
			}
		}

		err = node.AddChild(childNode, childFileSize, db)
		if err != nil {
			return nil, 0, err
		}
	}

	nodeFileSize = node.FileSize()

	// Get the final `dag.ProtoNode` with the `FSNode` data encoded inside.
	filledNode, err = node.Commit()
	if err != nil {
		return nil, 0, err
	}

	return filledNode, nodeFileSize, nil
}
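
// A minimal usage sketch of this package (it would live in caller code or an
// example test, not in this file). The chunker constructor, the DAG-service
// wiring, and the `buildFileDAG` helper name are illustrative assumptions and
// may differ between versions of the packages involved:
//
//	import (
//		"bytes"
//
//		chunker "github.com/ipfs/go-ipfs-chunker"
//		ipld "github.com/ipfs/go-ipld-format"
//		balanced "github.com/ipfs/go-unixfs/importer/balanced"
//		h "github.com/ipfs/go-unixfs/importer/helpers"
//	)
//
//	func buildFileDAG(dserv ipld.DAGService, data []byte) (ipld.Node, error) {
//		// Wrap the DAG service and the default chunker (256 KiB blocks)
//		// in a DagBuilderHelper, then hand it to Layout.
//		dbp := h.DagBuilderParams{
//			Dagserv:  dserv,
//			Maxlinks: h.DefaultLinksPerBlock,
//		}
//		db, err := dbp.New(chunker.DefaultSplitter(bytes.NewReader(data)))
//		if err != nil {
//			return nil, err
//		}
//		return balanced.Layout(db)
//	}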