Commit cdd736c9 authored by Juan Batiz-Benet

merkledag traversal

parent 60d2be9f
// Package traverse provides merkledag traversal functions
package traverse
import (
"errors"
mdag "github.com/jbenet/go-ipfs/merkledag"
)
// Order is an identifier for traversal algorithm orders.
// It selects which walk Traverse performs via Options.Order.
type Order int
// Traversal orders understood by Traverse. DFSPre is the zero value, and
// Traverse also falls back to it for any value not listed here.
const (
DFSPre Order = iota // depth-first pre-order: a node is visited before its children
DFSPost // depth-first post-order: a node is visited after its children
BFS // breadth-first: nodes are visited in queue (level) order
)
// Options specifies a series of traversal options.
// A value of this type is passed to Traverse and copied into the traversal.
type Options struct {
DAG mdag.DAGService // the dagservice handed to Link.GetNode to fetch nodes
Order Order // what order to traverse in (DFSPre, DFSPost or BFS)
Func Func // the function to perform at each step; called once per visited node
ErrFunc ErrFunc // see ErrFunc. Optional; when nil, node-fetch errors stop the traversal
SkipDuplicates bool // whether to skip duplicate nodes (nodes whose Key was already seen)
}
// State is a current traversal state.
// It is the value handed to Func for every visited node.
type State struct {
// Node is the node currently being visited.
Node *mdag.Node
// Depth is the distance from the root: the root is at 0 and every
// followed link adds 1.
Depth int
}
// traversal carries the per-call bookkeeping shared by all traversal orders.
type traversal struct {
// opts is the caller-supplied configuration for this walk.
opts Options
// seen is the set of node keys already encountered. It is only consulted
// and filled when opts.SkipDuplicates is set.
seen map[string]struct{}
}
// shouldSkip reports whether n must be left out of the walk because it was
// already encountered. When duplicate skipping is disabled it always answers
// (false, nil). Otherwise the node's key is looked up in the seen set: a hit
// yields (true, nil), a miss records the key and yields (false, nil). If the
// key cannot be computed the node is skipped and the error is returned.
func (t *traversal) shouldSkip(n *mdag.Node) (bool, error) {
	if !t.opts.SkipDuplicates {
		return false, nil
	}

	key, err := n.Key()
	if err != nil {
		return true, err
	}

	id := string(key)
	if _, dup := t.seen[id]; dup {
		return true, nil
	}
	t.seen[id] = struct{}{}
	return false, nil
}
// callFunc hands the given state to the user-supplied visitor (Options.Func)
// and returns whatever error the visitor reports.
func (t *traversal) callFunc(next State) error {
	visit := t.opts.Func
	return visit(next)
}
// getNode resolves link to the node it points at.
//
// A non-nil error means the caller must stop processing. A nil node with a
// nil error means the link should simply be skipped.
//
// Errors can come from fetching the node or from the duplicate check. When
// Options.ErrFunc is set it gets the final say: the node is dropped and the
// error returned is whatever ErrFunc returns (nil lets the walk continue).
func (t *traversal) getNode(link *mdag.Link) (*mdag.Node, error) {
	node, err := link.GetNode(t.opts.DAG)
	if err != nil {
		node = nil
	} else {
		var skip bool
		skip, err = t.shouldSkip(node)
		if skip {
			node = nil
		}
	}

	if err != nil && t.opts.ErrFunc != nil {
		// attempt recovery; the node is skipped regardless of the outcome.
		return nil, t.opts.ErrFunc(err)
	}
	return node, err
}
// Func is the type of the function called for each dag.Node visited by Traverse.
// The current argument contains the current traversal state (the node and its depth).
// If an error is returned, processing stops and Traverse returns that error.
type Func func(current State) error
// ErrFunc is an optional handler for errors hit while walking to a Node.
//
// If there is a problem walking to the Node, and ErrFunc is provided, Traverse
// will call ErrFunc with the error encountered. ErrFunc can decide how to handle
// that error, and return an error back to Traverse with how to proceed:
//   - nil - skip the Node and its children, but continue processing
//   - all other errors halt processing immediately.
//
// If ErrFunc is nil, Traverse will stop, as if:
//
//	opts.ErrFunc = func(err error) error { return err }
type ErrFunc func(err error) error
// Traverse walks the merkledag rooted at root, calling o.Func once for every
// visited node in the order selected by o.Order. Unrecognized orders fall
// back to depth-first pre-order.
//
// It returns an error if root or o.Func is nil, the first error returned by
// o.Func, or the first node-fetch error that o.ErrFunc (if any) does not
// turn into nil. It returns nil once the walk completes.
func Traverse(root *mdag.Node, o Options) error {
	// Fail with an error instead of panicking deep inside the walk.
	if root == nil {
		return errors.New("traverse: nil root node")
	}
	if o.Func == nil {
		return errors.New("traverse: nil Func in options")
	}

	t := traversal{
		opts: o,
		seen: map[string]struct{}{},
	}
	state := State{
		Node:  root,
		Depth: 0,
	}

	switch o.Order {
	case DFSPost:
		return dfsPostTraverse(state, &t)
	case BFS:
		return bfsTraverse(state, &t)
	default: // DFSPre, and any unknown order
		return dfsPreTraverse(state, &t)
	}
}
// dfsFunc is the signature shared by the depth-first walkers; dfsDescend
// calls one of them on every child it descends into.
type dfsFunc func(state State, t *traversal) error
// dfsPreTraverse visits the node in state first and then recurses into its
// children. The first error from either step aborts the walk.
func dfsPreTraverse(state State, t *traversal) error {
	if err := t.callFunc(state); err != nil {
		return err
	}
	return dfsDescend(dfsPreTraverse, state, t)
}
// dfsPostTraverse recurses into the children of the node in state first and
// visits the node itself afterwards. The first error from either step aborts
// the walk.
func dfsPostTraverse(state State, t *traversal) error {
	if err := dfsDescend(dfsPostTraverse, state, t); err != nil {
		return err
	}
	return t.callFunc(state)
}
// dfsDescend resolves each link of curr.Node in order and applies df to every
// child that is not skipped, one level deeper than curr. It stops at the
// first error from resolving a link or from df.
func dfsDescend(df dfsFunc, curr State, t *traversal) error {
	childDepth := curr.Depth + 1
	for _, link := range curr.Node.Links {
		child, err := t.getNode(link)
		if err != nil {
			return err
		}
		if child != nil { // a nil child means "skip this link"
			if err := df(State{Node: child, Depth: childDepth}, t); err != nil {
				return err
			}
		}
	}
	return nil
}
// bfsTraverse walks the DAG breadth-first starting at root. The root goes
// through the duplicate check first, then nodes are visited in FIFO order
// and their non-skipped children are queued one level deeper. The first
// error from the visitor or from resolving a link aborts the walk.
func bfsTraverse(root State, t *traversal) error {
	skip, err := t.shouldSkip(root.Node)
	if err != nil || skip {
		return err
	}

	var pending queue
	pending.enq(root)

	for pending.len() > 0 {
		head := pending.deq()
		if head.Node == nil {
			return errors.New("failed to dequeue though queue not empty")
		}

		// call user's func
		if err := t.callFunc(head); err != nil {
			return err
		}

		childDepth := head.Depth + 1
		for _, link := range head.Node.Links {
			child, err := t.getNode(link)
			if err != nil {
				return err
			}
			if child != nil { // a nil child means "skip this link"
				pending.enq(State{Node: child, Depth: childDepth})
			}
		}
	}
	return nil
}
// queue is a simple FIFO of traversal states backed by a slice.
// The zero value is an empty queue ready for use.
type queue struct {
	s []State
}

// enq appends n to the back of the queue.
func (q *queue) enq(n State) {
	q.s = append(q.s, n)
}

// deq removes and returns the state at the front of the queue.
// It returns the zero State when the queue is empty.
func (q *queue) deq() State {
	if len(q.s) == 0 {
		return State{}
	}
	head := q.s[0]
	// Clear the vacated slot: reslicing alone would leave the dequeued
	// State (and the node it points to) reachable through the backing
	// array for as long as the queue lives.
	q.s[0] = State{}
	q.s = q.s[1:]
	return head
}

// len reports how many states are currently queued.
func (q *queue) len() int {
	return len(q.s)
}
package traverse
import (
"bytes"
"fmt"
"testing"
mdag "github.com/jbenet/go-ipfs/merkledag"
)
// TestDFSPreNoSkip checks depth-first pre-order output on every fixture
// graph with duplicate skipping disabled.
func TestDFSPreNoSkip(t *testing.T) {
	opts := Options{Order: DFSPre}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
0 /a
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
`},
		{newLinkedList, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
		{newBinaryTree, `
0 /a
1 /a/aa
2 /a/aa/aaa
2 /a/aa/aab
1 /a/ab
2 /a/ab/aba
2 /a/ab/abb
`},
		{newBinaryDAG, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// TestDFSPreSkip checks depth-first pre-order output on every fixture graph
// with duplicate skipping enabled.
func TestDFSPreSkip(t *testing.T) {
	opts := Options{Order: DFSPre, SkipDuplicates: true}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
0 /a
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
`},
		{newLinkedList, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
		{newBinaryTree, `
0 /a
1 /a/aa
2 /a/aa/aaa
2 /a/aa/aab
1 /a/ab
2 /a/ab/aba
2 /a/ab/abb
`},
		{newBinaryDAG, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// TestDFSPostNoSkip checks depth-first post-order output on every fixture
// graph with duplicate skipping disabled.
func TestDFSPostNoSkip(t *testing.T) {
	opts := Options{Order: DFSPost}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
0 /a
`},
		{newLinkedList, `
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
1 /a/aa
0 /a
`},
		{newBinaryTree, `
2 /a/aa/aaa
2 /a/aa/aab
1 /a/aa
2 /a/ab/aba
2 /a/ab/abb
1 /a/ab
0 /a
`},
		{newBinaryDAG, `
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
1 /a/aa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
1 /a/aa
0 /a
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// TestDFSPostSkip checks depth-first post-order output on every fixture
// graph with duplicate skipping enabled.
func TestDFSPostSkip(t *testing.T) {
	opts := Options{Order: DFSPost, SkipDuplicates: true}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
0 /a
`},
		{newLinkedList, `
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
1 /a/aa
0 /a
`},
		{newBinaryTree, `
2 /a/aa/aaa
2 /a/aa/aab
1 /a/aa
2 /a/ab/aba
2 /a/ab/abb
1 /a/ab
0 /a
`},
		{newBinaryDAG, `
4 /a/aa/aaa/aaaa/aaaaa
3 /a/aa/aaa/aaaa
2 /a/aa/aaa
1 /a/aa
0 /a
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// TestBFSNoSkip checks breadth-first output on every fixture graph with
// duplicate skipping disabled.
func TestBFSNoSkip(t *testing.T) {
	opts := Options{Order: BFS}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
0 /a
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
`},
		{newLinkedList, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
		{newBinaryTree, `
0 /a
1 /a/aa
1 /a/ab
2 /a/aa/aaa
2 /a/aa/aab
2 /a/ab/aba
2 /a/ab/abb
`},
		{newBinaryDAG, `
0 /a
1 /a/aa
1 /a/aa
2 /a/aa/aaa
2 /a/aa/aaa
2 /a/aa/aaa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// TestBFSSkip checks breadth-first output on every fixture graph with
// duplicate skipping enabled.
func TestBFSSkip(t *testing.T) {
	opts := Options{Order: BFS, SkipDuplicates: true}

	// Each fixture is built right before it is walked.
	for _, tc := range []struct {
		build  func(*testing.T) *mdag.Node
		expect string
	}{
		{newFan, `
0 /a
1 /a/aa
1 /a/ab
1 /a/ac
1 /a/ad
`},
		{newLinkedList, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
		{newBinaryTree, `
0 /a
1 /a/aa
1 /a/ab
2 /a/aa/aaa
2 /a/aa/aab
2 /a/ab/aba
2 /a/ab/abb
`},
		{newBinaryDAG, `
0 /a
1 /a/aa
2 /a/aa/aaa
3 /a/aa/aaa/aaaa
4 /a/aa/aaa/aaaa/aaaaa
`},
	} {
		testWalkOutputs(t, tc.build(t), opts, []byte(tc.expect))
	}
}
// testWalkOutputs runs Traverse over root with opts, recording one
// "<depth> <data>" line per visited node, and reports a test error when the
// recorded output differs from expect. Leading newlines in expect are
// ignored, and opts.Func is replaced by the recording visitor.
func testWalkOutputs(t *testing.T, root *mdag.Node, opts Options, expect []byte) {
	expect = bytes.TrimLeft(expect, "\n")

	var out bytes.Buffer
	opts.Func = func(current State) error {
		line := fmt.Sprintf("%d %s\n", current.Depth, current.Node.Data)
		t.Logf("walk: %s", line)
		out.WriteString(line)
		return nil
	}

	if err := Traverse(root, opts); err != nil {
		t.Error(err)
		return
	}

	actual := out.Bytes()
	if bytes.Equal(actual, expect) {
		t.Logf("expect matches actual:\n%s", expect)
		return
	}
	t.Error("error: outputs differ")
	t.Logf("expect:\n%s", expect)
	t.Logf("actual:\n%s", actual)
}
// newFan builds a root "/a" with four direct children (aa, ab, ac, ad).
func newFan(t *testing.T) *mdag.Node {
	root := &mdag.Node{Data: []byte("/a")}
	for _, name := range []string{"aa", "ab", "ac", "ad"} {
		addChild(t, root, name)
	}
	return root
}
// newLinkedList builds a single chain of five nodes starting at root "/a",
// each node having exactly one child.
func newLinkedList(t *testing.T) *mdag.Node {
	root := &mdag.Node{Data: []byte("/a")}
	tail := root
	for _, name := range []string{"aa", "aaa", "aaaa", "aaaaa"} {
		tail = addChild(t, tail, name)
	}
	return root
}
// newBinaryTree builds a three-level tree: root "/a" has children aa and ab,
// and each of those has two children of its own.
func newBinaryTree(t *testing.T) *mdag.Node {
	root := &mdag.Node{Data: []byte("/a")}
	left := addChild(t, root, "aa")
	right := addChild(t, root, "ab")
	for _, name := range []string{"aaa", "aab"} {
		addChild(t, left, name)
	}
	for _, name := range []string{"aba", "abb"} {
		addChild(t, right, name)
	}
	return root
}
// newBinaryDAG builds a five-node chain starting at root "/a" and then links
// every node to its successor a second time, so each non-leaf node ends up
// with two links to the same child.
func newBinaryDAG(t *testing.T) *mdag.Node {
	root := &mdag.Node{Data: []byte("/a")}

	// First pass: create the chain, one child per node.
	chain := []*mdag.Node{root}
	for _, name := range []string{"aa", "aaa", "aaaa", "aaaaa"} {
		parent := chain[len(chain)-1]
		chain = append(chain, addChild(t, parent, name))
	}

	// Second pass: add the duplicate link from each node to its successor.
	for i := 0; i+1 < len(chain); i++ {
		addLink(t, chain[i], chain[i+1])
	}
	return root
}
// addLink adds a link from a to b, named "<a.Data>2<b.Data>", and reports
// any failure as a test error.
func addLink(t *testing.T, a, b *mdag.Node) {
	name := string(a.Data) + "2" + string(b.Data)
	err := a.AddNodeLink(name, b)
	if err != nil {
		t.Error(err)
	}
}
// addChild creates a node whose Data is a's Data followed by "/" and name,
// links it under a, and returns the new node.
func addChild(t *testing.T, a *mdag.Node, name string) *mdag.Node {
	path := string(a.Data) + "/" + name
	child := &mdag.Node{Data: []byte(path)}
	addLink(t, a, child)
	return child
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment