token_producers.go 8.79 KB
Newer Older
1 2 3 4 5 6 7 8
package codectools

import (
	"errors"
	"fmt"
	"io"
)

tavit ohanian's avatar
tavit ohanian committed
9
// TokenWalk walks an ld Node and repeatedly calls the visitFn,
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
// calling it once for every "token" yielded by the walk.
// Every map and list is yielded as a token at their beginning,
// and another token when they're finished;
// every scalar value (strings, bools, bytes, ints, etc) is yielded as a single token.
//
// The token pointer given to the visitFn will be identical on every call,
// but the data it contains will vary.
// The token may contain invalid data that is leftover from previous calls
// in some of its union fields; correct behavior requires looking at the
// token's Kind field before handling any of its other fields.
//
// If any error is returned by the visitFn, it will cause the walk to halt,
// and TokenWalk will return that error.
// However, if the error is the value TokenWalkSkip, and it's been returned
// when visitFn was called with a MapOpen or ListOpen token, the walk will
// skip forward over that entire map or list, and continue (with the
// next token being the close token that complements the open token).
// Returning a TokenWalkSkip when the token was any of the scalar kinds
// (e.g. anything other than a MapOpen or a ListOpen) has no effect.
//
// TokenAssembler is the rough dual of TokenWalk.
tavit ohanian's avatar
tavit ohanian committed
31
func TokenWalk(n ld.Node, visitFn func(tk *Token) error) error {
32 33 34 35 36 37 38 39
	// TokenWalk would be trivial to implement over NodeTokenizer,
	//  but we do a distinct implementation here because NodeTokenizer's resumable implementation means it needs a user-space stack,
	//  and to reuse that would require allocations which this method (since it's not resumable in the same way) can easily avoid (or at least, keep on the stack).

	var tk Token // For capture, once.
	return tokenWalk(&tk, n, visitFn)
}

tavit ohanian's avatar
tavit ohanian committed
40
func tokenWalk(tk *Token, n ld.Node, visitFn func(*Token) error) error {
41
	switch n.Kind() {
tavit ohanian's avatar
tavit ohanian committed
42
	case ld.Kind_Map:
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
		tk.Kind = TokenKind_MapOpen
		tk.Length = n.Length()
		tk.Node = n
		if err := visitFn(tk); err != nil {
			return err
		}
		mitr := n.MapIterator()
		for !mitr.Done() {
			k, v, err := mitr.Next()
			if err != nil {
				return err
			}
			if err := tokenWalk(tk, k, visitFn); err != nil {
				return err
			}
			if err := tokenWalk(tk, v, visitFn); err != nil {
				return err
			}
		}
		tk.Kind = TokenKind_MapClose
		tk.Node = n
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
65
	case ld.Kind_List:
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
		tk.Kind = TokenKind_ListOpen
		tk.Length = n.Length()
		tk.Node = n
		if err := visitFn(tk); err != nil {
			return err
		}
		litr := n.ListIterator()
		for !litr.Done() {
			_, v, err := litr.Next()
			if err != nil {
				return err
			}
			if err := tokenWalk(tk, v, visitFn); err != nil {
				return err
			}
		}
		tk.Kind = TokenKind_ListClose
		tk.Node = n
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
85
	case ld.Kind_Null:
86 87
		tk.Kind = TokenKind_Null
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
88
	case ld.Kind_Bool:
89 90 91
		tk.Kind = TokenKind_Bool
		tk.Bool, _ = n.AsBool()
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
92
	case ld.Kind_Int:
93 94
		tk.Kind = TokenKind_Int
		i, _ := n.AsInt()
tavit ohanian's avatar
tavit ohanian committed
95
		tk.Int = int64(i) // TODO: upgrade all of ld to use high precision int consistently
96
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
97
	case ld.Kind_Float:
98 99 100
		tk.Kind = TokenKind_Float
		tk.Float, _ = n.AsFloat()
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
101
	case ld.Kind_String:
102 103 104
		tk.Kind = TokenKind_String
		tk.Str, _ = n.AsString()
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
105
	case ld.Kind_Bytes:
106 107 108
		tk.Kind = TokenKind_Bytes
		tk.Bytes, _ = n.AsBytes()
		return visitFn(tk)
tavit ohanian's avatar
tavit ohanian committed
109
	case ld.Kind_Link:
110 111 112 113
		tk.Kind = TokenKind_Link
		tk.Link, _ = n.AsLink()
		return visitFn(tk)
	default:
114
		panic(fmt.Errorf("unrecognized node kind (%q?)", n.Kind()))
115 116 117 118 119 120 121 122
	}
	return nil
}

var TokenWalkSkip = errors.New("token walk: skip")

// --- the stepwise token producer system (more complicated; has a userland stack) is below -->

tavit ohanian's avatar
tavit ohanian committed
123
// A TokenReader can be produced from any ld.Node using NodeTokenizer.
124 125 126 127 128 129
// TokenReader are also commonly implemented by codec packages,
// wherein they're created over a serial data stream and tokenize that stream when pumped.
//
// TokenReader implementations are encouraged to yield the same token pointer repeatedly,
// just varying the contents of the value, in order to avoid unnecessary allocations.
//
130 131 132 133 134 135 136 137 138
// A 'budget' parameter must be provided to a TokenReader as a pointer to an integer.
// The TokenReader should limit how much memory it uses according to the budget remaining.
// (The budget is considered to be roughly in units of bytes, but can be treated as an approximation.)
// The budget should primarily be managed by the caller of the TokenReader
// (e.g., after the TokenReader returns a 20 byte string, the caller should decrement the budget by 20),
// but a TokenReader may also do its own decrements to the budget if some operations are particularly costly and the TokenReader wants this to be accounted for.
// The budget may be ignored if the TokenReader just yielding access to already in-memory information;
// the main intent of the budget is to avoid resource exhausting when bringing new data into program memory.
//
139
type TokenReader func(budget *int64) (next *Token, err error)
140 141 142 143 144 145 146 147

type NodeTokenizer struct {
	// This structure is designed to be embeddable.  Use Initialize when doing so.

	tk  Token // We embed this to avoid allocations; we'll be repeatedly yielding a pointer to this piece of memory.
	stk nodeTokenizerStack
}

tavit ohanian's avatar
tavit ohanian committed
148
func (nt *NodeTokenizer) Initialize(n ld.Node) {
149 150 151 152 153 154 155 156 157
	if nt.stk == nil {
		nt.stk = make(nodeTokenizerStack, 0, 10)
	} else {
		nt.stk = nt.stk[0:0]
	}
	nt.stk.Push(n)
}

type nodeTokenizerStackRow struct {
tavit ohanian's avatar
tavit ohanian committed
158 159 160 161 162
	state uint8           // 0: start this node; 1: continue list; 2: continue map with key; 3: continue map with value.
	n     ld.Node         // Always present.
	litr  ld.ListIterator // At most one of these is present.
	mitr  ld.MapIterator  // At most one of these is present.
	mval  ld.Node         // The value to resume at when in state 3.
163 164 165 166 167 168 169

}
type nodeTokenizerStack []nodeTokenizerStackRow

func (stk nodeTokenizerStack) Tip() *nodeTokenizerStackRow {
	return &stk[len(stk)-1]
}
tavit ohanian's avatar
tavit ohanian committed
170
func (stk *nodeTokenizerStack) Push(n ld.Node) {
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
	*stk = append(*stk, nodeTokenizerStackRow{n: n})
}
func (stk *nodeTokenizerStack) Pop() {
	if len(*stk) == 0 {
		return
	}
	*stk = (*stk)[0 : len(*stk)-1]
}

// ReadToken fits the TokenReader functional interface, and so may be used anywhere a TokenReader is required.
func (nt *NodeTokenizer) ReadToken() (next *Token, err error) {
	// How stack depth works:
	// - finding that you're starting to handle map or least leaves it the same;
	// - before recursing to handle a child key or value, push stack;
	// - any time you finish something, whether scalar or recursive, pop stack.
	// This could be written differently: in particular,
	//  scalar leaves could be handled without increasing stack depth by that last increment.
	//   However, doing so would make for more complicated code.
	//    Maybe worth it; PRs welcome; benchmarks first.
	if len(nt.stk) == 0 {
		return nil, io.EOF
	}
	tip := nt.stk.Tip()
	switch tip.state {
	case 0:
196
		switch tip.n.Kind() {
tavit ohanian's avatar
tavit ohanian committed
197
		case ld.Kind_Map:
198 199 200 201 202 203
			nt.tk.Kind = TokenKind_MapOpen
			nt.tk.Length = tip.n.Length()
			nt.tk.Node = tip.n
			tip.state = 2
			tip.mitr = tip.n.MapIterator()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
204
		case ld.Kind_List:
205 206 207 208 209 210
			nt.tk.Kind = TokenKind_ListOpen
			nt.tk.Length = tip.n.Length()
			nt.tk.Node = tip.n
			tip.state = 1
			tip.litr = tip.n.ListIterator()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
211
		case ld.Kind_Null:
212 213 214
			nt.tk.Kind = TokenKind_Null
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
215
		case ld.Kind_Bool:
216 217 218 219
			nt.tk.Kind = TokenKind_Bool
			nt.tk.Bool, _ = tip.n.AsBool()
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
220
		case ld.Kind_Int:
221 222
			nt.tk.Kind = TokenKind_Int
			i, _ := tip.n.AsInt()
tavit ohanian's avatar
tavit ohanian committed
223
			nt.tk.Int = int64(i) // TODO: upgrade all of ld to use high precision int consistently
224 225
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
226
		case ld.Kind_Float:
227 228 229 230
			nt.tk.Kind = TokenKind_Float
			nt.tk.Float, _ = tip.n.AsFloat()
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
231
		case ld.Kind_String:
232 233 234 235
			nt.tk.Kind = TokenKind_String
			nt.tk.Str, _ = tip.n.AsString()
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
236
		case ld.Kind_Bytes:
237 238 239 240
			nt.tk.Kind = TokenKind_Bytes
			nt.tk.Bytes, _ = tip.n.AsBytes()
			nt.stk.Pop()
			return &nt.tk, nil
tavit ohanian's avatar
tavit ohanian committed
241
		case ld.Kind_Link:
242 243 244 245 246
			nt.tk.Kind = TokenKind_Link
			nt.tk.Link, _ = tip.n.AsLink()
			nt.stk.Pop()
			return &nt.tk, nil
		default:
247
			panic(fmt.Errorf("unrecognized node kind (%q?)", tip.n.Kind()))
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
		}
	case 1:
		if tip.litr.Done() {
			nt.tk.Kind = TokenKind_ListClose
			nt.tk.Node = tip.n
			nt.stk.Pop()
			return &nt.tk, nil
		}
		_, v, err := tip.litr.Next()
		if err != nil {
			return nil, err
		}
		nt.stk.Push(v)
		return nt.ReadToken()
	case 2:
		if tip.mitr.Done() {
			nt.tk.Kind = TokenKind_MapClose
			nt.tk.Node = tip.n
			nt.stk.Pop()
			return &nt.tk, nil
		}
		k, v, err := tip.mitr.Next()
		if err != nil {
			return nil, err
		}
		tip.mval = v
		tip.state = 3
		nt.stk.Push(k)
		return nt.ReadToken()
	case 3:
		tip.state = 2
		nt.stk.Push(tip.mval)
		return nt.ReadToken()
	default:
		panic("unreachable")
	}
}