token_consumers.go 8.58 KB
Newer Older
1 2 3 4 5 6
package codectools

import (
	"fmt"
	"io"

tavit ohanian's avatar
tavit ohanian committed
7
	"gitlab.dms3.io/ld/go-ld-prime/codec"
8 9
)

tavit ohanian's avatar
tavit ohanian committed
10 11
// TokenAssemble takes an ld.NodeAssembler and a TokenReader,
// and repeatedly pumps the TokenReader for tokens and feeds their data into the ld.NodeAssembler
12 13 14 15 16 17 18 19 20 21
// until it finishes a complete value.
//
// To compare and contrast to other token oriented tools:
// TokenAssemble does the same direction of information transfer as the TokenAssembler gadget does,
// but TokenAssemble moves completely through a value in one step,
// whereas the TokenAssembler accepts tokens pumped into it one step at a time.
//
// TokenAssemble does not enforce the "map keys must be strings" rule which is present in the Data Model;
// it will also happily do even recursive structures in map keys,
// meaning it can be used when handling schema values like maps with complex keys.
tavit ohanian's avatar
tavit ohanian committed
22
func TokenAssemble(na ld.NodeAssembler, tr TokenReader, budget int64) error {
23
	tk, err := tr(&budget)
24 25 26 27 28 29
	if err != nil {
		return err
	}
	return tokenAssemble(na, tk, tr, &budget)
}

tavit ohanian's avatar
tavit ohanian committed
30
func tokenAssemble(na ld.NodeAssembler, tk *Token, tr TokenReader, budget *int64) error {
31 32 33 34 35 36 37 38 39 40 41 42 43 44
	if *budget < 0 {
		return codec.ErrBudgetExhausted{}
	}
	switch tk.Kind {
	case TokenKind_MapOpen:
		if tk.Length > 0 && *budget < tk.Length*2 { // Pre-check budget: at least two decrements estimated for each entry.
			return codec.ErrBudgetExhausted{}
		}
		ma, err := na.BeginMap(tk.Length)
		if err != nil {
			return err
		}
		for {
			// Peek one token.  We need to see if the map is about to end or not.
45
			tk, err = tr(budget)
46 47 48 49 50 51 52 53 54 55 56 57 58 59
			if err != nil {
				return err
			}
			// If the map has ended, invoke the finish operation and check for any errors.
			if tk.Kind == TokenKind_MapClose {
				return ma.Finish()
			}
			// Recurse to assemble the key.
			*budget-- // Decrement budget by at least one for each key.  The key content may also cause further decrements.
			if err = tokenAssemble(ma.AssembleKey(), tk, tr, budget); err != nil {
				return err
			}
			// Recurse to assemble the value.
			//  (We don't really care to peek this token, but do so anyway to keep the calling convention regular.)
60
			tk, err = tr(budget)
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
			if err != nil {
				return err
			}
			*budget-- // Decrement budget by at least one for each value.  The value content may also cause further decrements.
			if err = tokenAssemble(ma.AssembleValue(), tk, tr, budget); err != nil {
				return err
			}
			// Continue around the loop, to encounter either the next entry or the end of the map.
		}
	case TokenKind_MapClose:
		return ErrMalformedTokenSequence{"map close token encountered while not in the middle of a map"}
	case TokenKind_ListOpen:
		if tk.Length > 0 && *budget < tk.Length { // Pre-check budget: at least one decrement estimated for each entry.
			return codec.ErrBudgetExhausted{}
		}
		la, err := na.BeginList(tk.Length)
		if err != nil {
			return err
		}
		for {
			// Peek one token.  We need to see if the list is about to end or not.
82
			tk, err = tr(budget)
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
			if err != nil {
				return err
			}
			// If the list has ended, invoke the finish operation and check for any errors.
			if tk.Kind == TokenKind_ListClose {
				return la.Finish()
			}
			// Recurse to assemble the value.
			*budget-- // Decrement budget by at least one for each value.  The value content may also cause further decrements.
			if err = tokenAssemble(la.AssembleValue(), tk, tr, budget); err != nil {
				return err
			}
			// Continue around the loop, to encounter either the next value or the end of the list.
		}
	case TokenKind_ListClose:
		return ErrMalformedTokenSequence{"list close token encountered while not in the middle of a list"}
	case TokenKind_Null:
		return na.AssignNull()
	case TokenKind_Bool:
		*budget--
		return na.AssignBool(tk.Bool)
	case TokenKind_Int:
		*budget--
106
		return na.AssignInt(tk.Int)
107 108 109 110
	case TokenKind_Float:
		*budget--
		return na.AssignFloat(tk.Float)
	case TokenKind_String:
111
		*budget -= int64(len(tk.Str))
112 113
		return na.AssignString(tk.Str)
	case TokenKind_Bytes:
114
		*budget -= int64(len(tk.Bytes))
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
		return na.AssignBytes(tk.Bytes)
	case TokenKind_Link:
		*budget--
		return na.AssignLink(tk.Link)
	default:
		panic(fmt.Errorf("unrecognized token kind (%q?)", tk.Kind))
	}
}

// --- the stepwise assembler system (more complicated; has a userland stack) is below -->

type TokenAssembler struct {
	// This structure is designed to be embeddable.  Use Initialize when doing so.

	stk    assemblerStack // this is going to end up being a stack you know
	budget int64
}

type assemblerStackRow struct {
tavit ohanian's avatar
tavit ohanian committed
134 135 136 137
	state uint8            // 0: assign this node; 1: continue list; 2: continue map with key; 3: continue map with value.
	na    ld.NodeAssembler // Always present.
	la    ld.ListAssembler // At most one of these is present.
	ma    ld.MapAssembler  // At most one of these is present.
138 139 140 141 142 143
}
type assemblerStack []assemblerStackRow

func (stk assemblerStack) Tip() *assemblerStackRow {
	return &stk[len(stk)-1]
}
tavit ohanian's avatar
tavit ohanian committed
144
func (stk *assemblerStack) Push(na ld.NodeAssembler) {
145 146 147 148 149 150 151 152 153
	*stk = append(*stk, assemblerStackRow{na: na})
}
func (stk *assemblerStack) Pop() {
	if len(*stk) == 0 {
		return
	}
	*stk = (*stk)[0 : len(*stk)-1]
}

tavit ohanian's avatar
tavit ohanian committed
154
func (ta *TokenAssembler) Initialize(na ld.NodeAssembler, budget int64) {
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
	if ta.stk == nil {
		ta.stk = make(assemblerStack, 0, 10)
	} else {
		ta.stk = ta.stk[0:0]
	}
	ta.stk.Push(na)
	ta.budget = budget
}

// Process takes a Token pointer as an argument.
// (Notice how this function happens to match the definition of the visitFn that's usable as an argument to TokenWalk.)
// The token argument can be understood to be "borrowed" for the duration of the Process call, but will not be mutated.
// The use of a pointer here is so that a single Token can be reused by multiple calls, avoiding unnecessary allocations.
//
// Note that Process does very little sanity checking of token sequences itself,
// mostly handing information to the NodeAssemblers directly,
// which presumably will reject the data if it is out of line.
// The NodeAssembler this TokenAssembler is wrapping should already be enforcing the relevant logical rules,
// so it is not useful for TokenAssembler.Process to attempt to duplicate those checks;
// TokenAssembler.Process will also return any errors from the NodeAssembler without attempting to enforce a pattern on those errors.
// In particular, TokenAssembler.Process does not check if every MapOpen is paired with a MapClose;
// it does not check if every ListOpen is paired with a ListClose;
// and it does not check if the token stream is continuing after all open recursives have been closed.
// TODO: review this documentation; more of these checks turn out necessary anyway than originally expected.
func (ta *TokenAssembler) Process(tk *Token) (err error) {
	if len(ta.stk) == 0 {
		return io.EOF
	}
	tip := ta.stk.Tip()
	switch tip.state {
	case 0:
		switch tk.Kind {
		case TokenKind_MapOpen:
			tip.ma, err = tip.na.BeginMap(tk.Length)
			tip.state = 2
			return err
		case TokenKind_MapClose:
			// Mostly we try to just forward things, but can't not check this one: tip.ma would be nil; there's reasonable target for forwarding.
			return ErrMalformedTokenSequence{"map close token encountered while not in the middle of a map"}
		case TokenKind_ListOpen:
			tip.la, err = tip.na.BeginList(tk.Length)
			tip.state = 1
			return err
		case TokenKind_ListClose:
			// Mostly we try to just forward things, but can't not check this one: tip.la would be nil; there's reasonable target for forwarding.
			return ErrMalformedTokenSequence{"list close token encountered while not in the middle of a list"}
		case TokenKind_Null:
			err = tip.na.AssignNull()
			ta.stk.Pop()
			return err
		case TokenKind_Bool:
			err = tip.na.AssignBool(tk.Bool)
			ta.stk.Pop()
			return err
		case TokenKind_Int:
210
			err = tip.na.AssignInt(tk.Int)
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
			ta.stk.Pop()
			return err
		case TokenKind_Float:
			err = tip.na.AssignFloat(tk.Float)
			ta.stk.Pop()
			return err
		case TokenKind_String:
			err = tip.na.AssignString(tk.Str)
			ta.stk.Pop()
			return err
		case TokenKind_Bytes:
			err = tip.na.AssignBytes(tk.Bytes)
			ta.stk.Pop()
			return err
		case TokenKind_Link:
			err = tip.na.AssignLink(tk.Link)
			ta.stk.Pop()
			return err
		default:
			panic(fmt.Errorf("unrecognized token kind (%q?)", tk.Kind))
		}
		return nil
	case 1:
		if tk.Kind == TokenKind_ListClose {
			err = tip.la.Finish()
			ta.stk.Pop()
			return err
		}
		ta.stk.Push(tip.la.AssembleValue())
		return ta.Process(tk)
	case 2:
		if tk.Kind == TokenKind_MapClose {
			err = tip.ma.Finish()
			ta.stk.Pop()
			return err
		}
		tip.state = 3
		ta.stk.Push(tip.ma.AssembleKey())
		return ta.Process(tk)
	case 3:
		tip.state = 2
		ta.stk.Push(tip.ma.AssembleValue())
		return ta.Process(tk)
	default:
		panic("unreachable")
	}
}