json_decode.go 6.27 KB
Newer Older
1 2 3 4 5 6
package jsontoken

import (
	"fmt"
	"io"

tavit ohanian's avatar
tavit ohanian committed
7 8
	"gitlab.dms3.io/ld/go-ld-prime/codec/codectools"
	"gitlab.dms3.io/ld/go-ld-prime/codec/codectools/scratch"
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
)

// Decoder is a step-wise JSON tokenizer.  It pulls bytes from a scratch.Reader
// and, on each call to Step, fills in and yields a single codectools.Token.
//
// Init attaches the input and resets the parse state.  The DecoderConfig is
// embedded, so its options can be set directly on the Decoder.
type Decoder struct {
	r scratch.Reader // input source; set up by Init.

	phase decoderPhase   // current phase.
	stack []decoderPhase // stack of any phases that need to be popped back up to before we're done with a complete tree.
	some  bool           // true after first value in any context; use to decide if a comma must precede the next value.  (doesn't need a stack, because if you're popping, it's true again.)

	tok codectools.Token // we'll be yielding this repeatedly: Step always returns a pointer to this one token, so each Step overwrites what the previous one reported.

	DecoderConfig
}

// DecoderConfig holds the strictness options of a Decoder.
// The zero value has every option switched off.
// Each field comment lists the setting intended for the named dialect.
type DecoderConfig struct {
	AllowDanglingComma  bool // normal json: false; strict: false.
	AllowWhitespace     bool // normal json: true;  strict: false.  Consulted by readn1skippingWhitespace: when false, whitespace between tokens is an error.
	AllowEscapedUnicode bool // normal json: true;  strict: false.
	ParseUtf8C8         bool // normal json: false; dag-json: true.
}

// Init points the decoder at r and rewinds the parse state, so that the next
// Step expects the start of a top-level value.
//
// The phase stack is truncated in place (its backing array is kept for reuse).
// The configuration fields and the token are not touched.
func (d *Decoder) Init(r io.Reader) {
	d.r.Init(r)
	d.some = false
	d.stack = d.stack[:0]
	d.phase = decoderPhase_acceptValue
}

37
func (d *Decoder) Step(budget *int64) (next *codectools.Token, err error) {
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
	switch d.phase {
	case decoderPhase_acceptValue:
		err = d.step_acceptValue()
	case decoderPhase_acceptMapKeyOrEnd:
		err = d.step_acceptMapKeyOrEnd()
	case decoderPhase_acceptMapValue:
		err = d.step_acceptMapValue()
	case decoderPhase_acceptListValueOrEnd:
		err = d.step_acceptListValueOrEnd()
	}
	return &d.tok, err
}

// pushPhase enters a nested container: the phase we are currently in is saved
// on the stack, newPhase becomes current, and the "seen a value" flag is
// cleared because nothing has been read in the new context yet.
func (d *Decoder) pushPhase(newPhase decoderPhase) {
	// The right-hand side is evaluated in full first, so the old phase is what gets saved.
	d.stack, d.phase, d.some = append(d.stack, d.phase), newPhase, false
}

// popPhase leaves a nested container: the most recently saved phase becomes
// current again and is removed from the stack.  The "seen a value" flag is set,
// since the container just closed counts as a value in the enclosing context.
//
// The stack must be non-empty; popping an empty stack panics.
func (d *Decoder) popPhase() {
	top := len(d.stack) - 1
	d.phase, d.stack = d.stack[top], d.stack[:top]
	d.some = true
}

// decoderPhase identifies what the decoder expects to read next.
// Step switches on it to pick the handler for the upcoming token.
type decoderPhase uint8

const (
	// decoderPhase_acceptValue is the phase set by Init (and the zero value):
	// any value may start here.  Handled by step_acceptValue.
	decoderPhase_acceptValue decoderPhase = iota
	// decoderPhase_acceptMapKeyOrEnd: inside a map, expecting a key or the closing '}'.
	// Handled by step_acceptMapKeyOrEnd.
	decoderPhase_acceptMapKeyOrEnd
	// decoderPhase_acceptMapValue: inside a map, a key and its colon have been
	// consumed and the value is expected.  Handled by step_acceptMapValue.
	decoderPhase_acceptMapValue
	// decoderPhase_acceptListValueOrEnd: inside a list, expecting a value or the closing ']'.
	// Handled by step_acceptListValueOrEnd.
	decoderPhase_acceptListValueOrEnd
)

// readn1skippingWhitespace reads the next significant byte from the input.
//
// Space, tab, carriage return and newline are treated as whitespace.  When the
// AllowWhitespace option is set they are silently skipped; otherwise the first
// one encountered yields a zero byte and a strictness error.
//
// For any non-whitespace byte, the byte is returned together with whatever
// error the underlying Readn1 call reported for it.
func (d *Decoder) readn1skippingWhitespace() (majorByte byte, err error) {
	for {
		majorByte, err = d.r.Readn1()
		switch majorByte {
		case ' ', '\t', '\r', '\n':
			if !d.DecoderConfig.AllowWhitespace {
				return 0, fmt.Errorf("whitespace not allowed by decoder configured for strictness")
			}
			// Lenient mode: drop it and look at the next byte.
		default:
			return majorByte, err
		}
	}
}

// The original step, where any value is accepted, and no terminators for recursives are valid.
// ONLY used in the original step; all other steps handle leaf nodes internally.
//
// The first byte is read through readn1skippingWhitespace, like every other
// step does, so whitespace ahead of the top-level value follows the
// AllowWhitespace option: it is skipped when allowed and reported as a
// strictness error when not.
func (d *Decoder) step_acceptValue() error {
	majorByte, err := d.readn1skippingWhitespace()
	if err != nil {
		return err
	}
	return d.stepHelper_acceptValue(majorByte)
}

// Step in midst of decoding a map, key expected up next, or end.
//
// On a closing '}' this yields a MapClose token and pops back to the enclosing
// phase.  Otherwise it decodes a key (which must be a string), consumes the
// ':' that follows it, and moves to the map-value phase; the key is the token
// yielded for this step.
//
// Once at least one entry has been read (d.some), the next byte must be either
// '}' or a ',' separator; anything else is an error rather than being silently
// treated as the start of another key.  A ',' directly followed by '}' is a
// dangling comma and is accepted only when AllowDanglingComma is set.
func (d *Decoder) step_acceptMapKeyOrEnd() error {
	majorByte, err := d.readn1skippingWhitespace()
	if err != nil {
		return err
	}
	if d.some {
		switch majorByte {
		case '}':
			d.tok.Kind = codectools.TokenKind_MapClose
			d.popPhase()
			return nil
		case ',':
			majorByte, err = d.readn1skippingWhitespace()
			if err != nil {
				return err
			}
			if majorByte == '}' && !d.AllowDanglingComma {
				return fmt.Errorf("dangling comma before map close not allowed by decoder configuration")
			}
			// and now fall through to the next switch
		default:
			return fmt.Errorf("expected comma or map close after map entry; got 0x%x", majorByte)
		}
	}
	switch majorByte {
	case '}':
		// Either an empty map, or a permitted dangling comma.
		d.tok.Kind = codectools.TokenKind_MapClose
		d.popPhase()
		return nil
	default:
		// Consume a value for key.
		//  Given that this is JSON, this has to be a string.
		if err := d.stepHelper_acceptValue(majorByte); err != nil {
			return err
		}
		if d.tok.Kind != codectools.TokenKind_String {
			return fmt.Errorf("unexpected non-string token where expecting a map key")
		}
		// Now scan up to consume the colon as well, which is required next.
		majorByte, err = d.readn1skippingWhitespace()
		if err != nil {
			return err
		}
		if majorByte != ':' {
			return fmt.Errorf("expected colon after map key; got 0x%x", majorByte)
		}
		// Next up: expect a value.
		d.phase = decoderPhase_acceptMapValue
		d.some = true
		return nil
	}
}

// step_acceptMapValue handles the step right after a map key and its colon:
// it reads the next significant byte and decodes the value that starts there.
//
// The phase is switched back to "key or end" before the value is decoded, so
// that if the value opens a nested container, that is the phase which gets
// saved and later restored.  A read error leaves the phase untouched.
func (d *Decoder) step_acceptMapValue() error {
	firstByte, err := d.readn1skippingWhitespace()
	if err == nil {
		d.phase = decoderPhase_acceptMapKeyOrEnd
		err = d.stepHelper_acceptValue(firstByte)
	}
	return err
}

// Step in midst of decoding an array.
//
// On a closing ']' this yields a ListClose token and pops back to the
// enclosing phase.  Otherwise it decodes the next element.
//
// Once at least one element has been read (d.some), the next byte must be
// either ']' or a ',' separator; anything else is an error rather than being
// silently treated as the start of another element.  A ',' directly followed
// by ']' is a dangling comma and is accepted only when AllowDanglingComma is
// set.
func (d *Decoder) step_acceptListValueOrEnd() error {
	majorByte, err := d.readn1skippingWhitespace()
	if err != nil {
		return err
	}
	if d.some {
		switch majorByte {
		case ']':
			d.tok.Kind = codectools.TokenKind_ListClose
			d.popPhase()
			return nil
		case ',':
			majorByte, err = d.readn1skippingWhitespace()
			if err != nil {
				return err
			}
			if majorByte == ']' && !d.AllowDanglingComma {
				return fmt.Errorf("dangling comma before list close not allowed by decoder configuration")
			}
			// and now fall through to the next switch
		default:
			return fmt.Errorf("expected comma or list close after list value; got 0x%x", majorByte)
		}
	}
	switch majorByte {
	case ']':
		// Either an empty list, or a permitted dangling comma.
		d.tok.Kind = codectools.TokenKind_ListClose
		d.popPhase()
		return nil
	default:
		// Mark the list as non-empty before decoding: if the element opens a
		// nested container, pushPhase clears the flag for that new context and
		// popPhase sets it again on the way back out.
		d.some = true
		return d.stepHelper_acceptValue(majorByte)
	}
}

// stepHelper_acceptValue decodes the value that starts with majorByte (which
// the caller has already consumed) and records the result in d.tok.
//
//   - '{' and '[' yield an open token with Length -1 and push the matching
//     container phase.
//   - 'n', 't' and 'f' must be followed by the remainder of "null", "true" or
//     "false"; the remainder is verified, and a mismatch is an error.
//   - '"' yields a string token whose body comes from DecodeStringBody; the
//     closing quote is then consumed.
//   - '-' and the digits are un-read and handed to DecodeNumber, which decides
//     whether the number is an int or a float.
//
// Any other byte is reported as an invalid start of value.
func (d *Decoder) stepHelper_acceptValue(majorByte byte) (err error) {
	switch majorByte {
	case '{':
		d.tok.Kind = codectools.TokenKind_MapOpen
		d.tok.Length = -1
		d.pushPhase(decoderPhase_acceptMapKeyOrEnd)
		return nil
	case '[':
		d.tok.Kind = codectools.TokenKind_ListOpen
		d.tok.Length = -1
		d.pushPhase(decoderPhase_acceptListValueOrEnd)
		return nil
	case 'n':
		if err := d.stepHelper_expectLiteralTail("null"); err != nil {
			return err
		}
		d.tok.Kind = codectools.TokenKind_Null
		return nil
	case '"':
		d.tok.Kind = codectools.TokenKind_String
		d.tok.Str, err = DecodeStringBody(&d.r)
		if err != nil {
			return err
		}
		// Swallow the trailing `"` (which DecodeStringBody has insured we have).
		_, err = d.r.Readn1()
		return err
	case 'f':
		if err := d.stepHelper_expectLiteralTail("false"); err != nil {
			return err
		}
		d.tok.Kind = codectools.TokenKind_Bool
		d.tok.Bool = false
		return nil
	case 't':
		if err := d.stepHelper_expectLiteralTail("true"); err != nil {
			return err
		}
		d.tok.Kind = codectools.TokenKind_Bool
		d.tok.Bool = true
		return nil
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		// Some kind of numeric... but in json, we can't tell if it's float or int.  At least, certainly not yet.
		// We'll have to look ahead quite a bit more to try to differentiate.  The decodeNumber function does this for us.
		d.r.Unreadn1()
		d.tok.Kind, d.tok.Int, d.tok.Float, err = DecodeNumber(&d.r)
		return err
	default:
		return fmt.Errorf("Invalid byte while expecting start of value: 0x%x", majorByte)
	}
}

// stepHelper_expectLiteralTail checks the remainder of a keyword literal whose
// first byte has already been consumed: it reads len(literal)-1 bytes and
// requires them to equal literal[1:].
//
// NOTE(review): this relies on Readnzc returning the bytes it read along with
// an error; the reader's definition is not visible from this file -- confirm.
func (d *Decoder) stepHelper_expectLiteralTail(literal string) error {
	tail := literal[1:]
	got, err := d.r.Readnzc(len(tail))
	if err != nil {
		return err
	}
	if string(got) != tail {
		return fmt.Errorf("invalid literal: expected %q, got %q", literal, literal[:1]+string(got))
	}
	return nil
}