diff --git a/codec/dagjson/marshal.go b/codec/dagjson/marshal.go index d94da113349a387ea32feead16479ad0036f4993..3e292fdcb186a3182e12c72ef8a20953211d8dde 100644 --- a/codec/dagjson/marshal.go +++ b/codec/dagjson/marshal.go @@ -1,6 +1,7 @@ package dagjson import ( + "encoding/base64" "fmt" "github.com/polydawn/refmt/shared" @@ -115,10 +116,47 @@ func Marshal(n ipld.Node, sink shared.TokenSink, allowLinks bool) error { if err != nil { return err } - tk.Type = tok.TBytes - tk.Bytes = v - _, err = sink.Step(&tk) - return err + if allowLinks { + // Precisely seven tokens to emit: + tk.Type = tok.TMapOpen + tk.Length = 1 + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Type = tok.TString + tk.Str = "/" + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Type = tok.TMapOpen + tk.Length = 1 + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Type = tok.TString + tk.Str = "bytes" + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Str = base64.StdEncoding.EncodeToString(v) + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Type = tok.TMapClose + if _, err = sink.Step(&tk); err != nil { + return err + } + tk.Type = tok.TMapClose + if _, err = sink.Step(&tk); err != nil { + return err + } + return nil + } else { + tk.Type = tok.TBytes + tk.Bytes = v + _, err = sink.Step(&tk) + return err + } case ipld.Kind_Link: if !allowLinks { return fmt.Errorf("cannot Marshal ipld links to JSON") diff --git a/codec/dagjson/roundtripBytes_test.go b/codec/dagjson/roundtripBytes_test.go new file mode 100644 index 0000000000000000000000000000000000000000..df4e9f636b01b2bc7a5e3f3cc2bd1374bd95120d --- /dev/null +++ b/codec/dagjson/roundtripBytes_test.go @@ -0,0 +1,74 @@ +package dagjson + +import ( + "bytes" + "strings" + "testing" + + . 
"github.com/warpfork/go-wish" + + "github.com/ipld/go-ipld-prime/fluent" + basicnode "github.com/ipld/go-ipld-prime/node/basic" +) + +var byteNode = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent.MapAssembler) { + na.AssembleEntry("plain").AssignString("olde string") + na.AssembleEntry("bytes").AssignBytes([]byte("deadbeef")) +}) +var byteSerial = `{ + "plain": "olde string", + "bytes": { + "/": { + "bytes": "ZGVhZGJlZWY=" + } + } +} +` + +func TestRoundtripBytes(t *testing.T) { + t.Run("encoding", func(t *testing.T) { + var buf bytes.Buffer + err := Encode(byteNode, &buf) + Require(t, err, ShouldEqual, nil) + Wish(t, buf.String(), ShouldEqual, byteSerial) + }) + t.Run("decoding", func(t *testing.T) { + buf := strings.NewReader(byteSerial) + nb := basicnode.Prototype__Map{}.NewBuilder() + err := Decode(nb, buf) + Require(t, err, ShouldEqual, nil) + Wish(t, nb.Build(), ShouldEqual, byteNode) + }) +} + +var encapsulatedNode = fluent.MustBuildMap(basicnode.Prototype__Map{}, 1, func(na fluent.MapAssembler) { + na.AssembleEntry("/").CreateMap(1, func(sa fluent.MapAssembler) { + sa.AssembleEntry("bytes").AssignBytes([]byte("deadbeef")) + }) +}) +var encapsulatedSerial = `{ + "/": { + "bytes": { + "/": { + "bytes": "ZGVhZGJlZWY=" + } + } + } +} +` + +func TestEncapsulatedBytes(t *testing.T) { + t.Run("encoding", func(t *testing.T) { + var buf bytes.Buffer + err := Encode(encapsulatedNode, &buf) + Require(t, err, ShouldEqual, nil) + Wish(t, buf.String(), ShouldEqual, encapsulatedSerial) + }) + t.Run("decoding", func(t *testing.T) { + buf := strings.NewReader(encapsulatedSerial) + nb := basicnode.Prototype__Map{}.NewBuilder() + err := Decode(nb, buf) + Require(t, err, ShouldEqual, nil) + Wish(t, nb.Build(), ShouldEqual, encapsulatedNode) + }) +} diff --git a/codec/dagjson/unmarshal.go b/codec/dagjson/unmarshal.go index 18993658de0d5f35c91ea4b045ef56b930e86776..b4d157103dd5272030f83530ed1e889e8807ccf8 100644 --- a/codec/dagjson/unmarshal.go +++ 
b/codec/dagjson/unmarshal.go @@ -1,6 +1,7 @@ package dagjson import ( + "encoding/base64" "fmt" cid "github.com/ipfs/go-cid" @@ -33,8 +34,8 @@ func Unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSource, parseLinks bool } type unmarshalState struct { - tk [4]tok.Token // mostly, only 0'th is used... but [1:4] are used during lookahead for links. - shift int // how many times to slide something out of tk[1:4] instead of getting a new token. + tk [7]tok.Token // mostly, only 0'th is used... but [1:7] are used during lookahead for links and bytes. + shift int // how many times to slide something out of tk[1:7] instead of getting a new token. parseLinks bool } @@ -69,11 +70,46 @@ func (st *unmarshalState) step(tokSrc shared.TokenSource) error { st.tk[2] = st.tk[3] st.shift-- return nil + case 4: + st.tk[0] = st.tk[1] + st.tk[1] = st.tk[2] + st.tk[2] = st.tk[3] + st.tk[3] = st.tk[4] + st.shift-- + return nil + case 5: + st.tk[0] = st.tk[1] + st.tk[1] = st.tk[2] + st.tk[2] = st.tk[3] + st.tk[3] = st.tk[4] + st.tk[4] = st.tk[5] + st.shift-- + return nil + case 6: + st.tk[0] = st.tk[1] + st.tk[1] = st.tk[2] + st.tk[2] = st.tk[3] + st.tk[3] = st.tk[4] + st.tk[4] = st.tk[5] + st.tk[5] = st.tk[6] + st.shift-- + return nil default: panic("unreachable") } } +// ensure checks that the lookahead token (tk[lookahead]) is loaded from the underlying source. +func (st *unmarshalState) ensure(tokSrc shared.TokenSource, lookahead int) error { + if st.shift < lookahead { + if _, err := tokSrc.Step(&st.tk[lookahead]); err != nil { + return err + } + st.shift = lookahead + } + return nil +} + // linkLookahead is called after receiving a TMapOpen token; // when it returns, we will have either created a link, OR // it's not a link, and the caller should proceed to start a map @@ -83,37 +119,30 @@ func (st *unmarshalState) step(tokSrc shared.TokenSource) error { // continue to attempt to build a map.
func (st *unmarshalState) linkLookahead(na ipld.NodeAssembler, tokSrc shared.TokenSource) (bool, error) { // Peek next token. If it's a "/" string, link is still a possibility - _, err := tokSrc.Step(&st.tk[1]) - if err != nil { + if err := st.ensure(tokSrc, 1); err != nil { return false, err } if st.tk[1].Type != tok.TString { - st.shift = 1 return false, nil } if st.tk[1].Str != "/" { - st.shift = 1 return false, nil } // Peek next token. If it's a string, link is still a possibility. // We won't try to parse it as a CID until we're sure it's the only thing in the map, though. - _, err = tokSrc.Step(&st.tk[2]) - if err != nil { + if err := st.ensure(tokSrc, 2); err != nil { return false, err } if st.tk[2].Type != tok.TString { - st.shift = 2 return false, nil } // Peek next token. If it's map close, we've got a link! // (Otherwise it had better be a string, because another map key is the // only other valid transition here... but we'll leave that check to the caller. - _, err = tokSrc.Step(&st.tk[3]) - if err != nil { + if err := st.ensure(tokSrc, 3); err != nil { return false, err } if st.tk[3].Type != tok.TMapClose { - st.shift = 3 return false, nil } // Okay, we made it -- this looks like a link. Parse it. @@ -125,8 +154,71 @@ func (st *unmarshalState) linkLookahead(na ipld.NodeAssembler, tokSrc shared.Tok if err := na.AssignLink(cidlink.Link{Cid: elCid}); err != nil { return false, err } + // consume the look-ahead tokens + st.shift = 0 return true, nil +} +func (st *unmarshalState) bytesLookahead(na ipld.NodeAssembler, tokSrc shared.TokenSource) (bool, error) { + // Peek next token. If it's a "/" string, bytes is still a possibility + if err := st.ensure(tokSrc, 1); err != nil { + return false, err + } + if st.tk[1].Type != tok.TString { + return false, nil + } + if st.tk[1].Str != "/" { + return false, nil + } + // Peek next token. If it's a map, bytes is still a possibility. 
+ if err := st.ensure(tokSrc, 2); err != nil { + return false, err + } + if st.tk[2].Type != tok.TMapOpen { + return false, nil + } + // peek next token. If it's the string "bytes", we're on track. + if err := st.ensure(tokSrc, 3); err != nil { + return false, err + } + if st.tk[3].Type != tok.TString { + return false, nil + } + if st.tk[3].Str != "bytes" { + return false, nil + } + // peek next token. if it's a string, we're on track. + if err := st.ensure(tokSrc, 4); err != nil { + return false, err + } + if st.tk[4].Type != tok.TString { + return false, nil + } + // peek next token. if it's the first map close we're on track. + if err := st.ensure(tokSrc, 5); err != nil { + return false, err + } + if st.tk[5].Type != tok.TMapClose { + return false, nil + } + // Peek next token. If it's map close, we've got bytes! + if err := st.ensure(tokSrc, 6); err != nil { + return false, err + } + if st.tk[6].Type != tok.TMapClose { + return false, nil + } + // Okay, we made it -- this looks like bytes. Parse it. + elBytes, err := base64.StdEncoding.DecodeString(st.tk[4].Str) + if err != nil { + return false, err + } + if err := na.AssignBytes(elBytes); err != nil { + return false, err + } + // consume the look-ahead tokens + st.shift = 0 + return true, nil } // starts with the first token already primed. Necessary to get recursion @@ -145,6 +237,14 @@ func (st *unmarshalState) unmarshal(na ipld.NodeAssembler, tokSrc shared.TokenSo if gotLink { return nil } + + gotBytes, err := st.bytesLookahead(na, tokSrc) + if err != nil { + return err + } + if gotBytes { + return nil + } } // Okay, now back to regularly scheduled map logic.