Commit a274b2db authored by Eric Myhre's avatar Eric Myhre

Dag-cbor marshal/unmarshal.

We now have CID support!  You can create links backed by CIDs,
and marshal them with dag-cbor; and you can unmarshal cbor data
with dag-cbor and expect things with the CID link tag to be parsed
into CIDs and exposed as IPLD Links.  Yay!

(Dag-json is lagging.  The parse for those links is... more involved.
When supported, it'll similarly have its own unmarshal and marshal
just like the ones this diff introduces for dag-cbor.)
Signed-off-by: default avatarEric Myhre <hash@exultant.us>
parent 694c6f3c
package dagcbor
// linkTag is the CBOR tag number this package attaches to bytes tokens that
// carry a CID link when marshalling, and the tag it looks for when deciding
// whether a tagged bytes token should be unmarshalled as a link.
const linkTag = 42
package dagcbor
import (
"fmt"
"github.com/polydawn/refmt/shared"
"github.com/polydawn/refmt/tok"
ipld "github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)
// Marshal walks n and writes the equivalent token stream into sink,
// recursing through maps and lists.
//
// It is meant to stay identical to the general-purpose marshal in the parent
// package, with one exception: the ipld.ReprKind_Link case.  There, dag-cbor
// emits a CID-backed link as a bytes token tagged with linkTag; links of any
// other implementation are rejected with an error.
//
// NOTE(review): the DAG-CBOR spec describes the tag-42 payload as the binary
// CID behind a leading 0x00 (multibase identity) byte, whereas this emits
// lnk.Bytes() as-is.  Confirm before relying on interop with other dag-cbor
// implementations.
func Marshal(n ipld.Node, sink shared.TokenSink) error {
	var tk tok.Token
	// flush hands the current contents of tk to the sink; the sink's "done"
	// result is not needed here, only its error.
	flush := func() error {
		_, err := sink.Step(&tk)
		return err
	}

	switch n.Kind() {
	case ipld.ReprKind_Invalid:
		return fmt.Errorf("cannot traverse a node that is undefined")

	case ipld.ReprKind_Null:
		tk.Type = tok.TNull
		return flush()

	case ipld.ReprKind_Map:
		// Open the map, announcing its length up front.
		tk.Type, tk.Length = tok.TMapOpen, n.Length()
		if err := flush(); err != nil {
			return err
		}
		// Each entry is a string key token followed by the recursively
		// marshalled value.
		keys := n.Keys()
		for keys.HasNext() {
			key, err := keys.Next()
			if err != nil {
				return err
			}
			tk.Type, tk.Str = tok.TString, key
			if err := flush(); err != nil {
				return err
			}
			child, err := n.TraverseField(key)
			if err != nil {
				return err
			}
			if err := Marshal(child, sink); err != nil {
				return err
			}
		}
		// Close the map.
		tk.Type = tok.TMapClose
		return flush()

	case ipld.ReprKind_List:
		// Open the list, announcing its length up front.
		count := n.Length()
		tk.Type, tk.Length = tok.TArrOpen, count
		if err := flush(); err != nil {
			return err
		}
		// Marshal every element in index order.
		for idx := 0; idx < count; idx++ {
			child, err := n.TraverseIndex(idx)
			if err != nil {
				return err
			}
			if err := Marshal(child, sink); err != nil {
				return err
			}
		}
		// Close the list.
		tk.Type = tok.TArrClose
		return flush()

	case ipld.ReprKind_Bool:
		b, err := n.AsBool()
		if err != nil {
			return err
		}
		tk.Type, tk.Bool = tok.TBool, b
		return flush()

	case ipld.ReprKind_Int:
		i, err := n.AsInt()
		if err != nil {
			return err
		}
		tk.Type, tk.Int = tok.TInt, int64(i)
		return flush()

	case ipld.ReprKind_Float:
		f, err := n.AsFloat()
		if err != nil {
			return err
		}
		tk.Type, tk.Float64 = tok.TFloat64, f
		return flush()

	case ipld.ReprKind_String:
		s, err := n.AsString()
		if err != nil {
			return err
		}
		tk.Type, tk.Str = tok.TString, s
		return flush()

	case ipld.ReprKind_Bytes:
		raw, err := n.AsBytes()
		if err != nil {
			return err
		}
		tk.Type, tk.Bytes = tok.TBytes, raw
		return flush()

	case ipld.ReprKind_Link:
		lnk, err := n.AsLink()
		if err != nil {
			return err
		}
		// Only CID-backed links have a schemafree encoding in this codec.
		cl, ok := lnk.(cidlink.Link)
		if !ok {
			return fmt.Errorf("schemafree link emission only supported by this codec for CID type links!")
		}
		tk.Type = tok.TBytes
		tk.Bytes = cl.Bytes()
		tk.Tagged = true
		tk.Tag = linkTag
		return flush()

	default:
		panic("unreachable")
	}
}
......@@ -6,7 +6,6 @@ import (
"github.com/polydawn/refmt/cbor"
ipld "github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/encoding"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)
......@@ -30,7 +29,7 @@ func Decoder(nb ipld.NodeBuilder, r io.Reader) (ipld.Node, error) {
return nb2.DecodeDagCbor(r)
}
// Okay, generic builder path.
return encoding.Unmarshal(nb, cbor.NewDecoder(cbor.DecodeOptions{}, r))
return Unmarshal(nb, cbor.NewDecoder(cbor.DecodeOptions{}, r))
}
func Encoder(n ipld.Node, w io.Writer) error {
......@@ -43,5 +42,5 @@ func Encoder(n ipld.Node, w io.Writer) error {
return n2.EncodeDagCbor(w)
}
// Okay, generic inspection path.
return encoding.Marshal(n, cbor.NewEncoder(w))
return Marshal(n, cbor.NewEncoder(w))
}
package dagcbor
import (
"fmt"
"github.com/ipfs/go-cid"
"github.com/polydawn/refmt/shared"
"github.com/polydawn/refmt/tok"
"github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)
// Unmarshal reads one complete item from tokSrc and builds it with nb.
//
// It is meant to stay identical to the general-purpose unmarshal in the
// parent package, except for the handling of bytes tokens, where dag-cbor
// recognises tagged bytes as schemafree CID links.
//
// An error from the token source is returned unchanged.  If the source
// reports that it is finished without having delivered a usable token, an
// "unexpected EOF" error is returned instead of a nil node with a nil error.
func Unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource) (ipld.Node, error) {
	var tk tok.Token
	done, err := tokSrc.Step(&tk)
	if err != nil {
		return nil, err
	}
	if done {
		// NOTE(review): refmt decoders appear to report done together with
		// the final token of the top-level item rather than after it --
		// confirm against refmt's TokenSource contract.  Under that reading a
		// lone scalar arrives with done=true and must still be built.  A
		// done signal with anything else in the slot (including an untouched
		// zero token) means the stream ended before an item was complete.
		switch tk.Type {
		case tok.TNull, tok.TString, tok.TBytes, tok.TBool, tok.TInt, tok.TUint, tok.TFloat64:
			// A complete scalar item; fall through and build it.
		default:
			return nil, fmt.Errorf("unexpected EOF")
		}
	}
	return unmarshal(nb, tokSrc, &tk)
}
// unmarshal builds one node from the token stream, starting from a token the
// caller has already read into tk.  Taking the first token pre-primed is
// what lets containers recurse into their children without needing a
// peek+unpeek system on the token source.
//
// Maps and lists are consumed up to and including their close token; every
// other token kind yields a single scalar node.  Bytes tokens tagged with
// linkTag are parsed as CIDs and produce link nodes; bytes carrying any other
// tag are rejected.  Integers that do not fit the platform int are rejected
// rather than silently wrapped.
//
// tk is reused as the read slot for every further step, so its contents are
// overwritten by the time this returns.
func unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource, tk *tok.Token) (ipld.Node, error) {
	// FUTURE: check for typed.NodeBuilder that's going to parse a Link (they can slurp any token kind they want).
	switch tk.Type {
	case tok.TMapOpen:
		mb, err := nb.CreateMap()
		if err != nil {
			return nil, err
		}
		for {
			// Read either the next key or the end of the map.
			// The error is checked before done so that a real decode error
			// is not masked by a generic EOF message.
			done, err := tokSrc.Step(tk)
			if err != nil {
				return nil, err
			}
			// NOTE(review): refmt decoders appear to set done on the token
			// that completes the top-level item, so done alongside this map's
			// own close token is a normal ending -- confirm against refmt's
			// TokenSource contract.
			if done && tk.Type != tok.TMapClose {
				return nil, fmt.Errorf("unexpected EOF")
			}
			switch tk.Type {
			case tok.TMapClose:
				return mb.Build()
			case tok.TString:
				// continue
			default:
				return nil, fmt.Errorf("unexpected %s token while expecting map key", tk.Type)
			}
			// Copy the key out before tk is reused for the value.
			k := tk.Str
			// Read the first token of the value.  A value can never be the
			// last token of the stream (the map close must still follow), so
			// done here always means truncated input.
			done, err = tokSrc.Step(tk)
			if err != nil {
				return nil, err
			}
			if done {
				return nil, fmt.Errorf("unexpected EOF")
			}
			// FUTURE: check for typed.NodeBuilder; need to specialize before recursing if so.
			v, err := unmarshal(nb, tokSrc, tk)
			if err != nil {
				return nil, err
			}
			// TODO: I'm no longer sure Insert should take a Node instead of string, but not recursing into reviewing that choice now.
			kn, err := nb.CreateString(k)
			if err != nil {
				return nil, err
			}
			if err := mb.Insert(kn, v); err != nil {
				return nil, err
			}
		}
	case tok.TMapClose:
		return nil, fmt.Errorf("unexpected mapClose token")
	case tok.TArrOpen:
		lb, err := nb.CreateList()
		if err != nil {
			return nil, err
		}
		for {
			// Read either the first token of the next element or the end of
			// the list.  Same err-before-done ordering and same treatment of
			// done-with-close-token as in the map branch.
			done, err := tokSrc.Step(tk)
			if err != nil {
				return nil, err
			}
			if done && tk.Type != tok.TArrClose {
				return nil, fmt.Errorf("unexpected EOF")
			}
			switch tk.Type {
			case tok.TArrClose:
				return lb.Build()
			default:
				// FUTURE: check for typed.NodeBuilder; need to specialize before recursing if so.
				// N.B. when considering optionals for tuple-represented structs, keep in mind how murky that will get here.
				v, err := unmarshal(nb, tokSrc, tk)
				if err != nil {
					return nil, err
				}
				// NOTE(review): any result of Append is not inspected here --
				// confirm whether ListBuilder.Append can report an error.
				lb.Append(v)
			}
		}
	case tok.TArrClose:
		return nil, fmt.Errorf("unexpected arrClose token")
	case tok.TNull:
		return nb.CreateNull()
	case tok.TString:
		return nb.CreateString(tk.Str)
	case tok.TBytes:
		if !tk.Tagged {
			return nb.CreateBytes(tk.Bytes)
		}
		switch tk.Tag {
		case linkTag:
			raw := tk.Bytes
			// The DAG-CBOR spec puts a 0x00 multibase-identity byte in front
			// of the binary CID.  Accept that form as well as a bare CID: a
			// binary CID itself starts with 0x12 (v0) or a non-zero version
			// varint, so a leading 0x00 can only be the prefix.
			if len(raw) > 0 && raw[0] == 0x00 {
				raw = raw[1:]
			}
			elCid, err := cid.Cast(raw)
			if err != nil {
				return nil, err
			}
			return nb.CreateLink(cidlink.Link{elCid})
		default:
			return nil, fmt.Errorf("unhandled cbor tag %d", tk.Tag)
		}
	case tok.TBool:
		return nb.CreateBool(tk.Bool)
	case tok.TInt:
		// Reject values that do not survive the round trip through the
		// platform int (only possible where int is narrower than 64 bits).
		v := int(tk.Int)
		if int64(v) != int64(tk.Int) {
			return nil, fmt.Errorf("integer %d overflows int", tk.Int)
		}
		return nb.CreateInt(v)
	case tok.TUint:
		// Reject values above the largest platform int; converting them
		// would wrap around to a negative number.
		if uint64(tk.Uint) > uint64(^uint(0)>>1) {
			return nil, fmt.Errorf("unsigned integer %d overflows int", tk.Uint)
		}
		return nb.CreateInt(int(tk.Uint))
	case tok.TFloat64:
		return nb.CreateFloat(tk.Float64)
	default:
		panic("unreachable")
	}
}
......@@ -20,6 +20,9 @@ func init() {
cidlink.RegisterMulticodecEncoder(0x0129, Encoder)
}
// FIXME: technically these are NOT dag-json; they're just regular json.
// We need to get encoder logic that handles the special links cases.
func Decoder(nb ipld.NodeBuilder, r io.Reader) (ipld.Node, error) {
// Shell out directly to generic builder path.
// (There's not really any fastpaths of note for json.)
......
......@@ -11,6 +11,14 @@ import (
// FUTURE there are very open questions on how to handle detection and special-track'ing for advLayout nodes when we get to that feature.
// Marshal provides a very general node-to-tokens marshalling feature.
// It can handle either cbor or json by being combined with a refmt TokenSink.
//
// It is valid for all the data model types except links, which are only
// supported if the nodes are typed and provide additional information
// to clarify how links should be encoded through their type info.
// (The dag-cbor and dag-json formats can be used if links are of CID
// implementation and need to be encoded in a schemafree way.)
func Marshal(n ipld.Node, sink shared.TokenSink) error {
var tk tok.Token
switch n.Kind() {
......@@ -118,7 +126,7 @@ func Marshal(n ipld.Node, sink shared.TokenSink) error {
_, err = sink.Step(&tk)
return err
case ipld.ReprKind_Link:
panic("todo link emission")
return fmt.Errorf("link emission not supported by this codec without a schema! (maybe you want dag-cbor or dag-json)")
default:
panic("unreachable")
}
......
......@@ -18,6 +18,14 @@ import (
// They're effectively doing double duty: testing the builders, too.
// (Is that sensible? Should it be refactored? Not sure; maybe!)
// Unmarshal provides a very general tokens-to-node unmarshalling feature.
// It can handle either cbor or json by being combined with a refmt TokenSource.
//
// It is valid for all the data model types except links, which are only
// supported if the nodes are typed and provide additional information
// to clarify how links should be decoded through their type info.
// (The dag-cbor and dag-json formats can be used if links are of CID
// implementation and need to be decoded in a schemafree way.)
func Unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource) (ipld.Node, error) {
var tk tok.Token
done, err := tokSrc.Step(&tk)
......@@ -30,6 +38,7 @@ func Unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource) (ipld.Node, error
// starts with the first token already primed. Necessary to get recursion
// to flow right without a peek+unpeek system.
func unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource, tk *tok.Token) (ipld.Node, error) {
// FUTURE: check for typed.NodeBuilder that's going to parse a Link (they can slurp any token kind they want).
switch tk.Type {
case tok.TMapOpen:
mb, err := nb.CreateMap()
......@@ -102,9 +111,6 @@ func unmarshal(nb ipld.NodeBuilder, tokSrc shared.TokenSource, tk *tok.Token) (i
return nb.CreateString(tk.Str)
case tok.TBytes:
return nb.CreateBytes(tk.Bytes)
// TODO should also check tags to produce CIDs.
// n.b. with schemas, we can comprehend links without tags;
// but without schemas, tags are the only disambiguator.
case tok.TBool:
return nb.CreateBool(tk.Bool)
case tok.TInt:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment