From 8ec6b0fbad18bd3fadd2a55999f23c42b5cc0d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 28 Mar 2021 17:07:07 +0100 Subject: [PATCH] expose APIs without Reader/Writer overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An io.Writer, by definition, will always copy bytes. That's fine in general, and can't be worked around without breaking the writer's contract. However, the main use of go-codec-dagpb today is go-merkledag, which simply uses the codec to encode a node into a buffer. So, we had to create a new bytes.Buffer, write to it, and grab its bytes. This is one extra allocation (the bytes.Buffer object itself), plus copying the encoded bytes an extra time, since we must copy from Encode's internal buffer to the bytes.Buffer. Add a lower-level append-like AppendEncode that cuts out the middleman, removing both of those extra pieces of work. For the sake of consistency, we add DecodeBytes to mirror the above on the decode side. Decode already had a shortcut for readers that expose a Bytes method, but this way it's more evident what we're doing, and we also avoid allocating a bytes.Buffer just to call Bytes on it. Using these new APIs in go-merkledag shows nice results: name old time/op new time/op delta Roundtrip-8 4.27µs ± 0% 4.07µs ± 0% -4.50% (p=0.004 n=5+6) name old alloc/op new alloc/op delta Roundtrip-8 6.86kB ± 0% 6.38kB ± 0% -6.99% (p=0.002 n=6+6) name old allocs/op new allocs/op delta Roundtrip-8 106 ± 0% 103 ± 0% -2.83% (p=0.002 n=6+6) While at it, we formally deprecate Marshal and Unmarshal, since we're starting to have lots of redundant API surface. 
--- marshal.go | 57 +++++++++++++++++++++++++++++++-------------------- multicodec.go | 35 ++++++++----------------------- unmarshal.go | 24 +++++++++++++++------- 3 files changed, 60 insertions(+), 56 deletions(-) diff --git a/marshal.go b/marshal.go index f156236..291ff97 100644 --- a/marshal.go +++ b/marshal.go @@ -19,28 +19,42 @@ type pbLink struct { hasTsize bool } -// Marshal provides an IPLD codec encode interface for DAG-PB data. Provide a +// Encode provides an IPLD codec encode interface for DAG-PB data. Provide a // conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node. // The Node must strictly conform to the DAG-PB schema // (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md). // For safest use, build Nodes using the Type.PBNode type. -func Marshal(inNode ipld.Node, out io.Writer) error { +// This function is registered via the go-ipld-prime link loader for multicodec +// code 0x70 when this package is invoked via init. +func Encode(node ipld.Node, w io.Writer) error { + // 1KiB can be allocated on the stack, and covers most small nodes + // without having to grow the buffer and cause allocations. + enc := make([]byte, 0, 1024) + + enc, err := AppendEncode(enc, node) + if err != nil { + return err + } + _, err = w.Write(enc) + return err +} + +// AppendEncode is like Encode, but it uses a destination buffer directly. +// This means less copying of bytes, and if the destination has enough capacity, +// fewer allocations. +func AppendEncode(enc []byte, inNode ipld.Node) ([]byte, error) { // Wrap in a typed node for some basic schema form checking builder := Type.PBNode.NewBuilder() if err := builder.AssignNode(inNode); err != nil { - return err + return enc, err } node := builder.Build() links, err := node.LookupByString("Links") if err != nil { - return err + return enc, err } - // 1KiB can be allocated on the stack, and covers most small nodes - // without having to grow the buffer and cause allocations. 
- enc := make([]byte, 0, 1024) - if links.Length() > 0 { // collect links into a slice so we can properly sort for encoding pbLinks := make([]pbLink, links.Length()) @@ -49,26 +63,26 @@ func Marshal(inNode ipld.Node, out io.Writer) error { for !linksIter.Done() { ii, link, err := linksIter.Next() if err != nil { - return err + return enc, err } { // Hash (required) d, err := link.LookupByString("Hash") if err != nil { - return err + return enc, err } l, err := d.AsLink() if err != nil { - return err + return enc, err } if err != nil { - return err + return enc, err } cl, ok := l.(cidlink.Link) if !ok { // this _should_ be taken care of by the Typed conversion above with // "missing required fields: Hash" - return fmt.Errorf("invalid DAG-PB form (link must have a Hash)") + return enc, fmt.Errorf("invalid DAG-PB form (link must have a Hash)") } pbLinks[ii].hash = cl.Cid } @@ -76,12 +90,12 @@ func Marshal(inNode ipld.Node, out io.Writer) error { { // Name (optional) nameNode, err := link.LookupByString("Name") if err != nil { - return err + return enc, err } if !nameNode.IsAbsent() { name, err := nameNode.AsString() if err != nil { - return err + return enc, err } pbLinks[ii].name = name pbLinks[ii].hasName = true @@ -91,15 +105,15 @@ func Marshal(inNode ipld.Node, out io.Writer) error { { // Tsize (optional) tsizeNode, err := link.LookupByString("Tsize") if err != nil { - return err + return enc, err } if !tsizeNode.IsAbsent() { tsize, err := tsizeNode.AsInt() if err != nil { - return err + return enc, err } if tsize < 0 { - return fmt.Errorf("Link has negative Tsize value [%v]", tsize) + return enc, fmt.Errorf("Link has negative Tsize value [%v]", tsize) } utsize := uint64(tsize) pbLinks[ii].tsize = utsize @@ -145,19 +159,18 @@ func Marshal(inNode ipld.Node, out io.Writer) error { // Data (optional) data, err := node.LookupByString("Data") if err != nil { - return err + return enc, err } if !data.IsAbsent() { byts, err := data.AsBytes() if err != nil { - return err 
+ return enc, err } enc = protowire.AppendTag(enc, 1, 2) // field & wire type for Data enc = protowire.AppendBytes(enc, byts) } - _, err = out.Write(enc) - return err + return enc, err } type pbLinkSlice []pbLink diff --git a/multicodec.go b/multicodec.go index e222e45..568c777 100644 --- a/multicodec.go +++ b/multicodec.go @@ -19,27 +19,6 @@ func init() { multicodec.RegisterEncoder(0x70, Encode) } -// Decode provides an IPLD codec decode interface for DAG-PB data. Provide a -// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node. -// Use the NodeAssembler from the PBNode type for safest construction -// (Type.PBNode.NewBuilder()). A Map assembler will also work. -// This function is registered via the go-ipld-prime link loader for multicodec -// code 0x70 when this package is invoked via init. -func Decode(na ipld.NodeAssembler, r io.Reader) error { - return Unmarshal(na, r) -} - -// Encode provides an IPLD codec encode interface for DAG-PB data. Provide a -// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node. -// The Node must strictly conform to the DAG-PB schema -// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md). -// For safest use, build Nodes using the Type.PBNode type. -// This function is registered via the go-ipld-prime link loader for multicodec -// code 0x70 when this package is invoked via init. -func Encode(n ipld.Node, w io.Writer) error { - return Marshal(n, w) -} - // AddSupportToChooser takes an existing node prototype chooser and subs in // PBNode for the dag-pb multicodec code. func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) traversal.LinkTargetNodePrototypeChooser { @@ -57,11 +36,13 @@ func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) trav // unnecessary to have two supported names for each API. // Deprecated: use Decode instead. 
-func Decoder(na ipld.NodeAssembler, r io.Reader) error { - return Unmarshal(na, r) -} +func Decoder(na ipld.NodeAssembler, r io.Reader) error { return Decode(na, r) } + +// Deprecated: use Decode instead. +func Unmarshal(na ipld.NodeAssembler, r io.Reader) error { return Decode(na, r) } // Deprecated: use Encode instead. -func Encoder(n ipld.Node, w io.Writer) error { - return Marshal(n, w) -} +func Encoder(inNode ipld.Node, w io.Writer) error { return Encode(inNode, w) } + +// Deprecated: use Encode instead. +func Marshal(inNode ipld.Node, w io.Writer) error { return Encode(inNode, w) } diff --git a/unmarshal.go b/unmarshal.go index edf1ca6..c8b6a60 100644 --- a/unmarshal.go +++ b/unmarshal.go @@ -15,21 +15,31 @@ import ( // malformed data var ErrIntOverflow = fmt.Errorf("protobuf: varint overflow") -// Unmarshal provides an IPLD codec decode interface for DAG-PB data. Provide -// a compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD -// Node. Use the NodeAssembler from the PBNode type for safest construction +// Decode provides an IPLD codec decode interface for DAG-PB data. Provide a +// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node. +// Use the NodeAssembler from the PBNode type for safest construction // (Type.PBNode.NewBuilder()). A Map assembler will also work. -func Unmarshal(na ipld.NodeAssembler, in io.Reader) error { - var remaining []byte +// This function is registered via the go-ipld-prime link loader for multicodec +// code 0x70 when this package is invoked via init. +func Decode(na ipld.NodeAssembler, in io.Reader) error { + var src []byte if buf, ok := in.(interface{ Bytes() []byte }); ok { - remaining = buf.Bytes() + src = buf.Bytes() } else { var err error - remaining, err = ioutil.ReadAll(in) + src, err = ioutil.ReadAll(in) if err != nil { return err } } + return DecodeBytes(na, src) +} + +// DecodeBytes is like Decode, but it uses an input buffer directly. 
+// Decode will grab or read all the bytes from an io.Reader anyway, so this can +// save having to copy the bytes or create a bytes.Buffer. +func DecodeBytes(na ipld.NodeAssembler, src []byte) error { + remaining := src ma, err := na.BeginMap(2) if err != nil { -- GitLab