Commit 8ec6b0fb authored by Daniel Martí

expose APIs without Reader/Writer overhead

An io.Writer, by definition, will always copy bytes. That's fine in
general, and can't be worked around without breaking the writer's
contract.

However, the main use of go-codec-dagpb today is go-merkledag, which
simply uses the codec to encode a node into a buffer.

So, we had to create a new bytes.Buffer, write to it, and grab its
bytes. This is one extra allocation (the bytes.Buffer object itself),
plus copying the encoded bytes an extra time, since we must copy from
Encode's internal buffer to the bytes.Buffer.

Add a lower-level append-like AppendEncode that cuts out the middleman,
removing both of those extra pieces of work.

For the sake of consistency, we add DecodeBytes to mirror the above on
the decode side. Decode already had a shortcut for Bytes, but this way
it's more evident what we're doing, and we also avoid allocating a
bytes.Buffer just to call Bytes on it.

Using these new APIs in go-merkledag shows nice results:

	name         old time/op    new time/op    delta
	Roundtrip-8    4.27µs ± 0%    4.07µs ± 0%  -4.50%  (p=0.004 n=5+6)

	name         old alloc/op   new alloc/op   delta
	Roundtrip-8    6.86kB ± 0%    6.38kB ± 0%  -6.99%  (p=0.002 n=6+6)

	name         old allocs/op  new allocs/op  delta
	Roundtrip-8       106 ± 0%       103 ± 0%  -2.83%  (p=0.002 n=6+6)

While at it, we formally deprecate Marshal and Unmarshal, since we're
starting to have lots of redundant API surface.
parent a4adba8b
...@@ -19,28 +19,42 @@ type pbLink struct { ...@@ -19,28 +19,42 @@ type pbLink struct {
hasTsize bool hasTsize bool
} }
// Marshal provides an IPLD codec encode interface for DAG-PB data. Provide a // Encode provides an IPLD codec encode interface for DAG-PB data. Provide a
// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node. // conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node.
// The Node must strictly conform to the DAG-PB schema // The Node must strictly conform to the DAG-PB schema
// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md). // (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md).
// For safest use, build Nodes using the Type.PBNode type. // For safest use, build Nodes using the Type.PBNode type.
func Marshal(inNode ipld.Node, out io.Writer) error { // This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Encode(node ipld.Node, w io.Writer) error {
	// Start from an empty buffer with 1KiB of capacity, so that most small
	// nodes can be encoded without the buffer having to grow.
	buf, err := AppendEncode(make([]byte, 0, 1024), node)
	if err != nil {
		return err
	}
	// Hand the fully encoded node to the writer in a single Write call and
	// surface whatever error it reports.
	if _, werr := w.Write(buf); werr != nil {
		return werr
	}
	return nil
}
// AppendEncode is like Encode, but it uses a destination buffer directly.
// This means less copying of bytes, and if the destination has enough capacity,
// fewer allocations.
func AppendEncode(enc []byte, inNode ipld.Node) ([]byte, error) {
// Wrap in a typed node for some basic schema form checking // Wrap in a typed node for some basic schema form checking
builder := Type.PBNode.NewBuilder() builder := Type.PBNode.NewBuilder()
if err := builder.AssignNode(inNode); err != nil { if err := builder.AssignNode(inNode); err != nil {
return err return enc, err
} }
node := builder.Build() node := builder.Build()
links, err := node.LookupByString("Links") links, err := node.LookupByString("Links")
if err != nil { if err != nil {
return err return enc, err
} }
// 1KiB can be allocated on the stack, and covers most small nodes
// without having to grow the buffer and cause allocations.
enc := make([]byte, 0, 1024)
if links.Length() > 0 { if links.Length() > 0 {
// collect links into a slice so we can properly sort for encoding // collect links into a slice so we can properly sort for encoding
pbLinks := make([]pbLink, links.Length()) pbLinks := make([]pbLink, links.Length())
...@@ -49,26 +63,26 @@ func Marshal(inNode ipld.Node, out io.Writer) error { ...@@ -49,26 +63,26 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
for !linksIter.Done() { for !linksIter.Done() {
ii, link, err := linksIter.Next() ii, link, err := linksIter.Next()
if err != nil { if err != nil {
return err return enc, err
} }
{ // Hash (required) { // Hash (required)
d, err := link.LookupByString("Hash") d, err := link.LookupByString("Hash")
if err != nil { if err != nil {
return err return enc, err
} }
l, err := d.AsLink() l, err := d.AsLink()
if err != nil { if err != nil {
return err return enc, err
} }
if err != nil { if err != nil {
return err return enc, err
} }
cl, ok := l.(cidlink.Link) cl, ok := l.(cidlink.Link)
if !ok { if !ok {
// this _should_ be taken care of by the Typed conversion above with // this _should_ be taken care of by the Typed conversion above with
// "missing required fields: Hash" // "missing required fields: Hash"
return fmt.Errorf("invalid DAG-PB form (link must have a Hash)") return enc, fmt.Errorf("invalid DAG-PB form (link must have a Hash)")
} }
pbLinks[ii].hash = cl.Cid pbLinks[ii].hash = cl.Cid
} }
...@@ -76,12 +90,12 @@ func Marshal(inNode ipld.Node, out io.Writer) error { ...@@ -76,12 +90,12 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
{ // Name (optional) { // Name (optional)
nameNode, err := link.LookupByString("Name") nameNode, err := link.LookupByString("Name")
if err != nil { if err != nil {
return err return enc, err
} }
if !nameNode.IsAbsent() { if !nameNode.IsAbsent() {
name, err := nameNode.AsString() name, err := nameNode.AsString()
if err != nil { if err != nil {
return err return enc, err
} }
pbLinks[ii].name = name pbLinks[ii].name = name
pbLinks[ii].hasName = true pbLinks[ii].hasName = true
...@@ -91,15 +105,15 @@ func Marshal(inNode ipld.Node, out io.Writer) error { ...@@ -91,15 +105,15 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
{ // Tsize (optional) { // Tsize (optional)
tsizeNode, err := link.LookupByString("Tsize") tsizeNode, err := link.LookupByString("Tsize")
if err != nil { if err != nil {
return err return enc, err
} }
if !tsizeNode.IsAbsent() { if !tsizeNode.IsAbsent() {
tsize, err := tsizeNode.AsInt() tsize, err := tsizeNode.AsInt()
if err != nil { if err != nil {
return err return enc, err
} }
if tsize < 0 { if tsize < 0 {
return fmt.Errorf("Link has negative Tsize value [%v]", tsize) return enc, fmt.Errorf("Link has negative Tsize value [%v]", tsize)
} }
utsize := uint64(tsize) utsize := uint64(tsize)
pbLinks[ii].tsize = utsize pbLinks[ii].tsize = utsize
...@@ -145,19 +159,18 @@ func Marshal(inNode ipld.Node, out io.Writer) error { ...@@ -145,19 +159,18 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
// Data (optional) // Data (optional)
data, err := node.LookupByString("Data") data, err := node.LookupByString("Data")
if err != nil { if err != nil {
return err return enc, err
} }
if !data.IsAbsent() { if !data.IsAbsent() {
byts, err := data.AsBytes() byts, err := data.AsBytes()
if err != nil { if err != nil {
return err return enc, err
} }
enc = protowire.AppendTag(enc, 1, 2) // field & wire type for Data enc = protowire.AppendTag(enc, 1, 2) // field & wire type for Data
enc = protowire.AppendBytes(enc, byts) enc = protowire.AppendBytes(enc, byts)
} }
_, err = out.Write(enc) return enc, err
return err
} }
type pbLinkSlice []pbLink type pbLinkSlice []pbLink
......
...@@ -19,27 +19,6 @@ func init() { ...@@ -19,27 +19,6 @@ func init() {
multicodec.RegisterEncoder(0x70, Encode) multicodec.RegisterEncoder(0x70, Encode)
} }
// Decode provides an IPLD codec decode interface for DAG-PB data. Provide a
// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node.
// Use the NodeAssembler from the PBNode type for safest construction
// (Type.PBNode.NewBuilder()). A Map assembler will also work.
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Decode(na ipld.NodeAssembler, r io.Reader) error {
return Unmarshal(na, r)
}
// Encode provides an IPLD codec encode interface for DAG-PB data. Provide a
// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node.
// The Node must strictly conform to the DAG-PB schema
// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md).
// For safest use, build Nodes using the Type.PBNode type.
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Encode(n ipld.Node, w io.Writer) error {
return Marshal(n, w)
}
// AddSupportToChooser takes an existing node prototype chooser and subs in // AddSupportToChooser takes an existing node prototype chooser and subs in
// PBNode for the dag-pb multicodec code. // PBNode for the dag-pb multicodec code.
func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) traversal.LinkTargetNodePrototypeChooser { func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) traversal.LinkTargetNodePrototypeChooser {
...@@ -57,11 +36,13 @@ func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) trav ...@@ -57,11 +36,13 @@ func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) trav
// unnecessary to have two supported names for each API. // unnecessary to have two supported names for each API.
// Deprecated: use Decode instead. // Deprecated: use Decode instead.
func Decoder(na ipld.NodeAssembler, r io.Reader) error { func Decoder(na ipld.NodeAssembler, r io.Reader) error { return Decode(na, r) }
return Unmarshal(na, r)
} // Deprecated: use Decode instead.
func Unmarshal(na ipld.NodeAssembler, r io.Reader) error {
	// Kept only as an alias: all of the work happens in Decode.
	return Decode(na, r)
}
// Deprecated: use Encode instead. // Deprecated: use Encode instead.
func Encoder(n ipld.Node, w io.Writer) error { func Encoder(inNode ipld.Node, w io.Writer) error { return Encode(inNode, w) }
return Marshal(n, w)
} // Deprecated: use Encode instead.
func Marshal(inNode ipld.Node, w io.Writer) error {
	// Kept only as an alias: all of the work happens in Encode.
	return Encode(inNode, w)
}
...@@ -15,21 +15,31 @@ import ( ...@@ -15,21 +15,31 @@ import (
// malformed data // malformed data
var ErrIntOverflow = fmt.Errorf("protobuf: varint overflow") var ErrIntOverflow = fmt.Errorf("protobuf: varint overflow")
// Unmarshal provides an IPLD codec decode interface for DAG-PB data. Provide // Decode provides an IPLD codec decode interface for DAG-PB data. Provide a
// a compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD // compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node.
// Node. Use the NodeAssembler from the PBNode type for safest construction // Use the NodeAssembler from the PBNode type for safest construction
// (Type.PBNode.NewBuilder()). A Map assembler will also work. // (Type.PBNode.NewBuilder()). A Map assembler will also work.
func Unmarshal(na ipld.NodeAssembler, in io.Reader) error { // This function is registered via the go-ipld-prime link loader for multicodec
var remaining []byte // code 0x70 when this package is invoked via init.
func Decode(na ipld.NodeAssembler, in io.Reader) error {
var src []byte
if buf, ok := in.(interface{ Bytes() []byte }); ok { if buf, ok := in.(interface{ Bytes() []byte }); ok {
remaining = buf.Bytes() src = buf.Bytes()
} else { } else {
var err error var err error
remaining, err = ioutil.ReadAll(in) src, err = ioutil.ReadAll(in)
if err != nil { if err != nil {
return err return err
} }
} }
return DecodeBytes(na, src)
}
// DecodeBytes is like Decode, but it uses an input buffer directly.
// Decode will grab or read all the bytes from an io.Reader anyway, so this can
// save having to copy the bytes or create a bytes.Buffer.
func DecodeBytes(na ipld.NodeAssembler, src []byte) error {
remaining := src
ma, err := na.BeginMap(2) ma, err := na.BeginMap(2)
if err != nil { if err != nil {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment