Commit 8ec6b0fb authored by Daniel Martí

expose APIs without Reader/Writer overhead

An io.Writer, by definition, will always copy bytes. That's fine in
general, and can't be worked around without breaking the writer's
contract.

However, the main use of go-codec-dagpb today is go-merkledag, which
simply uses the codec to encode a node into a buffer.

So, we had to create a new bytes.Buffer, write to it, and grab its
bytes. This is one extra allocation (the bytes.Buffer object itself),
plus copying the encoded bytes an extra time, since we must copy from
Encode's internal buffer to the bytes.Buffer.

Add a lower-level, append-like AppendEncode that cuts out the middleman,
removing both of those extra pieces of work.

For the sake of consistency, we add DecodeBytes to mirror the above on
the decode side. Decode already had a shortcut for readers that expose a
Bytes method, but this way it's more evident what we're doing, and we
also avoid allocating a bytes.Buffer just to call Bytes on it.

Using these new APIs in go-merkledag shows nice results:

	name         old time/op    new time/op    delta
	Roundtrip-8    4.27µs ± 0%    4.07µs ± 0%  -4.50%  (p=0.004 n=5+6)

	name         old alloc/op   new alloc/op   delta
	Roundtrip-8    6.86kB ± 0%    6.38kB ± 0%  -6.99%  (p=0.002 n=6+6)

	name         old allocs/op  new allocs/op  delta
	Roundtrip-8       106 ± 0%       103 ± 0%  -2.83%  (p=0.002 n=6+6)

While at it, we formally deprecate Marshal and Unmarshal, since we're
starting to have lots of redundant API surface.
parent a4adba8b
......@@ -19,28 +19,42 @@ type pbLink struct {
hasTsize bool
}
// Marshal provides an IPLD codec encode interface for DAG-PB data. Provide a
// Encode provides an IPLD codec encode interface for DAG-PB data. Provide a
// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node.
// The Node must strictly conform to the DAG-PB schema
// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md).
// For safest use, build Nodes using the Type.PBNode type.
func Marshal(inNode ipld.Node, out io.Writer) error {
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Encode(node ipld.Node, w io.Writer) error {
// 1KiB can be allocated on the stack, and covers most small nodes
// without having to grow the buffer and cause allocations.
enc := make([]byte, 0, 1024)
enc, err := AppendEncode(enc, node)
if err != nil {
return err
}
_, err = w.Write(enc)
return err
}
// AppendEncode is like Encode, but it uses a destination buffer directly.
// This means less copying of bytes, and if the destination has enough capacity,
// fewer allocations.
func AppendEncode(enc []byte, inNode ipld.Node) ([]byte, error) {
// Wrap in a typed node for some basic schema form checking
builder := Type.PBNode.NewBuilder()
if err := builder.AssignNode(inNode); err != nil {
return err
return enc, err
}
node := builder.Build()
links, err := node.LookupByString("Links")
if err != nil {
return err
return enc, err
}
// 1KiB can be allocated on the stack, and covers most small nodes
// without having to grow the buffer and cause allocations.
enc := make([]byte, 0, 1024)
if links.Length() > 0 {
// collect links into a slice so we can properly sort for encoding
pbLinks := make([]pbLink, links.Length())
......@@ -49,26 +63,26 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
for !linksIter.Done() {
ii, link, err := linksIter.Next()
if err != nil {
return err
return enc, err
}
{ // Hash (required)
d, err := link.LookupByString("Hash")
if err != nil {
return err
return enc, err
}
l, err := d.AsLink()
if err != nil {
return err
return enc, err
}
if err != nil {
return err
return enc, err
}
cl, ok := l.(cidlink.Link)
if !ok {
// this _should_ be taken care of by the Typed conversion above with
// "missing required fields: Hash"
return fmt.Errorf("invalid DAG-PB form (link must have a Hash)")
return enc, fmt.Errorf("invalid DAG-PB form (link must have a Hash)")
}
pbLinks[ii].hash = cl.Cid
}
......@@ -76,12 +90,12 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
{ // Name (optional)
nameNode, err := link.LookupByString("Name")
if err != nil {
return err
return enc, err
}
if !nameNode.IsAbsent() {
name, err := nameNode.AsString()
if err != nil {
return err
return enc, err
}
pbLinks[ii].name = name
pbLinks[ii].hasName = true
......@@ -91,15 +105,15 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
{ // Tsize (optional)
tsizeNode, err := link.LookupByString("Tsize")
if err != nil {
return err
return enc, err
}
if !tsizeNode.IsAbsent() {
tsize, err := tsizeNode.AsInt()
if err != nil {
return err
return enc, err
}
if tsize < 0 {
return fmt.Errorf("Link has negative Tsize value [%v]", tsize)
return enc, fmt.Errorf("Link has negative Tsize value [%v]", tsize)
}
utsize := uint64(tsize)
pbLinks[ii].tsize = utsize
......@@ -145,19 +159,18 @@ func Marshal(inNode ipld.Node, out io.Writer) error {
// Data (optional)
data, err := node.LookupByString("Data")
if err != nil {
return err
return enc, err
}
if !data.IsAbsent() {
byts, err := data.AsBytes()
if err != nil {
return err
return enc, err
}
enc = protowire.AppendTag(enc, 1, 2) // field & wire type for Data
enc = protowire.AppendBytes(enc, byts)
}
_, err = out.Write(enc)
return err
return enc, err
}
type pbLinkSlice []pbLink
......
......@@ -19,27 +19,6 @@ func init() {
multicodec.RegisterEncoder(0x70, Encode)
}
// Decode provides an IPLD codec decode interface for DAG-PB data. Provide a
// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node.
// Use the NodeAssembler from the PBNode type for safest construction
// (Type.PBNode.NewBuilder()). A Map assembler will also work.
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Decode(na ipld.NodeAssembler, r io.Reader) error {
return Unmarshal(na, r)
}
// Encode provides an IPLD codec encode interface for DAG-PB data. Provide a
// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node.
// The Node must strictly conform to the DAG-PB schema
// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md).
// For safest use, build Nodes using the Type.PBNode type.
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Encode(n ipld.Node, w io.Writer) error {
return Marshal(n, w)
}
// AddSupportToChooser takes an existing node prototype chooser and subs in
// PBNode for the dag-pb multicodec code.
func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) traversal.LinkTargetNodePrototypeChooser {
......@@ -57,11 +36,13 @@ func AddSupportToChooser(existing traversal.LinkTargetNodePrototypeChooser) trav
// unnecessary to have two supported names for each API.
// Deprecated: use Decode instead.
func Decoder(na ipld.NodeAssembler, r io.Reader) error {
return Unmarshal(na, r)
}
func Decoder(na ipld.NodeAssembler, r io.Reader) error { return Decode(na, r) }
// Deprecated: use Decode instead.
func Unmarshal(na ipld.NodeAssembler, r io.Reader) error { return Decode(na, r) }
// Deprecated: use Encode instead.
func Encoder(n ipld.Node, w io.Writer) error {
return Marshal(n, w)
}
func Encoder(inNode ipld.Node, w io.Writer) error { return Encode(inNode, w) }
// Deprecated: use Encode instead.
func Marshal(inNode ipld.Node, w io.Writer) error { return Encode(inNode, w) }
......@@ -15,21 +15,31 @@ import (
// malformed data
var ErrIntOverflow = fmt.Errorf("protobuf: varint overflow")
// Unmarshal provides an IPLD codec decode interface for DAG-PB data. Provide
// a compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD
// Node. Use the NodeAssembler from the PBNode type for safest construction
// Decode provides an IPLD codec decode interface for DAG-PB data. Provide a
// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node.
// Use the NodeAssembler from the PBNode type for safest construction
// (Type.PBNode.NewBuilder()). A Map assembler will also work.
func Unmarshal(na ipld.NodeAssembler, in io.Reader) error {
var remaining []byte
// This function is registered via the go-ipld-prime link loader for multicodec
// code 0x70 when this package is invoked via init.
func Decode(na ipld.NodeAssembler, in io.Reader) error {
var src []byte
if buf, ok := in.(interface{ Bytes() []byte }); ok {
remaining = buf.Bytes()
src = buf.Bytes()
} else {
var err error
remaining, err = ioutil.ReadAll(in)
src, err = ioutil.ReadAll(in)
if err != nil {
return err
}
}
return DecodeBytes(na, src)
}
// DecodeBytes is like Decode, but it uses an input buffer directly.
// Decode will grab or read all the bytes from an io.Reader anyway, so this can
// save having to copy the bytes or create a bytes.Buffer.
func DecodeBytes(na ipld.NodeAssembler, src []byte) error {
remaining := src
ma, err := na.BeginMap(2)
if err != nil {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment