Commit f3d42e04 authored by Daniel Martí's avatar Daniel Martí

schema/gen/go: avoid Maybe pointers for small types

If we know that a schema type can be represented in Go with a small
amount of bytes, using a pointer to store its "maybe" is rarely a good
idea. For example, an optional string only weighs twice as much as a
pointer, so a pointer adds overhead and will barely ever save any
memory.

Add a function to work out the byte size of a schema.TypeKind, relying
on reflection and the basicnode package. Debug prints are also present
if one wants to double-check the numbers. As of today, they are:

	sizeOf(small): 32 (4x pointer size)
	sizeOf(Bool): 1
	sizeOf(Int): 8
	sizeOf(Float): 8
	sizeOf(String): 16
	sizeOf(Bytes): 24
	sizeOf(List): 24
	sizeOf(Map): 32
	sizeOf(Link): 16

Below is the result on go-merkledag's BenchmarkRoundtrip after
re-generating go-codec-dagpb with this change. Note that the dag-pb
schema contains multiple optional fields, such as strings.

	name         old time/op    new time/op    delta
	Roundtrip-8    4.24µs ± 3%    3.78µs ± 0%  -10.87%  (p=0.004 n=6+5)

	name         old alloc/op   new alloc/op   delta
	Roundtrip-8    6.38kB ± 0%    6.24kB ± 0%   -2.26%  (p=0.002 n=6+6)

	name         old allocs/op  new allocs/op  delta
	Roundtrip-8       103 ± 0%        61 ± 0%  -40.78%  (p=0.002 n=6+6)

Schema typekinds which don't directly map to basicnode prototypes, such
as structs and unions, are left as a TODO for now.

I did not do any measurements to arrive at the magic number of 4x, which
is documented in the code. We might well increase it in the future, with
more careful benchmarking. For now, it seems like a conservative starting
point that should cover all basic types.

Finally, re-generate within this repo.
parent fc2a58f3
//go:build ignore
// +build ignore
package main
......
......@@ -18,7 +18,7 @@ func (_Int__Prototype) FromInt(v int64) (Int, error) {
type _Int__Maybe struct {
m schema.Maybe
v Int
v _Int
}
type MaybeInt = *_Int__Maybe
......@@ -38,7 +38,7 @@ func (m MaybeInt) AsNode() ipld.Node {
case schema.Maybe_Null:
return ipld.Null
case schema.Maybe_Value:
return m.v
return &m.v
default:
panic("unreachable")
}
......@@ -47,7 +47,7 @@ func (m MaybeInt) Must() Int {
if !m.Exists() {
panic("unbox of a maybe rejected")
}
return m.v
return &m.v
}
var _ ipld.Node = (Int)(&_Int{})
......@@ -161,9 +161,6 @@ func (na *_Int__Assembler) AssignInt(v int64) error {
case schema.Maybe_Value, schema.Maybe_Null:
panic("invalid state: cannot assign into assembler that's already finished")
}
if na.w == nil {
na.w = &_Int{}
}
na.w.x = v
*na.m = schema.Maybe_Value
return nil
......@@ -189,11 +186,6 @@ func (na *_Int__Assembler) AssignNode(v ipld.Node) error {
case schema.Maybe_Value, schema.Maybe_Null:
panic("invalid state: cannot assign into assembler that's already finished")
}
if na.w == nil {
na.w = v2
*na.m = schema.Maybe_Value
return nil
}
*na.w = *v2
*na.m = schema.Maybe_Value
return nil
......@@ -266,7 +258,7 @@ func (itr *Map__String__Msg3__Itr) Done() bool {
type _Map__String__Msg3__Maybe struct {
m schema.Maybe
v Map__String__Msg3
v _Map__String__Msg3
}
type MaybeMap__String__Msg3 = *_Map__String__Msg3__Maybe
......@@ -286,7 +278,7 @@ func (m MaybeMap__String__Msg3) AsNode() ipld.Node {
case schema.Maybe_Null:
return ipld.Null
case schema.Maybe_Value:
return m.v
return &m.v
default:
panic("unreachable")
}
......@@ -295,7 +287,7 @@ func (m MaybeMap__String__Msg3) Must() Map__String__Msg3 {
if !m.Exists() {
panic("unbox of a maybe rejected")
}
return m.v
return &m.v
}
var _ ipld.Node = (Map__String__Msg3)(&_Map__String__Msg3{})
......@@ -440,9 +432,6 @@ func (na *_Map__String__Msg3__Assembler) BeginMap(sizeHint int64) (ipld.MapAssem
if sizeHint < 0 {
sizeHint = 0
}
if na.w == nil {
na.w = &_Map__String__Msg3{}
}
na.w.m = make(map[_String]*_Msg3, sizeHint)
na.w.t = make([]_Map__String__Msg3__entry, 0, sizeHint)
return na, nil
......@@ -493,11 +482,6 @@ func (na *_Map__String__Msg3__Assembler) AssignNode(v ipld.Node) error {
case midvalue:
panic("invalid state: cannot assign null into an assembler that's already begun working on recursive structures!")
}
if na.w == nil {
na.w = v2
*na.m = schema.Maybe_Value
return nil
}
*na.w = *v2
*na.m = schema.Maybe_Value
return nil
......@@ -782,9 +766,6 @@ func (na *_Map__String__Msg3__ReprAssembler) BeginMap(sizeHint int64) (ipld.MapA
if sizeHint < 0 {
sizeHint = 0
}
if na.w == nil {
na.w = &_Map__String__Msg3{}
}
na.w.m = make(map[_String]*_Msg3, sizeHint)
na.w.t = make([]_Map__String__Msg3__entry, 0, sizeHint)
return na, nil
......@@ -835,11 +816,6 @@ func (na *_Map__String__Msg3__ReprAssembler) AssignNode(v ipld.Node) error {
case midvalue:
panic("invalid state: cannot assign null into an assembler that's already begun working on recursive structures!")
}
if na.w == nil {
na.w = v2
*na.m = schema.Maybe_Value
return nil
}
*na.w = *v2
*na.m = schema.Maybe_Value
return nil
......@@ -2016,7 +1992,7 @@ func (_String__Prototype) FromString(v string) (String, error) {
type _String__Maybe struct {
m schema.Maybe
v String
v _String
}
type MaybeString = *_String__Maybe
......@@ -2036,7 +2012,7 @@ func (m MaybeString) AsNode() ipld.Node {
case schema.Maybe_Null:
return ipld.Null
case schema.Maybe_Value:
return m.v
return &m.v
default:
panic("unreachable")
}
......@@ -2045,7 +2021,7 @@ func (m MaybeString) Must() String {
if !m.Exists() {
panic("unbox of a maybe rejected")
}
return m.v
return &m.v
}
var _ ipld.Node = (String)(&_String{})
......@@ -2165,9 +2141,6 @@ func (na *_String__Assembler) AssignString(v string) error {
case schema.Maybe_Value, schema.Maybe_Null:
panic("invalid state: cannot assign into assembler that's already finished")
}
if na.w == nil {
na.w = &_String{}
}
na.w.x = v
*na.m = schema.Maybe_Value
return nil
......@@ -2187,11 +2160,6 @@ func (na *_String__Assembler) AssignNode(v ipld.Node) error {
case schema.Maybe_Value, schema.Maybe_Null:
panic("invalid state: cannot assign into assembler that's already finished")
}
if na.w == nil {
na.w = v2
*na.m = schema.Maybe_Value
return nil
}
*na.w = *v2
*na.m = schema.Maybe_Value
return nil
......
//go:build ignore
// +build ignore
package main
......
This diff is collapsed.
......@@ -2,8 +2,11 @@ package gengo
import (
"fmt"
"reflect"
"strings"
ipld "github.com/ipld/go-ipld-prime"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/schema"
)
......@@ -22,7 +25,7 @@ type AdjunctCfg struct {
typeSymbolOverrides map[schema.TypeName]string
FieldSymbolLowerOverrides map[FieldTuple]string
fieldSymbolUpperOverrides map[FieldTuple]string
maybeUsesPtr map[schema.TypeName]bool // treat absent as true
maybeUsesPtr map[schema.TypeName]bool // absent uses a heuristic
CfgUnionMemlayout map[schema.TypeName]string // "embedAll"|"interface"; maybe more options later, unclear for now.
// ... some of these fields have sprouted messy name prefixes so they don't collide with their matching method names.
......@@ -74,13 +77,91 @@ func (cfg *AdjunctCfg) MaybeUsesPtr(t schema.Type) bool {
if x, ok := cfg.maybeUsesPtr[t.Name()]; ok {
return x
}
// FUTURE: we could make this default vary based on sizeof the type.
// It's generally true that for scalars it should be false by default; and that's easy to do.
// It would actually *remove* special cases from the prelude, which would be a win.
// Maps and lists should also probably default off...?
// (I have a feeling something might get touchy there. Review when implementing those.)
// Perhaps structs and unions are the only things likely to benefit from pointers.
return true
// As a simple heuristic,
// check how large the Go representation of this type will be.
// If it weighs little, we estimate that a pointer is not worthwhile,
// as storing the data directly will barely take more memory.
// Plus, the resulting code will be shorter and have fewer branches.
return sizeOfSchemaType(t) > sizeSmallEnoughForInlining
}
var (
// The cutoff for "weighs little" is any size up to this number.
// It's hasn't been measured with any benchmarks or stats just yet.
// It's possible that, with those, it might increase in the future.
// Intuitively, any type 4x the size of a pointer is fine to inline.
// Adding a pointer will already add 1x overhead, anyway.
sizeSmallEnoughForInlining = 4 * reflect.TypeOf(new(int)).Size()
sizeOfTypeKind [128]uintptr
)
func init() {
// Uncomment for debugging.
// fmt.Fprintf(os.Stderr, "sizeOf(small): %d (4x pointer size)\n", sizeSmallEnoughForInlining)
// Get the basic node sizes via basicnode.
for _, tk := range []struct {
typeKind schema.TypeKind
prototype ipld.NodePrototype
}{
{schema.TypeKind_Bool, basicnode.Prototype.Bool},
{schema.TypeKind_Int, basicnode.Prototype.Int},
{schema.TypeKind_Float, basicnode.Prototype.Float},
{schema.TypeKind_String, basicnode.Prototype.String},
{schema.TypeKind_Bytes, basicnode.Prototype.Bytes},
{schema.TypeKind_List, basicnode.Prototype.List},
{schema.TypeKind_Map, basicnode.Prototype.Map},
{schema.TypeKind_Link, basicnode.Prototype.Link},
} {
nb := tk.prototype.NewBuilder()
switch tk.typeKind {
case schema.TypeKind_List:
am, err := nb.BeginList(0)
if err != nil {
panic(err)
}
if err := am.Finish(); err != nil {
panic(err)
}
case schema.TypeKind_Map:
am, err := nb.BeginMap(0)
if err != nil {
panic(err)
}
if err := am.Finish(); err != nil {
panic(err)
}
}
// Note that the Node interface has a pointer underneath,
// so we use Elem to reach the underlying type.
size := reflect.TypeOf(nb.Build()).Elem().Size()
sizeOfTypeKind[tk.typeKind] = size
// Uncomment for debugging.
// fmt.Fprintf(os.Stderr, "sizeOf(%s): %d\n", tk.typeKind, size)
}
}
// sizeOfSchemaType returns the size of a schema type,
// relative to the size of a pointer in native Go.
//
// For example, TypeInt and TypeMap returns 1, but TypeList returns 3, as a
// slice in Go has a pointer and two integers for length and capacity.
// Any basic type smaller than a pointer, such as TypeBool, returns 1.
func sizeOfSchemaType(t schema.Type) uintptr {
kind := t.TypeKind()
// If this TypeKind is represented by the basicnode package,
// we statically know its size and we can return here.
if size := sizeOfTypeKind[kind]; size > 0 {
return size
}
// TODO: handle typekinds like structs, unions, etc.
// For now, return a large size to fall back to using a pointer.
return 100 * sizeSmallEnoughForInlining
}
// UnionMemlayout returns a plain string at present;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment