diff --git a/doc/README.md b/doc/README.md index 2d17a75c0a33c940da321be8a61c92aa61b273a8..bbd52736eda1aa6df2589bc60675e1632dafeebc 100644 --- a/doc/README.md +++ b/doc/README.md @@ -23,3 +23,7 @@ - [Implementation](./schema.md#implementation) - [Migration Techniques](./schema.md#schemas-and-migration) - [Advanced Data Layouts](./advLayout.md) + +--- + +- [Development notes: on Node implementations](./dev/node-implementations.md) diff --git a/doc/dev/node-implementations.md b/doc/dev/node-implementations.md new file mode 100644 index 0000000000000000000000000000000000000000..135fe936e01aa3eed2b4f5b39b574f4bcddc2881 --- /dev/null +++ b/doc/dev/node-implementations.md @@ -0,0 +1,130 @@ +Dev Notes: on Node implementations +================================== + +> (This document is aimed at developers and contributors to the library; +> if you're only using the library, it should not be necessary to read this.) + +The concept of "Node" in IPLD is defined by the +[IPLD Data Model](https://github.com/ipld/specs/tree/master/data-model-layer), +and the interface of `ipld.Node` in this library is designed to make this +model manipulable in Golang. + +`Node` is an interface rather than a concrete implementation because +there are many different performance tradeoffs which can be made, +and we have multiple implementations that make them! +Codegenerated types also necessitate having a `Node` interface they can conform to. + + + +Designing a Node Implementation +------------------------------- + +Concerns: + +- 0. Correctness +- 1. Immutability +- 2. Performance + +A `Node` implementation must of course conform with the Data Model. +Some nodes (especially, those that also implement `typed.Node`) may also +have additional constraints. + +A `Node` implementation must maintain immutability, or it shatters abstractions +and breaks features that build upon the assumption of immutable nodes (e.g. caches). + +A `Node` implementation should be as fast as possible. 
+(How fast this is, of course, varies -- and different implementations may make +different tradeoffs, e.g. often at the loss of generality.) + +Note that "generality" is not on the list. Some `Node` implementations are +concerned with being usable to store any shape of data; some are not. +(A `Node` implementation will usually document which camp it falls into.) + + +### Castability + +Castability refers to whether the `Node` abstraction can be added or removed +(also referred to as "boxing" and "unboxing") +by use of a cast by user code outside the library. + +Castability relates to all three of Correctness, Immutability, and Performance. + +- if something can be unboxed via cast, and thence become mutable, we have an Immutability problem. +- if something mutable can be boxed via cast, staying mutable, we have an Immutability problem. +- if something can be boxed via cast, and thence skip a validator, we have a Correctness problem. + +(The relationship to performance is less black-and-white: though performance +considerations should always be backed up by benchmarks, casting can do well.) + +If castability would run into one of these problems, +then a Node implementation must avoid it. +(A typical way to do this is to make a single-field struct.) + +Whether a `Node` implementation will encounter these problems depends primarily on +the kind (literally, per `reflect.Kind`) of golang type that is used in the implementation, +and whether the `Node` is "general" or can have additional validators and constraints. + +#### Castability cases by example + +Castability for strings is safe when the `Node` is "general" (i.e. has no constraints). +With no constraints, there's no Correctness concern; +and since strings are immutable, there's no Immutability concern. + +Castability for strings is often *unsafe* when the `Node` is a `typed.Node`. +Typed nodes may have additional constraints, so we would have a Correctness problem. 
+(Note that the way we handle constraints in codegeneration means users can add +them *after* the code is generated, so the generation system can't presume +the absence of constraints.) + +Castability for other scalar types (int, float, etc.) is safe when the `Node` is "general" +for the same reasons it's safe for strings: all these things are pass-by-value +in golang, so they're effectively immutable, and thus we have no concerns. + +Castability for bytes is a trickier topic. +See also the later section on when performance wins over immutability. +(TODO: the recommended course of action here is not yet clear. +I'd default to suggesting it should use a `struct{[]byte}` wrapping, +but if the performance cost of that is high, the value may be dubious.) + +#### Zero values + +If a `Node` is implemented as a golang struct, zero values may be a concern. + +If the struct type is unexported, the concern is absolved: +the zero value can't be initialized outside the package. + +If the `Node` implementation has no other constraints +(e.g., it's not also a `typed.Node` in addition to just an `ipld.Node`), +the concern is (almost certainly) absolved: +the zero value is simply a valid value. + +For the remaining cases: it's possible that the zero value is not valid. +This is problematic, because in the system as a whole, we use the existence +of a value that's boxed into a `Node` as the statement that the value is valid, +rather than suffering the need for "defensive" checks cropping up everywhere. + +(TODO: the recommended course of action here is not yet clear. +Making the type unexported and instead making an exported interface with a +single implementation may be one option, and it's possible it won't even be +noticeably expensive if we already have to fit `Node`, but I'm not sure I've +reconnoitered all the other costs of that (e.g., godoc effects?). 
+It's possible this will be such a corner case in practice that we might +relegate the less-ergonomic mode to being an adjunct option for codegen.) + + + +When Performance wins over Immutability +--------------------------------------- + +Ideally? Never. In practice? Unfortunately, sometimes. + + +### bytes + +There is no way to have immutable byte slices in Go. +Defensive copying is also ridiculously costly. + +Methods that return byte slices typically do so without a defensive copy. + +Methods doing this should consistently document that +"It is not lawful to mutate the slice returned by this function". diff --git a/impl/free/justString.go b/impl/free/justString.go index 074033635706079a810cd72a549e37382eb45aa0..87059992ee2dce019b85609f0fea5c2782bf10df 100644 --- a/impl/free/justString.go +++ b/impl/free/justString.go @@ -6,23 +6,16 @@ import ( ) func String(value string) ipld.Node { - return justString{value} + return justString(value) } // justString is a simple boxed string that complies with ipld.Node. -// It doesn't actually contain type info or comply with typed.Node -// (which makes it cheaper: this struct doesn't trigger 'convt2e'). -// justString is particularly useful for boxing things like struct keys. -type justString struct { - x string -} - -// FUTURE: we'll also want a typed string, of course. -// Looking forward to benchmarking how that shakes out: it will almost -// certainly add cost in the form of 'convt2e', but we'll see how much. -// It'll also be particularly interesting to find out if common patterns of -// usage around map iterators will get the compiler to skip that cost if -// the key is unused by the caller. +// It's useful for many things, such as boxing map keys. +// +// The implementation is a simple typedef of a string; +// handling it as a Node incurs 'runtime.convTstring', +// which is about the best we can do. 
+type justString string func (justString) ReprKind() ipld.ReprKind { return ipld.ReprKind_String @@ -58,7 +51,7 @@ func (justString) AsFloat() (float64, error) { return 0, ipld.ErrWrongKind{MethodName: "AsFloat", AppropriateKind: ipld.ReprKindSet_JustFloat, ActualKind: ipld.ReprKind_String} } func (x justString) AsString() (string, error) { - return x.x, nil + return string(x), nil } func (justString) AsBytes() ([]byte, error) { return nil, ipld.ErrWrongKind{MethodName: "AsBytes", AppropriateKind: ipld.ReprKindSet_JustBytes, ActualKind: ipld.ReprKind_String} @@ -97,7 +90,7 @@ func (nb justStringNodeBuilder) CreateFloat(v float64) (ipld.Node, error) { return nil, ipld.ErrWrongKind{MethodName: "CreateFloat", AppropriateKind: ipld.ReprKindSet_JustFloat, ActualKind: ipld.ReprKind_String} } func (nb justStringNodeBuilder) CreateString(v string) (ipld.Node, error) { - return justString{v}, nil + return justString(v), nil } func (nb justStringNodeBuilder) CreateBytes(v []byte) (ipld.Node, error) { return nil, ipld.ErrWrongKind{MethodName: "CreateBytes", AppropriateKind: ipld.ReprKindSet_JustBytes, ActualKind: ipld.ReprKind_String} diff --git a/impl/free/justString_test.go b/impl/free/justString_test.go new file mode 100644 index 0000000000000000000000000000000000000000..91bb230848dbfdc8d638a9715d9875277126bbf3 --- /dev/null +++ b/impl/free/justString_test.go @@ -0,0 +1,45 @@ +package ipldfree + +import ( + "fmt" + "runtime" + "testing" + + ipld "github.com/ipld/go-ipld-prime" +) + +func BenchmarkJustString(b *testing.B) { + var node ipld.Node + for i := 0; i < b.N; i++ { + node = String("boxme") + } + _ = node +} + +func BenchmarkJustStringUse(b *testing.B) { + var node ipld.Node + for i := 0; i < b.N; i++ { + node = String("boxme") + s, err := node.AsString() + _ = s + _ = err + } +} + +func BenchmarkJustStringLogAllocs(b *testing.B) { + memUsage := func(m1, m2 *runtime.MemStats) { + fmt.Println( + "Alloc:", m2.Alloc-m1.Alloc, + "TotalAlloc:", 
m2.TotalAlloc-m1.TotalAlloc, + "HeapAlloc:", m2.HeapAlloc-m1.HeapAlloc, + ) + } + var m1, m2 runtime.MemStats + runtime.ReadMemStats(&m1) + var node ipld.Node = String("boxme") + runtime.ReadMemStats(&m2) + memUsage(&m1, &m2) + sinkNode = node // necessary to avoid clever elision. +} + +var sinkNode ipld.Node