Commit 6897bb3c authored by Eric Myhre's avatar Eric Myhre

schema compiler: union rules.

Commit to the strategy of having the first flunked rule for a type
result in short-circuit skipping of subsequent rules.  It's simple,
and it's sufficient.
parent 1b282cfa
......@@ -106,6 +106,12 @@ just slightly flipped about. (See `schema.TypeStruct` and `dmt.TypeStruct`, for
Re-building the immutability guarantees that we get for free from the codegen'd `dmt.*` types also
took a rather staggering amount of highly redundant code.
Since we were unable to use the codegen'd types, and we use golang native maps instead in some places,
this tends to make our logic less deterministic in its evaluation order.
We've mostly ignored this, but you may notice it in situations such as validation of a schema
that has more than one thing wrong within the same union representation specification, for example,
since those happen to use golang maps in the (mostly redundant) golang structures.
Our validation logic ended up written against the `schema.Type*` types rather than the `dmt.*` types.
It was necessary to do it this way because now we have the possibility of compiling schema types without going through `dmt.*` at all.
Since there's still a path from the `dmt.*` types to here, we didn't _lose_ any important features
......@@ -119,6 +125,9 @@ Maybe this isn't all that important. But it seems unfortunate.
But for all those drawbacks: it works.
(This whole section has a double duty: it also serves as a nice list
of cool features you get for free when using our codegen.)
### two packages, compiler is with dmt
Doesn't really fly for the same reasons as three packages.
......
package schema
import "fmt"
import (
"fmt"
"github.com/ipld/go-ipld-prime"
)
type rule struct {
// text is the name of the rule and the start of the error message body if the rule is flunked.
......@@ -67,6 +71,9 @@ func validate(ts *TypeSystem, typ Type, errs *[]error) {
//
// The short circuiting logic between subsequent rules means that
// later rules are allowed to make presumptions of things checked by earlier rules.
//
// The table-like design here hopefully will make the semantics defined within
// easier to port to other implementations in other languages.
var rules = map[TypeKind][]rule{
TypeKind_Map: []rule{
{"map declaration's key type must be defined",
......@@ -154,16 +161,66 @@ var rules = map[TypeKind][]rule{
return nil
},
},
// TODO continue with more union rules... but... they're starting to get conditional on the passage of prior rules.
// Unsure how much effort it's worth to represent this in detail.
// - Should we have flunk of one rule cause subsequent rules to be skipped on that type?
// - Should we just re-do all the prerequisite checks, but return nil if those fail (since another rule should've already reported those)?
// - Should we re-do all the prerequisite checks, and return a special 'inapplicable' error code if those fail?
// - Should we build a terribly complicated prerequisite tracking system?
// - (Okay, maybe it's not that complicated; a tree would probably suffice?)
// My original aim with this design was to get as close as possible to something table-driven,
// in the hope this would make it easier to port the semantics to other languages.
// As this code gets fancier, that goal fades fast, so a solution that's KISS is probably preferable.
{"union's representation must specify exactly one discriminant for each member",
	alwaysApplies,
	// Compares the discriminant table of whichever representation strategy the
	// union uses against the union's member list.  Every mismatch found by the
	// helper is appended to the named result errs, which is what gets returned.
	func(ts *TypeSystem, t Type) (errs []error) {
		t2 := t.(*TypeUnion)
		// All of these are very similar, but they store the info in technically distinct places, so we have to destructure to get at it.
		switch r := t2.rstrat.(type) {
		case UnionRepresentation_Keyed:
			checkUnionDiscriminantInfo(t2.members, r.discriminantTable, &errs)
		case UnionRepresentation_Kinded:
			// Kinded tables use a different key type, hence the separate helper.
			checkUnionDiscriminantInfo2(t2.members, r.discriminantTable, &errs)
		case UnionRepresentation_Envelope:
			checkUnionDiscriminantInfo(t2.members, r.discriminantTable, &errs)
		case UnionRepresentation_Inline:
			checkUnionDiscriminantInfo(t2.members, r.discriminantTable, &errs)
		case UnionRepresentation_Stringprefix:
			checkUnionDiscriminantInfo(t2.members, r.discriminantTable, &errs)
		case UnionRepresentation_Byteprefix:
			checkUnionDiscriminantInfo(t2.members, r.discriminantTable, &errs)
		}
		// Return the accumulated errors.  A `return nil` here would overwrite the
		// named result and silently discard everything the helpers reported,
		// so the rule could never flunk.
		return errs
	},
},
{"kinded union's discriminants must match the member's kinds",
	// Applies only to unions whose representation strategy is kinded.
	func(t Type) bool {
		_, isKinded := t.(*TypeUnion).rstrat.(UnionRepresentation_Kinded)
		return isKinded
	},
	// For each (kind, member type) entry in the discriminant table, looks the
	// member type up in the type system and compares its representation
	// behavior against the kind it was declared under.
	func(ts *TypeSystem, t Type) (errs []error) {
		rep := t.(*TypeUnion).rstrat.(UnionRepresentation_Kinded)
		for declaredKind, memberName := range rep.discriminantTable {
			actualKind := ts.types[TypeReference(memberName)].RepresentationBehavior()
			switch {
			case actualKind == ipld.Kind_Invalid:
				// Kind_Invalid indicates the member is itself a kinded union (the only thing
				// that can't statically state its representation behavior), which deserves
				// a special error message.
				errs = append(errs, fmt.Errorf("kinded unions cannot be nested and member type %s is also a kinded union", memberName))
			case actualKind != declaredKind:
				errs = append(errs, fmt.Errorf("kind %s is declared to be received as type %s, but that type's representation kind is %s", declaredKind, memberName, actualKind))
			}
		}
		return errs
	},
},
{"envelope union's magic keys must be distinct",
	// Applies only to unions whose representation strategy is envelope.
	func(t Type) bool {
		_, isEnvelope := t.(*TypeUnion).rstrat.(UnionRepresentation_Envelope)
		return isEnvelope
	},
	// Flunks when the discriminant key and the content key are equal.
	func(ts *TypeSystem, t Type) []error {
		rep := t.(*TypeUnion).rstrat.(UnionRepresentation_Envelope)
		if rep.discriminantKey != rep.contentKey {
			return nil
		}
		return []error{fmt.Errorf("content key and discriminant key are the same")}
	},
},
{"inline union's members must all have map representations and not collide with the union's discriminant key",
	// Applies only to unions whose representation strategy is inline.
	func(t Type) bool { _, ok := t.(*TypeUnion).rstrat.(UnionRepresentation_Inline); return ok },
	// Placeholder: the per-member checks have not been ported yet, so this
	// currently never reports an error.
	func(ts *TypeSystem, t Type) (errs []error) {
		r := t.(*TypeUnion).rstrat.(UnionRepresentation_Inline)
		// The loop variables are omitted until the body exists: Go refuses to
		// compile variables that are declared but never used.
		for range r.discriminantTable {
			// TODO: port the UnionRepresentation_Inline rules
		}
		return errs
	},
},
// FUTURE: UnionRepresentation_Stringprefix will probably have additional rules too
// FUTURE: UnionRepresentation_Byteprefix will probably have additional rules too
// TODO: port the enum rules
},
}
......@@ -247,3 +304,51 @@ func hasStringRepresentation(t Type) bool {
panic("unreachable")
}
}
// checkUnionDiscriminantInfo verifies that every member in the list appears
// exactly once as a value in the discriminants map, and that nothing else
// appears in the map.
//
// Problems are appended to *ee; nothing is returned.  The members slice is
// not modified.  Because discriminantsMap is a native map, the order in which
// errors are appended may vary between runs.
func checkUnionDiscriminantInfo(members []TypeName, discriminantsMap map[string]TypeName, ee *[]error) {
	// covered[i] records whether some discriminant has already claimed members[i].
	covered := make([]bool, len(members))
	for _, v := range discriminantsMap {
		found := false
		for i, v2 := range members {
			if v != v2 {
				continue
			}
			// `found` trips when this one discriminant matches a second entry in
			// the member list; `covered[i]` trips when an earlier discriminant
			// already pointed at this same member.  Checking only `found` would
			// let two discriminants for one member pass unnoticed, since `found`
			// is reset for every discriminant.
			if found || covered[i] {
				*ee = append(*ee, fmt.Errorf("more than one discriminant pointing to member type %s", v2))
			}
			found = true
			covered[i] = true
		}
		if !found {
			*ee = append(*ee, fmt.Errorf("discriminant refers to a non-member type %s", v))
		}
	}
	// Anything still uncovered has no discriminant at all.
	for i, m := range members {
		if !covered[i] {
			*ee = append(*ee, fmt.Errorf("missing discriminant info for member type %s", m))
		}
	}
}
// checkUnionDiscriminantInfo2 performs the same verification as
// checkUnionDiscriminantInfo, for discriminant tables keyed by ipld.Kind
// (as used by kinded unions): every member in the list must appear exactly
// once as a value in the discriminants map, and nothing else may appear in it.
//
// Problems are appended to *ee; nothing is returned.  The members slice is
// not modified.  Because discriminantsMap is a native map, the order in which
// errors are appended may vary between runs.
func checkUnionDiscriminantInfo2(members []TypeName, discriminantsMap map[ipld.Kind]TypeName, ee *[]error) {
	// covered[i] records whether some discriminant has already claimed members[i].
	covered := make([]bool, len(members))
	for _, v := range discriminantsMap {
		found := false
		for i, v2 := range members {
			if v != v2 {
				continue
			}
			// `found` trips when this one discriminant matches a second entry in
			// the member list; `covered[i]` trips when an earlier discriminant
			// already pointed at this same member.  Checking only `found` would
			// let two discriminants for one member pass unnoticed, since `found`
			// is reset for every discriminant.
			if found || covered[i] {
				*ee = append(*ee, fmt.Errorf("more than one discriminant pointing to member type %s", v2))
			}
			found = true
			covered[i] = true
		}
		if !found {
			*ee = append(*ee, fmt.Errorf("discriminant refers to a non-member type %s", v))
		}
	}
	// Anything still uncovered has no discriminant at all.
	for i, m := range members {
		if !covered[i] {
			*ee = append(*ee, fmt.Errorf("missing discriminant info for member type %s", m))
		}
	}
}
......@@ -51,81 +51,10 @@ func BuildTypeSystem(schdmt schemadmt.Schema) (*TypeSystem, []error) {
// - for stringprefix unions: that's sufficient (discriminant uniqueness already enforced by map).
// - for byteprefix unions: ... we'll come back to this later.
// Check for member type reference existence first.
// Build up a spare list of those type names in the process; we'll scratch stuff back off of it in a moment.
members := make([]schemadmt.TypeName, 0, t2.FieldMembers().Length())
missingTypes := false
for itr := t2.FieldMembers().Iterator(); !itr.Done(); {
_, tndmt := itr.Next()
mtdmt := typesdmt.Lookup(tndmt)
if mtdmt == nil {
missingTypes = true
ee = append(ee, fmt.Errorf("type %s refers to missing type %s as a member", tn, tndmt))
}
members = append(members, tndmt)
}
// Skip the rest of the checks if there were any missing type references.
// We'll be inspecting the value types more deeply and it's simpler to do that work while presuming everything is at least defined.
if missingTypes {
continue
}
// Do the per-representation-strategy checks.
// Every representation strategy requires a check that there's a discriminant for every member (though they require slightly different setup).
// Some representation strategies also include quite a few more checks.
switch r := t2.FieldRepresentation().AsInterface().(type) {
case schemadmt.UnionRepresentation_Keyed:
checkUnionDiscriminantInfo(tn, members, r, &ee)
case schemadmt.UnionRepresentation_Kinded:
checkUnionDiscriminantInfo(tn, members, r, &ee)
for itr := r.Iterator(); !itr.Done(); {
k, v := itr.Next()
// In the switch ahead, we briefly create the reified type for each member, just so we can use that to ask it its representation.
// We then let that data fall right back into the abyss. The compiler should inline and optimize all this reasonably well.
// We create these temporary things rather than looking in the typesystem map we're accumulating because it makes the process work correctly regardless of order.
// For some of the kinds, this is fairly overkill (we know that the representation behavior of a bool type is bool because it doesn't have any other representation strategies!)
// but I've ground the whole thing out in a consistent way anyway.
var mkind ipld.Kind
switch t3 := typesdmt.Lookup(v).AsInterface().(type) {
case schemadmt.TypeBool:
mkind = TypeBool{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeString:
mkind = TypeString{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeBytes:
mkind = TypeBytes{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeInt:
mkind = TypeInt{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeFloat:
mkind = TypeFloat{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeMap:
mkind = TypeMap{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeList:
mkind = TypeList{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeLink:
mkind = TypeLink{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeUnion:
mkind = TypeUnion{dmt: t3}.RepresentationBehavior() // this actually flies! it will yield Kind_Invalid for a kinded union, though, which we'll treat with a special error message.
case schemadmt.TypeStruct:
mkind = TypeStruct{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeEnum:
mkind = TypeEnum{dmt: t3}.RepresentationBehavior()
case schemadmt.TypeCopy:
panic("no support for 'copy' types. I might want to reneg on whether these are even part of the schema dmt.")
default:
panic("unreachable")
}
// TODO RepresentationKind is supposed to be an enum, but is not presently generated as such. This block's use of `k` as a string should turn into something cleaner when enum gen is implemented and used for RepresentationKind.
if mkind == ipld.Kind_Invalid {
ee = append(ee, fmt.Errorf("kinded union %s declares a %s kind should be received as type %s, which is not sensible because that type is also a kinded union", tn, k, v))
} else if k.String() != mkind.String() {
ee = append(ee, fmt.Errorf("kinded union %s declares a %s kind should be received as type %s, but that type's representation kind is %s", tn, k, v, mkind))
}
}
case schemadmt.UnionRepresentation_Envelope:
checkUnionDiscriminantInfo(tn, members, r.FieldDiscriminantTable(), &ee)
if r.FieldContentKey().String() == r.FieldDiscriminantKey().String() {
ee = append(ee, fmt.Errorf("union %s has representation strategy envelope with conflicting content key and discriminant key", tn))
}
case schemadmt.UnionRepresentation_Inline:
checkUnionDiscriminantInfo(tn, members, r.FieldDiscriminantTable(), &ee)
for itr := r.FieldDiscriminantTable().Iterator(); !itr.Done(); {
......@@ -196,10 +125,6 @@ func BuildTypeSystem(schdmt schemadmt.Schema) (*TypeSystem, []error) {
ee = append(ee, fmt.Errorf("union %s has representation strategy inline, which requires all members have map representations, so %s (which has representation kind %s) is not a valid member", tn, v, mkind))
}
}
case schemadmt.UnionRepresentation_StringPrefix:
checkUnionDiscriminantInfo(tn, members, r, &ee)
case schemadmt.UnionRepresentation_BytePrefix:
panic("nyi") // TODO byteprefix needs spec work.
}
case schemadmt.TypeEnum:
// Verify that:
......@@ -249,36 +174,3 @@ func BuildTypeSystem(schdmt schemadmt.Schema) (*TypeSystem, []error) {
}
return nil, ee
}
// checkUnionDiscriminantInfo verifies that every member in the list
// appears exactly once as a value in the discriminants map, and nothing else appears in the map.
// Errors are appended to ee; the function itself returns nothing.
// The members slice is destructively mutated: each matched entry is overwritten with nil.
// The typename parameter (tn) is purely for the use in error messages.
//
// The discriminantsMap is an untyped Node because it turns out convenient to do that way:
// we happen to know all the different union representations have a map *somewhere* for this,
// but its position and key types vary. Untyped access lets us write more reusable code in this case.
func checkUnionDiscriminantInfo(tn TypeName, members []schemadmt.TypeName, discriminantsMap ipld.Node, ee *[]error) {
for itr := discriminantsMap.MapIterator(); !itr.Done(); {
// Only the map value is used; the key and the error returned by Next are discarded.
_, v, _ := itr.Next()
found := false
for i, v2 := range members {
if v == v2 {
// found is reset for each map entry, so this fires when the same entry matches a second slot in members.
if found {
*ee = append(*ee, fmt.Errorf("type %s representation details has more than one discriminant pointing to member type %s", tn, v2))
}
found = true
// Scratch the member off the list; whatever is still non-nil at the end was never referenced.
// NOTE(review): presumably a later discriminant naming this same member no longer matches the
// nil'd slot and is reported as a non-member instead — confirm against schemadmt.TypeName's equality semantics.
members[i] = nil
}
}
if !found {
*ee = append(*ee, fmt.Errorf("type %s representation details include a discriminant refering to a non-member type %s", tn, v))
}
}
// Report every member that no discriminant pointed at.
for _, m := range members {
if m != nil {
*ee = append(*ee, fmt.Errorf("type %s representation details is missing discriminant info for member type %s", tn, m))
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment