Commit 5c3bd1af authored by Eric Myhre

Node.Keys method now returns iterators.

An "immediate" rather than generative function is also available;
the docs contain caveats.

At the moment, these distinctions are a bit forced, but we want to be
ready for when we start getting truly huge maps where the generative
usage actually *matters* for either memory reasons, latency reasons,
or both.

Separated Length into its own method rather than making the Keys
method pull double duty; the previous combination was just utterly silly.

The implementations in the bind package are stubs; that package is
going to take a lot of work, and so the focus is going to be entirely
on keeping the 'free' package viable; and we'll revisit bind and such
when there's more dev time budget.
Signed-off-by: Eric Myhre <hash@exultant.us>
parent 23aec76e
......@@ -104,9 +104,16 @@ func (n Node) AsLink() (v cid.Cid, _ error) {
return
}
func (n Node) Keys() ([]string, int) {
return nil, 0 // FIXME
// TODO: REVIEW: structs have clear key order; maps do not. what do?
// Keys is meant to return an iterator over the node's keys.
// It is not yet implemented: calling it always panics with "NYI".
func (n Node) Keys() ipld.KeyIterator {
panic("NYI")
}
// KeysImmediate is meant to return all of the node's keys as a slice.
// It is not yet implemented: calling it always panics with "NYI".
func (n Node) KeysImmediate() ([]string, error) {
panic("NYI")
}
// Length is meant to report the number of entries in the node.
// It is not yet implemented: calling it always panics with "NYI".
func (n Node) Length() int {
panic("NYI")
}
func (n Node) TraverseField(pth string) (ipld.Node, error) {
......
......@@ -29,14 +29,15 @@ var (
// Node is a tagged union of values: the kind field selects which one of the
// underscore-prefixed value fields carries meaning.
type Node struct {
kind ipld.ReprKind // Discriminator: selects which of the value fields below is meaningful.
_map map[string]ipld.Node // Value union. Only one of these has meaning, depending on the value of 'kind'.
_arr []ipld.Node // Value union. Only one of these has meaning, depending on the value of 'kind'.
_bool bool // Value union. Only one of these has meaning, depending on the value of 'kind'.
_int int // Value union. Only one of these has meaning, depending on the value of 'kind'.
_float float64 // Value union. Only one of these has meaning, depending on the value of 'kind'.
_str string // Value union. Only one of these has meaning, depending on the value of 'kind'.
_bytes []byte // Value union. Only one of these has meaning, depending on the value of 'kind'.
_link cid.Cid // Value union. Only one of these has meaning, depending on the value of 'kind'.
_map map[string]ipld.Node // Value union. Only one of these has meaning, depending on the value of 'kind'.
_mapOrd []string // Conjugate to _map: the map's keys as an ordered slice. Only has meaning when 'kind' is map.
_arr []ipld.Node // Value union. Only one of these has meaning, depending on the value of 'kind'.
_bool bool // Value union. Only one of these has meaning, depending on the value of 'kind'.
_int int // Value union. Only one of these has meaning, depending on the value of 'kind'.
_float float64 // Value union. Only one of these has meaning, depending on the value of 'kind'.
_str string // Value union. Only one of these has meaning, depending on the value of 'kind'.
_bytes []byte // Value union. Only one of these has meaning, depending on the value of 'kind'.
_link cid.Cid // Value union. Only one of these has meaning, depending on the value of 'kind'.
}
func (n *Node) Kind() ipld.ReprKind {
......@@ -65,9 +66,38 @@ func (n *Node) AsLink() (v cid.Cid, _ error) {
return n._link, expectTyp(ipld.ReprKind_Link, n.kind)
}
func (n *Node) Keys() ([]string, int) {
return nil, 0 // FIXME
// TODO need to maintain map order now, apparently, sigh
// Keys returns an iterator positioned at the first entry of the node's
// ordered key list (_mapOrd).
func (n *Node) Keys() ipld.KeyIterator {
	itr := &KeyIterator{
		node: n,
		idx:  0,
	}
	return itr
}
// KeyIterator steps through the keys of a Node by indexing into the node's
// _mapOrd slice.
type KeyIterator struct {
node *Node // node whose _mapOrd slice is being walked
idx int // index into node._mapOrd of the next key to yield
}
// Next yields the key at the iterator's current position and advances it.
//
// Instead of panicking on an out-of-range index, Next returns an error when:
//   - the underlying node is not of map kind (the same expectTyp check that
//     KeysImmediate performs), or
//   - every key has already been yielded (HasNext would report false).
//
// On error the returned string is empty and the iterator position is unchanged.
func (ki *KeyIterator) Next() (string, error) {
	if err := expectTyp(ipld.ReprKind_Map, ki.node.kind); err != nil {
		return "", err
	}
	// Guard the index explicitly so a caller that ignores HasNext gets an
	// error value rather than a runtime panic.
	if ki.idx < 0 || ki.idx >= len(ki.node._mapOrd) {
		return "", keysExhaustedError{}
	}
	key := ki.node._mapOrd[ki.idx]
	ki.idx++
	return key, nil
}

// keysExhaustedError is the error yielded by KeyIterator.Next when it is
// called after all keys have been returned.
type keysExhaustedError struct{}

// Error implements the error interface.
func (keysExhaustedError) Error() string {
	return "key iterator exhausted: no more keys"
}
// HasNext reports whether the iterator's position is still before the end
// of the node's ordered key list, i.e. whether another key remains.
func (ki *KeyIterator) HasNext() bool {
	total := len(ki.node._mapOrd)
	return ki.idx < total
}
// KeysImmediate returns the node's ordered key list in one shot, paired with
// the result of checking that the node is of map kind.
//
// The slice handed back is the node's own _mapOrd slice, not a copy.
func (n *Node) KeysImmediate() ([]string, error) {
	keys := n._mapOrd
	kindErr := expectTyp(ipld.ReprKind_Map, n.kind)
	return keys, kindErr
}
// Length reports the number of entries held by the node: the count of
// ordered keys for a map, the element count for a list, and -1 for every
// other kind.
func (n *Node) Length() int {
	kind := n.Kind()
	if kind == ipld.ReprKind_Map {
		return len(n._mapOrd)
	}
	if kind == ipld.ReprKind_List {
		return len(n._arr)
	}
	return -1
}
func (n *Node) TraverseField(pth string) (ipld.Node, error) {
......
......@@ -14,6 +14,9 @@ func (n *Node) SetNull() {
}
// SetField coerces the node to map kind and stores v under key k.
//
// A key that is not yet present in the map is also appended to _mapOrd, so
// the ordered key list records keys in first-insertion order; overwriting an
// existing key leaves its position untouched.
func (n *Node) SetField(k string, v ipld.Node) {
	n.coerceType(ipld.ReprKind_Map)
	_, known := n._map[k]
	if !known {
		// First sighting of this key: remember where it falls in the order.
		n._mapOrd = append(n._mapOrd, k)
	}
	n._map[k] = v
}
func (n *Node) SetIndex(k int, v ipld.Node) {
......@@ -98,6 +101,7 @@ func (n *Node) coerceType(newKind ipld.ReprKind) {
return
default:
n._map = nil
n._mapOrd = nil
}
case ipld.ReprKind_List:
switch newKind {
......
......@@ -31,12 +31,24 @@ type Node interface {
// If idx is out of range, a nil node and an error will be returned.
TraverseIndex(idx int) (Node, error)
// Keys returns instructions for traversing the node.
// If the node kind is a map, the keys slice has content;
// if it's a list, the length int will be positive
// (and if it's a zero length list, there's not to traverse, right?);
// and if it's a primitive type the returned values are nil and zero.
Keys() ([]string, int)
// Keys returns an iterator which will yield keys for traversing the node.
// If the node kind is anything other than a map, the iterator will
// yield error values.
Keys() KeyIterator
// KeysImmediate returns a slice containing all keys for traversing the node.
// The semantics are otherwise identical to using the Keys() iterator.
//
// KeysImmediate is for convenience of usage; callers should prefer to use
// the iterator approach where possible, as it continues to behave well
// even when using collections of extremely large size (and even when
// the collection is split between multiple serial nodes, as with
// Advanced Layouts, etc).
KeysImmediate() ([]string, error)
// Length returns the length of a list, or the number of entries in a map,
// or -1 if the node is of neither list nor map kind.
Length() int
// Undefined nodes are returned when traversing a struct field that is
// defined by a schema but unset in the data. (Undefined nodes are not
......@@ -57,6 +69,23 @@ type Node interface {
AsLink() (cid.Cid, error)
}
// KeyIterator is an interface for traversing nodes of kind map.
// Sequential calls to Next() will yield keys; HasNext() describes whether
// iteration should continue.
//
// Iteration order is defined to be stable.
//
// REVIEW: should Next return error?
// Other parts of the Node interface use that for kind mismatch rejection;
// so on those grounds, I'd say "no", because we know what the key kind is
// (but then Node.Keys should return error).
// In big nodes (composites using an AdvLayout), where do we return errors?
// Since we might be streaming, there are questions here.
type KeyIterator interface {
// Next yields the next key in the iteration, along with an error value.
Next() (string, error)
// HasNext describes whether iteration should continue, i.e. whether
// another call to Next is expected to yield a key.
HasNext() bool
}
// SerializableNode is implemented by values that can report a CID.
// NOTE(review): presumably this is the content identifier of the node's
// serialized form — confirm against implementers.
type SerializableNode interface {
// CID returns the cid.Cid associated with this value.
CID() cid.Cid
}
......@@ -82,11 +111,9 @@ type MutableNode interface {
SetLink(v cid.Cid)
}
// REVIEW: having a an immediate-mode Keys() method rather than iterator
// might actually be a bad idea. We're aiming to reuse this interface
// for *advanced layouts as well*, and those can be *large*.
//
// Similar goes for AsBytes().
// REVIEW: immediate-mode AsBytes() method (as opposed to e.g. returning
// an io.Reader instance) might be problematic, esp. if we introduce
// AdvancedLayouts which support large bytes natively.
//
// Probable solution is having both immediate and iterator return methods.
// Returning a reader for bytes when you know you want a slice already
......
......@@ -43,7 +43,7 @@ func validate(ts Universe, t Type, node ipld.Node, pth string) []error {
if node.Kind() != ipld.ReprKind_Map {
return []error{fmt.Errorf("Schema match failed: expected type %q (which is kind %v) at path %q, but found kind %v", t2.Name(), t.ReprKind(), pth, node.Kind())}
}
keys, _ := node.Keys()
keys, _ := node.KeysImmediate()
errs := []error(nil)
for _, k := range keys {
// FUTURE: if KeyType is an enum rather than string, do membership check.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment