Unverified Commit 4612357c authored by Eric Myhre's avatar Eric Myhre Committed by GitHub

Merge pull request #47 from ipld/path-clarifications

Path clarifications
parents 530ccd68 8093be74
......@@ -4,37 +4,109 @@ import (
"strings"
)
// Path is used in describing progress in a traversal;
// and can also be used as an instruction for a specific traverse.
// Path describes a series of steps across a tree or DAG of Node,
// where each segment in the path is a map key or list index
// (literally, Path is a slice of PathSegment values).
// Path is used in describing progress in a traversal; and
// can also be used as an instruction for traversing from one Node to another.
// Path values will also often be encountered as part of error messages.
//
// (Note that Paths are useful as an instruction for traversing from
// *one* Node to *one* other Node; to do a walk from one Node and visit
// *several* Nodes based on some sort of pattern, look to IPLD Selectors,
// and the 'traversal/selector' package in this project.)
//
// Path values are always relative.
// Observe how 'traversal.Focus' requires both a Node and a Path argument --
// where to start, and where to go, respectively.
// Similarly, error values which include a Path will be speaking in reference
// to the "starting Node" in whatever context they arose from.
//
// The canonical form of a Path is as a list of PathSegment.
// Each PathSegment is a string; by convention, the string should be
// in UTF-8 encoding and use NFC normalization, but all operations
// will regard the string as its constituent eight-bit bytes.
//
// There are no illegal or magical characters in IPLD Paths
// (in particular, do not mistake them for UNIX system paths).
// IPLD Paths can only go down: that is, each segment must traverse one node.
// There is no ".." which means "go up";
// and there is no "." which means "stay here";
// and it is not valid to have an empty path segment.
// and there is no "." which means "stay here".
// IPLD Paths have no magic behavior around characters such as "~".
// IPLD Paths do not have a concept of "globs" nor behave specially
// for a path segment string of "*" (but you may wish to see 'Selectors'
// for globbing-like features that traverse over IPLD data).
//
// An empty string is a valid PathSegment.
// (This leads to some unfortunate complications when wishing to represent
// paths in a simple string format; however, consider that maps do exist
// in serialized data in the wild where an empty string is used as the key:
// it is important we be able to correctly describe and address this!)
//
// (Note: path strings as interpreted by UnixFS may certainly have concepts
// of ".." and "."! But UnixFS is built upon IPLD; IPLD has no idea of this.)
// A string containing "/" (or even being simply "/"!) is a valid PathSegment.
// (As with empty strings, this is unfortunate (in particular, because it
// very much doesn't match up well with expectations popularized by UNIX-like
// filesystems); but, as with empty strings, maps which contain such a key
// certainly exist, and it is important that we be able to regard them!)
//
// Paths are representable as strings. When represented as a string, each
// segment is separated by a "/" character.
// (It follows that path segments may not themselves contain a "/" character.)
// (Note: escaping may be specified and supported in the future; currently, it is not.)
// A string starting, ending, or otherwise containing the NUL (\x00) byte
// is also a valid PathSegment. This follows from the rule of "a string is
// regarded as its constituent eight-bit bytes": an all-zero byte is not exceptional.
// In golang, this doesn't pose particular difficulty, but note this would be
// of marked concern for languages which have "C-style nul-terminated strings".
//
// For an IPLD Path to be represented as a string, an encoding system
// including escaping is necessary. At present, there is not a single
// canonical specification for such an escaping; we expect to decide one
// in the future, but this is not yet settled and done.
// (This implementation has a 'String' method, but it contains caveats
// and may be ambiguous for some content. This may be fixed in the future.)
type Path struct {
// segments is the ordered list of steps making up the path.
// NewPath stores a private copy of the caller's slice here;
// NewPathNocopy stores the caller's slice directly.
segments []PathSegment
}
// ParsePath converts a string to an IPLD Path, parsing the string into a segmented Path.
// NewPath returns a Path composed of the given segments.
//
// Each segment of the path string should be separated by a "/" character.
// This constructor function does a defensive copy,
// in case your segments slice should mutate in the future.
// (Use NewPathNocopy if this is a performance concern,
// and you're sure you know what you're doing.)
func NewPath(segments []PathSegment) Path {
	// Copy into freshly allocated storage so that later writes to the
	// caller's slice cannot show through the returned Path.
	owned := make([]PathSegment, len(segments))
	copy(owned, segments)
	return Path{segments: owned}
}
// NewPathNocopy is identical to NewPath but trusts that
// the segments slice you provide will not be mutated.
func NewPathNocopy(segments []PathSegment) Path {
	// The slice is stored as-is (no copy), so the returned Path shares
	// its backing array with the caller.
	return Path{segments: segments}
}
// ParsePath converts a string to an IPLD Path, doing a basic parsing of the
// string using "/" as a delimiter to produce a segmented Path.
// This is a handy, but not a general-purpose nor spec-compliant (!),
// way to create a Path: it cannot represent all valid paths.
//
// Multiple subsequent "/" characters will be silently collapsed.
// E.g., `"foo///bar"` will be treated equivalently to `"foo/bar"`.
// Prefixed and suffixed extraneous "/" characters are also discarded.
// This makes this constructor incapable of handling some possible Path values
// (specifically: paths with empty segments cannot be created with this constructor).
//
// There is no escaping mechanism used by this function.
// This makes this constructor incapable of handling some possible Path values
// (specifically, a path segment containing "/" cannot be created, because it
// will always be interpreted as a segment separator).
//
// No "cleaning" of the path occurs. See the documentation of the Path struct;
// No other "cleaning" of the path occurs. See the documentation of the Path struct;
// in particular, note that ".." does not mean "go up", nor does "." mean "stay here" --
// correspondingly, there isn't anything to "clean".
// correspondingly, there isn't anything to "clean" in the same sense as
// 'filepath.Clean' from the standard library filesystem path packages would.
//
// If the provided string contains unprintable characters, or non-UTF-8
// or non-NFC-canonicalized bytes, no remark will be made about this,
// and those bytes will remain part of the PathSegments in the resulting Path.
func ParsePath(pth string) Path {
// FUTURE: we should probably have some escaping mechanism which makes
// it possible to encode a slash in a segment. Specification needed.
......@@ -49,6 +121,18 @@ func ParsePath(pth string) Path {
// String representation of a Path is simply the join of each segment with '/'.
// It does not include a leading nor trailing slash.
//
// This is a handy, but not a general-purpose nor spec-compliant (!),
// way to reduce a Path to a string.
// There is no escaping mechanism used by this function,
// and as a result, not all possible valid Path values (such as those with
// empty segments or with segments containing "/") can be encoded unambiguously.
// For Path values containing these problematic segments, ParsePath applied
// to the string returned from this function may return a nonequal Path value.
//
// No escaping for unprintable characters is provided.
// No guarantee that the resulting string is UTF-8 nor NFC canonicalized
// is provided unless all the constituent PathSegment had those properties.
func (p Path) String() string {
l := len(p.segments)
if l == 0 {
......
......@@ -6,6 +6,12 @@ import (
// PathSegment can describe either a key in a map, or an index in a list.
//
// Create a PathSegment via either ParsePathSegment, PathSegmentOfString,
// or PathSegmentOfInt; or, via one of the constructors of Path,
// which will implicitly create PathSegment internally.
// Using PathSegment's natural zero value directly is discouraged
// (it will act like ParsePathSegment("0"), which is likely not what you'd expect).
//
// Path segments are "stringly typed" -- they may be interpreted as either strings or ints depending on context.
// A path segment of "123" will be used as a string when traversing a node of map kind;
// and it will be converted to an integer when traversing a node of list kind.
......@@ -15,10 +21,21 @@ import (
// Internally, PathSegment will store either a string or an integer,
// depending on how it was constructed,
// and will automatically convert to the other on request.
// (This means if two pieces of code communicate using PathSegment, one producing ints and the other expecting ints, they will work together efficiently.)
// (This means if two pieces of code communicate using PathSegment,
// one producing ints and the other expecting ints,
// then they will work together efficiently.)
// PathSegment in a Path produced by ParsePath generally have all strings internally,
// because there is distinction possible when parsing a Path string
// (and attempting to pre-parse all strings into ints "in case" would waste time in almost all cases).
// because there is no distinction possible when parsing a Path string
// (and attempting to pre-parse all strings into ints "just in case" would waste time in almost all cases).
//
// Be cautious of attempting to use PathSegment as a map key!
// Due to the implementation detail of internal storage, it's possible for
// PathSegment values which are "equal" per the definition of PathSegment.Equals
// to still be unequal in the eyes of golang's native maps.
// You should probably use the string values of the PathSegment as map keys.
// (This has the additional bonus of hitting a special fastpath that the golang
// built-in maps have specifically for plain string keys.)
//
type PathSegment struct {
/*
A quick implementation note about the Go compiler and "union" semantics:
......@@ -43,9 +60,10 @@ type PathSegment struct {
we're using the first tactic.
(We also currently get away with having no extra discriminator bit
because empty string is not considered a valid segment,
because we use a signed int for indexes, and negative values aren't valid there,
and thus we can use it as a sentinel value.
This may change if the IPLD Path spec comes to other conclusions about this.)
(Fun note: Empty strings were originally used for this sentinel,
but it turns out empty strings are valid PathSegment themselves, so!))
*/
s string
......@@ -57,16 +75,16 @@ type PathSegment struct {
// (Note: there is currently no escaping specified for PathSegments,
// so this is currently functionally equivalent to PathSegmentOfString.)
func ParsePathSegment(s string) PathSegment {
	// No escaping is specified yet, so the input is stored verbatim.
	// i: -1 marks the segment as string-backed; with i left at its zero
	// value, an empty s could not be told apart from the index 0.
	return PathSegment{s: s, i: -1}
}
// PathSegmentOfString boxes a string into a PathSegement.
// PathSegmentOfString boxes a string into a PathSegment.
// It does not attempt to parse any escaping; use ParsePathSegment for that.
func PathSegmentOfString(s string) PathSegment {
	// The string is boxed verbatim; no parsing or unescaping is attempted.
	// i: -1 marks the segment as string-backed; with i left at its zero
	// value, an empty s could not be told apart from the index 0.
	return PathSegment{s: s, i: -1}
}
// PathSegmentOfString boxes an int into a PathSegement.
// PathSegmentOfInt boxes an int into a PathSegment.
//
// NOTE(review): the implementation note on PathSegment describes negative
// index values as the sentinel for "holds a string", so a negative argument
// here would presumably be read back as a string segment rather than an
// index -- confirm whether callers can ever pass a negative value.
func PathSegmentOfInt(i int) PathSegment {
return PathSegment{i: i}
}
......@@ -75,7 +93,7 @@ func PathSegmentOfInt(i int) PathSegment {
// but this is considered an implementation detail that's non-semantic.
// If it returns false, it implicitly means "containsInt", as these are the only options.
func (ps PathSegment) containsString() bool {
	// A negative i is the marker for a string-backed segment; this is what
	// lets an empty string be recognised as a string rather than as index 0.
	// A non-empty s is accepted as well, so values built without the marker
	// (for example the struct literal PathSegment{s: "x"}) are still
	// classified as strings.
	return ps.i < 0 || ps.s != ""
}
// String returns the PathSegment as a string.
......@@ -102,9 +120,14 @@ func (ps PathSegment) Index() (int, error) {
}
// Equals checks if two PathSegment values are equal.
// This is equivalent to checking if their strings are equal --
// if one of the PathSegment values is backed by an int and the other is a string,
// they may still be "equal".
//
// Because PathSegment is "stringly typed", this comparison does not
// regard if one of the segments is stored as a string and one is stored as an int;
// if string values of two segments are equal, they are "equal" overall.
// In other words, `PathSegmentOfInt(2).Equals(PathSegmentOfString("2")) == true`!
// (You should still typically prefer this method over converting two segments
// to string and comparing those, because even though that may be functionally
// correct, this method will be faster if they're both ints internally.)
func (x PathSegment) Equals(o PathSegment) bool {
if !x.containsString() && !o.containsString() {
return x.i == o.i
......
......@@ -8,18 +8,28 @@ import (
// TestParsePath exercises ParsePath's "/"-delimited splitting: a single
// segment, several segments, and the discarding of leading, trailing, and
// repeated separators, plus the absence of any escaping for "/".
//
// NOTE(review): several subtests carry two Wish lines whose expected values
// differ only in `i: -1`, and "eliding empty segments" appears twice. This
// looks like the pre- and post-change lines of a diff shown together --
// confirm which set of expectations is current before relying on them.
func TestParsePath(t *testing.T) {
t.Run("parsing one segment", func(t *testing.T) {
Wish(t, ParsePath("0").segments, ShouldEqual, []PathSegment{{s: "0"}})
Wish(t, ParsePath("0").segments, ShouldEqual, []PathSegment{{s: "0", i: -1}})
})
t.Run("parsing three segments", func(t *testing.T) {
Wish(t, ParsePath("0/foo/2").segments, ShouldEqual, []PathSegment{{s: "0"}, {s: "foo"}, {s: "2"}})
})
t.Run("eliding empty segments", func(t *testing.T) {
Wish(t, ParsePath("0//2").segments, ShouldEqual, []PathSegment{{s: "0"}, {s: "2"}})
Wish(t, ParsePath("0/foo/2").segments, ShouldEqual, []PathSegment{{s: "0", i: -1}, {s: "foo", i: -1}, {s: "2", i: -1}})
})
t.Run("eliding leading slashes", func(t *testing.T) {
Wish(t, ParsePath("/0/2").segments, ShouldEqual, []PathSegment{{s: "0"}, {s: "2"}})
Wish(t, ParsePath("/0/2").segments, ShouldEqual, []PathSegment{{s: "0", i: -1}, {s: "2", i: -1}})
})
t.Run("eliding trailing", func(t *testing.T) {
Wish(t, ParsePath("0/2/").segments, ShouldEqual, []PathSegment{{s: "0"}, {s: "2"}})
Wish(t, ParsePath("0/2/").segments, ShouldEqual, []PathSegment{{s: "0", i: -1}, {s: "2", i: -1}})
})
t.Run("eliding empty segments", func(t *testing.T) { // NOTE: a spec for string encoding might cause this to change in the future!
Wish(t, ParsePath("0//2").segments, ShouldEqual, []PathSegment{{s: "0", i: -1}, {s: "2", i: -1}})
})
// The backslash is not an escape: the "/" after it still splits, leaving
// a segment that is just `\`.
t.Run("escaping segments", func(t *testing.T) { // NOTE: a spec for string encoding might cause this to change in the future!
Wish(t, ParsePath(`0/\//2`).segments, ShouldEqual, []PathSegment{{s: "0", i: -1}, {s: `\`, i: -1}, {s: "2", i: -1}})
})
}
func TestPathSegmentZeroValue(t *testing.T) {
Wish(t, PathSegment{}.String(), ShouldEqual, "0")
i, err := PathSegment{}.Index()
Wish(t, err, ShouldEqual, nil)
Wish(t, i, ShouldEqual, 0)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment