diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000000000000000000000000000000000000..fcd6df33ca47bab508f162576ebe1641f900afb8 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,13 @@ +Copyright 2020 Protocol Labs + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000000000000000000000000000000000000..9bddb3e81a9d27bb20c43b3fd7f7e88aff96cf4a --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,7 @@ +Copyright 2020 Protocol Labs + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..762922d450ff48280278855b8d1b9de82608688d --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# go-dagpb + +**An implementation of the IPLD [DAG-PB](https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md) spec for [go-ipld-prime](https://github.com/ipld/go-ipld-prime/)** + +Use `Decoder(ipld.NodeAssembler, io.Reader)` and `Encoder(ipld.Node, io.Writer)` directly, or import this package to have this codec registered into the go-ipld-prime CID link loader. + +Nodes encoded with this codec _must_ conform to the DAG-PB spec. Specifically, they should have the non-optional fields shown in the DAG-PB schema: + +```ipldsch +type PBNode struct { + Links [PBLink] + Data optional Bytes +} + +type PBLink struct { + Hash Link + Name optional String + Tsize optional Int +} +``` + +Use `dagpb.Type.PBNode` and friends directly for strictness guarantees. Basic `ipld.Node`s will need to have the appropriate fields (and no others) to successfully encode using this codec. + +## License & Copyright + +Copyright © 2020 Protocol Labs + +Licensed under either of + + * Apache 2.0, ([LICENSE-APACHE](LICENSE-APACHE) / http://www.apache.org/licenses/LICENSE-2.0) + * MIT ([LICENSE-MIT](LICENSE-MIT) / http://opensource.org/licenses/MIT) + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/basics_test.go b/basics_test.go index 8915c6a6e982e2991ea9c44666a8ed6e88da9552..4c03d5c6cdfbb5bc651d31fd6ffd4497d8445461 100644 --- a/basics_test.go +++ b/basics_test.go @@ -96,7 +96,7 @@ func runTest(t *testing.T, bytsHex string, expected pbNode) { roundTrip := func(t *testing.T, node ipld.Node) { var buf bytes.Buffer - if err := Marshal(node, &buf); err != nil { + if err := Encoder(node, &buf); err != nil { t.Fatal(err) } @@ -108,7 +108,7 @@ func runTest(t *testing.T, bytsHex string, expected pbNode) { t.Run("basicnode", func(t *testing.T) { nb := basicnode.Prototype__Map{}.NewBuilder() - err := Unmarshal(nb, bytes.NewReader(byts)) + err := Decoder(nb, bytes.NewReader(byts)) if err != nil { t.Fatal(err) } @@ -120,7 +120,7 @@ func runTest(t *testing.T, bytsHex string, expected pbNode) { t.Run("typed", func(t *testing.T) { nb := Type.PBNode.NewBuilder() - err := Unmarshal(nb, bytes.NewReader(byts)) + err := Decoder(nb, bytes.NewReader(byts)) if err != nil { t.Fatal(err) } @@ -180,7 +180,7 @@ func TestNodeWithTwoUnsortedLinks(t *testing.T) { }) var buf bytes.Buffer - if err := Marshal(node, &buf); err != nil { + if err := Encoder(node, &buf); err != nil { t.Fatal(err) } if hex.EncodeToString(buf.Bytes()) != encoded { @@ -188,6 +188,64 @@ func TestNodeWithTwoUnsortedLinks(t *testing.T) { } } +func TestNodeWithStableSortedLinks(t *testing.T) { + cids := []string{ + "QmUGhP2X8xo9dsj45vqx1H6i5WqPqLqmLQsHTTxd3ke8mp", + "QmP7SrR76KHK9A916RbHG1ufy2TzNABZgiE23PjZDMzZXy", + "QmQg1v4o9xdT3Q14wh4S7dxZkDjyZ9ssFzFzyep1YrVJBY", + "QmdP6fartWRrydZCUjHgrJ4XpxSE4SAoRsWJZ1zJ4MWiuf", + "QmNNjUStxtMC1WaSZYiDW6CmAUrvd5Q2e17qnxPgVdwrwW", + "QmWJwqZBJWerHsN1b7g4pRDYmzGNnaMYuD3KSbnpaxsB2h", + "QmRXPSdysBS3dbUXe6w8oXevZWHdPQWaR2d3fggNsjvieL", + "QmTUZAXfws6zrhEksnMqLxsbhXZBQs4FNiarjXSYQqVrjC", + "QmNNk7dTdh8UofwgqLNauq6N78DPc6LKK2yBs1MFdx7Mbg", + "QmW5mrJfyqh7B4ywSvraZgnWjS3q9CLiYURiJpCX3aro5i", + "QmTFHZL5CkgNz19MdPnSuyLAi6AVq9fFp81zmPpaL2amED", + } + + node := 
fluent.MustBuildMap(basicnode.Prototype__Map{}, 2, func(fma fluent.MapAssembler) { + fma.AssembleEntry("Data").AssignBytes([]byte("some data")) + fma.AssembleEntry("Links").CreateList(len(cids), func(fla fluent.ListAssembler) { + for _, cid := range cids { + fla.AssembleValue().CreateMap(3, func(fma fluent.MapAssembler) { + fma.AssembleEntry("Name").AssignString("") + fma.AssembleEntry("Tsize").AssignInt(262158) + fma.AssembleEntry("Hash").AssignLink(cidlink.Link{Cid: mkcid(t, cid)}) + }) + } + }) + }) + + var buf bytes.Buffer + if err := Encoder(node, &buf); err != nil { + t.Fatal(err) + } + nb := basicnode.Prototype__Map{}.NewBuilder() + err := Decoder(nb, bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatal(err) + } + reencNode := nb.Build() + links, _ := reencNode.LookupByString("Links") + if links.Length() != len(cids) { + t.Fatal("Incorrect number of links after round-trip") + } + iter := links.ListIterator() + for !iter.Done() { + ii, n, _ := iter.Next() + h, _ := n.LookupByString("Hash") + l, _ := h.AsLink() + cl, _ := l.(cidlink.Link) + if cids[ii] != cl.String() { + t.Fatal("CIDs did not retain position after round-trip") + } + } + + if hex.EncodeToString(buf.Bytes()) != 
"122a0a2212205822d187bd40b04cc8ae7437888ebf844efac1729e098c8816d585d0fcc42b5b1200188e8010122a0a2212200b79badee10dc3f7781a7a9d0f020cc0f710b328c4975c2dbc30a170cd188e2c1200188e8010122a0a22122022ad631c69ee983095b5b8acd029ff94aff1dc6c48837878589a92b90dfea3171200188e8010122a0a221220df7fd08c4784fe6938c640df473646e4f16c7d0c6567ab79ec6981767fc3f01a1200188e8010122a0a22122000888c815ad7d055377bdb7b7779fc9740e548cb5dac90c71b9af9f51a879c2d1200188e8010122a0a221220766db372d015c5c700f538336556370165c889334791487a5e48d6080f1c99ea1200188e8010122a0a2212202f533004ceed74279b32c58eb0e3d2a23bc27ba14ab07298406c42bab8d543211200188e8010122a0a2212204c50cfdefa0209766f885919ac8ffc258e9253c3001ac23814f875d414d394731200188e8010122a0a22122000894611dfa192853020cbbade1a9a0a3f359d26e0d38caf4d72b9b306ff5a0b1200188e8010122a0a221220730ddba83e3147bbe10780b97ff0718c74c36037b97b3b79b45c4511806545811200188e8010122a0a22122048ea9d5d423f678d83d559d2349be8325527290b070c90fc1acd968f0bf70a061200188e80100a09736f6d652064617461" { + t.Fatal("Encoded form did not match expected") + } +} + func TestNodeWithUnnamedLinks(t *testing.T) { dataByts, _ := hex.DecodeString("080218cbc1819201208080e015208080e015208080e015208080e015208080e015208080e01520cbc1c10f") expected := pbNode{data: dataByts} diff --git a/doc.go b/doc.go new file mode 100644 index 0000000000000000000000000000000000000000..68663b6f486d5e5de2fafefca21e3f6c93461af3 --- /dev/null +++ b/doc.go @@ -0,0 +1,27 @@ +/* +Package dagpb provides an implementation of the IPLD DAG-PB spec +(https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md) for +go-ipld-prime (https://github.com/ipld/go-ipld-prime/). + +Use Decoder() and Encoder() directly, or import this package to have this codec +registered into the go-ipld-prime CID link loader. + +Nodes encoded with this codec _must_ conform to the DAG-PB spec. 
Specifically, +they should have the non-optional fields shown in the DAG-PB schema: + + type PBNode struct { + Links [PBLink] + Data optional Bytes + } + + type PBLink struct { + Hash Link + Name optional String + Tsize optional Int + } + +Use dagpb.Type.PBNode and friends directly for strictness guarantees. Basic +ipld.Nodes will need to have the appropriate fields (and no others) to +successfully encode using this codec. +*/ +package dagpb diff --git a/marshal.go b/marshal.go index cb3c8cd4bc99a3853ee555398903619aca605512..755eb5b7aa52a5a41d04f62ea9ce590933b99169 100644 --- a/marshal.go +++ b/marshal.go @@ -19,6 +19,11 @@ type pbLink struct { hasTsize bool } +// Marshal provides an IPLD codec encode interface for DAG-PB data. Provide a +// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node. +// The Node must strictly conform to the DAG-PB schema +// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md). +// For safest use, build Nodes using the Type.PBNode type. 
func Marshal(inNode ipld.Node, out io.Writer) error { // Wrap in a typed node for some basic schema form checking builder := Type.PBNode.NewBuilder() @@ -42,7 +47,7 @@ func Marshal(inNode ipld.Node, out io.Writer) error { return err } - { // Hash + { // Hash (required) d, err := link.LookupByString("Hash") if err != nil { return err @@ -56,13 +61,14 @@ func Marshal(inNode ipld.Node, out io.Writer) error { } cl, ok := l.(cidlink.Link) if !ok { - // this _should_ be taken care of by the Typed conversion above "missing required fields: Hash" + // this _should_ be taken care of by the Typed conversion above with + // "missing required fields: Hash" return xerrors.Errorf("invalid DAG-PB form (link must have a Hash)") } pbLinks[ii].hash = cl.Cid } - { // Name + { // Name (optional) nameNode, err := link.LookupByString("Name") if err != nil { return err @@ -77,7 +83,7 @@ func Marshal(inNode ipld.Node, out io.Writer) error { } } - { // Tsize + { // Tsize (optional) tsizeNode, err := link.LookupByString("Tsize") if err != nil { return err @@ -97,11 +103,13 @@ func Marshal(inNode ipld.Node, out io.Writer) error { } } // for + // links must be strictly sorted by Name before encoding, leaving stable + // ordering where the names are the same (or absent) sortLinks(pbLinks) for _, link := range pbLinks { size := link.encodedSize() chunk := make([]byte, size+sizeOfVarint(uint64(size))+1) - chunk[0] = 0x12 + chunk[0] = 0x12 // field & wire type for Links offset := encodeVarint(chunk, 1, uint64(size)) wrote, err := link.marshal(chunk, offset) if err != nil { @@ -114,6 +122,7 @@ func Marshal(inNode ipld.Node, out io.Writer) error { } } // if links + // Data (optional) data, err := node.LookupByString("Data") if err != nil { return err @@ -125,7 +134,7 @@ func Marshal(inNode ipld.Node, out io.Writer) error { } size := uint64(len(byts)) lead := make([]byte, sizeOfVarint(size)+1) - lead[0] = 0xa + lead[0] = 0xa // field and wireType for Data encodeVarint(lead, 1, size) out.Write(lead) 
out.Write(byts) @@ -134,6 +143,7 @@ func Marshal(inNode ipld.Node, out io.Writer) error { return nil } +// predict the byte size of the encoded Link func (link pbLink) encodedSize() (n int) { l := link.hash.ByteLen() n += 1 + l + sizeOfVarint(uint64(l)) @@ -147,26 +157,33 @@ func (link pbLink) encodedSize() (n int) { return n } +// encode a Link to PB func (link pbLink) marshal(data []byte, offset int) (int, error) { base := offset - data[offset] = 0xa + data[offset] = 0xa // field and wireType for Hash byts := link.hash.Bytes() offset = encodeVarint(data, offset+1, uint64(len(byts))) copy(data[offset:], byts) offset += len(byts) if link.hasName { - data[offset] = 0x12 + data[offset] = 0x12 // field and wireType for Name offset = encodeVarint(data, offset+1, uint64(len(link.name))) copy(data[offset:], link.name) offset += len(link.name) } if link.hasTsize { - data[offset] = 0x18 + data[offset] = 0x18 // field and wireType for Tsize offset = encodeVarint(data, offset+1, uint64(link.tsize)) } return offset - base, nil } +// predict the size of a varint for PB before creating it +func sizeOfVarint(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} + +// encode a varint to a PB chunk func encodeVarint(data []byte, offset int, v uint64) int { for v >= 1<<7 { data[offset] = uint8(v&0x7f | 0x80) @@ -177,6 +194,7 @@ func encodeVarint(data []byte, offset int, v uint64) int { return offset + 1 } +// stable sorting of Links using the strict sorting rules func sortLinks(links []pbLink) { sort.Stable(pbLinkSlice(links)) } @@ -190,7 +208,3 @@ func (ls pbLinkSlice) Less(a, b int) bool { return pbLinkLess(ls[a], ls[b]) } func pbLinkLess(a pbLink, b pbLink) bool { return a.name < b.name } - -func sizeOfVarint(x uint64) (n int) { - return (math_bits.Len64(x|1) + 6) / 7 -} diff --git a/multicodec.go b/multicodec.go index 7472d56c0023e9daf445b1c2491d6c5288823f85..bacb18bdcd4d9041204fb4972f166e1a1104dbcd 100644 --- a/multicodec.go +++ b/multicodec.go @@ -17,11 +17,23 @@ 
func init() { cidlink.RegisterMulticodecEncoder(0x70, Encoder) } +// Decoder provides an IPLD codec decode interface for DAG-PB data. Provide a +// compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD Node. +// Use the NodeAssembler from the PBNode type for safest construction +// (Type.PBNode.NewBuilder()). A Map assembler will also work. +// This function is registered via the go-ipld-prime link loader for multicodec +// code 0x70 when this package is invoked via init. func Decoder(na ipld.NodeAssembler, r io.Reader) error { return Unmarshal(na, r) } +// Encoder provides an IPLD codec encode interface for DAG-PB data. Provide a +// conforming Node and a destination for bytes to marshal a DAG-PB IPLD Node. +// The Node must strictly conform to the DAG-PB schema +// (https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md). +// For safest use, build Nodes using the Type.PBNode type. +// This function is registered via the go-ipld-prime link loader for multicodec +// code 0x70 when this package is invoked via init. 
func Encoder(n ipld.Node, w io.Writer) error { - // return Unmarshal(na, w) - return nil + return Marshal(n, w) } diff --git a/pb/marshal.go b/pb/marshal.go index 75149268ffff9bd664024b044e4e957c96dc3215..f28ed3633299e18d117beabd7e37d7af97177f7e 100644 --- a/pb/marshal.go +++ b/pb/marshal.go @@ -8,7 +8,6 @@ import ( math_bits "math/bits" ) -// Marshal TODO func Marshal(out io.Writer, tokenSource func() (Token, error)) error { writeLead := func(wire byte, size uint64) { lead := make([]byte, SizeOfVarint(size)+1) diff --git a/unmarshal.go b/unmarshal.go index 003ad5d40fa5b247d73156f908a0b6294a3f21f9..9ffc2f78b5749965bb147b647eadb23b4f4ce5ff 100644 --- a/unmarshal.go +++ b/unmarshal.go @@ -10,13 +10,20 @@ import ( "golang.org/x/xerrors" ) +// ErrIntOverflow is returned when a varint overflows during decode; it +// indicates malformed data. var ErrIntOverflow = xerrors.Errorf("protobuf: varint overflow") +// Unmarshal provides an IPLD codec decode interface for DAG-PB data. Provide +// a compatible NodeAssembler and a byte source to unmarshal a DAG-PB IPLD +// Node. Use the NodeAssembler from the PBNode type for safest construction +// (Type.PBNode.NewBuilder()). A Map assembler will also work. 
func Unmarshal(na ipld.NodeAssembler, in io.Reader) error { ma, err := na.BeginMap(2) if err != nil { return err } + // always make "Links", even if we don't use it if err = ma.AssembleKey().AssignString("Links"); err != nil { return err } @@ -50,6 +57,8 @@ func Unmarshal(na ipld.NodeAssembler, in io.Reader) error { if chunk, err = decodeBytes(reader); err != nil { return err } + // Data must come after Links, so it's safe to close this here even if we + // didn't use it if err := links.Finish(); err != nil { return err } @@ -192,6 +201,8 @@ func unmarshalLink(reader shared.SlickReader, length int, ma ipld.MapAssembler) return nil } +// decode the lead for a PB chunk, fieldNum & wireType, that tells us which +// field in the schema we're looking at and what data type it is func decodeKey(reader shared.SlickReader) (int, int, error) { var wire uint64 var err error @@ -203,6 +214,7 @@ func decodeKey(reader shared.SlickReader) (int, int, error) { return fieldNum, wireType, nil } +// decode a byte string from PB func decodeBytes(reader shared.SlickReader) ([]byte, error) { bytesLen, err := decodeVarint(reader) if err != nil { @@ -215,6 +227,7 @@ func decodeBytes(reader shared.SlickReader) ([]byte, error) { return byts, nil } +// decode a varint from PB func decodeVarint(reader shared.SlickReader) (uint64, error) { var v uint64 for shift := uint(0); ; shift += 7 {