diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..4b86d7197f9a6f1a988c503defc8451392cd5e55 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Getting Help on IPFS + url: https://ipfs.io/help + about: All information about how and where to get help on IPFS. + - name: IPFS Official Forum + url: https://discuss.ipfs.io + about: Please post general questions, support requests, and discussions here. diff --git a/.github/ISSUE_TEMPLATE/open_an_issue.md b/.github/ISSUE_TEMPLATE/open_an_issue.md new file mode 100644 index 0000000000000000000000000000000000000000..4fcbd00aca0d513c2729d8df4afb5a01fdbe7d02 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/open_an_issue.md @@ -0,0 +1,19 @@ +--- +name: Open an issue +about: Only for actionable issues relevant to this repository. +title: '' +labels: need/triage +assignees: '' + +--- + diff --git a/.github/config.yml b/.github/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..ed26646a0f7cdda6cf10ede2b0b98cac89cf67b0 --- /dev/null +++ b/.github/config.yml @@ -0,0 +1,68 @@ +# Configuration for welcome - https://github.com/behaviorbot/welcome + +# Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome +# Comment to be posted to on first time issues +newIssueWelcomeComment: > + Thank you for submitting your first issue to this repository! A maintainer + will be here shortly to triage and review. + + In the meantime, please double-check that you have provided all the + necessary information to make this process easy! Any information that can + help save additional round trips is useful! We currently aim to give + initial feedback within **two business days**. If this does not happen, feel + free to leave a comment. 
+ + Please keep an eye on how this issue will be labeled, as labels give an + overview of priorities, assignments and additional actions requested by the + maintainers: + + - "Priority" labels will show how urgent this is for the team. + - "Status" labels will show if this is ready to be worked on, blocked, or in progress. + - "Need" labels will indicate if additional input or analysis is required. + + Finally, remember to use https://discuss.ipfs.io if you just need general + support. + +# Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome +# Comment to be posted to on PRs from first time contributors in your repository +newPRWelcomeComment: > + Thank you for submitting this PR! + + A maintainer will be here shortly to review it. + + We are super grateful, but we are also overloaded! Help us by making sure + that: + + * The context for this PR is clear, with relevant discussion, decisions + and stakeholders linked/mentioned. + + * Your contribution itself is clear (code comments, self-review for the + rest) and in its best form. Follow the [code contribution + guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines) + if they apply. + + Getting other community members to do a review would be great help too on + complex PRs (you can ask in the chats/forums). If you are unsure about + something, just leave us a comment. + + Next steps: + + * A maintainer will triage and assign priority to this PR, commenting on + any missing things and potentially assigning a reviewer for high + priority items. + + * The PR gets reviews, discussed and approvals as needed. + + * The PR is merged by maintainers when it has been approved and comments addressed. + + We currently aim to provide initial feedback/triaging within **two business + days**. Please keep an eye on any labelling actions, as these will indicate + priorities and status of your contribution. + + We are very grateful for your contribution! 
+ + +# Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge +# Comment to be posted to on pull requests merged by a first time user +# Currently disabled +#firstPRMergeComment: "" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..aaea8ed0a7d1d73715fe9b480d2f48729e0870ce --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +cid-fuzz.zip diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000000000000000000000000000000000..5163d693fc757ec0c13d134e9d4cabd6cc7c85d7 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,30 @@ +os: + - linux + +language: go + +go: + - 1.11.x + +env: + global: + - GOTFLAGS="-race" + matrix: + - BUILD_DEPTYPE=gomod + + +# disable travis install +install: + - true + +script: + - bash <(curl -s https://raw.githubusercontent.com/ipfs/ci-helpers/master/travis-ci/run-standard-tests.sh) + + +cache: + directories: + - $GOPATH/pkg/mod + - $HOME/.cache/go-build + +notifications: + email: false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0e323020a6a25dbbe9da90f747ad7141215482ea --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Protocol Labs, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..554bed32589bc52380d25951818dcfeabc19c459 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +all: deps + +deps: + go get github.com/mattn/goveralls + go get golang.org/x/tools/cmd/cover diff --git a/README.md b/README.md index 394fb4edef5ac5e7bdde50630a60bb3447c52a3a..94d5ff07ee9581a027343d8f52c7c84e2ad8218f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,108 @@ -# go-cid +go-cid +================== -dms3 go-cid \ No newline at end of file +[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io) +[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/) +[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs) +[![](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme) +[![GoDoc](https://godoc.org/github.com/ipfs/go-cid?status.svg)](https://godoc.org/github.com/ipfs/go-cid) +[![Coverage Status](https://coveralls.io/repos/github/ipfs/go-cid/badge.svg?branch=master)](https://coveralls.io/github/ipfs/go-cid?branch=master) +[![Travis CI](https://travis-ci.org/ipfs/go-cid.svg?branch=master)](https://travis-ci.org/ipfs/go-cid) + +> A package to handle content IDs in Go. + +This is an implementation in Go of the [CID spec](https://github.com/ipld/cid). 
+It is used in `go-ipfs` and related packages to refer to a typed hunk of data. + +## Lead Maintainer + +[Eric Myhre](https://github.com/warpfork) + +## Table of Contents + +- [Install](#install) +- [Usage](#usage) +- [API](#api) +- [Contribute](#contribute) +- [License](#license) + +## Install + +`go-cid` is a standard Go module which can be installed with: + +```sh +go get github.com/ipfs/go-cid +``` + +## Usage + +### Running tests + +Run tests with `go test` from the directory root + +```sh +go test +``` + +### Examples + +#### Parsing string input from users + +```go +// Create a cid from a marshaled string +c, err := cid.Decode("bafzbeigai3eoy2ccc7ybwjfz5r3rdxqrinwi4rwytly24tdbh6yk7zslrm") +if err != nil {...} + +fmt.Println("Got CID: ", c) +``` + +#### Creating a CID from scratch + +```go +// Create a cid manually by specifying the 'prefix' parameters +pref := cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.SHA2_256, + MhLength: -1, // default length +} + +// And then feed it some data +c, err := pref.Sum([]byte("Hello World!")) +if err != nil {...} + +fmt.Println("Created CID: ", c) +``` + +#### Check if two CIDs match + +```go +// To test if two cid's are equivalent, be sure to use the 'Equals' method: +if c1.Equals(c2) { + fmt.Println("These two refer to the same exact data!") +} +``` + +#### Check if some data matches a given CID + +```go +// To check if some data matches a given cid, +// Get your CIDs prefix, and use that to sum the data in question: +other, err := c.Prefix().Sum(mydata) +if err != nil {...} + +if !c.Equals(other) { + fmt.Println("This data is different.") +} + +``` + +## Contribute + +PRs are welcome! + +Small note: If editing the Readme, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification. 
+ +## License + +MIT © Jeromy Johnson diff --git a/_rsrch/cidiface/README.md b/_rsrch/cidiface/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f45a3bf61d1ce9092ce3c810a480e45fee557598 --- /dev/null +++ b/_rsrch/cidiface/README.md @@ -0,0 +1,168 @@ +What golang Kinds work best to implement CIDs? +============================================== + +There are many possible ways to implement CIDs. This package explores them. + +### criteria + +There's a couple different criteria to consider: + +- We want the best performance when operating on the type (getters, mostly); +- We want to minimize the number of memory allocations we need; +- We want types which can be used as map keys, because this is common. + +The priority of these criteria is open to argument, but it's probably +mapkeys > minalloc > anythingelse. +(Mapkeys and minalloc are also quite entangled, since if we don't pick a +representation that can work natively as a map key, we'll end up needing +a `KeyRepr()` method which gives us something that does work as a map key, +and that will almost certainly involve a malloc itself.) + +### options + +There are quite a few different ways to go: + +- Option A: CIDs as a struct; multihash as bytes. +- Option B: CIDs as a string. +- Option C: CIDs as an interface with multiple implementors. +- Option D: CIDs as a struct; multihash also as a struct or string. +- Option E: CIDs as a struct; content as strings plus offsets. +- Option F: CIDs as a struct wrapping only a string. + +The current approach on the master branch is Option A. + +Option D is distinctive from Option A because multihash as bytes transitively +causes the CID struct to be non-comparable and thus not suitable for map keys +as per https://golang.org/ref/spec#KeyType . 
(It's also a bit more work to +pursue Option D because it's just a bigger splash radius of change; but also, +something we might also want to do soon, because we *do* also have these same +map-key-usability concerns with multihash alone.) + +Option E is distinctive from Option D because Option E would always maintain +the binary format of the cid internally, and so could yield it again without +malloc, while still potentially having faster access to components than +Option B since it wouldn't need to re-parse varints to access later fields. + +Option F is actually a variation of Option B; it's distinctive from the other +struct options because it is proposing *literally* `struct{ x string }` as +the type, with no additional fields for components nor offsets. + +Option C is the avoid-choices choice, but note that interfaces are not free; +since "minimize mallocs" is one of our major goals, we cannot use interfaces +whimsically. + +Note there is no proposal for migrating to `type Cid []byte`, because that +is generally considered to be strictly inferior to `type Cid string`. + + +Discoveries +----------- + +### using interfaces as map keys forgoes a lot of safety checks + +Using interfaces as map keys pushes a bunch of type checking to runtime. +E.g., it's totally valid at compile time to push a type which is non-comparable +into a map key; it will panic at *runtime* instead of failing at compile-time. + +There's also no way to define equality checks between implementors of the +interface: golang will always use its innate concept of comparison for the +concrete types. This means it's effectively *never safe* to use two different +concrete implementations of an interface in the same map; you may add elements +which are semantically "equal" in your mind, and end up very confused later +when both impls of the same "equal" object have been stored. 
+ +### sentinel values are possible in any impl, but some are clearer than others + +When using `*Cid`, the nil value is a clear sentinel for 'invalid'; +when using `type Cid string`, the zero value is a clear sentinel; +when using `type Cid struct` per Option A or D... the only valid check is +for a nil multihash field, since version=0 and codec=0 are both valid values. +When using `type Cid struct{string}` per Option F, zero is a clear sentinel. + +### usability as a map key is important + +We already covered this in the criteria section, but for clarity: + +- Option A: ❌ +- Option B: ✔ +- Option C: ~ (caveats, and depends on concrete impl) +- Option D: ✔ +- Option E: ✔ +- Option F: ✔ + +### living without offsets requires parsing + +Since CID (and multihash!) are defined using varints, they require parsing; +we can't just jump into the string at a known offset in order to yield e.g. +the multicodec number. + +In order to get to the 'meat' of the CID (the multihash content), we first +must parse: + +- the CID version varint; +- the multicodec varint; +- the multihash type enum varint; +- and the multihash length varint. + +Since there are many applications where we want to jump straight to the +multihash content (for example, when doing CAS sharding -- see the +[disclaimer](https://github.com/multiformats/multihash#disclaimers) about +bias in leading bytes), this overhead may be interesting. + +How much this overhead is significant is hard to say from microbenchmarking; +it depends largely on usage patterns. If these traversals are a significant +timesink, it would be an argument for Option D/E. +If these traversals are *not* a significant timesink, we might be wiser +to keep to Option B/F, because keeping a struct full of offsets will add several +words of memory usage per CID, and we keep a *lot* of CIDs. + +### interfaces cause boxing which is a significant performance cost + +See `BenchmarkCidMap_CidStr` and friends. 
+ +Long story short: using interfaces *anywhere* will cause the compiler to +implicitly generate boxing and unboxing code (e.g. `runtime.convT2E`); +this is both another function call, and more concerningly, results in +large numbers of unbatchable memory allocations. + +Numbers without context are dangerous, but if you need one: 33%. +It's a big deal. + +This means attempts to "use interfaces, but switch to concrete impls when +performance is important" are a red herring: it doesn't work that way. + +This is not a general indictment against using interfaces -- but +if a situation is at the scale where it's become important to mind whether +or not pointers are a performance impact, then that situation also +is one where you have to think twice before using interfaces. + +### struct wrappers can be used in place of typedefs with zero overhead + +See `TestSizeOf`. + +Using the `unsafe.Sizeof` feature to inspect what the Go runtime thinks, +we can see that `type Foo string` and `type Foo struct{x string}` consume +precisely the same amount of memory. + +This is interesting because it means we can choose between either +type definition with no significant overhead anywhere we use it: +thus, we can choose freely between Option B and Option F based on which +we feel is more pleasant to work with. + +Option F (a struct wrapper) means we can prevent casting into our Cid type. +Option B (typedef string) can be declared a `const`. +Are there any other concerns that would separate the two choices? + +### one way or another: let's get rid of that star + +We should switch completely to handling `Cid` and remove `*Cid` completely. +Regardless of whether we do this by migrating to interface, or string +implementations, or simply structs with no pointers... once we get there, +refactoring to any of the *others* can become a no-op from the perspective +of any downstream code that uses CIDs. 
+ +(This means all access via functions, never references to fields -- even if +we were to use a struct implementation. *Pretend* there's an interface, +in other words.) + +There are probably `gofix` incantations which can help us with this migration. diff --git a/_rsrch/cidiface/cid.go b/_rsrch/cidiface/cid.go new file mode 100644 index 0000000000000000000000000000000000000000..cb4b87191899cb0be5a01676acce0c5555eaf9bb --- /dev/null +++ b/_rsrch/cidiface/cid.go @@ -0,0 +1,48 @@ +package cid + +import ( + mh "github.com/multiformats/go-multihash" +) + +// Cid represents a self-describing content-addressed identifier. +// +// A CID is composed of: +// +// - a Version of the CID itself, +// - a Multicodec (indicates the encoding of the referenced content), +// - and a Multihash (which identifies the referenced content). +// +// (Note that the Multihash further contains its own version and hash type +// indicators.) +type Cid interface { + // n.b. 'yields' means "without copy", 'produces' means a malloc. + + Version() uint64 // Yields the version prefix as a uint. + Multicodec() uint64 // Yields the multicodec as a uint. + Multihash() mh.Multihash // Yields the multihash segment. + + String() string // Produces the CID formatted as b58 string. + Bytes() []byte // Produces the CID formatted as raw binary. + + Prefix() Prefix // Produces a tuple of non-content metadata. + + // some change notes: + // - `KeyString() CidString` is gone because we're natively a map key now, you're welcome. + // - `StringOfBase(mbase.Encoding) (string, error)` is skipped, maybe it can come back but maybe it should be a formatter's job. + // - `Equals(o Cid) bool` is gone because it's now `==`, you're welcome. + + // TODO: make a multi-return method for {v,mc,mh} decomposition. CidStr will be able to implement this more efficiently than if one makes a series of the individual getter calls. 
+} + +// Prefix represents all the metadata of a Cid, +// that is, the Version, the Codec, the Multihash type +// and the Multihash length. It does not contain +// any actual content information. +// NOTE: The use of -1 in MhLength to mean default length is deprecated, +// use the V0Builder or V1Builder structures instead +type Prefix struct { + Version uint64 + Codec uint64 + MhType uint64 + MhLength int +} diff --git a/_rsrch/cidiface/cidBoxingBench_test.go b/_rsrch/cidiface/cidBoxingBench_test.go new file mode 100644 index 0000000000000000000000000000000000000000..920ce1f9f50c8ab7131fc0e75cdc85806bcdc91d --- /dev/null +++ b/_rsrch/cidiface/cidBoxingBench_test.go @@ -0,0 +1,71 @@ +package cid + +import ( + "testing" +) + +// BenchmarkCidMap_CidStr estimates how fast it is to insert primitives into a map +// keyed by CidStr (concretely). +// +// We do 100 insertions per benchmark run to make sure the map initialization +// doesn't dominate the results. +// +// Sample results on linux amd64 go1.11beta: +// +// BenchmarkCidMap_CidStr-8 100000 16317 ns/op +// BenchmarkCidMap_CidIface-8 100000 20516 ns/op +// +// With benchmem on: +// +// BenchmarkCidMap_CidStr-8 100000 15579 ns/op 11223 B/op 207 allocs/op +// BenchmarkCidMap_CidIface-8 100000 19500 ns/op 12824 B/op 307 allocs/op +// BenchmarkCidMap_StrPlusHax-8 200000 10451 ns/op 7589 B/op 202 allocs/op +// +// We can see here that the impact of interface boxing is significant: +// it increases the time taken to do the inserts to 133%, largely because +// the implied `runtime.convT2E` calls cause another malloc each. +// +// There are also significant allocations in both cases because +// A) we cannot create a multihash without allocations since they are []byte; +// B) the map has to be grown several times; +// C) something I haven't quite put my finger on yet. +// Ideally we'd drive those down further as well. 
+// +// Pre-allocating the map reduces allocs by a very small percentage by *count*, +// but reduces the time taken by 66% overall (presumably because when a map +// re-arranges itself, it involves more or less an O(n) copy of the content +// in addition to the alloc itself). This isn't topical to the question of +// whether or not interfaces are a good idea; just for contextualizing. +// +func BenchmarkCidMap_CidStr(b *testing.B) { + for i := 0; i < b.N; i++ { + mp := map[CidStr]int{} // no size hint: the map has to grow during the inserts + for x := 0; x < 100; x++ { + mp[NewCidStr(0, uint64(x), []byte{})] = x // x doubles as the codec so every key is distinct + } + } +} + +// BenchmarkCidMap_CidIface is in the family of BenchmarkCidMap_CidStr: +// it is identical except the map key type is declared as an interface +// (which forces all insertions to be boxed, changing performance). +func BenchmarkCidMap_CidIface(b *testing.B) { + for i := 0; i < b.N; i++ { + mp := map[Cid]int{} // interface-typed key: each CidStr is boxed on insert + for x := 0; x < 100; x++ { + mp[NewCidStr(0, uint64(x), []byte{})] = x + } + } +} + +// BenchmarkCidMap_CidStrAvoidMapGrowth is in the family of BenchmarkCidMap_CidStr: +// it is identical except the map is created with a size hint that removes +// some allocations (5, in practice, apparently). +func BenchmarkCidMap_CidStrAvoidMapGrowth(b *testing.B) { + for i := 0; i < b.N; i++ { + mp := make(map[CidStr]int, 100) // size hint matches the 100 inserts below + for x := 0; x < 100; x++ { + mp[NewCidStr(0, uint64(x), []byte{})] = x + } + } +} diff --git a/_rsrch/cidiface/cidString.go b/_rsrch/cidiface/cidString.go new file mode 100644 index 0000000000000000000000000000000000000000..129fafdfa992c94b84dd477de9af0697a9f1c860 --- /dev/null +++ b/_rsrch/cidiface/cidString.go @@ -0,0 +1,161 @@ +package cid + +import ( + "encoding/binary" + "fmt" + + mbase "github.com/multiformats/go-multibase" + mh "github.com/multiformats/go-multihash" +) + +//================= +// def & accessors +//================= + +var _ Cid = CidStr("") // compile-time check that CidStr implements Cid +var _ map[CidStr]struct{} = nil // compile-time check that CidStr is a valid map key type + +// CidStr is a representation of a Cid as a string type containing binary. 
+// +// Using golang's string type is preferable over byte slices even for binary +// data because golang strings are immutable, usable as map keys, +// trivially comparable with built-in equals operators, etc. +// +// Please do not cast strings or bytes into the CidStr type directly; +// use a parse method which validates the data and yields a CidStr. +type CidStr string + +// EmptyCidStr is a constant for a zero/uninitialized/sentinel value cid; +// it is declared mainly for readability in checks for sentinel values. +const EmptyCidStr = CidStr("") + +func (c CidStr) Version() uint64 { + bytes := []byte(c) + v, _ := binary.Uvarint(bytes) // first uvarint is the version; the byte count (and so any decode error) is ignored + return v +} + +func (c CidStr) Multicodec() uint64 { + bytes := []byte(c) + _, n := binary.Uvarint(bytes) // skip version length + codec, _ := binary.Uvarint(bytes[n:]) // second uvarint is the codec; decode errors are ignored here too + return codec +} + +func (c CidStr) Multihash() mh.Multihash { + bytes := []byte(c) + _, n1 := binary.Uvarint(bytes) // skip version length + _, n2 := binary.Uvarint(bytes[n1:]) // skip codec length + return mh.Multihash(bytes[n1+n2:]) // return slice of remainder +} + +// String returns the default string representation of a Cid. +// Currently, Base58 is used as the encoding for the multibase string. +func (c CidStr) String() string { + switch c.Version() { + case 0: + return c.Multihash().B58String() // v0: base58 of the multihash alone + case 1: + mbstr, err := mbase.Encode(mbase.Base58BTC, []byte(c)) + if err != nil { + panic("should not error with hardcoded mbase: " + err.Error()) + } + return mbstr + default: + panic("not possible to reach this point") + } +} + +// Bytes produces a raw binary format of the CID. +// +// (For CidStr, this method is only distinct from casting because of +// compatibility with v0 CIDs.) +func (c CidStr) Bytes() []byte { + switch c.Version() { + case 0: + return c.Multihash() // v0: only the multihash, without the version and codec varints + case 1: + return []byte(c) + default: + panic("not possible to reach this point") + } +} + +// Prefix builds and returns a Prefix out of a Cid. 
+func (c CidStr) Prefix() Prefix { + dec, _ := mh.Decode(c.Multihash()) // assuming we got a valid multihash, this will not error (the error is discarded) + return Prefix{ + MhType: dec.Code, + MhLength: dec.Length, + Version: c.Version(), + Codec: c.Multicodec(), + } +} + +//================================== +// parsers & validators & factories +//================================== + +func NewCidStr(version uint64, codecType uint64, mhash mh.Multihash) CidStr { + hashlen := len(mhash) + // room for two uvarints (at most binary.MaxVarintLen64 bytes each) plus the hash + buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) + n := binary.PutUvarint(buf, version) + n += binary.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") + } + return CidStr(buf[:n+hashlen]) // trim the unused varint headroom +} + +// CidStrParse takes a binary byte slice, parses it, and returns either +// a valid CidStr, or the zero CidStr and an error. +// +// For CidV1, the data buffer is in the form: +// +// <version><codec-type><multihash> +// +// CidV0 are also supported. In particular, data buffers of length +// 34 bytes which start with bytes [18,32...] are considered +// binary multihashes. +// +// The multicodec bytes are not parsed to verify they're a valid varint; +// no further reification is performed. +// +// Multibase encoding should already have been unwrapped before parsing; +// if you have a multibase-enveloped string, use CidStrDecode instead. +// +// CidStrParse is the inverse of Cid.Bytes(). 
+func CidStrParse(data []byte) (CidStr, error) { + if len(data) == 34 && data[0] == 18 && data[1] == 32 { // bare 34-byte multihash: treat as CidV0 + h, err := mh.Cast(data) + if err != nil { + return EmptyCidStr, err + } + return NewCidStr(0, DagProtobuf, h), nil // stored with an explicit version 0 and the DagProtobuf codec + } + + vers, n := binary.Uvarint(data) + if err := uvError(n); err != nil { + return EmptyCidStr, err + } + + if vers != 0 && vers != 1 { + return EmptyCidStr, fmt.Errorf("invalid cid version number: %d", vers) + } + + _, cn := binary.Uvarint(data[n:]) // only the codec's encoded length is needed; its value is not inspected + if err := uvError(cn); err != nil { + return EmptyCidStr, err + } + + rest := data[n+cn:] + h, err := mh.Cast(rest) + if err != nil { + return EmptyCidStr, err + } + + // REVIEW: if the data is longer than the mh.len expects, we silently ignore it? should we? + return CidStr(data[0 : n+cn+len(h)]), nil +} diff --git a/_rsrch/cidiface/cidStruct.go b/_rsrch/cidiface/cidStruct.go new file mode 100644 index 0000000000000000000000000000000000000000..2fd5fa7ff0e7c00f5208d7511c073a4797cdeaa7 --- /dev/null +++ b/_rsrch/cidiface/cidStruct.go @@ -0,0 +1,164 @@ +package cid + +import ( + "encoding/binary" + "fmt" + + mbase "github.com/multiformats/go-multibase" + mh "github.com/multiformats/go-multihash" +) + +//================= +// def & accessors +//================= + +var _ Cid = CidStruct{} // compile-time check that CidStruct implements Cid + +//var _ map[CidStruct]struct{} = nil // Will not compile! See struct def docs. +//var _ map[Cid]struct{} = map[Cid]struct{}{CidStruct{}: struct{}{}} // Legal to compile... +// but you'll get panics: "runtime error: hash of unhashable type cid.CidStruct" + +// CidStruct represents a CID in a struct format. +// +// This format complies with the exact same Cid interface as the CidStr +// implementation, but completely pre-parses the Cid metadata. +// CidStruct is a tad quicker in case of repeatedly accessed fields, +// but requires more reshuffling to parse and to serialize. 
+// CidStruct is not usable as a map key, because it contains a Multihash +// reference, which is a slice, and thus not "comparable" as a primitive. +// +// Beware of zero-valued CidStruct: it is difficult to distinguish an +// incorrectly-initialized "invalid" CidStruct from one representing a v0 cid. +type CidStruct struct { + version uint64 + codec uint64 + hash mh.Multihash +} + +// EmptyCidStruct is a constant for a zero/uninitialized/sentinel value cid; +// it is declared mainly for readability in checks for sentinel values. +// +// Note: it's not actually a const; the compiler does not allow const structs. +var EmptyCidStruct = CidStruct{} + +func (c CidStruct) Version() uint64 { + return c.version +} + +func (c CidStruct) Multicodec() uint64 { + return c.codec +} + +func (c CidStruct) Multihash() mh.Multihash { + return c.hash +} + +// String returns the default string representation of a Cid. +// Currently, Base58 is used as the encoding for the multibase string. +func (c CidStruct) String() string { + switch c.Version() { + case 0: + return c.Multihash().B58String() // v0: base58 of the multihash alone + case 1: + mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes()) + if err != nil { + panic("should not error with hardcoded mbase: " + err.Error()) + } + return mbstr + default: + panic("not possible to reach this point") + } +} + +// Bytes produces a raw binary format of the CID. +func (c CidStruct) Bytes() []byte { + switch c.version { + case 0: + return []byte(c.hash) // v0 serializes as the multihash alone + case 1: + // room for two uvarints (at most binary.MaxVarintLen64 bytes each) plus the hash + buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) + n := binary.PutUvarint(buf, c.version) + n += binary.PutUvarint(buf[n:], c.codec) + cn := copy(buf[n:], c.hash) + if cn != len(c.hash) { + panic("copy hash length is inconsistent") + } + return buf[:n+len(c.hash)] + default: + panic("not possible to reach this point") + } +} + +// Prefix builds and returns a Prefix out of a Cid. 
+func (c CidStruct) Prefix() Prefix { + dec, _ := mh.Decode(c.hash) // assuming we got a valid multihash, this will not error (the error is discarded) + return Prefix{ + MhType: dec.Code, + MhLength: dec.Length, + Version: c.version, + Codec: c.codec, + } +} + +//================================== +// parsers & validators & factories +//================================== + +// CidStructParse takes a binary byte slice, parses it, and returns either +// a valid CidStruct, or the zero CidStruct and an error. +// +// For CidV1, the data buffer is in the form: +// +// <version><codec-type><multihash> +// +// CidV0 are also supported. In particular, data buffers of length +// 34 bytes which start with bytes [18,32...] are considered +// binary multihashes. +// +// The multicodec bytes are not parsed to verify they're a valid varint; +// no further reification is performed. +// +// Multibase encoding should already have been unwrapped before parsing; +// if you have a multibase-enveloped string, use CidStructDecode instead. +// +// CidStructParse is the inverse of Cid.Bytes(). 
+func CidStructParse(data []byte) (CidStruct, error) { + if len(data) == 34 && data[0] == 18 && data[1] == 32 { // bare 34-byte multihash: treat as CidV0 + h, err := mh.Cast(data) + if err != nil { + return EmptyCidStruct, err + } + return CidStruct{ + codec: DagProtobuf, + version: 0, + hash: h, + }, nil + } + + vers, n := binary.Uvarint(data) + if err := uvError(n); err != nil { + return EmptyCidStruct, err + } + + if vers != 0 && vers != 1 { + return EmptyCidStruct, fmt.Errorf("invalid cid version number: %d", vers) + } + + codec, cn := binary.Uvarint(data[n:]) // codec is stored as-is; it is not checked against the codec tables + if err := uvError(cn); err != nil { + return EmptyCidStruct, err + } + + rest := data[n+cn:] + h, err := mh.Cast(rest) + if err != nil { + return EmptyCidStruct, err + } + + return CidStruct{ + version: vers, + codec: codec, + hash: h, + }, nil +} diff --git a/_rsrch/cidiface/enums.go b/_rsrch/cidiface/enums.go new file mode 100644 index 0000000000000000000000000000000000000000..6a1ef9b0b7ae88396cd1fef53db30005b961d152 --- /dev/null +++ b/_rsrch/cidiface/enums.go @@ -0,0 +1,79 @@ +package cid + +// These are multicodec-packed content types. 
They should match +// the codes described in the authoritative document: +// https://github.com/multiformats/multicodec/blob/master/table.csv +const ( + Raw = 0x55 + + DagProtobuf = 0x70 + DagCBOR = 0x71 + Libp2pKey = 0x72 + + GitRaw = 0x78 + + EthBlock = 0x90 + EthBlockList = 0x91 + EthTxTrie = 0x92 + EthTx = 0x93 + EthTxReceiptTrie = 0x94 + EthTxReceipt = 0x95 + EthStateTrie = 0x96 + EthAccountSnapshot = 0x97 + EthStorageTrie = 0x98 + BitcoinBlock = 0xb0 + BitcoinTx = 0xb1 + ZcashBlock = 0xc0 + ZcashTx = 0xc1 + DecredBlock = 0xe0 + DecredTx = 0xe1 +) + +// Codecs maps the name of a codec to its type +var Codecs = map[string]uint64{ + "v0": DagProtobuf, + "raw": Raw, + "protobuf": DagProtobuf, + "cbor": DagCBOR, + "libp2p-key": Libp2pKey, + "git-raw": GitRaw, + "eth-block": EthBlock, + "eth-block-list": EthBlockList, + "eth-tx-trie": EthTxTrie, + "eth-tx": EthTx, + "eth-tx-receipt-trie": EthTxReceiptTrie, + "eth-tx-receipt": EthTxReceipt, + "eth-state-trie": EthStateTrie, + "eth-account-snapshot": EthAccountSnapshot, + "eth-storage-trie": EthStorageTrie, + "bitcoin-block": BitcoinBlock, + "bitcoin-tx": BitcoinTx, + "zcash-block": ZcashBlock, + "zcash-tx": ZcashTx, + "decred-block": DecredBlock, + "decred-tx": DecredTx, +} + +// CodecToStr maps the numeric codec to its name +var CodecToStr = map[uint64]string{ + Raw: "raw", + DagProtobuf: "protobuf", + DagCBOR: "cbor", + Libp2pKey: "libp2p-key", + GitRaw: "git-raw", + EthBlock: "eth-block", + EthBlockList: "eth-block-list", + EthTxTrie: "eth-tx-trie", + EthTx: "eth-tx", + EthTxReceiptTrie: "eth-tx-receipt-trie", + EthTxReceipt: "eth-tx-receipt", + EthStateTrie: "eth-state-trie", + EthAccountSnapshot: "eth-account-snapshot", + EthStorageTrie: "eth-storage-trie", + BitcoinBlock: "bitcoin-block", + BitcoinTx: "bitcoin-tx", + ZcashBlock: "zcash-block", + ZcashTx: "zcash-tx", + DecredBlock: "decred-block", + DecredTx: "decred-tx", +} diff --git a/_rsrch/cidiface/errors.go b/_rsrch/cidiface/errors.go new file mode 100644 
index 0000000000000000000000000000000000000000..588c62e088b061462e766e44dcbd746fe8df38d0 --- /dev/null +++ b/_rsrch/cidiface/errors.go @@ -0,0 +1,24 @@ +package cid + +import ( + "errors" +) + +var ( + // ErrVarintBuffSmall means that a buffer passed to the cid parser was not + // long enough, or did not contain a valid cid + ErrVarintBuffSmall = errors.New("reading varint: buffer too small") + + // ErrVarintTooBig means that the varint in the given cid was above the + // limit of 2^64 + ErrVarintTooBig = errors.New("reading varint: varint bigger than 64bits" + + " and not supported") + + // ErrCidTooShort means that the cid passed to decode was not long + // enough to be a valid Cid + ErrCidTooShort = errors.New("cid too short") + + // ErrInvalidEncoding means that the selected encoding is not supported + // by this Cid version + ErrInvalidEncoding = errors.New("invalid base encoding") +) diff --git a/_rsrch/cidiface/misc.go b/_rsrch/cidiface/misc.go new file mode 100644 index 0000000000000000000000000000000000000000..9a4486ad80697968883bd06f352379f2193023a4 --- /dev/null +++ b/_rsrch/cidiface/misc.go @@ -0,0 +1,12 @@ +package cid + +func uvError(read int) error { + switch { + case read == 0: + return ErrVarintBuffSmall + case read < 0: + return ErrVarintTooBig + default: + return nil + } +} diff --git a/builder.go b/builder.go new file mode 100644 index 0000000000000000000000000000000000000000..3d2fc77cbd3d63a46aa27319f5032e6be440ba7b --- /dev/null +++ b/builder.go @@ -0,0 +1,74 @@ +package cid + +import ( + mh "github.com/multiformats/go-multihash" +) + +type Builder interface { + Sum(data []byte) (Cid, error) + GetCodec() uint64 + WithCodec(uint64) Builder +} + +type V0Builder struct{} + +type V1Builder struct { + Codec uint64 + MhType uint64 + MhLength int // MhLength <= 0 means the default length +} + +func (p Prefix) GetCodec() uint64 { + return p.Codec +} + +func (p Prefix) WithCodec(c uint64) Builder { + if c == p.Codec { + return p + } + p.Codec = c + if c 
!= DagProtobuf { + p.Version = 1 + } + return p +} + +func (p V0Builder) Sum(data []byte) (Cid, error) { + hash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + return Undef, err + } + return Cid{string(hash)}, nil +} + +func (p V0Builder) GetCodec() uint64 { + return DagProtobuf +} + +func (p V0Builder) WithCodec(c uint64) Builder { + if c == DagProtobuf { + return p + } + return V1Builder{Codec: c, MhType: mh.SHA2_256} +} + +func (p V1Builder) Sum(data []byte) (Cid, error) { + mhLen := p.MhLength + if mhLen <= 0 { + mhLen = -1 + } + hash, err := mh.Sum(data, p.MhType, mhLen) + if err != nil { + return Undef, err + } + return NewCidV1(p.Codec, hash), nil +} + +func (p V1Builder) GetCodec() uint64 { + return p.Codec +} + +func (p V1Builder) WithCodec(c uint64) Builder { + p.Codec = c + return p +} diff --git a/builder_test.go b/builder_test.go new file mode 100644 index 0000000000000000000000000000000000000000..f250ea3c6f369e47dc23eed813ac65964c17f14a --- /dev/null +++ b/builder_test.go @@ -0,0 +1,112 @@ +package cid + +import ( + "testing" + + mh "github.com/multiformats/go-multihash" +) + +func TestV0Builder(t *testing.T) { + data := []byte("this is some test content") + + // Construct c1 + format := V0Builder{} + c1, err := format.Sum(data) + if err != nil { + t.Fatal(err) + } + + // Construct c2 + hash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + c2 := NewCidV0(hash) + + if !c1.Equals(c2) { + t.Fatal("cids mismatch") + } + if c1.Prefix() != c2.Prefix() { + t.Fatal("prefixes mismatch") + } +} + +func TestV1Builder(t *testing.T) { + data := []byte("this is some test content") + + // Construct c1 + format := V1Builder{Codec: DagCBOR, MhType: mh.SHA2_256} + c1, err := format.Sum(data) + if err != nil { + t.Fatal(err) + } + + // Construct c2 + hash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + c2 := NewCidV1(DagCBOR, hash) + + if !c1.Equals(c2) { + t.Fatal("cids mismatch") + } + if c1.Prefix() 
!= c2.Prefix() { + t.Fatal("prefixes mismatch") + } +} + +func TestCodecChange(t *testing.T) { + t.Run("Prefix-CidV0", func(t *testing.T) { + p := Prefix{Version: 0, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: mh.DefaultLengths[mh.SHA2_256]} + testCodecChange(t, p) + }) + t.Run("Prefix-CidV1", func(t *testing.T) { + p := Prefix{Version: 1, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: mh.DefaultLengths[mh.SHA2_256]} + testCodecChange(t, p) + }) + t.Run("Prefix-NoChange", func(t *testing.T) { + p := Prefix{Version: 0, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: mh.DefaultLengths[mh.SHA2_256]} + if p.GetCodec() != DagProtobuf { + t.Fatal("original builder not using Protobuf codec") + } + pn := p.WithCodec(DagProtobuf) + if pn != p { + t.Fatal("should have returned same builder") + } + }) + t.Run("V0Builder", func(t *testing.T) { + testCodecChange(t, V0Builder{}) + }) + t.Run("V0Builder-NoChange", func(t *testing.T) { + b := V0Builder{} + if b.GetCodec() != DagProtobuf { + t.Fatal("original builder not using Protobuf codec") + } + bn := b.WithCodec(DagProtobuf) + if bn != b { + t.Fatal("should have returned same builder") + } + }) + t.Run("V1Builder", func(t *testing.T) { + testCodecChange(t, V1Builder{Codec: DagProtobuf, MhType: mh.SHA2_256}) + }) +} + +func testCodecChange(t *testing.T, b Builder) { + data := []byte("this is some test content") + + if b.GetCodec() != DagProtobuf { + t.Fatal("original builder not using Protobuf codec") + } + + b = b.WithCodec(Raw) + c, err := b.Sum(data) + if err != nil { + t.Fatal(err) + } + + if c.Type() != Raw { + t.Fatal("new cid codec did not change to Raw") + } +} diff --git a/cid.go b/cid.go new file mode 100644 index 0000000000000000000000000000000000000000..2c4fd22a59a80ce6e7939fc437398ee977741573 --- /dev/null +++ b/cid.go @@ -0,0 +1,680 @@ +// Package cid implements the Content-IDentifiers specification +// (https://github.com/ipld/cid) in Go. 
CIDs are +// self-describing content-addressed identifiers useful for +// distributed information systems. CIDs are used in the IPFS +// (https://ipfs.io) project ecosystem. +// +// CIDs have two major versions. A CIDv0 corresponds to a multihash of type +// DagProtobuf, is deprecated and exists for compatibility reasons. Usually, +// CIDv1 should be used. +// +// A CIDv1 has four parts: +// +// <cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address> +// +// As shown above, the CID implementation relies heavily on Multiformats, +// particularly Multibase +// (https://github.com/multiformats/go-multibase), Multicodec +// (https://github.com/multiformats/multicodec) and Multihash +// implementations (https://github.com/multiformats/go-multihash). +package cid + +import ( + "bytes" + "encoding" + "encoding/json" + "errors" + "fmt" + "io" + "strings" + + mbase "github.com/multiformats/go-multibase" + mh "github.com/multiformats/go-multihash" + varint "github.com/multiformats/go-varint" +) + +// UnsupportedVersionString just holds an error message +const UnsupportedVersionString = "" + +var ( + // ErrCidTooShort means that the cid passed to decode was not long + // enough to be a valid Cid + ErrCidTooShort = errors.New("cid too short") + + // ErrInvalidEncoding means that the selected encoding is not supported + // by this Cid version + ErrInvalidEncoding = errors.New("invalid base encoding") +) + +// These are multicodec-packed content types. 
They should match +// the codes described in the authoritative document: +// https://github.com/multiformats/multicodec/blob/master/table.csv +const ( + Raw = 0x55 + + DagProtobuf = 0x70 + DagCBOR = 0x71 + Libp2pKey = 0x72 + + GitRaw = 0x78 + + DagJOSE = 0x85 + EthBlock = 0x90 + EthBlockList = 0x91 + EthTxTrie = 0x92 + EthTx = 0x93 + EthTxReceiptTrie = 0x94 + EthTxReceipt = 0x95 + EthStateTrie = 0x96 + EthAccountSnapshot = 0x97 + EthStorageTrie = 0x98 + BitcoinBlock = 0xb0 + BitcoinTx = 0xb1 + ZcashBlock = 0xc0 + ZcashTx = 0xc1 + DecredBlock = 0xe0 + DecredTx = 0xe1 + DashBlock = 0xf0 + DashTx = 0xf1 + FilCommitmentUnsealed = 0xf101 + FilCommitmentSealed = 0xf102 +) + +// Codecs maps the name of a codec to its type +var Codecs = map[string]uint64{ + "v0": DagProtobuf, + "raw": Raw, + "protobuf": DagProtobuf, + "cbor": DagCBOR, + "libp2p-key": Libp2pKey, + "git-raw": GitRaw, + "eth-block": EthBlock, + "eth-block-list": EthBlockList, + "eth-tx-trie": EthTxTrie, + "eth-tx": EthTx, + "eth-tx-receipt-trie": EthTxReceiptTrie, + "eth-tx-receipt": EthTxReceipt, + "eth-state-trie": EthStateTrie, + "eth-account-snapshot": EthAccountSnapshot, + "eth-storage-trie": EthStorageTrie, + "bitcoin-block": BitcoinBlock, + "bitcoin-tx": BitcoinTx, + "zcash-block": ZcashBlock, + "zcash-tx": ZcashTx, + "decred-block": DecredBlock, + "decred-tx": DecredTx, + "dash-block": DashBlock, + "dash-tx": DashTx, + "fil-commitment-unsealed": FilCommitmentUnsealed, + "fil-commitment-sealed": FilCommitmentSealed, + "dag-jose": DagJOSE, +} + +// CodecToStr maps the numeric codec to its name +var CodecToStr = map[uint64]string{ + Raw: "raw", + DagProtobuf: "protobuf", + DagCBOR: "cbor", + GitRaw: "git-raw", + EthBlock: "eth-block", + EthBlockList: "eth-block-list", + EthTxTrie: "eth-tx-trie", + EthTx: "eth-tx", + EthTxReceiptTrie: "eth-tx-receipt-trie", + EthTxReceipt: "eth-tx-receipt", + EthStateTrie: "eth-state-trie", + EthAccountSnapshot: "eth-account-snapshot", + EthStorageTrie: "eth-storage-trie", 
+ BitcoinBlock: "bitcoin-block", + BitcoinTx: "bitcoin-tx", + ZcashBlock: "zcash-block", + ZcashTx: "zcash-tx", + DecredBlock: "decred-block", + DecredTx: "decred-tx", + DashBlock: "dash-block", + DashTx: "dash-tx", + FilCommitmentUnsealed: "fil-commitment-unsealed", + FilCommitmentSealed: "fil-commitment-sealed", + DagJOSE: "dag-jose", +} + +// tryNewCidV0 tries to convert a multihash into a CIDv0 CID and returns an +// error on failure. +func tryNewCidV0(mhash mh.Multihash) (Cid, error) { + // Need to make sure hash is valid for CidV0 otherwise we will + // incorrectly detect it as CidV1 in the Version() method + dec, err := mh.Decode(mhash) + if err != nil { + return Undef, err + } + if dec.Code != mh.SHA2_256 || dec.Length != 32 { + return Undef, fmt.Errorf("invalid hash for cidv0 %d-%d", dec.Code, dec.Length) + } + return Cid{string(mhash)}, nil +} + +// NewCidV0 returns a Cid-wrapped multihash. +// They exist to allow IPFS to work with Cids while keeping +// compatibility with the plain-multihash format used in IPFS. +// NewCidV1 should be used preferentially. +// +// Panics if the multihash isn't sha2-256. +func NewCidV0(mhash mh.Multihash) Cid { + c, err := tryNewCidV0(mhash) + if err != nil { + panic(err) + } + return c +} + +// NewCidV1 returns a new Cid using the given multicodec-packed +// content type. +// +// Panics if the multihash is invalid. 
+func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { + hashlen := len(mhash) + // one byte for the version, the codec varint, plus the hash + buf := make([]byte, 1+varint.UvarintSize(codecType)+hashlen) + n := varint.PutUvarint(buf, 1) + n += varint.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") + } + + return Cid{string(buf[:n+hashlen])} +} + +var _ encoding.BinaryMarshaler = Cid{} +var _ encoding.BinaryUnmarshaler = (*Cid)(nil) +var _ encoding.TextMarshaler = Cid{} +var _ encoding.TextUnmarshaler = (*Cid)(nil) + +// Cid represents a self-describing content addressed +// identifier. It is formed by a Version, a Codec (which indicates +// a multicodec-packed content type) and a Multihash. +type Cid struct{ str string } + +// Undef can be used to represent a nil or undefined Cid, using Cid{} +// directly is also acceptable. +var Undef = Cid{} + +// Defined returns true if a Cid is defined. +// Calling any other methods on an undefined Cid will result in +// undefined behavior. +func (c Cid) Defined() bool { + return c.str != "" +} + +// Parse is a short-hand function to perform Decode, Cast etc... on +// a generic interface{} type. +func Parse(v interface{}) (Cid, error) { + switch v2 := v.(type) { + case string: + if strings.Contains(v2, "/ipfs/") { + return Decode(strings.Split(v2, "/ipfs/")[1]) + } + return Decode(v2) + case []byte: + return Cast(v2) + case mh.Multihash: + return tryNewCidV0(v2) + case Cid: + return v2, nil + default: + return Undef, fmt.Errorf("can't parse %+v as Cid", v2) + } +} + +// Decode parses a Cid-encoded string and returns a Cid object. +// For CidV1, a Cid-encoded string is primarily a multibase string: +// +// <multibase-type-code><base-encoded string> +// +// The base-encoded string represents a: +// +// <version><codec-type><multihash> +// +// Decode will also detect and parse CidV0 strings. 46-character strings +// starting with "Qm" are considered CidV0 and treated directly +// as B58-encoded multihashes. 
+func Decode(v string) (Cid, error) { + if len(v) < 2 { + return Undef, ErrCidTooShort + } + + if len(v) == 46 && v[:2] == "Qm" { + hash, err := mh.FromB58String(v) + if err != nil { + return Undef, err + } + + return tryNewCidV0(hash) + } + + _, data, err := mbase.Decode(v) + if err != nil { + return Undef, err + } + + return Cast(data) +} + +// ExtractEncoding extracts the encoding from a Cid string. If Decode on the same string did +// not return an error neither will this function. +func ExtractEncoding(v string) (mbase.Encoding, error) { + if len(v) < 2 { + return -1, ErrCidTooShort + } + + if len(v) == 46 && v[:2] == "Qm" { + return mbase.Base58BTC, nil + } + + encoding := mbase.Encoding(v[0]) + + // check encoding is valid + _, err := mbase.NewEncoder(encoding) + if err != nil { + return -1, err + } + + return encoding, nil +} + +// Cast takes a Cid data slice, parses it and returns a Cid. +// For CidV1, the data buffer is in the form: +// +// <version><codec-type><multihash> +// +// CidV0 are also supported. In particular, data buffers of +// length 34 bytes, which start with bytes [18,32...] are considered +// binary multihashes. +// +// Please use Decode when parsing a regular Cid string, as Cast does not +// expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). +func Cast(data []byte) (Cid, error) { + nr, c, err := CidFromBytes(data) + if err != nil { + return Undef, err + } + + if nr != len(data) { + return Undef, fmt.Errorf("trailing bytes in data buffer passed to cid Cast") + } + + return c, nil +} + +// UnmarshalBinary is equivalent to Cast(). It implements the +// encoding.BinaryUnmarshaler interface. +func (c *Cid) UnmarshalBinary(data []byte) error { + casted, err := Cast(data) + if err != nil { + return err + } + c.str = casted.str + return nil +} + +// UnmarshalText is equivalent to Decode(). It implements the +// encoding.TextUnmarshaler interface. 
+func (c *Cid) UnmarshalText(text []byte) error { + decodedCid, err := Decode(string(text)) + if err != nil { + return err + } + c.str = decodedCid.str + return nil +} + +// Version returns the Cid version. +func (c Cid) Version() uint64 { + if len(c.str) == 34 && c.str[0] == 18 && c.str[1] == 32 { + return 0 + } + return 1 +} + +// Type returns the multicodec-packed content type of a Cid. +func (c Cid) Type() uint64 { + if c.Version() == 0 { + return DagProtobuf + } + _, n, _ := uvarint(c.str) + codec, _, _ := uvarint(c.str[n:]) + return codec +} + +// String returns the default string representation of a +// Cid. Currently, Base32 is used for CIDV1 as the encoding for the +// multibase string, Base58 is used for CIDV0. +func (c Cid) String() string { + switch c.Version() { + case 0: + return c.Hash().B58String() + case 1: + mbstr, err := mbase.Encode(mbase.Base32, c.Bytes()) + if err != nil { + panic("should not error with hardcoded mbase: " + err.Error()) + } + + return mbstr + default: + panic("not possible to reach this point") + } +} + +// StringOfBase returns the string representation of a Cid +// encoded in the selected base +func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { + switch c.Version() { + case 0: + if base != mbase.Base58BTC { + return "", ErrInvalidEncoding + } + return c.Hash().B58String(), nil + case 1: + return mbase.Encode(base, c.Bytes()) + default: + panic("not possible to reach this point") + } +} + +// Encode returns the string representation of a Cid in a given base +// when applicable. Version 0 Cids are always in Base58 as they do +// not take a multibase prefix. +func (c Cid) Encode(base mbase.Encoder) string { + switch c.Version() { + case 0: + return c.Hash().B58String() + case 1: + return base.Encode(c.Bytes()) + default: + panic("not possible to reach this point") + } +} + +// Hash returns the multihash contained by a Cid. 
+func (c Cid) Hash() mh.Multihash { + bytes := c.Bytes() + + if c.Version() == 0 { + return mh.Multihash(bytes) + } + + // skip version length + _, n1, _ := varint.FromUvarint(bytes) + // skip codec length + _, n2, _ := varint.FromUvarint(bytes[n1:]) + + return mh.Multihash(bytes[n1+n2:]) +} + +// Bytes returns the byte representation of a Cid. +// The output of Bytes can be parsed back into a Cid +// with Cast(). +func (c Cid) Bytes() []byte { + return []byte(c.str) +} + +// ByteLen returns the length of the CID in bytes. +// It's equivalent to `len(c.Bytes())`, but works without an allocation, +// and should therefore be preferred. +// +// (See also the WriteBytes method for other important operations that work without allocation.) +func (c Cid) ByteLen() int { + return len(c.str) +} + +// WriteBytes writes the CID bytes to the given writer. +// This method works without incurring any allocation. +// +// (See also the ByteLen method for other important operations that work without allocation.) +func (c Cid) WriteBytes(w io.Writer) (int, error) { + n, err := io.WriteString(w, c.str) + if err != nil { + return n, err + } + if n != len(c.str) { + return n, fmt.Errorf("failed to write entire cid string") + } + return n, nil +} + +// MarshalBinary is equivalent to Bytes(). It implements the +// encoding.BinaryMarshaler interface. +func (c Cid) MarshalBinary() ([]byte, error) { + return c.Bytes(), nil +} + +// MarshalText is equivalent to String(). It implements the +// encoding.TextMarshaler interface. +func (c Cid) MarshalText() ([]byte, error) { + return []byte(c.String()), nil +} + +// Equals checks that two Cids are the same. +// In order for two Cids to be considered equal, the +// Version, the Codec and the Multihash must match. +func (c Cid) Equals(o Cid) bool { + return c == o +} + +// UnmarshalJSON parses the JSON representation of a Cid. 
+func (c *Cid) UnmarshalJSON(b []byte) error { + if len(b) < 2 { + return fmt.Errorf("invalid cid json blob") + } + obj := struct { + CidTarget string `json:"/"` + }{} + objptr := &obj + err := json.Unmarshal(b, &objptr) + if err != nil { + return err + } + if objptr == nil { + *c = Cid{} + return nil + } + + if obj.CidTarget == "" { + return fmt.Errorf("cid was incorrectly formatted") + } + + out, err := Decode(obj.CidTarget) + if err != nil { + return err + } + + *c = out + + return nil +} + +// MarshalJSON produces a JSON representation of a Cid, which looks as follows: +// +// { "/": "<cid-string>" } +// +// Note that this formatting comes from the IPLD specification +// (https://github.com/ipld/specs/tree/master/ipld) +func (c Cid) MarshalJSON() ([]byte, error) { + if !c.Defined() { + return []byte("null"), nil + } + return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil +} + +// KeyString returns the binary representation of the Cid as a string +func (c Cid) KeyString() string { + return c.str +} + +// Loggable returns a Loggable (as defined by +// https://godoc.org/github.com/ipfs/go-log). +func (c Cid) Loggable() map[string]interface{} { + return map[string]interface{}{ + "cid": c, + } +} + +// Prefix builds and returns a Prefix out of a Cid. +func (c Cid) Prefix() Prefix { + if c.Version() == 0 { + return Prefix{ + MhType: mh.SHA2_256, + MhLength: 32, + Version: 0, + Codec: DagProtobuf, + } + } + + offset := 0 + version, n, _ := uvarint(c.str[offset:]) + offset += n + codec, n, _ := uvarint(c.str[offset:]) + offset += n + mhtype, n, _ := uvarint(c.str[offset:]) + offset += n + mhlen, _, _ := uvarint(c.str[offset:]) + + return Prefix{ + MhType: mhtype, + MhLength: int(mhlen), + Version: version, + Codec: codec, + } +} + +// Prefix represents all the metadata of a Cid, +// that is, the Version, the Codec, the Multihash type +// and the Multihash length. It does not contain +// any actual content information. 
+// NOTE: The use of -1 in MhLength to mean default length is deprecated, +// use the V0Builder or V1Builder structures instead. +type Prefix struct { + Version uint64 + Codec uint64 + MhType uint64 + MhLength int +} + +// Sum uses the information in a prefix to perform a multihash.Sum() +// and returns a newly constructed Cid with the resulting multihash. +func (p Prefix) Sum(data []byte) (Cid, error) { + length := p.MhLength + if p.MhType == mh.ID { + length = -1 + } + + if p.Version == 0 && (p.MhType != mh.SHA2_256 || + (p.MhLength != 32 && p.MhLength != -1)) { + + return Undef, fmt.Errorf("invalid v0 prefix") + } + + hash, err := mh.Sum(data, p.MhType, length) + if err != nil { + return Undef, err + } + + switch p.Version { + case 0: + return NewCidV0(hash), nil + case 1: + return NewCidV1(p.Codec, hash), nil + default: + return Undef, fmt.Errorf("invalid cid version") + } +} + +// Bytes returns a byte representation of a Prefix. It looks like: +// +// <version><codec><mh-type><mh-length> +func (p Prefix) Bytes() []byte { + size := varint.UvarintSize(p.Version) + size += varint.UvarintSize(p.Codec) + size += varint.UvarintSize(p.MhType) + size += varint.UvarintSize(uint64(p.MhLength)) + + buf := make([]byte, size) + n := varint.PutUvarint(buf, p.Version) + n += varint.PutUvarint(buf[n:], p.Codec) + n += varint.PutUvarint(buf[n:], p.MhType) + n += varint.PutUvarint(buf[n:], uint64(p.MhLength)) + if n != size { + panic("size mismatch") + } + return buf +} + +// PrefixFromBytes parses a Prefix-byte representation into a +// Prefix. 
+func PrefixFromBytes(buf []byte) (Prefix, error) { + r := bytes.NewReader(buf) + vers, err := varint.ReadUvarint(r) + if err != nil { + return Prefix{}, err + } + + codec, err := varint.ReadUvarint(r) + if err != nil { + return Prefix{}, err + } + + mhtype, err := varint.ReadUvarint(r) + if err != nil { + return Prefix{}, err + } + + mhlen, err := varint.ReadUvarint(r) + if err != nil { + return Prefix{}, err + } + + return Prefix{ + Version: vers, + Codec: codec, + MhType: mhtype, + MhLength: int(mhlen), + }, nil +} + +func CidFromBytes(data []byte) (int, Cid, error) { + if len(data) > 2 && data[0] == mh.SHA2_256 && data[1] == 32 { + if len(data) < 34 { + return 0, Undef, fmt.Errorf("not enough bytes for cid v0") + } + + h, err := mh.Cast(data[:34]) + if err != nil { + return 0, Undef, err + } + + return 34, Cid{string(h)}, nil + } + + vers, n, err := varint.FromUvarint(data) + if err != nil { + return 0, Undef, err + } + + if vers != 1 { + return 0, Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers) + } + + _, cn, err := varint.FromUvarint(data[n:]) + if err != nil { + return 0, Undef, err + } + + mhnr, _, err := mh.MHFromBytes(data[n+cn:]) + if err != nil { + return 0, Undef, err + } + + l := n + cn + mhnr + + return l, Cid{string(data[0:l])}, nil +} diff --git a/cid_fuzz.go b/cid_fuzz.go new file mode 100644 index 0000000000000000000000000000000000000000..99842b5350cf7b450d459da43382611d77c71721 --- /dev/null +++ b/cid_fuzz.go @@ -0,0 +1,37 @@ +// +build gofuzz + +package cid + +func Fuzz(data []byte) int { + cid, err := Cast(data) + + if err != nil { + return 0 + } + + _ = cid.Bytes() + _ = cid.String() + p := cid.Prefix() + _ = p.Bytes() + + if !cid.Equals(cid) { + panic("inequality") + } + + // json loop + json, err := cid.MarshalJSON() + if err != nil { + panic(err.Error()) + } + cid2 := Cid{} + err = cid2.UnmarshalJSON(json) + if err != nil { + panic(err.Error()) + } + + if !cid.Equals(cid2) { + panic("json loop not equal") + } + + 
return 1 +} diff --git a/cid_test.go b/cid_test.go new file mode 100644 index 0000000000000000000000000000000000000000..159ded827e91f817a1dd0f10bae44e84e007e9a6 --- /dev/null +++ b/cid_test.go @@ -0,0 +1,766 @@ +package cid + +import ( + "bytes" + "encoding/json" + "fmt" + "math/rand" + "reflect" + "strings" + "testing" + + mbase "github.com/multiformats/go-multibase" + mh "github.com/multiformats/go-multihash" +) + +// Copying the "silly test" idea from +// https://github.com/multiformats/go-multihash/blob/7aa9f26a231c6f34f4e9fad52bf580fd36627285/multihash_test.go#L13 +// Makes it so changing the table accidentally has to happen twice. +var tCodecs = map[uint64]string{ + Raw: "raw", + DagProtobuf: "protobuf", + DagCBOR: "cbor", + Libp2pKey: "libp2p-key", + GitRaw: "git-raw", + EthBlock: "eth-block", + EthBlockList: "eth-block-list", + EthTxTrie: "eth-tx-trie", + EthTx: "eth-tx", + EthTxReceiptTrie: "eth-tx-receipt-trie", + EthTxReceipt: "eth-tx-receipt", + EthStateTrie: "eth-state-trie", + EthAccountSnapshot: "eth-account-snapshot", + EthStorageTrie: "eth-storage-trie", + BitcoinBlock: "bitcoin-block", + BitcoinTx: "bitcoin-tx", + ZcashBlock: "zcash-block", + ZcashTx: "zcash-tx", + DecredBlock: "decred-block", + DecredTx: "decred-tx", + DashBlock: "dash-block", + DashTx: "dash-tx", + FilCommitmentUnsealed: "fil-commitment-unsealed", + FilCommitmentSealed: "fil-commitment-sealed", + DagJOSE: "dag-jose", +} + +func assertEqual(t *testing.T, a, b Cid) { + if a.Type() != b.Type() { + t.Fatal("mismatch on type") + } + + if a.Version() != b.Version() { + t.Fatal("mismatch on version") + } + + if !bytes.Equal(a.Hash(), b.Hash()) { + t.Fatal("multihash mismatch") + } +} + +func TestTable(t *testing.T) { + if len(tCodecs) != len(Codecs)-1 { + t.Errorf("Item count mismatch in the Table of Codec. 
Should be %d, got %d", len(tCodecs)+1, len(Codecs)) + } + + for k, v := range tCodecs { + if Codecs[v] != k { + t.Errorf("Table mismatch: 0x%x %s", k, v) + } + } +} + +// The table returns cid.DagProtobuf for "v0" +// so we test it apart +func TestTableForV0(t *testing.T) { + if Codecs["v0"] != DagProtobuf { + t.Error("Table mismatch: Codecs[\"v0\"] should resolve to DagProtobuf (0x70)") + } +} + +func TestPrefixSum(t *testing.T) { + // Test creating CIDs both manually and with Prefix. + // Tests: https://github.com/ipfs/go-cid/issues/83 + for _, hashfun := range []uint64{ + mh.ID, mh.SHA3, mh.SHA2_256, + } { + h1, err := mh.Sum([]byte("TEST"), hashfun, -1) + if err != nil { + t.Fatal(err) + } + c1 := NewCidV1(Raw, h1) + + h2, err := mh.Sum([]byte("foobar"), hashfun, -1) + if err != nil { + t.Fatal(err) + } + c2 := NewCidV1(Raw, h2) + + c3, err := c1.Prefix().Sum([]byte("foobar")) + if err != nil { + t.Fatal(err) + } + if !c2.Equals(c3) { + t.Fatal("expected CIDs to be equal") + } + } +} + +func TestBasicMarshaling(t *testing.T) { + h, err := mh.Sum([]byte("TEST"), mh.SHA3, 4) + if err != nil { + t.Fatal(err) + } + + cid := NewCidV1(7, h) + + data := cid.Bytes() + + out, err := Cast(data) + if err != nil { + t.Fatal(err) + } + + assertEqual(t, cid, out) + + s := cid.String() + out2, err := Decode(s) + if err != nil { + t.Fatal(err) + } + + assertEqual(t, cid, out2) +} + +func TestBasesMarshaling(t *testing.T) { + h, err := mh.Sum([]byte("TEST"), mh.SHA3, 4) + if err != nil { + t.Fatal(err) + } + + cid := NewCidV1(7, h) + + data := cid.Bytes() + + out, err := Cast(data) + if err != nil { + t.Fatal(err) + } + + assertEqual(t, cid, out) + + testBases := []mbase.Encoding{ + mbase.Base16, + mbase.Base32, + mbase.Base32hex, + mbase.Base32pad, + mbase.Base32hexPad, + mbase.Base58BTC, + mbase.Base58Flickr, + mbase.Base64pad, + mbase.Base64urlPad, + mbase.Base64url, + mbase.Base64, + } + + for _, b := range testBases { + s, err := cid.StringOfBase(b) + if err != nil { + 
t.Fatal(err) + } + + if s[0] != byte(b) { + t.Fatal("Invalid multibase header") + } + + out2, err := Decode(s) + if err != nil { + t.Fatal(err) + } + + assertEqual(t, cid, out2) + + encoder, err := mbase.NewEncoder(b) + if err != nil { + t.Fatal(err) + } + s2 := cid.Encode(encoder) + if s != s2 { + t.Fatalf("%q != %q", s, s2) + } + + ee, err := ExtractEncoding(s) + if err != nil { + t.Fatal(err) + } + if ee != b { + t.Fatalf("could not properly determine base (got %v)", ee) + } + } + + ee, err := ExtractEncoding("QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n") + if err != nil { + t.Fatal(err) + } + if ee != mbase.Base58BTC { + t.Fatalf("expected Base58BTC from Qm string (got %v)", ee) + } + + ee, err = ExtractEncoding("1") + if err == nil { + t.Fatal("expected too-short error from ExtractEncoding") + } + if ee != -1 { + t.Fatal("expected -1 from too-short ExtractEncoding") + } +} + +func TestBinaryMarshaling(t *testing.T) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + c := NewCidV1(DagCBOR, hash) + var c2 Cid + var c3 Cid + + data, err := c.MarshalBinary() + if err != nil { + t.Fatal(err) + } + if err = c2.UnmarshalBinary(data); err != nil { + t.Fatal(err) + } + if !c.Equals(c2) { + t.Errorf("cids should be the same: %s %s", c, c2) + } + var buf bytes.Buffer + wrote, err := c.WriteBytes(&buf) + if err != nil { + t.Fatal(err) + } + if wrote != 36 { + t.Fatalf("expected 36 bytes written (got %d)", wrote) + } + if err = c3.UnmarshalBinary(data); err != nil { + t.Fatal(err) + } + if !c.Equals(c3) { + t.Errorf("cids should be the same: %s %s", c, c3) + } +} + +func TestTextMarshaling(t *testing.T) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + c := NewCidV1(DagCBOR, hash) + var c2 Cid + + data, err := c.MarshalText() + if err != nil { + t.Fatal(err) + } + if err = c2.UnmarshalText(data); err != nil { + t.Fatal(err) + } + if !c.Equals(c2) { + t.Errorf("cids should be the same: %s 
%s", c, c2) + } + + if c.KeyString() != string(c.Bytes()) { + t.Errorf("got unexpected KeyString() result") + } +} + +func TestEmptyString(t *testing.T) { + _, err := Decode("") + if err == nil { + t.Fatal("shouldnt be able to parse an empty cid") + } +} + +func TestV0Handling(t *testing.T) { + old := "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n" + + cid, err := Decode(old) + if err != nil { + t.Fatal(err) + } + + if cid.Version() != 0 { + t.Fatal("should have gotten version 0 cid") + } + + if cid.Hash().B58String() != old { + t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old) + } + + if cid.String() != old { + t.Fatal("marshaling roundtrip failed") + } + + byteLen := cid.ByteLen() + if byteLen != 34 { + t.Fatalf("expected V0 ByteLen to be 34 (got %d)", byteLen) + } + + new, err := cid.StringOfBase(mbase.Base58BTC) + if err != nil { + t.Fatal(err) + } + if new != old { + t.Fatal("StringOfBase roundtrip failed") + } + + encoder, err := mbase.NewEncoder(mbase.Base58BTC) + if err != nil { + t.Fatal(err) + } + if cid.Encode(encoder) != old { + t.Fatal("Encode roundtrip failed") + } + + _, err = cid.StringOfBase(mbase.Base32) + if err != ErrInvalidEncoding { + t.Fatalf("expected ErrInvalidEncoding for V0 StringOfBase(Base32) (got %v)", err) + } +} + +func TestV0ErrorCases(t *testing.T) { + badb58 := "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zIII" + _, err := Decode(badb58) + if err == nil { + t.Fatal("should have failed to decode that ref") + } +} + +func TestNewPrefixV1(t *testing.T) { + data := []byte("this is some test content") + + // Construct c1 + prefix := NewPrefixV1(DagCBOR, mh.SHA2_256) + c1, err := prefix.Sum(data) + if err != nil { + t.Fatal(err) + } + + if c1.Prefix() != prefix { + t.Fatal("prefix not preserved") + } + + // Construct c2 + hash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + c2 := NewCidV1(DagCBOR, hash) + + if !c1.Equals(c2) { + t.Fatal("cids mismatch") + } + if c1.Prefix() != 
c2.Prefix() { + t.Fatal("prefixes mismatch") + } +} + +func TestNewPrefixV0(t *testing.T) { + data := []byte("this is some test content") + + // Construct c1 + prefix := NewPrefixV0(mh.SHA2_256) + c1, err := prefix.Sum(data) + if err != nil { + t.Fatal(err) + } + + if c1.Prefix() != prefix { + t.Fatal("prefix not preserved") + } + + // Construct c2 + hash, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal(err) + } + c2 := NewCidV0(hash) + + if !c1.Equals(c2) { + t.Fatal("cids mismatch") + } + if c1.Prefix() != c2.Prefix() { + t.Fatal("prefixes mismatch") + } + +} + +func TestInvalidV0Prefix(t *testing.T) { + tests := []Prefix{ + { + MhType: mh.SHA2_256, + MhLength: 31, + }, + { + MhType: mh.SHA2_256, + MhLength: 33, + }, + { + MhType: mh.SHA2_256, + MhLength: -2, + }, + { + MhType: mh.SHA2_512, + MhLength: 32, + }, + { + MhType: mh.SHA2_512, + MhLength: -1, + }, + } + + for i, p := range tests { + t.Log(i) + _, err := p.Sum([]byte("testdata")) + if err == nil { + t.Fatalf("should error (index %d)", i) + } + } +} + +func TestBadPrefix(t *testing.T) { + p := Prefix{Version: 3, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: 3} + _, err := p.Sum([]byte{0x00, 0x01, 0x03}) + if err == nil { + t.Fatalf("expected error on v3 prefix Sum") + } +} + +func TestPrefixRoundtrip(t *testing.T) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + c := NewCidV1(DagCBOR, hash) + + pref := c.Prefix() + + c2, err := pref.Sum(data) + if err != nil { + t.Fatal(err) + } + + if !c.Equals(c2) { + t.Fatal("output didnt match original") + } + + pb := pref.Bytes() + + pref2, err := PrefixFromBytes(pb) + if err != nil { + t.Fatal(err) + } + + if pref.Version != pref2.Version || pref.Codec != pref2.Codec || + pref.MhType != pref2.MhType || pref.MhLength != pref2.MhLength { + t.Fatal("input prefix didnt match output") + } +} + +func TestBadPrefixFromBytes(t *testing.T) { + _, err := PrefixFromBytes([]byte{0x80}) + if err == nil { + 
t.Fatal("expected error for bad byte 0") + } + _, err = PrefixFromBytes([]byte{0x01, 0x80}) + if err == nil { + t.Fatal("expected error for bad byte 1") + } + _, err = PrefixFromBytes([]byte{0x01, 0x01, 0x80}) + if err == nil { + t.Fatal("expected error for bad byte 2") + } + _, err = PrefixFromBytes([]byte{0x01, 0x01, 0x01, 0x80}) + if err == nil { + t.Fatal("expected error for bad byte 3") + } +} + +func Test16BytesVarint(t *testing.T) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + c := NewCidV1(1<<63, hash) + _ = c.Bytes() +} + +func TestFuzzCid(t *testing.T) { + buf := make([]byte, 128) + for i := 0; i < 200; i++ { + s := rand.Intn(128) + rand.Read(buf[:s]) + _, _ = Cast(buf[:s]) + } +} + +func TestParse(t *testing.T) { + cid, err := Parse(123) + if err == nil { + t.Fatalf("expected error from Parse()") + } + if !strings.Contains(err.Error(), "can't parse 123 as Cid") { + t.Fatalf("expected int error, got %s", err.Error()) + } + + theHash := "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n" + h, err := mh.FromB58String(theHash) + if err != nil { + t.Fatal(err) + } + + assertions := [][]interface{}{ + []interface{}{NewCidV0(h), theHash}, + []interface{}{NewCidV0(h).Bytes(), theHash}, + []interface{}{h, theHash}, + []interface{}{theHash, theHash}, + []interface{}{"/ipfs/" + theHash, theHash}, + []interface{}{"https://ipfs.io/ipfs/" + theHash, theHash}, + []interface{}{"http://localhost:8080/ipfs/" + theHash, theHash}, + } + + assert := func(arg interface{}, expected string) error { + cid, err = Parse(arg) + if err != nil { + return err + } + if cid.Version() != 0 { + return fmt.Errorf("expected version 0, got %d", cid.Version()) + } + actual := cid.Hash().B58String() + if actual != expected { + return fmt.Errorf("expected hash %s, got %s", expected, actual) + } + actual = cid.String() + if actual != expected { + return fmt.Errorf("expected string %s, got %s", expected, actual) + } + return nil + } + + for _, args := 
range assertions { + if err := assert(args[0], args[1].(string)); err != nil { + t.Fatal(err) + } + } +} + +func TestHexDecode(t *testing.T) { + hexcid := "f015512209d8453505bdc6f269678e16b3e56c2a2948a41f2c792617cc9611ed363c95b63" + c, err := Decode(hexcid) + if err != nil { + t.Fatal(err) + } + + if c.String() != "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm" { + t.Fatal("hash value failed to round trip decoding from hex") + } +} + +func ExampleDecode() { + encoded := "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm" + c, err := Decode(encoded) + if err != nil { + fmt.Printf("Error: %s", err) + return + } + + fmt.Println(c) + // Output: bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm +} + +func TestFromJson(t *testing.T) { + cval := "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm" + jsoncid := []byte(`{"/":"` + cval + `"}`) + var c Cid + if err := json.Unmarshal(jsoncid, &c); err != nil { + t.Fatal(err) + } + + if c.String() != cval { + t.Fatal("json parsing failed") + } +} + +func TestJsonRoundTrip(t *testing.T) { + expectedJSON := `{"/":"bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm"}` + exp, err := Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm") + if err != nil { + t.Fatal(err) + } + + // Verify it works for a *Cid. + enc, err := json.Marshal(exp) + if err != nil { + t.Fatal(err) + } + var actual Cid + if err = json.Unmarshal(enc, &actual); err != nil { + t.Fatal(err) + } + if !exp.Equals(actual) { + t.Fatal("cids not equal for *Cid") + } + + if string(enc) != expectedJSON { + t.Fatalf("did not get expected JSON form (got %q)", string(enc)) + } + + // Verify it works for a Cid. 
+ enc, err = json.Marshal(exp) + if err != nil { + t.Fatal(err) + } + var actual2 Cid + if err = json.Unmarshal(enc, &actual2); err != nil { + t.Fatal(err) + } + if !exp.Equals(actual2) { + t.Fatal("cids not equal for Cid") + } + + if err = actual2.UnmarshalJSON([]byte("1")); err == nil { + t.Fatal("expected error for too-short JSON") + } + + if err = actual2.UnmarshalJSON([]byte(`{"nope":"nope"}`)); err == nil { + t.Fatal("expected error for bad CID JSON") + } + + if err = actual2.UnmarshalJSON([]byte(`bad "" json!`)); err == nil { + t.Fatal("expected error for bad JSON") + } + + var actual3 Cid + enc, err = actual3.MarshalJSON() + if err != nil { + t.Fatal(err) + } + if string(enc) != "null" { + t.Fatalf("expected 'null' string for undefined CID (got %q)", string(enc)) + } +} + +func BenchmarkStringV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.String()) + } + if count != 49*b.N { + b.FailNow() + } +} + +func TestReadCidsFromBuffer(t *testing.T) { + cidstr := []string{ + "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm", + "k2cwueckqkibutvhkr4p2ln2pjcaxaakpd9db0e7j7ax1lxhhxy3ekpv", + "Qmf5Qzp6nGBku7CEn2UQx4mgN8TW69YUok36DrGa6NN893", + "zb2rhZi1JR4eNc2jBGaRYJKYM8JEB4ovenym8L1CmFsRAytkz", + } + + var cids []Cid + var buf []byte + for _, cs := range cidstr { + c, err := Decode(cs) + if err != nil { + t.Fatal(err) + } + cids = append(cids, c) + buf = append(buf, c.Bytes()...) 
+ } + + var cur int + for _, expc := range cids { + n, c, err := CidFromBytes(buf[cur:]) + if err != nil { + t.Fatal(err) + } + if c != expc { + t.Fatal("cids mismatched") + } + cur += n + } + if cur != len(buf) { + t.Fatal("had trailing bytes") + } +} + +func TestBadCidFromBytes(t *testing.T) { + l, c, err := CidFromBytes([]byte{mh.SHA2_256, 32, 0x00}) + if err == nil { + t.Fatal("expected not-enough-bytes for V0 CidFromBytes") + } + if l != 0 { + t.Fatal("expected length=0 from bad CidFromBytes") + } + if c != Undef { + t.Fatal("expected Undef CID from bad CidFromBytes") + } + + c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm") + if err != nil { + t.Fatal(err) + } + byts := make([]byte, c.ByteLen()) + copy(byts, c.Bytes()) + byts[1] = 0x80 // bad codec varint + byts[2] = 0x00 + l, c, err = CidFromBytes(byts) + if err == nil { + t.Fatal("expected not-enough-bytes for V1 CidFromBytes") + } + if l != 0 { + t.Fatal("expected length=0 from bad CidFromBytes") + } + if c != Undef { + t.Fatal("expected Undef CID from bad CidFromBytes") + } + + copy(byts, c.Bytes()) + byts[2] = 0x80 // bad multihash varint + byts[3] = 0x00 + l, c, err = CidFromBytes(byts) + if err == nil { + t.Fatal("expected not-enough-bytes for V1 CidFromBytes") + } + if l != 0 { + t.Fatal("expected length=0 from bad CidFromBytes") + } + if c != Undef { + t.Fatal("expected Undef CID from bad CidFromBytes") + } +} + +func TestBadParse(t *testing.T) { + hash, err := mh.Sum([]byte("foobar"), mh.SHA3_256, -1) + if err != nil { + t.Fatal(err) + } + _, err = Parse(hash) + if err == nil { + t.Fatal("expected to fail to parse an invalid CIDv1 CID") + } +} + +func TestLoggable(t *testing.T) { + c, err := Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm") + if err != nil { + t.Fatal(err) + } + actual := c.Loggable() + expected := make(map[string]interface{}) + expected["cid"] = c + if !reflect.DeepEqual(actual, expected) { + t.Fatalf("did not get expected loggable 
form (got %v)", actual) + } +} diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000000000000000000000000000000000..5f88a9ea2785f8dfafe65d5c5fa9663de93ff423 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,3 @@ +coverage: + range: "50...100" +comment: off diff --git a/deprecated.go b/deprecated.go new file mode 100644 index 0000000000000000000000000000000000000000..cd889f984a714822eb21f79f98be756eec9723e5 --- /dev/null +++ b/deprecated.go @@ -0,0 +1,28 @@ +package cid + +import ( + mh "github.com/multiformats/go-multihash" +) + +// NewPrefixV0 returns a CIDv0 prefix with the specified multihash type. +// DEPRECATED: Use V0Builder +func NewPrefixV0(mhType uint64) Prefix { + return Prefix{ + MhType: mhType, + MhLength: mh.DefaultLengths[mhType], + Version: 0, + Codec: DagProtobuf, + } +} + +// NewPrefixV1 returns a CIDv1 prefix with the specified codec and multihash +// type. +// DEPRECATED: Use V1Builder +func NewPrefixV1(codecType uint64, mhType uint64) Prefix { + return Prefix{ + MhType: mhType, + MhLength: mh.DefaultLengths[mhType], + Version: 1, + Codec: codecType, + } +} diff --git a/fuzz-data/corpus/cid0 b/fuzz-data/corpus/cid0 new file mode 100644 index 0000000000000000000000000000000000000000..56fd786c0aa2a972305c18a891ba7909ec7a50cb --- /dev/null +++ b/fuzz-data/corpus/cid0 @@ -0,0 +1 @@ + gD1e-D/q3~(7`8n \ No newline at end of file diff --git a/fuzz-data/corpus/cid1 b/fuzz-data/corpus/cid1 new file mode 100644 index 0000000000000000000000000000000000000000..e0420b60163dbd7b4544a1108e1c63d56ac92280 --- /dev/null +++ b/fuzz-data/corpus/cid1 @@ -0,0 +1 @@ +q -[ïh[ (ΰ[)D \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000000000000000000000000000000000000..68c91a19a35fc21aab2319de347f9f954327ca32 --- /dev/null +++ b/go.mod @@ -0,0 +1,9 @@ +module github.com/ipfs/go-cid + +require ( + github.com/multiformats/go-multibase v0.0.3 + github.com/multiformats/go-multihash v0.0.14 + 
github.com/multiformats/go-varint v0.0.6 +) + +go 1.13 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000000000000000000000000000000000000..b7d4586e107b8027890b837177a1ee00a328e4e1 --- /dev/null +++ b/go.sum @@ -0,0 +1,30 @@ +github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g= +github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= +github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771 h1:MHkK1uRtFbVqvAgvWxafZe54+5uBxLluGylDiKgdhwo= +github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/mr-tron/base58 v1.1.0 h1:Y51FGVJ91WBqCEabAi5OPUz38eAx8DakuAm5svLcsfQ= +github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8= +github.com/mr-tron/base58 v1.1.3 h1:v+sk57XuaCKGXpWtVBX8YJzO7hMGx4Aajh4TQbdEFdc= +github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI= +github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA= +github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ89tUg4F4= +github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM= +github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk= +github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= +github.com/multiformats/go-multihash v0.0.13 h1:06x+mk/zj1FoMsgNejLpy6QTvJqlSt/BhLEy87zidlc= +github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= +github.com/multiformats/go-multihash v0.0.14/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= +github.com/multiformats/go-varint v0.0.5 
h1:XVZwSo04Cs3j/jS0uAEPpT3JY6DzMcVLLoWOSnCxOjg= +github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 h1:1wopBVtVdWnn03fZelqdXTqk7U7zPQCb+T4rbU9ZEoU= +golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/set.go b/set.go new file mode 100644 index 0000000000000000000000000000000000000000..eb3b3f0dc15bd40037e98937a0522bdb350ace8a --- /dev/null +++ b/set.go @@ -0,0 +1,65 @@ +package cid + +// Set is an implementation of a set of Cids, that is, a structure +// which holds a single copy of every Cid that is added to it. +type Set struct { + set map[Cid]struct{} +} + +// NewSet initializes and returns a new Set. +func NewSet() *Set { + return &Set{set: make(map[Cid]struct{})} +} + +// Add puts a Cid in the Set. +func (s *Set) Add(c Cid) { + s.set[c] = struct{}{} +} + +// Has returns whether the Set contains a given Cid. +func (s *Set) Has(c Cid) bool { + _, ok := s.set[c] + return ok +} + +// Remove deletes a Cid from the Set. 
+func (s *Set) Remove(c Cid) { + delete(s.set, c) +} + +// Len returns how many elements the Set has. +func (s *Set) Len() int { + return len(s.set) +} + +// Keys returns the Cids in the set. +func (s *Set) Keys() []Cid { + out := make([]Cid, 0, len(s.set)) + for k := range s.set { + out = append(out, k) + } + return out +} + +// Visit adds a Cid to the set only if it is +// not in it already. +func (s *Set) Visit(c Cid) bool { + if !s.Has(c) { + s.Add(c) + return true + } + + return false +} + +// ForEach runs a custom function on each +// Cid in the set, stopping at the first error. +func (s *Set) ForEach(f func(c Cid) error) error { + for c := range s.set { + err := f(c) + if err != nil { + return err + } + } + return nil +} diff --git a/set_test.go b/set_test.go new file mode 100644 index 0000000000000000000000000000000000000000..fa553d0c3645472048efeb6e3ca67f0a9a40f4a3 --- /dev/null +++ b/set_test.go @@ -0,0 +1,88 @@ +package cid + +import ( + "crypto/rand" + "errors" + "testing" + + mh "github.com/multiformats/go-multihash" +) + +func makeRandomCid(t *testing.T) Cid { + p := make([]byte, 256) + _, err := rand.Read(p) + if err != nil { + t.Fatal(err) + } + + h, err := mh.Sum(p, mh.SHA3, 4) + if err != nil { + t.Fatal(err) + } + + cid := NewCidV1(7, h) + + return cid +} + +func TestSet(t *testing.T) { + cid := makeRandomCid(t) + cid2 := makeRandomCid(t) + s := NewSet() + + s.Add(cid) + + if !s.Has(cid) { + t.Error("should have the CID") + } + + if s.Len() != 1 { + t.Error("should report 1 element") + } + + keys := s.Keys() + + if len(keys) != 1 || !keys[0].Equals(cid) { + t.Error("key should correspond to Cid") + } + + if s.Visit(cid) { + t.Error("visit should return false") + } + + foreach := []Cid{} + foreachF := func(c Cid) error { + foreach = append(foreach, c) + return nil + } + + if err := s.ForEach(foreachF); err != nil { + t.Error(err) + } + + if len(foreach) != 1 { + t.Error("ForEach should have visited 1 element") + } + + foreachErr := func(c Cid) error { + return 
errors.New("test") + } + + if err := s.ForEach(foreachErr); err == nil { + t.Error("Should have returned an error") + } + + if !s.Visit(cid2) { + t.Error("should have visited a new Cid") + } + + if s.Len() != 2 { + t.Error("len should be 2 now") + } + + s.Remove(cid2) + + if s.Len() != 1 { + t.Error("len should be 1 now") + } +} diff --git a/varint.go b/varint.go new file mode 100644 index 0000000000000000000000000000000000000000..e25c843d647f9d0260d2880742df6c607d6e0e19 --- /dev/null +++ b/varint.go @@ -0,0 +1,37 @@ +package cid + +import ( + "github.com/multiformats/go-varint" +) + +// Version of varint function that works with a string rather than +// []byte to avoid unnecessary allocation + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license as given at https://golang.org/LICENSE + +// uvarint decodes a uint64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, then 0 is +// returned for both the value and the number of bytes read, and an +// error is returned. +func uvarint(buf string) (uint64, int, error) { + var x uint64 + var s uint + // we have a binary string so we can't use a range loop + for i := 0; i < len(buf); i++ { + b := buf[i] + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return 0, 0, varint.ErrOverflow + } + if b == 0 && i > 0 { + return 0, 0, varint.ErrNotMinimal + } + return x | uint64(b)<