cid.go 13.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Package cid implements the Content-IDentifiers specification
// (https://github.com/ipld/cid) in Go. CIDs are
// self-describing content-addressed identifiers useful for
// distributed information systems. CIDs are used in the IPFS
// (https://ipfs.io) project ecosystem.
//
// CIDs have two major versions. A CIDv0 corresponds to a multihash of type
// DagProtobuf, is deprecated and exists for compatibility reasons. Usually,
// CIDv1 should be used.
//
// A CIDv1 has four parts:
//
//     <cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address>
//
// As shown above, the CID implementation relies heavily on Multiformats,
// particularly Multibase
// (https://github.com/multiformats/go-multibase), Multicodec
// (https://github.com/multiformats/multicodec) and Multihash
// implementations (https://github.com/multiformats/go-multihash).
Jeromy's avatar
Jeromy committed
20 21 22
package cid

import (
23
	"bytes"
Jeromy's avatar
Jeromy committed
24
	"encoding/binary"
Jeromy's avatar
Jeromy committed
25
	"encoding/json"
Jakub Sztandera's avatar
Jakub Sztandera committed
26
	"errors"
Jeromy's avatar
Jeromy committed
27
	"fmt"
28
	"strings"
Jeromy's avatar
Jeromy committed
29

Jeromy's avatar
Jeromy committed
30
	mbase "github.com/multiformats/go-multibase"
Jeromy's avatar
Jeromy committed
31
	mh "github.com/multiformats/go-multihash"
Jeromy's avatar
Jeromy committed
32 33
)

34
// UnsupportedVersionString is the message text used to describe a Cid whose
// version is not supported.
const UnsupportedVersionString = "<unsupported cid version>"

37 38 39 40 41 42 43 44 45 46 47 48 49
var (
	// ErrVarintBuffSmall means that a buffer passed to the cid parser was not
	// long enough, or did not contain a valid cid.
	ErrVarintBuffSmall = errors.New("reading varint: buffer too small")

	// ErrVarintTooBig means that a varint in the given cid does not fit in
	// 64 bits.
	ErrVarintTooBig = errors.New("reading varint: varint bigger than 64bits" +
		" and not supported")

	// ErrCidTooShort means that the cid passed to decode was not long
	// enough to be a valid Cid.
	ErrCidTooShort = errors.New("cid too short")

	// ErrInvalidEncoding means that the selected encoding is not supported
	// by this Cid version.
	ErrInvalidEncoding = errors.New("invalid base encoding")
)

56 57 58
// These are multicodec-packed content types. They should match
// the codes described in the authoritative document:
// https://github.com/multiformats/multicodec/blob/master/table.csv
const (
	Raw = 0x55

	DagProtobuf = 0x70
	DagCBOR     = 0x71

	GitRaw = 0x78

	EthBlock           = 0x90
	EthBlockList       = 0x91
	EthTxTrie          = 0x92
	EthTx              = 0x93
	EthTxReceiptTrie   = 0x94
	EthTxReceipt       = 0x95
	EthStateTrie       = 0x96
	EthAccountSnapshot = 0x97
	EthStorageTrie     = 0x98
	BitcoinBlock       = 0xb0
	BitcoinTx          = 0xb1
	ZcashBlock         = 0xc0
	ZcashTx            = 0xc1
	DecredBlock        = 0xe0
	DecredTx           = 0xe1
)

Herman Junge's avatar
Herman Junge committed
84
// Codecs maps the name of a codec to its type
Herman Junge's avatar
Herman Junge committed
85
var Codecs = map[string]uint64{
Herman Junge's avatar
Herman Junge committed
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
	"v0":                   DagProtobuf,
	"raw":                  Raw,
	"protobuf":             DagProtobuf,
	"cbor":                 DagCBOR,
	"git-raw":              GitRaw,
	"eth-block":            EthBlock,
	"eth-block-list":       EthBlockList,
	"eth-tx-trie":          EthTxTrie,
	"eth-tx":               EthTx,
	"eth-tx-receipt-trie":  EthTxReceiptTrie,
	"eth-tx-receipt":       EthTxReceipt,
	"eth-state-trie":       EthStateTrie,
	"eth-account-snapshot": EthAccountSnapshot,
	"eth-storage-trie":     EthStorageTrie,
	"bitcoin-block":        BitcoinBlock,
	"bitcoin-tx":           BitcoinTx,
	"zcash-block":          ZcashBlock,
	"zcash-tx":             ZcashTx,
Hector Sanjuan's avatar
Hector Sanjuan committed
104 105
	"decred-block":         DecredBlock,
	"decred-tx":            DecredTx,
Herman Junge's avatar
Herman Junge committed
106 107
}

108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
// CodecToStr maps the numeric codec to its name
var CodecToStr = map[uint64]string{
	Raw:                "raw",
	DagProtobuf:        "protobuf",
	DagCBOR:            "cbor",
	GitRaw:             "git-raw",
	EthBlock:           "eth-block",
	EthBlockList:       "eth-block-list",
	EthTxTrie:          "eth-tx-trie",
	EthTx:              "eth-tx",
	EthTxReceiptTrie:   "eth-tx-receipt-trie",
	EthTxReceipt:       "eth-tx-receipt",
	EthStateTrie:       "eth-state-trie",
	EthAccountSnapshot: "eth-account-snapshot",
	EthStorageTrie:     "eth-storage-trie",
	BitcoinBlock:       "bitcoin-block",
	BitcoinTx:          "bitcoin-tx",
	ZcashBlock:         "zcash-block",
	ZcashTx:            "zcash-tx",
Hector Sanjuan's avatar
Hector Sanjuan committed
127 128
	DecredBlock:        "decred-block",
	DecredTx:           "decred-tx",
129 130
}

131 132 133 134 135
// NewCidV0 returns a Cid-wrapped multihash.
// They exist to allow IPFS to work with Cids while keeping
// compatibility with the plain-multihash format used used in IPFS.
// NewCidV1 should be used preferentially.
func NewCidV0(mhash mh.Multihash) *Cid {
136 137
	return &Cid{
		version: 0,
Jeromy's avatar
Jeromy committed
138
		codec:   DagProtobuf,
139
		hash:    mhash,
140 141 142
	}
}

143 144 145
// NewCidV1 returns a new Cid using the given multicodec-packed
// content type.
func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid {
146 147
	return &Cid{
		version: 1,
148 149
		codec:   codecType,
		hash:    mhash,
150 151 152
	}
}

153 154 155
// Cid represents a self-describing content adressed
// identifier. It is formed by a Version, a Codec (which indicates
// a multicodec-packed content type) and a Multihash.
Jeromy's avatar
Jeromy committed
156
type Cid struct {
157 158 159
	version uint64
	codec   uint64
	hash    mh.Multihash
Jeromy's avatar
Jeromy committed
160 161
}

162 163
// Parse is a short-hand function to perform Decode, Cast etc... on
// a generic interface{} type.
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
func Parse(v interface{}) (*Cid, error) {
	switch v2 := v.(type) {
	case string:
		if strings.Contains(v2, "/ipfs/") {
			return Decode(strings.Split(v2, "/ipfs/")[1])
		}
		return Decode(v2)
	case []byte:
		return Cast(v2)
	case mh.Multihash:
		return NewCidV0(v2), nil
	case *Cid:
		return v2, nil
	default:
		return nil, fmt.Errorf("can't parse %+v as Cid", v2)
	}
}

182 183 184 185 186 187 188 189 190 191 192 193
// Decode parses a Cid-encoded string and returns a Cid object.
// For CidV1, a Cid-encoded string is primarily a multibase string:
//
//     <multibase-type-code><base-encoded-string>
//
// The base-encoded string represents a:
//
// <version><codec-type><multihash>
//
// Decode will also detect and parse CidV0 strings. Strings
// starting with "Qm" are considered CidV0 and treated directly
// as B58-encoded multihashes.
Jeromy's avatar
Jeromy committed
194
func Decode(v string) (*Cid, error) {
Jeromy's avatar
Jeromy committed
195
	if len(v) < 2 {
196
		return nil, ErrCidTooShort
Jeromy's avatar
Jeromy committed
197 198
	}

Jeromy's avatar
Jeromy committed
199 200 201 202 203 204
	if len(v) == 46 && v[:2] == "Qm" {
		hash, err := mh.FromB58String(v)
		if err != nil {
			return nil, err
		}

205
		return NewCidV0(hash), nil
Jeromy's avatar
Jeromy committed
206 207
	}

Jeromy's avatar
Jeromy committed
208 209 210 211 212 213 214 215
	_, data, err := mbase.Decode(v)
	if err != nil {
		return nil, err
	}

	return Cast(data)
}

216 217 218 219 220
// Extract the encoding from a Cid.  If Decode on the same string did
// not return an error neither will this function.
func ExtractEncoding(v string) (mbase.Encoding, error) {
	if len(v) < 2 {
		return -1, ErrCidTooShort
Jeromy's avatar
Jeromy committed
221 222
	}

223 224 225 226 227 228 229 230 231 232 233
	if len(v) == 46 && v[:2] == "Qm" {
		return mbase.Base58BTC, nil
	}

	encoding := mbase.Encoding(v[0])

	// check encoding is valid
	_, err := mbase.NewEncoder(encoding)
	if err != nil {
		return -1, err
	}
234

235
	return encoding, nil
Jeromy's avatar
Jeromy committed
236 237
}

Jakub Sztandera's avatar
Jakub Sztandera committed
238 239 240 241 242 243 244 245 246 247 248
// uvError translates the byte count reported by binary.Uvarint into an
// error: zero maps to ErrVarintBuffSmall, a negative count maps to
// ErrVarintTooBig, and a positive count means success (nil).
func uvError(read int) error {
	if read == 0 {
		return ErrVarintBuffSmall
	}
	if read < 0 {
		return ErrVarintTooBig
	}
	return nil
}

249 250 251 252 253 254 255 256 257 258 259
// Cast takes a Cid data slice, parses it and returns a Cid.
// For CidV1, the data buffer is in the form:
//
//     <version><codec-type><multihash>
//
// CidV0 are also supported. In particular, data buffers starting
// with length 34 bytes, which starts with bytes [18,32...] are considered
// binary multihashes.
//
// Please use decode when parsing a regular Cid string, as Cast does not
// expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
Jeromy's avatar
Jeromy committed
260
func Cast(data []byte) (*Cid, error) {
261 262 263 264 265 266 267
	if len(data) == 34 && data[0] == 18 && data[1] == 32 {
		h, err := mh.Cast(data)
		if err != nil {
			return nil, err
		}

		return &Cid{
Jeromy's avatar
Jeromy committed
268
			codec:   DagProtobuf,
269 270 271 272 273
			version: 0,
			hash:    h,
		}, nil
	}

Jeromy's avatar
Jeromy committed
274
	vers, n := binary.Uvarint(data)
Jakub Sztandera's avatar
Jakub Sztandera committed
275 276 277 278
	if err := uvError(n); err != nil {
		return nil, err
	}

279 280 281 282
	if vers != 0 && vers != 1 {
		return nil, fmt.Errorf("invalid cid version number: %d", vers)
	}

Jeromy's avatar
Jeromy committed
283
	codec, cn := binary.Uvarint(data[n:])
Jakub Sztandera's avatar
Jakub Sztandera committed
284 285 286
	if err := uvError(cn); err != nil {
		return nil, err
	}
Jeromy's avatar
Jeromy committed
287 288 289 290 291 292 293 294

	rest := data[n+cn:]
	h, err := mh.Cast(rest)
	if err != nil {
		return nil, err
	}

	return &Cid{
295 296 297
		version: vers,
		codec:   codec,
		hash:    h,
Jeromy's avatar
Jeromy committed
298 299 300
	}, nil
}

301
// Type returns the multicodec-packed content type of a Cid.
302 303 304 305
func (c *Cid) Type() uint64 {
	return c.codec
}

306 307 308
// String returns the default string representation of a
// Cid. Currently, Base58 is used as the encoding for the
// multibase string.
Jeromy's avatar
Jeromy committed
309
func (c *Cid) String() string {
310
	switch c.version {
Jeromy's avatar
Jeromy committed
311
	case 0:
312
		return c.hash.B58String()
Jeromy's avatar
Jeromy committed
313 314 315 316 317 318 319 320
	case 1:
		mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1())
		if err != nil {
			panic("should not error with hardcoded mbase: " + err.Error())
		}

		return mbstr
	default:
321
		panic("not possible to reach this point")
Jeromy's avatar
Jeromy committed
322 323 324
	}
}

Łukasz Magiera's avatar
Łukasz Magiera committed
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
// StringOfBase returns the string representation of a Cid
// encoded in the selected base.
//
// A version-0 Cid can only be rendered as Base58BTC; any other base
// yields ErrInvalidEncoding. A version-1 Cid is multibase-encoded with
// the given base. StringOfBase panics for any other version.
func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) {
	if c.version == 1 {
		return mbase.Encode(base, c.bytesV1())
	}
	if c.version != 0 {
		panic("not possible to reach this point")
	}

	// Version 0 from here on.
	if base != mbase.Base58BTC {
		return "", ErrInvalidEncoding
	}
	return c.hash.B58String(), nil
}

341 342 343
// Encode return the string representation of a Cid in a given base
// when applicable
func (c *Cid) Encode(base mbase.Encoder) string {
344 345 346 347 348 349 350 351 352 353
	switch c.version {
	case 0:
		return c.hash.B58String()
	case 1:
		return base.Encode(c.bytesV1())
	default:
		panic("not possible to reach this point")
	}
}

354
// Hash returns the multihash contained by a Cid.
355 356 357 358
func (c *Cid) Hash() mh.Multihash {
	return c.hash
}

359 360 361
// Bytes returns the byte representation of a Cid.
// The output of bytes can be parsed back into a Cid
// with Cast().
362 363
func (c *Cid) Bytes() []byte {
	switch c.version {
Jeromy's avatar
Jeromy committed
364
	case 0:
365
		return c.bytesV0()
Jeromy's avatar
Jeromy committed
366
	case 1:
367
		return c.bytesV1()
Jeromy's avatar
Jeromy committed
368
	default:
369
		panic("not possible to reach this point")
Jeromy's avatar
Jeromy committed
370 371 372 373
	}
}

func (c *Cid) bytesV0() []byte {
374
	return []byte(c.hash)
Jeromy's avatar
Jeromy committed
375 376 377
}

func (c *Cid) bytesV1() []byte {
378
	// two 8 bytes (max) numbers plus hash
379
	buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash))
380 381
	n := binary.PutUvarint(buf, c.version)
	n += binary.PutUvarint(buf[n:], c.codec)
382 383 384 385
	cn := copy(buf[n:], c.hash)
	if cn != len(c.hash) {
		panic("copy hash length is inconsistent")
	}
386 387 388

	return buf[:n+len(c.hash)]
}
Jeromy's avatar
Jeromy committed
389

390 391 392
// Equals checks that two Cids are the same.
// In order for two Cids to be considered equal, the
// Version, the Codec and the Multihash must match.
393 394 395 396
func (c *Cid) Equals(o *Cid) bool {
	return c.codec == o.codec &&
		c.version == o.version &&
		bytes.Equal(c.hash, o.hash)
Jeromy's avatar
Jeromy committed
397
}
398

399
// UnmarshalJSON parses the JSON representation of a Cid.
400 401 402 403
func (c *Cid) UnmarshalJSON(b []byte) error {
	if len(b) < 2 {
		return fmt.Errorf("invalid cid json blob")
	}
Jeromy's avatar
Jeromy committed
404 405 406 407 408 409 410 411 412 413 414 415 416
	obj := struct {
		CidTarget string `json:"/"`
	}{}
	err := json.Unmarshal(b, &obj)
	if err != nil {
		return err
	}

	if obj.CidTarget == "" {
		return fmt.Errorf("cid was incorrectly formatted")
	}

	out, err := Decode(obj.CidTarget)
417 418 419 420 421 422 423 424 425 426
	if err != nil {
		return err
	}

	c.version = out.version
	c.hash = out.hash
	c.codec = out.codec
	return nil
}

427 428 429 430 431 432
// MarshalJSON procudes a JSON representation of a Cid, which looks as follows:
//
//    { "/": "<cid-string>" }
//
// Note that this formatting comes from the IPLD specification
// (https://github.com/ipld/specs/tree/master/ipld)
Fritz Schneider's avatar
Fritz Schneider committed
433
func (c Cid) MarshalJSON() ([]byte, error) {
Jeromy's avatar
Jeromy committed
434
	return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil
435
}
Jeromy's avatar
Jeromy committed
436

437
// KeyString casts the result of cid.Bytes() as a string, and returns it.
Jeromy's avatar
Jeromy committed
438 439 440
func (c *Cid) KeyString() string {
	return string(c.Bytes())
}
Jeromy's avatar
Jeromy committed
441

442 443
// Loggable returns a Loggable (as defined by
// https://godoc.org/github.com/ipfs/go-log).
Jeromy's avatar
Jeromy committed
444 445 446 447 448
func (c *Cid) Loggable() map[string]interface{} {
	return map[string]interface{}{
		"cid": c,
	}
}
449

450
// Prefix builds and returns a Prefix out of a Cid.
451 452 453 454 455 456 457 458 459 460
func (c *Cid) Prefix() Prefix {
	dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error
	return Prefix{
		MhType:   dec.Code,
		MhLength: dec.Length,
		Version:  c.version,
		Codec:    c.codec,
	}
}

461 462 463 464
// Prefix represents all the metadata of a Cid,
// that is, the Version, the Codec, the Multihash type
// and the Multihash length. It does not contain
// any actual content information.
// NOTE: The use of -1 in MhLength to mean default length is deprecated,
//   use the V0Builder or V1Builder structures instead
type Prefix struct {
	Version  uint64 // cid version
	Codec    uint64 // multicodec-packed content type
	MhType   uint64 // multihash function code
	MhLength int    // multihash digest length
}

474 475
// Sum uses the information in a prefix to perform a multihash.Sum()
// and return a newly constructed Cid with the resulting multihash.
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
func (p Prefix) Sum(data []byte) (*Cid, error) {
	hash, err := mh.Sum(data, p.MhType, p.MhLength)
	if err != nil {
		return nil, err
	}

	switch p.Version {
	case 0:
		return NewCidV0(hash), nil
	case 1:
		return NewCidV1(p.Codec, hash), nil
	default:
		return nil, fmt.Errorf("invalid cid version")
	}
}

492 493 494
// Bytes returns a byte representation of a Prefix. It looks like:
//
//     <version><codec><mh-type><mh-length>
495
func (p Prefix) Bytes() []byte {
Jakub Sztandera's avatar
Jakub Sztandera committed
496
	buf := make([]byte, 4*binary.MaxVarintLen64)
497 498 499 500 501 502 503
	n := binary.PutUvarint(buf, p.Version)
	n += binary.PutUvarint(buf[n:], p.Codec)
	n += binary.PutUvarint(buf[n:], uint64(p.MhType))
	n += binary.PutUvarint(buf[n:], uint64(p.MhLength))
	return buf[:n]
}

504 505
// PrefixFromBytes parses a Prefix-byte representation onto a
// Prefix.
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
func PrefixFromBytes(buf []byte) (Prefix, error) {
	r := bytes.NewReader(buf)
	vers, err := binary.ReadUvarint(r)
	if err != nil {
		return Prefix{}, err
	}

	codec, err := binary.ReadUvarint(r)
	if err != nil {
		return Prefix{}, err
	}

	mhtype, err := binary.ReadUvarint(r)
	if err != nil {
		return Prefix{}, err
	}

	mhlen, err := binary.ReadUvarint(r)
	if err != nil {
		return Prefix{}, err
	}

	return Prefix{
		Version:  vers,
		Codec:    codec,
Jeromy's avatar
Jeromy committed
531
		MhType:   mhtype,
532 533 534
		MhLength: int(mhlen),
	}, nil
}