Commit 8fef5312 authored by Eric Myhre's avatar Eric Myhre

Extract multi{codec,hash} registries better.

And, make a package which can be imported to register "all" of the
multihashes.  (Or at least all of them that you would've expected
from go-multihash.)

There are also packages that are split roughly per the transitive
dependency it brings in, so you can pick and choose.

This cascaded into more work than I might've expected.
Turns out a handful of the things we have multihash identifiers for
actually *do not* implement the standard hash.Hash contract at all.
For these, I've made small shims.

Test fixtures across the library switch to using sha2-512.
Previously I had written a bunch of them to use sha3 variants,
but since that is not in the standard library, I'm going to move away
from that so as not to re-bloat the transitive dependency tree
just for the tests and examples.
parent a1482fe2
......@@ -6,7 +6,7 @@ import (
"github.com/polydawn/refmt/cbor"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)
var (
......@@ -15,8 +15,8 @@ var (
)
func init() {
codec.MulticodecEncoderRegistry[0x71] = Encode
codec.MulticodecDecoderRegistry[0x71] = Decode
multicodec.EncoderRegistry[0x71] = Encode
multicodec.DecoderRegistry[0x71] = Decode
}
func Decode(na ipld.NodeAssembler, r io.Reader) error {
......
......@@ -17,7 +17,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
......
......@@ -71,7 +71,7 @@ func TestRoundtripLinksAndBytes(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.
......
......@@ -7,7 +7,7 @@ import (
"github.com/polydawn/refmt/json"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)
var (
......@@ -16,8 +16,8 @@ var (
)
func init() {
codec.MulticodecEncoderRegistry[0x0129] = Encode
codec.MulticodecDecoderRegistry[0x0129] = Decode
multicodec.EncoderRegistry[0x0129] = Encode
multicodec.DecoderRegistry[0x0129] = Decode
}
func Decode(na ipld.NodeAssembler, r io.Reader) error {
......
......@@ -18,7 +18,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x0129,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
......@@ -48,7 +48,7 @@ func TestUnmarshalTrickyMapContainingLink(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.
......
......@@ -5,7 +5,8 @@ import (
"hash"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
"github.com/ipld/go-ipld-prime/multihash"
)
func DefaultLinkSystem() ipld.LinkSystem {
......@@ -13,7 +14,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
EncoderChooser: func(lp ipld.LinkPrototype) (ipld.Encoder, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecEncoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.EncoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no encoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
......@@ -26,7 +27,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
lp := lnk.Prototype()
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecDecoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.DecoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no decoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
......@@ -38,7 +39,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
HasherChooser: func(lp ipld.LinkPrototype) (hash.Hash, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MultihashRegistry[lp2.MhType]
fn, ok := multihash.Registry[lp2.MhType]
if !ok {
return nil, fmt.Errorf("no hasher registered for multihash indicator 0x%x", lp2.MhType)
}
......
......@@ -46,8 +46,8 @@ func ExampleStoringLink() {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1, // Usually '1'.
Codec: 0x71, // 0x71 means "dag-cbor" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhType: 0x15, // 0x15 means "sha3-384" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 48, // sha3-384 hash has a 48-byte sum.
MhType: 0x13, // 0x13 means "sha2-512" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 64, // sha2-512 hash has a 64-byte sum.
}}
// And we need some data to link to! Here's a quick piece of example data:
......@@ -85,13 +85,13 @@ func ExampleStoringLink() {
// We'll pick this data back up again in the example for loading.
// Output:
// link: bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta
// link: bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk
// concrete type: `cidlink.Link`
}
func ExampleLoadingLink() {
// Let's say we want to load this link (it's the same one we just created in the example above).
cid, _ := cid.Decode("bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta")
cid, _ := cid.Decode("bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk")
lnk := cidlink.Link{cid}
// Let's get a LinkSystem. We're going to be working with CID links,
......
package codec
package multicodec
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"
"golang.org/x/crypto/sha3"
"github.com/ipld/go-ipld-prime"
)
// MulticodecEncoderRegistry is a simple map which maps a multicodec indicator number
// EncoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Encoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
......@@ -29,9 +21,9 @@ import (
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
var EncoderRegistry = make(map[uint64]ipld.Encoder)
// MulticodecDecoderRegistry is a simple map which maps a multicodec indicator number
// DecoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Decoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
......@@ -48,37 +40,4 @@ var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecDecoderRegistry = make(map[uint64]ipld.Decoder)
// MultihashRegistry is a simple map which maps a multihash indicator number
// to a standard golang Hash interface.
//
// Hashers which are available in the golang stdlib are registered here automatically.
// Some hashes from x/crypto are also included out-of-the-box.
//
// Packages which want to register more hashing functions and have a multihash number reserved in
// https://github.com/multiformats/multicodec/blob/master/table.csv
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find decoders
// to use when deserializing data from storage.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MultihashRegistry = make(map[uint64]func() hash.Hash)
func init() {
MultihashRegistry[0xd5] = md5.New
MultihashRegistry[0x11] = sha1.New
MultihashRegistry[0x12] = sha256.New
MultihashRegistry[0x13] = sha512.New
MultihashRegistry[0x14] = sha3.New512
MultihashRegistry[0x15] = sha3.New384
MultihashRegistry[0x16] = sha3.New256
MultihashRegistry[0x17] = sha3.New224
}
var DecoderRegistry = make(map[uint64]ipld.Decoder)
package multihash
import (
"bytes"
"crypto/sha256"
"hash"
)
// identityMultihash is a hash.Hash whose "digest" is simply every byte that
// has been written to it, unmodified.
//
// Write and Reset are promoted from the embedded bytes.Buffer; because those
// have pointer receivers, only *identityMultihash satisfies hash.Hash.
type identityMultihash struct {
	bytes.Buffer
}

// Compile-time check that the pointer type fulfils the hash.Hash contract.
var _ hash.Hash = (*identityMultihash)(nil)

// BlockSize returns an arbitrary constant.
func (*identityMultihash) BlockSize() int {
	return 32 // A preferred block size is nonsense for the "identity" "hash". An arbitrary but unsurprising and positive nonzero number has been chosen to minimize the odds of fascinating bugs.
}

// Size returns the number of bytes Sum would currently produce, which is the
// number of bytes accumulated in the buffer so far.
func (x *identityMultihash) Size() int {
	return x.Len()
}

// Sum appends the accumulated bytes to digest and returns the resulting slice,
// as the hash.Hash contract requires. It does not change the underlying state,
// and the returned slice never aliases the internal buffer.
func (x *identityMultihash) Sum(digest []byte) []byte {
	return append(digest, x.Bytes()...)
}
// doubleSha256 is a hash.Hash that yields the SHA-256 digest of the SHA-256
// digest of everything written to it.
//
// The zero value is ready to use: the inner hasher is created on first use.
type doubleSha256 struct {
	main hash.Hash
}

// Compile-time check that the pointer type fulfils the hash.Hash contract.
var _ hash.Hash = (*doubleSha256)(nil)

// inner returns the first-round hasher, creating it lazily so that a
// zero-valued doubleSha256 does not dereference a nil hash.Hash.
func (x *doubleSha256) inner() hash.Hash {
	if x.main == nil {
		x.main = sha256.New()
	}
	return x.main
}

// Write feeds body into the first-round SHA-256 state.
func (x *doubleSha256) Write(body []byte) (int, error) {
	return x.inner().Write(body)
}

// BlockSize returns the block size of SHA-256.
func (*doubleSha256) BlockSize() int {
	return sha256.BlockSize
}

// Size returns the digest length, which is that of a single SHA-256 sum.
func (*doubleSha256) Size() int {
	return sha256.Size
}

// Reset discards everything written so far.
func (x *doubleSha256) Reset() {
	x.inner().Reset()
}

// Sum appends sha256(sha256(data written so far)) to digest and returns the
// resulting slice. It does not change the underlying hash state.
func (x *doubleSha256) Sum(digest []byte) []byte {
	// hash.Hash.Sum APPENDS to its argument, so hand it a zero-length slice
	// backed by the scratch array; the first-round digest then lands in (and
	// is returned as) exactly those 32 bytes.
	var scratch [sha256.Size]byte
	first := x.inner().Sum(scratch[:0])
	second := sha256.Sum256(first)
	return append(digest, second[:]...)
}
package multihash
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"
)
// Registry is a simple map which maps a multihash indicator number
// to a standard golang Hash interface.
//
// Multihash indicator numbers are reserved and described in
// https://github.com/multiformats/multicodec/blob/master/table.csv .
// The keys used in this map must match those reservations.
//
// Hashers which are available in the golang stdlib are registered here automatically.
//
// Packages which want to register more hashing functions (and have a multihash number reserved!)
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find hashers
// to use when serializing data and computing links,
// and when loading data from storage and verifying its integrity.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multihash indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
// This should never be done to make behavior alterations
// (hash functions are well standardized and so is the multihash indicator table),
// but may be relevant if one is really itching to try out different hash implementations for performance reasons.
var Registry = make(map[uint64]func() hash.Hash)
// init registers the hashers that need nothing beyond the golang standard
// library (plus the small shims defined in this package).
func init() {
	Registry[0x00] = func() hash.Hash { return &identityMultihash{} }
	Registry[0xd5] = md5.New
	Registry[0x11] = sha1.New
	Registry[0x12] = sha256.New
	Registry[0x13] = sha512.New
	// Registry[0x1f] = sha256.New224 // SOON
	// Registry[0x20] = sha512.New384 // SOON
	// The inner hasher must be supplied explicitly: a doubleSha256 whose main
	// field is nil would dereference a nil hash.Hash on the first Write.
	Registry[0x56] = func() hash.Hash { return &doubleSha256{main: sha256.New()} }
}
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/all"
)
This package registers many multihashes at once.
Importing it will increase the size of your dependency tree significantly.
It's recommended that you import this package if you're building some
kind of data broker application, which may need to handle many different kinds of hashes;
if you're building an application which you know only handles a specific hash,
importing this package may bloat your builds unnecessarily.
*/
package all
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
_ "github.com/ipld/go-ipld-prime/multihash/register/murmur3"
_ "github.com/ipld/go-ipld-prime/multihash/register/sha3"
)
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
)
This package registers several multihashes for the blake2 family
(both the 's' and the 'b' variants, and in a variety of sizes).
*/
package blake2
import (
"hash"
"github.com/minio/blake2b-simd"
"golang.org/x/crypto/blake2s"
"github.com/ipld/go-ipld-prime/multihash"
)
// Multihash indicator ranges for the blake2 family, as consumed by this
// package's init function.
const (
	// BLAKE2B_MIN and BLAKE2B_MAX bound (inclusively) the blake2b indicators.
	// init maps each code c in this range to a hasher configured with a
	// digest size of (c - BLAKE2B_MIN + 1) bytes.
	BLAKE2B_MIN = 0xb201
	BLAKE2B_MAX = 0xb240
	// BLAKE2S_MIN and BLAKE2S_MAX bound the blake2s indicators.
	// init registers only BLAKE2S_MIN+31 from this range.
	BLAKE2S_MIN = 0xb241
	BLAKE2S_MAX = 0xb260
)
// init adds the blake2 hashers to multihash.Registry.
func init() {
	// blake2s: only the 32-byte (256-bit) variant is enabled by this package.
	multihash.Registry[BLAKE2S_MIN+31] = func() hash.Hash {
		hasher, _ := blake2s.New256(nil)
		return hasher
	}

	// blake2b: one indicator per digest size, covering the whole range.
	for code := uint64(BLAKE2B_MIN); code <= BLAKE2B_MAX; code++ {
		digestSize := int(code - BLAKE2B_MIN + 1)
		multihash.Registry[code] = blake2bFactory(digestSize)
	}
}

// blake2bFactory returns a constructor for blake2b hashers that emit
// digestSize bytes. The constructor panics if the blake2b library rejects
// the configuration.
func blake2bFactory(digestSize int) func() hash.Hash {
	return func() hash.Hash {
		cfg := blake2b.Config{Size: uint8(digestSize)}
		hasher, err := blake2b.New(&cfg)
		if err != nil {
			panic(err)
		}
		return hasher
	}
}
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/murmur3"
)
This package registers multihashes for the murmur3 family.
*/
package murmur3
// import (
// "github.com/gxed/hashland/murmur3"
//
// "github.com/ipld/go-ipld-prime/multihash"
// )
// init intentionally registers nothing; the would-be registration is left
// commented out below, along with the reasoning.
func init() {
	// REVIEW: what go-multihash has done historically is New32, but this doesn't match what the multihash table says, which is 128!
	// These are also very clearly noncryptographic functions and not suitable for content-addressing use
	// (and would require writing adapters to qualify for hash.Hash), so I'm opting to... not.
	// multihash.Registry[0x22] = murmur3.New32
}
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/sha3"
)
This package registers several multihashes for the sha3 family.
This also includes some functions known as "shake" and "keccak",
since they share much of their implementation and come in the same repos.
*/
package sha3
import (
"hash"
"golang.org/x/crypto/sha3"
"github.com/ipld/go-ipld-prime/multihash"
)
func init() {
multihash.Registry[0x14] = sha3.New512
multihash.Registry[0x15] = sha3.New384
multihash.Registry[0x16] = sha3.New256
multihash.Registry[0x17] = sha3.New224
multihash.Registry[0x18] = func() hash.Hash { return shakeNormalizer{sha3.NewShake128(), 128 / 8} }
multihash.Registry[0x19] = func() hash.Hash { return shakeNormalizer{sha3.NewShake256(), 256 / 8} }
multihash.Registry[0x1B] = sha3.NewLegacyKeccak256
multihash.Registry[0x1D] = sha3.NewLegacyKeccak512
}
// sha3.ShakeHash presents a somewhat odd interface, and requires a wrapper to normalize it to the usual hash.Hash interface.
//
// Some of the fiddly bits required by this normalization probably makes it undesirable for use in the highest performance applications;
// There's at least one extra allocation in constructing it (sha3.ShakeHash is an interface, so that's one heap escape; and there's a second heap escape when this normalizer struct gets boxed into a hash.Hash interface),
// and there's at least one extra allocation in getting a sum out of it (because reading a shake hash is a mutation (!) and the API only provides cloning as a way to escape this).
// Fun.
type shakeNormalizer struct {
	sha3.ShakeHash
	size int // number of output bytes Sum produces
}

// BlockSize returns an arbitrary constant.
func (shakeNormalizer) BlockSize() int {
	return 32 // Shake doesn't have a preferred block size, apparently. An arbitrary but unsurprising and positive nonzero number has been chosen to minimize the odds of fascinating bugs.
}

// Size returns the number of bytes that Sum appends.
func (x shakeNormalizer) Size() int {
	return x.size
}

// Sum appends x.size bytes of shake output to digest and returns the resulting
// slice, as the hash.Hash contract requires. The caller's existing bytes are
// kept (never overwritten or dropped), and the underlying state is untouched.
func (x shakeNormalizer) Sum(digest []byte) []byte {
	prefixLen := len(digest)
	// Grow by x.size bytes, then have the hash fill exactly the new tail.
	digest = append(digest, make([]byte, x.size)...)
	h2 := x.Clone()            // clone it, because reading mutates this kind of hash (!) which is not the standard contract for a Hash.Sum method.
	h2.Read(digest[prefixLen:]) // not capable of underreading. See sha3.ShakeSum256 for similar usage.
	return digest
}
......@@ -53,7 +53,7 @@ func encode(n ipld.Node) (ipld.Node, ipld.Link) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x0129,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
......@@ -324,7 +324,7 @@ func TestFocusedTransformWithLinks(t *testing.T) {
Wish(t, progress.Path.String(), ShouldEqual, "linkedMap/nested/nonlink")
Wish(t, must.String(prev), ShouldEqual, "zoo")
Wish(t, progress.LastBlock.Path.String(), ShouldEqual, "linkedMap")
Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqefye7xlxqda")
Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqeeyevmbz3ga")
nb := prev.Prototype().NewBuilder()
nb.AssignString("new string!")
return nb.Build(), nil
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment