Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
dms3
go-dms3
Commits
391b78a2
Commit
391b78a2
authored
8 years ago
by
Jeromy Johnson
Committed by
GitHub
8 years ago
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3273 from ipfs/fix/pin-fail
fix bug in pinsets and add a stress test for the scenario
parents
2fd045f0
a49483bf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
139 additions
and
116 deletions
+139
-116
pin/set.go
pin/set.go
+79
-109
pin/set_test.go
pin/set_test.go
+60
-7
No files found.
pin/set.go
View file @
391b78a2
...
...
@@ -2,15 +2,14 @@ package pin
import
(
"bytes"
"context"
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"hash/fnv"
"sort"
"unsafe"
"context"
"github.com/ipfs/go-ipfs/merkledag"
"github.com/ipfs/go-ipfs/pin/internal/pb"
"gx/ipfs/QmYEoKZXHoAToWfhGF3vryhMn3WWhE1o2MasQ8uzY5iDi9/go-key"
...
...
@@ -19,8 +18,11 @@ import (
)
const
(
// defaultFanout specifies the default number of fan-out links per layer
defaultFanout
=
256
maxItems = 8192
// maxItems is the maximum number of items that will fit in a single bucket
maxItems
=
8192
)
func
randomSeed
()
(
uint32
,
error
)
{
...
...
@@ -40,36 +42,12 @@ func hash(seed uint32, c *cid.Cid) uint32 {
return
h
.
Sum32
()
}
type itemIterator func() (c *cid.Cid,
data []byte,
ok bool)
type
itemIterator
func
()
(
c
*
cid
.
Cid
,
ok
bool
)
type
keyObserver
func
(
*
cid
.
Cid
)
// refcount is the marshaled format of refcounts. It may change
// between versions; this is valid for version 1. Changing it may
// become desirable if there are many links with refcount > 255.
//
// There are two guarantees that need to be preserved, if this is
// changed:
//
// - the marshaled format is of fixed size, matching
// unsafe.Sizeof(refcount(0))
// - methods of refcount handle endianness, and may
// in later versions need encoding/binary.
type refcount uint8
func (r refcount) Bytes() []byte {
return []byte{byte(r)}
}
// ReadFromIdx sets r to the idx'th refcount in buf, which is
// assumed to be a sequence of refcount.Bytes results.
func (r *refcount) ReadFromIdx(buf []byte, idx int) {
*r = refcount(buf[idx])
}
type
sortByHash
struct
{
links
[]
*
merkledag
.
Link
data []byte
}
func
(
s
sortByHash
)
Len
()
int
{
...
...
@@ -82,13 +60,6 @@ func (s sortByHash) Less(a, b int) bool {
func
(
s
sortByHash
)
Swap
(
a
,
b
int
)
{
s
.
links
[
a
],
s
.
links
[
b
]
=
s
.
links
[
b
],
s
.
links
[
a
]
if len(s.data) != 0 {
const n = int(unsafe.Sizeof(refcount(0)))
tmp := make([]byte, n)
copy(tmp, s.data[a*n:a*n+n])
copy(s.data[a*n:a*n+n], s.data[b*n:b*n+n])
copy(s.data[b*n:b*n+n], tmp)
}
}
func
storeItems
(
ctx
context
.
Context
,
dag
merkledag
.
DAGService
,
estimatedLen
uint64
,
iter
itemIterator
,
internalKeys
keyObserver
)
(
*
merkledag
.
Node
,
error
)
{
...
...
@@ -96,13 +67,15 @@ func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint
if
err
!=
nil
{
return
nil
,
err
}
n := &merkledag.Node{
Links: make([]*merkledag.Link, 0, defaultFanout+maxItems),
}
n
:=
&
merkledag
.
Node
{
Links
:
make
([]
*
merkledag
.
Link
,
0
,
defaultFanout
+
maxItems
)}
for
i
:=
0
;
i
<
defaultFanout
;
i
++
{
n
.
Links
=
append
(
n
.
Links
,
&
merkledag
.
Link
{
Hash
:
emptyKey
.
Hash
()})
}
// add emptyKey to our set of internal pinset objects
internalKeys
(
emptyKey
)
hdr
:=
&
pb
.
Set
{
Version
:
proto
.
Uint32
(
1
),
Fanout
:
proto
.
Uint32
(
defaultFanout
),
...
...
@@ -111,97 +84,106 @@ func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint
if
err
:=
writeHdr
(
n
,
hdr
);
err
!=
nil
{
return
nil
,
err
}
hdrLen := len(n.Data())
if
estimatedLen
<
maxItems
{
// it'll probably fit
for
i
:=
0
;
i
<
maxItems
;
i
++
{
k,
data,
ok := iter()
k
,
ok
:=
iter
()
if
!
ok
{
// all done
break
}
n
.
Links
=
append
(
n
.
Links
,
&
merkledag
.
Link
{
Hash
:
k
.
Hash
()})
n.SetData(append(n.Data(), data...))
}
// sort by hash, also swap item Data
s
:=
sortByHash
{
links
:
n
.
Links
[
defaultFanout
:
],
data: n.Data()[hdrLen:],
}
sort
.
Stable
(
s
)
}
// wasteful but simple
type item struct {
c *cid.Cid
data []byte
}
hashed := make(map[uint32][]item)
hashed
:=
make
([][]
*
cid
.
Cid
,
defaultFanout
)
for
{
k, data, ok := iter()
// This loop essentially enumerates every single item in the set
// and maps them all into a set of buckets. Each bucket will be recursively
// turned into its own sub-set, and so on down the chain. Each sub-set
// gets added to the dagservice, and put into its place in the set node's
// links array.
//
// Previously, the bucket was selected by taking the uint32 hash of
// the input key + seed. This was erroneous as we would later be assigning
// the created sub-sets into an array of length 256 by the modulus of the
// uint32 hash value with 256. This resulted in overwriting existing sub-sets
// and losing pins. The fix (a few lines down from this comment) is to
// map the hash value down to the 8-bit keyspace here while creating the
// buckets. This way, we avoid any overlapping later on.
k
,
ok
:=
iter
()
if
!
ok
{
break
}
h := hash(seed, k)
hashed[h] = append(hashed[h],
item{k, data}
)
h
:=
hash
(
seed
,
k
)
%
defaultFanout
hashed
[
h
]
=
append
(
hashed
[
h
],
k
)
}
for
h
,
items
:=
range
hashed
{
childIter := func() (c *cid.Cid, data []byte, ok bool) {
if len(items) == 0 {
return nil, nil, false
}
first := items[0]
items = items[1:]
return first.c, first.data, true
if
len
(
items
)
==
0
{
// recursion base case
continue
}
childIter
:=
getCidListIterator
(
items
)
// recursively create a pinset from the items for this bucket index
child
,
err
:=
storeItems
(
ctx
,
dag
,
uint64
(
len
(
items
)),
childIter
,
internalKeys
)
if
err
!=
nil
{
return
nil
,
err
}
size
,
err
:=
child
.
Size
()
if
err
!=
nil
{
return
nil
,
err
}
childKey
,
err
:=
dag
.
Add
(
child
)
if
err
!=
nil
{
return
nil
,
err
}
internalKeys
(
childKey
)
l := &merkledag.Link{
Name: "",
// overwrite the 'empty key' in the existing links array
n
.
Links
[
h
]
=
&
merkledag
.
Link
{
Hash
:
childKey
.
Hash
(),
Size
:
size
,
}
n.Links[int(h%defaultFanout)] = l
}
return
n
,
nil
}
func readHdr(n *merkledag.Node) (*pb.Set,
[]byte,
error) {
func
readHdr
(
n
*
merkledag
.
Node
)
(
*
pb
.
Set
,
error
)
{
hdrLenRaw
,
consumed
:=
binary
.
Uvarint
(
n
.
Data
())
if
consumed
<=
0
{
return nil,
nil,
errors.New("invalid Set header length")
return
nil
,
errors
.
New
(
"invalid Set header length"
)
}
buf := n.Data()[consumed:]
if hdrLenRaw > uint64(len(buf)) {
return nil, nil, errors.New("impossibly large Set header length")
pbdata
:=
n
.
Data
()[
consumed
:
]
if
hdrLenRaw
>
uint64
(
len
(
pbdata
))
{
return
nil
,
errors
.
New
(
"impossibly large Set header length"
)
}
// hdrLenRaw is no larger than the remaining data length (an int), so it fits in an int
hdrLen
:=
int
(
hdrLenRaw
)
var
hdr
pb
.
Set
if err := proto.Unmarshal(
buf
[:hdrLen], &hdr); err != nil {
return nil,
nil,
err
if
err
:=
proto
.
Unmarshal
(
pbdata
[
:
hdrLen
],
&
hdr
);
err
!=
nil
{
return
nil
,
err
}
buf = buf[hdrLen:]
if
v
:=
hdr
.
GetVersion
();
v
!=
1
{
return nil,
nil,
fmt.Errorf("unsupported Set version: %d", v)
return
nil
,
fmt
.
Errorf
(
"unsupported Set version: %d"
,
v
)
}
if
uint64
(
hdr
.
GetFanout
())
>
uint64
(
len
(
n
.
Links
))
{
return nil,
nil,
errors.New("impossibly large Fanout")
return
nil
,
errors
.
New
(
"impossibly large Fanout"
)
}
return &hdr,
buf,
nil
return
&
hdr
,
nil
}
func
writeHdr
(
n
*
merkledag
.
Node
,
hdr
*
pb
.
Set
)
error
{
...
...
@@ -209,24 +191,31 @@ func writeHdr(n *merkledag.Node, hdr *pb.Set) error {
if
err
!=
nil
{
return
err
}
n.SetData(make([]byte, binary.MaxVarintLen64, binary.MaxVarintLen64+len(hdrData)))
written := binary.PutUvarint(n.Data(), uint64(len(hdrData)))
n.SetData(n.Data()[:written])
n.SetData(append(n.Data(), hdrData...))
// make enough space for the length prefix and the marshalled header data
data
:=
make
([]
byte
,
binary
.
MaxVarintLen64
,
binary
.
MaxVarintLen64
+
len
(
hdrData
))
// write the uvarint length of the header data
uvarlen
:=
binary
.
PutUvarint
(
data
,
uint64
(
len
(
hdrData
)))
// append the actual protobuf data *after* the length value we wrote
data
=
append
(
data
[
:
uvarlen
],
hdrData
...
)
n
.
SetData
(
data
)
return
nil
}
type walkerFunc func(
buf []byte,
idx int, link *merkledag.Link) error
type
walkerFunc
func
(
idx
int
,
link
*
merkledag
.
Link
)
error
func
walkItems
(
ctx
context
.
Context
,
dag
merkledag
.
DAGService
,
n
*
merkledag
.
Node
,
fn
walkerFunc
,
children
keyObserver
)
error
{
hdr,
buf,
err := readHdr(n)
hdr
,
err
:=
readHdr
(
n
)
if
err
!=
nil
{
return
err
}
// readHdr guarantees fanout is a safe value
fanout
:=
hdr
.
GetFanout
()
for
i
,
l
:=
range
n
.
Links
[
fanout
:
]
{
if err := fn(
buf,
i, l); err != nil {
if
err
:=
fn
(
i
,
l
);
err
!=
nil
{
return
err
}
}
...
...
@@ -262,7 +251,7 @@ func loadSet(ctx context.Context, dag merkledag.DAGService, root *merkledag.Node
}
var
res
[]
*
cid
.
Cid
walk := func(
buf []byte,
idx int, link *merkledag.Link) error {
walk
:=
func
(
idx
int
,
link
*
merkledag
.
Link
)
error
{
res
=
append
(
res
,
cid
.
NewCidV0
(
link
.
Hash
))
return
nil
}
...
...
@@ -272,40 +261,21 @@ func loadSet(ctx context.Context, dag merkledag.DAGService, root *merkledag.Node
return
res
,
nil
}
func loadMultiset(ctx context.Context, dag merkledag.DAGService, root *merkledag.Node, name string, internalKeys keyObserver) (map[key.Key]uint64, error) {
l, err := root.GetNodeLink(name)
if err != nil {
return nil, fmt.Errorf("Failed to get link %s: %v", name, err)
}
c := cid.NewCidV0(l.Hash)
internalKeys(c)
n, err := l.GetNode(ctx, dag)
if err != nil {
return nil, fmt.Errorf("Failed to get node from link %s: %v", name, err)
}
refcounts := make(map[key.Key]uint64)
walk := func(buf []byte, idx int, link *merkledag.Link) error {
var r refcount
r.ReadFromIdx(buf, idx)
refcounts[key.Key(link.Hash)] += uint64(r)
return nil
}
if err := walkItems(ctx, dag, n, walk, internalKeys); err != nil {
return nil, err
}
return refcounts, nil
}
func storeSet(ctx context.Context, dag merkledag.DAGService, cids []*cid.Cid, internalKeys keyObserver) (*merkledag.Node, error) {
iter := func() (c *cid.Cid, data []byte, ok bool) {
func
getCidListIterator
(
cids
[]
*
cid
.
Cid
)
itemIterator
{
return
func
()
(
c
*
cid
.
Cid
,
ok
bool
)
{
if
len
(
cids
)
==
0
{
return nil,
nil,
false
return
nil
,
false
}
first
:=
cids
[
0
]
cids
=
cids
[
1
:
]
return first,
nil,
true
return
first
,
true
}
}
func
storeSet
(
ctx
context
.
Context
,
dag
merkledag
.
DAGService
,
cids
[]
*
cid
.
Cid
,
internalKeys
keyObserver
)
(
*
merkledag
.
Node
,
error
)
{
iter
:=
getCidListIterator
(
cids
)
n
,
err
:=
storeItems
(
ctx
,
dag
,
uint64
(
len
(
cids
)),
iter
,
internalKeys
)
if
err
!=
nil
{
return
nil
,
err
...
...
This diff is collapsed.
Click to expand it.
pin/set_test.go
View file @
391b78a2
package
pin
import
"gx/ipfs/QmYEoKZXHoAToWfhGF3vryhMn3WWhE1o2MasQ8uzY5iDi9/go-key"
import
(
"context"
"fmt"
"os"
"testing"
func
ignoreKeys
(
key
.
Key
)
{}
dag
"github.com/ipfs/go-ipfs/merkledag"
mdtest
"github.com/ipfs/go-ipfs/merkledag/test"
func
copyMap
(
m
map
[
key
.
Key
]
uint16
)
map
[
key
.
Key
]
uint64
{
c
:=
make
(
map
[
key
.
Key
]
uint64
,
len
(
m
))
for
k
,
v
:=
range
m
{
c
[
k
]
=
uint64
(
v
)
cid
"gx/ipfs/QmakyCk6Vnn16WEKjbkxieZmM2YLTzkFWizbmGowoYPjro/go-cid"
)
func
ignoreCids
(
_
*
cid
.
Cid
)
{}
func
TestSet
(
t
*
testing
.
T
)
{
ds
:=
mdtest
.
Mock
()
limit
:=
10000
// 10000 reproduces the pinloss issue fairly reliably
if
os
.
Getenv
(
"STRESS_IT_OUT_YO"
)
!=
""
{
limit
=
10000000
}
var
inputs
[]
*
cid
.
Cid
for
i
:=
0
;
i
<
limit
;
i
++
{
c
,
err
:=
ds
.
Add
(
dag
.
NodeWithData
([]
byte
(
fmt
.
Sprint
(
i
))))
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
inputs
=
append
(
inputs
,
c
)
}
out
,
err
:=
storeSet
(
context
.
Background
(),
ds
,
inputs
,
ignoreCids
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
// weird wrapper node because loadSet expects us to pass an
// object pointing to multiple named sets
setroot
:=
&
dag
.
Node
{}
err
=
setroot
.
AddNodeLinkClean
(
"foo"
,
out
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
outset
,
err
:=
loadSet
(
context
.
Background
(),
ds
,
setroot
,
"foo"
,
ignoreCids
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
len
(
outset
)
!=
limit
{
t
.
Fatal
(
"got wrong number"
,
len
(
outset
),
limit
)
}
seen
:=
cid
.
NewSet
()
for
_
,
c
:=
range
outset
{
seen
.
Add
(
c
)
}
for
_
,
c
:=
range
inputs
{
if
!
seen
.
Has
(
c
)
{
t
.
Fatalf
(
"expected to have %s, didnt find it"
)
}
}
return
c
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment