Commit 332e82f2 authored by Overbool's avatar Overbool Committed by Lucas Molas

hamt: wrap the manipulation of children and links

parent 71ede54b
......@@ -30,7 +30,6 @@ import (
ipld "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
"github.com/spaolacci/murmur3"
)
const (
......@@ -46,10 +45,7 @@ func (ds *Shard) isValueNode() bool {
type Shard struct {
cid cid.Cid
bitfield bitfield.Bitfield
links []*ipld.Link
children []*Shard
childer *childer
tableSize int
tableSizeLg2 int
......@@ -74,7 +70,6 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
return nil, err
}
ds.links = make([]*ipld.Link, 0)
ds.hashFunc = HashMurmur3
return ds, nil
}
......@@ -85,14 +80,18 @@ func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
return nil, err
}
maxpadding := fmt.Sprintf("%X", size-1)
return &Shard{
s := &Shard{
tableSizeLg2: lg2s,
prefixPadStr: fmt.Sprintf("%%0%dX", len(maxpadding)),
maxpadlen: len(maxpadding),
bitfield: bitfield.NewBitfield(size),
childer: newChilder(ds, size),
tableSize: size,
dserv: ds,
}, nil
}
s.childer.sd = s
return s, nil
}
// NewHamtFromDag creates a new HAMT shard from the given DAG.
......@@ -115,19 +114,16 @@ func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) {
return nil, fmt.Errorf("only murmur3 supported as hash function")
}
ds, err := makeShard(dserv, int(fsn.Fanout()))
size := int(fsn.Fanout())
ds, err := makeShard(dserv, size)
if err != nil {
return nil, err
}
if len(pbnd.Links()) > 0 {
ds.links = make([]*ipld.Link, len(pbnd.Links()))
copy(ds.links, pbnd.Links())
}
ds.childer.makeChilder(fsn.Data(), pbnd.Links())
ds.cid = pbnd.Cid()
ds.children = make([]*Shard, len(pbnd.Links()))
ds.bitfield.SetBytes(fsn.Data())
ds.hashFunc = fsn.HashType()
ds.builder = pbnd.CidBuilder()
......@@ -152,11 +148,11 @@ func (ds *Shard) Node() (ipld.Node, error) {
cindex := 0
// TODO: optimized 'for each set bit'
for i := 0; i < ds.tableSize; i++ {
if !ds.bitfield.Bit(i) {
if !ds.childer.has(i) {
continue
}
ch := ds.children[cindex]
ch := ds.childer.child(cindex)
if ch != nil {
clnk, err := ch.Link()
if err != nil {
......@@ -169,7 +165,7 @@ func (ds *Shard) Node() (ipld.Node, error) {
}
} else {
// child unloaded, just copy in link with updated name
lnk := ds.links[cindex]
lnk := ds.childer.link(cindex)
label := lnk.Name[ds.maxpadlen:]
err := out.AddRawLink(ds.linkNamePrefix(i)+label, lnk)
......@@ -180,7 +176,7 @@ func (ds *Shard) Node() (ipld.Node, error) {
cindex++
}
data, err := format.HAMTShardData(ds.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3)
data, err := format.HAMTShardData(ds.childer.bitfield.Bytes(), uint64(ds.tableSize), HashMurmur3)
if err != nil {
return nil, err
}
......@@ -208,12 +204,6 @@ func (ds *Shard) makeShardValue(lnk *ipld.Link) (*Shard, error) {
return s, nil
}
// hash feeds val into a murmur3.New128 hasher and returns the resulting
// digest bytes.
func hash(val []byte) []byte {
	const digestLen = 128 / 8

	hasher := murmur3.New128()
	hasher.Write(val)

	// Sum appends the digest to the buffer it is given; hand it an empty
	// buffer that already has room for the whole digest.
	buf := make([]byte, 0, digestLen)
	return hasher.Sum(buf)
}
// Set sets 'name' = nd in the HAMT
func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
hv := &hashBits{b: hash([]byte(name))}
......@@ -271,63 +261,6 @@ func (ds *Shard) childLinkType(lnk *ipld.Link) (linkType, error) {
return shardValueLink, nil
}
// getChild returns the child stored at position i of the children slice.
// A child that is already cached in memory is returned directly; otherwise
// it is loaded through loadChild. An error is returned when i is out of
// range or when the children and links slices differ in length.
func (ds *Shard) getChild(ctx context.Context, i int) (*Shard, error) {
	if i < 0 || i >= len(ds.children) {
		return nil, fmt.Errorf("invalid index passed to getChild (likely corrupt bitfield)")
	}
	if len(ds.links) != len(ds.children) {
		return nil, fmt.Errorf("inconsistent lengths between children array and Links array")
	}

	if cached := ds.children[i]; cached != nil {
		return cached, nil
	}
	return ds.loadChild(ctx, i)
}
// loadChild materialises the child behind links[i] and caches it in
// children[i]. A shard link is fetched from the DAG service and decoded with
// NewHamtFromDag; any other link is turned into a value shard with
// makeShardValue.
func (ds *Shard) loadChild(ctx context.Context, i int) (*Shard, error) {
	lnk := ds.links[i]
	kind, err := ds.childLinkType(lnk)
	if err != nil {
		return nil, err
	}

	var loaded *Shard
	switch kind {
	case shardLink:
		nd, err := lnk.GetNode(ctx, ds.dserv)
		if err != nil {
			return nil, err
		}
		loaded, err = NewHamtFromDag(ds.dserv, nd)
		if err != nil {
			return nil, err
		}
	default:
		loaded, err = ds.makeShardValue(lnk)
		if err != nil {
			return nil, err
		}
	}

	ds.children[i] = loaded
	return loaded, nil
}
// setChild stores c at position i of the children slice, replacing whatever
// was cached there. No bounds check is performed.
func (ds *Shard) setChild(i int, c *Shard) {
	slots := ds.children
	slots[i] = c
}
// Link returns a merklelink to this shard node
func (ds *Shard) Link() (*ipld.Link, error) {
if ds.isValueNode() {
......@@ -347,48 +280,13 @@ func (ds *Shard) Link() (*ipld.Link, error) {
return ipld.MakeLink(nd)
}
// insertChild adds a new value entry for key at bitfield position idx.
// It marks the bit as set, renames lnk to the prefixed key, and splices a
// value shard into children together with a nil placeholder in links.
// It returns os.ErrNotExist when lnk is nil.
func (ds *Shard) insertChild(idx int, key string, lnk *ipld.Link) error {
	if lnk == nil {
		return os.ErrNotExist
	}

	pos := ds.indexForBitPos(idx)
	ds.bitfield.SetBit(idx)
	lnk.Name = ds.linkNamePrefix(idx) + key

	leaf := &Shard{key: key, val: lnk}

	childTail := append([]*Shard{leaf}, ds.children[pos:]...)
	ds.children = append(ds.children[:pos], childTail...)

	linkTail := append([]*ipld.Link{nil}, ds.links[pos:]...)
	ds.links = append(ds.links[:pos], linkTail...)

	return nil
}
// rmChild deletes the entry at position i from both the children and the
// links slices, shifting later entries down by one. It returns an error when
// i is outside either slice.
func (ds *Shard) rmChild(i int) error {
	outOfRange := i < 0 || i >= len(ds.children) || i >= len(ds.links)
	if outOfRange {
		return fmt.Errorf("hamt: attempted to remove child with out of range index")
	}

	ds.children = append(ds.children[:i], ds.children[i+1:]...)
	ds.links = append(ds.links[:i], ds.links[i+1:]...)
	return nil
}
func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func(*Shard) error) error {
idx, err := hv.Next(ds.tableSizeLg2)
if err != nil {
return err
}
if ds.bitfield.Bit(int(idx)) {
cindex := ds.indexForBitPos(idx)
child, err := ds.getChild(ctx, cindex)
if ds.childer.has(idx) {
child, err := ds.childer.get(ctx, ds.childer.index(idx))
if err != nil {
return err
}
......@@ -440,7 +338,7 @@ func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult {
defer cancel()
getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults)
cset := cid.NewSet()
err := dag.EnumerateChildrenAsync(ctx, getLinks, ds.nd.Cid(), cset.Visit)
err := dag.EnumerateChildrenAsync(ctx, getLinks, ds.cid, cset.Visit)
if err != nil {
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
}
......@@ -463,9 +361,9 @@ func makeAsyncTrieGetLinks(dagService ipld.DAGService, linkResults chan<- format
return nil, err
}
childShards := make([]*ipld.Link, 0, len(directoryShard.children))
links := directoryShard.links
for idx := range directoryShard.children {
childShards := make([]*ipld.Link, 0, directoryShard.childer.length())
links := directoryShard.childer.links
for idx := range directoryShard.childer.children {
lnk := links[idx]
lnkLinkType, err := directoryShard.childLinkType(lnk)
......@@ -505,23 +403,18 @@ func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r for
}
func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error {
for idx := range ds.children {
c, err := ds.getChild(ctx, idx)
if err != nil {
return err
}
if c.isValueNode() {
if err := cb(c); err != nil {
return ds.childer.each(ctx, func(s *Shard) error {
if s.isValueNode() {
if err := cb(s); err != nil {
return err
}
} else {
if err := c.walkTrie(ctx, cb); err != nil {
if err := s.walkTrie(ctx, cb); err != nil {
return err
}
}
}
return nil
return nil
})
}
func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error {
......@@ -529,13 +422,14 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val
if err != nil {
return err
}
if !ds.bitfield.Bit(idx) {
return ds.insertChild(idx, key, val)
if !ds.childer.has(idx) {
return ds.childer.insert(key, val, idx)
}
cindex := ds.indexForBitPos(idx)
i := ds.childer.index(idx)
child, err := ds.getChild(ctx, cindex)
child, err := ds.childer.get(ctx, i)
if err != nil {
return err
}
......@@ -544,8 +438,7 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val
if child.key == key {
// value modification
if val == nil {
ds.bitfield.UnsetBit(idx)
return ds.rmChild(cindex)
return ds.childer.rm(idx)
}
child.val = val
......@@ -577,7 +470,7 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val
return err
}
ds.setChild(cindex, ns)
ds.childer.set(ns, i)
return nil
} else {
err := child.modifyValue(ctx, hv, key, val)
......@@ -586,19 +479,18 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val
}
if val == nil {
switch len(child.children) {
switch child.childer.length() {
case 0:
// empty sub-shard, prune it
// Note: this shouldn't normally ever happen.
// In the event that another implementation creates flawed
// structures, this will help to normalize them.
ds.bitfield.UnsetBit(idx)
return ds.rmChild(cindex)
return ds.childer.rm(idx)
case 1:
nchild := child.children[0]
nchild := child.childer.children[0]
if nchild.isValueNode() {
// sub-shard with a single value element, collapse it
ds.setChild(cindex, nchild)
ds.childer.set(nchild, i)
}
return nil
}
......@@ -608,14 +500,170 @@ func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val
}
}
// indexForBitPos maps bitfield position bp to its slot in the collapsed
// array. The collapsed array holds one entry per set bit, so the slot is
// whatever the bitfield's OnesBefore reports for bp.
func (ds *Shard) indexForBitPos(bp int) int {
	slot := ds.bitfield.OnesBefore(bp)
	return slot
}
// linkNamePrefix formats the bitfield index idx with the shard's
// prefixPadStr format string and returns the result, which is used as the
// prefix of an entry's link name.
func (ds *Shard) linkNamePrefix(idx int) string {
	prefix := fmt.Sprintf(ds.prefixPadStr, idx)
	return prefix
}
// childer wraps the links, children and bitfield of a shard
// and provides the basic operations (get, rm, insert and set) for manipulating children.
type childer struct {
// sd is the Shard this childer belongs to; it is consulted for link name
// prefixes, link classification and value-shard construction.
sd *Shard
// dserv is the DAG service used to fetch child nodes that are not loaded yet.
dserv ipld.DAGService
// bitfield records which table positions currently hold a child.
bitfield bitfield.Bitfield
// links holds one entry per child, parallel to children. Entries for
// children inserted in memory are nil placeholders.
links []*ipld.Link
// children caches loaded child shards; a nil entry means the child has not
// been loaded from its link yet.
children []*Shard
}
// newChilder returns an empty childer backed by the DAG service ds, with a
// fresh bitfield created from size. The owning shard (sd) is left unset.
func newChilder(ds ipld.DAGService, size int) *childer {
	c := new(childer)
	c.dserv = ds
	c.bitfield = bitfield.NewBitfield(size)
	return c
}
// makeChilder initialises the childer from serialized shard state: data is
// loaded into the bitfield, children is sized to one unloaded (nil) slot per
// link, and links receives a private copy of the given slice. When links is
// empty the links field is left untouched. The receiver is returned.
func (s *childer) makeChilder(data []byte, links []*ipld.Link) *childer {
	n := len(links)

	s.children = make([]*Shard, n)
	s.bitfield.SetBytes(data)
	if n == 0 {
		return s
	}

	s.links = append(make([]*ipld.Link, 0, n), links...)
	return s
}
// index translates bitfield position idx into a position within the
// children/links slices, as reported by the bitfield's OnesBefore.
func (s *childer) index(idx int) int {
	pos := s.bitfield.OnesBefore(idx)
	return pos
}
// child returns the cached shard at slice position i without loading it;
// the result is nil when that child has not been loaded. No bounds check is
// performed.
func (s *childer) child(i int) *Shard {
	cached := s.children[i]
	return cached
}
// link returns the link stored at slice position i. No bounds check is
// performed.
func (s *childer) link(i int) *ipld.Link {
	lnk := s.links[i]
	return lnk
}
// insert adds a new value entry for key at bitfield position idx.
// lnk is renamed to the prefixed key, a value shard wrapping it is spliced
// into children, a nil placeholder is spliced into links at the same
// position, and finally the bit for idx is set.
// It returns os.ErrNotExist when lnk is nil.
func (s *childer) insert(key string, lnk *ipld.Link, idx int) error {
	if lnk == nil {
		return os.ErrNotExist
	}

	lnk.Name = s.sd.linkNamePrefix(idx) + key
	pos := s.index(idx)
	leaf := &Shard{key: key, val: lnk}

	childTail := append([]*Shard{leaf}, s.children[pos:]...)
	s.children = append(s.children[:pos], childTail...)

	linkTail := append([]*ipld.Link{nil}, s.links[pos:]...)
	s.links = append(s.links[:pos], linkTail...)

	s.bitfield.SetBit(idx)
	return nil
}
// set stores sd at slice position i of children, replacing whatever was
// cached there. No bounds check is performed.
func (s *childer) set(sd *Shard, i int) {
	slots := s.children
	slots[i] = sd
}
// rm removes the child stored at bitfield position idx: the entry is deleted
// from both children and links, and the bit for idx is cleared.
//
// It returns an error when the bit at idx is not set, or when the computed
// slice position fails check (out of range, or children/links out of sync).
func (s *childer) rm(idx int) error {
	// Without this guard, index(idx) for an unset bit would resolve to the
	// slot of a neighbouring child, which would then be removed silently.
	if !s.has(idx) {
		return fmt.Errorf("hamt: attempted to remove child at unset bitfield position %d", idx)
	}

	i := s.index(idx)
	if err := s.check(i); err != nil {
		return err
	}

	// check guarantees both slices have the same non-zero length here.
	last := len(s.children) - 1

	copy(s.children[i:], s.children[i+1:])
	s.children[last] = nil // clear the vacated slot so it holds no stale pointer
	s.children = s.children[:last]

	copy(s.links[i:], s.links[i+1:])
	s.links[last] = nil // same for the link slot
	s.links = s.links[:last]

	s.bitfield.UnsetBit(idx)
	return nil
}
// get returns the child at slice position i. The position is validated with
// check first; a child already cached in children is returned as is, and an
// unloaded one is fetched through loadChild.
func (s *childer) get(ctx context.Context, i int) (*Shard, error) {
	err := s.check(i)
	if err != nil {
		return nil, err
	}

	if cached := s.child(i); cached != nil {
		return cached, nil
	}
	return s.loadChild(ctx, i)
}
// loadChild materialises the child behind the link at slice position i,
// caches it in children and returns it as a *Shard.
//
// A shard link is fetched from the DAG service and decoded with
// NewHamtFromDag; any other link is wrapped into a value shard via the
// owning shard's makeShardValue. On error nothing is cached and a nil shard
// is returned.
func (s *childer) loadChild(ctx context.Context, i int) (*Shard, error) {
	lnk := s.link(i)
	lnkLinkType, err := s.sd.childLinkType(lnk)
	if err != nil {
		return nil, err
	}

	// Results are assigned to c directly, so that no inner variable
	// shadows the receiver s.
	var c *Shard
	if lnkLinkType == shardLink {
		nd, err := lnk.GetNode(ctx, s.dserv)
		if err != nil {
			return nil, err
		}
		c, err = NewHamtFromDag(s.dserv, nd)
		if err != nil {
			return nil, err
		}
	} else {
		c, err = s.sd.makeShardValue(lnk)
		if err != nil {
			return nil, err
		}
	}

	s.set(c, i)
	return c, nil
}
// has reports whether the bit at bitfield position idx is set.
func (s *childer) has(idx int) bool {
	occupied := s.bitfield.Bit(idx)
	return occupied
}
// length returns the number of child slots, counting both loaded and
// unloaded children.
func (s *childer) length() int {
	count := len(s.children)
	return count
}
// each calls cb once per child slot, in slice order, loading each child via
// get first. Iteration stops at the first error from get or cb, and that
// error is returned.
func (s *childer) each(ctx context.Context, cb func(*Shard) error) error {
	// The slot count is captured once up front, matching a range loop.
	total := len(s.children)
	for pos := 0; pos < total; pos++ {
		c, err := s.get(ctx, pos)
		if err != nil {
			return err
		}

		err = cb(c)
		if err != nil {
			return err
		}
	}
	return nil
}
// check validates slice position i before a child operation: i must fall
// within children, and children and links must have the same length.
func (s *childer) check(i int) error {
	switch {
	case i < 0 || i >= len(s.children):
		return fmt.Errorf("invalid index passed to operate children (likely corrupt bitfield)")
	case len(s.links) != len(s.children):
		return fmt.Errorf("inconsistent lengths between children array and Links array")
	}
	return nil
}
......@@ -9,12 +9,10 @@ import (
"testing"
"time"
ipld "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
mdtest "github.com/ipfs/go-merkledag/test"
ft "github.com/ipfs/go-unixfs"
ipld "github.com/ipfs/go-ipld-format"
)
func shuffle(seed int64, arr []string) {
......@@ -488,11 +486,11 @@ func TestBitfieldIndexing(t *testing.T) {
s, _ := NewShard(ds, 256)
set := func(i int) {
s.bitfield.SetBit(i)
s.childer.bitfield.SetBit(i)
}
assert := func(i int, val int) {
if s.indexForBitPos(i) != val {
if s.childer.index(i) != val {
t.Fatalf("expected index %d to be %d", i, val)
}
}
......
......@@ -2,6 +2,8 @@ package hamt
import (
"fmt"
"github.com/spaolacci/murmur3"
"math/bits"
)
......@@ -58,3 +60,9 @@ func logtwo(v int) (int, error) {
}
return lg2, nil
}
// hash feeds val into a murmur3.New64 hasher and returns the resulting
// digest bytes in a freshly allocated slice.
func hash(val []byte) []byte {
	hasher := murmur3.New64()
	hasher.Write(val)

	var digest []byte // nil: Sum allocates the output itself
	return hasher.Sum(digest)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment