From c2e497e22825ba8d5a6329bbf9f06be08197e316 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Mart=C3=AD?=
Date: Mon, 19 Jul 2021 18:11:09 +0100
Subject: [PATCH] blockstore: implement UseWholeCIDs

Update the tests too, as a few of them expected whole CIDs. For now,
just make them use the option.

This required index.Index to gain a new method, GetAll, so that when
using whole CIDs, methods like Get can return true if any of the
matching indexed CIDs is an exact whole-CID match, rather than only
looking at the first matching indexed CID.

GetAll is akin to an iteration over all matching indexed CIDs, and its
callback returns a boolean to say whether the iteration should
continue. This allows stopping as soon as we're done.

We also remove Index.Get, instead replacing it with a helper called
GetFirst, which turns simple uses of GetAll into a single line.

We remove the non-specced and unused index implementations, too. They
were left in place in case they became useful again, but they haven't
been so far, and their code is still in git. Keeping them around just
means updating more code when refactoring.

While at it, make ZeroLengthSectionAsEOF take a boolean and return an
option, just like the other boolean options, for consistency.

Fixes #130.
---
 v2/blockstore/example_test.go   |   5 +-
 v2/blockstore/insertionindex.go |  32 +++++++-
 v2/blockstore/readonly.go       | 127 +++++++++++++++++++++---------
 v2/blockstore/readonly_test.go  |   4 +-
 v2/blockstore/readwrite.go      |   1 -
 v2/blockstore/readwrite_test.go | 133 ++++++++++++++++++++------------
 v2/index/example_test.go        |   2 +-
 v2/index/index.go               |  50 +++++++++---
 v2/index/index_test.go          |  17 +---
 v2/index/indexgobhash.go        |  48 ------------
 v2/index/indexhashed.go         |  47 -----------
 v2/index/indexsorted.go         |  45 ++++++-----
 v2/index/indexsorted_test.go    |   6 +-
 v2/index_gen.go                 |   2 +-
 v2/options.go                   |   6 +-
 15 files changed, 281 insertions(+), 244 deletions(-)
 delete mode 100644 v2/index/indexgobhash.go
 delete mode 100644 v2/index/indexhashed.go

diff --git a/v2/blockstore/example_test.go b/v2/blockstore/example_test.go
index 72540e5..00a81dc 100644
--- a/v2/blockstore/example_test.go
+++ b/v2/blockstore/example_test.go
@@ -20,7 +20,10 @@ func ExampleOpenReadOnly() {
 	// Note, `OpenReadOnly` accepts both CARv1 and CARv2 formats and transparently generates an index
 	// in the background if necessary.
 	// This instance sets the ZeroLengthSectionAsEOF option to treat zero-length sections in the file as EOF.
- robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car", carv2.ZeroLengthSectionAsEOF) + robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car", + blockstore.UseWholeCIDs(true), + carv2.ZeroLengthSectionAsEOF(true), + ) if err != nil { panic(err) } diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index 8e664ea..00d4146 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -55,7 +55,7 @@ func newRecordFromCid(c cid.Cid, at uint64) recordDigest { panic(err) } - return recordDigest{d.Digest, index.Record{Cid: c, Idx: at}} + return recordDigest{d.Digest, index.Record{Cid: c, Offset: at}} } func (ii *insertionIndex) insertNoReplace(key cid.Cid, n uint64) { @@ -77,7 +77,31 @@ func (ii *insertionIndex) Get(c cid.Cid) (uint64, error) { return 0, errUnsupported } - return r.Record.Idx, nil + return r.Record.Offset, nil +} + +func (ii *insertionIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { + d, err := multihash.Decode(c.Hash()) + if err != nil { + return err + } + entry := recordDigest{digest: d.Digest} + + any := false + iter := func(i llrb.Item) bool { + existing := i.(recordDigest) + if !bytes.Equal(existing.digest, entry.digest) { + // We've already looked at all entries with matching digests. + return false + } + any = true + return fn(existing.Record.Offset) + } + ii.items.AscendGreaterOrEqual(entry, iter) + if !any { + return index.ErrNotFound + } + return nil } func (ii *insertionIndex) Marshal(w io.Writer) error { @@ -152,6 +176,10 @@ func (ii *insertionIndex) flatten() (index.Index, error) { return si, nil } +// note that hasExactCID is very similar to GetAll, +// but it's separate as it allows us to compare Record.Cid directly, +// whereas GetAll just provides Record.Offset. + func (ii *insertionIndex) hasExactCID(c cid.Cid) bool { d, err := multihash.Decode(c.Hash()) if err != nil { diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 34672ca..6c2397f 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -65,7 +65,6 @@ type ReadOnly struct { // go-car/v2 package. func UseWholeCIDs(enable bool) carv2.ReadOption { return func(o *carv2.ReadOptions) { - // TODO: update methods like Get, Has, and AllKeysChan to obey this. o.BlockstoreUseWholeCIDs = enable } } @@ -177,22 +176,35 @@ func (b *ReadOnly) Has(key cid.Cid) (bool, error) { b.mu.RLock() defer b.mu.RUnlock() - offset, err := b.idx.Get(key) + var fnFound bool + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + var err error + _, err = varint.ReadUvarint(uar) + if err != nil { + fnErr = err + return false + } + _, readCid, err := cid.CidFromReader(uar) + if err != nil { + fnErr = err + return false + } + if b.ropts.BlockstoreUseWholeCIDs { + fnFound = readCid.Equals(key) + return !fnFound // continue looking if we haven't found it + } else { + fnFound = bytes.Equal(readCid.Hash(), key.Hash()) + return false + } + }) if errors.Is(err, index.ErrNotFound) { return false, nil } else if err != nil { return false, err } - uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) - _, err = varint.ReadUvarint(uar) - if err != nil { - return false, err - } - _, c, err := cid.CidFromReader(uar) - if err != nil { - return false, err - } - return bytes.Equal(key.Hash(), c.Hash()), nil + return fnFound, fnErr } // Get gets a block corresponding to the given key. 
@@ -200,21 +212,39 @@ func (b *ReadOnly) Get(key cid.Cid) (blocks.Block, error) { b.mu.RLock() defer b.mu.RUnlock() - offset, err := b.idx.Get(key) - if err != nil { - if err == index.ErrNotFound { - err = blockstore.ErrNotFound + var fnData []byte + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + readCid, data, err := b.readBlock(int64(offset)) + if err != nil { + fnErr = err + return false } + if b.ropts.BlockstoreUseWholeCIDs { + if readCid.Equals(key) { + fnData = data + return false + } else { + return true // continue looking + } + } else { + if bytes.Equal(readCid.Hash(), key.Hash()) { + fnData = data + } + return false + } + }) + if errors.Is(err, index.ErrNotFound) { + return nil, blockstore.ErrNotFound + } else if err != nil { return nil, err + } else if fnErr != nil { + return nil, fnErr } - entry, data, err := b.readBlock(int64(offset)) - if err != nil { - return nil, err - } - if !bytes.Equal(key.Hash(), entry.Hash()) { + if fnData == nil { return nil, blockstore.ErrNotFound } - return blocks.NewBlockWithCid(data, key) + return blocks.NewBlockWithCid(fnData, key) } // GetSize gets the size of an item corresponding to the given key. @@ -222,23 +252,45 @@ func (b *ReadOnly) GetSize(key cid.Cid) (int, error) { b.mu.RLock() defer b.mu.RUnlock() - idx, err := b.idx.Get(key) - if err != nil { - return -1, err - } - rdr := internalio.NewOffsetReadSeeker(b.backing, int64(idx)) - sectionLen, err := varint.ReadUvarint(rdr) - if err != nil { + var fnSize int = -1 + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + sectionLen, err := varint.ReadUvarint(rdr) + if err != nil { + fnErr = err + return false + } + cidLen, readCid, err := cid.CidFromReader(rdr) + if err != nil { + fnErr = err + return false + } + if b.ropts.BlockstoreUseWholeCIDs { + if readCid.Equals(key) { + fnSize = int(sectionLen) - cidLen + return false + } else { + return true // continue looking + } + } else { + if bytes.Equal(readCid.Hash(), key.Hash()) { + fnSize = int(sectionLen) - cidLen + } + return false + } + }) + if errors.Is(err, index.ErrNotFound) { return -1, blockstore.ErrNotFound + } else if err != nil { + return -1, err + } else if fnErr != nil { + return -1, fnErr } - cidLen, readCid, err := cid.CidFromReader(rdr) - if err != nil { - return 0, err - } - if !readCid.Equals(key) { + if fnSize == -1 { return -1, blockstore.ErrNotFound } - return int(sectionLen) - cidLen, err + return fnSize, nil } // Put is not supported and always returns an error. @@ -304,6 +356,11 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return // TODO: log this error } + // If we're just using multihashes, flatten to the "raw" codec. 
+ if !b.ropts.BlockstoreUseWholeCIDs { + c = cid.NewCidV1(cid.Raw, c.Hash()) + } + select { case ch <- c: case <-ctx.Done(): diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index 3d75fd9..4d443c3 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -49,7 +49,9 @@ func TestReadOnly(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - subject, err := OpenReadOnly(tt.v1OrV2path) + subject, err := OpenReadOnly(tt.v1OrV2path, + UseWholeCIDs(true), + ) t.Cleanup(func() { subject.Close() }) require.NoError(t, err) diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 665a653..ba87ee6 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -89,7 +89,6 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption { // Resuming from finalized files is allowed. However, resumption will regenerate the index // regardless by scanning every existing block in file. func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.ReadWriteOption) (*ReadWrite, error) { - // TODO: enable deduplication by default now that resumption is automatically attempted. f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666) // TODO: Should the user be able to configure FileMode permissions? if err != nil { return nil, fmt.Errorf("could not open read/write file: %w", err) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 6612268..2494da6 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -123,21 +123,29 @@ func TestBlockstore(t *testing.T) { func TestBlockstorePutSameHashes(t *testing.T) { tdir := t.TempDir() - // wbs allows duplicate puts. - wbs, err := blockstore.OpenReadWrite( - filepath.Join(tdir, "readwrite.car"), nil, + // This blockstore allows duplicate puts, + // and identifies by multihash as per the default. + wbsAllowDups, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-allowdup.car"), nil, blockstore.AllowDuplicatePuts(true), ) require.NoError(t, err) - t.Cleanup(func() { wbs.Finalize() }) + t.Cleanup(func() { wbsAllowDups.Finalize() }) - // wbs deduplicates puts by CID. - wbsd, err := blockstore.OpenReadWrite( - filepath.Join(tdir, "readwrite-dedup.car"), nil, + // This blockstore deduplicates puts by CID. + wbsByCID, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-dedup-wholecid.car"), nil, blockstore.UseWholeCIDs(true), ) require.NoError(t, err) - t.Cleanup(func() { wbsd.Finalize() }) + t.Cleanup(func() { wbsByCID.Finalize() }) + + // This blockstore deduplicates puts by multihash. + wbsByHash, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-dedup-hash.car"), nil, + ) + require.NoError(t, err) + t.Cleanup(func() { wbsByHash.Finalize() }) var blockList []blocks.Block @@ -160,15 +168,15 @@ func TestBlockstorePutSameHashes(t *testing.T) { // However, we have multiple CIDs for each multihash. // We also have two duplicate CIDs. 
data1 := []byte("foo bar") -	appendBlock(data1, 0, cid.Raw) -	appendBlock(data1, 1, cid.Raw) +	appendBlock(data1, 0, cid.DagProtobuf) +	appendBlock(data1, 1, cid.DagProtobuf) 	appendBlock(data1, 1, cid.DagCBOR) 	appendBlock(data1, 1, cid.DagCBOR) // duplicate CID 	data2 := []byte("foo bar baz") -	appendBlock(data2, 0, cid.Raw) -	appendBlock(data2, 1, cid.Raw) -	appendBlock(data2, 1, cid.Raw) // duplicate CID +	appendBlock(data2, 0, cid.DagProtobuf) +	appendBlock(data2, 1, cid.DagProtobuf) +	appendBlock(data2, 1, cid.DagProtobuf) // duplicate CID 	appendBlock(data2, 1, cid.DagCBOR) 	countBlocks := func(bs *blockstore.ReadWrite) int { @@ -176,52 +184,75 @@ func TestBlockstorePutSameHashes(t *testing.T) { 		require.NoError(t, err) 		n := 0 -		for range ch { +		for c := range ch { +			if c.Prefix().Codec == cid.Raw { +				if bs == wbsByCID { +					t.Error("expected blockstore with UseWholeCIDs to not flatten on AllKeysChan") +				} +			} else { +				if bs != wbsByCID { +					t.Error("expected blockstore without UseWholeCIDs to flatten on AllKeysChan") +				} +			} 			n++ 		} 		return n 	} -	for i, block := range blockList { -		// Has should never error here. -		// The first block should be missing. -		// Others might not, given the duplicate hashes. -		has, err := wbs.Has(block.Cid()) -		require.NoError(t, err) -		if i == 0 { -			require.False(t, has) -		} +	putBlockList := func(bs *blockstore.ReadWrite) { +		for i, block := range blockList { +			// Has should never error here. +			// The first block should be missing. +			// Others might not, given the duplicate hashes. +			has, err := bs.Has(block.Cid()) +			require.NoError(t, err) +			if i == 0 { +				require.False(t, has) +			} -		err = wbs.Put(block) -		require.NoError(t, err) -	} +			err = bs.Put(block) +			require.NoError(t, err) -	for _, block := range blockList { -		has, err := wbs.Has(block.Cid()) -		require.NoError(t, err) -		require.True(t, has) +			// Has, Get, and GetSize need to work right after a Put. +			has, err = bs.Has(block.Cid()) +			require.NoError(t, err) +			require.True(t, has) -		got, err := wbs.Get(block.Cid()) -		require.NoError(t, err) -		require.Equal(t, block.Cid(), got.Cid()) -		require.Equal(t, block.RawData(), got.RawData()) +			got, err := bs.Get(block.Cid()) +			require.NoError(t, err) +			require.Equal(t, block.Cid(), got.Cid()) +			require.Equal(t, block.RawData(), got.RawData()) + +			size, err := bs.GetSize(block.Cid()) +			require.NoError(t, err) +			require.Equal(t, len(block.RawData()), size) +		} 	} -	require.Equal(t, len(blockList), countBlocks(wbs)) +	putBlockList(wbsAllowDups) +	require.Equal(t, len(blockList), countBlocks(wbsAllowDups)) -	err = wbs.Finalize() +	err = wbsAllowDups.Finalize() 	require.NoError(t, err) 	// Put the same list of blocks to the blockstore that 	// deduplicates by CID. -	// We should end up with two fewer blocks. -	for _, block := range blockList { -		err = wbsd.Put(block) -		require.NoError(t, err) -	} -	require.Equal(t, len(blockList)-2, countBlocks(wbsd)) +	// We should end up with two fewer blocks, +	// as two are entire CID duplicates. +	putBlockList(wbsByCID) +	require.Equal(t, len(blockList)-2, countBlocks(wbsByCID)) + +	err = wbsByCID.Finalize() +	require.NoError(t, err) + +	// Put the same list of blocks to the blockstore that +	// deduplicates by multihash. +	// We should end up with just two blocks, +	// as the original set of blocks only has two distinct multihashes. 
+ putBlockList(wbsByHash) + require.Equal(t, 2, countBlocks(wbsByHash)) - err = wbsd.Finalize() + err = wbsByHash.Finalize() require.NoError(t, err) } @@ -280,7 +311,8 @@ func TestBlockstoreNullPadding(t *testing.T) { // A sample null-padded CARv1 file. paddedV1 = append(paddedV1, make([]byte, 2048)...) - rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLengthSectionAsEOF) + rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, + carv2.ZeroLengthSectionAsEOF(true)) require.NoError(t, err) roots, err := rbs.Roots() @@ -313,7 +345,8 @@ func TestBlockstoreResumption(t *testing.T) { path := filepath.Join(t.TempDir(), "readwrite-resume.car") // Create an incomplete CARv2 file with no blocks put. - subject, err := blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err := blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) // For each block resume on the same file, putting blocks one at a time. @@ -345,7 +378,8 @@ func TestBlockstoreResumption(t *testing.T) { // We do this to avoid resource leak during testing. require.NoError(t, subject.Close()) } - subject, err = blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err = blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) } require.NoError(t, subject.Put(b)) @@ -377,7 +411,8 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, subject.Close()) // Finalize the blockstore to complete partially written CARv2 file. - subject, err = blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err = blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) require.NoError(t, subject.Finalize()) @@ -528,9 +563,9 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) { indexSize := stat.Size() - int64(wantIndexOffset) gotIdx, err := index.ReadFrom(io.NewSectionReader(f, int64(wantIndexOffset), indexSize)) require.NoError(t, err) - _, err = gotIdx.Get(oneTestBlockCid) + _, err = index.GetFirst(gotIdx, oneTestBlockCid) require.NoError(t, err) - _, err = gotIdx.Get(anotherTestBlockCid) + _, err = index.GetFirst(gotIdx, anotherTestBlockCid) require.NoError(t, err) } diff --git a/v2/index/example_test.go b/v2/index/example_test.go index 9f8368d..47347d8 100644 --- a/v2/index/example_test.go +++ b/v2/index/example_test.go @@ -34,7 +34,7 @@ func ExampleReadFrom() { // For each root CID print the offset relative to CARv1 data payload. for _, r := range roots { - offset, err := idx.Get(r) + offset, err := index.GetFirst(idx, r) if err != nil { panic(err) } diff --git a/v2/index/index.go b/v2/index/index.go index 1198217..3408dfb 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -14,33 +14,59 @@ import ( "github.com/ipfs/go-cid" ) -// Codec table is a first var-int in CAR indexes -const ( - indexHashed codec = 0x300000 + iota - indexSingleSorted - indexGobHashed -) - type ( - // codec is used as a multicodec identifier for CAR index files - codec int - // Record is a pre-processed record of a car item and location. Record struct { cid.Cid - Idx uint64 + Offset uint64 } // Index provides an interface for looking up byte offset of a given CID. + // + // Note that each indexing mechanism is free to match CIDs however it + // sees fit. For example, multicodec.CarIndexSorted only indexes + // multihash digests, meaning that Get and GetAll will find matching + // blocks even if the CID's encoding multicodec differs. 
Other index +	// implementations might index the entire CID, the entire multihash, or +	// just part of a multihash's digest. 	Index interface { +		// Codec provides the multicodec code that the index implements. +		// +		// Note that this may return a reserved code if the index +		// implementation is not defined in a spec. 		Codec() multicodec.Code + +		// Marshal encodes the index in serial form. 		Marshal(w io.Writer) error +		// Unmarshal decodes the index from its serial form. 		Unmarshal(r io.Reader) error -		Get(cid.Cid) (uint64, error) + +		// Load inserts a number of records into the index. 		Load([]Record) error + +		// GetAll looks up all blocks matching a given CID, +		// calling a function for each one of their offsets. +		// +		// If the function returns false, GetAll stops. +		// +		// If no error occurred and the CID isn't indexed, +		// meaning that no callbacks happen, +		// ErrNotFound is returned. +		GetAll(cid.Cid, func(uint64) bool) error 	} ) +// GetFirst is a wrapper over Index.GetAll, returning the offset for the first +// matching indexed CID. +func GetFirst(idx Index, key cid.Cid) (uint64, error) { +	var firstOffset uint64 +	err := idx.GetAll(key, func(offset uint64) bool { +		firstOffset = offset +		return false +	}) +	return firstOffset, err +} + // New constructs a new index corresponding to the given CAR index codec. func New(codec multicodec.Code) (Index, error) { 	switch codec { diff --git a/v2/index/index_test.go b/v2/index/index_test.go index a32205d..03efbc9 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -32,21 +32,6 @@ func TestNew(t *testing.T) { 			codec:   multicodec.Cidv1, 			wantErr: true, 		}, -		{ -			name:    "IndexSingleSortedMultiCodecIsError", -			codec:   multicodec.Code(indexSingleSorted), -			wantErr: true, -		}, -		{ -			name:    "IndexHashedMultiCodecIsError", -			codec:   multicodec.Code(indexHashed), -			wantErr: true, -		}, -		{ -			name:    "IndexGobHashedMultiCodecIsError", -			codec:   multicodec.Code(indexGobHashed), -			wantErr: true, -		}, 	} 	for _, tt := range tests { 		t.Run(tt.name, func(t *testing.T) { @@ -82,7 +67,7 @@ func TestReadFrom(t *testing.T) { 	require.NoError(t, err) 	// Get offset from the index for a CID and assert it exists -	gotOffset, err := subject.Get(wantBlock.Cid()) +	gotOffset, err := GetFirst(subject, wantBlock.Cid()) 	require.NoError(t, err) 	require.NotZero(t, gotOffset) diff --git a/v2/index/indexgobhash.go b/v2/index/indexgobhash.go deleted file mode 100644 index a74e8b9..0000000 --- a/v2/index/indexgobhash.go +++ /dev/null @@ -1,48 +0,0 @@ -package index - -import ( -	"encoding/gob" -	"io" - -	"github.com/multiformats/go-multicodec" - -	"github.com/ipfs/go-cid" -) - -//lint:ignore U1000 kept for potential future use. -type mapGobIndex map[cid.Cid]uint64 - -func (m *mapGobIndex) Get(c cid.Cid) (uint64, error) { -	el, ok := (*m)[c] -	if !ok { -		return 0, ErrNotFound -	} -	return el, nil -} - -func (m *mapGobIndex) Marshal(w io.Writer) error { -	e := gob.NewEncoder(w) -	return e.Encode(m) -} - -func (m *mapGobIndex) Unmarshal(r io.Reader) error { -	d := gob.NewDecoder(r) -	return d.Decode(m) -} - -func (m *mapGobIndex) Codec() multicodec.Code { -	return multicodec.Code(indexHashed) -} - -func (m *mapGobIndex) Load(rs []Record) error { -	for _, r := range rs { -		(*m)[r.Cid] = r.Idx -	} -	return nil -} - -//lint:ignore U1000 kept for potential future use. 
-func newGobHashed() Index { -	mi := make(mapGobIndex) -	return &mi -} diff --git a/v2/index/indexhashed.go b/v2/index/indexhashed.go deleted file mode 100644 index 84b0ad1..0000000 --- a/v2/index/indexhashed.go +++ /dev/null @@ -1,47 +0,0 @@ -package index - -import ( -	"io" - -	"github.com/multiformats/go-multicodec" - -	"github.com/ipfs/go-cid" -	cbor "github.com/whyrusleeping/cbor/go" -) - -//lint:ignore U1000 kept for potential future use. -type mapIndex map[cid.Cid]uint64 - -func (m *mapIndex) Get(c cid.Cid) (uint64, error) { -	el, ok := (*m)[c] -	if !ok { -		return 0, ErrNotFound -	} -	return el, nil -} - -func (m *mapIndex) Marshal(w io.Writer) error { -	return cbor.Encode(w, m) -} - -func (m *mapIndex) Unmarshal(r io.Reader) error { -	d := cbor.NewDecoder(r) -	return d.Decode(m) -} - -func (m *mapIndex) Codec() multicodec.Code { -	return multicodec.Code(indexHashed) -} - -func (m *mapIndex) Load(rs []Record) error { -	for _, r := range rs { -		(*m)[r.Cid] = r.Idx -	} -	return nil -} - -//lint:ignore U1000 kept for potential future use. -func newHashed() Index { -	mi := make(mapIndex) -	return &mi -} diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 65446f6..5f37eee 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -44,10 +44,6 @@ func (r recordSet) Swap(i, j int) { 	r[i], r[j] = r[j], r[i] } -func (s *singleWidthIndex) Codec() multicodec.Code { -	return multicodec.Code(indexSingleSorted) -} - func (s *singleWidthIndex) Marshal(w io.Writer) error { 	if err := binary.Write(w, binary.LittleEndian, s.width); err != nil { 		return err @@ -77,25 +73,34 @@ func (s *singleWidthIndex) Less(i int, digest []byte) bool { 	return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0 } -func (s *singleWidthIndex) Get(c cid.Cid) (uint64, error) { +func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { 	d, err := multihash.Decode(c.Hash()) 	if err != nil { -		return 0, err +		return err 	} -	return s.get(d.Digest) +	return s.getAll(d.Digest, fn) } -func (s *singleWidthIndex) get(d []byte) (uint64, error) { +func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error { 	idx := sort.Search(int(s.len), func(i int) bool { 		return s.Less(i, d) 	}) 	if uint64(idx) == s.len { -		return 0, ErrNotFound +		return ErrNotFound +	} + +	any := false +	for ; uint64(idx) < s.len && bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]); idx++ { +		any = true +		offset := binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)]) +		if !fn(offset) { +			break +		} 	} -	if !bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]) { -		return 0, ErrNotFound +	if !any { +		return ErrNotFound 	} -	return binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)]), nil +	return nil } func (s *singleWidthIndex) Load(items []Record) error { @@ -115,15 +120,15 @@ func (s *singleWidthIndex) Load(items []Record) error { 	return nil } -func (m *multiWidthIndex) Get(c cid.Cid) (uint64, error) { +func (m *multiWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { 	d, err := multihash.Decode(c.Hash()) 	if err != nil { -		return 0, err +		return err 	} 	if s, ok := (*m)[uint32(len(d.Digest)+8)]; ok { -		return s.get(d.Digest) +		return s.getAll(d.Digest, fn) 	} -	return 0, ErrNotFound +	return ErrNotFound } func (m *multiWidthIndex) Codec() multicodec.Code { @@ -184,7 +189,7 @@ func (m *multiWidthIndex) Load(items []Record) error { 			idxs[len(digest)] = make([]digestRecord, 0) 			idx = idxs[len(digest)] 		} -		idxs[len(digest)] = 
append(idx, digestRecord{digest, item.Idx}) +		idxs[len(digest)] = append(idx, digestRecord{digest, item.Offset}) 	} 	// Sort each list, then write to compact form. @@ -209,9 +214,3 @@ func newSorted() Index { 	m := make(multiWidthIndex) 	return &m } - -//lint:ignore U1000 kept for potential future use. -func newSingleSorted() Index { -	s := singleWidthIndex{} -	return &s -} diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index fe0ca96..767937b 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -18,10 +18,6 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) { 		name    string 		subject Index 	}{ -		{ -			"SingleSorted", -			newSingleSorted(), -		}, 		{ 			"Sorted", 			newSorted(), @@ -29,7 +25,7 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) { 	} 	for _, tt := range tests { 		t.Run(tt.name, func(t *testing.T) { -			gotOffset, err := tt.subject.Get(nonExistingKey) +			gotOffset, err := GetFirst(tt.subject, nonExistingKey) 			require.Equal(t, ErrNotFound, err) 			require.Equal(t, uint64(0), gotOffset) 		}) diff --git a/v2/index_gen.go b/v2/index_gen.go index 5a60fb7..ae93849 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -75,7 +75,7 @@ func GenerateIndex(v1r io.Reader, opts ...ReadOption) (index.Index, error) { 		if err != nil { 			return nil, err 		} -		records = append(records, index.Record{Cid: c, Idx: uint64(sectionOffset)}) +		records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)}) 		// Seek to the next section by skipping the block. 		// The section length includes the CID, so subtract it. diff --git a/v2/options.go b/v2/options.go index 89044a7..b206c83 100644 --- a/v2/options.go +++ b/v2/options.go @@ -46,8 +46,10 @@ type ReadWriteOption interface { // a zero-length section as the end of the input CAR file. For example, this can // be useful to allow "null padding" after a CARv1 without knowing where the // padding begins. -func ZeroLengthSectionAsEOF(o *ReadOptions) { -	o.ZeroLengthSectionAsEOF = true +func ZeroLengthSectionAsEOF(enable bool) ReadOption { +	return func(o *ReadOptions) { +		o.ZeroLengthSectionAsEOF = enable +	} } // UseDataPadding is a write option which sets the padding to be added between -- GitLab
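
The sketches below are appended notes, not part of the patch above. The first one exercises the new index.GetAll contract and the GetFirst helper introduced by this change, assuming the go-car v2 module as of this patch; the payload "same block body" and the two record offsets (100 and 200) are made-up values for illustration only.

package main

import (
	"fmt"

	"github.com/ipfs/go-cid"
	"github.com/multiformats/go-multicodec"
	"github.com/multiformats/go-multihash"

	"github.com/ipld/go-car/v2/index"
)

func main() {
	// Two CIDs that share one multihash but use different codecs,
	// mirroring the duplicate-hash setup in the blockstore tests.
	h, err := multihash.Sum([]byte("same block body"), multihash.SHA2_256, -1)
	if err != nil {
		panic(err)
	}
	c1 := cid.NewCidV1(cid.DagProtobuf, h)
	c2 := cid.NewCidV1(cid.DagCBOR, h)

	idx, err := index.New(multicodec.CarIndexSorted)
	if err != nil {
		panic(err)
	}
	if err := idx.Load([]index.Record{
		{Cid: c1, Offset: 100},
		{Cid: c2, Offset: 200},
	}); err != nil {
		panic(err)
	}

	// CarIndexSorted indexes multihash digests, so both records match
	// either CID. Returning true keeps the iteration going.
	err = idx.GetAll(c1, func(offset uint64) bool {
		fmt.Println("candidate offset:", offset)
		return true
	})
	if err != nil {
		panic(err)
	}

	// GetFirst stops at the first match, much like the removed Index.Get.
	offset, err := index.GetFirst(idx, c2)
	if err != nil {
		panic(err)
	}
	fmt.Println("first offset:", offset)
}

The boolean returned by the callback is what lets callers stop early; GetFirst is just GetAll with a callback that records one offset and returns false.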
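
A second sketch shows what UseWholeCIDs changes at the blockstore level, using the read/write blockstore touched by this patch. The temporary directory, file name, payload, and codec choices are arbitrary; the context-free Put/Has signatures are the ones used throughout this patch.

package main

import (
	"fmt"
	"os"
	"path/filepath"

	blocks "github.com/ipfs/go-block-format"
	"github.com/ipfs/go-cid"
	"github.com/multiformats/go-multihash"

	"github.com/ipld/go-car/v2/blockstore"
)

func main() {
	data := []byte("hello car")
	h, err := multihash.Sum(data, multihash.SHA2_256, -1)
	if err != nil {
		panic(err)
	}
	// Two CIDs for the same payload: same multihash, different codecs.
	cborCid := cid.NewCidV1(cid.DagCBOR, h)
	pbCid := cid.NewCidV1(cid.DagProtobuf, h)

	dir, err := os.MkdirTemp("", "car-wholecid-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	bs, err := blockstore.OpenReadWrite(filepath.Join(dir, "example.car"), nil,
		blockstore.UseWholeCIDs(true),
	)
	if err != nil {
		panic(err)
	}

	blk, err := blocks.NewBlockWithCid(data, cborCid)
	if err != nil {
		panic(err)
	}
	if err := bs.Put(blk); err != nil {
		panic(err)
	}

	// With UseWholeCIDs(true), only the exact CID that was put matches.
	hasCbor, _ := bs.Has(cborCid)
	hasPb, _ := bs.Has(pbCid)
	fmt.Println(hasCbor, hasPb) // true false

	// Without the option, both lookups would report true, since matching
	// would fall back to the multihash alone.

	if err := bs.Finalize(); err != nil {
		panic(err)
	}
}

The same switch governs AllKeysChan: with whole CIDs enabled the original CIDs are emitted, while in the default multihash mode they are flattened to the raw codec, as the readonly.go change above does.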
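
Finally, a small self-contained illustration of the entry layout that the slicing in indexsorted.go's getAll relies on: each bucket holds fixed-width entries consisting of a multihash digest followed by an 8-byte little-endian offset, which is why multiWidthIndex keys its buckets by len(digest)+8. The 4-byte digest here is artificial, not a real multihash digest.

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	const digestLen = 4         // artificially small; sha2-256 digests are 32 bytes
	const width = digestLen + 8 // the bucket key used by multiWidthIndex

	// One entry: digest bytes, then the block offset as a little-endian uint64.
	entry := make([]byte, width)
	copy(entry, []byte{0xde, 0xad, 0xbe, 0xef})
	binary.LittleEndian.PutUint64(entry[digestLen:], 42)

	// The same slicing getAll performs for entry i (here i = 0).
	digest := entry[:width-8]
	offset := binary.LittleEndian.Uint64(entry[width-8 : width])
	fmt.Printf("digest=%x offset=%d\n", digest, offset)
}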