diff --git a/v2/blockstore/example_test.go b/v2/blockstore/example_test.go index 72540e5fb6a6dbe17d916a423f7a1be9eed8778d..00a81dc7f877b598d0156665d117e3103ca31873 100644 --- a/v2/blockstore/example_test.go +++ b/v2/blockstore/example_test.go @@ -20,7 +20,10 @@ func ExampleOpenReadOnly() { // Note, `OpenReadOnly` accepts bot CARv1 and CARv2 formats and transparently generate index // in the background if necessary. // This instance sets ZeroLengthSectionAsEOF option to treat zero sized sections in file as EOF. - robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car", carv2.ZeroLengthSectionAsEOF) + robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car", + blockstore.UseWholeCIDs(true), + carv2.ZeroLengthSectionAsEOF(true), + ) if err != nil { panic(err) } diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index 8e664eac90491b1b7a97bea0e08438dd5992d7f7..00d414656fe013a94b24a2280575ee69fb65bd18 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -55,7 +55,7 @@ func newRecordFromCid(c cid.Cid, at uint64) recordDigest { panic(err) } - return recordDigest{d.Digest, index.Record{Cid: c, Idx: at}} + return recordDigest{d.Digest, index.Record{Cid: c, Offset: at}} } func (ii *insertionIndex) insertNoReplace(key cid.Cid, n uint64) { @@ -77,7 +77,31 @@ func (ii *insertionIndex) Get(c cid.Cid) (uint64, error) { return 0, errUnsupported } - return r.Record.Idx, nil + return r.Record.Offset, nil +} + +func (ii *insertionIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { + d, err := multihash.Decode(c.Hash()) + if err != nil { + return err + } + entry := recordDigest{digest: d.Digest} + + any := false + iter := func(i llrb.Item) bool { + existing := i.(recordDigest) + if !bytes.Equal(existing.digest, entry.digest) { + // We've already looked at all entries with matching digests. + return false + } + any = true + return fn(existing.Record.Offset) + } + ii.items.AscendGreaterOrEqual(entry, iter) + if !any { + return index.ErrNotFound + } + return nil } func (ii *insertionIndex) Marshal(w io.Writer) error { @@ -152,6 +176,10 @@ func (ii *insertionIndex) flatten() (index.Index, error) { return si, nil } +// note that hasExactCID is very similar to GetAll, +// but it's separate as it allows us to compare Record.Cid directly, +// whereas GetAll just provides Record.Offset. + func (ii *insertionIndex) hasExactCID(c cid.Cid) bool { d, err := multihash.Decode(c.Hash()) if err != nil { diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 34672cad77fc0f51c58fcec6b7c05e102572f1df..6c2397f9919b68739c6e029baf70b1facd0dc4b3 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -65,7 +65,6 @@ type ReadOnly struct { // go-car/v2 package. func UseWholeCIDs(enable bool) carv2.ReadOption { return func(o *carv2.ReadOptions) { - // TODO: update methods like Get, Has, and AllKeysChan to obey this. o.BlockstoreUseWholeCIDs = enable } } @@ -177,22 +176,35 @@ func (b *ReadOnly) Has(key cid.Cid) (bool, error) { b.mu.RLock() defer b.mu.RUnlock() - offset, err := b.idx.Get(key) + var fnFound bool + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + var err error + _, err = varint.ReadUvarint(uar) + if err != nil { + fnErr = err + return false + } + _, readCid, err := cid.CidFromReader(uar) + if err != nil { + fnErr = err + return false + } + if b.ropts.BlockstoreUseWholeCIDs { + fnFound = readCid.Equals(key) + return !fnFound // continue looking if we haven't found it + } else { + fnFound = bytes.Equal(readCid.Hash(), key.Hash()) + return false + } + }) if errors.Is(err, index.ErrNotFound) { return false, nil } else if err != nil { return false, err } - uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) - _, err = varint.ReadUvarint(uar) - if err != nil { - return false, err - } - _, c, err := cid.CidFromReader(uar) - if err != nil { - return false, err - } - return bytes.Equal(key.Hash(), c.Hash()), nil + return fnFound, fnErr } // Get gets a block corresponding to the given key. @@ -200,21 +212,39 @@ func (b *ReadOnly) Get(key cid.Cid) (blocks.Block, error) { b.mu.RLock() defer b.mu.RUnlock() - offset, err := b.idx.Get(key) - if err != nil { - if err == index.ErrNotFound { - err = blockstore.ErrNotFound + var fnData []byte + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + readCid, data, err := b.readBlock(int64(offset)) + if err != nil { + fnErr = err + return false } + if b.ropts.BlockstoreUseWholeCIDs { + if readCid.Equals(key) { + fnData = data + return false + } else { + return true // continue looking + } + } else { + if bytes.Equal(readCid.Hash(), key.Hash()) { + fnData = data + } + return false + } + }) + if errors.Is(err, index.ErrNotFound) { + return nil, blockstore.ErrNotFound + } else if err != nil { return nil, err + } else if fnErr != nil { + return nil, fnErr } - entry, data, err := b.readBlock(int64(offset)) - if err != nil { - return nil, err - } - if !bytes.Equal(key.Hash(), entry.Hash()) { + if fnData == nil { return nil, blockstore.ErrNotFound } - return blocks.NewBlockWithCid(data, key) + return blocks.NewBlockWithCid(fnData, key) } // GetSize gets the size of an item corresponding to the given key. @@ -222,23 +252,45 @@ func (b *ReadOnly) GetSize(key cid.Cid) (int, error) { b.mu.RLock() defer b.mu.RUnlock() - idx, err := b.idx.Get(key) - if err != nil { - return -1, err - } - rdr := internalio.NewOffsetReadSeeker(b.backing, int64(idx)) - sectionLen, err := varint.ReadUvarint(rdr) - if err != nil { + var fnSize int = -1 + var fnErr error + err := b.idx.GetAll(key, func(offset uint64) bool { + rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + sectionLen, err := varint.ReadUvarint(rdr) + if err != nil { + fnErr = err + return false + } + cidLen, readCid, err := cid.CidFromReader(rdr) + if err != nil { + fnErr = err + return false + } + if b.ropts.BlockstoreUseWholeCIDs { + if readCid.Equals(key) { + fnSize = int(sectionLen) - cidLen + return false + } else { + return true // continue looking + } + } else { + if bytes.Equal(readCid.Hash(), key.Hash()) { + fnSize = int(sectionLen) - cidLen + } + return false + } + }) + if errors.Is(err, index.ErrNotFound) { return -1, blockstore.ErrNotFound + } else if err != nil { + return -1, err + } else if fnErr != nil { + return -1, fnErr } - cidLen, readCid, err := cid.CidFromReader(rdr) - if err != nil { - return 0, err - } - if !readCid.Equals(key) { + if fnSize == -1 { return -1, blockstore.ErrNotFound } - return int(sectionLen) - cidLen, err + return fnSize, nil } // Put is not supported and always returns an error. @@ -304,6 +356,11 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { return // TODO: log this error } + // If we're just using multihashes, flatten to the "raw" codec. + if !b.ropts.BlockstoreUseWholeCIDs { + c = cid.NewCidV1(cid.Raw, c.Hash()) + } + select { case ch <- c: case <-ctx.Done(): diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index 3d75fd96da659215935d968de880756c78b63922..4d443c3cfbe88c287501c9a25d353e42d844d684 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -49,7 +49,9 @@ func TestReadOnly(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - subject, err := OpenReadOnly(tt.v1OrV2path) + subject, err := OpenReadOnly(tt.v1OrV2path, + UseWholeCIDs(true), + ) t.Cleanup(func() { subject.Close() }) require.NoError(t, err) diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 665a65384e9555910bc64490dc22ad3898891fc3..ba87ee601cc243bcf6a561cabd26042685b3ab1b 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -89,7 +89,6 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption { // Resuming from finalized files is allowed. However, resumption will regenerate the index // regardless by scanning every existing block in file. func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.ReadWriteOption) (*ReadWrite, error) { - // TODO: enable deduplication by default now that resumption is automatically attempted. f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666) // TODO: Should the user be able to configure FileMode permissions? if err != nil { return nil, fmt.Errorf("could not open read/write file: %w", err) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 6612268f1448b3ad48880d86719ef54413bb78f9..2494da6ab8604185ecbf19a02754174fcd288ea7 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -123,21 +123,29 @@ func TestBlockstore(t *testing.T) { func TestBlockstorePutSameHashes(t *testing.T) { tdir := t.TempDir() - // wbs allows duplicate puts. - wbs, err := blockstore.OpenReadWrite( - filepath.Join(tdir, "readwrite.car"), nil, + // This blockstore allows duplicate puts, + // and identifies by multihash as per the default. + wbsAllowDups, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-allowdup.car"), nil, blockstore.AllowDuplicatePuts(true), ) require.NoError(t, err) - t.Cleanup(func() { wbs.Finalize() }) + t.Cleanup(func() { wbsAllowDups.Finalize() }) - // wbs deduplicates puts by CID. - wbsd, err := blockstore.OpenReadWrite( - filepath.Join(tdir, "readwrite-dedup.car"), nil, + // This blockstore deduplicates puts by CID. + wbsByCID, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-dedup-wholecid.car"), nil, blockstore.UseWholeCIDs(true), ) require.NoError(t, err) - t.Cleanup(func() { wbsd.Finalize() }) + t.Cleanup(func() { wbsByCID.Finalize() }) + + // This blockstore deduplicates puts by multihash. + wbsByHash, err := blockstore.OpenReadWrite( + filepath.Join(tdir, "readwrite-dedup-hash.car"), nil, + ) + require.NoError(t, err) + t.Cleanup(func() { wbsByHash.Finalize() }) var blockList []blocks.Block @@ -160,15 +168,15 @@ func TestBlockstorePutSameHashes(t *testing.T) { // However, we have multiple CIDs for each multihash. // We also have two duplicate CIDs. data1 := []byte("foo bar") - appendBlock(data1, 0, cid.Raw) - appendBlock(data1, 1, cid.Raw) + appendBlock(data1, 0, cid.DagProtobuf) + appendBlock(data1, 1, cid.DagProtobuf) appendBlock(data1, 1, cid.DagCBOR) appendBlock(data1, 1, cid.DagCBOR) // duplicate CID data2 := []byte("foo bar baz") - appendBlock(data2, 0, cid.Raw) - appendBlock(data2, 1, cid.Raw) - appendBlock(data2, 1, cid.Raw) // duplicate CID + appendBlock(data2, 0, cid.DagProtobuf) + appendBlock(data2, 1, cid.DagProtobuf) + appendBlock(data2, 1, cid.DagProtobuf) // duplicate CID appendBlock(data2, 1, cid.DagCBOR) countBlocks := func(bs *blockstore.ReadWrite) int { @@ -176,52 +184,75 @@ func TestBlockstorePutSameHashes(t *testing.T) { require.NoError(t, err) n := 0 - for range ch { + for c := range ch { + if c.Prefix().Codec == cid.Raw { + if bs == wbsByCID { + t.Error("expected blockstore with UseWholeCIDs to not flatten on AllKeysChan") + } + } else { + if bs != wbsByCID { + t.Error("expected blockstore without UseWholeCIDs to flatten on AllKeysChan") + } + } n++ } return n } - for i, block := range blockList { - // Has should never error here. - // The first block should be missing. - // Others might not, given the duplicate hashes. - has, err := wbs.Has(block.Cid()) - require.NoError(t, err) - if i == 0 { - require.False(t, has) - } + putBlockList := func(bs *blockstore.ReadWrite) { + for i, block := range blockList { + // Has should never error here. + // The first block should be missing. + // Others might not, given the duplicate hashes. + has, err := bs.Has(block.Cid()) + require.NoError(t, err) + if i == 0 { + require.False(t, has) + } - err = wbs.Put(block) - require.NoError(t, err) - } + err = bs.Put(block) + require.NoError(t, err) - for _, block := range blockList { - has, err := wbs.Has(block.Cid()) - require.NoError(t, err) - require.True(t, has) + // Has, Get, and GetSize need to work right after a Put. + has, err = bs.Has(block.Cid()) + require.NoError(t, err) + require.True(t, has) - got, err := wbs.Get(block.Cid()) - require.NoError(t, err) - require.Equal(t, block.Cid(), got.Cid()) - require.Equal(t, block.RawData(), got.RawData()) + got, err := bs.Get(block.Cid()) + require.NoError(t, err) + require.Equal(t, block.Cid(), got.Cid()) + require.Equal(t, block.RawData(), got.RawData()) + + size, err := bs.GetSize(block.Cid()) + require.NoError(t, err) + require.Equal(t, len(block.RawData()), size) + } } - require.Equal(t, len(blockList), countBlocks(wbs)) + putBlockList(wbsAllowDups) + require.Equal(t, len(blockList), countBlocks(wbsAllowDups)) - err = wbs.Finalize() + err = wbsAllowDups.Finalize() require.NoError(t, err) // Put the same list of blocks to the blockstore that // deduplicates by CID. - // We should end up with two fewer blocks. - for _, block := range blockList { - err = wbsd.Put(block) - require.NoError(t, err) - } - require.Equal(t, len(blockList)-2, countBlocks(wbsd)) + // We should end up with two fewer blocks, + // as two are entire CID duplicates. + putBlockList(wbsByCID) + require.Equal(t, len(blockList)-2, countBlocks(wbsByCID)) + + err = wbsByCID.Finalize() + require.NoError(t, err) + + // Put the same list of blocks to the blockstore that + // deduplicates by CID. + // We should end up with just two blocks, + // as the original set of blocks only has two distinct multihashes. + putBlockList(wbsByHash) + require.Equal(t, 2, countBlocks(wbsByHash)) - err = wbsd.Finalize() + err = wbsByHash.Finalize() require.NoError(t, err) } @@ -280,7 +311,8 @@ func TestBlockstoreNullPadding(t *testing.T) { // A sample null-padded CARv1 file. paddedV1 = append(paddedV1, make([]byte, 2048)...) - rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLengthSectionAsEOF) + rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, + carv2.ZeroLengthSectionAsEOF(true)) require.NoError(t, err) roots, err := rbs.Roots() @@ -313,7 +345,8 @@ func TestBlockstoreResumption(t *testing.T) { path := filepath.Join(t.TempDir(), "readwrite-resume.car") // Create an incomplete CARv2 file with no blocks put. - subject, err := blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err := blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) // For each block resume on the same file, putting blocks one at a time. @@ -345,7 +378,8 @@ func TestBlockstoreResumption(t *testing.T) { // We do this to avoid resource leak during testing. require.NoError(t, subject.Close()) } - subject, err = blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err = blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) } require.NoError(t, subject.Put(b)) @@ -377,7 +411,8 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, subject.Close()) // Finalize the blockstore to complete partially written CARv2 file. - subject, err = blockstore.OpenReadWrite(path, r.Header.Roots) + subject, err = blockstore.OpenReadWrite(path, r.Header.Roots, + blockstore.UseWholeCIDs(true)) require.NoError(t, err) require.NoError(t, subject.Finalize()) @@ -528,9 +563,9 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) { indexSize := stat.Size() - int64(wantIndexOffset) gotIdx, err := index.ReadFrom(io.NewSectionReader(f, int64(wantIndexOffset), indexSize)) require.NoError(t, err) - _, err = gotIdx.Get(oneTestBlockCid) + _, err = index.GetFirst(gotIdx, oneTestBlockCid) require.NoError(t, err) - _, err = gotIdx.Get(anotherTestBlockCid) + _, err = index.GetFirst(gotIdx, anotherTestBlockCid) require.NoError(t, err) } diff --git a/v2/index/example_test.go b/v2/index/example_test.go index 9f8368d1aa509e67b07df6ce2e15dca4effaf57e..47347d8355eaae4dc5b3faf10c0a8e46c59fa985 100644 --- a/v2/index/example_test.go +++ b/v2/index/example_test.go @@ -34,7 +34,7 @@ func ExampleReadFrom() { // For each root CID print the offset relative to CARv1 data payload. for _, r := range roots { - offset, err := idx.Get(r) + offset, err := index.GetFirst(idx, r) if err != nil { panic(err) } diff --git a/v2/index/index.go b/v2/index/index.go index 119821714e82b296dd6937fbfa04e8f5fb63cc0e..3408dfbd286f7ad8712ed0a89841d341366aef40 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -14,33 +14,59 @@ import ( "github.com/ipfs/go-cid" ) -// Codec table is a first var-int in CAR indexes -const ( - indexHashed codec = 0x300000 + iota - indexSingleSorted - indexGobHashed -) - type ( - // codec is used as a multicodec identifier for CAR index files - codec int - // Record is a pre-processed record of a car item and location. Record struct { cid.Cid - Idx uint64 + Offset uint64 } // Index provides an interface for looking up byte offset of a given CID. + // + // Note that each indexing mechanism is free to match CIDs however it + // sees fit. For example, multicodec.CarIndexSorted only indexes + // multihash digests, meaning that Get and GetAll will find matching + // blocks even if the CID's encoding multicodec differs. Other index + // implementations might index the entire CID, the entire multihash, or + // just part of a multihash's digest. Index interface { + // Codec provides the multicodec code that the index implements. + // + // Note that this may return a reserved code if the index + // implementation is not defined in a spec. Codec() multicodec.Code + + // Marshal encodes the index in serial form. Marshal(w io.Writer) error + // Unmarshal decodes the index from its serial form. Unmarshal(r io.Reader) error - Get(cid.Cid) (uint64, error) + + // Load inserts a number of records into the index. Load([]Record) error + + // Get looks up all blocks matching a given CID, + // calling a function for each one of their offsets. + // + // If the function returns false, GetAll stops. + // + // If no error occurred and the CID isn't indexed, + // meaning that no callbacks happen, + // ErrNotFound is returned. + GetAll(cid.Cid, func(uint64) bool) error } ) +// GetFirst is a wrapper over Index.GetAll, returning the offset for the first +// matching indexed CID. +func GetFirst(idx Index, key cid.Cid) (uint64, error) { + var firstOffset uint64 + err := idx.GetAll(key, func(offset uint64) bool { + firstOffset = offset + return false + }) + return firstOffset, err +} + // New constructs a new index corresponding to the given CAR index codec. func New(codec multicodec.Code) (Index, error) { switch codec { diff --git a/v2/index/index_test.go b/v2/index/index_test.go index a32205dcaa03cbd690aff3e6f41001693aadfc53..03efbc94d6a871d578fd63451faca9a274f421d0 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -32,21 +32,6 @@ func TestNew(t *testing.T) { codec: multicodec.Cidv1, wantErr: true, }, - { - name: "IndexSingleSortedMultiCodecIsError", - codec: multicodec.Code(indexSingleSorted), - wantErr: true, - }, - { - name: "IndexHashedMultiCodecIsError", - codec: multicodec.Code(indexHashed), - wantErr: true, - }, - { - name: "IndexGobHashedMultiCodecIsError", - codec: multicodec.Code(indexGobHashed), - wantErr: true, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -82,7 +67,7 @@ func TestReadFrom(t *testing.T) { require.NoError(t, err) // Get offset from the index for a CID and assert it exists - gotOffset, err := subject.Get(wantBlock.Cid()) + gotOffset, err := GetFirst(subject, wantBlock.Cid()) require.NoError(t, err) require.NotZero(t, gotOffset) diff --git a/v2/index/indexgobhash.go b/v2/index/indexgobhash.go deleted file mode 100644 index a74e8b92b909263bfd43948f92f4aec7e8d0327b..0000000000000000000000000000000000000000 --- a/v2/index/indexgobhash.go +++ /dev/null @@ -1,48 +0,0 @@ -package index - -import ( - "encoding/gob" - "io" - - "github.com/multiformats/go-multicodec" - - "github.com/ipfs/go-cid" -) - -//lint:ignore U1000 kept for potential future use. -type mapGobIndex map[cid.Cid]uint64 - -func (m *mapGobIndex) Get(c cid.Cid) (uint64, error) { - el, ok := (*m)[c] - if !ok { - return 0, ErrNotFound - } - return el, nil -} - -func (m *mapGobIndex) Marshal(w io.Writer) error { - e := gob.NewEncoder(w) - return e.Encode(m) -} - -func (m *mapGobIndex) Unmarshal(r io.Reader) error { - d := gob.NewDecoder(r) - return d.Decode(m) -} - -func (m *mapGobIndex) Codec() multicodec.Code { - return multicodec.Code(indexHashed) -} - -func (m *mapGobIndex) Load(rs []Record) error { - for _, r := range rs { - (*m)[r.Cid] = r.Idx - } - return nil -} - -//lint:ignore U1000 kept for potential future use. -func newGobHashed() Index { - mi := make(mapGobIndex) - return &mi -} diff --git a/v2/index/indexhashed.go b/v2/index/indexhashed.go deleted file mode 100644 index 84b0ad1575152c06359884bafd441b03926ec6e8..0000000000000000000000000000000000000000 --- a/v2/index/indexhashed.go +++ /dev/null @@ -1,47 +0,0 @@ -package index - -import ( - "io" - - "github.com/multiformats/go-multicodec" - - "github.com/ipfs/go-cid" - cbor "github.com/whyrusleeping/cbor/go" -) - -//lint:ignore U1000 kept for potential future use. -type mapIndex map[cid.Cid]uint64 - -func (m *mapIndex) Get(c cid.Cid) (uint64, error) { - el, ok := (*m)[c] - if !ok { - return 0, ErrNotFound - } - return el, nil -} - -func (m *mapIndex) Marshal(w io.Writer) error { - return cbor.Encode(w, m) -} - -func (m *mapIndex) Unmarshal(r io.Reader) error { - d := cbor.NewDecoder(r) - return d.Decode(m) -} - -func (m *mapIndex) Codec() multicodec.Code { - return multicodec.Code(indexHashed) -} - -func (m *mapIndex) Load(rs []Record) error { - for _, r := range rs { - (*m)[r.Cid] = r.Idx - } - return nil -} - -//lint:ignore U1000 kept for potential future use. -func newHashed() Index { - mi := make(mapIndex) - return &mi -} diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 65446f66518394dec9278384aa912b469877ae66..5f37eee446fa7d54684636fcefbf111880216717 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -44,10 +44,6 @@ func (r recordSet) Swap(i, j int) { r[i], r[j] = r[j], r[i] } -func (s *singleWidthIndex) Codec() multicodec.Code { - return multicodec.Code(indexSingleSorted) -} - func (s *singleWidthIndex) Marshal(w io.Writer) error { if err := binary.Write(w, binary.LittleEndian, s.width); err != nil { return err @@ -77,25 +73,34 @@ func (s *singleWidthIndex) Less(i int, digest []byte) bool { return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0 } -func (s *singleWidthIndex) Get(c cid.Cid) (uint64, error) { +func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { d, err := multihash.Decode(c.Hash()) if err != nil { - return 0, err + return err } - return s.get(d.Digest) + return s.getAll(d.Digest, fn) } -func (s *singleWidthIndex) get(d []byte) (uint64, error) { +func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error { idx := sort.Search(int(s.len), func(i int) bool { return s.Less(i, d) }) if uint64(idx) == s.len { - return 0, ErrNotFound + return ErrNotFound + } + + any := false + for bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]) { + any = true + offset := binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)]) + if !fn(offset) { + break + } } - if !bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]) { - return 0, ErrNotFound + if !any { + return ErrNotFound } - return binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)]), nil + return nil } func (s *singleWidthIndex) Load(items []Record) error { @@ -115,15 +120,15 @@ func (s *singleWidthIndex) Load(items []Record) error { return nil } -func (m *multiWidthIndex) Get(c cid.Cid) (uint64, error) { +func (m *multiWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { d, err := multihash.Decode(c.Hash()) if err != nil { - return 0, err + return err } if s, ok := (*m)[uint32(len(d.Digest)+8)]; ok { - return s.get(d.Digest) + return s.getAll(d.Digest, fn) } - return 0, ErrNotFound + return ErrNotFound } func (m *multiWidthIndex) Codec() multicodec.Code { @@ -184,7 +189,7 @@ func (m *multiWidthIndex) Load(items []Record) error { idxs[len(digest)] = make([]digestRecord, 0) idx = idxs[len(digest)] } - idxs[len(digest)] = append(idx, digestRecord{digest, item.Idx}) + idxs[len(digest)] = append(idx, digestRecord{digest, item.Offset}) } // Sort each list. then write to compact form. @@ -209,9 +214,3 @@ func newSorted() Index { m := make(multiWidthIndex) return &m } - -//lint:ignore U1000 kept for potential future use. -func newSingleSorted() Index { - s := singleWidthIndex{} - return &s -} diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index fe0ca961a04bb5b49a1399ed318c9b995b80ee87..767937b79aa45bcd17f48ca6361125df99ad874f 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -18,10 +18,6 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) { name string subject Index }{ - { - "SingleSorted", - newSingleSorted(), - }, { "Sorted", newSorted(), @@ -29,7 +25,7 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotOffset, err := tt.subject.Get(nonExistingKey) + gotOffset, err := GetFirst(tt.subject, nonExistingKey) require.Equal(t, ErrNotFound, err) require.Equal(t, uint64(0), gotOffset) }) diff --git a/v2/index_gen.go b/v2/index_gen.go index 5a60fb7111044df3d1c03ffe06909eaf5c8c600a..ae938498ae4e0635d9db9eb761135760813f13a2 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -75,7 +75,7 @@ func GenerateIndex(v1r io.Reader, opts ...ReadOption) (index.Index, error) { if err != nil { return nil, err } - records = append(records, index.Record{Cid: c, Idx: uint64(sectionOffset)}) + records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)}) // Seek to the next section by skipping the block. // The section length includes the CID, so subtract it. diff --git a/v2/options.go b/v2/options.go index 89044a761bbcc413e911f6bfe7174dd2d402b8b3..b206c8342a1799776f5cc0a36222e4646e850814 100644 --- a/v2/options.go +++ b/v2/options.go @@ -46,8 +46,10 @@ type ReadWriteOption interface { // a zero-length section as the end of the input CAR file. For example, this can // be useful to allow "null padding" after a CARv1 without knowing where the // padding begins. -func ZeroLengthSectionAsEOF(o *ReadOptions) { - o.ZeroLengthSectionAsEOF = true +func ZeroLengthSectionAsEOF(enable bool) ReadOption { + return func(o *ReadOptions) { + o.ZeroLengthSectionAsEOF = true + } } // UseDataPadding is a write option which sets the padding to be added between