blockstore: implement UseWholeCIDs

Update the tests too, as a few expected whole CIDs. For now, just make them use the option. This required index.Index to gain a new method, GetAll, so that when using whole CIDs, methods like Get can return true if any of the matching indexed CIDs are an exact whole-CID match, and not just looking at the first matching indexed CID. GetAll is akin to an iteration over all matching indexed CIDs, and its callback returns a boolean to say if the iteration should continue. This allows stopping as soon as we're done. We also remove Index.Get, instead replacing it with a helper called GetFirst, which simply makes simple uses of GetAll a single line. We remove the non-specced and unused index implementations, too. They were left in place in case they were useful again, but they haven't been so far, and their code is still in git. Keeping them around just means updating more code when refactoring. While at it, make ZeroLengthSectionAsEOF take a boolean and return an option, just like the other boolean options, for consistency. Fixes #130.

blockstore: implement UseWholeCIDs
Update the tests too, as a few expected whole CIDs. For now, just make them use the option. This required index.Index to gain a new method, GetAll, so that when using whole CIDs, methods like Get can return true if any of the matching indexed CIDs are an exact whole-CID match, and not just looking at the first matching indexed CID. GetAll is akin to an iteration over all matching indexed CIDs, and its callback returns a boolean to say if the iteration should continue. This allows stopping as soon as we're done. We also remove Index.Get, instead replacing it with a helper called GetFirst, which simply makes simple uses of GetAll a single line. We remove the non-specced and unused index implementations, too. They were left in place in case they were useful again, but they haven't been so far, and their code is still in git. Keeping them around just means updating more code when refactoring. While at it, make ZeroLengthSectionAsEOF take a boolean and return an option, just like the other boolean options, for consistency. Fixes #130.
c2e497e2 · Daniel Martí · 0ae4f9c0 · c2e497e2 · c2e497e2 · c2e497e2
Commit c2e497e2 authored Jul 19, 2021 by Daniel Martí
15 changed files
--- a/v2/blockstore/example_test.go
+++ b/v2/blockstore/example_test.go
@@ -20,7 +20,10 @@ func ExampleOpenReadOnly() {
 	// Note, `OpenReadOnly` accepts bot CARv1 and CARv2 formats and transparently generate index
 	// in the background if necessary.
 	// This instance sets ZeroLengthSectionAsEOF option to treat zero sized sections in file as EOF.
-	robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car", carv2.ZeroLengthSectionAsEOF)
+	robs, err := blockstore.OpenReadOnly("../testdata/sample-v1.car",
+		blockstore.UseWholeCIDs(true),
+		carv2.ZeroLengthSectionAsEOF(true),
+	)
 	if err != nil {
 		panic(err)
 	}

--- a/v2/blockstore/insertionindex.go
+++ b/v2/blockstore/insertionindex.go
@@ -55,7 +55,7 @@ func newRecordFromCid(c cid.Cid, at uint64) recordDigest {
 		panic(err)
 	}

-	return recordDigest{d.Digest, index.Record{Cid: c, Idx: at}}
+	return recordDigest{d.Digest, index.Record{Cid: c, Offset: at}}
 }

 func (ii *insertionIndex) insertNoReplace(key cid.Cid, n uint64) {
@@ -77,7 +77,31 @@ func (ii *insertionIndex) Get(c cid.Cid) (uint64, error) {
 		return 0, errUnsupported
 	}

-	return r.Record.Idx, nil
+	return r.Record.Offset, nil
+}
+
+func (ii *insertionIndex) GetAll(c cid.Cid, fn func(uint64) bool) error {
+	d, err := multihash.Decode(c.Hash())
+	if err != nil {
+		return err
+	}
+	entry := recordDigest{digest: d.Digest}
+
+	any := false
+	iter := func(i llrb.Item) bool {
+		existing := i.(recordDigest)
+		if !bytes.Equal(existing.digest, entry.digest) {
+			// We've already looked at all entries with matching digests.
+			return false
+		}
+		any = true
+		return fn(existing.Record.Offset)
+	}
+	ii.items.AscendGreaterOrEqual(entry, iter)
+	if !any {
+		return index.ErrNotFound
+	}
+	return nil
 }

 func (ii *insertionIndex) Marshal(w io.Writer) error {
@@ -152,6 +176,10 @@ func (ii *insertionIndex) flatten() (index.Index, error) {
 	return si, nil
 }

+// note that hasExactCID is very similar to GetAll,
+// but it's separate as it allows us to compare Record.Cid directly,
+// whereas GetAll just provides Record.Offset.
+
 func (ii *insertionIndex) hasExactCID(c cid.Cid) bool {
 	d, err := multihash.Decode(c.Hash())
 	if err != nil {

--- a/v2/blockstore/readonly.go
+++ b/v2/blockstore/readonly.go
@@ -65,7 +65,6 @@ type ReadOnly struct {
 // go-car/v2 package.
 func UseWholeCIDs(enable bool) carv2.ReadOption {
 	return func(o *carv2.ReadOptions) {
-		// TODO: update methods like Get, Has, and AllKeysChan to obey this.
 		o.BlockstoreUseWholeCIDs = enable
 	}
 }
@@ -177,22 +176,35 @@ func (b *ReadOnly) Has(key cid.Cid) (bool, error) {
 	b.mu.RLock()
 	defer b.mu.RUnlock()

-	offset, err := b.idx.Get(key)
+	var fnFound bool
+	var fnErr error
+	err := b.idx.GetAll(key, func(offset uint64) bool {
+		uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
+		var err error
+		_, err = varint.ReadUvarint(uar)
+		if err != nil {
+			fnErr = err
+			return false
+		}
+		_, readCid, err := cid.CidFromReader(uar)
+		if err != nil {
+			fnErr = err
+			return false
+		}
+		if b.ropts.BlockstoreUseWholeCIDs {
+			fnFound = readCid.Equals(key)
+			return !fnFound // continue looking if we haven't found it
+		} else {
+			fnFound = bytes.Equal(readCid.Hash(), key.Hash())
+			return false
+		}
+	})
 	if errors.Is(err, index.ErrNotFound) {
 		return false, nil
 	} else if err != nil {
 		return false, err
 	}
-	uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
-	_, err = varint.ReadUvarint(uar)
-	if err != nil {
-		return false, err
-	}
-	_, c, err := cid.CidFromReader(uar)
-	if err != nil {
-		return false, err
-	}
-	return bytes.Equal(key.Hash(), c.Hash()), nil
+	return fnFound, fnErr
 }

 // Get gets a block corresponding to the given key.
@@ -200,21 +212,39 @@ func (b *ReadOnly) Get(key cid.Cid) (blocks.Block, error) {
 	b.mu.RLock()
 	defer b.mu.RUnlock()

-	offset, err := b.idx.Get(key)
-	if err != nil {
-		if err == index.ErrNotFound {
-			err = blockstore.ErrNotFound
+	var fnData []byte
+	var fnErr error
+	err := b.idx.GetAll(key, func(offset uint64) bool {
+		readCid, data, err := b.readBlock(int64(offset))
+		if err != nil {
+			fnErr = err
+			return false
 		}
+		if b.ropts.BlockstoreUseWholeCIDs {
+			if readCid.Equals(key) {
+				fnData = data
+				return false
+			} else {
+				return true // continue looking
+			}
+		} else {
+			if bytes.Equal(readCid.Hash(), key.Hash()) {
+				fnData = data
+			}
+			return false
+		}
+	})
+	if errors.Is(err, index.ErrNotFound) {
+		return nil, blockstore.ErrNotFound
+	} else if err != nil {
 		return nil, err
+	} else if fnErr != nil {
+		return nil, fnErr
 	}
-	entry, data, err := b.readBlock(int64(offset))
-	if err != nil {
-		return nil, err
-	}
-	if !bytes.Equal(key.Hash(), entry.Hash()) {
+	if fnData == nil {
 		return nil, blockstore.ErrNotFound
 	}
-	return blocks.NewBlockWithCid(data, key)
+	return blocks.NewBlockWithCid(fnData, key)
 }

 // GetSize gets the size of an item corresponding to the given key.
@@ -222,23 +252,45 @@ func (b *ReadOnly) GetSize(key cid.Cid) (int, error) {
 	b.mu.RLock()
 	defer b.mu.RUnlock()

-	idx, err := b.idx.Get(key)
-	if err != nil {
-		return -1, err
-	}
-	rdr := internalio.NewOffsetReadSeeker(b.backing, int64(idx))
-	sectionLen, err := varint.ReadUvarint(rdr)
-	if err != nil {
+	var fnSize int = -1
+	var fnErr error
+	err := b.idx.GetAll(key, func(offset uint64) bool {
+		rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset))
+		sectionLen, err := varint.ReadUvarint(rdr)
+		if err != nil {
+			fnErr = err
+			return false
+		}
+		cidLen, readCid, err := cid.CidFromReader(rdr)
+		if err != nil {
+			fnErr = err
+			return false
+		}
+		if b.ropts.BlockstoreUseWholeCIDs {
+			if readCid.Equals(key) {
+				fnSize = int(sectionLen) - cidLen
+				return false
+			} else {
+				return true // continue looking
+			}
+		} else {
+			if bytes.Equal(readCid.Hash(), key.Hash()) {
+				fnSize = int(sectionLen) - cidLen
+			}
+			return false
+		}
+	})
+	if errors.Is(err, index.ErrNotFound) {
 		return -1, blockstore.ErrNotFound
+	} else if err != nil {
+		return -1, err
+	} else if fnErr != nil {
+		return -1, fnErr
 	}
-	cidLen, readCid, err := cid.CidFromReader(rdr)
-	if err != nil {
-		return 0, err
-	}
-	if !readCid.Equals(key) {
+	if fnSize == -1 {
 		return -1, blockstore.ErrNotFound
 	}
-	return int(sectionLen) - cidLen, err
+	return fnSize, nil
 }

 // Put is not supported and always returns an error.
@@ -304,6 +356,11 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
 				return // TODO: log this error
 			}

+			// If we're just using multihashes, flatten to the "raw" codec.
+			if !b.ropts.BlockstoreUseWholeCIDs {
+				c = cid.NewCidV1(cid.Raw, c.Hash())
+			}
+
 			select {
 			case ch <- c:
 			case <-ctx.Done():

--- a/v2/blockstore/readonly_test.go
+++ b/v2/blockstore/readonly_test.go
@@ -49,7 +49,9 @@ func TestReadOnly(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			subject, err := OpenReadOnly(tt.v1OrV2path)
+			subject, err := OpenReadOnly(tt.v1OrV2path,
+				UseWholeCIDs(true),
+			)
 			t.Cleanup(func() { subject.Close() })
 			require.NoError(t, err)


--- a/v2/blockstore/readwrite.go
+++ b/v2/blockstore/readwrite.go
@@ -89,7 +89,6 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption {
 // Resuming from finalized files is allowed. However, resumption will regenerate the index
 // regardless by scanning every existing block in file.
 func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.ReadWriteOption) (*ReadWrite, error) {
-	// TODO: enable deduplication by default now that resumption is automatically attempted.
 	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666) // TODO: Should the user be able to configure FileMode permissions?
 	if err != nil {
 		return nil, fmt.Errorf("could not open read/write file: %w", err)

--- a/v2/blockstore/readwrite_test.go
+++ b/v2/blockstore/readwrite_test.go
@@ -123,21 +123,29 @@ func TestBlockstore(t *testing.T) {
 func TestBlockstorePutSameHashes(t *testing.T) {
 	tdir := t.TempDir()

-	// wbs allows duplicate puts.
-	wbs, err := blockstore.OpenReadWrite(
-		filepath.Join(tdir, "readwrite.car"), nil,
+	// This blockstore allows duplicate puts,
+	// and identifies by multihash as per the default.
+	wbsAllowDups, err := blockstore.OpenReadWrite(
+		filepath.Join(tdir, "readwrite-allowdup.car"), nil,
 		blockstore.AllowDuplicatePuts(true),
 	)
 	require.NoError(t, err)
-	t.Cleanup(func() { wbs.Finalize() })
+	t.Cleanup(func() { wbsAllowDups.Finalize() })

-	// wbs deduplicates puts by CID.
-	wbsd, err := blockstore.OpenReadWrite(
-		filepath.Join(tdir, "readwrite-dedup.car"), nil,
+	// This blockstore deduplicates puts by CID.
+	wbsByCID, err := blockstore.OpenReadWrite(
+		filepath.Join(tdir, "readwrite-dedup-wholecid.car"), nil,
 		blockstore.UseWholeCIDs(true),
 	)
 	require.NoError(t, err)
-	t.Cleanup(func() { wbsd.Finalize() })
+	t.Cleanup(func() { wbsByCID.Finalize() })
+
+	// This blockstore deduplicates puts by multihash.
+	wbsByHash, err := blockstore.OpenReadWrite(
+		filepath.Join(tdir, "readwrite-dedup-hash.car"), nil,
+	)
+	require.NoError(t, err)
+	t.Cleanup(func() { wbsByHash.Finalize() })

 	var blockList []blocks.Block

@@ -160,15 +168,15 @@ func TestBlockstorePutSameHashes(t *testing.T) {
 	// However, we have multiple CIDs for each multihash.
 	// We also have two duplicate CIDs.
 	data1 := []byte("foo bar")
-	appendBlock(data1, 0, cid.Raw)
-	appendBlock(data1, 1, cid.Raw)
+	appendBlock(data1, 0, cid.DagProtobuf)
+	appendBlock(data1, 1, cid.DagProtobuf)
 	appendBlock(data1, 1, cid.DagCBOR)
 	appendBlock(data1, 1, cid.DagCBOR) // duplicate CID

 	data2 := []byte("foo bar baz")
-	appendBlock(data2, 0, cid.Raw)
-	appendBlock(data2, 1, cid.Raw)
-	appendBlock(data2, 1, cid.Raw) // duplicate CID
+	appendBlock(data2, 0, cid.DagProtobuf)
+	appendBlock(data2, 1, cid.DagProtobuf)
+	appendBlock(data2, 1, cid.DagProtobuf) // duplicate CID
 	appendBlock(data2, 1, cid.DagCBOR)

 	countBlocks := func(bs *blockstore.ReadWrite) int {
@@ -176,52 +184,75 @@ func TestBlockstorePutSameHashes(t *testing.T) {
 		require.NoError(t, err)

 		n := 0
-		for range ch {
+		for c := range ch {
+			if c.Prefix().Codec == cid.Raw {
+				if bs == wbsByCID {
+					t.Error("expected blockstore with UseWholeCIDs to not flatten on AllKeysChan")
+				}
+			} else {
+				if bs != wbsByCID {
+					t.Error("expected blockstore without UseWholeCIDs to flatten on AllKeysChan")
+				}
+			}
 			n++
 		}
 		return n
 	}

-	for i, block := range blockList {
-		// Has should never error here.
-		// The first block should be missing.
-		// Others might not, given the duplicate hashes.
-		has, err := wbs.Has(block.Cid())
-		require.NoError(t, err)
-		if i == 0 {
-			require.False(t, has)
-		}
+	putBlockList := func(bs *blockstore.ReadWrite) {
+		for i, block := range blockList {
+			// Has should never error here.
+			// The first block should be missing.
+			// Others might not, given the duplicate hashes.
+			has, err := bs.Has(block.Cid())
+			require.NoError(t, err)
+			if i == 0 {
+				require.False(t, has)
+			}

-		err = wbs.Put(block)
-		require.NoError(t, err)
-	}
+			err = bs.Put(block)
+			require.NoError(t, err)

-	for _, block := range blockList {
-		has, err := wbs.Has(block.Cid())
-		require.NoError(t, err)
-		require.True(t, has)
+			// Has, Get, and GetSize need to work right after a Put.
+			has, err = bs.Has(block.Cid())
+			require.NoError(t, err)
+			require.True(t, has)

-		got, err := wbs.Get(block.Cid())
-		require.NoError(t, err)
-		require.Equal(t, block.Cid(), got.Cid())
-		require.Equal(t, block.RawData(), got.RawData())
+			got, err := bs.Get(block.Cid())
+			require.NoError(t, err)
+			require.Equal(t, block.Cid(), got.Cid())
+			require.Equal(t, block.RawData(), got.RawData())
+
+			size, err := bs.GetSize(block.Cid())
+			require.NoError(t, err)
+			require.Equal(t, len(block.RawData()), size)
+		}
 	}

-	require.Equal(t, len(blockList), countBlocks(wbs))
+	putBlockList(wbsAllowDups)
+	require.Equal(t, len(blockList), countBlocks(wbsAllowDups))

-	err = wbs.Finalize()
+	err = wbsAllowDups.Finalize()
 	require.NoError(t, err)

 	// Put the same list of blocks to the blockstore that
 	// deduplicates by CID.
-	// We should end up with two fewer blocks.
-	for _, block := range blockList {
-		err = wbsd.Put(block)
-		require.NoError(t, err)
-	}
-	require.Equal(t, len(blockList)-2, countBlocks(wbsd))
+	// We should end up with two fewer blocks,
+	// as two are entire CID duplicates.
+	putBlockList(wbsByCID)
+	require.Equal(t, len(blockList)-2, countBlocks(wbsByCID))
+
+	err = wbsByCID.Finalize()
+	require.NoError(t, err)
+
+	// Put the same list of blocks to the blockstore that
+	// deduplicates by CID.
+	// We should end up with just two blocks,
+	// as the original set of blocks only has two distinct multihashes.
+	putBlockList(wbsByHash)
+	require.Equal(t, 2, countBlocks(wbsByHash))

-	err = wbsd.Finalize()
+	err = wbsByHash.Finalize()
 	require.NoError(t, err)
 }

@@ -280,7 +311,8 @@ func TestBlockstoreNullPadding(t *testing.T) {
 	// A sample null-padded CARv1 file.
 	paddedV1 = append(paddedV1, make([]byte, 2048)...)

-	rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLengthSectionAsEOF)
+	rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil,
+		carv2.ZeroLengthSectionAsEOF(true))
 	require.NoError(t, err)

 	roots, err := rbs.Roots()
@@ -313,7 +345,8 @@ func TestBlockstoreResumption(t *testing.T) {

 	path := filepath.Join(t.TempDir(), "readwrite-resume.car")
 	// Create an incomplete CARv2 file with no blocks put.
-	subject, err := blockstore.OpenReadWrite(path, r.Header.Roots)
+	subject, err := blockstore.OpenReadWrite(path, r.Header.Roots,
+		blockstore.UseWholeCIDs(true))
 	require.NoError(t, err)

 	// For each block resume on the same file, putting blocks one at a time.
@@ -345,7 +378,8 @@ func TestBlockstoreResumption(t *testing.T) {
 				// We do this to avoid resource leak during testing.
 				require.NoError(t, subject.Close())
 			}
-			subject, err = blockstore.OpenReadWrite(path, r.Header.Roots)
+			subject, err = blockstore.OpenReadWrite(path, r.Header.Roots,
+				blockstore.UseWholeCIDs(true))
 			require.NoError(t, err)
 		}
 		require.NoError(t, subject.Put(b))
@@ -377,7 +411,8 @@ func TestBlockstoreResumption(t *testing.T) {
 	require.NoError(t, subject.Close())

 	// Finalize the blockstore to complete partially written CARv2 file.
-	subject, err = blockstore.OpenReadWrite(path, r.Header.Roots)
+	subject, err = blockstore.OpenReadWrite(path, r.Header.Roots,
+		blockstore.UseWholeCIDs(true))
 	require.NoError(t, err)
 	require.NoError(t, subject.Finalize())

@@ -528,9 +563,9 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) {
 	indexSize := stat.Size() - int64(wantIndexOffset)
 	gotIdx, err := index.ReadFrom(io.NewSectionReader(f, int64(wantIndexOffset), indexSize))
 	require.NoError(t, err)
-	_, err = gotIdx.Get(oneTestBlockCid)
+	_, err = index.GetFirst(gotIdx, oneTestBlockCid)
 	require.NoError(t, err)
-	_, err = gotIdx.Get(anotherTestBlockCid)
+	_, err = index.GetFirst(gotIdx, anotherTestBlockCid)
 	require.NoError(t, err)
 }


--- a/v2/index/example_test.go
+++ b/v2/index/example_test.go
@@ -34,7 +34,7 @@ func ExampleReadFrom() {

 	// For each root CID print the offset relative to CARv1 data payload.
 	for _, r := range roots {
-		offset, err := idx.Get(r)
+		offset, err := index.GetFirst(idx, r)
 		if err != nil {
 			panic(err)
 		}

--- a/v2/index/index.go
+++ b/v2/index/index.go
@@ -14,33 +14,59 @@ import (
 	"github.com/ipfs/go-cid"
 )

-// Codec table is a first var-int in CAR indexes
-const (
-	indexHashed codec = 0x300000 + iota
-	indexSingleSorted
-	indexGobHashed
-)
-
 type (
-	// codec is used as a multicodec identifier for CAR index files
-	codec int
-
 	// Record is a pre-processed record of a car item and location.
 	Record struct {
 		cid.Cid
-		Idx uint64
+		Offset uint64
 	}

 	// Index provides an interface for looking up byte offset of a given CID.
+	//
+	// Note that each indexing mechanism is free to match CIDs however it
+	// sees fit. For example, multicodec.CarIndexSorted only indexes
+	// multihash digests, meaning that Get and GetAll will find matching
+	// blocks even if the CID's encoding multicodec differs. Other index
+	// implementations might index the entire CID, the entire multihash, or
+	// just part of a multihash's digest.
 	Index interface {
+		// Codec provides the multicodec code that the index implements.
+		//
+		// Note that this may return a reserved code if the index
+		// implementation is not defined in a spec.
 		Codec() multicodec.Code
+
+		// Marshal encodes the index in serial form.
 		Marshal(w io.Writer) error
+		// Unmarshal decodes the index from its serial form.
 		Unmarshal(r io.Reader) error
-		Get(cid.Cid) (uint64, error)
+
+		// Load inserts a number of records into the index.
 		Load([]Record) error
+
+		// Get looks up all blocks matching a given CID,
+		// calling a function for each one of their offsets.
+		//
+		// If the function returns false, GetAll stops.
+		//
+		// If no error occurred and the CID isn't indexed,
+		// meaning that no callbacks happen,
+		// ErrNotFound is returned.
+		GetAll(cid.Cid, func(uint64) bool) error
 	}
 )

+// GetFirst is a wrapper over Index.GetAll, returning the offset for the first
+// matching indexed CID.
+func GetFirst(idx Index, key cid.Cid) (uint64, error) {
+	var firstOffset uint64
+	err := idx.GetAll(key, func(offset uint64) bool {
+		firstOffset = offset
+		return false
+	})
+	return firstOffset, err
+}
+
 // New constructs a new index corresponding to the given CAR index codec.
 func New(codec multicodec.Code) (Index, error) {
 	switch codec {

--- a/v2/index/index_test.go
+++ b/v2/index/index_test.go
@@ -32,21 +32,6 @@ func TestNew(t *testing.T) {
 			codec:   multicodec.Cidv1,
 			wantErr: true,
 		},
-		{
-			name:    "IndexSingleSortedMultiCodecIsError",
-			codec:   multicodec.Code(indexSingleSorted),
-			wantErr: true,
-		},
-		{
-			name:    "IndexHashedMultiCodecIsError",
-			codec:   multicodec.Code(indexHashed),
-			wantErr: true,
-		},
-		{
-			name:    "IndexGobHashedMultiCodecIsError",
-			codec:   multicodec.Code(indexGobHashed),
-			wantErr: true,
-		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -82,7 +67,7 @@ func TestReadFrom(t *testing.T) {
 		require.NoError(t, err)

 		// Get offset from the index for a CID and assert it exists
-		gotOffset, err := subject.Get(wantBlock.Cid())
+		gotOffset, err := GetFirst(subject, wantBlock.Cid())
 		require.NoError(t, err)
 		require.NotZero(t, gotOffset)


--- a/v2/index/indexgobhash.go
+++ b/v2/index/indexgobhash.go
-package index
-
-import (
-	"encoding/gob"
-	"io"
-
-	"github.com/multiformats/go-multicodec"
-
-	"github.com/ipfs/go-cid"
-)
-
-//lint:ignore U1000 kept for potential future use.
-type mapGobIndex map[cid.Cid]uint64
-
-func (m *mapGobIndex) Get(c cid.Cid) (uint64, error) {
-	el, ok := (*m)[c]
-	if !ok {
-		return 0, ErrNotFound
-	}
-	return el, nil
-}
-
-func (m *mapGobIndex) Marshal(w io.Writer) error {
-	e := gob.NewEncoder(w)
-	return e.Encode(m)
-}
-
-func (m *mapGobIndex) Unmarshal(r io.Reader) error {
-	d := gob.NewDecoder(r)
-	return d.Decode(m)
-}
-
-func (m *mapGobIndex) Codec() multicodec.Code {
-	return multicodec.Code(indexHashed)
-}
-
-func (m *mapGobIndex) Load(rs []Record) error {
-	for _, r := range rs {
-		(*m)[r.Cid] = r.Idx
-	}
-	return nil
-}
-
-//lint:ignore U1000 kept for potential future use.
-func newGobHashed() Index {
-	mi := make(mapGobIndex)
-	return &mi
-}
--- a/v2/index/indexhashed.go
+++ b/v2/index/indexhashed.go
-package index
-
-import (
-	"io"
-
-	"github.com/multiformats/go-multicodec"
-
-	"github.com/ipfs/go-cid"
-	cbor "github.com/whyrusleeping/cbor/go"
-)
-
-//lint:ignore U1000 kept for potential future use.
-type mapIndex map[cid.Cid]uint64
-
-func (m *mapIndex) Get(c cid.Cid) (uint64, error) {
-	el, ok := (*m)[c]
-	if !ok {
-		return 0, ErrNotFound
-	}
-	return el, nil
-}
-
-func (m *mapIndex) Marshal(w io.Writer) error {
-	return cbor.Encode(w, m)
-}
-
-func (m *mapIndex) Unmarshal(r io.Reader) error {
-	d := cbor.NewDecoder(r)
-	return d.Decode(m)
-}
-
-func (m *mapIndex) Codec() multicodec.Code {
-	return multicodec.Code(indexHashed)
-}
-
-func (m *mapIndex) Load(rs []Record) error {
-	for _, r := range rs {
-		(*m)[r.Cid] = r.Idx
-	}
-	return nil
-}
-
-//lint:ignore U1000 kept for potential future use.
-func newHashed() Index {
-	mi := make(mapIndex)
-	return &mi
-}
--- a/v2/index/indexsorted.go
+++ b/v2/index/indexsorted.go
@@ -44,10 +44,6 @@ func (r recordSet) Swap(i, j int) {
 	r[i], r[j] = r[j], r[i]
 }

-func (s *singleWidthIndex) Codec() multicodec.Code {
-	return multicodec.Code(indexSingleSorted)
-}
-
 func (s *singleWidthIndex) Marshal(w io.Writer) error {
 	if err := binary.Write(w, binary.LittleEndian, s.width); err != nil {
 		return err
@@ -77,25 +73,34 @@ func (s *singleWidthIndex) Less(i int, digest []byte) bool {
 	return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0
 }

-func (s *singleWidthIndex) Get(c cid.Cid) (uint64, error) {
+func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error {
 	d, err := multihash.Decode(c.Hash())
 	if err != nil {
-		return 0, err
+		return err
 	}
-	return s.get(d.Digest)
+	return s.getAll(d.Digest, fn)
 }

-func (s *singleWidthIndex) get(d []byte) (uint64, error) {
+func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error {
 	idx := sort.Search(int(s.len), func(i int) bool {
 		return s.Less(i, d)
 	})
 	if uint64(idx) == s.len {
-		return 0, ErrNotFound
+		return ErrNotFound
+	}
+
+	any := false
+	for bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]) {
+		any = true
+		offset := binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)])
+		if !fn(offset) {
+			break
+		}
 	}
-	if !bytes.Equal(d[:], s.index[idx*int(s.width):(idx+1)*int(s.width)-8]) {
-		return 0, ErrNotFound
+	if !any {
+		return ErrNotFound
 	}
-	return binary.LittleEndian.Uint64(s.index[(idx+1)*int(s.width)-8 : (idx+1)*int(s.width)]), nil
+	return nil
 }

 func (s *singleWidthIndex) Load(items []Record) error {
@@ -115,15 +120,15 @@ func (s *singleWidthIndex) Load(items []Record) error {
 	return nil
 }

-func (m *multiWidthIndex) Get(c cid.Cid) (uint64, error) {
+func (m *multiWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error {
 	d, err := multihash.Decode(c.Hash())
 	if err != nil {
-		return 0, err
+		return err
 	}
 	if s, ok := (*m)[uint32(len(d.Digest)+8)]; ok {
-		return s.get(d.Digest)
+		return s.getAll(d.Digest, fn)
 	}
-	return 0, ErrNotFound
+	return ErrNotFound
 }

 func (m *multiWidthIndex) Codec() multicodec.Code {
@@ -184,7 +189,7 @@ func (m *multiWidthIndex) Load(items []Record) error {
 			idxs[len(digest)] = make([]digestRecord, 0)
 			idx = idxs[len(digest)]
 		}
-		idxs[len(digest)] = append(idx, digestRecord{digest, item.Idx})
+		idxs[len(digest)] = append(idx, digestRecord{digest, item.Offset})
 	}

 	// Sort each list. then write to compact form.
@@ -209,9 +214,3 @@ func newSorted() Index {
 	m := make(multiWidthIndex)
 	return &m
 }
-
-//lint:ignore U1000 kept for potential future use.
-func newSingleSorted() Index {
-	s := singleWidthIndex{}
-	return &s
-}
--- a/v2/index/indexsorted_test.go
+++ b/v2/index/indexsorted_test.go
@@ -18,10 +18,6 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) {
 		name    string
 		subject Index
 	}{
-		{
-			"SingleSorted",
-			newSingleSorted(),
-		},
 		{
 			"Sorted",
 			newSorted(),
@@ -29,7 +25,7 @@ func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			gotOffset, err := tt.subject.Get(nonExistingKey)
+			gotOffset, err := GetFirst(tt.subject, nonExistingKey)
 			require.Equal(t, ErrNotFound, err)
 			require.Equal(t, uint64(0), gotOffset)
 		})

--- a/v2/index_gen.go
+++ b/v2/index_gen.go
@@ -75,7 +75,7 @@ func GenerateIndex(v1r io.Reader, opts ...ReadOption) (index.Index, error) {
 		if err != nil {
 			return nil, err
 		}
-		records = append(records, index.Record{Cid: c, Idx: uint64(sectionOffset)})
+		records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)})

 		// Seek to the next section by skipping the block.
 		// The section length includes the CID, so subtract it.

--- a/v2/options.go
+++ b/v2/options.go
@@ -46,8 +46,10 @@ type ReadWriteOption interface {
 // a zero-length section as the end of the input CAR file. For example, this can
 // be useful to allow "null padding" after a CARv1 without knowing where the
 // padding begins.
-func ZeroLengthSectionAsEOF(o *ReadOptions) {
-	o.ZeroLengthSectionAsEOF = true
+func ZeroLengthSectionAsEOF(enable bool) ReadOption {
+	return func(o *ReadOptions) {
+		o.ZeroLengthSectionAsEOF = true
+	}
 }

 // UseDataPadding is a write option which sets the padding to be added between