From 703b88c25262f019bd1a4be7aa213fb04785e53a Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Fri, 23 Jul 2021 12:02:00 +0100 Subject: [PATCH] Add zero-length sections as EOF option to internal CARv1 reader The CARv2 implementation uses an internal fork of CARv1 due to upstream dependency issues captured in #104. Propagate the options set in CARv2 APIs for treatment of zero-lenth sections onto internal packages so that APIs using the internal CARv1 reader behave consistently. Use a `bool` for setting the option, since it is the only option needed in CARv1. Update tests to reflect changes. Add additional tests to internal CARv1 package and ReadOnly blockstore that assert option is propagated. Fixes #190 --- v2/blockstore/readonly.go | 4 ++-- v2/blockstore/readonly_test.go | 40 +++++++++++++++++++++++++--------- v2/index/index_test.go | 2 +- v2/internal/carv1/car.go | 22 ++++++++++++++----- v2/internal/carv1/car_test.go | 34 +++++++++++++++++++++++++++++ v2/internal/carv1/util/util.go | 8 ++++--- 6 files changed, 88 insertions(+), 22 deletions(-) diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 6c2397f..fcd9734 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -162,7 +162,7 @@ func OpenReadOnly(path string, opts ...carv2.ReadOption) (*ReadOnly, error) { } func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { - bcid, data, err := util.ReadNode(internalio.NewOffsetReadSeeker(b.backing, idx)) + bcid, data, err := util.ReadNode(internalio.NewOffsetReadSeeker(b.backing, idx), b.ropts.ZeroLengthSectionAsEOF) return bcid, data, err } @@ -252,7 +252,7 @@ func (b *ReadOnly) GetSize(key cid.Cid) (int, error) { b.mu.RLock() defer b.mu.RUnlock() - var fnSize int = -1 + fnSize := -1 var fnErr error err := b.idx.GetAll(key, func(offset uint64) bool { rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index 4d443c3..470ace2 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -34,25 +34,38 @@ func TestReadOnly(t *testing.T) { tests := []struct { name string v1OrV2path string + opts []carv2.ReadOption v1r *carv1.CarReader }{ { "OpenedWithCarV1", "../testdata/sample-v1.car", - newReaderFromV1File(t, "../testdata/sample-v1.car"), + []carv2.ReadOption{UseWholeCIDs(true)}, + newV1ReaderFromV1File(t, "../testdata/sample-v1.car", false), }, { "OpenedWithCarV2", "../testdata/sample-wrapped-v2.car", - newReaderFromV2File(t, "../testdata/sample-wrapped-v2.car"), + []carv2.ReadOption{UseWholeCIDs(true)}, + newV1ReaderFromV2File(t, "../testdata/sample-wrapped-v2.car", false), + }, + { + "OpenedWithCarV1ZeroLenSection", + "../testdata/sample-v1-with-zero-len-section.car", + []carv2.ReadOption{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, + newV1ReaderFromV1File(t, "../testdata/sample-v1-with-zero-len-section.car", true), + }, + { + "OpenedWithAnotherCarV1ZeroLenSection", + "../testdata/sample-v1-with-zero-len-section2.car", + []carv2.ReadOption{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, + newV1ReaderFromV1File(t, "../testdata/sample-v1-with-zero-len-section2.car", true), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - subject, err := OpenReadOnly(tt.v1OrV2path, - UseWholeCIDs(true), - ) - t.Cleanup(func() { subject.Close() }) + subject, err := OpenReadOnly(tt.v1OrV2path, tt.opts...) + t.Cleanup(func() { require.NoError(t, subject.Close()) }) require.NoError(t, err) // Assert roots match v1 payload. @@ -118,22 +131,29 @@ func TestNewReadOnlyFailsOnUnknownVersion(t *testing.T) { require.Nil(t, subject) } -func newReaderFromV1File(t *testing.T, carv1Path string) *carv1.CarReader { +func newV1ReaderFromV1File(t *testing.T, carv1Path string, zeroLenSectionAsEOF bool) *carv1.CarReader { f, err := os.Open(carv1Path) require.NoError(t, err) t.Cleanup(func() { f.Close() }) - v1r, err := carv1.NewCarReader(f) + v1r, err := newV1Reader(f, zeroLenSectionAsEOF) require.NoError(t, err) return v1r } -func newReaderFromV2File(t *testing.T, carv2Path string) *carv1.CarReader { +func newV1ReaderFromV2File(t *testing.T, carv2Path string, zeroLenSectionAsEOF bool) *carv1.CarReader { f, err := os.Open(carv2Path) require.NoError(t, err) t.Cleanup(func() { f.Close() }) v2r, err := carv2.NewReader(f) require.NoError(t, err) - v1r, err := carv1.NewCarReader(v2r.DataReader()) + v1r, err := newV1Reader(v2r.DataReader(), zeroLenSectionAsEOF) require.NoError(t, err) return v1r } + +func newV1Reader(r io.Reader, zeroLenSectionAsEOF bool) (*carv1.CarReader, error) { + if zeroLenSectionAsEOF { + return carv1.NewCarReaderWithZeroLengthSectionAsEOF(r) + } + return carv1.NewCarReader(r) +} diff --git a/v2/index/index_test.go b/v2/index/index_test.go index acafffe..bb369cf 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -77,7 +77,7 @@ func TestReadFrom(t *testing.T) { require.NoError(t, err) // Read the fame at offset and assert the frame corresponds to the expected block. - gotCid, gotData, err := util.ReadNode(crf) + gotCid, gotData, err := util.ReadNode(crf, false) require.NoError(t, err) gotBlock, err := blocks.NewBlockWithCid(gotData, gotCid) require.NoError(t, err) diff --git a/v2/internal/carv1/car.go b/v2/internal/carv1/car.go index 48b4f3e..6a25e66 100644 --- a/v2/internal/carv1/car.go +++ b/v2/internal/carv1/car.go @@ -57,7 +57,7 @@ func WriteCar(ctx context.Context, ds format.NodeGetter, roots []cid.Cid, w io.W } func ReadHeader(r io.Reader) (*CarHeader, error) { - hb, err := util.LdRead(r) + hb, err := util.LdRead(r, false) if err != nil { return nil, err } @@ -106,11 +106,20 @@ func (cw *carWriter) writeNode(ctx context.Context, nd format.Node) error { } type CarReader struct { - r io.Reader - Header *CarHeader + r io.Reader + Header *CarHeader + zeroLenAsEOF bool +} + +func NewCarReaderWithZeroLengthSectionAsEOF(r io.Reader) (*CarReader, error) { + return newCarReader(r, true) } func NewCarReader(r io.Reader) (*CarReader, error) { + return newCarReader(r, false) +} + +func newCarReader(r io.Reader, zeroLenAsEOF bool) (*CarReader, error) { ch, err := ReadHeader(r) if err != nil { return nil, err @@ -125,13 +134,14 @@ func NewCarReader(r io.Reader) (*CarReader, error) { } return &CarReader{ - r: r, - Header: ch, + r: r, + Header: ch, + zeroLenAsEOF: zeroLenAsEOF, }, nil } func (cr *CarReader) Next() (blocks.Block, error) { - c, data, err := util.ReadNode(cr.r) + c, data, err := util.ReadNode(cr.r, cr.zeroLenAsEOF) if err != nil { return nil, err } diff --git a/v2/internal/carv1/car_test.go b/v2/internal/carv1/car_test.go index 70ce7d8..71e06ee 100644 --- a/v2/internal/carv1/car_test.go +++ b/v2/internal/carv1/car_test.go @@ -5,6 +5,7 @@ import ( "context" "encoding/hex" "io" + "os" "strings" "testing" @@ -297,3 +298,36 @@ func TestCarHeaderMatchess(t *testing.T) { }) } } + +func TestReadingZeroLengthSectionWithoutOptionSetIsError(t *testing.T) { + f, err := os.Open("../../testdata/sample-v1-with-zero-len-section.car") + require.NoError(t, err) + subject, err := NewCarReader(f) + require.NoError(t, err) + + for { + _, err := subject.Next() + if err == io.EOF { + break + } else if err != nil { + require.EqualError(t, err, "varints malformed, could not reach the end") + return + } + } + require.Fail(t, "expected error when reading file with zero section without option set") +} + +func TestReadingZeroLengthSectionWithOptionSetIsSuccess(t *testing.T) { + f, err := os.Open("../../testdata/sample-v1-with-zero-len-section.car") + require.NoError(t, err) + subject, err := NewCarReaderWithZeroLengthSectionAsEOF(f) + require.NoError(t, err) + + for { + _, err := subject.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + } +} diff --git a/v2/internal/carv1/util/util.go b/v2/internal/carv1/util/util.go index dce5300..6b94956 100644 --- a/v2/internal/carv1/util/util.go +++ b/v2/internal/carv1/util/util.go @@ -15,8 +15,8 @@ type BytesReader interface { io.ByteReader } -func ReadNode(r io.Reader) (cid.Cid, []byte, error) { - data, err := LdRead(r) +func ReadNode(r io.Reader, zeroLenAsEOF bool) (cid.Cid, []byte, error) { + data, err := LdRead(r, zeroLenAsEOF) if err != nil { return cid.Cid{}, nil, err } @@ -61,7 +61,7 @@ func LdSize(d ...[]byte) uint64 { return sum + uint64(s) } -func LdRead(r io.Reader) ([]byte, error) { +func LdRead(r io.Reader, zeroLenAsEOF bool) ([]byte, error) { l, err := varint.ReadUvarint(internalio.ToByteReader(r)) if err != nil { // If the length of bytes read is non-zero when the error is EOF then signal an unclean EOF. @@ -69,6 +69,8 @@ func LdRead(r io.Reader) ([]byte, error) { return nil, io.ErrUnexpectedEOF } return nil, err + } else if l == 0 && zeroLenAsEOF { + return nil, io.EOF } buf := make([]byte, l) -- GitLab