From 0f79c3e852cf5fab1e9b02066267a2d78d43d991 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Fri, 16 Jul 2021 12:51:13 +0100 Subject: [PATCH] Remove memory intensive writer and add warping tests Remove the memory intensive CARv2 writer implementation since it needs to be reimplemented anyway and not used. We will release writers if needed when it comes to SelectiveCar writer implementation. Just now it is not clear if we want a CARv2 writer that works with the deprecated `format.NodeGetter`. Add tests for V1 wrapping functionality. Reformat code in repo for consistency using `gofumpt`. --- v2/writer.go | 124 ---------------------------------------------- v2/writer_test.go | 95 +++++++++++++++++------------------ 2 files changed, 45 insertions(+), 174 deletions(-) diff --git a/v2/writer.go b/v2/writer.go index 3675972..fa94913 100644 --- a/v2/writer.go +++ b/v2/writer.go @@ -1,138 +1,14 @@ package car import ( - "bytes" - "context" "io" "os" internalio "github.com/ipld/go-car/v2/internal/io" - "github.com/ipfs/go-cid" - format "github.com/ipfs/go-ipld-format" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/internal/carv1" ) -const bulkPaddingBytesSize = 1024 - -var bulkPadding = make([]byte, bulkPaddingBytesSize) - -type ( - // padding represents the number of padding bytes. - padding uint64 - // writer writes CAR v2 into a given io.Writer. - writer struct { - NodeGetter format.NodeGetter - CarV1Padding uint64 - IndexPadding uint64 - - ctx context.Context - roots []cid.Cid - encodedCarV1 *bytes.Buffer - } -) - -// WriteTo writes this padding to the given writer as default value bytes. -func (p padding) WriteTo(w io.Writer) (n int64, err error) { - var reminder int64 - if p > bulkPaddingBytesSize { - reminder = int64(p % bulkPaddingBytesSize) - iter := int(p / bulkPaddingBytesSize) - for i := 0; i < iter; i++ { - if _, err = w.Write(bulkPadding); err != nil { - return - } - n += bulkPaddingBytesSize - } - } else { - reminder = int64(p) - } - - paddingBytes := make([]byte, reminder) - _, err = w.Write(paddingBytes) - n += reminder - return -} - -// newWriter instantiates a new CAR v2 writer. -func newWriter(ctx context.Context, ng format.NodeGetter, roots []cid.Cid) *writer { - return &writer{ - NodeGetter: ng, - ctx: ctx, - roots: roots, - encodedCarV1: new(bytes.Buffer), - } -} - -// WriteTo writes the given root CIDs according to CAR v2 specification. -func (w *writer) WriteTo(writer io.Writer) (n int64, err error) { - _, err = writer.Write(Pragma) - if err != nil { - return - } - n += int64(PragmaSize) - // We read the entire car into memory because GenerateIndex takes a reader. - // TODO Future PRs will make this more efficient by exposing necessary interfaces in index pacakge so that - // this can be done in an streaming manner. - if err = carv1.WriteCar(w.ctx, w.NodeGetter, w.roots, w.encodedCarV1); err != nil { - return - } - carV1Len := w.encodedCarV1.Len() - - wn, err := w.writeHeader(writer, carV1Len) - if err != nil { - return - } - n += wn - - wn, err = padding(w.CarV1Padding).WriteTo(writer) - if err != nil { - return - } - n += wn - - carV1Bytes := w.encodedCarV1.Bytes() - wwn, err := writer.Write(carV1Bytes) - if err != nil { - return - } - n += int64(wwn) - - wn, err = padding(w.IndexPadding).WriteTo(writer) - if err != nil { - return - } - n += wn - - wn, err = w.writeIndex(writer, carV1Bytes) - if err == nil { - n += wn - } - return -} - -func (w *writer) writeHeader(writer io.Writer, carV1Len int) (int64, error) { - header := NewHeader(uint64(carV1Len)). - WithCarV1Padding(w.CarV1Padding). - WithIndexPadding(w.IndexPadding) - return header.WriteTo(writer) -} - -func (w *writer) writeIndex(writer io.Writer, carV1 []byte) (int64, error) { - // TODO avoid recopying the bytes by refactoring index once it is integrated here. - // Right now we copy the bytes since index takes a writer. - // Consider refactoring index to make this process more efficient. - // We should avoid reading the entire car into memory since it can be large. - reader := bytes.NewReader(carV1) - idx, err := GenerateIndex(reader) - if err != nil { - return 0, err - } - // FIXME refactor index to expose the number of bytes written. - return 0, index.WriteTo(idx, writer) -} - // WrapV1File is a wrapper around WrapV1 that takes filesystem paths. // The source path is assumed to exist, and the destination path is overwritten. // Note that the destination path might still be created even if an error diff --git a/v2/writer_test.go b/v2/writer_test.go index 633fc46..ff8f3bf 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -1,10 +1,17 @@ package car import ( - "bytes" "context" + "io" + "io/ioutil" + "os" + "path/filepath" "testing" + "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/internal/carv1" + "github.com/stretchr/testify/require" + "github.com/ipfs/go-cid" format "github.com/ipfs/go-ipld-format" "github.com/ipfs/go-merkledag" @@ -12,56 +19,44 @@ import ( "github.com/stretchr/testify/assert" ) -func TestPadding_WriteTo(t *testing.T) { - tests := []struct { - name string - padding padding - wantBytes []byte - wantN int64 - wantErr bool - }{ - { - "ZeroPaddingIsNoBytes", - padding(0), - nil, - 0, - false, - }, - { - "NonZeroPaddingIsCorrespondingZeroValueBytes", - padding(3), - []byte{0x00, 0x00, 0x00}, - 3, - false, - }, - { - "PaddingLargerThanTheBulkPaddingSizeIsCorrespondingZeroValueBytes", - padding(1025), - make([]byte, 1025), - 1025, - false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - w := &bytes.Buffer{} - gotN, gotErr := tt.padding.WriteTo(w) - if tt.wantErr { - assert.Error(t, gotErr) - return - } - gotBytes := w.Bytes() - assert.Equal(t, tt.wantN, gotN) - assert.Equal(t, tt.wantBytes, gotBytes) - }) - } -} +func TestWrapV1(t *testing.T) { + // Produce a CARv1 file to test wrapping with. + dagSvc := dstest.Mock() + src := filepath.Join(t.TempDir(), "unwrapped-test-v1.car") + sf, err := os.Create(src) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, sf.Close()) }) + require.NoError(t, carv1.WriteCar(context.Background(), dagSvc, generateRootCid(t, dagSvc), sf)) + + // Wrap the test CARv1 file + dest := filepath.Join(t.TempDir(), "wrapped-test-v1.car") + df, err := os.Create(dest) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, df.Close()) }) + _, err = sf.Seek(0, io.SeekStart) + require.NoError(t, err) + require.NoError(t, WrapV1(sf, df)) + + // Assert wrapped file is valid CARv2 with CARv1 data payload matching the original CARv1 file. + subject, err := OpenReader(dest) + t.Cleanup(func() { require.NoError(t, subject.Close()) }) + require.NoError(t, err) + + // Assert CARv1 data payloads are identical. + _, err = sf.Seek(0, io.SeekStart) + require.NoError(t, err) + wantPayload, err := ioutil.ReadAll(sf) + require.NoError(t, err) + gotPayload, err := ioutil.ReadAll(subject.CarV1Reader()) + require.NoError(t, err) + require.Equal(t, wantPayload, gotPayload) -func TestNewWriter(t *testing.T) { - dagService := dstest.Mock() - wantRoots := generateRootCid(t, dagService) - writer := newWriter(context.Background(), dagService, wantRoots) - assert.Equal(t, wantRoots, writer.roots) + // Assert embedded index in CARv2 is same as index generated from the original CARv1. + wantIdx, err := GenerateIndexFromFile(src) + require.NoError(t, err) + gotIdx, err := index.ReadFrom(subject.IndexReader()) + require.NoError(t, err) + require.Equal(t, wantIdx, gotIdx) } func generateRootCid(t *testing.T, adder format.NodeAdder) []cid.Cid { -- GitLab