diff --git a/v2/writer.go b/v2/writer.go index 3675972283cf1cd1908ced00e3895952798f4668..fa94913e9a819fec02f9362a7aa3fb4300c9b124 100644 --- a/v2/writer.go +++ b/v2/writer.go @@ -1,138 +1,14 @@ package car import ( - "bytes" - "context" "io" "os" internalio "github.com/ipld/go-car/v2/internal/io" - "github.com/ipfs/go-cid" - format "github.com/ipfs/go-ipld-format" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/internal/carv1" ) -const bulkPaddingBytesSize = 1024 - -var bulkPadding = make([]byte, bulkPaddingBytesSize) - -type ( - // padding represents the number of padding bytes. - padding uint64 - // writer writes CAR v2 into a given io.Writer. - writer struct { - NodeGetter format.NodeGetter - CarV1Padding uint64 - IndexPadding uint64 - - ctx context.Context - roots []cid.Cid - encodedCarV1 *bytes.Buffer - } -) - -// WriteTo writes this padding to the given writer as default value bytes. -func (p padding) WriteTo(w io.Writer) (n int64, err error) { - var reminder int64 - if p > bulkPaddingBytesSize { - reminder = int64(p % bulkPaddingBytesSize) - iter := int(p / bulkPaddingBytesSize) - for i := 0; i < iter; i++ { - if _, err = w.Write(bulkPadding); err != nil { - return - } - n += bulkPaddingBytesSize - } - } else { - reminder = int64(p) - } - - paddingBytes := make([]byte, reminder) - _, err = w.Write(paddingBytes) - n += reminder - return -} - -// newWriter instantiates a new CAR v2 writer. -func newWriter(ctx context.Context, ng format.NodeGetter, roots []cid.Cid) *writer { - return &writer{ - NodeGetter: ng, - ctx: ctx, - roots: roots, - encodedCarV1: new(bytes.Buffer), - } -} - -// WriteTo writes the given root CIDs according to CAR v2 specification. -func (w *writer) WriteTo(writer io.Writer) (n int64, err error) { - _, err = writer.Write(Pragma) - if err != nil { - return - } - n += int64(PragmaSize) - // We read the entire car into memory because GenerateIndex takes a reader. - // TODO Future PRs will make this more efficient by exposing necessary interfaces in index pacakge so that - // this can be done in an streaming manner. - if err = carv1.WriteCar(w.ctx, w.NodeGetter, w.roots, w.encodedCarV1); err != nil { - return - } - carV1Len := w.encodedCarV1.Len() - - wn, err := w.writeHeader(writer, carV1Len) - if err != nil { - return - } - n += wn - - wn, err = padding(w.CarV1Padding).WriteTo(writer) - if err != nil { - return - } - n += wn - - carV1Bytes := w.encodedCarV1.Bytes() - wwn, err := writer.Write(carV1Bytes) - if err != nil { - return - } - n += int64(wwn) - - wn, err = padding(w.IndexPadding).WriteTo(writer) - if err != nil { - return - } - n += wn - - wn, err = w.writeIndex(writer, carV1Bytes) - if err == nil { - n += wn - } - return -} - -func (w *writer) writeHeader(writer io.Writer, carV1Len int) (int64, error) { - header := NewHeader(uint64(carV1Len)). - WithCarV1Padding(w.CarV1Padding). - WithIndexPadding(w.IndexPadding) - return header.WriteTo(writer) -} - -func (w *writer) writeIndex(writer io.Writer, carV1 []byte) (int64, error) { - // TODO avoid recopying the bytes by refactoring index once it is integrated here. - // Right now we copy the bytes since index takes a writer. - // Consider refactoring index to make this process more efficient. - // We should avoid reading the entire car into memory since it can be large. - reader := bytes.NewReader(carV1) - idx, err := GenerateIndex(reader) - if err != nil { - return 0, err - } - // FIXME refactor index to expose the number of bytes written. - return 0, index.WriteTo(idx, writer) -} - // WrapV1File is a wrapper around WrapV1 that takes filesystem paths. // The source path is assumed to exist, and the destination path is overwritten. // Note that the destination path might still be created even if an error diff --git a/v2/writer_test.go b/v2/writer_test.go index 633fc46260cbd63004f93dcf0b622e093f8a222c..ff8f3bff2cae83d402c884fddea43d68f328301c 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -1,10 +1,17 @@ package car import ( - "bytes" "context" + "io" + "io/ioutil" + "os" + "path/filepath" "testing" + "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/internal/carv1" + "github.com/stretchr/testify/require" + "github.com/ipfs/go-cid" format "github.com/ipfs/go-ipld-format" "github.com/ipfs/go-merkledag" @@ -12,56 +19,44 @@ import ( "github.com/stretchr/testify/assert" ) -func TestPadding_WriteTo(t *testing.T) { - tests := []struct { - name string - padding padding - wantBytes []byte - wantN int64 - wantErr bool - }{ - { - "ZeroPaddingIsNoBytes", - padding(0), - nil, - 0, - false, - }, - { - "NonZeroPaddingIsCorrespondingZeroValueBytes", - padding(3), - []byte{0x00, 0x00, 0x00}, - 3, - false, - }, - { - "PaddingLargerThanTheBulkPaddingSizeIsCorrespondingZeroValueBytes", - padding(1025), - make([]byte, 1025), - 1025, - false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - w := &bytes.Buffer{} - gotN, gotErr := tt.padding.WriteTo(w) - if tt.wantErr { - assert.Error(t, gotErr) - return - } - gotBytes := w.Bytes() - assert.Equal(t, tt.wantN, gotN) - assert.Equal(t, tt.wantBytes, gotBytes) - }) - } -} +func TestWrapV1(t *testing.T) { + // Produce a CARv1 file to test wrapping with. + dagSvc := dstest.Mock() + src := filepath.Join(t.TempDir(), "unwrapped-test-v1.car") + sf, err := os.Create(src) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, sf.Close()) }) + require.NoError(t, carv1.WriteCar(context.Background(), dagSvc, generateRootCid(t, dagSvc), sf)) + + // Wrap the test CARv1 file + dest := filepath.Join(t.TempDir(), "wrapped-test-v1.car") + df, err := os.Create(dest) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, df.Close()) }) + _, err = sf.Seek(0, io.SeekStart) + require.NoError(t, err) + require.NoError(t, WrapV1(sf, df)) + + // Assert wrapped file is valid CARv2 with CARv1 data payload matching the original CARv1 file. + subject, err := OpenReader(dest) + t.Cleanup(func() { require.NoError(t, subject.Close()) }) + require.NoError(t, err) + + // Assert CARv1 data payloads are identical. + _, err = sf.Seek(0, io.SeekStart) + require.NoError(t, err) + wantPayload, err := ioutil.ReadAll(sf) + require.NoError(t, err) + gotPayload, err := ioutil.ReadAll(subject.CarV1Reader()) + require.NoError(t, err) + require.Equal(t, wantPayload, gotPayload) -func TestNewWriter(t *testing.T) { - dagService := dstest.Mock() - wantRoots := generateRootCid(t, dagService) - writer := newWriter(context.Background(), dagService, wantRoots) - assert.Equal(t, wantRoots, writer.roots) + // Assert embedded index in CARv2 is same as index generated from the original CARv1. + wantIdx, err := GenerateIndexFromFile(src) + require.NoError(t, err) + gotIdx, err := index.ReadFrom(subject.IndexReader()) + require.NoError(t, err) + require.Equal(t, wantIdx, gotIdx) } func generateRootCid(t *testing.T, adder format.NodeAdder) []cid.Cid {