Commit 0f79c3e8 authored by Masih H. Derkani's avatar Masih H. Derkani Committed by Daniel Martí

Remove memory intensive writer and add warping tests

Remove the memory intensive CARv2 writer implementation since it needs
to be reimplemented anyway and not used. We will release writers if
needed when it comes to SelectiveCar writer implementation. Just now it
is not clear if we want a CARv2 writer that works with the deprecated
`format.NodeGetter`.

Add tests for V1 wrapping functionality.

Reformat code in repo for consistency using `gofumpt`.
parent 0aefa5b1
package car
import (
"bytes"
"context"
"io"
"os"
internalio "github.com/ipld/go-car/v2/internal/io"
"github.com/ipfs/go-cid"
format "github.com/ipfs/go-ipld-format"
"github.com/ipld/go-car/v2/index"
"github.com/ipld/go-car/v2/internal/carv1"
)
const bulkPaddingBytesSize = 1024
var bulkPadding = make([]byte, bulkPaddingBytesSize)
type (
// padding represents the number of padding bytes.
padding uint64
// writer writes CAR v2 into a given io.Writer.
writer struct {
NodeGetter format.NodeGetter
CarV1Padding uint64
IndexPadding uint64
ctx context.Context
roots []cid.Cid
encodedCarV1 *bytes.Buffer
}
)
// WriteTo writes this padding to the given writer as default value bytes.
func (p padding) WriteTo(w io.Writer) (n int64, err error) {
var reminder int64
if p > bulkPaddingBytesSize {
reminder = int64(p % bulkPaddingBytesSize)
iter := int(p / bulkPaddingBytesSize)
for i := 0; i < iter; i++ {
if _, err = w.Write(bulkPadding); err != nil {
return
}
n += bulkPaddingBytesSize
}
} else {
reminder = int64(p)
}
paddingBytes := make([]byte, reminder)
_, err = w.Write(paddingBytes)
n += reminder
return
}
// newWriter instantiates a new CAR v2 writer.
func newWriter(ctx context.Context, ng format.NodeGetter, roots []cid.Cid) *writer {
return &writer{
NodeGetter: ng,
ctx: ctx,
roots: roots,
encodedCarV1: new(bytes.Buffer),
}
}
// WriteTo writes the given root CIDs according to CAR v2 specification.
func (w *writer) WriteTo(writer io.Writer) (n int64, err error) {
_, err = writer.Write(Pragma)
if err != nil {
return
}
n += int64(PragmaSize)
// We read the entire car into memory because GenerateIndex takes a reader.
// TODO Future PRs will make this more efficient by exposing necessary interfaces in index pacakge so that
// this can be done in an streaming manner.
if err = carv1.WriteCar(w.ctx, w.NodeGetter, w.roots, w.encodedCarV1); err != nil {
return
}
carV1Len := w.encodedCarV1.Len()
wn, err := w.writeHeader(writer, carV1Len)
if err != nil {
return
}
n += wn
wn, err = padding(w.CarV1Padding).WriteTo(writer)
if err != nil {
return
}
n += wn
carV1Bytes := w.encodedCarV1.Bytes()
wwn, err := writer.Write(carV1Bytes)
if err != nil {
return
}
n += int64(wwn)
wn, err = padding(w.IndexPadding).WriteTo(writer)
if err != nil {
return
}
n += wn
wn, err = w.writeIndex(writer, carV1Bytes)
if err == nil {
n += wn
}
return
}
func (w *writer) writeHeader(writer io.Writer, carV1Len int) (int64, error) {
header := NewHeader(uint64(carV1Len)).
WithCarV1Padding(w.CarV1Padding).
WithIndexPadding(w.IndexPadding)
return header.WriteTo(writer)
}
func (w *writer) writeIndex(writer io.Writer, carV1 []byte) (int64, error) {
// TODO avoid recopying the bytes by refactoring index once it is integrated here.
// Right now we copy the bytes since index takes a writer.
// Consider refactoring index to make this process more efficient.
// We should avoid reading the entire car into memory since it can be large.
reader := bytes.NewReader(carV1)
idx, err := GenerateIndex(reader)
if err != nil {
return 0, err
}
// FIXME refactor index to expose the number of bytes written.
return 0, index.WriteTo(idx, writer)
}
// WrapV1File is a wrapper around WrapV1 that takes filesystem paths.
// The source path is assumed to exist, and the destination path is overwritten.
// Note that the destination path might still be created even if an error
......
package car
import (
"bytes"
"context"
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/ipld/go-car/v2/index"
"github.com/ipld/go-car/v2/internal/carv1"
"github.com/stretchr/testify/require"
"github.com/ipfs/go-cid"
format "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-merkledag"
......@@ -12,56 +19,44 @@ import (
"github.com/stretchr/testify/assert"
)
func TestPadding_WriteTo(t *testing.T) {
tests := []struct {
name string
padding padding
wantBytes []byte
wantN int64
wantErr bool
}{
{
"ZeroPaddingIsNoBytes",
padding(0),
nil,
0,
false,
},
{
"NonZeroPaddingIsCorrespondingZeroValueBytes",
padding(3),
[]byte{0x00, 0x00, 0x00},
3,
false,
},
{
"PaddingLargerThanTheBulkPaddingSizeIsCorrespondingZeroValueBytes",
padding(1025),
make([]byte, 1025),
1025,
false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
w := &bytes.Buffer{}
gotN, gotErr := tt.padding.WriteTo(w)
if tt.wantErr {
assert.Error(t, gotErr)
return
}
gotBytes := w.Bytes()
assert.Equal(t, tt.wantN, gotN)
assert.Equal(t, tt.wantBytes, gotBytes)
})
}
}
func TestWrapV1(t *testing.T) {
// Produce a CARv1 file to test wrapping with.
dagSvc := dstest.Mock()
src := filepath.Join(t.TempDir(), "unwrapped-test-v1.car")
sf, err := os.Create(src)
require.NoError(t, err)
t.Cleanup(func() { require.NoError(t, sf.Close()) })
require.NoError(t, carv1.WriteCar(context.Background(), dagSvc, generateRootCid(t, dagSvc), sf))
// Wrap the test CARv1 file
dest := filepath.Join(t.TempDir(), "wrapped-test-v1.car")
df, err := os.Create(dest)
require.NoError(t, err)
t.Cleanup(func() { require.NoError(t, df.Close()) })
_, err = sf.Seek(0, io.SeekStart)
require.NoError(t, err)
require.NoError(t, WrapV1(sf, df))
// Assert wrapped file is valid CARv2 with CARv1 data payload matching the original CARv1 file.
subject, err := OpenReader(dest)
t.Cleanup(func() { require.NoError(t, subject.Close()) })
require.NoError(t, err)
// Assert CARv1 data payloads are identical.
_, err = sf.Seek(0, io.SeekStart)
require.NoError(t, err)
wantPayload, err := ioutil.ReadAll(sf)
require.NoError(t, err)
gotPayload, err := ioutil.ReadAll(subject.CarV1Reader())
require.NoError(t, err)
require.Equal(t, wantPayload, gotPayload)
func TestNewWriter(t *testing.T) {
dagService := dstest.Mock()
wantRoots := generateRootCid(t, dagService)
writer := newWriter(context.Background(), dagService, wantRoots)
assert.Equal(t, wantRoots, writer.roots)
// Assert embedded index in CARv2 is same as index generated from the original CARv1.
wantIdx, err := GenerateIndexFromFile(src)
require.NoError(t, err)
gotIdx, err := index.ReadFrom(subject.IndexReader())
require.NoError(t, err)
require.Equal(t, wantIdx, gotIdx)
}
func generateRootCid(t *testing.T, adder format.NodeAdder) []cid.Cid {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment