Commit aa62719d authored by Masih H. Derkani's avatar Masih H. Derkani Committed by Daniel Martí

Rename `CarV1` to `Data` in API to be consistent with spec

Rename terminology to match what the spec uses to describe the inner
CARv1 payload, i.e. Data payload.

Update docs to use CARv1 and CARv2 consistently.

Fix typo in Options API.

See:
- https://ipld.io/specs/transport/car/carv2/
parent 0f79c3e8
......@@ -2,11 +2,11 @@
// This package provides two flavours of blockstore: ReadOnly and ReadWrite.
//
// The ReadOnly blockstore provides a read-only random access from a given data payload either in
// unindexed v1 format or indexed/unindexed v2 format:
// * ReadOnly.NewReadOnly can be used to instantiate a new read-only blockstore for a given CAR v1
// or CAR v2 data payload with an optional index override.
// * ReadOnly.OpenReadOnly can be used to instantiate a new read-only blockstore for a given CAR v1
// or CAR v2 file with automatic index generation if the index is not present.
// unindexed CARv1 format or indexed/unindexed v2 format:
// * ReadOnly.NewReadOnly can be used to instantiate a new read-only blockstore for a given CARv1
// or CARv2 data payload with an optional index override.
// * ReadOnly.OpenReadOnly can be used to instantiate a new read-only blockstore for a given CARv1
// or CARv2 file with automatic index generation if the index is not present.
//
// The ReadWrite blockstore allows writing and reading of the blocks concurrently. The user of this
// blockstore is responsible for calling ReadWrite.Finalize when finished writing blocks.
......
......@@ -34,9 +34,9 @@ type ReadOnly struct {
// For simplicity, the entirety of the blockstore methods grab the mutex.
mu sync.RWMutex
// The backing containing the CAR in v1 format.
// The backing containing the data payload in CARv1 format.
backing io.ReaderAt
// The CAR v1 content index.
// The CARv1 content index.
idx index.Index
// If we called carv2.NewReaderMmap, remember to close it too.
......@@ -54,8 +54,6 @@ type ReadOnly struct {
// • Get, Has, and HasSize will return a block
// only if the entire CID is present in the CAR file.
//
// • DeleteBlock will delete a block only when the entire CID matches.
//
// • AllKeysChan will return the original whole CIDs, instead of with their
// multicodec set to "raw" to just provide multihashes.
//
......@@ -73,12 +71,12 @@ func UseWholeCIDs(enable bool) carv2.ReadOption {
}
// NewReadOnly creates a new ReadOnly blockstore from the backing with a optional index as idx.
// This function accepts both CAR v1 and v2 backing.
// This function accepts both CARv1 and CARv2 backing.
// The blockstore is instantiated with the given index if it is not nil.
//
// Otherwise:
// * For a CAR v1 backing an index is generated.
// * For a CAR v2 backing an index is only generated if Header.HasIndex returns false.
// * For a CARv1 backing an index is generated.
// * For a CARv2 backing an index is only generated if Header.HasIndex returns false.
//
// There is no need to call ReadOnly.Close on instances returned by this function.
func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.ReadOption) (*ReadOnly, error) {
......@@ -112,11 +110,11 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.ReadOption)
if err != nil {
return nil, err
}
} else if idx, err = generateIndex(v2r.CarV1Reader(), opts...); err != nil {
} else if idx, err = generateIndex(v2r.DataReader(), opts...); err != nil {
return nil, err
}
}
b.backing = v2r.CarV1Reader()
b.backing = v2r.DataReader()
b.idx = idx
return b, nil
default:
......@@ -169,7 +167,7 @@ func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) {
return bcid, data, err
}
// DeleteBlock is unsupported and always returns an error.
// DeleteBlock is unsupported and always panics.
func (b *ReadOnly) DeleteBlock(_ cid.Cid) error {
panic("called write method on a read-only blockstore")
}
......@@ -289,7 +287,7 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) {
// Null padding; by default it's an error.
if length == 0 {
if b.ropts.ZeroLegthSectionAsEOF {
if b.ropts.ZeroLengthSectionAsEOF {
break
} else {
return // TODO: log this error
......
......@@ -131,7 +131,7 @@ func newReaderFromV2File(t *testing.T, carv2Path string) *carv1.CarReader {
t.Cleanup(func() { f.Close() })
v2r, err := carv2.NewReader(f)
require.NoError(t, err)
v1r, err := carv1.NewCarReader(v2r.CarV1Reader())
v1r, err := carv1.NewCarReader(v2r.DataReader())
require.NoError(t, err)
return v1r
}
......@@ -23,7 +23,7 @@ import (
var _ blockstore.Blockstore = (*ReadWrite)(nil)
// ReadWrite implements a blockstore that stores blocks in CAR v2 format.
// ReadWrite implements a blockstore that stores blocks in CARv2 format.
// Blocks put into the blockstore can be read back once they are successfully written.
// This implementation is preferable for a write-heavy workload.
// The blocks are written immediately on Put and PutAll calls, while the index is stored in memory
......@@ -33,8 +33,8 @@ var _ blockstore.Blockstore = (*ReadWrite)(nil)
// Upon calling Finalize header is finalized and index is written out.
// Once finalized, all read and write calls to this blockstore will result in panics.
type ReadWrite struct {
f *os.File
carV1Writer *internalio.OffsetWriteSeeker
f *os.File
dataWriter *internalio.OffsetWriteSeeker
ReadOnly
idx *insertionIndex
header carv2.Header
......@@ -57,13 +57,13 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption {
// OpenReadWrite creates a new ReadWrite at the given path with a provided set of root CIDs and options.
//
// ReadWrite.Finalize must be called once putting and reading blocks are no longer needed.
// Upon calling ReadWrite.Finalize the CAR v2 header and index are written out onto the file and the
// Upon calling ReadWrite.Finalize the CARv2 header and index are written out onto the file and the
// backing file is closed. Once finalized, all read and write calls to this blockstore will result
// in panics. Note, ReadWrite.Finalize must be called on an open instance regardless of whether any
// blocks were put or not.
//
// If a file at given path does not exist, the instantiation will write car.Pragma and data payload
// header (i.e. the inner CAR v1 header) onto the file before returning.
// header (i.e. the inner CARv1 header) onto the file before returning.
//
// When the given path already exists, the blockstore will attempt to resume from it.
// On resumption the existing data sections in file are re-indexed, allowing the caller to continue
......@@ -73,15 +73,15 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption {
// Resumption only works on files that were created by a previous instance of a ReadWrite
// blockstore. This means a file created as a result of a successful call to OpenReadWrite can be
// resumed from as long as write operations such as ReadWrite.Put, ReadWrite.PutMany returned
// successfully. On resumption the roots argument and WithCarV1Padding option must match the
// successfully. On resumption the roots argument and WithDataPadding option must match the
// previous instantiation of ReadWrite blockstore that created the file. More explicitly, the file
// resuming from must:
// 1. start with a complete CAR v2 car.Pragma.
// 2. contain a complete CAR v1 data header with root CIDs matching the CIDs passed to the
// constructor, starting at offset optionally padded by WithCarV1Padding, followed by zero or
// 1. start with a complete CARv2 car.Pragma.
// 2. contain a complete CARv1 data header with root CIDs matching the CIDs passed to the
// constructor, starting at offset optionally padded by WithDataPadding, followed by zero or
// more complete data sections. If any corrupt data sections are present the resumption will fail.
// Note, if set previously, the blockstore must use the same WithCarV1Padding option as before,
// since this option is used to locate the CAR v1 data payload.
// Note, if set previously, the blockstore must use the same WithDataPadding option as before,
// since this option is used to locate the CARv1 data payload.
//
// Note, resumption should be used with WithCidDeduplication, so that blocks that are successfully
// written into the file are not re-written. Unless, the user explicitly wants duplicate blocks.
......@@ -124,15 +124,15 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.ReadWriteOption)
opt(&rwbs.wopts)
}
}
if p := rwbs.wopts.CarV1Padding; p > 0 {
rwbs.header = rwbs.header.WithCarV1Padding(p)
if p := rwbs.wopts.DataPadding; p > 0 {
rwbs.header = rwbs.header.WithDataPadding(p)
}
if p := rwbs.wopts.IndexPadding; p > 0 {
rwbs.header = rwbs.header.WithIndexPadding(p)
}
rwbs.carV1Writer = internalio.NewOffsetWriter(rwbs.f, int64(rwbs.header.CarV1Offset))
v1r := internalio.NewOffsetReadSeeker(rwbs.f, int64(rwbs.header.CarV1Offset))
rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, int64(rwbs.header.DataOffset))
v1r := internalio.NewOffsetReadSeeker(rwbs.f, int64(rwbs.header.DataOffset))
rwbs.ReadOnly.backing = v1r
rwbs.ReadOnly.idx = rwbs.idx
rwbs.ReadOnly.carv2Closer = rwbs.f
......@@ -154,13 +154,13 @@ func (b *ReadWrite) initWithRoots(roots []cid.Cid) error {
if _, err := b.f.WriteAt(carv2.Pragma, 0); err != nil {
return err
}
return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.carV1Writer)
return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter)
}
func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error {
// On resumption it is expected that the CAR v2 Pragma, and the CAR v1 header is successfully written.
// On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written.
// Otherwise we cannot resume from the file.
// Read pragma to assert if b.f is indeed a CAR v2.
// Read pragma to assert if b.f is indeed a CARv2.
version, err := carv2.ReadVersion(b.f)
if err != nil {
// The file is not a valid CAR file and cannot resume from it.
......@@ -168,36 +168,36 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error {
return err
}
if version != 2 {
// The file is not a CAR v2 and we cannot resume from it.
// The file is not a CARv2 and we cannot resume from it.
return fmt.Errorf("cannot resume on CAR file with version %v", version)
}
// Check if file was finalized by trying to read the CAR v2 header.
// Check if file was finalized by trying to read the CARv2 header.
// We check because if finalized the CARv1 reader behaviour needs to be adjusted since
// EOF will not signify end of CAR v1 payload. i.e. index is most likely present.
// EOF will not signify end of CARv1 payload. i.e. index is most likely present.
var headerInFile carv2.Header
_, err = headerInFile.ReadFrom(internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize))
// If reading CARv2 header succeeded, and CARv1 offset in header is not zero then the file is
// most-likely finalized. Check padding and truncate the file to remove index.
// Otherwise, carry on reading the v1 payload at offset determined from b.header.
if err == nil && headerInFile.CarV1Offset != 0 {
if headerInFile.CarV1Offset != b.header.CarV1Offset {
// Assert that the padding on file matches the given WithCarV1Padding option.
wantPadding := headerInFile.CarV1Offset - carv2.PragmaSize - carv2.HeaderSize
gotPadding := b.header.CarV1Offset - carv2.PragmaSize - carv2.HeaderSize
if err == nil && headerInFile.DataOffset != 0 {
if headerInFile.DataOffset != b.header.DataOffset {
// Assert that the padding on file matches the given WithDataPadding option.
wantPadding := headerInFile.DataOffset - carv2.PragmaSize - carv2.HeaderSize
gotPadding := b.header.DataOffset - carv2.PragmaSize - carv2.HeaderSize
return fmt.Errorf(
"cannot resume from file with mismatched CARv1 offset; "+
"`WithCarV1Padding` option must match the padding on file. "+
"`WithDataPadding` option must match the padding on file. "+
"Expected padding value of %v but got %v", wantPadding, gotPadding,
)
} else if headerInFile.CarV1Size != 0 {
} else if headerInFile.DataSize != 0 {
// If header in file contains the size of the CARv1 data payload, then the index is most likely present.
// Since we will need to re-generate the index, as the one in file is flattened, truncate
// the file so that the Readonly.backing has the right set of bytes to deal with.
// This effectively means resuming from a finalized file will wipe its index even if there
// are no blocks put unless the user calls finalize.
if err := b.f.Truncate(int64(headerInFile.CarV1Offset + headerInFile.CarV1Size)); err != nil {
if err := b.f.Truncate(int64(headerInFile.DataOffset + headerInFile.DataSize)); err != nil {
return err
}
} else {
......@@ -214,11 +214,11 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error {
}
}
// Use the given CAR v1 padding to instantiate the CAR v1 reader on file.
// Use the given CARv1 padding to instantiate the CARv1 reader on file.
v1r := internalio.NewOffsetReadSeeker(b.ReadOnly.backing, 0)
header, err := carv1.ReadHeader(v1r)
if err != nil {
// Cannot read the CAR v1 header; the file is most likely corrupt.
// Cannot read the CARv1 header; the file is most likely corrupt.
return fmt.Errorf("error reading car header: %w", err)
}
if !header.Matches(carv1.CarHeader{Roots: roots, Version: 1}) {
......@@ -255,7 +255,7 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error {
// Null padding; by default it's an error.
if length == 0 {
if b.ropts.ZeroLegthSectionAsEOF {
if b.ropts.ZeroLengthSectionAsEOF {
break
} else {
return fmt.Errorf("carv1 null padding not allowed by default; see ZeroLengthSectionAsEOF")
......@@ -276,7 +276,7 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error {
}
}
// Seek to the end of last skipped block where the writer should resume writing.
_, err = b.carV1Writer.Seek(sectionOffset, io.SeekStart)
_, err = b.dataWriter.Seek(sectionOffset, io.SeekStart)
return err
}
......@@ -286,7 +286,7 @@ func (b *ReadWrite) unfinalize() error {
}
func (b *ReadWrite) panicIfFinalized() {
if b.header.CarV1Size != 0 {
if b.header.DataSize != 0 {
panic("must not use a read-write blockstore after finalizing")
}
}
......@@ -320,8 +320,8 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error {
}
}
n := uint64(b.carV1Writer.Position())
if err := util.LdWrite(b.carV1Writer, c.Bytes(), bl.RawData()); err != nil {
n := uint64(b.dataWriter.Position())
if err := util.LdWrite(b.dataWriter, c.Bytes(), bl.RawData()); err != nil {
return err
}
b.idx.insertNoReplace(c, n)
......@@ -329,11 +329,11 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error {
return nil
}
// Finalize finalizes this blockstore by writing the CAR v2 header, along with flattened index
// Finalize finalizes this blockstore by writing the CARv2 header, along with flattened index
// for more efficient subsequent read.
// After this call, this blockstore can no longer be used for read or write.
func (b *ReadWrite) Finalize() error {
if b.header.CarV1Size != 0 {
if b.header.DataSize != 0 {
// Allow duplicate Finalize calls, just like Close.
// Still error, just like ReadOnly.Close; it should be discarded.
return fmt.Errorf("called Finalize twice")
......@@ -342,7 +342,7 @@ func (b *ReadWrite) Finalize() error {
b.mu.Lock()
defer b.mu.Unlock()
// TODO check if add index option is set and don't write the index then set index offset to zero.
b.header = b.header.WithCarV1Size(uint64(b.carV1Writer.Position()))
b.header = b.header.WithDataSize(uint64(b.dataWriter.Position()))
defer b.Close()
// TODO if index not needed don't bother flattening it.
......
......@@ -280,7 +280,7 @@ func TestBlockstoreNullPadding(t *testing.T) {
// A sample null-padded CARv1 file.
paddedV1 = append(paddedV1, make([]byte, 2048)...)
rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLegthSectionAsEOF)
rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLengthSectionAsEOF)
require.NoError(t, err)
roots, err := rbs.Roots()
......@@ -312,7 +312,7 @@ func TestBlockstoreResumption(t *testing.T) {
require.NoError(t, err)
path := filepath.Join(t.TempDir(), "readwrite-resume.car")
// Create an incomplete CAR v2 file with no blocks put.
// Create an incomplete CARv2 file with no blocks put.
subject, err := blockstore.OpenReadWrite(path, r.Header.Roots)
require.NoError(t, err)
......@@ -330,7 +330,7 @@ func TestBlockstoreResumption(t *testing.T) {
// 30% chance of subject failing; more concretely: re-instantiating blockstore with the same
// file without calling Finalize. The higher this percentage the slower the test runs
// considering the number of blocks in the original CAR v1 test payload.
// considering the number of blocks in the original CARv1 test payload.
resume := rng.Float32() <= 0.3
// If testing resume case, then flip a coin to decide whether to finalize before blockstore
// re-instantiation or not. Note, both cases should work for resumption since we do not
......@@ -376,12 +376,12 @@ func TestBlockstoreResumption(t *testing.T) {
}
require.NoError(t, subject.Close())
// Finalize the blockstore to complete partially written CAR v2 file.
// Finalize the blockstore to complete partially written CARv2 file.
subject, err = blockstore.OpenReadWrite(path, r.Header.Roots)
require.NoError(t, err)
require.NoError(t, subject.Finalize())
// Assert resumed from file is a valid CAR v2 with index.
// Assert resumed from file is a valid CARv2 with index.
v2f, err := os.Open(path)
require.NoError(t, err)
t.Cleanup(func() { assert.NoError(t, v2f.Close()) })
......@@ -389,13 +389,13 @@ func TestBlockstoreResumption(t *testing.T) {
require.NoError(t, err)
require.True(t, v2r.Header.HasIndex())
// Assert CAR v1 payload in file matches the original CAR v1 payload.
// Assert CARv1 payload in file matches the original CARv1 payload.
_, err = v1f.Seek(0, io.SeekStart)
require.NoError(t, err)
wantPayloadReader, err := carv1.NewCarReader(v1f)
require.NoError(t, err)
gotPayloadReader, err := carv1.NewCarReader(v2r.CarV1Reader())
gotPayloadReader, err := carv1.NewCarReader(v2r.DataReader())
require.NoError(t, err)
require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header)
......@@ -411,7 +411,7 @@ func TestBlockstoreResumption(t *testing.T) {
require.Equal(t, wantNextBlock, gotNextBlock)
}
// Assert index in resumed from file is identical to index generated directly from original CAR v1 payload.
// Assert index in resumed from file is identical to index generated directly from original CARv1 payload.
_, err = v1f.Seek(0, io.SeekStart)
require.NoError(t, err)
gotIdx, err := index.ReadFrom(v2r.IndexReader())
......@@ -423,7 +423,7 @@ func TestBlockstoreResumption(t *testing.T) {
func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) {
path := filepath.Join(t.TempDir(), "readwrite-resume-finalized.car")
// Create an incomplete CAR v2 file with no blocks put.
// Create an incomplete CARv2 file with no blocks put.
subject, err := blockstore.OpenReadWrite(path, []cid.Cid{})
require.NoError(t, err)
require.NoError(t, subject.Finalize())
......@@ -487,7 +487,7 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) {
subject, err := blockstore.OpenReadWrite(
path,
WantRoots,
carv2.UseCarV1Padding(wantCarV1Padding),
carv2.UseDataPadding(wantCarV1Padding),
carv2.UseIndexPadding(wantIndexPadding))
require.NoError(t, err)
t.Cleanup(func() { subject.Close() })
......@@ -500,8 +500,8 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) {
t.Cleanup(func() { gotCarV2.Close() })
require.NoError(t, err)
wantCarV1Offset := carv2.PragmaSize + carv2.HeaderSize + wantCarV1Padding
wantIndexOffset := wantCarV1Offset + gotCarV2.Header.CarV1Size + wantIndexPadding
require.Equal(t, wantCarV1Offset, gotCarV2.Header.CarV1Offset)
wantIndexOffset := wantCarV1Offset + gotCarV2.Header.DataSize + wantIndexPadding
require.Equal(t, wantCarV1Offset, gotCarV2.Header.DataOffset)
require.Equal(t, wantIndexOffset, gotCarV2.Header.IndexOffset)
require.NoError(t, gotCarV2.Close())
......@@ -510,7 +510,7 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) {
t.Cleanup(func() { f.Close() })
// Assert reading CARv1 directly at offset and size is as expected.
gotCarV1, err := carv1.NewCarReader(io.NewSectionReader(f, int64(wantCarV1Offset), int64(gotCarV2.Header.CarV1Size)))
gotCarV1, err := carv1.NewCarReader(io.NewSectionReader(f, int64(wantCarV1Offset), int64(gotCarV2.Header.DataSize)))
require.NoError(t, err)
require.Equal(t, WantRoots, gotCarV1.Header.Roots)
gotOneBlock, err := gotCarV1.Next()
......@@ -548,7 +548,7 @@ func TestReadWriteResumptionFromFileWithDifferentCarV1PaddingIsError(t *testing.
subject, err := blockstore.OpenReadWrite(
path,
WantRoots,
carv2.UseCarV1Padding(1413))
carv2.UseDataPadding(1413))
require.NoError(t, err)
t.Cleanup(func() { subject.Close() })
require.NoError(t, subject.Put(oneTestBlockWithCidV1))
......@@ -557,9 +557,9 @@ func TestReadWriteResumptionFromFileWithDifferentCarV1PaddingIsError(t *testing.
resumingSubject, err := blockstore.OpenReadWrite(
path,
WantRoots,
carv2.UseCarV1Padding(1314))
carv2.UseDataPadding(1314))
require.EqualError(t, err, "cannot resume from file with mismatched CARv1 offset; "+
"`WithCarV1Padding` option must match the padding on file. "+
"`WithDataPadding` option must match the padding on file. "+
"Expected padding value of 1413 but got 1314")
require.Nil(t, resumingSubject)
}
......@@ -6,16 +6,16 @@ import (
)
const (
// PragmaSize is the size of the CAR v2 pragma in bytes.
// PragmaSize is the size of the CARv2 pragma in bytes.
PragmaSize = 11
// HeaderSize is the fixed size of CAR v2 header in number of bytes.
// HeaderSize is the fixed size of CARv2 header in number of bytes.
HeaderSize = 40
// CharacteristicsSize is the fixed size of Characteristics bitfield within CAR v2 header in number of bytes.
// CharacteristicsSize is the fixed size of Characteristics bitfield within CARv2 header in number of bytes.
CharacteristicsSize = 16
)
// The pragma of a CAR v2, containing the version number..
// This is a valid CAR v1 header, with version number set to 2.
// The pragma of a CARv2, containing the version number.
// This is a valid CARv1 header, with version number of 2 and no root CIDs.
var Pragma = []byte{
0x0a, // unit(10)
0xa1, // map(1)
......@@ -25,18 +25,18 @@ var Pragma = []byte{
}
type (
// Header represents the CAR v2 header/pragma.
// Header represents the CARv2 header/pragma.
Header struct {
// 128-bit characteristics of this CAR v2 file, such as order, deduplication, etc. Reserved for future use.
// 128-bit characteristics of this CARv2 file, such as order, deduplication, etc. Reserved for future use.
Characteristics Characteristics
// The offset from the beginning of the file at which the dump of CAR v1 starts.
CarV1Offset uint64
// The size of CAR v1 encapsulated in this CAR v2 as bytes.
CarV1Size uint64
// The offset from the beginning of the file at which the CAR v2 index begins.
// The byte-offset from the beginning of the CARv2 to the first byte of the CARv1 data payload.
DataOffset uint64
// The byte-length of the CARv1 data payload.
DataSize uint64
// The byte-offset from the beginning of the CARv2 to the first byte of the index payload. This value may be 0 to indicate the absence of index data.
IndexOffset uint64
}
// Characteristics is a bitfield placeholder for capturing the characteristics of a CAR v2 such as order and determinism.
// Characteristics is a bitfield placeholder for capturing the characteristics of a CARv2 such as order and determinism.
Characteristics struct {
Hi uint64
Lo uint64
......@@ -64,37 +64,37 @@ func (c *Characteristics) ReadFrom(r io.Reader) (int64, error) {
return n, nil
}
// NewHeader instantiates a new CAR v2 header, given the byte length of a CAR v1.
func NewHeader(carV1Size uint64) Header {
// NewHeader instantiates a new CARv2 header, given the data size.
func NewHeader(dataSize uint64) Header {
header := Header{
CarV1Size: carV1Size,
DataSize: dataSize,
}
header.CarV1Offset = PragmaSize + HeaderSize
header.IndexOffset = header.CarV1Offset + carV1Size
header.DataOffset = PragmaSize + HeaderSize
header.IndexOffset = header.DataOffset + dataSize
return header
}
// WithIndexPadding sets the index offset from the beginning of the file for this header and returns the
// header for convenient chained calls.
// WithIndexPadding sets the index offset from the beginning of the file for this header and returns
// the header for convenient chained calls.
// The index offset is calculated as the sum of PragmaSize, HeaderSize,
// Header.CarV1Size, and the given padding.
// Header.DataSize, and the given padding.
func (h Header) WithIndexPadding(padding uint64) Header {
h.IndexOffset = h.IndexOffset + padding
return h
}
// WithCarV1Padding sets the CAR v1 dump offset from the beginning of the file for this header and returns the
// header for convenient chained calls.
// The CAR v1 offset is calculated as the sum of PragmaSize, HeaderSize and the given padding.
// WithDataPadding sets the data payload byte-offset from the beginning of the file for this header
// and returns the header for convenient chained calls.
// The Data offset is calculated as the sum of PragmaSize, HeaderSize and the given padding.
// The call to this function also shifts the Header.IndexOffset forward by the given padding.
func (h Header) WithCarV1Padding(padding uint64) Header {
h.CarV1Offset = PragmaSize + HeaderSize + padding
func (h Header) WithDataPadding(padding uint64) Header {
h.DataOffset = PragmaSize + HeaderSize + padding
h.IndexOffset = h.IndexOffset + padding
return h
}
func (h Header) WithCarV1Size(size uint64) Header {
h.CarV1Size = size
func (h Header) WithDataSize(size uint64) Header {
h.DataSize = size
h.IndexOffset = size + h.IndexOffset
return h
}
......@@ -112,8 +112,8 @@ func (h Header) WriteTo(w io.Writer) (n int64, err error) {
return
}
buf := make([]byte, 24)
binary.LittleEndian.PutUint64(buf[:8], h.CarV1Offset)
binary.LittleEndian.PutUint64(buf[8:16], h.CarV1Size)
binary.LittleEndian.PutUint64(buf[:8], h.DataOffset)
binary.LittleEndian.PutUint64(buf[8:16], h.DataSize)
binary.LittleEndian.PutUint64(buf[16:], h.IndexOffset)
written, err := w.Write(buf)
n += int64(written)
......@@ -132,8 +132,8 @@ func (h *Header) ReadFrom(r io.Reader) (int64, error) {
if err != nil {
return n, err
}
h.CarV1Offset = binary.LittleEndian.Uint64(buf[:8])
h.CarV1Size = binary.LittleEndian.Uint64(buf[8:16])
h.DataOffset = binary.LittleEndian.Uint64(buf[:8])
h.DataSize = binary.LittleEndian.Uint64(buf[8:16])
h.IndexOffset = binary.LittleEndian.Uint64(buf[16:])
return n, nil
}
......@@ -36,11 +36,11 @@ func TestCarV2PragmaLength(t *testing.T) {
func TestCarV2PragmaIsValidCarV1Header(t *testing.T) {
v1h, err := carv1.ReadHeader(bytes.NewReader(carv2.Pragma))
assert.NoError(t, err, "cannot decode pragma as CBOR with CAR v1 header structure")
assert.NoError(t, err, "cannot decode pragma as CBOR with CARv1 header structure")
assert.Equal(t, &carv1.CarHeader{
Roots: nil,
Version: 2,
}, v1h, "CAR v2 pragma must be a valid CAR v1 header")
}, v1h, "CARv2 pragma must be a valid CARv1 header")
}
func TestHeader_WriteTo(t *testing.T) {
......@@ -70,8 +70,8 @@ func TestHeader_WriteTo(t *testing.T) {
Characteristics: carv2.Characteristics{
Hi: 1001, Lo: 1002,
},
CarV1Offset: 99,
CarV1Size: 100,
DataOffset: 99,
DataSize: 100,
IndexOffset: 101,
},
[]byte{
......@@ -94,8 +94,8 @@ func TestHeader_WriteTo(t *testing.T) {
}
gotWrite := buf.Bytes()
assert.Equal(t, tt.wantWrite, gotWrite, "Header.WriteTo() gotWrite = %v, wantWrite %v", gotWrite, tt.wantWrite)
assert.EqualValues(t, carv2.HeaderSize, uint64(len(gotWrite)), "WriteTo() CAR v2 header length must always be %v bytes long", carv2.HeaderSize)
assert.EqualValues(t, carv2.HeaderSize, uint64(written), "WriteTo() CAR v2 header byte count must always be %v bytes long", carv2.HeaderSize)
assert.EqualValues(t, carv2.HeaderSize, uint64(len(gotWrite)), "WriteTo() CARv2 header length must always be %v bytes long", carv2.HeaderSize)
assert.EqualValues(t, carv2.HeaderSize, uint64(written), "WriteTo() CARv2 header byte count must always be %v bytes long", carv2.HeaderSize)
})
}
}
......@@ -135,8 +135,8 @@ func TestHeader_ReadFrom(t *testing.T) {
Characteristics: carv2.Characteristics{
Hi: 1001, Lo: 1002,
},
CarV1Offset: 99,
CarV1Size: 100,
DataOffset: 99,
DataSize: 100,
IndexOffset: 101,
},
false,
......@@ -168,13 +168,13 @@ func TestHeader_WithPadding(t *testing.T) {
},
{
"WhenOnlyPaddingCarV1BothOffsetsShift",
carv2.NewHeader(123).WithCarV1Padding(3),
carv2.NewHeader(123).WithDataPadding(3),
carv2.PragmaSize + carv2.HeaderSize + 3,
carv2.PragmaSize + carv2.HeaderSize + 3 + 123,
},
{
"WhenPaddingBothCarV1AndIndexBothOffsetsShiftWithAdditionalIndexShift",
carv2.NewHeader(123).WithCarV1Padding(3).WithIndexPadding(7),
carv2.NewHeader(123).WithDataPadding(3).WithIndexPadding(7),
carv2.PragmaSize + carv2.HeaderSize + 3,
carv2.PragmaSize + carv2.HeaderSize + 3 + 123 + 7,
},
......@@ -182,7 +182,7 @@ func TestHeader_WithPadding(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.EqualValues(t, tt.wantCarV1Offset, tt.subject.CarV1Offset)
assert.EqualValues(t, tt.wantCarV1Offset, tt.subject.DataOffset)
assert.EqualValues(t, tt.wantIndexOffset, tt.subject.IndexOffset)
})
}
......@@ -192,8 +192,8 @@ func TestNewHeaderHasExpectedValues(t *testing.T) {
wantCarV1Len := uint64(1413)
want := carv2.Header{
Characteristics: carv2.Characteristics{},
CarV1Offset: carv2.PragmaSize + carv2.HeaderSize,
CarV1Size: wantCarV1Len,
DataOffset: carv2.PragmaSize + carv2.HeaderSize,
DataSize: wantCarV1Len,
IndexOffset: carv2.PragmaSize + carv2.HeaderSize + wantCarV1Len,
}
got := carv2.NewHeader(wantCarV1Len)
......
// Package car represents the CAR v2 implementation.
// TODO add CAR v2 byte structure here.
// Package car represents the CARv2 implementation.
// TODO add CARv2 byte structure here.
package car
......@@ -38,7 +38,7 @@ func ExampleWrapV1File() {
if err != nil {
panic(err)
}
inner, err := ioutil.ReadAll(cr.CarV1Reader())
inner, err := ioutil.ReadAll(cr.DataReader())
if err != nil {
panic(err)
}
......
// package index provides indexing functionality for CAR v1 data payload represented as a mapping of
// CID to offset. This can then be used to implement random access over a CAR v1.
// package index provides indexing functionality for CARv1 data payload represented as a mapping of
// CID to offset. This can then be used to implement random access over a CARv1.
//
// Index can be written or read using the following static functions: index.WriteTo and
// index.ReadFrom.
......
......@@ -63,10 +63,10 @@ func GenerateIndex(v1r io.Reader, opts ...ReadOption) (index.Index, error) {
// Null padding; by default it's an error.
if sectionLen == 0 {
if o.ZeroLegthSectionAsEOF {
if o.ZeroLengthSectionAsEOF {
break
} else {
return nil, fmt.Errorf("carv1 null padding not allowed by default; see ZeroLegthSectionAsEOF")
return nil, fmt.Errorf("carv1 null padding not allowed by default; see ZeroLengthSectionAsEOF")
}
}
......@@ -103,10 +103,10 @@ func GenerateIndexFromFile(path string) (index.Index, error) {
return GenerateIndex(f)
}
// ReadOrGenerateIndex accepts both CAR v1 and v2 format, and reads or generates an index for it.
// When the given reader is in CAR v1 format an index is always generated.
// For a payload in CAR v2 format, an index is only generated if Header.HasIndex returns false.
// An error is returned for all other formats, i.e. versions other than 1 or 2.
// ReadOrGenerateIndex accepts both CARv1 and CARv2 formats, and reads or generates an index for it.
// When the given reader is in CARv1 format an index is always generated.
// For a payload in CARv2 format, an index is only generated if Header.HasIndex returns false.
// An error is returned for all other formats, i.e. pragma with versions other than 1 or 2.
//
// Note, the returned index lives entirely in memory and will not depend on the
// given reader to fulfill index lookup.
......@@ -126,7 +126,7 @@ func ReadOrGenerateIndex(rs io.ReadSeeker) (index.Index, error) {
// Simply generate the index, since there can't be a pre-existing one.
return GenerateIndex(rs)
case 2:
// Read CAR v2 format
// Read CARv2 format
v2r, err := NewReader(internalio.ToReaderAt(rs))
if err != nil {
return nil, err
......@@ -135,8 +135,8 @@ func ReadOrGenerateIndex(rs io.ReadSeeker) (index.Index, error) {
if v2r.Header.HasIndex() {
return index.ReadFrom(v2r.IndexReader())
}
// Otherwise, generate index from CAR v1 payload wrapped within CAR v2 format.
return GenerateIndex(v2r.CarV1Reader())
// Otherwise, generate index from CARv1 payload wrapped within CARv2 format.
return GenerateIndex(v2r.DataReader())
default:
return nil, fmt.Errorf("unknown version %v", version)
}
......
// Forked from CAR v1 to avoid dependency to ipld-prime 0.9.0 due to outstanding upgrades in filecoin.
// Forked from CARv1 to avoid dependency to ipld-prime 0.9.0 due to outstanding upgrades in filecoin.
package carv1
......@@ -6,7 +6,7 @@ package car
// This type should not be used directly by end users; it's only exposed as a
// side effect of ReadOption.
type ReadOptions struct {
ZeroLegthSectionAsEOF bool
ZeroLengthSectionAsEOF bool
BlockstoreUseWholeCIDs bool
}
......@@ -24,7 +24,7 @@ var _ ReadWriteOption = ReadOption(nil)
// This type should not be used directly by end users; it's only exposed as a
// side effect of WriteOption.
type WriteOptions struct {
CarV1Padding uint64
DataPadding uint64
IndexPadding uint64
BlockstoreAllowDuplicatePuts bool
......@@ -42,19 +42,19 @@ type ReadWriteOption interface {
readWriteOption()
}
// ZeroLegthSectionAsEOF is a read option which allows a CARv1 decoder to treat
// ZeroLengthSectionAsEOF is a read option which allows a CARv1 decoder to treat
// a zero-length section as the end of the input CAR file. For example, this can
// be useful to allow "null padding" after a CARv1 without knowing where the
// padding begins.
func ZeroLegthSectionAsEOF(o *ReadOptions) {
o.ZeroLegthSectionAsEOF = true
func ZeroLengthSectionAsEOF(o *ReadOptions) {
o.ZeroLengthSectionAsEOF = true
}
// UseCarV1Padding is a write option which sets the padding to be added between
// CAR v2 header and its data payload on Finalize.
func UseCarV1Padding(p uint64) WriteOption {
// UseDataPadding is a write option which sets the padding to be added between
// CARv2 header and its data payload on Finalize.
func UseDataPadding(p uint64) WriteOption {
return func(o *WriteOptions) {
o.CarV1Padding = p
o.DataPadding = p
}
}
......
......@@ -11,12 +11,12 @@ import (
"golang.org/x/exp/mmap"
)
// Reader represents a reader of CAR v2.
// Reader represents a reader of CARv2.
type Reader struct {
Header Header
r io.ReaderAt
roots []cid.Cid
carv2Closer io.Closer
Header Header
r io.ReaderAt
roots []cid.Cid
closer io.Closer
}
// OpenReader is a wrapper for NewReader which opens the file at path.
......@@ -31,13 +31,13 @@ func OpenReader(path string, opts ...ReadOption) (*Reader, error) {
return nil, err
}
r.carv2Closer = f
r.closer = f
return r, nil
}
// NewReader constructs a new reader that reads CAR v2 from the given r.
// NewReader constructs a new reader that reads CARv2 from the given r.
// Upon instantiation, the reader inspects the payload by reading the pragma and will return
// an error if the pragma does not represent a CAR v2.
// an error if the pragma does not represent a CARv2.
func NewReader(r io.ReaderAt, opts ...ReadOption) (*Reader, error) {
cr := &Reader{
r: r,
......@@ -63,12 +63,13 @@ func (r *Reader) requireVersion2() (err error) {
return
}
// Roots returns the root CIDs of this CAR
// Roots returns the root CIDs.
// The root CIDs are extracted lazily from the data payload header.
func (r *Reader) Roots() ([]cid.Cid, error) {
if r.roots != nil {
return r.roots, nil
}
header, err := carv1.ReadHeader(r.CarV1Reader())
header, err := carv1.ReadHeader(r.DataReader())
if err != nil {
return nil, err
}
......@@ -91,26 +92,26 @@ type SectionReader interface {
io.ReaderAt
}
// CarV1Reader provides a reader containing the CAR v1 section encapsulated in this CAR v2.
func (r *Reader) CarV1Reader() SectionReader {
return io.NewSectionReader(r.r, int64(r.Header.CarV1Offset), int64(r.Header.CarV1Size))
// DataReader provides a reader containing the data payload in CARv1 format.
func (r *Reader) DataReader() SectionReader {
return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize))
}
// IndexReader provides an io.Reader containing the index of this CAR v2.
// IndexReader provides an io.Reader containing the index for the data payload.
func (r *Reader) IndexReader() io.Reader {
return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset))
}
// Close closes the underlying reader if it was opened by OpenReader.
func (r *Reader) Close() error {
if r.carv2Closer != nil {
return r.carv2Closer.Close()
if r.closer != nil {
return r.closer.Close()
}
return nil
}
// ReadVersion reads the version from the pragma.
// This function accepts both CAR v1 and v2 payloads.
// This function accepts both CARv1 and CARv2 payloads.
func ReadVersion(r io.Reader) (version uint64, err error) {
// TODO if the user provides a reader that sufficiently satisfies what carv1.ReadHeader is asking then use that instead of wrapping every time.
header, err := carv1.ReadHeader(r)
......
......@@ -79,11 +79,11 @@ func WrapV1(src io.ReadSeeker, dst io.Writer) error {
return nil
}
// AttachIndex attaches a given index to an existing car v2 file at given path and offset.
// AttachIndex attaches a given index to an existing CARv2 file at given path and offset.
func AttachIndex(path string, idx index.Index, offset uint64) error {
// TODO: instead of offset, maybe take padding?
// TODO: check that the given path is indeed a CAR v2.
// TODO: update CAR v2 header according to the offset at which index is written out.
// TODO: check that the given path is indeed a CARv2.
// TODO: update CARv2 header according to the offset at which index is written out.
out, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o640)
if err != nil {
return err
......
......@@ -47,7 +47,7 @@ func TestWrapV1(t *testing.T) {
require.NoError(t, err)
wantPayload, err := ioutil.ReadAll(sf)
require.NoError(t, err)
gotPayload, err := ioutil.ReadAll(subject.CarV1Reader())
gotPayload, err := ioutil.ReadAll(subject.DataReader())
require.NoError(t, err)
require.Equal(t, wantPayload, gotPayload)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment