diff --git a/v2/blockstore/doc.go b/v2/blockstore/doc.go index 7210d742b989268ea5f8b4f33002f18ecde69abd..180dc7292f431f7abd61e5f4e8dc7d60187227a1 100644 --- a/v2/blockstore/doc.go +++ b/v2/blockstore/doc.go @@ -2,11 +2,11 @@ // This package provides two flavours of blockstore: ReadOnly and ReadWrite. // // The ReadOnly blockstore provides a read-only random access from a given data payload either in -// unindexed v1 format or indexed/unindexed v2 format: -// * ReadOnly.NewReadOnly can be used to instantiate a new read-only blockstore for a given CAR v1 -// or CAR v2 data payload with an optional index override. -// * ReadOnly.OpenReadOnly can be used to instantiate a new read-only blockstore for a given CAR v1 -// or CAR v2 file with automatic index generation if the index is not present. +// unindexed CARv1 format or indexed/unindexed v2 format: +// * ReadOnly.NewReadOnly can be used to instantiate a new read-only blockstore for a given CARv1 +// or CARv2 data payload with an optional index override. +// * ReadOnly.OpenReadOnly can be used to instantiate a new read-only blockstore for a given CARv1 +// or CARv2 file with automatic index generation if the index is not present. // // The ReadWrite blockstore allows writing and reading of the blocks concurrently. The user of this // blockstore is responsible for calling ReadWrite.Finalize when finished writing blocks. diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 5b9c8535ddfd17c0db3318791b9fff43cb854852..bbbba8f4e1ec17e7804d5bff1aa5910d138a70c5 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -34,9 +34,9 @@ type ReadOnly struct { // For simplicity, the entirety of the blockstore methods grab the mutex. mu sync.RWMutex - // The backing containing the CAR in v1 format. + // The backing containing the data payload in CARv1 format. backing io.ReaderAt - // The CAR v1 content index. + // The CARv1 content index. idx index.Index // If we called carv2.NewReaderMmap, remember to close it too. @@ -54,8 +54,6 @@ type ReadOnly struct { // • Get, Has, and HasSize will only return a block // only if the entire CID is present in the CAR file. // -// • DeleteBlock will delete a block only when the entire CID matches. -// // • AllKeysChan will return the original whole CIDs, instead of with their // multicodec set to "raw" to just provide multihashes. // @@ -73,12 +71,12 @@ func UseWholeCIDs(enable bool) carv2.ReadOption { } // NewReadOnly creates a new ReadOnly blockstore from the backing with a optional index as idx. -// This function accepts both CAR v1 and v2 backing. +// This function accepts both CARv1 and CARv2 backing. // The blockstore is instantiated with the given index if it is not nil. // // Otherwise: -// * For a CAR v1 backing an index is generated. -// * For a CAR v2 backing an index is only generated if Header.HasIndex returns false. +// * For a CARv1 backing an index is generated. +// * For a CARv2 backing an index is only generated if Header.HasIndex returns false. // // There is no need to call ReadOnly.Close on instances returned by this function. 
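Reviewer note: a minimal read-side sketch of the API as it stands after this change (file path is a placeholder; error handling reduced to panics):

```go
package main

import (
	"fmt"
	"os"

	"github.com/ipld/go-car/v2/blockstore"
)

func main() {
	// NewReadOnly accepts any io.ReaderAt over a CARv1 or CARv2 payload.
	f, err := os.Open("example.car") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// A nil index means one is generated for a CARv1 backing, or read from the
	// CARv2 backing when its header reports an index is present.
	robs, err := blockstore.NewReadOnly(f, nil, blockstore.UseWholeCIDs(true))
	if err != nil {
		panic(err)
	}

	roots, err := robs.Roots()
	if err != nil {
		panic(err)
	}
	if len(roots) > 0 {
		blk, err := robs.Get(roots[0])
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s: %d bytes\n", blk.Cid(), len(blk.RawData()))
	}
}
```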
func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.ReadOption) (*ReadOnly, error) { @@ -112,11 +110,11 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.ReadOption) if err != nil { return nil, err } - } else if idx, err = generateIndex(v2r.CarV1Reader(), opts...); err != nil { + } else if idx, err = generateIndex(v2r.DataReader(), opts...); err != nil { return nil, err } } - b.backing = v2r.CarV1Reader() + b.backing = v2r.DataReader() b.idx = idx return b, nil default: @@ -169,7 +167,7 @@ func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { return bcid, data, err } -// DeleteBlock is unsupported and always returns an error. +// DeleteBlock is unsupported and always panics. func (b *ReadOnly) DeleteBlock(_ cid.Cid) error { panic("called write method on a read-only blockstore") } @@ -289,7 +287,7 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { // Null padding; by default it's an error. if length == 0 { - if b.ropts.ZeroLegthSectionAsEOF { + if b.ropts.ZeroLengthSectionAsEOF { break } else { return // TODO: log this error diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index 10f9804cd9a6a7d4f981e14b30a5786cbc245af9..3d75fd96da659215935d968de880756c78b63922 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -131,7 +131,7 @@ func newReaderFromV2File(t *testing.T, carv2Path string) *carv1.CarReader { t.Cleanup(func() { f.Close() }) v2r, err := carv2.NewReader(f) require.NoError(t, err) - v1r, err := carv1.NewCarReader(v2r.CarV1Reader()) + v1r, err := carv1.NewCarReader(v2r.DataReader()) require.NoError(t, err) return v1r } diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index a1c7fd84f798b544d4454588838b2ac63ab28b2b..693e52f9134597d115d7daa3558b0738f578cb88 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -23,7 +23,7 @@ import ( var _ blockstore.Blockstore = (*ReadWrite)(nil) -// ReadWrite implements a blockstore that stores blocks in CAR v2 format. +// ReadWrite implements a blockstore that stores blocks in CARv2 format. // Blocks put into the blockstore can be read back once they are successfully written. // This implementation is preferable for a write-heavy workload. // The blocks are written immediately on Put and PutAll calls, while the index is stored in memory @@ -33,8 +33,8 @@ var _ blockstore.Blockstore = (*ReadWrite)(nil) // Upon calling Finalize header is finalized and index is written out. // Once finalized, all read and write calls to this blockstore will result in panics. type ReadWrite struct { - f *os.File - carV1Writer *internalio.OffsetWriteSeeker + f *os.File + dataWriter *internalio.OffsetWriteSeeker ReadOnly idx *insertionIndex header carv2.Header @@ -57,13 +57,13 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption { // OpenReadWrite creates a new ReadWrite at the given path with a provided set of root CIDs and options. // // ReadWrite.Finalize must be called once putting and reading blocks are no longer needed. -// Upon calling ReadWrite.Finalize the CAR v2 header and index are written out onto the file and the +// Upon calling ReadWrite.Finalize the CARv2 header and index are written out onto the file and the // backing file is closed. Once finalized, all read and write calls to this blockstore will result // in panics. Note, ReadWrite.Finalize must be called on an open instance regardless of whether any // blocks were put or not. 
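And the corresponding write-side sketch for the ReadWrite flow described above (package and function names here are illustrative, not part of the change):

```go
package example

import (
	blocks "github.com/ipfs/go-block-format"
	"github.com/ipfs/go-cid"
	"github.com/ipld/go-car/v2/blockstore"
)

// writeCAR is a minimal sketch: open (or resume), put, finalize.
func writeCAR(path string, root cid.Cid, blks []blocks.Block) error {
	bs, err := blockstore.OpenReadWrite(path, []cid.Cid{root})
	if err != nil {
		return err
	}
	// Blocks land in the CARv1 data payload immediately on Put/PutMany,
	// while the index is kept in memory until Finalize.
	if err := bs.PutMany(blks); err != nil {
		return err
	}
	// Finalize writes the CARv2 header and index and closes the backing file.
	// It must be called even if no blocks were put.
	return bs.Finalize()
}
```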
// // If a file at given path does not exist, the instantiation will write car.Pragma and data payload -// header (i.e. the inner CAR v1 header) onto the file before returning. +// header (i.e. the inner CARv1 header) onto the file before returning. // // When the given path already exists, the blockstore will attempt to resume from it. // On resumption the existing data sections in file are re-indexed, allowing the caller to continue @@ -73,15 +73,15 @@ func AllowDuplicatePuts(allow bool) carv2.WriteOption { // Resumption only works on files that were created by a previous instance of a ReadWrite // blockstore. This means a file created as a result of a successful call to OpenReadWrite can be // resumed from as long as write operations such as ReadWrite.Put, ReadWrite.PutMany returned -// successfully. On resumption the roots argument and WithCarV1Padding option must match the +// successfully. On resumption the roots argument and WithDataPadding option must match the // previous instantiation of ReadWrite blockstore that created the file. More explicitly, the file // resuming from must: -// 1. start with a complete CAR v2 car.Pragma. -// 2. contain a complete CAR v1 data header with root CIDs matching the CIDs passed to the -// constructor, starting at offset optionally padded by WithCarV1Padding, followed by zero or +// 1. start with a complete CARv2 car.Pragma. +// 2. contain a complete CARv1 data header with root CIDs matching the CIDs passed to the +// constructor, starting at offset optionally padded by WithDataPadding, followed by zero or // more complete data sections. If any corrupt data sections are present the resumption will fail. -// Note, if set previously, the blockstore must use the same WithCarV1Padding option as before, -// since this option is used to locate the CAR v1 data payload. +// Note, if set previously, the blockstore must use the same WithDataPadding option as before, +// since this option is used to locate the CARv1 data payload. // // Note, resumption should be used with WithCidDeduplication, so that blocks that are successfully // written into the file are not re-written. Unless, the user explicitly wants duplicate blocks. @@ -124,15 +124,15 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.ReadWriteOption) opt(&rwbs.wopts) } } - if p := rwbs.wopts.CarV1Padding; p > 0 { - rwbs.header = rwbs.header.WithCarV1Padding(p) + if p := rwbs.wopts.DataPadding; p > 0 { + rwbs.header = rwbs.header.WithDataPadding(p) } if p := rwbs.wopts.IndexPadding; p > 0 { rwbs.header = rwbs.header.WithIndexPadding(p) } - rwbs.carV1Writer = internalio.NewOffsetWriter(rwbs.f, int64(rwbs.header.CarV1Offset)) - v1r := internalio.NewOffsetReadSeeker(rwbs.f, int64(rwbs.header.CarV1Offset)) + rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, int64(rwbs.header.DataOffset)) + v1r := internalio.NewOffsetReadSeeker(rwbs.f, int64(rwbs.header.DataOffset)) rwbs.ReadOnly.backing = v1r rwbs.ReadOnly.idx = rwbs.idx rwbs.ReadOnly.carv2Closer = rwbs.f @@ -154,13 +154,13 @@ func (b *ReadWrite) initWithRoots(roots []cid.Cid) error { if _, err := b.f.WriteAt(carv2.Pragma, 0); err != nil { return err } - return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.carV1Writer) + return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter) } func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { - // On resumption it is expected that the CAR v2 Pragma, and the CAR v1 header is successfully written. 
+ // On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written. // Otherwise we cannot resume from the file. - // Read pragma to assert if b.f is indeed a CAR v2. + // Read pragma to assert if b.f is indeed a CARv2. version, err := carv2.ReadVersion(b.f) if err != nil { // The file is not a valid CAR file and cannot resume from it. @@ -168,36 +168,36 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { return err } if version != 2 { - // The file is not a CAR v2 and we cannot resume from it. + // The file is not a CARv2 and we cannot resume from it. return fmt.Errorf("cannot resume on CAR file with version %v", version) } - // Check if file was finalized by trying to read the CAR v2 header. + // Check if file was finalized by trying to read the CARv2 header. // We check because if finalized the CARv1 reader behaviour needs to be adjusted since - // EOF will not signify end of CAR v1 payload. i.e. index is most likely present. + // EOF will not signify end of CARv1 payload. i.e. index is most likely present. var headerInFile carv2.Header _, err = headerInFile.ReadFrom(internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize)) // If reading CARv2 header succeeded, and CARv1 offset in header is not zero then the file is // most-likely finalized. Check padding and truncate the file to remove index. // Otherwise, carry on reading the v1 payload at offset determined from b.header. - if err == nil && headerInFile.CarV1Offset != 0 { - if headerInFile.CarV1Offset != b.header.CarV1Offset { - // Assert that the padding on file matches the given WithCarV1Padding option. - wantPadding := headerInFile.CarV1Offset - carv2.PragmaSize - carv2.HeaderSize - gotPadding := b.header.CarV1Offset - carv2.PragmaSize - carv2.HeaderSize + if err == nil && headerInFile.DataOffset != 0 { + if headerInFile.DataOffset != b.header.DataOffset { + // Assert that the padding on file matches the given WithDataPadding option. + wantPadding := headerInFile.DataOffset - carv2.PragmaSize - carv2.HeaderSize + gotPadding := b.header.DataOffset - carv2.PragmaSize - carv2.HeaderSize return fmt.Errorf( "cannot resume from file with mismatched CARv1 offset; "+ - "`WithCarV1Padding` option must match the padding on file. "+ + "`WithDataPadding` option must match the padding on file. "+ "Expected padding value of %v but got %v", wantPadding, gotPadding, ) - } else if headerInFile.CarV1Size != 0 { + } else if headerInFile.DataSize != 0 { // If header in file contains the size of car v1, then the index is most likely present. // Since we will need to re-generate the index, as the one in file is flattened, truncate // the file so that the Readonly.backing has the right set of bytes to deal with. // This effectively means resuming from a finalized file will wipe its index even if there // are no blocks put unless the user calls finalize. - if err := b.f.Truncate(int64(headerInFile.CarV1Offset + headerInFile.CarV1Size)); err != nil { + if err := b.f.Truncate(int64(headerInFile.DataOffset + headerInFile.DataSize)); err != nil { return err } } else { @@ -214,11 +214,11 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { } } - // Use the given CAR v1 padding to instantiate the CAR v1 reader on file. + // Use the given CARv1 padding to instantiate the CARv1 reader on file. v1r := internalio.NewOffsetReadSeeker(b.ReadOnly.backing, 0) header, err := carv1.ReadHeader(v1r) if err != nil { - // Cannot read the CAR v1 header; the file is most likely corrupt. 
+ // Cannot read the CARv1 header; the file is most likely corrupt. return fmt.Errorf("error reading car header: %w", err) } if !header.Matches(carv1.CarHeader{Roots: roots, Version: 1}) { @@ -255,7 +255,7 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { // Null padding; by default it's an error. if length == 0 { - if b.ropts.ZeroLegthSectionAsEOF { + if b.ropts.ZeroLengthSectionAsEOF { break } else { return fmt.Errorf("carv1 null padding not allowed by default; see WithZeroLegthSectionAsEOF") @@ -276,7 +276,7 @@ func (b *ReadWrite) resumeWithRoots(roots []cid.Cid) error { } } // Seek to the end of last skipped block where the writer should resume writing. - _, err = b.carV1Writer.Seek(sectionOffset, io.SeekStart) + _, err = b.dataWriter.Seek(sectionOffset, io.SeekStart) return err } @@ -286,7 +286,7 @@ func (b *ReadWrite) unfinalize() error { } func (b *ReadWrite) panicIfFinalized() { - if b.header.CarV1Size != 0 { + if b.header.DataSize != 0 { panic("must not use a read-write blockstore after finalizing") } } @@ -320,8 +320,8 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error { } } - n := uint64(b.carV1Writer.Position()) - if err := util.LdWrite(b.carV1Writer, c.Bytes(), bl.RawData()); err != nil { + n := uint64(b.dataWriter.Position()) + if err := util.LdWrite(b.dataWriter, c.Bytes(), bl.RawData()); err != nil { return err } b.idx.insertNoReplace(c, n) @@ -329,11 +329,11 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error { return nil } -// Finalize finalizes this blockstore by writing the CAR v2 header, along with flattened index +// Finalize finalizes this blockstore by writing the CARv2 header, along with flattened index // for more efficient subsequent read. // After this call, this blockstore can no longer be used for read or write. func (b *ReadWrite) Finalize() error { - if b.header.CarV1Size != 0 { + if b.header.DataSize != 0 { // Allow duplicate Finalize calls, just like Close. // Still error, just like ReadOnly.Close; it should be discarded. return fmt.Errorf("called Finalize twice") @@ -342,7 +342,7 @@ func (b *ReadWrite) Finalize() error { b.mu.Lock() defer b.mu.Unlock() // TODO check if add index option is set and don't write the index then set index offset to zero. - b.header = b.header.WithCarV1Size(uint64(b.carV1Writer.Position())) + b.header = b.header.WithDataSize(uint64(b.dataWriter.Position())) defer b.Close() // TODO if index not needed don't bother flattening it. diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 3a8099fbd2991f0f80277aabe710a36b0fd13a34..6612268f1448b3ad48880d86719ef54413bb78f9 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -280,7 +280,7 @@ func TestBlockstoreNullPadding(t *testing.T) { // A sample null-padded CARv1 file. paddedV1 = append(paddedV1, make([]byte, 2048)...) - rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLegthSectionAsEOF) + rbs, err := blockstore.NewReadOnly(bufferReaderAt(paddedV1), nil, carv2.ZeroLengthSectionAsEOF) require.NoError(t, err) roots, err := rbs.Roots() @@ -312,7 +312,7 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, err) path := filepath.Join(t.TempDir(), "readwrite-resume.car") - // Create an incomplete CAR v2 file with no blocks put. + // Create an incomplete CARv2 file with no blocks put. 
+ // Create an incomplete CARv2 file with no blocks put.
subject, err := blockstore.OpenReadWrite(path, r.Header.Roots) require.NoError(t, err) @@ -330,7 +330,7 @@ func TestBlockstoreResumption(t *testing.T) { // 30% chance of subject failing; more concretely: re-instantiating blockstore with the same // file without calling Finalize. The higher this percentage the slower the test runs - // considering the number of blocks in the original CAR v1 test payload. + // considering the number of blocks in the original CARv1 test payload. resume := rng.Float32() <= 0.3 // If testing resume case, then flip a coin to decide whether to finalize before blockstore // re-instantiation or not. Note, both cases should work for resumption since we do not @@ -376,12 +376,12 @@ func TestBlockstoreResumption(t *testing.T) { } require.NoError(t, subject.Close()) - // Finalize the blockstore to complete partially written CAR v2 file. + // Finalize the blockstore to complete partially written CARv2 file. subject, err = blockstore.OpenReadWrite(path, r.Header.Roots) require.NoError(t, err) require.NoError(t, subject.Finalize()) - // Assert resumed from file is a valid CAR v2 with index. + // Assert resumed from file is a valid CARv2 with index. v2f, err := os.Open(path) require.NoError(t, err) t.Cleanup(func() { assert.NoError(t, v2f.Close()) }) @@ -389,13 +389,13 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, err) require.True(t, v2r.Header.HasIndex()) - // Assert CAR v1 payload in file matches the original CAR v1 payload. + // Assert CARv1 payload in file matches the original CARv1 payload. _, err = v1f.Seek(0, io.SeekStart) require.NoError(t, err) wantPayloadReader, err := carv1.NewCarReader(v1f) require.NoError(t, err) - gotPayloadReader, err := carv1.NewCarReader(v2r.CarV1Reader()) + gotPayloadReader, err := carv1.NewCarReader(v2r.DataReader()) require.NoError(t, err) require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header) @@ -411,7 +411,7 @@ func TestBlockstoreResumption(t *testing.T) { require.Equal(t, wantNextBlock, gotNextBlock) } - // Assert index in resumed from file is identical to index generated directly from original CAR v1 payload. + // Assert index in resumed from file is identical to index generated directly from original CARv1 payload. _, err = v1f.Seek(0, io.SeekStart) require.NoError(t, err) gotIdx, err := index.ReadFrom(v2r.IndexReader()) @@ -423,7 +423,7 @@ func TestBlockstoreResumption(t *testing.T) { func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) { path := filepath.Join(t.TempDir(), "readwrite-resume-finalized.car") - // Create an incomplete CAR v2 file with no blocks put. + // Create an incomplete CARv2 file with no blocks put. 
+ // Create an incomplete CARv2 file with no blocks put.
subject, err := blockstore.OpenReadWrite(path, []cid.Cid{}) require.NoError(t, err) require.NoError(t, subject.Finalize()) @@ -487,7 +487,7 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) { subject, err := blockstore.OpenReadWrite( path, WantRoots, - carv2.UseCarV1Padding(wantCarV1Padding), + carv2.UseDataPadding(wantCarV1Padding), carv2.UseIndexPadding(wantIndexPadding)) require.NoError(t, err) t.Cleanup(func() { subject.Close() }) @@ -500,8 +500,8 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) { t.Cleanup(func() { gotCarV2.Close() }) require.NoError(t, err) wantCarV1Offset := carv2.PragmaSize + carv2.HeaderSize + wantCarV1Padding - wantIndexOffset := wantCarV1Offset + gotCarV2.Header.CarV1Size + wantIndexPadding - require.Equal(t, wantCarV1Offset, gotCarV2.Header.CarV1Offset) + wantIndexOffset := wantCarV1Offset + gotCarV2.Header.DataSize + wantIndexPadding + require.Equal(t, wantCarV1Offset, gotCarV2.Header.DataOffset) require.Equal(t, wantIndexOffset, gotCarV2.Header.IndexOffset) require.NoError(t, gotCarV2.Close()) @@ -510,7 +510,7 @@ func TestReadWriteWithPaddingWorksAsExpected(t *testing.T) { t.Cleanup(func() { f.Close() }) // Assert reading CARv1 directly at offset and size is as expected. - gotCarV1, err := carv1.NewCarReader(io.NewSectionReader(f, int64(wantCarV1Offset), int64(gotCarV2.Header.CarV1Size))) + gotCarV1, err := carv1.NewCarReader(io.NewSectionReader(f, int64(wantCarV1Offset), int64(gotCarV2.Header.DataSize))) require.NoError(t, err) require.Equal(t, WantRoots, gotCarV1.Header.Roots) gotOneBlock, err := gotCarV1.Next() @@ -548,7 +548,7 @@ func TestReadWriteResumptionFromFileWithDifferentCarV1PaddingIsError(t *testing. subject, err := blockstore.OpenReadWrite( path, WantRoots, - carv2.UseCarV1Padding(1413)) + carv2.UseDataPadding(1413)) require.NoError(t, err) t.Cleanup(func() { subject.Close() }) require.NoError(t, subject.Put(oneTestBlockWithCidV1)) @@ -557,9 +557,9 @@ func TestReadWriteResumptionFromFileWithDifferentCarV1PaddingIsError(t *testing. resumingSubject, err := blockstore.OpenReadWrite( path, WantRoots, - carv2.UseCarV1Padding(1314)) + carv2.UseDataPadding(1314)) require.EqualError(t, err, "cannot resume from file with mismatched CARv1 offset; "+ - "`WithCarV1Padding` option must match the padding on file. "+ + "`WithDataPadding` option must match the padding on file. "+ "Expected padding value of 1413 but got 1314") require.Nil(t, resumingSubject) } diff --git a/v2/car.go b/v2/car.go index 4d8c3f50a916d98cbc3b00cc6a8aae9e45e994be..d12683313bceabca64dc3f785b030e02686ba122 100644 --- a/v2/car.go +++ b/v2/car.go @@ -6,16 +6,16 @@ import ( ) const ( - // PragmaSize is the size of the CAR v2 pragma in bytes. + // PragmaSize is the size of the CARv2 pragma in bytes. PragmaSize = 11 - // HeaderSize is the fixed size of CAR v2 header in number of bytes. + // HeaderSize is the fixed size of CARv2 header in number of bytes. HeaderSize = 40 - // CharacteristicsSize is the fixed size of Characteristics bitfield within CAR v2 header in number of bytes. + // CharacteristicsSize is the fixed size of Characteristics bitfield within CARv2 header in number of bytes. CharacteristicsSize = 16 ) -// The pragma of a CAR v2, containing the version number.. -// This is a valid CAR v1 header, with version number set to 2. +// The pragma of a CARv2, containing the version number. +// This is a valid CARv1 header, with version number of 2 and no root CIDs. 
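For reference while reviewing the pragma comments: the pragma is the CBOR map {"version": 2} behind a varint length prefix. An annotated copy follows (the variable name is ours, purely illustrative; the bytes match the Pragma variable below):

```go
package main

import "fmt"

// pragmaAnnotated spells out the same 11 bytes as the CARv2 pragma, annotated.
var pragmaAnnotated = []byte{
	0x0a, // varint 10: length of the CBOR header bytes that follow
	0xa1, // CBOR map(1)
	0x67, // CBOR text(7)
	0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, // "version"
	0x02, // CBOR uint(2)
}

func main() {
	fmt.Println(len(pragmaAnnotated)) // 11, i.e. PragmaSize
}
```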
var Pragma = []byte{ 0x0a, // unit(10) 0xa1, // map(1) @@ -25,18 +25,18 @@ var Pragma = []byte{ } type ( - // Header represents the CAR v2 header/pragma. + // Header represents the CARv2 header/pragma. Header struct { - // 128-bit characteristics of this CAR v2 file, such as order, deduplication, etc. Reserved for future use. + // 128-bit characteristics of this CARv2 file, such as order, deduplication, etc. Reserved for future use. Characteristics Characteristics - // The offset from the beginning of the file at which the dump of CAR v1 starts. - CarV1Offset uint64 - // The size of CAR v1 encapsulated in this CAR v2 as bytes. - CarV1Size uint64 - // The offset from the beginning of the file at which the CAR v2 index begins. + // The byte-offset from the beginning of the CARv2 to the first byte of the CARv1 data payload. + DataOffset uint64 + // The byte-length of the CARv1 data payload. + DataSize uint64 + // The byte-offset from the beginning of the CARv2 to the first byte of the index payload. This value may be 0 to indicate the absence of index data. IndexOffset uint64 } - // Characteristics is a bitfield placeholder for capturing the characteristics of a CAR v2 such as order and determinism. + // Characteristics is a bitfield placeholder for capturing the characteristics of a CARv2 such as order and determinism. Characteristics struct { Hi uint64 Lo uint64 @@ -64,37 +64,37 @@ func (c *Characteristics) ReadFrom(r io.Reader) (int64, error) { return n, nil } -// NewHeader instantiates a new CAR v2 header, given the byte length of a CAR v1. -func NewHeader(carV1Size uint64) Header { +// NewHeader instantiates a new CARv2 header, given the data size. +func NewHeader(dataSize uint64) Header { header := Header{ - CarV1Size: carV1Size, + DataSize: dataSize, } - header.CarV1Offset = PragmaSize + HeaderSize - header.IndexOffset = header.CarV1Offset + carV1Size + header.DataOffset = PragmaSize + HeaderSize + header.IndexOffset = header.DataOffset + dataSize return header } -// WithIndexPadding sets the index offset from the beginning of the file for this header and returns the -// header for convenient chained calls. +// WithIndexPadding sets the index offset from the beginning of the file for this header and returns +// the header for convenient chained calls. // The index offset is calculated as the sum of PragmaSize, HeaderSize, -// Header.CarV1Size, and the given padding. +// Header.DataSize, and the given padding. func (h Header) WithIndexPadding(padding uint64) Header { h.IndexOffset = h.IndexOffset + padding return h } -// WithCarV1Padding sets the CAR v1 dump offset from the beginning of the file for this header and returns the -// header for convenient chained calls. -// The CAR v1 offset is calculated as the sum of PragmaSize, HeaderSize and the given padding. +// WithDataPadding sets the data payload byte-offset from the beginning of the file for this header +// and returns the header for convenient chained calls. +// The Data offset is calculated as the sum of PragmaSize, HeaderSize and the given padding. // The call to this function also shifts the Header.IndexOffset forward by the given padding. 
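A small sketch of the offset arithmetic the renamed fields and padding options produce, matching the expectations in the updated tests (the payload size 1413 is arbitrary):

```go
package main

import (
	"fmt"

	carv2 "github.com/ipld/go-car/v2"
)

func main() {
	// NewHeader places the data payload right after the pragma and fixed-size
	// header, and the index right after the data payload.
	h := carv2.NewHeader(1413) // 1413 = hypothetical CARv1 payload size in bytes
	fmt.Println(h.DataOffset)  // 51   (PragmaSize 11 + HeaderSize 40)
	fmt.Println(h.IndexOffset) // 1464 (51 + 1413)

	// Padding shifts the offsets: data padding moves both, index padding moves
	// only the index offset.
	p := carv2.NewHeader(1413).WithDataPadding(3).WithIndexPadding(7)
	fmt.Println(p.DataOffset)  // 54   (11 + 40 + 3)
	fmt.Println(p.IndexOffset) // 1474 (54 + 1413 + 7)
}
```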
-func (h Header) WithCarV1Padding(padding uint64) Header { - h.CarV1Offset = PragmaSize + HeaderSize + padding +func (h Header) WithDataPadding(padding uint64) Header { + h.DataOffset = PragmaSize + HeaderSize + padding h.IndexOffset = h.IndexOffset + padding return h } -func (h Header) WithCarV1Size(size uint64) Header { - h.CarV1Size = size +func (h Header) WithDataSize(size uint64) Header { + h.DataSize = size h.IndexOffset = size + h.IndexOffset return h } @@ -112,8 +112,8 @@ func (h Header) WriteTo(w io.Writer) (n int64, err error) { return } buf := make([]byte, 24) - binary.LittleEndian.PutUint64(buf[:8], h.CarV1Offset) - binary.LittleEndian.PutUint64(buf[8:16], h.CarV1Size) + binary.LittleEndian.PutUint64(buf[:8], h.DataOffset) + binary.LittleEndian.PutUint64(buf[8:16], h.DataSize) binary.LittleEndian.PutUint64(buf[16:], h.IndexOffset) written, err := w.Write(buf) n += int64(written) @@ -132,8 +132,8 @@ func (h *Header) ReadFrom(r io.Reader) (int64, error) { if err != nil { return n, err } - h.CarV1Offset = binary.LittleEndian.Uint64(buf[:8]) - h.CarV1Size = binary.LittleEndian.Uint64(buf[8:16]) + h.DataOffset = binary.LittleEndian.Uint64(buf[:8]) + h.DataSize = binary.LittleEndian.Uint64(buf[8:16]) h.IndexOffset = binary.LittleEndian.Uint64(buf[16:]) return n, nil } diff --git a/v2/car_test.go b/v2/car_test.go index 83a552ba65031b6e9a9873c362e513c15c26f439..4223a5600e1fd9769244372cba3fb119d8cbd029 100644 --- a/v2/car_test.go +++ b/v2/car_test.go @@ -36,11 +36,11 @@ func TestCarV2PragmaLength(t *testing.T) { func TestCarV2PragmaIsValidCarV1Header(t *testing.T) { v1h, err := carv1.ReadHeader(bytes.NewReader(carv2.Pragma)) - assert.NoError(t, err, "cannot decode pragma as CBOR with CAR v1 header structure") + assert.NoError(t, err, "cannot decode pragma as CBOR with CARv1 header structure") assert.Equal(t, &carv1.CarHeader{ Roots: nil, Version: 2, - }, v1h, "CAR v2 pragma must be a valid CAR v1 header") + }, v1h, "CARv2 pragma must be a valid CARv1 header") } func TestHeader_WriteTo(t *testing.T) { @@ -70,8 +70,8 @@ func TestHeader_WriteTo(t *testing.T) { Characteristics: carv2.Characteristics{ Hi: 1001, Lo: 1002, }, - CarV1Offset: 99, - CarV1Size: 100, + DataOffset: 99, + DataSize: 100, IndexOffset: 101, }, []byte{ @@ -94,8 +94,8 @@ func TestHeader_WriteTo(t *testing.T) { } gotWrite := buf.Bytes() assert.Equal(t, tt.wantWrite, gotWrite, "Header.WriteTo() gotWrite = %v, wantWrite %v", gotWrite, tt.wantWrite) - assert.EqualValues(t, carv2.HeaderSize, uint64(len(gotWrite)), "WriteTo() CAR v2 header length must always be %v bytes long", carv2.HeaderSize) - assert.EqualValues(t, carv2.HeaderSize, uint64(written), "WriteTo() CAR v2 header byte count must always be %v bytes long", carv2.HeaderSize) + assert.EqualValues(t, carv2.HeaderSize, uint64(len(gotWrite)), "WriteTo() CARv2 header length must always be %v bytes long", carv2.HeaderSize) + assert.EqualValues(t, carv2.HeaderSize, uint64(written), "WriteTo() CARv2 header byte count must always be %v bytes long", carv2.HeaderSize) }) } } @@ -135,8 +135,8 @@ func TestHeader_ReadFrom(t *testing.T) { Characteristics: carv2.Characteristics{ Hi: 1001, Lo: 1002, }, - CarV1Offset: 99, - CarV1Size: 100, + DataOffset: 99, + DataSize: 100, IndexOffset: 101, }, false, @@ -168,13 +168,13 @@ func TestHeader_WithPadding(t *testing.T) { }, { "WhenOnlyPaddingCarV1BothOffsetsShift", - carv2.NewHeader(123).WithCarV1Padding(3), + carv2.NewHeader(123).WithDataPadding(3), carv2.PragmaSize + carv2.HeaderSize + 3, carv2.PragmaSize + carv2.HeaderSize + 3 + 123, }, 
{ "WhenPaddingBothCarV1AndIndexBothOffsetsShiftWithAdditionalIndexShift", - carv2.NewHeader(123).WithCarV1Padding(3).WithIndexPadding(7), + carv2.NewHeader(123).WithDataPadding(3).WithIndexPadding(7), carv2.PragmaSize + carv2.HeaderSize + 3, carv2.PragmaSize + carv2.HeaderSize + 3 + 123 + 7, }, @@ -182,7 +182,7 @@ func TestHeader_WithPadding(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - assert.EqualValues(t, tt.wantCarV1Offset, tt.subject.CarV1Offset) + assert.EqualValues(t, tt.wantCarV1Offset, tt.subject.DataOffset) assert.EqualValues(t, tt.wantIndexOffset, tt.subject.IndexOffset) }) } @@ -192,8 +192,8 @@ func TestNewHeaderHasExpectedValues(t *testing.T) { wantCarV1Len := uint64(1413) want := carv2.Header{ Characteristics: carv2.Characteristics{}, - CarV1Offset: carv2.PragmaSize + carv2.HeaderSize, - CarV1Size: wantCarV1Len, + DataOffset: carv2.PragmaSize + carv2.HeaderSize, + DataSize: wantCarV1Len, IndexOffset: carv2.PragmaSize + carv2.HeaderSize + wantCarV1Len, } got := carv2.NewHeader(wantCarV1Len) diff --git a/v2/doc.go b/v2/doc.go index 5b210211bf4a96528b3d8ef96b359554949f24ee..2029d7cf380d4d5e2fdb5c256f8fde163cccaf1d 100644 --- a/v2/doc.go +++ b/v2/doc.go @@ -1,3 +1,3 @@ -// Package car represents the CAR v2 implementation. -// TODO add CAR v2 byte structure here. +// Package car represents the CARv2 implementation. +// TODO add CARv2 byte structure here. package car diff --git a/v2/example_test.go b/v2/example_test.go index 4e744662891c91f376c578ab3d8565bb9e069d7a..6944f2fa7c7442a20c44294d3412187411eefd1d 100644 --- a/v2/example_test.go +++ b/v2/example_test.go @@ -38,7 +38,7 @@ func ExampleWrapV1File() { if err != nil { panic(err) } - inner, err := ioutil.ReadAll(cr.CarV1Reader()) + inner, err := ioutil.ReadAll(cr.DataReader()) if err != nil { panic(err) } diff --git a/v2/index/doc.go b/v2/index/doc.go index 4c7beb1f8b3fbc899a82d79d6d8fa72eeb5ef4a2..41b860216c927be988adf2d9c88f3b6d82c59826 100644 --- a/v2/index/doc.go +++ b/v2/index/doc.go @@ -1,5 +1,5 @@ -// package index provides indexing functionality for CAR v1 data payload represented as a mapping of -// CID to offset. This can then be used to implement random access over a CAR v1. +// package index provides indexing functionality for CARv1 data payload represented as a mapping of +// CID to offset. This can then be used to implement random access over a CARv1. // // Index can be written or read using the following static functions: index.WriteTo and // index.ReadFrom. diff --git a/v2/index_gen.go b/v2/index_gen.go index 4fa9ac1df203b992604b1773f9ae31b0418c6f2e..5a60fb7111044df3d1c03ffe06909eaf5c8c600a 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -63,10 +63,10 @@ func GenerateIndex(v1r io.Reader, opts ...ReadOption) (index.Index, error) { // Null padding; by default it's an error. if sectionLen == 0 { - if o.ZeroLegthSectionAsEOF { + if o.ZeroLengthSectionAsEOF { break } else { - return nil, fmt.Errorf("carv1 null padding not allowed by default; see ZeroLegthSectionAsEOF") + return nil, fmt.Errorf("carv1 null padding not allowed by default; see ZeroLengthSectionAsEOF") } } @@ -103,10 +103,10 @@ func GenerateIndexFromFile(path string) (index.Index, error) { return GenerateIndex(f) } -// ReadOrGenerateIndex accepts both CAR v1 and v2 format, and reads or generates an index for it. -// When the given reader is in CAR v1 format an index is always generated. -// For a payload in CAR v2 format, an index is only generated if Header.HasIndex returns false. 
-// An error is returned for all other formats, i.e. versions other than 1 or 2. +// ReadOrGenerateIndex accepts both CARv1 and CARv2 formats, and reads or generates an index for it. +// When the given reader is in CARv1 format an index is always generated. +// For a payload in CARv2 format, an index is only generated if Header.HasIndex returns false. +// An error is returned for all other formats, i.e. pragma with versions other than 1 or 2. // // Note, the returned index lives entirely in memory and will not depend on the // given reader to fulfill index lookup. @@ -126,7 +126,7 @@ func ReadOrGenerateIndex(rs io.ReadSeeker) (index.Index, error) { // Simply generate the index, since there can't be a pre-existing one. return GenerateIndex(rs) case 2: - // Read CAR v2 format + // Read CARv2 format v2r, err := NewReader(internalio.ToReaderAt(rs)) if err != nil { return nil, err @@ -135,8 +135,8 @@ func ReadOrGenerateIndex(rs io.ReadSeeker) (index.Index, error) { if v2r.Header.HasIndex() { return index.ReadFrom(v2r.IndexReader()) } - // Otherwise, generate index from CAR v1 payload wrapped within CAR v2 format. - return GenerateIndex(v2r.CarV1Reader()) + // Otherwise, generate index from CARv1 payload wrapped within CARv2 format. + return GenerateIndex(v2r.DataReader()) default: return nil, fmt.Errorf("unknown version %v", version) } diff --git a/v2/internal/carv1/doc.go b/v2/internal/carv1/doc.go index a13ffdfc2ab4b96fd0f1705d7aac0742cee9a9f1..821ca2f0aaab084395d5fa0b9c86884ff34ad23c 100644 --- a/v2/internal/carv1/doc.go +++ b/v2/internal/carv1/doc.go @@ -1,2 +1,2 @@ -// Forked from CAR v1 to avoid dependency to ipld-prime 0.9.0 due to outstanding upgrades in filecoin. +// Forked from CARv1 to avoid dependency to ipld-prime 0.9.0 due to outstanding upgrades in filecoin. package carv1 diff --git a/v2/options.go b/v2/options.go index ad859d1a0a5a941f63ad3e7a175024c93d0a7965..89044a761bbcc413e911f6bfe7174dd2d402b8b3 100644 --- a/v2/options.go +++ b/v2/options.go @@ -6,7 +6,7 @@ package car // This type should not be used directly by end users; it's only exposed as a // side effect of ReadOption. type ReadOptions struct { - ZeroLegthSectionAsEOF bool + ZeroLengthSectionAsEOF bool BlockstoreUseWholeCIDs bool } @@ -24,7 +24,7 @@ var _ ReadWriteOption = ReadOption(nil) // This type should not be used directly by end users; it's only exposed as a // side effect of WriteOption. type WriteOptions struct { - CarV1Padding uint64 + DataPadding uint64 IndexPadding uint64 BlockstoreAllowDuplicatePuts bool @@ -42,19 +42,19 @@ type ReadWriteOption interface { readWriteOption() } -// ZeroLegthSectionAsEOF is a read option which allows a CARv1 decoder to treat +// ZeroLengthSectionAsEOF is a read option which allows a CARv1 decoder to treat // a zero-length section as the end of the input CAR file. For example, this can // be useful to allow "null padding" after a CARv1 without knowing where the // padding begins. -func ZeroLegthSectionAsEOF(o *ReadOptions) { - o.ZeroLegthSectionAsEOF = true +func ZeroLengthSectionAsEOF(o *ReadOptions) { + o.ZeroLengthSectionAsEOF = true } -// UseCarV1Padding is a write option which sets the padding to be added between -// CAR v2 header and its data payload on Finalize. -func UseCarV1Padding(p uint64) WriteOption { +// UseDataPadding is a write option which sets the padding to be added between +// CARv2 header and its data payload on Finalize. 
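How the renamed options compose at call sites, mirroring the updated test usage (file paths are placeholders):

```go
package main

import (
	"os"

	"github.com/ipfs/go-cid"
	carv2 "github.com/ipld/go-car/v2"
	"github.com/ipld/go-car/v2/blockstore"
)

func main() {
	// Read side: treat a zero-length section as EOF, e.g. for null-padded CARv1 data.
	f, err := os.Open("padded.car") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()
	robs, err := blockstore.NewReadOnly(f, nil, carv2.ZeroLengthSectionAsEOF)
	if err != nil {
		panic(err)
	}
	_ = robs

	// Write side: reserve padding before the data payload and before the index.
	rwbs, err := blockstore.OpenReadWrite("out.car", []cid.Cid{}, // placeholder path, no roots
		carv2.UseDataPadding(1413),
		carv2.UseIndexPadding(5))
	if err != nil {
		panic(err)
	}
	if err := rwbs.Finalize(); err != nil {
		panic(err)
	}
}
```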
+func UseDataPadding(p uint64) WriteOption { return func(o *WriteOptions) { - o.CarV1Padding = p + o.DataPadding = p } } diff --git a/v2/reader.go b/v2/reader.go index f845a81ce7d1c76b635749faf7e1f51c1ca706ac..98d3715d37154a8470d5e12c894097ffb5ee2cc0 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -11,12 +11,12 @@ import ( "golang.org/x/exp/mmap" ) -// Reader represents a reader of CAR v2. +// Reader represents a reader of CARv2. type Reader struct { - Header Header - r io.ReaderAt - roots []cid.Cid - carv2Closer io.Closer + Header Header + r io.ReaderAt + roots []cid.Cid + closer io.Closer } // OpenReader is a wrapper for NewReader which opens the file at path. @@ -31,13 +31,13 @@ func OpenReader(path string, opts ...ReadOption) (*Reader, error) { return nil, err } - r.carv2Closer = f + r.closer = f return r, nil } -// NewReader constructs a new reader that reads CAR v2 from the given r. +// NewReader constructs a new reader that reads CARv2 from the given r. // Upon instantiation, the reader inspects the payload by reading the pragma and will return -// an error if the pragma does not represent a CAR v2. +// an error if the pragma does not represent a CARv2. func NewReader(r io.ReaderAt, opts ...ReadOption) (*Reader, error) { cr := &Reader{ r: r, @@ -63,12 +63,13 @@ func (r *Reader) requireVersion2() (err error) { return } -// Roots returns the root CIDs of this CAR +// Roots returns the root CIDs. +// The root CIDs are extracted lazily from the data payload header. func (r *Reader) Roots() ([]cid.Cid, error) { if r.roots != nil { return r.roots, nil } - header, err := carv1.ReadHeader(r.CarV1Reader()) + header, err := carv1.ReadHeader(r.DataReader()) if err != nil { return nil, err } @@ -91,26 +92,26 @@ type SectionReader interface { io.ReaderAt } -// CarV1Reader provides a reader containing the CAR v1 section encapsulated in this CAR v2. -func (r *Reader) CarV1Reader() SectionReader { - return io.NewSectionReader(r.r, int64(r.Header.CarV1Offset), int64(r.Header.CarV1Size)) +// DataReader provides a reader containing the data payload in CARv1 format. +func (r *Reader) DataReader() SectionReader { + return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize)) } -// IndexReader provides an io.Reader containing the index of this CAR v2. +// IndexReader provides an io.Reader containing the index for the data payload. func (r *Reader) IndexReader() io.Reader { return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } // Close closes the underlying reader if it was opened by OpenReader. func (r *Reader) Close() error { - if r.carv2Closer != nil { - return r.carv2Closer.Close() + if r.closer != nil { + return r.closer.Close() } return nil } // ReadVersion reads the version from the pragma. -// This function accepts both CAR v1 and v2 payloads. +// This function accepts both CARv1 and CARv2 payloads. func ReadVersion(r io.Reader) (version uint64, err error) { // TODO if the user provides a reader that sufficiently satisfies what carv1.ReadHeader is asking then use that instead of wrapping every time. header, err := carv1.ReadHeader(r) diff --git a/v2/writer.go b/v2/writer.go index fa94913e9a819fec02f9362a7aa3fb4300c9b124..40004648e0f04df609854d3529d48aae0ed4f3d3 100644 --- a/v2/writer.go +++ b/v2/writer.go @@ -79,11 +79,11 @@ func WrapV1(src io.ReadSeeker, dst io.Writer) error { return nil } -// AttachIndex attaches a given index to an existing car v2 file at given path and offset. 
+// AttachIndex attaches a given index to an existing CARv2 file at given path and offset. func AttachIndex(path string, idx index.Index, offset uint64) error { // TODO: instead of offset, maybe take padding? - // TODO: check that the given path is indeed a CAR v2. - // TODO: update CAR v2 header according to the offset at which index is written out. + // TODO: check that the given path is indeed a CARv2. + // TODO: update CARv2 header according to the offset at which index is written out. out, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o640) if err != nil { return err diff --git a/v2/writer_test.go b/v2/writer_test.go index ff8f3bff2cae83d402c884fddea43d68f328301c..3cf119cee032a4e97fc5c7984f278e47e58e8b73 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -47,7 +47,7 @@ func TestWrapV1(t *testing.T) { require.NoError(t, err) wantPayload, err := ioutil.ReadAll(sf) require.NoError(t, err) - gotPayload, err := ioutil.ReadAll(subject.CarV1Reader()) + gotPayload, err := ioutil.ReadAll(subject.DataReader()) require.NoError(t, err) require.Equal(t, wantPayload, gotPayload)
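Finally, a reader-side sketch exercising the renamed DataReader and IndexReader accessors (path is a placeholder):

```go
package main

import (
	"fmt"
	"io/ioutil"

	carv2 "github.com/ipld/go-car/v2"
	"github.com/ipld/go-car/v2/index"
)

func main() {
	r, err := carv2.OpenReader("example.car") // placeholder path
	if err != nil {
		panic(err)
	}
	defer r.Close()

	roots, err := r.Roots()
	if err != nil {
		panic(err)
	}
	fmt.Println("roots:", roots)

	// DataReader exposes the inner CARv1 payload, e.g. for a CARv1 consumer.
	payload, err := ioutil.ReadAll(r.DataReader())
	if err != nil {
		panic(err)
	}
	fmt.Println("CARv1 payload bytes:", len(payload))

	// IndexReader exposes the index section, when the header says one is present.
	if r.Header.HasIndex() {
		if _, err := index.ReadFrom(r.IndexReader()); err != nil {
			panic(err)
		}
	}
}
```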