diff --git a/v2/bench_test.go b/v2/bench_test.go index 16ae9337839bb63bc622724017184ce7fe28bac1..15ae0c24fa0d137fe21987be51c275565765a794 100644 --- a/v2/bench_test.go +++ b/v2/bench_test.go @@ -14,8 +14,6 @@ import ( carv2 "github.com/ipld/go-car/v2" ) -var rng = rand.New(rand.NewSource(1413)) - // BenchmarkReadBlocks instantiates a BlockReader, and iterates over all blocks. // It essentially looks at the contents of any CARv1 or CARv2 file. // Note that this also uses internal carv1.ReadHeader underneath. @@ -59,7 +57,7 @@ func BenchmarkReadBlocks(b *testing.B) { // BenchmarkExtractV1File extracts inner CARv1 payload from a sample CARv2 file using ExtractV1File. func BenchmarkExtractV1File(b *testing.B) { path := filepath.Join(b.TempDir(), "bench-large-v2.car") - generateRandomCarV2File(b, path, 10*1024*1024) // 10 MiB + generateRandomCarV2File(b, path, 10<<20) // 10 MiB defer os.Remove(path) info, err := os.Stat(path) @@ -87,7 +85,7 @@ func BenchmarkExtractV1File(b *testing.B) { // BenchmarkExtractV1File. func BenchmarkExtractV1UsingReader(b *testing.B) { path := filepath.Join(b.TempDir(), "bench-large-v2.car") - generateRandomCarV2File(b, path, 10*1024*1024) // 10 MiB + generateRandomCarV2File(b, path, 10<<20) // 10 MiB defer os.Remove(path) info, err := os.Stat(path) @@ -121,6 +119,8 @@ func BenchmarkExtractV1UsingReader(b *testing.B) { } func generateRandomCarV2File(b *testing.B, path string, minTotalBlockSize int) { + // Use fixed RNG for determinism across benchmarks. + rng := rand.New(rand.NewSource(1413)) bs, err := blockstore.OpenReadWrite(path, []cid.Cid{}) defer func() { if err := bs.Finalize(); err != nil { @@ -130,7 +130,7 @@ func generateRandomCarV2File(b *testing.B, path string, minTotalBlockSize int) { if err != nil { b.Fatal(err) } - buf := make([]byte, 1024) + buf := make([]byte, 32<<10) // 32 KiB var totalBlockSize int for totalBlockSize < minTotalBlockSize { size, err := rng.Read(buf) diff --git a/v2/writer.go b/v2/writer.go index 91b5340163554e6dc3cf4199e77f81c2b873c082..c344828159039861e624a816329fa783b3704b2e 100644 --- a/v2/writer.go +++ b/v2/writer.go @@ -92,6 +92,12 @@ func WrapV1(src io.ReadSeeker, dst io.Writer) error { // Note that the destination path might still be created even if an error // occurred. // If srcPath and dstPath are the same, then the dstPath is converted, in-place, to CARv1. +// +// This function aims to extract the CARv1 payload as efficiently as possible. +// The method is best-effort and depends on your operating system; +// for example, it should use copy_file_range on recent Linux versions. +// This API should be preferred over copying directly via Reader.DataReader, +// as it should allow for better performance while always being at least as efficient. func ExtractV1File(srcPath, dstPath string) (err error) { src, err := os.Open(srcPath) if err != nil { @@ -110,7 +116,7 @@ func ExtractV1File(srcPath, dstPath string) (err error) { return ErrAlreadyV1 } if version != 2 { - return fmt.Errorf("invalid source version: %v", version) + return fmt.Errorf("source version must be 2; got: %d", version) } // Read CARv2 header to locate data payload. @@ -123,11 +129,11 @@ func ExtractV1File(srcPath, dstPath string) (err error) { // Validate header dataOffset := int64(v2h.DataOffset) if dataOffset < PragmaSize+HeaderSize { - return fmt.Errorf("invalid data payload offset: %v", dataOffset) + return fmt.Errorf("invalid data payload offset: %d", dataOffset) } dataSize := int64(v2h.DataSize) if dataSize <= 0 { - return fmt.Errorf("invalid data payload size: %v", dataSize) + return fmt.Errorf("invalid data payload size: %d", dataSize) } // Seek to the point where the data payload starts @@ -163,7 +169,7 @@ func ExtractV1File(srcPath, dstPath string) (err error) { return err } if written != dataSize { - return fmt.Errorf("expected to write exactly %v but wrote %v", dataSize, written) + return fmt.Errorf("expected to write exactly %d but wrote %d", dataSize, written) } // Check that the size destination file matches expected size. diff --git a/v2/writer_test.go b/v2/writer_test.go index c35beb4a425bbc6a5fa9184320c2d2fa5d2b69b3..c29b43397344510afbe7684b56c491356fad6d05 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -93,7 +93,7 @@ func TestExtractV1(t *testing.T) { func TestExtractV1WithUnknownVersionIsError(t *testing.T) { dstPath := filepath.Join(t.TempDir(), "extract-dst-file-test-v42.car") err := ExtractV1File("testdata/sample-rootless-v42.car", dstPath) - require.EqualError(t, err, "invalid source version: 42") + require.EqualError(t, err, "source version must be 2; got: 42") } func TestExtractV1FromACarV1IsError(t *testing.T) {