Commit d936d8ce authored by Jeromy's avatar Jeromy

Refactor to new algorithm, add tests and tool

parent ba1ec22f
package car
import (
"archive/tar"
"bufio"
"context"
"fmt"
"io"
"io/ioutil"
"github.com/ipfs/go-block-format"
cid "github.com/ipfs/go-cid"
bstore "github.com/ipfs/go-ipfs-blockstore"
format "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
util "github.com/ipfs/go-car/util"
cbor "gx/ipfs/QmSyK1ZiAP98YvnxsTfQpb669V2xeTHRbG4Y6fgKS3vVSd/go-ipld-cbor"
"gx/ipfs/QmVzK524a2VWLqyvtBeiHKsUAWYgeAk4DBeZoY7vpNPNRx/go-block-format"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
format "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
bstore "gx/ipfs/QmcD7SqfyQyA91TZUQ7VPRYbGarxmY7EsQewVYMuN5LNSv/go-ipfs-blockstore"
dag "gx/ipfs/QmeCaeBmCCEJrZahwXY4G2G8zRaNBWskrfKWoQ6Xv6c1DR/go-merkledag"
)
func WriteCar(ctx context.Context, ds format.DAGService, root *cid.Cid, w io.Writer) error {
tw := tar.NewWriter(w)
func init() {
cbor.RegisterCborType(CarHeader{})
}
rh := &tar.Header{
Typeflag: tar.TypeSymlink,
Name: "root",
Linkname: root.String(),
}
if err := tw.WriteHeader(rh); err != nil {
return err
type CarHeader struct {
Roots []*cid.Cid
Version uint64
}
type carWriter struct {
ds format.DAGService
w io.Writer
}
func WriteCar(ctx context.Context, ds format.DAGService, roots []*cid.Cid, w io.Writer) error {
cw := &carWriter{ds: ds, w: w}
h := &CarHeader{
Roots: roots,
Version: 1,
}
cw := &carWriter{ds: ds, tw: tw}
if err := cw.WriteHeader(h); err != nil {
return fmt.Errorf("failed to write car header: %s", err)
}
seen := cid.NewSet()
if err := dag.EnumerateChildren(ctx, cw.enumGetLinks, root, seen.Visit); err != nil {
return err
for _, r := range roots {
if err := dag.EnumerateChildren(ctx, cw.enumGetLinks, r, seen.Visit); err != nil {
return err
}
}
return tw.Flush()
return nil
}
func LoadCar(ctx context.Context, bs bstore.Blockstore, r io.Reader) (*cid.Cid, error) {
tr := tar.NewReader(r)
root, err := tr.Next()
func ReadHeader(br *bufio.Reader) (*CarHeader, error) {
hb, err := util.LdRead(br)
if err != nil {
return nil, err
}
if root.Name != "root" || root.Typeflag != tar.TypeSymlink {
return nil, fmt.Errorf("expected first entry in CAR to by symlink named 'root'")
var ch CarHeader
if err := cbor.DecodeInto(hb, &ch); err != nil {
return nil, err
}
rootcid, err := cid.Decode(root.Linkname)
return &ch, nil
}
func (cw *carWriter) WriteHeader(h *CarHeader) error {
hb, err := cbor.DumpObject(h)
if err != nil {
return nil, err
return err
}
for {
obj, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return nil, err
}
return util.LdWrite(cw.w, hb)
}
c, err := cid.Decode(obj.Name)
if err != nil {
return nil, err
}
func (cw *carWriter) enumGetLinks(ctx context.Context, c *cid.Cid) ([]*format.Link, error) {
nd, err := cw.ds.Get(ctx, c)
if err != nil {
return nil, err
}
// safety 1st
limr := io.LimitReader(tr, 2<<20)
data, err := ioutil.ReadAll(limr)
if err != nil {
return nil, err
}
if err := cw.writeNode(ctx, nd); err != nil {
return nil, err
}
hashed, err := c.Prefix().Sum(data)
if err != nil {
return nil, err
}
return nd.Links(), nil
}
if !hashed.Equals(c) {
return nil, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", c, hashed)
}
func (cw *carWriter) writeNode(ctx context.Context, nd format.Node) error {
return util.LdWrite(cw.w, nd.Cid().Bytes(), nd.RawData())
}
blk, err := blocks.NewBlockWithCid(data, c)
if err != nil {
return nil, err
}
type carReader struct {
br *bufio.Reader
Header *CarHeader
}
if err := bs.Put(blk); err != nil {
return nil, err
}
func NewCarReader(r io.Reader) (*carReader, error) {
br := bufio.NewReader(r)
ch, err := ReadHeader(br)
if err != nil {
return nil, err
}
return rootcid, nil
}
if len(ch.Roots) == 0 {
return nil, fmt.Errorf("empty car")
}
type carWriter struct {
ds format.DAGService
tw *tar.Writer
if ch.Version != 1 {
return nil, fmt.Errorf("invalid car version: %d", ch.Version)
}
return &carReader{
br: br,
Header: ch,
}, nil
}
func (cw *carWriter) enumGetLinks(ctx context.Context, c *cid.Cid) ([]*format.Link, error) {
nd, err := cw.ds.Get(ctx, c)
func (cr *carReader) Next() (blocks.Block, error) {
c, data, err := util.ReadNode(cr.br)
if err != nil {
return nil, err
}
if err := cw.writeNode(ctx, nd); err != nil {
hashed, err := c.Prefix().Sum(data)
if err != nil {
return nil, err
}
return nd.Links(), nil
if !hashed.Equals(c) {
return nil, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", c, hashed)
}
return blocks.NewBlockWithCid(data, c)
}
func (cw *carWriter) writeNode(ctx context.Context, nd format.Node) error {
hdr := &tar.Header{
Name: nd.Cid().String(),
Typeflag: tar.TypeReg,
Size: int64(len(nd.RawData())),
func LoadCar(bs bstore.Blockstore, r io.Reader) (*CarHeader, error) {
cr, err := NewCarReader(r)
if err != nil {
return nil, err
}
if err := cw.tw.WriteHeader(hdr); err != nil {
return err
}
for {
blk, err := cr.Next()
switch err {
case io.EOF:
return cr.Header, nil
default:
return nil, err
case nil:
}
if _, err := cw.tw.Write(nd.RawData()); err != nil {
return err
if err := bs.Put(blk); err != nil {
return nil, err
}
}
return nil
}
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os"
"github.com/ipfs/go-car"
cli "github.com/urfave/cli"
)
var headerCmd = cli.Command{
Name: "header",
Action: func(c *cli.Context) error {
if !c.Args().Present() {
return fmt.Errorf("must pass a car file to inspect")
}
arg := c.Args().First()
fi, err := os.Open(arg)
if err != nil {
return err
}
defer fi.Close()
ch, err := car.ReadHeader(bufio.NewReader(fi))
if err != nil {
return err
}
b, err := json.MarshalIndent(ch, "", " ")
if err != nil {
return err
}
fmt.Println(string(b))
return nil
},
}
var verifyCmd = cli.Command{
Name: "verify",
Action: func(c *cli.Context) error {
if !c.Args().Present() {
return fmt.Errorf("must pass a car file to inspect")
}
arg := c.Args().First()
fi, err := os.Open(arg)
if err != nil {
return err
}
defer fi.Close()
cr, err := car.NewCarReader(fi)
if err != nil {
return err
}
for {
_, err := cr.Next()
switch err {
case io.EOF:
return nil
default:
return err
case nil:
}
}
},
}
var lsCmd = cli.Command{
Name: "ls",
Action: func(c *cli.Context) error {
if !c.Args().Present() {
return fmt.Errorf("must pass a car file to inspect")
}
arg := c.Args().First()
fi, err := os.Open(arg)
if err != nil {
return err
}
defer fi.Close()
cr, err := car.NewCarReader(fi)
if err != nil {
return err
}
for {
blk, err := cr.Next()
switch err {
case io.EOF:
return nil
default:
return err
case nil:
}
fmt.Println(blk.Cid())
}
},
}
func main() {
app := cli.NewApp()
app.Commands = []cli.Command{
headerCmd,
lsCmd,
verifyCmd,
}
app.RunAndExitOnError()
}
package car
import (
"bytes"
"context"
"testing"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
format "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
dag "gx/ipfs/QmeCaeBmCCEJrZahwXY4G2G8zRaNBWskrfKWoQ6Xv6c1DR/go-merkledag"
dstest "gx/ipfs/QmeCaeBmCCEJrZahwXY4G2G8zRaNBWskrfKWoQ6Xv6c1DR/go-merkledag/test"
)
func assertAddNodes(t *testing.T, ds format.DAGService, nds ...format.Node) {
for _, nd := range nds {
if err := ds.Add(context.Background(), nd); err != nil {
t.Fatal(err)
}
}
}
func TestRoundtrip(t *testing.T) {
dserv := dstest.Mock()
a := dag.NewRawNode([]byte("aaaa"))
b := dag.NewRawNode([]byte("bbbb"))
c := dag.NewRawNode([]byte("cccc"))
nd1 := &dag.ProtoNode{}
nd1.AddNodeLink("cat", a)
nd2 := &dag.ProtoNode{}
nd2.AddNodeLink("first", nd1)
nd2.AddNodeLink("dog", b)
nd3 := &dag.ProtoNode{}
nd3.AddNodeLink("second", nd2)
nd3.AddNodeLink("bear", c)
assertAddNodes(t, dserv, a, b, c, nd1, nd2, nd3)
buf := new(bytes.Buffer)
if err := WriteCar(context.Background(), dserv, []*cid.Cid{nd3.Cid()}, buf); err != nil {
t.Fatal(err)
}
bserv := dstest.Bserv()
ch, err := LoadCar(bserv.Blockstore(), buf)
if err != nil {
t.Fatal(err)
}
if len(ch.Roots) != 1 {
t.Fatal("should have one root")
}
if !ch.Roots[0].Equals(nd3.Cid()) {
t.Fatal("got wrong cid")
}
bs := bserv.Blockstore()
for _, nd := range []format.Node{a, b, c, nd1, nd2, nd3} {
has, err := bs.Has(nd.Cid())
if err != nil {
t.Fatal(err)
}
if !has {
t.Fatal("should have cid in blockstore")
}
}
}
......@@ -12,6 +12,12 @@
"hash": "QmeCaeBmCCEJrZahwXY4G2G8zRaNBWskrfKWoQ6Xv6c1DR",
"name": "go-merkledag",
"version": "1.0.3"
},
{
"author": "whyrusleeping",
"hash": "QmSyK1ZiAP98YvnxsTfQpb669V2xeTHRbG4Y6fgKS3vVSd",
"name": "go-ipld-cbor",
"version": "1.4.6"
}
],
"gxVersion": "0.12.1",
......@@ -19,6 +25,6 @@
"license": "",
"name": "go-car",
"releaseCmd": "git commit -a -m \"gx publish $VERSION\"",
"version": "1.0.1"
"version": "1.1.0"
}
package util
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"io"
mh "gx/ipfs/QmPnFwZ2JXKnXgMw8CdBPxn7FWh6LLdjUjxV1fKHuJnkr8/go-multihash"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
)
var cidv0Pref = []byte{0x12, 0x20}
type BytesReader interface {
io.Reader
io.ByteReader
}
// TODO: this belongs in the go-cid package
func ReadCid(buf []byte) (*cid.Cid, int, error) {
if bytes.Equal(buf[:2], cidv0Pref) {
c, err := cid.Cast(buf[:34])
return c, 34, err
}
br := bytes.NewReader(buf)
// assume cidv1
vers, err := binary.ReadUvarint(br)
if err != nil {
return nil, 0, err
}
// TODO: the go-cid package allows version 0 here as well
if vers != 1 {
return nil, 0, fmt.Errorf("invalid cid version number")
}
codec, err := binary.ReadUvarint(br)
if err != nil {
return nil, 0, err
}
mhr := mh.NewReader(br)
h, err := mhr.ReadMultihash()
if err != nil {
return nil, 0, err
}
return cid.NewCidV1(codec, h), len(buf) - br.Len(), nil
}
func ReadNode(br *bufio.Reader) (*cid.Cid, []byte, error) {
data, err := LdRead(br)
if err != nil {
return nil, nil, err
}
c, n, err := ReadCid(data)
if err != nil {
return nil, nil, err
}
return c, data[n:], nil
}
func LdWrite(w io.Writer, d ...[]byte) error {
var sum uint64
for _, s := range d {
sum += uint64(len(s))
}
buf := make([]byte, 8)
n := binary.PutUvarint(buf, sum)
_, err := w.Write(buf[:n])
if err != nil {
return err
}
for _, s := range d {
_, err = w.Write(s)
if err != nil {
return err
}
}
return nil
}
func LdRead(r *bufio.Reader) ([]byte, error) {
l, err := binary.ReadUvarint(r)
if err != nil {
return nil, err
}
buf := make([]byte, l)
if _, err := io.ReadFull(r, buf); err != nil {
return nil, err
}
return buf, nil
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment