// Package gc provides garbage collection for go-ipfs. package gc import ( "context" "errors" "fmt" bstore "github.com/ipfs/go-ipfs/blocks/blockstore" bserv "github.com/ipfs/go-ipfs/blockservice" offline "github.com/ipfs/go-ipfs/exchange/offline" dag "github.com/ipfs/go-ipfs/merkledag" pin "github.com/ipfs/go-ipfs/pin" dstore "gx/ipfs/QmPpegoMqhAEqjncrzArm7KVWAkCm78rqL2DPuNjhPrshg/go-datastore" logging "gx/ipfs/QmRb5jh8z2E8hMGN2tkvs1yHynUanqnZ3UeKwgN1i9P1F8/go-log" cid "gx/ipfs/QmcZfnkapfECQGcLZaf9B79NRg7cRa9EnZh4LSbkCzwNvY/go-cid" ipld "gx/ipfs/Qme5bWv7wtjUNGsK2BNGVUFPKiuxWrsqrtvYwCLRw8YFES/go-ipld-format" ) var log = logging.Logger("gc") // Result represents an incremental output from a garbage collection // run. It contains either an error, or the cid of a removed object. type Result struct { KeyRemoved *cid.Cid Error error } // GC performs a mark and sweep garbage collection of the blocks in the blockstore // first, it creates a 'marked' set and adds to it the following: // - all recursively pinned blocks, plus all of their descendants (recursively) // - bestEffortRoots, plus all of its descendants (recursively) // - all directly pinned blocks // - all blocks utilized internally by the pinner // // The routine then iterates over every block in the blockstore and // deletes any block that is not found in the marked set. func GC(ctx context.Context, bs bstore.GCBlockstore, dstor dstore.Datastore, pn pin.Pinner, bestEffortRoots []*cid.Cid) <-chan Result { elock := log.EventBegin(ctx, "GC.lockWait") unlocker := bs.GCLock() elock.Done() elock = log.EventBegin(ctx, "GC.locked") emark := log.EventBegin(ctx, "GC.mark") bsrv := bserv.New(bs, offline.Exchange(bs)) ds := dag.NewDAGService(bsrv) output := make(chan Result, 128) go func() { defer close(output) defer unlocker.Unlock() defer elock.Done() gcs, err := ColoredSet(ctx, pn, ds, bestEffortRoots, output) if err != nil { output <- Result{Error: err} return } emark.Append(logging.LoggableMap{ "blackSetSize": fmt.Sprintf("%d", gcs.Len()), }) emark.Done() esweep := log.EventBegin(ctx, "GC.sweep") keychan, err := bs.AllKeysChan(ctx) if err != nil { output <- Result{Error: err} return } errors := false var removed uint64 loop: for { select { case k, ok := <-keychan: if !ok { break loop } if !gcs.Has(k) { err := bs.DeleteBlock(k) removed++ if err != nil { errors = true output <- Result{Error: &CannotDeleteBlockError{k, err}} //log.Errorf("Error removing key from blockstore: %s", err) // continue as error is non-fatal continue loop } select { case output <- Result{KeyRemoved: k}: case <-ctx.Done(): break loop } } case <-ctx.Done(): break loop } } esweep.Append(logging.LoggableMap{ "whiteSetSize": fmt.Sprintf("%d", removed), }) esweep.Done() if errors { output <- Result{Error: ErrCannotDeleteSomeBlocks} } defer log.EventBegin(ctx, "GC.datastore").Done() gds, ok := dstor.(dstore.GCDatastore) if !ok { return } err = gds.CollectGarbage() if err != nil { output <- Result{Error: err} return } }() return output } // Descendants recursively finds all the descendants of the given roots and // adds them to the given cid.Set, using the provided dag.GetLinks function // to walk the tree. func Descendants(ctx context.Context, getLinks dag.GetLinks, set *cid.Set, roots []*cid.Cid) error { for _, c := range roots { set.Add(c) // EnumerateChildren recursively walks the dag and adds the keys to the given set err := dag.EnumerateChildren(ctx, getLinks, c, set.Visit) if err != nil { return err } } return nil } // ColoredSet computes the set of nodes in the graph that are pinned by the // pins in the given pinner. func ColoredSet(ctx context.Context, pn pin.Pinner, ng ipld.NodeGetter, bestEffortRoots []*cid.Cid, output chan<- Result) (*cid.Set, error) { // KeySet currently implemented in memory, in the future, may be bloom filter or // disk backed to conserve memory. errors := false gcs := cid.NewSet() getLinks := func(ctx context.Context, cid *cid.Cid) ([]*ipld.Link, error) { links, err := ipld.GetLinks(ctx, ng, cid) if err != nil { errors = true output <- Result{Error: &CannotFetchLinksError{cid, err}} } return links, nil } err := Descendants(ctx, getLinks, gcs, pn.RecursiveKeys()) if err != nil { errors = true output <- Result{Error: err} } bestEffortGetLinks := func(ctx context.Context, cid *cid.Cid) ([]*ipld.Link, error) { links, err := ipld.GetLinks(ctx, ng, cid) if err != nil && err != ipld.ErrNotFound { errors = true output <- Result{Error: &CannotFetchLinksError{cid, err}} } return links, nil } err = Descendants(ctx, bestEffortGetLinks, gcs, bestEffortRoots) if err != nil { errors = true output <- Result{Error: err} } for _, k := range pn.DirectKeys() { gcs.Add(k) } err = Descendants(ctx, getLinks, gcs, pn.InternalPins()) if err != nil { errors = true output <- Result{Error: err} } if errors { return nil, ErrCannotFetchAllLinks } return gcs, nil } // ErrCannotFetchAllLinks is returned as the last Result in the GC output // channel when there was a error creating the marked set because of a // problem when finding descendants. var ErrCannotFetchAllLinks = errors.New("garbage collection aborted: could not retrieve some links") // ErrCannotDeleteSomeBlocks is returned when removing blocks marked for // deletion fails as the last Result in GC output channel. var ErrCannotDeleteSomeBlocks = errors.New("garbage collection incomplete: could not delete some blocks") // CannotFetchLinksError provides detailed information about which links // could not be fetched and can appear as a Result in the GC output channel. type CannotFetchLinksError struct { Key *cid.Cid Err error } // Error implements the error interface for this type with a useful // message. func (e *CannotFetchLinksError) Error() string { return fmt.Sprintf("could not retrieve links for %s: %s", e.Key, e.Err) } // CannotDeleteBlockError provides detailed information about which // blocks could not be deleted and can appear as a Result in the GC output // channel. type CannotDeleteBlockError struct { Key *cid.Cid Err error } // Error implements the error interface for this type with a // useful message. func (e *CannotDeleteBlockError) Error() string { return fmt.Sprintf("could not remove %s: %s", e.Key, e.Err) }