gc.go 7.28 KB
Newer Older
tavit ohanian's avatar
tavit ohanian committed
1
// Package gc provides garbage collection for go-dms3.
Jeromy's avatar
Jeromy committed
2 3 4
package gc

import (
5
	"context"
6 7
	"errors"
	"fmt"
8
	"strings"
9

tavit ohanian's avatar
tavit ohanian committed
10 11 12 13 14 15 16 17 18 19
	bserv "gitlab.dms3.io/dms3/go-blockservice"
	cid "gitlab.dms3.io/dms3/go-cid"
	dstore "gitlab.dms3.io/dms3/go-datastore"
	bstore "gitlab.dms3.io/dms3/go-dms3-blockstore"
	offline "gitlab.dms3.io/dms3/go-dms3-exchange-offline"
	pin "gitlab.dms3.io/dms3/go-dms3-pinner"
	ld "gitlab.dms3.io/dms3/go-ld-format"
	logging "gitlab.dms3.io/dms3/go-log"
	dag "gitlab.dms3.io/dms3/go-merkledag"
	"gitlab.dms3.io/dms3/go-verifcid"
Jeromy's avatar
Jeromy committed
20 21
)

22 23
var log = logging.Logger("gc")

Kevin Atkinson's avatar
Kevin Atkinson committed
24 25
// Result represents an incremental output from a garbage collection
// run.  It contains either an error, or the cid of a removed object.
26
type Result struct {
27
	KeyRemoved cid.Cid
28 29 30
	Error      error
}

Jeromy's avatar
Jeromy committed
31 32 33
// GC performs a mark and sweep garbage collection of the blocks in the blockstore
// first, it creates a 'marked' set and adds to it the following:
// - all recursively pinned blocks, plus all of their descendants (recursively)
34
// - bestEffortRoots, plus all of its descendants (recursively)
Jeromy's avatar
Jeromy committed
35 36 37 38 39
// - all directly pinned blocks
// - all blocks utilized internally by the pinner
//
// The routine then iterates over every block in the blockstore and
// deletes any block that is not found in the marked set.
40
func GC(ctx context.Context, bs bstore.GCBlockstore, dstor dstore.Datastore, pn pin.Pinner, bestEffortRoots []cid.Cid) <-chan Result {
Steven Allen's avatar
Steven Allen committed
41
	ctx, cancel := context.WithCancel(ctx)
42

43
	unlocker := bs.GCLock()
44

45 46
	bsrv := bserv.New(bs, offline.Exchange(bs))
	ds := dag.NewDAGService(bsrv)
Jeromy's avatar
Jeromy committed
47

48
	output := make(chan Result, 128)
49

Jeromy's avatar
Jeromy committed
50
	go func() {
Steven Allen's avatar
Steven Allen committed
51
		defer cancel()
Jeromy's avatar
Jeromy committed
52
		defer close(output)
53
		defer unlocker.Unlock()
54

55
		gcs, err := ColoredSet(ctx, pn, ds, bestEffortRoots, output)
56
		if err != nil {
Overbool's avatar
Overbool committed
57 58 59 60
			select {
			case output <- Result{Error: err}:
			case <-ctx.Done():
			}
61 62 63 64
			return
		}
		keychan, err := bs.AllKeysChan(ctx)
		if err != nil {
Overbool's avatar
Overbool committed
65 66 67 68
			select {
			case output <- Result{Error: err}:
			case <-ctx.Done():
			}
69 70 71 72
			return
		}

		errors := false
73
		var removed uint64
74 75

	loop:
Steven Allen's avatar
Steven Allen committed
76
		for ctx.Err() == nil { // select may not notice that we're "done".
Jeromy's avatar
Jeromy committed
77 78 79
			select {
			case k, ok := <-keychan:
				if !ok {
80
					break loop
Jeromy's avatar
Jeromy committed
81 82 83
				}
				if !gcs.Has(k) {
					err := bs.DeleteBlock(k)
84
					removed++
Jeromy's avatar
Jeromy committed
85
					if err != nil {
86
						errors = true
Steven Allen's avatar
Steven Allen committed
87 88 89 90 91
						select {
						case output <- Result{Error: &CannotDeleteBlockError{k, err}}:
						case <-ctx.Done():
							break loop
						}
92 93
						// continue as error is non-fatal
						continue loop
Jeromy's avatar
Jeromy committed
94 95
					}
					select {
96
					case output <- Result{KeyRemoved: k}:
Jeromy's avatar
Jeromy committed
97
					case <-ctx.Done():
98
						break loop
Jeromy's avatar
Jeromy committed
99 100 101
					}
				}
			case <-ctx.Done():
102
				break loop
Jeromy's avatar
Jeromy committed
103 104
			}
		}
105
		if errors {
Overbool's avatar
Overbool committed
106 107 108 109 110
			select {
			case output <- Result{Error: ErrCannotDeleteSomeBlocks}:
			case <-ctx.Done():
				return
			}
111
		}
112 113 114 115 116 117 118 119

		gds, ok := dstor.(dstore.GCDatastore)
		if !ok {
			return
		}

		err = gds.CollectGarbage()
		if err != nil {
Overbool's avatar
Overbool committed
120 121 122 123
			select {
			case output <- Result{Error: err}:
			case <-ctx.Done():
			}
124 125
			return
		}
Jeromy's avatar
Jeromy committed
126 127
	}()

128
	return output
Jeromy's avatar
Jeromy committed
129
}
Jeromy's avatar
Jeromy committed
130

Hector Sanjuan's avatar
Hector Sanjuan committed
131 132 133
// Descendants recursively finds all the descendants of the given roots and
// adds them to the given cid.Set, using the provided dag.GetLinks function
// to walk the tree.
134
func Descendants(ctx context.Context, getLinks dag.GetLinks, set *cid.Set, roots []cid.Cid) error {
tavit ohanian's avatar
tavit ohanian committed
135
	verifyGetLinks := func(ctx context.Context, c cid.Cid) ([]*ld.Link, error) {
136 137 138 139 140 141 142 143 144 145 146
		err := verifcid.ValidateCid(c)
		if err != nil {
			return nil, err
		}

		return getLinks(ctx, c)
	}

	verboseCidError := func(err error) error {
		if strings.Contains(err.Error(), verifcid.ErrBelowMinimumHashLength.Error()) ||
			strings.Contains(err.Error(), verifcid.ErrPossiblyInsecureHashFunction.Error()) {
tavit ohanian's avatar
tavit ohanian committed
147
			err = fmt.Errorf("\"%s\"\nPlease run 'dms3 pin verify'"+
148
				" to list insecure hashes. If you want to read them,"+
tavit ohanian's avatar
tavit ohanian committed
149
				" please downgrade your go-dms3 to 0.4.13\n", err)
150 151 152 153 154
			log.Error(err)
		}
		return err
	}

Jeromy's avatar
Jeromy committed
155
	for _, c := range roots {
156
		// Walk recursively walks the dag and adds the keys to the given set
157
		err := dag.Walk(ctx, verifyGetLinks, c, set.Visit, dag.Concurrent())
158

Jeromy's avatar
Jeromy committed
159
		if err != nil {
160
			err = verboseCidError(err)
Jeromy's avatar
Jeromy committed
161 162 163 164 165 166 167
			return err
		}
	}

	return nil
}

Kevin Atkinson's avatar
Kevin Atkinson committed
168 169
// ColoredSet computes the set of nodes in the graph that are pinned by the
// pins in the given pinner.
tavit ohanian's avatar
tavit ohanian committed
170
func ColoredSet(ctx context.Context, pn pin.Pinner, ng ld.NodeGetter, bestEffortRoots []cid.Cid, output chan<- Result) (*cid.Set, error) {
Jeromy's avatar
Jeromy committed
171 172
	// KeySet currently implemented in memory, in the future, may be bloom filter or
	// disk backed to conserve memory.
173
	errors := false
174
	gcs := cid.NewSet()
tavit ohanian's avatar
tavit ohanian committed
175 176
	getLinks := func(ctx context.Context, cid cid.Cid) ([]*ld.Link, error) {
		links, err := ld.GetLinks(ctx, ng, cid)
177
		if err != nil {
178
			errors = true
Overbool's avatar
Overbool committed
179 180 181
			select {
			case output <- Result{Error: &CannotFetchLinksError{cid, err}}:
			case <-ctx.Done():
Overbool's avatar
Overbool committed
182
				return nil, ctx.Err()
Overbool's avatar
Overbool committed
183
			}
184 185 186
		}
		return links, nil
	}
187 188 189 190 191
	rkeys, err := pn.RecursiveKeys(ctx)
	if err != nil {
		return nil, err
	}
	err = Descendants(ctx, getLinks, gcs, rkeys)
192
	if err != nil {
193
		errors = true
Overbool's avatar
Overbool committed
194 195 196
		select {
		case output <- Result{Error: err}:
		case <-ctx.Done():
Overbool's avatar
Overbool committed
197
			return nil, ctx.Err()
Overbool's avatar
Overbool committed
198
		}
199 200
	}

tavit ohanian's avatar
tavit ohanian committed
201 202 203
	bestEffortGetLinks := func(ctx context.Context, cid cid.Cid) ([]*ld.Link, error) {
		links, err := ld.GetLinks(ctx, ng, cid)
		if err != nil && err != ld.ErrNotFound {
204
			errors = true
Overbool's avatar
Overbool committed
205 206 207
			select {
			case output <- Result{Error: &CannotFetchLinksError{cid, err}}:
			case <-ctx.Done():
Overbool's avatar
Overbool committed
208
				return nil, ctx.Err()
Overbool's avatar
Overbool committed
209
			}
210
		}
211
		return links, nil
212 213
	}
	err = Descendants(ctx, bestEffortGetLinks, gcs, bestEffortRoots)
Jeromy's avatar
Jeromy committed
214
	if err != nil {
215
		errors = true
Overbool's avatar
Overbool committed
216 217 218
		select {
		case output <- Result{Error: err}:
		case <-ctx.Done():
Overbool's avatar
Overbool committed
219
			return nil, ctx.Err()
Overbool's avatar
Overbool committed
220
		}
Jeromy's avatar
Jeromy committed
221 222
	}

223 224 225 226 227
	dkeys, err := pn.DirectKeys(ctx)
	if err != nil {
		return nil, err
	}
	for _, k := range dkeys {
228
		gcs.Add(k)
Jeromy's avatar
Jeromy committed
229 230
	}

231 232 233 234 235
	ikeys, err := pn.InternalPins(ctx)
	if err != nil {
		return nil, err
	}
	err = Descendants(ctx, getLinks, gcs, ikeys)
Jeromy's avatar
Jeromy committed
236
	if err != nil {
237
		errors = true
Overbool's avatar
Overbool committed
238 239 240
		select {
		case output <- Result{Error: err}:
		case <-ctx.Done():
Overbool's avatar
Overbool committed
241
			return nil, ctx.Err()
Overbool's avatar
Overbool committed
242
		}
243 244
	}

245
	if errors {
Kevin Atkinson's avatar
Kevin Atkinson committed
246
		return nil, ErrCannotFetchAllLinks
Jeromy's avatar
Jeromy committed
247
	}
Kevin Atkinson's avatar
Kevin Atkinson committed
248 249

	return gcs, nil
250
}
Jeromy's avatar
Jeromy committed
251

Hector Sanjuan's avatar
Hector Sanjuan committed
252
// ErrCannotFetchAllLinks is returned as the last Result in the GC output
Dimitris Apostolou's avatar
Dimitris Apostolou committed
253
// channel when there was an error creating the marked set because of a
Hector Sanjuan's avatar
Hector Sanjuan committed
254
// problem when finding descendants.
Kevin Atkinson's avatar
Kevin Atkinson committed
255
var ErrCannotFetchAllLinks = errors.New("garbage collection aborted: could not retrieve some links")
256

Hector Sanjuan's avatar
Hector Sanjuan committed
257 258
// ErrCannotDeleteSomeBlocks is returned when removing blocks marked for
// deletion fails as the last Result in GC output channel.
Kevin Atkinson's avatar
Kevin Atkinson committed
259
var ErrCannotDeleteSomeBlocks = errors.New("garbage collection incomplete: could not delete some blocks")
260

Hector Sanjuan's avatar
Hector Sanjuan committed
261 262
// CannotFetchLinksError provides detailed information about which links
// could not be fetched and can appear as a Result in the GC output channel.
Kevin Atkinson's avatar
Kevin Atkinson committed
263
type CannotFetchLinksError struct {
264
	Key cid.Cid
265 266 267
	Err error
}

Hector Sanjuan's avatar
Hector Sanjuan committed
268 269
// Error implements the error interface for this type with a useful
// message.
Kevin Atkinson's avatar
Kevin Atkinson committed
270
func (e *CannotFetchLinksError) Error() string {
271
	return fmt.Sprintf("could not retrieve links for %s: %s", e.Key, e.Err)
272 273
}

Hector Sanjuan's avatar
Hector Sanjuan committed
274 275 276
// CannotDeleteBlockError provides detailed information about which
// blocks could not be deleted and can appear as a Result in the GC output
// channel.
Kevin Atkinson's avatar
Kevin Atkinson committed
277
type CannotDeleteBlockError struct {
278
	Key cid.Cid
279 280 281
	Err error
}

Hector Sanjuan's avatar
Hector Sanjuan committed
282 283
// Error implements the error interface for this type with a
// useful message.
Kevin Atkinson's avatar
Kevin Atkinson committed
284
func (e *CannotDeleteBlockError) Error() string {
285
	return fmt.Sprintf("could not remove %s: %s", e.Key, e.Err)
Jeromy's avatar
Jeromy committed
286
}