Commit 9b83d068 authored by Steven Allen's avatar Steven Allen

feat: move retries lower

Instead or retrying the whole operation, retry the specific parts that can fail.

I believe this also fixes cases where we might end up with a retry loop within a
retry loop? I'm not sure.
parent bcc4dcb5
......@@ -52,6 +52,10 @@ var (
// RetryDelay is a timeout for a backoff on retrying operations
// that fail due to transient errors like too many file descriptors open.
RetryDelay = time.Millisecond * 200
// RetryAttempts is the maximum number of retries that will be attempted
// before giving up.
RetryAttempts = 6
)
const (
......@@ -369,8 +373,6 @@ func (fs *Datastore) renameAndUpdateDiskUsage(tmpPath, path string) error {
return err
}
var putMaxRetries = 6
// Put stores a key/value in the datastore.
//
// Note, that we do not guarantee order of write operations (Put or Delete)
......@@ -392,24 +394,11 @@ func (fs *Datastore) Put(key datastore.Key, value []byte) error {
return ErrClosed
}
var err error
for i := 1; i <= putMaxRetries; i++ {
_, err = fs.doWriteOp(&op{
typ: opPut,
key: key,
v: value,
})
if err == nil {
break
}
if !strings.Contains(err.Error(), "too many open files") {
break
}
log.Errorf("too many open files, retrying in %dms", 100*i)
time.Sleep(time.Millisecond * 100 * time.Duration(i))
}
_, err := fs.doWriteOp(&op{
typ: opPut,
key: key,
v: value,
})
return err
}
......@@ -462,15 +451,7 @@ func (fs *Datastore) doWriteOp(oper *op) (done bool, err error) {
return false, nil
}
// Do the operation
for i := 0; i < 6; i++ {
err = fs.doOp(oper)
if err == nil || !isTooManyFDError(err) {
break
}
time.Sleep(time.Duration(i+1) * RetryDelay)
}
err = fs.doOp(oper)
// Finish it. If no error, it will signal other operations
// waiting on this result to succeed. Otherwise, they will
......@@ -585,22 +566,17 @@ func (fs *Datastore) putMany(data map[datastore.Key][]byte) error {
}
dirsToSync[dir] = struct{}{}
tmp, err := fs.tempFile()
tmp, err := fs.tempFileOnce()
// Fallback retry for temporary error.
if err != nil && isTooManyFDError(err) {
// If we have too many files open, try closing some, then try
// again repeatedly.
if isTooManyFDError(err) {
if err = closer(); err != nil {
return err
}
for i := 0; i < 6; i++ {
time.Sleep(time.Duration(i+1) * RetryDelay)
tmp, err = fs.tempFile()
if err == nil || !isTooManyFDError(err) {
break
}
}
tmp, err = fs.tempFile()
}
if err != nil {
return err
}
......@@ -1122,6 +1098,11 @@ func (fs *Datastore) tempFile() (*os.File, error) {
return file, err
}
func (fs *Datastore) tempFileOnce() (*os.File, error) {
file, err := tempFileOnce(fs.tempPath, "temp-")
return file, err
}
// only call this on directories.
func (fs *Datastore) walk(path string, qrb *query.ResultBuilder) error {
dir, err := os.Open(path)
......
......@@ -23,7 +23,7 @@ func DirIsEmpty(name string) (bool, error) {
func readFile(filename string) (data []byte, err error) {
// Fallback retry for temporary error.
for i := 0; i < 6; i++ {
for i := 0; i < RetryAttempts; i++ {
data, err = readFileOnce(filename)
if err == nil || !isTooManyFDError(err) {
break
......@@ -32,3 +32,14 @@ func readFile(filename string) (data []byte, err error) {
}
return data, err
}
func tempFile(dir, pattern string) (fi *os.File, err error) {
for i := 0; i < RetryAttempts; i++ {
fi, err = tempFileOnce(dir, pattern)
if err == nil || !isTooManyFDError(err) {
break
}
time.Sleep(time.Duration(i+1) * RetryDelay)
}
return fi, err
}
......@@ -7,7 +7,7 @@ import (
"os"
)
func tempFile(dir, pattern string) (*os.File, error) {
func tempFileOnce(dir, pattern string) (*os.File, error) {
return ioutil.TempFile(dir, pattern)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment