repo.go 4.19 KB
Newer Older
tavit ohanian's avatar
tavit ohanian committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
package coreindex

import (
	"errors"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"sort"
	"strconv"
	"time"

	util "gitlab.dms3.io/dms3/go-dms3-util"
	idxconfig "gitlab.dms3.io/is/go-idx-config"
)

//
// local filesystem repository file folder hierarchy.
//
// index <repo root>, cfg parameter Indexer.Path, must be a relative path
// 	  - <index>
// reposet root
// 	  - <index>/reposet
// reposet kind root
//	  - <index>/reposet/<kind>
// 	  - <index>/reposet/<kind>/params, not a cfg parameter
//      common params file for all repos of a kind
// 	  - <index>/reposet/<kind>/stopword, optional, not in cfg, nor cli option
// reposet folder (one per named reposet of a kind)
// 	  - <index>/reposet/<kind>/<name>
//      store name is prefixed with "infostore-" or "metastore-"
// repo root folder and sub-folders, not a cfg parameter
// 	  - <index>/reposet/<kind>/<name>/<number>
//	  - <index>/reposet/<kind>/<name>/<number>/corpus, cfg parameter Indexer.Corpus.Path
//	  - <index>/reposet/<kind>/<name>/<number>/metadata, cfg parameter Indexer.Corpus.Metadata
//    - repo name, composed as:
//	    - window: uint64,    // creation time (Unix, seconds), sharding tag
//	    - area: uint8,       // area number, sharding tag
//	    - cat: uint8,        // category number, sharding tag
//	    - offset: int64,     // time since creation (seconds), recovery tag
//

// ReposetExists reports whether the local folder of the reposet identified
// by kind and name is present on disk.
//
// path is the reposet folder as computed by ReposetLocalPath; it is returned
// whether or not the folder exists. err is non-nil when kind or name is
// empty, or when ReposetLocalPath fails; found is false in those cases.
func ReposetExists(kind, name string) (found bool, path string, err error) {
	// Reject empty identifiers before touching the configuration.
	switch {
	case kind == "":
		return false, "", errors.New("reposet kind must not be null.")
	case name == "":
		return false, "", errors.New("reposet name must not be null.")
	}

	path, err = ReposetLocalPath(kind, name)
	if err != nil {
		return false, path, err
	}
	return PathExists(path), path, nil
}

// PathExists reports whether p exists, as determined by util.FileExists.
func PathExists(p string) bool {
	return util.FileExists(p)
}

// ReposetLocalPath builds the local folder path of a reposet:
// <root>/reposet/<kind>/<name>, where <root> is the value returned by
// idxconfig.PathRoot.
//
// The path is only computed, not checked for existence. If the root cannot
// be obtained, the error from idxconfig.PathRoot is returned with an empty
// path.
func ReposetLocalPath(kind, name string) (path string, err error) {
	root, err := idxconfig.PathRoot()
	if err != nil {
		return "", err
	}
	return filepath.Join(root, "reposet", kind, name), nil
}

// ParamsLocalFile builds the path of the params file shared by all repos of
// the given kind: <root>/reposet/<kind>/params, where <root> is the value
// returned by idxconfig.PathRoot.
//
// The path is only computed, not checked for existence. If the root cannot
// be obtained, the error from idxconfig.PathRoot is returned with an empty
// path.
func ParamsLocalFile(kind string) (path string, err error) {
	var root string
	root, err = idxconfig.PathRoot()
	if err == nil {
		path = filepath.Join(root, "reposet", kind, "params")
	}
	return path, err
}

// StopwordsLocalFile returns the path of the stopwords file,
// <root>/stopwords, where <root> is the value returned by
// idxconfig.PathRoot.
//
// If the file does not exist, an error is returned together with the path
// where the file is expected. If the root cannot be obtained, the error from
// idxconfig.PathRoot is returned with an empty path.
//
// NOTE(review): kind is currently unused; the resulting path does not depend
// on it. Confirm whether a per-kind stopwords location is intended.
func StopwordsLocalFile(kind string) (path string, err error) {
	root, err := idxconfig.PathRoot()
	if err != nil {
		return "", err
	}

	path = filepath.Join(root, "stopwords")
	if PathExists(path) {
		return path, nil
	}
	// Keep the expected location in the result so callers can report it.
	return path, fmt.Errorf("stopwords file is missing, must be generated at %v\n", path)
}

// GetRepoCount returns the highest repo number found in the reposet folder
// at reposetPath.
//
// Repos are numbered sub-folders forming a sequence from 1 up to a limit of
// 255. Holes in the sequence (e.g. repos manually deleted on disk) are
// ignored, so the result is the largest number present rather than the
// number of folders. Entries that are not directories, whose names do not
// parse as integers, or whose numbers fall outside 1..255 are skipped.
//
// It returns 0 when reposetPath cannot be read or holds no valid repo
// folder.
func GetRepoCount(reposetPath string) (repoCount int) {
	// upper bound on repo numbers within one reposet
	const maxRepos = 255

	entries, err := ioutil.ReadDir(reposetPath)
	if err != nil {
		// best effort: an unreadable or missing reposet counts as empty
		return 0
	}

	// track the maximum directly, so the result does not depend on the
	// order in which the os lists the entries
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		n, convErr := strconv.Atoi(entry.Name())
		if convErr != nil || n < 1 || n > maxRepos {
			continue
		}
		if n > repoCount {
			repoCount = n
		}
	}

	return repoCount
}

// NextRepoPath returns the path of the repo folder that follows repoCount
// in the reposet at reposetPath, i.e. reposetPath/<repoCount+1>.
//
// It returns an empty string when repoCount is negative or has already
// reached the limit of 255 repos.
func NextRepoPath(reposetPath string, repoCount int) (repoPath string) {
	if repoCount < 0 || repoCount >= 255 {
		return ""
	}
	next := strconv.Itoa(repoCount + 1)
	return filepath.Join(reposetPath, next)
}

// repoName composes a repo name for a repo created now and returns it with
// the creation time used.
//
// Not used, but kept for reference: index folders under repoRootPath are
// managed by infospace. repoRootPath itself is not read.
//
// The name has the form w<window>-a<area>-c<category>-o<offset>:
//   - window:   creation time in seconds since the Unix epoch
//   - area:     area number, starting at 1 (0 ==> N/A)
//   - category: category number, starting at 1 (0 ==> N/A)
//   - offset:   time since creation, zero for a newly created repo
func repoName(repoRootPath string) (reponame string, createtime time.Time) {
	const (
		currentArea     = 0 // zero ==> none
		currentCategory = 0 // zero ==> none
	)

	createtime = time.Now()

	// no time has elapsed since creation at this point, so the offset is zero
	var sinceCreate time.Duration

	reponame = fmt.Sprintf("w%d-a%d-c%d-o%d",
		createtime.Unix(),
		currentArea+1,
		currentCategory+1,
		sinceCreate,
	)
	return reponame, createtime
}