package coreindex

import (
	"errors"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"sort"
	"strconv"
	"time"

	util "gitlab.dms3.io/dms3/go-dms3-util"
	idxconfig "gitlab.dms3.io/is/go-idx-config"
)

//
// Local filesystem repository folder hierarchy.
//
// NOTE(review): the <...> placeholders below were missing from the original
// comment and have been reconstructed from the path-building functions in
// this file; confirm against the intended layout.
//
// index <root>, cfg parameter Indexer.Path, must be a relative path
//
//   <root>
//       index root
//   <root>/reposet
//       reposet kind root
//   <root>/reposet/<kind>
//   <root>/reposet/<kind>/params
//       not a cfg parameter; common params file for all repos of a kind
//   <root>/reposet/<kind>/stopword
//       optional; not in cfg, nor a cli option
//   <root>/reposet/<kind>/<name>
//       reposet root folder; the store name is prefixed with "infostore-"
//       or "metastore-"
//   <root>/reposet/<kind>/<name>/<repo>
//       repo root folder and sub-folders, not a cfg parameter
//   <root>/reposet/<kind>/<name>/<repo>/corpus
//       cfg parameter Indexer.Corpus.Path
//   <root>/reposet/<kind>/<name>/<repo>/metadata
//       cfg parameter Indexer.Corpus.Metadata
//
// repo name, composed as:
//   - window: uint64, creation time (Unix, seconds), sharding tag
//   - area:   uint8,  area number, sharding tag
//   - cat:    uint8,  category number, sharding tag
//   - offset: int64,  time since creation (seconds), recovery tag
//

// ReposetExists reports whether the local folder of the reposet identified by
// kind and name exists.
//
// Both kind and name must be non-empty, otherwise an error is returned. On
// success path holds the reposet folder path (whether or not it exists) and
// found tells whether that path is present on disk.
func ReposetExists(kind, name string) (found bool, path string, err error) {
	switch {
	case kind == "":
		return false, "", errors.New("reposet kind must not be null.")
	case name == "":
		return false, "", errors.New("reposet name must not be null.")
	}

	path, err = ReposetLocalPath(kind, name)
	if err != nil {
		return false, path, err
	}
	return PathExists(path), path, nil
}

// PathExists reports whether p exists, as determined by util.FileExists.
func PathExists(p string) bool {
	return util.FileExists(p)
}

// joinUnderRoot appends elems to the index root reported by
// idxconfig.PathRoot and returns the joined path. The path is empty when the
// root cannot be resolved.
func joinUnderRoot(elems ...string) (string, error) {
	root, err := idxconfig.PathRoot()
	if err != nil {
		return "", err
	}
	return filepath.Join(append([]string{root}, elems...)...), nil
}

// ReposetLocalPath returns the folder path of the reposet identified by kind
// and name: <root>/reposet/<kind>/<name>.
func ReposetLocalPath(kind, name string) (path string, err error) {
	return joinUnderRoot("reposet", kind, name)
}

// ParamsLocalFile returns the path of the params file shared by all repos of
// the given kind: <root>/reposet/<kind>/params.
func ParamsLocalFile(kind string) (path string, err error) {
	return joinUnderRoot("reposet", kind, "params")
}

// StopwordsLocalFile returns the path of the stopwords file, <root>/stopwords.
//
// When the file does not exist an error is returned; the expected path is
// still returned alongside it so the caller can see where the file belongs.
//
// NOTE(review): kind is not used here, while the hierarchy comment at the top
// of this file describes a per-kind "stopword" file; confirm which location
// is intended.
func StopwordsLocalFile(kind string) (path string, err error) {
	path, err = joinUnderRoot("stopwords")
	if err != nil {
		return path, err
	}
	if !PathExists(path) {
		return path, fmt.Errorf("stopwords file is missing, must be generated at %v\n", path)
	}
	return path, nil
}

// GetRepoCount returns the largest repo number found in reposetPath.
//
// Repos are sub-folders named with a decimal number from 1 to 255. Holes in
// the sequence (e.g. repos deleted manually on disk) are ignored, so the
// result is the highest number present rather than the number of folders.
// The result is 0 when the folder cannot be read or holds no valid repo.
func GetRepoCount(reposetPath string) (repoCount int) {
	entries, err := ioutil.ReadDir(reposetPath)
	if err != nil {
		return 0
	}

	// Keep only sub-folders whose name is an in-range repo number.
	numbers := make([]int, 0, len(entries))
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		n, convErr := strconv.Atoi(entry.Name())
		if convErr != nil || n < 1 || n > 255 {
			continue
		}
		numbers = append(numbers, n)
	}
	if len(numbers) == 0 {
		return 0
	}

	// Sort explicitly rather than relying on the listing order.
	sort.Ints(numbers)
	return numbers[len(numbers)-1]
}

// NextRepoPath returns the path of the repo following repoCount inside
// reposetPath, i.e. reposetPath/<repoCount+1>. It returns an empty string
// when repoCount is negative or the 255 repo limit has been reached.
func NextRepoPath(reposetPath string, repoCount int) (repoPath string) {
	if repoCount < 0 || repoCount >= 255 {
		return ""
	}
	return filepath.Join(reposetPath, strconv.Itoa(repoCount+1))
}

// repoName builds a repo name of the form w<window>-a<area>-c<category>-o<offset>
// and returns it together with the creation time it was derived from.
//
// Not used, but kept for reference: index folders under repoRootPath are
// managed by infospace. repoRootPath itself is not read.
func repoName(repoRootPath string) (reponame string, createtime time.Time) {
	// Current area and category; zero means none, and the name encodes them
	// starting at 1.
	const area, category = 0, 0

	createtime = time.Now()
	// Offset since creation; zero here because the repo is being created now.
	elapsed := createtime.Sub(createtime)

	reponame = fmt.Sprintf("w%d-a%d-c%d-o%d", createtime.Unix(), area+1, category+1, elapsed)
	return reponame, createtime
}