Support elastic search for code search (#10273)

* Support elastic search for code search

* Finished elastic search implementation and add some tests

* Enable test on drone and added docs

* Add new fields to elastic search

* Fix bug

* remove unused changes

* Use indexer alias to keep the gitea indexer version

* Improve codes

* Some code improvements

* The real indexer name changed to xxx.v1

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
Lunny Xiao 2020-08-31 00:08:01 +08:00 committed by GitHub
parent d257485bc0
commit 9bc69ff26e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 694 additions and 164 deletions

View file

@ -7,8 +7,11 @@ package code
import (
"context"
"os"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
@ -37,12 +40,33 @@ type SearchResultLanguages struct {
// Indexer defines an interface to indexer issues contents
type Indexer interface {
Index(repoID int64) error
Index(repo *models.Repository, sha string, changes *repoChanges) error
Delete(repoID int64) error
Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error)
Close()
}
func filenameIndexerID(repoID int64, filename string) string {
return indexerID(repoID) + "_" + filename
}
func parseIndexerID(indexerID string) (int64, string) {
index := strings.IndexByte(indexerID, '_')
if index == -1 {
log.Error("Unexpected ID in repo indexer: %s", indexerID)
}
repoID, _ := strconv.ParseInt(indexerID[:index], 10, 64)
return repoID, indexerID[index+1:]
}
func filenameOfIndexerID(indexerID string) string {
index := strings.IndexByte(indexerID, '_')
if index == -1 {
log.Error("Unexpected ID in repo indexer: %s", indexerID)
}
return indexerID[index+1:]
}
// Init initialize the repo indexer
func Init() {
if !setting.Indexer.RepoIndexerEnabled {
@ -63,33 +87,61 @@ func Init() {
waitChannel := make(chan time.Duration)
go func() {
start := time.Now()
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
var (
rIndexer Indexer
populate bool
err error
)
switch setting.Indexer.RepoType {
case "bleve":
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
}
}()
rIndexer, populate, err = NewBleveIndexer(setting.Indexer.RepoPath)
if err != nil {
if rIndexer != nil {
rIndexer.Close()
}
cancel()
indexer.Close()
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
}
}()
bleveIndexer, created, err := NewBleveIndexer(setting.Indexer.RepoPath)
if err != nil {
if bleveIndexer != nil {
bleveIndexer.Close()
case "elasticsearch":
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoConnStr)
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the \"%s\" index to make Gitea recreate the indexes", setting.Indexer.RepoConnStr)
}
}()
rIndexer, populate, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName)
if err != nil {
if rIndexer != nil {
rIndexer.Close()
}
cancel()
indexer.Close()
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
}
cancel()
indexer.Close()
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
default:
log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)
}
indexer.set(bleveIndexer)
indexer.set(rIndexer)
go processRepoIndexerOperationQueue(indexer)
if created {
if populate {
go populateRepoIndexer()
}
select {