You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

199 lines
5.5 KiB

  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package indexer
  5. import (
  6. "os"
  7. "strings"
  8. "code.gitea.io/gitea/modules/log"
  9. "code.gitea.io/gitea/modules/setting"
  10. "github.com/blevesearch/bleve"
  11. "github.com/blevesearch/bleve/analysis/analyzer/custom"
  12. "github.com/blevesearch/bleve/analysis/token/camelcase"
  13. "github.com/blevesearch/bleve/analysis/token/lowercase"
  14. "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
  15. )
  16. const repoIndexerAnalyzer = "repoIndexerAnalyzer"
  17. // repoIndexer (thread-safe) index for repository contents
  18. var repoIndexer bleve.Index
  19. // RepoIndexerOp type of operation to perform on repo indexer
  20. type RepoIndexerOp int
  21. const (
  22. // RepoIndexerOpUpdate add/update a file's contents
  23. RepoIndexerOpUpdate = iota
  24. // RepoIndexerOpDelete delete a file
  25. RepoIndexerOpDelete
  26. )
  27. // RepoIndexerData data stored in the repo indexer
  28. type RepoIndexerData struct {
  29. RepoID int64
  30. Content string
  31. }
  32. // RepoIndexerUpdate an update to the repo indexer
  33. type RepoIndexerUpdate struct {
  34. Filepath string
  35. Op RepoIndexerOp
  36. Data *RepoIndexerData
  37. }
  38. func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error {
  39. id := filenameIndexerID(update.Data.RepoID, update.Filepath)
  40. switch update.Op {
  41. case RepoIndexerOpUpdate:
  42. return batch.Index(id, update.Data)
  43. case RepoIndexerOpDelete:
  44. batch.Delete(id)
  45. default:
  46. log.Error(4, "Unrecognized repo indexer op: %d", update.Op)
  47. }
  48. return nil
  49. }
  50. // InitRepoIndexer initialize repo indexer
  51. func InitRepoIndexer(populateIndexer func() error) {
  52. _, err := os.Stat(setting.Indexer.RepoPath)
  53. if err != nil {
  54. if os.IsNotExist(err) {
  55. if err = createRepoIndexer(); err != nil {
  56. log.Fatal(4, "CreateRepoIndexer: %v", err)
  57. }
  58. if err = populateIndexer(); err != nil {
  59. log.Fatal(4, "PopulateRepoIndex: %v", err)
  60. }
  61. } else {
  62. log.Fatal(4, "InitRepoIndexer: %v", err)
  63. }
  64. } else {
  65. repoIndexer, err = bleve.Open(setting.Indexer.RepoPath)
  66. if err != nil {
  67. log.Fatal(4, "InitRepoIndexer, open index: %v", err)
  68. }
  69. }
  70. }
  71. // createRepoIndexer create a repo indexer if one does not already exist
  72. func createRepoIndexer() error {
  73. docMapping := bleve.NewDocumentMapping()
  74. docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
  75. textFieldMapping := bleve.NewTextFieldMapping()
  76. docMapping.AddFieldMappingsAt("Content", textFieldMapping)
  77. mapping := bleve.NewIndexMapping()
  78. if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
  79. return err
  80. } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
  81. "type": custom.Name,
  82. "char_filters": []string{},
  83. "tokenizer": unicode.Name,
  84. "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
  85. }); err != nil {
  86. return err
  87. }
  88. mapping.DefaultAnalyzer = repoIndexerAnalyzer
  89. mapping.AddDocumentMapping("repo", docMapping)
  90. var err error
  91. repoIndexer, err = bleve.New(setting.Indexer.RepoPath, mapping)
  92. return err
  93. }
  94. func filenameIndexerID(repoID int64, filename string) string {
  95. return indexerID(repoID) + "_" + filename
  96. }
  97. func filenameOfIndexerID(indexerID string) string {
  98. index := strings.IndexByte(indexerID, '_')
  99. if index == -1 {
  100. log.Error(4, "Unexpected ID in repo indexer: %s", indexerID)
  101. }
  102. return indexerID[index+1:]
  103. }
  104. // RepoIndexerBatch batch to add updates to
  105. func RepoIndexerBatch() *Batch {
  106. return &Batch{
  107. batch: repoIndexer.NewBatch(),
  108. index: repoIndexer,
  109. }
  110. }
  111. // DeleteRepoFromIndexer delete all of a repo's files from indexer
  112. func DeleteRepoFromIndexer(repoID int64) error {
  113. query := numericEqualityQuery(repoID, "RepoID")
  114. searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
  115. result, err := repoIndexer.Search(searchRequest)
  116. if err != nil {
  117. return err
  118. }
  119. batch := RepoIndexerBatch()
  120. for _, hit := range result.Hits {
  121. batch.batch.Delete(hit.ID)
  122. if err = batch.flushIfFull(); err != nil {
  123. return err
  124. }
  125. }
  126. return batch.Flush()
  127. }
  128. // RepoSearchResult result of performing a search in a repo
  129. type RepoSearchResult struct {
  130. StartIndex int
  131. EndIndex int
  132. Filename string
  133. Content string
  134. }
  135. // SearchRepoByKeyword searches for files in the specified repo.
  136. // Returns the matching file-paths
  137. func SearchRepoByKeyword(repoID int64, keyword string, page, pageSize int) (int64, []*RepoSearchResult, error) {
  138. phraseQuery := bleve.NewMatchPhraseQuery(keyword)
  139. phraseQuery.FieldVal = "Content"
  140. phraseQuery.Analyzer = repoIndexerAnalyzer
  141. indexerQuery := bleve.NewConjunctionQuery(
  142. numericEqualityQuery(repoID, "RepoID"),
  143. phraseQuery,
  144. )
  145. from := (page - 1) * pageSize
  146. searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
  147. searchRequest.Fields = []string{"Content"}
  148. searchRequest.IncludeLocations = true
  149. result, err := repoIndexer.Search(searchRequest)
  150. if err != nil {
  151. return 0, nil, err
  152. }
  153. searchResults := make([]*RepoSearchResult, len(result.Hits))
  154. for i, hit := range result.Hits {
  155. var startIndex, endIndex int = -1, -1
  156. for _, locations := range hit.Locations["Content"] {
  157. location := locations[0]
  158. locationStart := int(location.Start)
  159. locationEnd := int(location.End)
  160. if startIndex < 0 || locationStart < startIndex {
  161. startIndex = locationStart
  162. }
  163. if endIndex < 0 || locationEnd > endIndex {
  164. endIndex = locationEnd
  165. }
  166. }
  167. searchResults[i] = &RepoSearchResult{
  168. StartIndex: startIndex,
  169. EndIndex: endIndex,
  170. Filename: filenameOfIndexerID(hit.ID),
  171. Content: hit.Fields["Content"].(string),
  172. }
  173. }
  174. return int64(result.Total), searchResults, nil
  175. }