You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

343 lines
8.5 KiB

  1. // Copyright (c) 2018 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "bytes"
  17. "container/heap"
  18. "math"
  19. "reflect"
  20. "github.com/blevesearch/bleve/index"
  21. "github.com/blevesearch/bleve/search"
  22. "github.com/blevesearch/bleve/search/scorer"
  23. "github.com/blevesearch/bleve/size"
  24. )
  25. var reflectStaticSizeDisjunctionHeapSearcher int
  26. var reflectStaticSizeSearcherCurr int
  27. func init() {
  28. var dhs DisjunctionHeapSearcher
  29. reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size())
  30. var sc SearcherCurr
  31. reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size())
  32. }
  33. type SearcherCurr struct {
  34. searcher search.Searcher
  35. curr *search.DocumentMatch
  36. }
  37. type DisjunctionHeapSearcher struct {
  38. indexReader index.IndexReader
  39. numSearchers int
  40. scorer *scorer.DisjunctionQueryScorer
  41. min int
  42. queryNorm float64
  43. initialized bool
  44. searchers []search.Searcher
  45. heap []*SearcherCurr
  46. matching []*search.DocumentMatch
  47. matchingCurrs []*SearcherCurr
  48. }
  49. func newDisjunctionHeapSearcher(indexReader index.IndexReader,
  50. searchers []search.Searcher, min float64, options search.SearcherOptions,
  51. limit bool) (
  52. *DisjunctionHeapSearcher, error) {
  53. if limit && tooManyClauses(len(searchers)) {
  54. return nil, tooManyClausesErr("", len(searchers))
  55. }
  56. // build our searcher
  57. rv := DisjunctionHeapSearcher{
  58. indexReader: indexReader,
  59. searchers: searchers,
  60. numSearchers: len(searchers),
  61. scorer: scorer.NewDisjunctionQueryScorer(options),
  62. min: int(min),
  63. matching: make([]*search.DocumentMatch, len(searchers)),
  64. matchingCurrs: make([]*SearcherCurr, len(searchers)),
  65. heap: make([]*SearcherCurr, 0, len(searchers)),
  66. }
  67. rv.computeQueryNorm()
  68. return &rv, nil
  69. }
  70. func (s *DisjunctionHeapSearcher) Size() int {
  71. sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
  72. s.scorer.Size()
  73. for _, entry := range s.searchers {
  74. sizeInBytes += entry.Size()
  75. }
  76. for _, entry := range s.matching {
  77. if entry != nil {
  78. sizeInBytes += entry.Size()
  79. }
  80. }
  81. // for matchingCurrs and heap, just use static size * len
  82. // since searchers and document matches already counted above
  83. sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
  84. sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr
  85. return sizeInBytes
  86. }
  87. func (s *DisjunctionHeapSearcher) computeQueryNorm() {
  88. // first calculate sum of squared weights
  89. sumOfSquaredWeights := 0.0
  90. for _, searcher := range s.searchers {
  91. sumOfSquaredWeights += searcher.Weight()
  92. }
  93. // now compute query norm from this
  94. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  95. // finally tell all the downstream searchers the norm
  96. for _, searcher := range s.searchers {
  97. searcher.SetQueryNorm(s.queryNorm)
  98. }
  99. }
  100. func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
  101. // alloc a single block of SearcherCurrs
  102. block := make([]SearcherCurr, len(s.searchers))
  103. // get all searchers pointing at their first match
  104. for i, searcher := range s.searchers {
  105. curr, err := searcher.Next(ctx)
  106. if err != nil {
  107. return err
  108. }
  109. if curr != nil {
  110. block[i].searcher = searcher
  111. block[i].curr = curr
  112. heap.Push(s, &block[i])
  113. }
  114. }
  115. err := s.updateMatches()
  116. if err != nil {
  117. return err
  118. }
  119. s.initialized = true
  120. return nil
  121. }
  122. func (s *DisjunctionHeapSearcher) updateMatches() error {
  123. matching := s.matching[:0]
  124. matchingCurrs := s.matchingCurrs[:0]
  125. if len(s.heap) > 0 {
  126. // top of the heap is our next hit
  127. next := heap.Pop(s).(*SearcherCurr)
  128. matching = append(matching, next.curr)
  129. matchingCurrs = append(matchingCurrs, next)
  130. // now as long as top of heap matches, keep popping
  131. for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
  132. next = heap.Pop(s).(*SearcherCurr)
  133. matching = append(matching, next.curr)
  134. matchingCurrs = append(matchingCurrs, next)
  135. }
  136. }
  137. s.matching = matching
  138. s.matchingCurrs = matchingCurrs
  139. return nil
  140. }
  141. func (s *DisjunctionHeapSearcher) Weight() float64 {
  142. var rv float64
  143. for _, searcher := range s.searchers {
  144. rv += searcher.Weight()
  145. }
  146. return rv
  147. }
  148. func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
  149. for _, searcher := range s.searchers {
  150. searcher.SetQueryNorm(qnorm)
  151. }
  152. }
  153. func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
  154. *search.DocumentMatch, error) {
  155. if !s.initialized {
  156. err := s.initSearchers(ctx)
  157. if err != nil {
  158. return nil, err
  159. }
  160. }
  161. var rv *search.DocumentMatch
  162. found := false
  163. for !found && len(s.matching) > 0 {
  164. if len(s.matching) >= s.min {
  165. found = true
  166. // score this match
  167. rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
  168. }
  169. // invoke next on all the matching searchers
  170. for _, matchingCurr := range s.matchingCurrs {
  171. if matchingCurr.curr != rv {
  172. ctx.DocumentMatchPool.Put(matchingCurr.curr)
  173. }
  174. curr, err := matchingCurr.searcher.Next(ctx)
  175. if err != nil {
  176. return nil, err
  177. }
  178. if curr != nil {
  179. matchingCurr.curr = curr
  180. heap.Push(s, matchingCurr)
  181. }
  182. }
  183. err := s.updateMatches()
  184. if err != nil {
  185. return nil, err
  186. }
  187. }
  188. return rv, nil
  189. }
  190. func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
  191. ID index.IndexInternalID) (*search.DocumentMatch, error) {
  192. if !s.initialized {
  193. err := s.initSearchers(ctx)
  194. if err != nil {
  195. return nil, err
  196. }
  197. }
  198. // if there is anything in matching, toss it back onto the heap
  199. for _, matchingCurr := range s.matchingCurrs {
  200. heap.Push(s, matchingCurr)
  201. }
  202. s.matching = s.matching[:0]
  203. s.matchingCurrs = s.matchingCurrs[:0]
  204. // find all searchers that actually need to be advanced
  205. // advance them, using s.matchingCurrs as temp storage
  206. for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
  207. searcherCurr := heap.Pop(s).(*SearcherCurr)
  208. ctx.DocumentMatchPool.Put(searcherCurr.curr)
  209. curr, err := searcherCurr.searcher.Advance(ctx, ID)
  210. if err != nil {
  211. return nil, err
  212. }
  213. if curr != nil {
  214. searcherCurr.curr = curr
  215. s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
  216. }
  217. }
  218. // now all of the searchers that we advanced have to be pushed back
  219. for _, matchingCurr := range s.matchingCurrs {
  220. heap.Push(s, matchingCurr)
  221. }
  222. // reset our temp space
  223. s.matchingCurrs = s.matchingCurrs[:0]
  224. err := s.updateMatches()
  225. if err != nil {
  226. return nil, err
  227. }
  228. return s.Next(ctx)
  229. }
  230. func (s *DisjunctionHeapSearcher) Count() uint64 {
  231. // for now return a worst case
  232. var sum uint64
  233. for _, searcher := range s.searchers {
  234. sum += searcher.Count()
  235. }
  236. return sum
  237. }
  238. func (s *DisjunctionHeapSearcher) Close() (rv error) {
  239. for _, searcher := range s.searchers {
  240. err := searcher.Close()
  241. if err != nil && rv == nil {
  242. rv = err
  243. }
  244. }
  245. return rv
  246. }
  247. func (s *DisjunctionHeapSearcher) Min() int {
  248. return s.min
  249. }
  250. func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
  251. rv := len(s.searchers)
  252. for _, s := range s.searchers {
  253. rv += s.DocumentMatchPoolSize()
  254. }
  255. return rv
  256. }
  257. // a disjunction searcher implements the index.Optimizable interface
  258. // but only activates on an edge case where the disjunction is a
  259. // wrapper around a single Optimizable child searcher
  260. func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
  261. index.OptimizableContext, error) {
  262. if len(s.searchers) == 1 {
  263. o, ok := s.searchers[0].(index.Optimizable)
  264. if ok {
  265. return o.Optimize(kind, octx)
  266. }
  267. }
  268. return nil, nil
  269. }
  270. // heap impl
  271. func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) }
  272. func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
  273. if s.heap[i].curr == nil {
  274. return true
  275. } else if s.heap[j].curr == nil {
  276. return false
  277. }
  278. return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0
  279. }
  280. func (s *DisjunctionHeapSearcher) Swap(i, j int) {
  281. s.heap[i], s.heap[j] = s.heap[j], s.heap[i]
  282. }
  283. func (s *DisjunctionHeapSearcher) Push(x interface{}) {
  284. s.heap = append(s.heap, x.(*SearcherCurr))
  285. }
  286. func (s *DisjunctionHeapSearcher) Pop() interface{} {
  287. old := s.heap
  288. n := len(old)
  289. x := old[n-1]
  290. s.heap = old[0 : n-1]
  291. return x
  292. }