|
|
- // Copyright (c) 2018 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package searcher
-
- import (
- "bytes"
- "container/heap"
- "math"
- "reflect"
-
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/search"
- "github.com/blevesearch/bleve/search/scorer"
- "github.com/blevesearch/bleve/size"
- )
-
- var reflectStaticSizeDisjunctionHeapSearcher int
- var reflectStaticSizeSearcherCurr int
-
- func init() {
- var dhs DisjunctionHeapSearcher
- reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size())
-
- var sc SearcherCurr
- reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size())
- }
-
- type SearcherCurr struct {
- searcher search.Searcher
- curr *search.DocumentMatch
- }
-
- type DisjunctionHeapSearcher struct {
- indexReader index.IndexReader
-
- numSearchers int
- scorer *scorer.DisjunctionQueryScorer
- min int
- queryNorm float64
- initialized bool
- searchers []search.Searcher
- heap []*SearcherCurr
-
- matching []*search.DocumentMatch
- matchingCurrs []*SearcherCurr
- }
-
- func newDisjunctionHeapSearcher(indexReader index.IndexReader,
- searchers []search.Searcher, min float64, options search.SearcherOptions,
- limit bool) (
- *DisjunctionHeapSearcher, error) {
- if limit && tooManyClauses(len(searchers)) {
- return nil, tooManyClausesErr("", len(searchers))
- }
-
- // build our searcher
- rv := DisjunctionHeapSearcher{
- indexReader: indexReader,
- searchers: searchers,
- numSearchers: len(searchers),
- scorer: scorer.NewDisjunctionQueryScorer(options),
- min: int(min),
- matching: make([]*search.DocumentMatch, len(searchers)),
- matchingCurrs: make([]*SearcherCurr, len(searchers)),
- heap: make([]*SearcherCurr, 0, len(searchers)),
- }
- rv.computeQueryNorm()
- return &rv, nil
- }
-
- func (s *DisjunctionHeapSearcher) Size() int {
- sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
- s.scorer.Size()
-
- for _, entry := range s.searchers {
- sizeInBytes += entry.Size()
- }
-
- for _, entry := range s.matching {
- if entry != nil {
- sizeInBytes += entry.Size()
- }
- }
-
- // for matchingCurrs and heap, just use static size * len
- // since searchers and document matches already counted above
- sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
- sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr
-
- return sizeInBytes
- }
-
- func (s *DisjunctionHeapSearcher) computeQueryNorm() {
- // first calculate sum of squared weights
- sumOfSquaredWeights := 0.0
- for _, searcher := range s.searchers {
- sumOfSquaredWeights += searcher.Weight()
- }
- // now compute query norm from this
- s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
- // finally tell all the downstream searchers the norm
- for _, searcher := range s.searchers {
- searcher.SetQueryNorm(s.queryNorm)
- }
- }
-
- func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
- // alloc a single block of SearcherCurrs
- block := make([]SearcherCurr, len(s.searchers))
-
- // get all searchers pointing at their first match
- for i, searcher := range s.searchers {
- curr, err := searcher.Next(ctx)
- if err != nil {
- return err
- }
- if curr != nil {
- block[i].searcher = searcher
- block[i].curr = curr
- heap.Push(s, &block[i])
- }
- }
-
- err := s.updateMatches()
- if err != nil {
- return err
- }
- s.initialized = true
- return nil
- }
-
- func (s *DisjunctionHeapSearcher) updateMatches() error {
- matching := s.matching[:0]
- matchingCurrs := s.matchingCurrs[:0]
-
- if len(s.heap) > 0 {
-
- // top of the heap is our next hit
- next := heap.Pop(s).(*SearcherCurr)
- matching = append(matching, next.curr)
- matchingCurrs = append(matchingCurrs, next)
-
- // now as long as top of heap matches, keep popping
- for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
- next = heap.Pop(s).(*SearcherCurr)
- matching = append(matching, next.curr)
- matchingCurrs = append(matchingCurrs, next)
- }
- }
-
- s.matching = matching
- s.matchingCurrs = matchingCurrs
-
- return nil
- }
-
- func (s *DisjunctionHeapSearcher) Weight() float64 {
- var rv float64
- for _, searcher := range s.searchers {
- rv += searcher.Weight()
- }
- return rv
- }
-
- func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
- for _, searcher := range s.searchers {
- searcher.SetQueryNorm(qnorm)
- }
- }
-
- func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
- *search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
-
- var rv *search.DocumentMatch
- found := false
- for !found && len(s.matching) > 0 {
- if len(s.matching) >= s.min {
- found = true
- // score this match
- rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
- }
-
- // invoke next on all the matching searchers
- for _, matchingCurr := range s.matchingCurrs {
- if matchingCurr.curr != rv {
- ctx.DocumentMatchPool.Put(matchingCurr.curr)
- }
- curr, err := matchingCurr.searcher.Next(ctx)
- if err != nil {
- return nil, err
- }
- if curr != nil {
- matchingCurr.curr = curr
- heap.Push(s, matchingCurr)
- }
- }
-
- err := s.updateMatches()
- if err != nil {
- return nil, err
- }
- }
-
- return rv, nil
- }
-
- func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
- ID index.IndexInternalID) (*search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
-
- // if there is anything in matching, toss it back onto the heap
- for _, matchingCurr := range s.matchingCurrs {
- heap.Push(s, matchingCurr)
- }
- s.matching = s.matching[:0]
- s.matchingCurrs = s.matchingCurrs[:0]
-
- // find all searchers that actually need to be advanced
- // advance them, using s.matchingCurrs as temp storage
- for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
- searcherCurr := heap.Pop(s).(*SearcherCurr)
- ctx.DocumentMatchPool.Put(searcherCurr.curr)
- curr, err := searcherCurr.searcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
- }
- if curr != nil {
- searcherCurr.curr = curr
- s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
- }
- }
- // now all of the searchers that we advanced have to be pushed back
- for _, matchingCurr := range s.matchingCurrs {
- heap.Push(s, matchingCurr)
- }
- // reset our temp space
- s.matchingCurrs = s.matchingCurrs[:0]
-
- err := s.updateMatches()
- if err != nil {
- return nil, err
- }
-
- return s.Next(ctx)
- }
-
- func (s *DisjunctionHeapSearcher) Count() uint64 {
- // for now return a worst case
- var sum uint64
- for _, searcher := range s.searchers {
- sum += searcher.Count()
- }
- return sum
- }
-
- func (s *DisjunctionHeapSearcher) Close() (rv error) {
- for _, searcher := range s.searchers {
- err := searcher.Close()
- if err != nil && rv == nil {
- rv = err
- }
- }
- return rv
- }
-
- func (s *DisjunctionHeapSearcher) Min() int {
- return s.min
- }
-
- func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
- rv := len(s.searchers)
- for _, s := range s.searchers {
- rv += s.DocumentMatchPoolSize()
- }
- return rv
- }
-
- // a disjunction searcher implements the index.Optimizable interface
- // but only activates on an edge case where the disjunction is a
- // wrapper around a single Optimizable child searcher
- func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
- index.OptimizableContext, error) {
- if len(s.searchers) == 1 {
- o, ok := s.searchers[0].(index.Optimizable)
- if ok {
- return o.Optimize(kind, octx)
- }
- }
-
- return nil, nil
- }
-
- // heap impl
-
- func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) }
-
- func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
- if s.heap[i].curr == nil {
- return true
- } else if s.heap[j].curr == nil {
- return false
- }
- return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0
- }
-
- func (s *DisjunctionHeapSearcher) Swap(i, j int) {
- s.heap[i], s.heap[j] = s.heap[j], s.heap[i]
- }
-
- func (s *DisjunctionHeapSearcher) Push(x interface{}) {
- s.heap = append(s.heap, x.(*SearcherCurr))
- }
-
- func (s *DisjunctionHeapSearcher) Pop() interface{} {
- old := s.heap
- n := len(old)
- x := old[n-1]
- s.heap = old[0 : n-1]
- return x
- }
|