You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

260 lines
6.3 KiB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "bytes"
  17. "math"
  18. "sort"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/numeric"
  21. "github.com/blevesearch/bleve/search"
  22. )
  23. func NewNumericRangeSearcher(indexReader index.IndexReader,
  24. min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
  25. boost float64, options search.SearcherOptions) (search.Searcher, error) {
  26. // account for unbounded edges
  27. if min == nil {
  28. negInf := math.Inf(-1)
  29. min = &negInf
  30. }
  31. if max == nil {
  32. Inf := math.Inf(1)
  33. max = &Inf
  34. }
  35. if inclusiveMin == nil {
  36. defaultInclusiveMin := true
  37. inclusiveMin = &defaultInclusiveMin
  38. }
  39. if inclusiveMax == nil {
  40. defaultInclusiveMax := false
  41. inclusiveMax = &defaultInclusiveMax
  42. }
  43. // find all the ranges
  44. minInt64 := numeric.Float64ToInt64(*min)
  45. if !*inclusiveMin && minInt64 != math.MaxInt64 {
  46. minInt64++
  47. }
  48. maxInt64 := numeric.Float64ToInt64(*max)
  49. if !*inclusiveMax && maxInt64 != math.MinInt64 {
  50. maxInt64--
  51. }
  52. var fieldDict index.FieldDictContains
  53. var isIndexed filterFunc
  54. var err error
  55. if irr, ok := indexReader.(index.IndexReaderContains); ok {
  56. fieldDict, err = irr.FieldDictContains(field)
  57. if err != nil {
  58. return nil, err
  59. }
  60. isIndexed = func(term []byte) bool {
  61. found, err := fieldDict.Contains(term)
  62. return err == nil && found
  63. }
  64. }
  65. // FIXME hard-coded precision, should match field declaration
  66. termRanges := splitInt64Range(minInt64, maxInt64, 4)
  67. terms := termRanges.Enumerate(isIndexed)
  68. if fieldDict != nil {
  69. if fd, ok := fieldDict.(index.FieldDict); ok {
  70. if err = fd.Close(); err != nil {
  71. return nil, err
  72. }
  73. }
  74. }
  75. if len(terms) < 1 {
  76. // cannot return MatchNoneSearcher because of interaction with
  77. // commit f391b991c20f02681bacd197afc6d8aed444e132
  78. return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
  79. true)
  80. }
  81. // for upside_down
  82. if isIndexed == nil {
  83. terms, err = filterCandidateTerms(indexReader, terms, field)
  84. if err != nil {
  85. return nil, err
  86. }
  87. }
  88. if tooManyClauses(len(terms)) {
  89. return nil, tooManyClausesErr(field, len(terms))
  90. }
  91. return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
  92. true)
  93. }
  94. func filterCandidateTerms(indexReader index.IndexReader,
  95. terms [][]byte, field string) (rv [][]byte, err error) {
  96. if ir, ok := indexReader.(index.IndexReaderOnly); ok {
  97. fieldDict, err := ir.FieldDictOnly(field, terms, false)
  98. if err != nil {
  99. return nil, err
  100. }
  101. // enumerate the terms (no need to check them again)
  102. tfd, err := fieldDict.Next()
  103. for err == nil && tfd != nil {
  104. rv = append(rv, []byte(tfd.Term))
  105. tfd, err = fieldDict.Next()
  106. }
  107. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  108. err = cerr
  109. }
  110. return rv, err
  111. }
  112. fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
  113. if err != nil {
  114. return nil, err
  115. }
  116. // enumerate the terms and check against list of terms
  117. tfd, err := fieldDict.Next()
  118. for err == nil && tfd != nil {
  119. termBytes := []byte(tfd.Term)
  120. i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
  121. if i < len(terms) && bytes.Compare(terms[i], termBytes) == 0 {
  122. rv = append(rv, terms[i])
  123. }
  124. terms = terms[i:]
  125. tfd, err = fieldDict.Next()
  126. }
  127. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  128. err = cerr
  129. }
  130. return rv, err
  131. }
  132. type termRange struct {
  133. startTerm []byte
  134. endTerm []byte
  135. }
  136. func (t *termRange) Enumerate(filter filterFunc) [][]byte {
  137. var rv [][]byte
  138. next := t.startTerm
  139. for bytes.Compare(next, t.endTerm) <= 0 {
  140. if filter != nil {
  141. if filter(next) {
  142. rv = append(rv, next)
  143. }
  144. } else {
  145. rv = append(rv, next)
  146. }
  147. next = incrementBytes(next)
  148. }
  149. return rv
  150. }
  151. func incrementBytes(in []byte) []byte {
  152. rv := make([]byte, len(in))
  153. copy(rv, in)
  154. for i := len(rv) - 1; i >= 0; i-- {
  155. rv[i] = rv[i] + 1
  156. if rv[i] != 0 {
  157. // didn't overflow, so stop
  158. break
  159. }
  160. }
  161. return rv
  162. }
  163. type termRanges []*termRange
  164. func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
  165. var rv [][]byte
  166. for _, tri := range tr {
  167. trie := tri.Enumerate(filter)
  168. rv = append(rv, trie...)
  169. }
  170. return rv
  171. }
  172. func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
  173. rv := make(termRanges, 0)
  174. if minBound > maxBound {
  175. return rv
  176. }
  177. for shift := uint(0); ; shift += precisionStep {
  178. diff := int64(1) << (shift + precisionStep)
  179. mask := ((int64(1) << precisionStep) - int64(1)) << shift
  180. hasLower := (minBound & mask) != int64(0)
  181. hasUpper := (maxBound & mask) != mask
  182. var nextMinBound int64
  183. if hasLower {
  184. nextMinBound = (minBound + diff) &^ mask
  185. } else {
  186. nextMinBound = minBound &^ mask
  187. }
  188. var nextMaxBound int64
  189. if hasUpper {
  190. nextMaxBound = (maxBound - diff) &^ mask
  191. } else {
  192. nextMaxBound = maxBound &^ mask
  193. }
  194. lowerWrapped := nextMinBound < minBound
  195. upperWrapped := nextMaxBound > maxBound
  196. if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
  197. lowerWrapped || upperWrapped {
  198. // We are in the lowest precision or the next precision is not available.
  199. rv = append(rv, newRange(minBound, maxBound, shift))
  200. // exit the split recursion loop
  201. break
  202. }
  203. if hasLower {
  204. rv = append(rv, newRange(minBound, minBound|mask, shift))
  205. }
  206. if hasUpper {
  207. rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
  208. }
  209. // recurse to next precision
  210. minBound = nextMinBound
  211. maxBound = nextMaxBound
  212. }
  213. return rv
  214. }
  215. func newRange(minBound, maxBound int64, shift uint) *termRange {
  216. maxBound |= (int64(1) << shift) - int64(1)
  217. minBytes := numeric.MustNewPrefixCodedInt64(minBound, shift)
  218. maxBytes := numeric.MustNewPrefixCodedInt64(maxBound, shift)
  219. return newRangeBytes(minBytes, maxBytes)
  220. }
  221. func newRangeBytes(minBytes, maxBytes []byte) *termRange {
  222. return &termRange{
  223. startTerm: minBytes,
  224. endTerm: maxBytes,
  225. }
  226. }