You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

130 lines
3.5 KiB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package document
  15. import (
  16. "fmt"
  17. "github.com/blevesearch/bleve/analysis"
  18. "github.com/blevesearch/bleve/numeric"
  19. )
  20. const DefaultNumericIndexingOptions = StoreField | IndexField
  21. const DefaultPrecisionStep uint = 4
  22. type NumericField struct {
  23. name string
  24. arrayPositions []uint64
  25. options IndexingOptions
  26. value numeric.PrefixCoded
  27. numPlainTextBytes uint64
  28. }
  29. func (n *NumericField) Name() string {
  30. return n.name
  31. }
  32. func (n *NumericField) ArrayPositions() []uint64 {
  33. return n.arrayPositions
  34. }
  35. func (n *NumericField) Options() IndexingOptions {
  36. return n.options
  37. }
  38. func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
  39. tokens := make(analysis.TokenStream, 0)
  40. tokens = append(tokens, &analysis.Token{
  41. Start: 0,
  42. End: len(n.value),
  43. Term: n.value,
  44. Position: 1,
  45. Type: analysis.Numeric,
  46. })
  47. original, err := n.value.Int64()
  48. if err == nil {
  49. shift := DefaultPrecisionStep
  50. for shift < 64 {
  51. shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
  52. if err != nil {
  53. break
  54. }
  55. token := analysis.Token{
  56. Start: 0,
  57. End: len(shiftEncoded),
  58. Term: shiftEncoded,
  59. Position: 1,
  60. Type: analysis.Numeric,
  61. }
  62. tokens = append(tokens, &token)
  63. shift += DefaultPrecisionStep
  64. }
  65. }
  66. fieldLength := len(tokens)
  67. tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
  68. return fieldLength, tokenFreqs
  69. }
  70. func (n *NumericField) Value() []byte {
  71. return n.value
  72. }
  73. func (n *NumericField) Number() (float64, error) {
  74. i64, err := n.value.Int64()
  75. if err != nil {
  76. return 0.0, err
  77. }
  78. return numeric.Int64ToFloat64(i64), nil
  79. }
  80. func (n *NumericField) GoString() string {
  81. return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
  82. }
  83. func (n *NumericField) NumPlainTextBytes() uint64 {
  84. return n.numPlainTextBytes
  85. }
  86. func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
  87. return &NumericField{
  88. name: name,
  89. arrayPositions: arrayPositions,
  90. value: value,
  91. options: DefaultNumericIndexingOptions,
  92. numPlainTextBytes: uint64(len(value)),
  93. }
  94. }
  95. func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
  96. return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions)
  97. }
  98. func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField {
  99. numberInt64 := numeric.Float64ToInt64(number)
  100. prefixCoded := numeric.MustNewPrefixCodedInt64(numberInt64, 0)
  101. return &NumericField{
  102. name: name,
  103. arrayPositions: arrayPositions,
  104. value: prefixCoded,
  105. options: options,
  106. // not correct, just a place holder until we revisit how fields are
  107. // represented and can fix this better
  108. numPlainTextBytes: uint64(8),
  109. }
  110. }