You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

111 lines
3.3 KiB

  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /*
  15. Package vellum is a library for building, serializing and executing an FST (finite
  16. state transducer).
  17. There are two distinct phases, building an FST and using it.
  18. When building an FST, you insert keys ([]byte) and their associated value
  19. (uint64). Insert operations MUST be done in lexicographic order. While
  20. building the FST, data is streamed to an underlying Writer. At the conclusion
  21. of building, you MUST call Close() on the builder.
  22. After completion of the build phase, you can Open() the FST if you
  23. serialized it to disk. Alternatively, if you already have the bytes in
  24. memory, you can use Load(). By default, Open() will use mmap to avoid loading
  25. the entire file into memory.
  26. Once the FST is ready, you can use the Contains() method to see if a key is
  27. in the FST. You can use the Get() method to see if a key is in the FST and
  28. retrieve its associated value. And, you can use the Iterator method to
  29. enumerate key/value pairs within a specified range.
  30. */
  31. package vellum
  32. import (
  33. "errors"
  34. "io"
  35. )
  36. // ErrOutOfOrder is returned when values are not inserted in
  37. // lexicographic order.
  38. var ErrOutOfOrder = errors.New("values not inserted in lexicographic order")
  39. // ErrIteratorDone is returned by Iterator/Next/Seek methods when the
  40. // Current() value pointed to by the iterator is greater than the last
  41. // key in this FST, or outside the configured startKeyInclusive/endKeyExclusive
  42. // range of the Iterator.
  43. var ErrIteratorDone = errors.New("iterator-done")
  44. // BuilderOpts is a structure to let advanced users customize the behavior
  45. // of the builder and some aspects of the generated FST.
  46. type BuilderOpts struct {
  47. Encoder int
  48. RegistryTableSize int
  49. RegistryMRUSize int
  50. }
  51. // New returns a new Builder which will stream out the
  52. // underlying representation to the provided Writer as the set is built.
  53. func New(w io.Writer, opts *BuilderOpts) (*Builder, error) {
  54. return newBuilder(w, opts)
  55. }
  56. // Open loads the FST stored in the provided path
  57. func Open(path string) (*FST, error) {
  58. return open(path)
  59. }
  60. // Load will return the FST represented by the provided byte slice.
  61. func Load(data []byte) (*FST, error) {
  62. return new(data, nil)
  63. }
  64. // Merge will iterate through the provided Iterators, merge duplicate keys
  65. // with the provided MergeFunc, and build a new FST to the provided Writer.
  66. func Merge(w io.Writer, opts *BuilderOpts, itrs []Iterator, f MergeFunc) error {
  67. builder, err := New(w, opts)
  68. if err != nil {
  69. return err
  70. }
  71. itr, err := NewMergeIterator(itrs, f)
  72. for err == nil {
  73. k, v := itr.Current()
  74. err = builder.Insert(k, v)
  75. if err != nil {
  76. return err
  77. }
  78. err = itr.Next()
  79. }
  80. if err != nil && err != ErrIteratorDone {
  81. return err
  82. }
  83. err = itr.Close()
  84. if err != nil {
  85. return err
  86. }
  87. err = builder.Close()
  88. if err != nil {
  89. return err
  90. }
  91. return nil
  92. }