You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

67 lines
2.5 KiB

  1. // Copyright (c) 2019 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
  15. import (
  16. "fmt"
  17. )
  18. // LegacyChunkMode was the original chunk mode (always chunk size 1024)
  19. // this mode is still used for chunking doc values.
  20. var LegacyChunkMode uint32 = 1024
  21. // DefaultChunkMode is the most recent improvement to chunking and should
  22. // be used by default.
  23. var DefaultChunkMode uint32 = 1026
  24. func getChunkSize(chunkMode uint32, cardinality uint64, maxDocs uint64) (uint64, error) {
  25. switch {
  26. // any chunkMode <= 1024 will always chunk with chunkSize=chunkMode
  27. case chunkMode <= 1024:
  28. // legacy chunk size
  29. return uint64(chunkMode), nil
  30. case chunkMode == 1025:
  31. // attempt at simple improvement
  32. // theory - the point of chunking is to put a bound on the maximum number of
  33. // calls to Next() needed to find a random document. ie, you should be able
  34. // to do one jump to the correct chunk, and then walk through at most
  35. // chunk-size items
  36. // previously 1024 was chosen as the chunk size, but this is particularly
  37. // wasteful for low cardinality terms. the observation is that if there
  38. // are less than 1024 items, why not put them all in one chunk,
  39. // this way you'll still achieve the same goal of visiting at most
  40. // chunk-size items.
  41. // no attempt is made to tweak any other case
  42. if cardinality <= 1024 {
  43. return maxDocs, nil
  44. }
  45. return 1024, nil
  46. case chunkMode == 1026:
  47. // improve upon the ideas tested in chunkMode 1025
  48. // the observation that the fewest number of dense chunks is the most
  49. // desirable layout, given the built-in assumptions of chunking
  50. // (that we want to put an upper-bound on the number of items you must
  51. // walk over without skipping, currently tuned to 1024)
  52. //
  53. // 1. compute the number of chunks needed (max 1024/chunk)
  54. // 2. convert to chunkSize, dividing into maxDocs
  55. numChunks := (cardinality / 1024) + 1
  56. chunkSize := maxDocs / numChunks
  57. return chunkSize, nil
  58. }
  59. return 0, fmt.Errorf("unknown chunk mode %d", chunkMode)
  60. }