You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

316 lines
7.2 KiB

  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vellum
  15. import (
  16. "bytes"
  17. "encoding/binary"
  18. "fmt"
  19. "strconv"
  20. )
  21. func init() {
  22. registerDecoder(versionV1, func(data []byte) decoder {
  23. return newDecoderV1(data)
  24. })
  25. }
  // decoderV1 reads states out of a V1-encoded byte slice.
  type decoderV1 struct {
  	// data is the complete encoded input handed to newDecoderV1.
  	data []byte
  	// root and len are not assigned by newDecoderV1; getRoot and getLen
  	// read the footer of data directly rather than consulting these fields.
  	root, len uint64
  }
  31. func newDecoderV1(data []byte) *decoderV1 {
  32. return &decoderV1{
  33. data: data,
  34. }
  35. }
  36. func (d *decoderV1) getRoot() int {
  37. if len(d.data) < footerSizeV1 {
  38. return noneAddr
  39. }
  40. footer := d.data[len(d.data)-footerSizeV1:]
  41. root := binary.LittleEndian.Uint64(footer[8:])
  42. return int(root)
  43. }
  44. func (d *decoderV1) getLen() int {
  45. if len(d.data) < footerSizeV1 {
  46. return 0
  47. }
  48. footer := d.data[len(d.data)-footerSizeV1:]
  49. dlen := binary.LittleEndian.Uint64(footer)
  50. return int(dlen)
  51. }
  52. func (d *decoderV1) stateAt(addr int, prealloc fstState) (fstState, error) {
  53. state, ok := prealloc.(*fstStateV1)
  54. if ok && state != nil {
  55. *state = fstStateV1{} // clear the struct
  56. } else {
  57. state = &fstStateV1{}
  58. }
  59. err := state.at(d.data, addr)
  60. if err != nil {
  61. return nil, err
  62. }
  63. return state, nil
  64. }
  // fstStateV1 is a decoded view of one state inside V1-encoded data. States
  // are read backwards: top is the address of the state's first (highest)
  // byte and bottom moves toward lower addresses as fields are consumed.
  type fstStateV1 struct {
  	data        []byte
  	top, bottom int
  	numTrans    int

  	// populated only for states encoded with a single transition
  	singleTransChar                 byte
  	singleTransNext                 bool
  	singleTransAddr, singleTransOut uint64

  	// packed widths in bytes, used by both encodings
  	transSize, outSize int

  	// populated only for states encoded with multiple transitions;
  	// each top/bottom pair delimits a region as data[bottom:top]
  	final                 bool
  	transTop, transBottom int
  	destTop, destBottom   int
  	outTop, outBottom     int
  	outFinal              int
  }
  88. func (f *fstStateV1) isEncodedSingle() bool {
  89. if f.data[f.top]>>7 > 0 {
  90. return true
  91. }
  92. return false
  93. }
  94. func (f *fstStateV1) at(data []byte, addr int) error {
  95. f.data = data
  96. if addr == emptyAddr {
  97. return f.atZero()
  98. } else if addr == noneAddr {
  99. return f.atNone()
  100. }
  101. if addr > len(data) || addr < 16 {
  102. return fmt.Errorf("invalid address %d/%d", addr, len(data))
  103. }
  104. f.top = addr
  105. f.bottom = addr
  106. if f.isEncodedSingle() {
  107. return f.atSingle(data, addr)
  108. }
  109. return f.atMulti(data, addr)
  110. }
  111. func (f *fstStateV1) atZero() error {
  112. f.top = 0
  113. f.bottom = 1
  114. f.numTrans = 0
  115. f.final = true
  116. f.outFinal = 0
  117. return nil
  118. }
  119. func (f *fstStateV1) atNone() error {
  120. f.top = 0
  121. f.bottom = 1
  122. f.numTrans = 0
  123. f.final = false
  124. f.outFinal = 0
  125. return nil
  126. }
  127. func (f *fstStateV1) atSingle(data []byte, addr int) error {
  128. // handle single transition case
  129. f.numTrans = 1
  130. f.singleTransNext = data[f.top]&transitionNext > 0
  131. f.singleTransChar = data[f.top] & maxCommon
  132. if f.singleTransChar == 0 {
  133. f.bottom-- // extra byte for uncommon
  134. f.singleTransChar = data[f.bottom]
  135. } else {
  136. f.singleTransChar = decodeCommon(f.singleTransChar)
  137. }
  138. if f.singleTransNext {
  139. // now we know the bottom, can compute next addr
  140. f.singleTransAddr = uint64(f.bottom - 1)
  141. f.singleTransOut = 0
  142. } else {
  143. f.bottom-- // extra byte with pack sizes
  144. f.transSize, f.outSize = decodePackSize(data[f.bottom])
  145. f.bottom -= f.transSize // exactly one trans
  146. f.singleTransAddr = readPackedUint(data[f.bottom : f.bottom+f.transSize])
  147. if f.outSize > 0 {
  148. f.bottom -= f.outSize // exactly one out (could be length 0 though)
  149. f.singleTransOut = readPackedUint(data[f.bottom : f.bottom+f.outSize])
  150. } else {
  151. f.singleTransOut = 0
  152. }
  153. // need to wait till we know bottom
  154. if f.singleTransAddr != 0 {
  155. f.singleTransAddr = uint64(f.bottom) - f.singleTransAddr
  156. }
  157. }
  158. return nil
  159. }
  160. func (f *fstStateV1) atMulti(data []byte, addr int) error {
  161. // handle multiple transitions case
  162. f.final = data[f.top]&stateFinal > 0
  163. f.numTrans = int(data[f.top] & maxNumTrans)
  164. if f.numTrans == 0 {
  165. f.bottom-- // extra byte for number of trans
  166. f.numTrans = int(data[f.bottom])
  167. if f.numTrans == 1 {
  168. // can't really be 1 here, this is special case that means 256
  169. f.numTrans = 256
  170. }
  171. }
  172. f.bottom-- // extra byte with pack sizes
  173. f.transSize, f.outSize = decodePackSize(data[f.bottom])
  174. f.transTop = f.bottom
  175. f.bottom -= f.numTrans // one byte for each transition
  176. f.transBottom = f.bottom
  177. f.destTop = f.bottom
  178. f.bottom -= f.numTrans * f.transSize
  179. f.destBottom = f.bottom
  180. if f.outSize > 0 {
  181. f.outTop = f.bottom
  182. f.bottom -= f.numTrans * f.outSize
  183. f.outBottom = f.bottom
  184. if f.final {
  185. f.bottom -= f.outSize
  186. f.outFinal = f.bottom
  187. }
  188. }
  189. return nil
  190. }
  191. func (f *fstStateV1) Address() int {
  192. return f.top
  193. }
  194. func (f *fstStateV1) Final() bool {
  195. return f.final
  196. }
  197. func (f *fstStateV1) FinalOutput() uint64 {
  198. if f.numTrans > 0 && f.final && f.outSize > 0 {
  199. return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
  200. }
  201. return 0
  202. }
  203. func (f *fstStateV1) NumTransitions() int {
  204. return f.numTrans
  205. }
  206. func (f *fstStateV1) TransitionAt(i int) byte {
  207. if f.isEncodedSingle() {
  208. return f.singleTransChar
  209. }
  210. transitionKeys := f.data[f.transBottom:f.transTop]
  211. return transitionKeys[f.numTrans-i-1]
  212. }
  213. func (f *fstStateV1) TransitionFor(b byte) (int, int, uint64) {
  214. if f.isEncodedSingle() {
  215. if f.singleTransChar == b {
  216. return 0, int(f.singleTransAddr), f.singleTransOut
  217. }
  218. return -1, noneAddr, 0
  219. }
  220. transitionKeys := f.data[f.transBottom:f.transTop]
  221. pos := bytes.IndexByte(transitionKeys, b)
  222. if pos < 0 {
  223. return -1, noneAddr, 0
  224. }
  225. transDests := f.data[f.destBottom:f.destTop]
  226. dest := int(readPackedUint(transDests[pos*f.transSize : pos*f.transSize+f.transSize]))
  227. if dest > 0 {
  228. // convert delta
  229. dest = f.bottom - dest
  230. }
  231. transVals := f.data[f.outBottom:f.outTop]
  232. var out uint64
  233. if f.outSize > 0 {
  234. out = readPackedUint(transVals[pos*f.outSize : pos*f.outSize+f.outSize])
  235. }
  236. return f.numTrans - pos - 1, dest, out
  237. }
  238. func (f *fstStateV1) String() string {
  239. rv := ""
  240. rv += fmt.Sprintf("State: %d (%#x)", f.top, f.top)
  241. if f.final {
  242. rv += " final"
  243. fout := f.FinalOutput()
  244. if fout != 0 {
  245. rv += fmt.Sprintf(" (%d)", fout)
  246. }
  247. }
  248. rv += "\n"
  249. rv += fmt.Sprintf("Data: % x\n", f.data[f.bottom:f.top+1])
  250. for i := 0; i < f.numTrans; i++ {
  251. transChar := f.TransitionAt(i)
  252. _, transDest, transOut := f.TransitionFor(transChar)
  253. rv += fmt.Sprintf(" - %d (%#x) '%s' ---> %d (%#x) with output: %d", transChar, transChar, string(transChar), transDest, transDest, transOut)
  254. rv += "\n"
  255. }
  256. if f.numTrans == 0 {
  257. rv += "\n"
  258. }
  259. return rv
  260. }
  261. func (f *fstStateV1) DotString(num int) string {
  262. rv := ""
  263. label := fmt.Sprintf("%d", num)
  264. final := ""
  265. if f.final {
  266. final = ",peripheries=2"
  267. }
  268. rv += fmt.Sprintf(" %d [label=\"%s\"%s];\n", f.top, label, final)
  269. for i := 0; i < f.numTrans; i++ {
  270. transChar := f.TransitionAt(i)
  271. _, transDest, transOut := f.TransitionFor(transChar)
  272. out := ""
  273. if transOut != 0 {
  274. out = fmt.Sprintf("/%d", transOut)
  275. }
  276. rv += fmt.Sprintf(" %d -> %d [label=\"%s%s\"];\n", f.top, transDest, escapeInput(transChar), out)
  277. }
  278. return rv
  279. }
  // escapeInput returns b as text that is safe to place between double
  // quotes, which is how DotString embeds it in a DOT label. The byte is
  // treated as the rune of the same value and escaped with Go's
  // double-quoted string rules, so '"' becomes \" (keeping the label
  // closed), a single quote is left as-is, and control or non-printable
  // values become backslash escapes such as \n or \x00.
  func escapeInput(b byte) string {
  	quoted := strconv.Quote(string(rune(b)))
  	// Drop the surrounding double quotes added by Quote.
  	return quoted[1 : len(quoted)-1]
  }