You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

229 lines
5.7 KiB

  1. package govarint
  import (
  	"encoding/binary"
  	"errors"
  	"io"
  )
  // U32VarintEncoder is implemented by encoders that write uint32 values in a
  // variable-length format.
  //
  // PutU32 encodes x and returns a byte count together with any error from the
  // underlying writer. The signature matches the PutU32 methods of
  // U32GroupVarintEncoder and Base128Encoder; the previous single-result form
  // (PutU32(x uint32) int) could not be satisfied by either of them.
  //
  // Close gives the encoder a chance to write out values it is still holding.
  type U32VarintEncoder interface {
  	PutU32(x uint32) (int, error)
  	Close()
  }
  // U32VarintDecoder is the reading counterpart of a uint32 varint encoder.
  // Each GetU32 call yields the next decoded value, or a non-nil error when no
  // value could be produced.
  type U32VarintDecoder interface {
  	GetU32() (value uint32, err error)
  }
  11. ///
  // U64VarintEncoder is implemented by encoders that write uint64 values in a
  // variable-length format.
  //
  // PutU64 encodes x and returns a byte count together with any error from the
  // underlying writer. The signature matches Base128Encoder.PutU64; the
  // previous single-result form (PutU64(x uint64) int) could not be satisfied
  // by it.
  //
  // Close gives the encoder a chance to write out values it is still holding.
  type U64VarintEncoder interface {
  	PutU64(x uint64) (int, error)
  	Close()
  }
  // U64VarintDecoder is the reading counterpart of a uint64 varint encoder.
  // Each GetU64 call yields the next decoded value, or a non-nil error when no
  // value could be produced.
  type U64VarintDecoder interface {
  	GetU64() (value uint64, err error)
  }
  19. ///
  // U32GroupVarintEncoder writes uint32 values in groups of four. Each group is
  // one size byte followed by the four values, every value stored big-endian in
  // the fewest bytes (1-4) that hold it. The size byte carries four 2-bit
  // fields, most significant first, each holding "byte count minus one".
  type U32GroupVarintEncoder struct {
  	w     io.Writer
  	index int       // number of values currently buffered in store
  	store [4]uint32 // values of the group being assembled
  	temp  [17]byte  // scratch: 1 size byte + at most 4*4 value bytes
  }

  // NewU32GroupVarintEncoder returns an encoder that writes its groups to w.
  func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder {
  	return &U32GroupVarintEncoder{w: w}
  }

  // Flush writes the buffered values as one group and empties the buffer.
  //
  // A group with fewer than four values is written with its trailing
  // placeholder bytes removed; the decoder recognises such a group only by
  // hitting EOF, so a partial group must be the last thing in the stream.
  //
  // It returns the byte count and error reported by the underlying writer.
  // The buffer is emptied even when the write fails, so the encoder never
  // re-emits a group and never overruns its four-slot store afterwards.
  func (b *U32GroupVarintEncoder) Flush() (int, error) {
  	// Nothing buffered, nothing to write.
  	if b.index == 0 {
  		return 0, nil
  	}

  	pending := b.index
  	// Reset first: whatever happens to the write, the next PutU32 must start
  	// a fresh group instead of indexing past the end of store.
  	b.index = 0

  	// Unused slots must be zero so that their size fields are zero too.
  	for i := pending; i < len(b.store); i++ {
  		b.store[i] = 0
  	}

  	// The size byte is built with ORs, so it has to start from zero.
  	b.temp[0] = 0
  	length := 1
  	shifts := [...]uint{24, 16, 8, 0}
  	for i, x := range b.store {
  		size := byte(0)
  		for _, shift := range shifts {
  			// Skip leading zero bytes, but always emit the lowest byte so
  			// that zero itself occupies one byte.
  			if x>>shift != 0 || shift == 0 {
  				b.temp[length] = byte(x >> shift)
  				length++
  				size++
  			}
  		}
  		// Two bits per value, first value in the top bits.
  		b.temp[0] |= (size - 1) << (uint(3-i) * 2)
  	}

  	// Every unused slot contributed exactly one zero byte; drop them so the
  	// decoder can tell a partial group from a full one.
  	length -= len(b.store) - pending

  	return b.w.Write(b.temp[:length])
  }

  // PutU32 buffers x and, once four values are buffered, writes the group.
  // It returns the number of bytes written by this call (zero while the group
  // is still filling up) and any error from the underlying writer.
  func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
  	b.store[b.index] = x
  	b.index++
  	if b.index < len(b.store) {
  		return 0, nil
  	}
  	return b.Flush()
  }

  // Close writes any values still buffered as a final, possibly partial, group.
  //
  // Close has no result, so a failed final write cannot be reported here;
  // callers that need the error should call Flush themselves before Close.
  func (b *U32GroupVarintEncoder) Close() {
  	_, _ = b.Flush()
  }
  83. ///
  // U32GroupVarintDecoder reads uint32 values written in groups of four: one
  // size byte holding four 2-bit "byte count minus one" fields (first value in
  // the top bits), followed by the values stored big-endian.
  type U32GroupVarintDecoder struct {
  	r        io.ByteReader
  	group    [4]uint32 // values of the most recently decoded group
  	pos      int       // index in group of the next value to hand out
  	finished bool      // set once the final, partial group has been decoded
  	capacity int       // number of valid entries in group
  }

  // NewU32GroupVarintDecoder returns a decoder that reads from r. pos and
  // capacity start out equal so that the first GetU32 call decodes a group.
  func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
  	return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
  }

  // readValue reads an n-byte big-endian value. It returns the value, the
  // number of bytes consumed before any error, and the first read error.
  func (b *U32GroupVarintDecoder) readValue(n int) (uint32, int, error) {
  	var v uint32
  	for i := 0; i < n; i++ {
  		c, err := b.r.ReadByte()
  		if err != nil {
  			return 0, i, err
  		}
  		v = v<<8 | uint32(c)
  	}
  	return v, n, nil
  }

  // getGroup decodes the next group into b.group and rewinds b.pos.
  //
  // EOF on the size byte is returned unchanged (clean end of stream). EOF
  // exactly on a value boundary after at least one value marks a final,
  // partial group. EOF anywhere else (directly after the size byte or in the
  // middle of a value) means the stream is truncated and is reported as
  // io.ErrUnexpectedEOF; values decoded earlier in that group are discarded.
  // Any other read error is returned as is.
  func (b *U32GroupVarintDecoder) getGroup() error {
  	// A size byte must be present whenever there are more values to read.
  	sizeByte, err := b.r.ReadByte()
  	if err != nil {
  		return err
  	}

  	for i := range b.group {
  		// 0b00 means one byte, 0b01 two, and so on.
  		width := (int(sizeByte>>(uint(3-i)*2)) & 3) + 1

  		// Every byte read is checked: a reader is not required to repeat an
  		// earlier error on a later call.
  		v, consumed, err := b.readValue(width)
  		if err == io.EOF && i > 0 && consumed == 0 {
  			// Partial final group: hand out what was read, then report EOF.
  			b.capacity = i
  			b.finished = true
  			break
  		}
  		if err == io.EOF {
  			return io.ErrUnexpectedEOF
  		}
  		if err != nil {
  			return err
  		}
  		b.group[i] = v
  	}

  	// Start handing out values from the beginning of the new group.
  	b.pos = 0
  	return nil
  }

  // GetU32 returns the next decoded value. It returns io.EOF once the stream is
  // exhausted, io.ErrUnexpectedEOF if the stream ends inside a group, or the
  // underlying reader's error.
  func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
  	// Current group used up: decode the next one unless the stream has ended.
  	if b.pos == b.capacity {
  		if b.finished {
  			return 0, io.EOF
  		}
  		if err := b.getGroup(); err != nil {
  			return 0, err
  		}
  	}
  	v := b.group[b.pos]
  	b.pos++
  	return v, nil
  }
  168. ///
  // Base128Encoder writes unsigned integers to w as base-128 varints using
  // encoding/binary's PutUvarint.
  type Base128Encoder struct {
  	w        io.Writer
  	tmpBytes []byte // scratch buffer the varint is built in before writing
  }

  // newBase128Encoder builds an encoder whose scratch buffer can hold the
  // longest possible varint.
  func newBase128Encoder(w io.Writer) *Base128Encoder {
  	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
  }

  // NewU32Base128Encoder returns an encoder that writes to w.
  //
  // The scratch buffer is sized for 64-bit values even here, because the
  // returned type also exposes PutU64 and binary.PutUvarint panics when the
  // buffer is too small.
  func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
  	return newBase128Encoder(w)
  }

  // NewU64Base128Encoder returns an encoder that writes to w.
  func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
  	return newBase128Encoder(w)
  }

  // PutU32 writes x as a varint and returns the writer's byte count and error.
  func (b *Base128Encoder) PutU32(x uint32) (int, error) {
  	return b.PutU64(uint64(x))
  }

  // PutU64 writes x as a varint and returns the writer's byte count and error.
  func (b *Base128Encoder) PutU64(x uint64) (int, error) {
  	// Guards encoders that were not built by a constructor (for example a
  	// struct literal without a scratch buffer).
  	if len(b.tmpBytes) < binary.MaxVarintLen64 {
  		b.tmpBytes = make([]byte, binary.MaxVarintLen64)
  	}
  	n := binary.PutUvarint(b.tmpBytes, x)
  	return b.w.Write(b.tmpBytes[:n])
  }

  // Close is a no-op: every value is written as soon as it is put.
  func (b *Base128Encoder) Close() {
  }
  189. ///
  // ErrU32Overflow is returned by Base128Decoder.GetU32 when the decoded varint
  // does not fit in 32 bits.
  var ErrU32Overflow = errors.New("govarint: varint overflows a 32-bit integer")

  // Base128Decoder reads base-128 varints from r using encoding/binary's
  // ReadUvarint.
  type Base128Decoder struct {
  	r io.ByteReader
  }

  // NewU32Base128Decoder returns a decoder that reads from r.
  func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder {
  	return &Base128Decoder{r: r}
  }

  // NewU64Base128Decoder returns a decoder that reads from r.
  func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder {
  	return &Base128Decoder{r: r}
  }

  // GetU32 reads the next varint and returns it as a uint32. It returns the
  // error from binary.ReadUvarint if reading fails, and ErrU32Overflow if the
  // value read is larger than a uint32 can hold (instead of silently dropping
  // the high bits). The value is zero whenever the error is non-nil.
  func (b *Base128Decoder) GetU32() (uint32, error) {
  	v, err := binary.ReadUvarint(b.r)
  	if err != nil {
  		return 0, err
  	}
  	if v>>32 != 0 {
  		return 0, ErrU32Overflow
  	}
  	return uint32(v), nil
  }

  // GetU64 reads the next varint and returns it with binary.ReadUvarint's
  // error.
  func (b *Base128Decoder) GetU64() (uint64, error) {
  	return binary.ReadUvarint(b.r)
  }
  201. }