You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

893 lines
23 KiB

  1. package roaring
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. snappy "github.com/glycerine/go-unsnap-stream"
  9. "github.com/tinylib/msgp/msgp"
  10. )
  11. //go:generate msgp -unexported
  12. type container interface {
  13. clone() container
  14. and(container) container
  15. andCardinality(container) int
  16. iand(container) container // i stands for inplace
  17. andNot(container) container
  18. iandNot(container) container // i stands for inplace
  19. getCardinality() int
  20. // rank returns the number of integers that are
  21. // smaller or equal to x. rank(infinity) would be getCardinality().
  22. rank(uint16) int
  23. iadd(x uint16) bool // inplace, returns true if x was new.
  24. iaddReturnMinimized(uint16) container // may change return type to minimize storage.
  25. //addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
  26. iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
  27. iremove(x uint16) bool // inplace, returns true if x was present.
  28. iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
  29. not(start, final int) container // range is [firstOfRange,lastOfRange)
  30. inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
  31. xor(r container) container
  32. getShortIterator() shortIterable
  33. getManyIterator() manyIterable
  34. contains(i uint16) bool
  35. maximum() uint16
  36. minimum() uint16
  37. // equals is now logical equals; it does not require the
  38. // same underlying container types, but compares across
  39. // any of the implementations.
  40. equals(r container) bool
  41. fillLeastSignificant16bits(array []uint32, i int, mask uint32)
  42. or(r container) container
  43. orCardinality(r container) int
  44. isFull() bool
  45. ior(r container) container // i stands for inplace
  46. intersects(r container) bool // whether the two containers intersect
  47. lazyOR(r container) container
  48. lazyIOR(r container) container
  49. getSizeInBytes() int
  50. //removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
  51. iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
  52. selectInt(x uint16) int // selectInt returns the xth integer in the container
  53. serializedSizeInBytes() int
  54. readFrom(io.Reader) (int, error)
  55. writeTo(io.Writer) (int, error)
  56. numberOfRuns() int
  57. toEfficientContainer() container
  58. String() string
  59. containerType() contype
  60. }
  61. type contype uint8
  62. const (
  63. bitmapContype contype = iota
  64. arrayContype
  65. run16Contype
  66. run32Contype
  67. )
  68. // careful: range is [firstOfRange,lastOfRange]
  69. func rangeOfOnes(start, last int) container {
  70. if start > MaxUint16 {
  71. panic("rangeOfOnes called with start > MaxUint16")
  72. }
  73. if last > MaxUint16 {
  74. panic("rangeOfOnes called with last > MaxUint16")
  75. }
  76. if start < 0 {
  77. panic("rangeOfOnes called with start < 0")
  78. }
  79. if last < 0 {
  80. panic("rangeOfOnes called with last < 0")
  81. }
  82. return newRunContainer16Range(uint16(start), uint16(last))
  83. }
// roaringArray is the top-level index of a bitmap. It is laid out as parallel
// slices: position i of keys, containers and needCopyOnWrite all describe the
// same entry (appendContainer grows the three together).
type roaringArray struct {
	// keys holds one 16-bit key per entry; binarySearch assumes they are
	// kept in ascending order.
	keys []uint16
	// containers holds the payload for each key.
	containers []container `msg:"-"` // don't try to serialize directly.
	// needCopyOnWrite[i] set means containers[i] must be cloned before it
	// is mutated (see getWritableContainerAtIndex).
	needCopyOnWrite []bool
	// copyOnWrite selects lazy sharing: when set, clone() shares the
	// containers and marks them needCopyOnWrite instead of deep-copying.
	copyOnWrite bool
	// conserz is used at serialization time
	// to serialize containers. Otherwise empty.
	conserz []containerSerz
}
// containerSerz facilitates serializing container (tricky to
// serialize because it is an interface) by providing a
// light wrapper with a type identifier.
//
// writeToMsgpack fills one containerSerz per container and
// readFromMsgpack uses t to pick the concrete type to unmarshal r into.
type containerSerz struct {
	t contype  `msg:"t"` // type
	r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type
}
  100. func newRoaringArray() *roaringArray {
  101. return &roaringArray{}
  102. }
  103. // runOptimize compresses the element containers to minimize space consumed.
  104. // Q: how does this interact with copyOnWrite and needCopyOnWrite?
  105. // A: since we aren't changing the logical content, just the representation,
  106. // we don't bother to check the needCopyOnWrite bits. We replace
  107. // (possibly all) elements of ra.containers in-place with space
  108. // optimized versions.
  109. func (ra *roaringArray) runOptimize() {
  110. for i := range ra.containers {
  111. ra.containers[i] = ra.containers[i].toEfficientContainer()
  112. }
  113. }
  114. func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
  115. ra.keys = append(ra.keys, key)
  116. ra.containers = append(ra.containers, value)
  117. ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
  118. }
  119. func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
  120. mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
  121. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
  122. }
  123. func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
  124. // cow only if the two request it, or if we already have a lightweight copy
  125. copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
  126. if !copyonwrite {
  127. // since there is no copy-on-write, we need to clone the container (this is important)
  128. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
  129. } else {
  130. ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
  131. if !sa.needsCopyOnWrite(startingindex) {
  132. sa.setNeedsCopyOnWrite(startingindex)
  133. }
  134. }
  135. }
  136. func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
  137. for i := startingindex; i < end; i++ {
  138. ra.appendWithoutCopy(sa, i)
  139. }
  140. }
  141. func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
  142. for i := startingindex; i < end; i++ {
  143. ra.appendCopy(sa, i)
  144. }
  145. }
  146. func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
  147. // cow only if the two request it, or if we already have a lightweight copy
  148. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  149. for i := 0; i < sa.size(); i++ {
  150. if sa.keys[i] >= stoppingKey {
  151. break
  152. }
  153. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  154. if thiscopyonewrite {
  155. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  156. if !sa.needsCopyOnWrite(i) {
  157. sa.setNeedsCopyOnWrite(i)
  158. }
  159. } else {
  160. // since there is no copy-on-write, we need to clone the container (this is important)
  161. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  162. }
  163. }
  164. }
  165. func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
  166. // cow only if the two request it, or if we already have a lightweight copy
  167. copyonwrite := ra.copyOnWrite && sa.copyOnWrite
  168. startLocation := sa.getIndex(beforeStart)
  169. if startLocation >= 0 {
  170. startLocation++
  171. } else {
  172. startLocation = -startLocation - 1
  173. }
  174. for i := startLocation; i < sa.size(); i++ {
  175. thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
  176. if thiscopyonewrite {
  177. ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
  178. if !sa.needsCopyOnWrite(i) {
  179. sa.setNeedsCopyOnWrite(i)
  180. }
  181. } else {
  182. // since there is no copy-on-write, we need to clone the container (this is important)
  183. ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
  184. }
  185. }
  186. }
  187. func (ra *roaringArray) removeIndexRange(begin, end int) {
  188. if end <= begin {
  189. return
  190. }
  191. r := end - begin
  192. copy(ra.keys[begin:], ra.keys[end:])
  193. copy(ra.containers[begin:], ra.containers[end:])
  194. copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
  195. ra.resize(len(ra.keys) - r)
  196. }
  197. func (ra *roaringArray) resize(newsize int) {
  198. for k := newsize; k < len(ra.containers); k++ {
  199. ra.containers[k] = nil
  200. }
  201. ra.keys = ra.keys[:newsize]
  202. ra.containers = ra.containers[:newsize]
  203. ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
  204. }
  205. func (ra *roaringArray) clear() {
  206. ra.resize(0)
  207. ra.copyOnWrite = false
  208. ra.conserz = nil
  209. }
  210. func (ra *roaringArray) clone() *roaringArray {
  211. sa := roaringArray{}
  212. sa.copyOnWrite = ra.copyOnWrite
  213. // this is where copyOnWrite is used.
  214. if ra.copyOnWrite {
  215. sa.keys = make([]uint16, len(ra.keys))
  216. copy(sa.keys, ra.keys)
  217. sa.containers = make([]container, len(ra.containers))
  218. copy(sa.containers, ra.containers)
  219. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  220. ra.markAllAsNeedingCopyOnWrite()
  221. sa.markAllAsNeedingCopyOnWrite()
  222. // sa.needCopyOnWrite is shared
  223. } else {
  224. // make a full copy
  225. sa.keys = make([]uint16, len(ra.keys))
  226. copy(sa.keys, ra.keys)
  227. sa.containers = make([]container, len(ra.containers))
  228. for i := range sa.containers {
  229. sa.containers[i] = ra.containers[i].clone()
  230. }
  231. sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
  232. }
  233. return &sa
  234. }
  235. // unused function:
  236. //func (ra *roaringArray) containsKey(x uint16) bool {
  237. // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
  238. //}
  239. func (ra *roaringArray) getContainer(x uint16) container {
  240. i := ra.binarySearch(0, int64(len(ra.keys)), x)
  241. if i < 0 {
  242. return nil
  243. }
  244. return ra.containers[i]
  245. }
  246. func (ra *roaringArray) getContainerAtIndex(i int) container {
  247. return ra.containers[i]
  248. }
  249. func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
  250. c := ra.getContainerAtIndex(i)
  251. switch t := c.(type) {
  252. case *arrayContainer:
  253. c = t.toBitmapContainer()
  254. case *runContainer16:
  255. if !t.isFull() {
  256. c = t.toBitmapContainer()
  257. }
  258. case *bitmapContainer:
  259. if needsWriteable && ra.needCopyOnWrite[i] {
  260. c = ra.containers[i].clone()
  261. }
  262. }
  263. return c
  264. }
  265. func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
  266. if ra.needCopyOnWrite[i] {
  267. ra.containers[i] = ra.containers[i].clone()
  268. ra.needCopyOnWrite[i] = false
  269. }
  270. return ra.containers[i]
  271. }
  272. func (ra *roaringArray) getIndex(x uint16) int {
  273. // before the binary search, we optimize for frequent cases
  274. size := len(ra.keys)
  275. if (size == 0) || (ra.keys[size-1] == x) {
  276. return size - 1
  277. }
  278. return ra.binarySearch(0, int64(size), x)
  279. }
  280. func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
  281. return ra.keys[i]
  282. }
  283. func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
  284. ra.keys = append(ra.keys, 0)
  285. ra.containers = append(ra.containers, nil)
  286. copy(ra.keys[i+1:], ra.keys[i:])
  287. copy(ra.containers[i+1:], ra.containers[i:])
  288. ra.keys[i] = key
  289. ra.containers[i] = value
  290. ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
  291. copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
  292. ra.needCopyOnWrite[i] = false
  293. }
  294. func (ra *roaringArray) remove(key uint16) bool {
  295. i := ra.binarySearch(0, int64(len(ra.keys)), key)
  296. if i >= 0 { // if a new key
  297. ra.removeAtIndex(i)
  298. return true
  299. }
  300. return false
  301. }
  302. func (ra *roaringArray) removeAtIndex(i int) {
  303. copy(ra.keys[i:], ra.keys[i+1:])
  304. copy(ra.containers[i:], ra.containers[i+1:])
  305. copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
  306. ra.resize(len(ra.keys) - 1)
  307. }
  308. func (ra *roaringArray) setContainerAtIndex(i int, c container) {
  309. ra.containers[i] = c
  310. }
  311. func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
  312. ra.keys[i] = key
  313. ra.containers[i] = c
  314. ra.needCopyOnWrite[i] = mustCopyOnWrite
  315. }
  316. func (ra *roaringArray) size() int {
  317. return len(ra.keys)
  318. }
  319. func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
  320. low := begin
  321. high := end - 1
  322. for low+16 <= high {
  323. middleIndex := low + (high-low)/2 // avoid overflow
  324. middleValue := ra.keys[middleIndex]
  325. if middleValue < ikey {
  326. low = middleIndex + 1
  327. } else if middleValue > ikey {
  328. high = middleIndex - 1
  329. } else {
  330. return int(middleIndex)
  331. }
  332. }
  333. for ; low <= high; low++ {
  334. val := ra.keys[low]
  335. if val >= ikey {
  336. if val == ikey {
  337. return int(low)
  338. }
  339. break
  340. }
  341. }
  342. return -int(low + 1)
  343. }
  344. func (ra *roaringArray) equals(o interface{}) bool {
  345. srb, ok := o.(roaringArray)
  346. if ok {
  347. if srb.size() != ra.size() {
  348. return false
  349. }
  350. for i, k := range ra.keys {
  351. if k != srb.keys[i] {
  352. return false
  353. }
  354. }
  355. for i, c := range ra.containers {
  356. if !c.equals(srb.containers[i]) {
  357. return false
  358. }
  359. }
  360. return true
  361. }
  362. return false
  363. }
  364. func (ra *roaringArray) headerSize() uint64 {
  365. size := uint64(len(ra.keys))
  366. if ra.hasRunCompression() {
  367. if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
  368. return 4 + (size+7)/8 + 4*size
  369. }
  370. return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
  371. }
  372. return 4 + 4 + 8*size
  373. }
  374. // should be dirt cheap
  375. func (ra *roaringArray) serializedSizeInBytes() uint64 {
  376. answer := ra.headerSize()
  377. for _, c := range ra.containers {
  378. answer += uint64(c.serializedSizeInBytes())
  379. }
  380. return answer
  381. }
  382. //
  383. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  384. //
  385. func (ra *roaringArray) toBytes() ([]byte, error) {
  386. stream := &bytes.Buffer{}
  387. hasRun := ra.hasRunCompression()
  388. isRunSizeInBytes := 0
  389. cookieSize := 8
  390. if hasRun {
  391. cookieSize = 4
  392. isRunSizeInBytes = (len(ra.keys) + 7) / 8
  393. }
  394. descriptiveHeaderSize := 4 * len(ra.keys)
  395. preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
  396. buf := make([]byte, preambleSize+4*len(ra.keys))
  397. nw := 0
  398. if hasRun {
  399. binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
  400. nw += 2
  401. binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
  402. nw += 2
  403. // compute isRun bitmap
  404. var ir []byte
  405. isRun := newBitmapContainer()
  406. for i, c := range ra.containers {
  407. switch c.(type) {
  408. case *runContainer16:
  409. isRun.iadd(uint16(i))
  410. }
  411. }
  412. // convert to little endian
  413. ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
  414. nw += copy(buf[nw:], ir)
  415. } else {
  416. binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
  417. nw += 4
  418. binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
  419. nw += 4
  420. }
  421. // descriptive header
  422. for i, key := range ra.keys {
  423. binary.LittleEndian.PutUint16(buf[nw:], key)
  424. nw += 2
  425. c := ra.containers[i]
  426. binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
  427. nw += 2
  428. }
  429. startOffset := int64(preambleSize + 4*len(ra.keys))
  430. if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
  431. // offset header
  432. for _, c := range ra.containers {
  433. binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
  434. nw += 4
  435. switch rc := c.(type) {
  436. case *runContainer16:
  437. startOffset += 2 + int64(len(rc.iv))*4
  438. default:
  439. startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
  440. }
  441. }
  442. }
  443. _, err := stream.Write(buf[:nw])
  444. if err != nil {
  445. return nil, err
  446. }
  447. for i, c := range ra.containers {
  448. _ = i
  449. _, err := c.writeTo(stream)
  450. if err != nil {
  451. return nil, err
  452. }
  453. }
  454. return stream.Bytes(), nil
  455. }
  456. //
  457. // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
  458. //
  459. func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
  460. by, err := ra.toBytes()
  461. if err != nil {
  462. return 0, err
  463. }
  464. n, err := out.Write(by)
  465. if err == nil && n < len(by) {
  466. err = io.ErrShortWrite
  467. }
  468. return int64(n), err
  469. }
  470. func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
  471. pos := 0
  472. if len(buf) < 8 {
  473. return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
  474. }
  475. cookie := binary.LittleEndian.Uint32(buf)
  476. pos += 4
  477. var size uint32 // number of containers
  478. haveRunContainers := false
  479. var isRunBitmap []byte
  480. // cookie header
  481. if cookie&0x0000FFFF == serialCookie {
  482. haveRunContainers = true
  483. size = uint32(uint16(cookie>>16) + 1) // number of containers
  484. // create is-run-container bitmap
  485. isRunBitmapSize := (int(size) + 7) / 8
  486. if pos+isRunBitmapSize > len(buf) {
  487. return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
  488. }
  489. isRunBitmap = buf[pos : pos+isRunBitmapSize]
  490. pos += isRunBitmapSize
  491. } else if cookie == serialCookieNoRunContainer {
  492. size = binary.LittleEndian.Uint32(buf[pos:])
  493. pos += 4
  494. } else {
  495. return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
  496. }
  497. if size > (1 << 16) {
  498. return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
  499. }
  500. // descriptive header
  501. // keycard - is {key, cardinality} tuple slice
  502. if pos+2*2*int(size) > len(buf) {
  503. return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
  504. }
  505. keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
  506. pos += 2 * 2 * int(size)
  507. if !haveRunContainers || size >= noOffsetThreshold {
  508. pos += 4 * int(size)
  509. }
  510. // Allocate slices upfront as number of containers is known
  511. if cap(ra.containers) >= int(size) {
  512. ra.containers = ra.containers[:size]
  513. } else {
  514. ra.containers = make([]container, size)
  515. }
  516. if cap(ra.keys) >= int(size) {
  517. ra.keys = ra.keys[:size]
  518. } else {
  519. ra.keys = make([]uint16, size)
  520. }
  521. if cap(ra.needCopyOnWrite) >= int(size) {
  522. ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
  523. } else {
  524. ra.needCopyOnWrite = make([]bool, size)
  525. }
  526. for i := uint32(0); i < size; i++ {
  527. key := uint16(keycard[2*i])
  528. card := int(keycard[2*i+1]) + 1
  529. ra.keys[i] = key
  530. ra.needCopyOnWrite[i] = true
  531. if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
  532. // run container
  533. nr := binary.LittleEndian.Uint16(buf[pos:])
  534. pos += 2
  535. if pos+int(nr)*4 > len(buf) {
  536. return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
  537. }
  538. nb := runContainer16{
  539. iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
  540. card: int64(card),
  541. }
  542. pos += int(nr) * 4
  543. ra.containers[i] = &nb
  544. } else if card > arrayDefaultMaxSize {
  545. // bitmap container
  546. nb := bitmapContainer{
  547. cardinality: card,
  548. bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
  549. }
  550. pos += arrayDefaultMaxSize * 2
  551. ra.containers[i] = &nb
  552. } else {
  553. // array container
  554. nb := arrayContainer{
  555. byteSliceAsUint16Slice(buf[pos : pos+card*2]),
  556. }
  557. pos += card * 2
  558. ra.containers[i] = &nb
  559. }
  560. }
  561. return int64(pos), nil
  562. }
  563. func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
  564. pos := 0
  565. var cookie uint32
  566. err := binary.Read(stream, binary.LittleEndian, &cookie)
  567. if err != nil {
  568. return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
  569. }
  570. pos += 4
  571. var size uint32
  572. haveRunContainers := false
  573. var isRun *bitmapContainer
  574. if cookie&0x0000FFFF == serialCookie {
  575. haveRunContainers = true
  576. size = uint32(uint16(cookie>>16) + 1)
  577. bytesToRead := (int(size) + 7) / 8
  578. numwords := (bytesToRead + 7) / 8
  579. by := make([]byte, bytesToRead, numwords*8)
  580. nr, err := io.ReadFull(stream, by)
  581. if err != nil {
  582. return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
  583. "runContainer bit flags of length %v bytes: %v", bytesToRead, err)
  584. }
  585. pos += bytesToRead
  586. by = by[:cap(by)]
  587. isRun = newBitmapContainer()
  588. for i := 0; i < numwords; i++ {
  589. isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
  590. by = by[8:]
  591. }
  592. } else if cookie == serialCookieNoRunContainer {
  593. err = binary.Read(stream, binary.LittleEndian, &size)
  594. if err != nil {
  595. return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
  596. }
  597. pos += 4
  598. } else {
  599. return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
  600. }
  601. if size > (1 << 16) {
  602. return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
  603. }
  604. // descriptive header
  605. keycard := make([]uint16, 2*size, 2*size)
  606. err = binary.Read(stream, binary.LittleEndian, keycard)
  607. if err != nil {
  608. return 0, err
  609. }
  610. pos += 2 * 2 * int(size)
  611. // offset header
  612. if !haveRunContainers || size >= noOffsetThreshold {
  613. io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
  614. pos += 4 * int(size)
  615. }
  616. for i := uint32(0); i < size; i++ {
  617. key := int(keycard[2*i])
  618. card := int(keycard[2*i+1]) + 1
  619. if haveRunContainers && isRun.contains(uint16(i)) {
  620. nb := newRunContainer16()
  621. nr, err := nb.readFrom(stream)
  622. if err != nil {
  623. return 0, err
  624. }
  625. pos += nr
  626. ra.appendContainer(uint16(key), nb, false)
  627. } else if card > arrayDefaultMaxSize {
  628. nb := newBitmapContainer()
  629. nr, err := nb.readFrom(stream)
  630. if err != nil {
  631. return 0, err
  632. }
  633. nb.cardinality = card
  634. pos += nr
  635. ra.appendContainer(keycard[2*i], nb, false)
  636. } else {
  637. nb := newArrayContainerSize(card)
  638. nr, err := nb.readFrom(stream)
  639. if err != nil {
  640. return 0, err
  641. }
  642. pos += nr
  643. ra.appendContainer(keycard[2*i], nb, false)
  644. }
  645. }
  646. return int64(pos), nil
  647. }
  648. func (ra *roaringArray) hasRunCompression() bool {
  649. for _, c := range ra.containers {
  650. switch c.(type) {
  651. case *runContainer16:
  652. return true
  653. }
  654. }
  655. return false
  656. }
  657. func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
  658. ra.conserz = make([]containerSerz, len(ra.containers))
  659. for i, v := range ra.containers {
  660. switch cn := v.(type) {
  661. case *bitmapContainer:
  662. bts, err := cn.MarshalMsg(nil)
  663. if err != nil {
  664. return err
  665. }
  666. ra.conserz[i].t = bitmapContype
  667. ra.conserz[i].r = bts
  668. case *arrayContainer:
  669. bts, err := cn.MarshalMsg(nil)
  670. if err != nil {
  671. return err
  672. }
  673. ra.conserz[i].t = arrayContype
  674. ra.conserz[i].r = bts
  675. case *runContainer16:
  676. bts, err := cn.MarshalMsg(nil)
  677. if err != nil {
  678. return err
  679. }
  680. ra.conserz[i].t = run16Contype
  681. ra.conserz[i].r = bts
  682. default:
  683. panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
  684. }
  685. }
  686. w := snappy.NewWriter(stream)
  687. err := msgp.Encode(w, ra)
  688. ra.conserz = nil
  689. return err
  690. }
  691. func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
  692. r := snappy.NewReader(stream)
  693. err := msgp.Decode(r, ra)
  694. if err != nil {
  695. return err
  696. }
  697. if len(ra.containers) != len(ra.keys) {
  698. ra.containers = make([]container, len(ra.keys))
  699. }
  700. for i, v := range ra.conserz {
  701. switch v.t {
  702. case bitmapContype:
  703. c := &bitmapContainer{}
  704. _, err = c.UnmarshalMsg(v.r)
  705. if err != nil {
  706. return err
  707. }
  708. ra.containers[i] = c
  709. case arrayContype:
  710. c := &arrayContainer{}
  711. _, err = c.UnmarshalMsg(v.r)
  712. if err != nil {
  713. return err
  714. }
  715. ra.containers[i] = c
  716. case run16Contype:
  717. c := &runContainer16{}
  718. _, err = c.UnmarshalMsg(v.r)
  719. if err != nil {
  720. return err
  721. }
  722. ra.containers[i] = c
  723. default:
  724. return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
  725. }
  726. }
  727. ra.conserz = nil
  728. return nil
  729. }
  730. func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
  731. lower := pos + 1
  732. if lower >= len(ra.keys) || ra.keys[lower] >= min {
  733. return lower
  734. }
  735. spansize := 1
  736. for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
  737. spansize *= 2
  738. }
  739. var upper int
  740. if lower+spansize < len(ra.keys) {
  741. upper = lower + spansize
  742. } else {
  743. upper = len(ra.keys) - 1
  744. }
  745. if ra.keys[upper] == min {
  746. return upper
  747. }
  748. if ra.keys[upper] < min {
  749. // means
  750. // array
  751. // has no
  752. // item
  753. // >= min
  754. // pos = array.length;
  755. return len(ra.keys)
  756. }
  757. // we know that the next-smallest span was too small
  758. lower += (spansize >> 1)
  759. mid := 0
  760. for lower+1 != upper {
  761. mid = (lower + upper) >> 1
  762. if ra.keys[mid] == min {
  763. return mid
  764. } else if ra.keys[mid] < min {
  765. lower = mid
  766. } else {
  767. upper = mid
  768. }
  769. }
  770. return upper
  771. }
  772. func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
  773. for i := range ra.needCopyOnWrite {
  774. ra.needCopyOnWrite[i] = true
  775. }
  776. }
  777. func (ra *roaringArray) needsCopyOnWrite(i int) bool {
  778. return ra.needCopyOnWrite[i]
  779. }
  780. func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
  781. ra.needCopyOnWrite[i] = true
  782. }