You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

695 lines
17 KiB

  1. package roaring
  2. ///////////////////////////////////////////////////
  3. //
  4. // container interface methods for runContainer16
  5. //
  6. ///////////////////////////////////////////////////
  7. import (
  8. "fmt"
  9. )
  10. // compile time verify we meet interface requirements
  11. var _ container = &runContainer16{}
  12. func (rc *runContainer16) clone() container {
  13. return newRunContainer16CopyIv(rc.iv)
  14. }
  15. func (rc *runContainer16) minimum() uint16 {
  16. return rc.iv[0].start // assume not empty
  17. }
  18. func (rc *runContainer16) maximum() uint16 {
  19. return rc.iv[len(rc.iv)-1].last() // assume not empty
  20. }
  21. func (rc *runContainer16) isFull() bool {
  22. return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
  23. }
  24. func (rc *runContainer16) and(a container) container {
  25. if rc.isFull() {
  26. return a.clone()
  27. }
  28. switch c := a.(type) {
  29. case *runContainer16:
  30. return rc.intersect(c)
  31. case *arrayContainer:
  32. return rc.andArray(c)
  33. case *bitmapContainer:
  34. return rc.andBitmapContainer(c)
  35. }
  36. panic("unsupported container type")
  37. }
  38. func (rc *runContainer16) andCardinality(a container) int {
  39. switch c := a.(type) {
  40. case *runContainer16:
  41. return int(rc.intersectCardinality(c))
  42. case *arrayContainer:
  43. return rc.andArrayCardinality(c)
  44. case *bitmapContainer:
  45. return rc.andBitmapContainerCardinality(c)
  46. }
  47. panic("unsupported container type")
  48. }
  49. // andBitmapContainer finds the intersection of rc and b.
  50. func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
  51. bc2 := newBitmapContainerFromRun(rc)
  52. return bc2.andBitmap(bc)
  53. }
  54. func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
  55. pos := 0
  56. answer := 0
  57. maxpos := ac.getCardinality()
  58. if maxpos == 0 {
  59. return 0 // won't happen in actual code
  60. }
  61. v := ac.content[pos]
  62. mainloop:
  63. for _, p := range rc.iv {
  64. for v < p.start {
  65. pos++
  66. if pos == maxpos {
  67. break mainloop
  68. }
  69. v = ac.content[pos]
  70. }
  71. for v <= p.last() {
  72. answer++
  73. pos++
  74. if pos == maxpos {
  75. break mainloop
  76. }
  77. v = ac.content[pos]
  78. }
  79. }
  80. return answer
  81. }
  82. func (rc *runContainer16) iand(a container) container {
  83. if rc.isFull() {
  84. return a.clone()
  85. }
  86. switch c := a.(type) {
  87. case *runContainer16:
  88. return rc.inplaceIntersect(c)
  89. case *arrayContainer:
  90. return rc.andArray(c)
  91. case *bitmapContainer:
  92. return rc.iandBitmapContainer(c)
  93. }
  94. panic("unsupported container type")
  95. }
  96. func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
  97. // TODO: optimize by doing less allocation, possibly?
  98. // sect will be new
  99. sect := rc.intersect(rc2)
  100. *rc = *sect
  101. return rc
  102. }
  103. func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
  104. isect := rc.andBitmapContainer(bc)
  105. *rc = *newRunContainer16FromContainer(isect)
  106. return rc
  107. }
  108. func (rc *runContainer16) andArray(ac *arrayContainer) container {
  109. if len(rc.iv) == 0 {
  110. return newArrayContainer()
  111. }
  112. acCardinality := ac.getCardinality()
  113. c := newArrayContainerCapacity(acCardinality)
  114. for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
  115. iv := rc.iv[rlePos]
  116. arrayVal := ac.content[arrayPos]
  117. for iv.last() < arrayVal {
  118. rlePos++
  119. if rlePos == len(rc.iv) {
  120. return c
  121. }
  122. iv = rc.iv[rlePos]
  123. }
  124. if iv.start > arrayVal {
  125. arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
  126. } else {
  127. c.content = append(c.content, arrayVal)
  128. arrayPos++
  129. }
  130. }
  131. return c
  132. }
  133. func (rc *runContainer16) andNot(a container) container {
  134. switch c := a.(type) {
  135. case *arrayContainer:
  136. return rc.andNotArray(c)
  137. case *bitmapContainer:
  138. return rc.andNotBitmap(c)
  139. case *runContainer16:
  140. return rc.andNotRunContainer16(c)
  141. }
  142. panic("unsupported container type")
  143. }
  144. func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
  145. k := 0
  146. var val int64
  147. for _, p := range rc.iv {
  148. n := p.runlen()
  149. for j := int64(0); j < n; j++ {
  150. val = int64(p.start) + j
  151. x[k+i] = uint32(val) | mask
  152. k++
  153. }
  154. }
  155. }
  156. func (rc *runContainer16) getShortIterator() shortIterable {
  157. return rc.newRunIterator16()
  158. }
  159. func (rc *runContainer16) getManyIterator() manyIterable {
  160. return rc.newManyRunIterator16()
  161. }
  162. // add the values in the range [firstOfRange, endx). endx
  163. // is still abe to express 2^16 because it is an int not an uint16.
  164. func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
  165. if firstOfRange >= endx {
  166. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
  167. }
  168. addme := newRunContainer16TakeOwnership([]interval16{
  169. {
  170. start: uint16(firstOfRange),
  171. length: uint16(endx - 1 - firstOfRange),
  172. },
  173. })
  174. *rc = *rc.union(addme)
  175. return rc
  176. }
  177. // remove the values in the range [firstOfRange,endx)
  178. func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
  179. if firstOfRange >= endx {
  180. panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
  181. " nothing to do.", firstOfRange, endx))
  182. //return rc
  183. }
  184. x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
  185. rc.isubtract(x)
  186. return rc
  187. }
  188. // not flip the values in the range [firstOfRange,endx)
  189. func (rc *runContainer16) not(firstOfRange, endx int) container {
  190. if firstOfRange >= endx {
  191. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
  192. }
  193. return rc.Not(firstOfRange, endx)
  194. }
  195. // Not flips the values in the range [firstOfRange,endx).
  196. // This is not inplace. Only the returned value has the flipped bits.
  197. //
  198. // Currently implemented as (!A intersect B) union (A minus B),
  199. // where A is rc, and B is the supplied [firstOfRange, endx) interval.
  200. //
  201. // TODO(time optimization): convert this to a single pass
  202. // algorithm by copying AndNotRunContainer16() and modifying it.
  203. // Current routine is correct but
  204. // makes 2 more passes through the arrays than should be
  205. // strictly necessary. Measure both ways though--this may not matter.
  206. //
  207. func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
  208. if firstOfRange >= endx {
  209. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
  210. }
  211. if firstOfRange >= endx {
  212. return rc.Clone()
  213. }
  214. a := rc
  215. // algo:
  216. // (!A intersect B) union (A minus B)
  217. nota := a.invert()
  218. bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
  219. b := newRunContainer16TakeOwnership(bs)
  220. notAintersectB := nota.intersect(b)
  221. aMinusB := a.AndNotRunContainer16(b)
  222. rc2 := notAintersectB.union(aMinusB)
  223. return rc2
  224. }
  225. // equals is now logical equals; it does not require the
  226. // same underlying container type.
  227. func (rc *runContainer16) equals(o container) bool {
  228. srb, ok := o.(*runContainer16)
  229. if !ok {
  230. // maybe value instead of pointer
  231. val, valok := o.(*runContainer16)
  232. if valok {
  233. srb = val
  234. ok = true
  235. }
  236. }
  237. if ok {
  238. // Check if the containers are the same object.
  239. if rc == srb {
  240. return true
  241. }
  242. if len(srb.iv) != len(rc.iv) {
  243. return false
  244. }
  245. for i, v := range rc.iv {
  246. if v != srb.iv[i] {
  247. return false
  248. }
  249. }
  250. return true
  251. }
  252. // use generic comparison
  253. if o.getCardinality() != rc.getCardinality() {
  254. return false
  255. }
  256. rit := rc.getShortIterator()
  257. bit := o.getShortIterator()
  258. //k := 0
  259. for rit.hasNext() {
  260. if bit.next() != rit.next() {
  261. return false
  262. }
  263. //k++
  264. }
  265. return true
  266. }
  267. func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
  268. rc.Add(x)
  269. return rc
  270. }
  271. func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
  272. return rc.Add(x)
  273. }
  274. func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
  275. rc.removeKey(x)
  276. return rc
  277. }
  278. func (rc *runContainer16) iremove(x uint16) bool {
  279. return rc.removeKey(x)
  280. }
  281. func (rc *runContainer16) or(a container) container {
  282. if rc.isFull() {
  283. return rc.clone()
  284. }
  285. switch c := a.(type) {
  286. case *runContainer16:
  287. return rc.union(c)
  288. case *arrayContainer:
  289. return rc.orArray(c)
  290. case *bitmapContainer:
  291. return rc.orBitmapContainer(c)
  292. }
  293. panic("unsupported container type")
  294. }
  295. func (rc *runContainer16) orCardinality(a container) int {
  296. switch c := a.(type) {
  297. case *runContainer16:
  298. return int(rc.unionCardinality(c))
  299. case *arrayContainer:
  300. return rc.orArrayCardinality(c)
  301. case *bitmapContainer:
  302. return rc.orBitmapContainerCardinality(c)
  303. }
  304. panic("unsupported container type")
  305. }
  306. // orBitmapContainer finds the union of rc and bc.
  307. func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
  308. bc2 := newBitmapContainerFromRun(rc)
  309. return bc2.iorBitmap(bc)
  310. }
  311. func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
  312. answer := 0
  313. for i := range rc.iv {
  314. answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
  315. }
  316. //bc.computeCardinality()
  317. return answer
  318. }
  319. func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
  320. return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
  321. }
  322. // orArray finds the union of rc and ac.
  323. func (rc *runContainer16) orArray(ac *arrayContainer) container {
  324. bc1 := newBitmapContainerFromRun(rc)
  325. bc2 := ac.toBitmapContainer()
  326. return bc1.orBitmap(bc2)
  327. }
  328. // orArray finds the union of rc and ac.
  329. func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
  330. return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
  331. }
  332. func (rc *runContainer16) ior(a container) container {
  333. if rc.isFull() {
  334. return rc
  335. }
  336. switch c := a.(type) {
  337. case *runContainer16:
  338. return rc.inplaceUnion(c)
  339. case *arrayContainer:
  340. return rc.iorArray(c)
  341. case *bitmapContainer:
  342. return rc.iorBitmapContainer(c)
  343. }
  344. panic("unsupported container type")
  345. }
  346. func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
  347. p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
  348. for _, p := range rc2.iv {
  349. last := int64(p.last())
  350. for i := int64(p.start); i <= last; i++ {
  351. rc.Add(uint16(i))
  352. }
  353. }
  354. return rc
  355. }
  356. func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
  357. it := bc.getShortIterator()
  358. for it.hasNext() {
  359. rc.Add(it.next())
  360. }
  361. return rc
  362. }
  363. func (rc *runContainer16) iorArray(ac *arrayContainer) container {
  364. it := ac.getShortIterator()
  365. for it.hasNext() {
  366. rc.Add(it.next())
  367. }
  368. return rc
  369. }
  370. // lazyIOR is described (not yet implemented) in
  371. // this nice note from @lemire on
  372. // https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
  373. //
  374. // Description of lazyOR and lazyIOR from @lemire:
  375. //
  376. // Lazy functions are optional and can be simply
  377. // wrapper around non-lazy functions.
  378. //
  379. // The idea of "laziness" is as follows. It is
  380. // inspired by the concept of lazy evaluation
  381. // you might be familiar with (functional programming
  382. // and all that). So a roaring bitmap is
  383. // such that all its containers are, in some
  384. // sense, chosen to use as little memory as
  385. // possible. This is nice. Also, all bitsets
  386. // are "cardinality aware" so that you can do
  387. // fast rank/select queries, or query the
  388. // cardinality of the whole bitmap... very fast,
  389. // without latency.
  390. //
  391. // However, imagine that you are aggregating 100
  392. // bitmaps together. So you OR the first two, then OR
  393. // that with the third one and so forth. Clearly,
  394. // intermediate bitmaps don't need to be as
  395. // compressed as possible, right? They can be
  396. // in a "dirty state". You only need the end
  397. // result to be in a nice state... which you
  398. // can achieve by calling repairAfterLazy at the end.
  399. //
  400. // The Java/C code does something special for
  401. // the in-place lazy OR runs. The idea is that
  402. // instead of taking two run containers and
  403. // generating a new one, we actually try to
  404. // do the computation in-place through a
  405. // technique invented by @gssiyankai (pinging him!).
  406. // What you do is you check whether the host
  407. // run container has lots of extra capacity.
  408. // If it does, you move its data at the end of
  409. // the backing array, and then you write
  410. // the answer at the beginning. What this
  411. // trick does is minimize memory allocations.
  412. //
  413. func (rc *runContainer16) lazyIOR(a container) container {
  414. // not lazy at the moment
  415. // TODO: make it lazy
  416. return rc.ior(a)
  417. /*
  418. switch c := a.(type) {
  419. case *arrayContainer:
  420. return rc.lazyIorArray(c)
  421. case *bitmapContainer:
  422. return rc.lazyIorBitmap(c)
  423. case *runContainer16:
  424. return rc.lazyIorRun16(c)
  425. }
  426. panic("unsupported container type")
  427. */
  428. }
  429. // lazyOR is described above in lazyIOR.
  430. func (rc *runContainer16) lazyOR(a container) container {
  431. // not lazy at the moment
  432. // TODO: make it lazy
  433. return rc.or(a)
  434. /*
  435. switch c := a.(type) {
  436. case *arrayContainer:
  437. return rc.lazyOrArray(c)
  438. case *bitmapContainer:
  439. return rc.lazyOrBitmap(c)
  440. case *runContainer16:
  441. return rc.lazyOrRunContainer16(c)
  442. }
  443. panic("unsupported container type")
  444. */
  445. }
  446. func (rc *runContainer16) intersects(a container) bool {
  447. // TODO: optimize by doing inplace/less allocation, possibly?
  448. isect := rc.and(a)
  449. return isect.getCardinality() > 0
  450. }
  451. func (rc *runContainer16) xor(a container) container {
  452. switch c := a.(type) {
  453. case *arrayContainer:
  454. return rc.xorArray(c)
  455. case *bitmapContainer:
  456. return rc.xorBitmap(c)
  457. case *runContainer16:
  458. return rc.xorRunContainer16(c)
  459. }
  460. panic("unsupported container type")
  461. }
  462. func (rc *runContainer16) iandNot(a container) container {
  463. switch c := a.(type) {
  464. case *arrayContainer:
  465. return rc.iandNotArray(c)
  466. case *bitmapContainer:
  467. return rc.iandNotBitmap(c)
  468. case *runContainer16:
  469. return rc.iandNotRunContainer16(c)
  470. }
  471. panic("unsupported container type")
  472. }
  473. // flip the values in the range [firstOfRange,endx)
  474. func (rc *runContainer16) inot(firstOfRange, endx int) container {
  475. if firstOfRange >= endx {
  476. panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
  477. }
  478. // TODO: minimize copies, do it all inplace; not() makes a copy.
  479. rc = rc.Not(firstOfRange, endx)
  480. return rc
  481. }
  482. func (rc *runContainer16) getCardinality() int {
  483. return int(rc.cardinality())
  484. }
  485. func (rc *runContainer16) rank(x uint16) int {
  486. n := int64(len(rc.iv))
  487. xx := int64(x)
  488. w, already, _ := rc.search(xx, nil)
  489. if w < 0 {
  490. return 0
  491. }
  492. if !already && w == n-1 {
  493. return rc.getCardinality()
  494. }
  495. var rnk int64
  496. if !already {
  497. for i := int64(0); i <= w; i++ {
  498. rnk += rc.iv[i].runlen()
  499. }
  500. return int(rnk)
  501. }
  502. for i := int64(0); i < w; i++ {
  503. rnk += rc.iv[i].runlen()
  504. }
  505. rnk += int64(x-rc.iv[w].start) + 1
  506. return int(rnk)
  507. }
  508. func (rc *runContainer16) selectInt(x uint16) int {
  509. return rc.selectInt16(x)
  510. }
  511. func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
  512. return rc.AndNotRunContainer16(b)
  513. }
  514. func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
  515. rcb := rc.toBitmapContainer()
  516. acb := ac.toBitmapContainer()
  517. return rcb.andNotBitmap(acb)
  518. }
  519. func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
  520. rcb := rc.toBitmapContainer()
  521. return rcb.andNotBitmap(bc)
  522. }
  523. func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
  524. p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
  525. bc := newBitmapContainer()
  526. for i := range rc.iv {
  527. bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  528. }
  529. bc.computeCardinality()
  530. return bc
  531. }
  532. func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
  533. rcb := rc.toBitmapContainer()
  534. x2b := x2.toBitmapContainer()
  535. rcb.iandNotBitmapSurely(x2b)
  536. // TODO: check size and optimize the return value
  537. // TODO: is inplace modification really required? If not, elide the copy.
  538. rc2 := newRunContainer16FromBitmapContainer(rcb)
  539. *rc = *rc2
  540. return rc
  541. }
  542. func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
  543. rcb := rc.toBitmapContainer()
  544. acb := ac.toBitmapContainer()
  545. rcb.iandNotBitmapSurely(acb)
  546. // TODO: check size and optimize the return value
  547. // TODO: is inplace modification really required? If not, elide the copy.
  548. rc2 := newRunContainer16FromBitmapContainer(rcb)
  549. *rc = *rc2
  550. return rc
  551. }
  552. func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
  553. rcb := rc.toBitmapContainer()
  554. rcb.iandNotBitmapSurely(bc)
  555. // TODO: check size and optimize the return value
  556. // TODO: is inplace modification really required? If not, elide the copy.
  557. rc2 := newRunContainer16FromBitmapContainer(rcb)
  558. *rc = *rc2
  559. return rc
  560. }
  561. func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
  562. rcb := rc.toBitmapContainer()
  563. x2b := x2.toBitmapContainer()
  564. return rcb.xorBitmap(x2b)
  565. }
  566. func (rc *runContainer16) xorArray(ac *arrayContainer) container {
  567. rcb := rc.toBitmapContainer()
  568. acb := ac.toBitmapContainer()
  569. return rcb.xorBitmap(acb)
  570. }
  571. func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
  572. rcb := rc.toBitmapContainer()
  573. return rcb.xorBitmap(bc)
  574. }
  575. // convert to bitmap or array *if needed*
  576. func (rc *runContainer16) toEfficientContainer() container {
  577. // runContainer16SerializedSizeInBytes(numRuns)
  578. sizeAsRunContainer := rc.getSizeInBytes()
  579. sizeAsBitmapContainer := bitmapContainerSizeInBytes()
  580. card := int(rc.cardinality())
  581. sizeAsArrayContainer := arrayContainerSizeInBytes(card)
  582. if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
  583. return rc
  584. }
  585. if card <= arrayDefaultMaxSize {
  586. return rc.toArrayContainer()
  587. }
  588. bc := newBitmapContainerFromRun(rc)
  589. return bc
  590. }
  591. func (rc *runContainer16) toArrayContainer() *arrayContainer {
  592. ac := newArrayContainer()
  593. for i := range rc.iv {
  594. ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
  595. }
  596. return ac
  597. }
  598. func newRunContainer16FromContainer(c container) *runContainer16 {
  599. switch x := c.(type) {
  600. case *runContainer16:
  601. return x.Clone()
  602. case *arrayContainer:
  603. return newRunContainer16FromArray(x)
  604. case *bitmapContainer:
  605. return newRunContainer16FromBitmapContainer(x)
  606. }
  607. panic("unsupported container type")
  608. }