You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1039 lines
28 KiB

  1. package bolt
  2. import (
  3. "errors"
  4. "fmt"
  5. "hash/fnv"
  6. "log"
  7. "os"
  8. "runtime"
  9. "runtime/debug"
  10. "strings"
  11. "sync"
  12. "time"
  13. "unsafe"
  14. )
  // Mapping and file-format constants.
  const (
  	// maxMmapStep is the largest step that can be taken when remapping
  	// the mmap (1GB).
  	maxMmapStep = 1 << 30

  	// version is the data file format version.
  	version = 2

  	// magic is the marker value indicating that a file is a Bolt DB.
  	magic uint32 = 0xED0CDAED
  )

  // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
  // syncing changes to a file. This is required as some operating systems,
  // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
  // must be synchronized using the msync(2) syscall.
  const IgnoreNoSync = runtime.GOOS == "openbsd"

  // Defaults applied to a DB instance when the corresponding field is not set.
  const (
  	// DefaultMaxBatchSize is the default value for DB.MaxBatchSize.
  	DefaultMaxBatchSize int = 1000

  	// DefaultMaxBatchDelay is the default value for DB.MaxBatchDelay.
  	DefaultMaxBatchDelay = 10 * time.Millisecond

  	// DefaultAllocSize is the default value for DB.AllocSize (16MB).
  	DefaultAllocSize = 16 * 1024 * 1024
  )

  // defaultPageSize is the OS page size, captured at package initialization.
  var defaultPageSize = os.Getpagesize()
  34. // DB represents a collection of buckets persisted to a file on disk.
  35. // All data access is performed through transactions which can be obtained through the DB.
  36. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
  37. type DB struct {
  38. // When enabled, the database will perform a Check() after every commit.
  39. // A panic is issued if the database is in an inconsistent state. This
  40. // flag has a large performance impact so it should only be used for
  41. // debugging purposes.
  42. StrictMode bool
  43. // Setting the NoSync flag will cause the database to skip fsync()
  44. // calls after each commit. This can be useful when bulk loading data
  45. // into a database and you can restart the bulk load in the event of
  46. // a system failure or database corruption. Do not set this flag for
  47. // normal use.
  48. //
  49. // If the package global IgnoreNoSync constant is true, this value is
  50. // ignored. See the comment on that constant for more details.
  51. //
  52. // THIS IS UNSAFE. PLEASE USE WITH CAUTION.
  53. NoSync bool
  54. // When true, skips the truncate call when growing the database.
  55. // Setting this to true is only safe on non-ext3/ext4 systems.
  56. // Skipping truncation avoids preallocation of hard drive space and
  57. // bypasses a truncate() and fsync() syscall on remapping.
  58. //
  59. // https://github.com/boltdb/bolt/issues/284
  60. NoGrowSync bool
  61. // If you want to read the entire database fast, you can set MmapFlag to
  62. // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
  63. MmapFlags int
  64. // MaxBatchSize is the maximum size of a batch. Default value is
  65. // copied from DefaultMaxBatchSize in Open.
  66. //
  67. // If <=0, disables batching.
  68. //
  69. // Do not change concurrently with calls to Batch.
  70. MaxBatchSize int
  71. // MaxBatchDelay is the maximum delay before a batch starts.
  72. // Default value is copied from DefaultMaxBatchDelay in Open.
  73. //
  74. // If <=0, effectively disables batching.
  75. //
  76. // Do not change concurrently with calls to Batch.
  77. MaxBatchDelay time.Duration
  78. // AllocSize is the amount of space allocated when the database
  79. // needs to create new pages. This is done to amortize the cost
  80. // of truncate() and fsync() when growing the data file.
  81. AllocSize int
  82. path string
  83. file *os.File
  84. lockfile *os.File // windows only
  85. dataref []byte // mmap'ed readonly, write throws SEGV
  86. data *[maxMapSize]byte
  87. datasz int
  88. filesz int // current on disk file size
  89. meta0 *meta
  90. meta1 *meta
  91. pageSize int
  92. opened bool
  93. rwtx *Tx
  94. txs []*Tx
  95. freelist *freelist
  96. stats Stats
  97. pagePool sync.Pool
  98. batchMu sync.Mutex
  99. batch *batch
  100. rwlock sync.Mutex // Allows only one writer at a time.
  101. metalock sync.Mutex // Protects meta page access.
  102. mmaplock sync.RWMutex // Protects mmap access during remapping.
  103. statlock sync.RWMutex // Protects stats access.
  104. ops struct {
  105. writeAt func(b []byte, off int64) (n int, err error)
  106. }
  107. // Read only mode.
  108. // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
  109. readOnly bool
  110. }
  111. // Path returns the path to currently open database file.
  112. func (db *DB) Path() string {
  113. return db.path
  114. }
  115. // GoString returns the Go string representation of the database.
  116. func (db *DB) GoString() string {
  117. return fmt.Sprintf("bolt.DB{path:%q}", db.path)
  118. }
  119. // String returns the string representation of the database.
  120. func (db *DB) String() string {
  121. return fmt.Sprintf("DB<%q>", db.path)
  122. }
  123. // Open creates and opens a database at the given path.
  124. // If the file does not exist then it will be created automatically.
  125. // Passing in nil options will cause Bolt to open the database with the default options.
  126. func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
  127. var db = &DB{opened: true}
  128. // Set default options if no options are provided.
  129. if options == nil {
  130. options = DefaultOptions
  131. }
  132. db.NoGrowSync = options.NoGrowSync
  133. db.MmapFlags = options.MmapFlags
  134. // Set default values for later DB operations.
  135. db.MaxBatchSize = DefaultMaxBatchSize
  136. db.MaxBatchDelay = DefaultMaxBatchDelay
  137. db.AllocSize = DefaultAllocSize
  138. flag := os.O_RDWR
  139. if options.ReadOnly {
  140. flag = os.O_RDONLY
  141. db.readOnly = true
  142. }
  143. // Open data file and separate sync handler for metadata writes.
  144. db.path = path
  145. var err error
  146. if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
  147. _ = db.close()
  148. return nil, err
  149. }
  150. // Lock file so that other processes using Bolt in read-write mode cannot
  151. // use the database at the same time. This would cause corruption since
  152. // the two processes would write meta pages and free pages separately.
  153. // The database file is locked exclusively (only one process can grab the lock)
  154. // if !options.ReadOnly.
  155. // The database file is locked using the shared lock (more than one process may
  156. // hold a lock at the same time) otherwise (options.ReadOnly is set).
  157. if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
  158. _ = db.close()
  159. return nil, err
  160. }
  161. // Default values for test hooks
  162. db.ops.writeAt = db.file.WriteAt
  163. // Initialize the database if it doesn't exist.
  164. if info, err := db.file.Stat(); err != nil {
  165. return nil, err
  166. } else if info.Size() == 0 {
  167. // Initialize new files with meta pages.
  168. if err := db.init(); err != nil {
  169. return nil, err
  170. }
  171. } else {
  172. // Read the first meta page to determine the page size.
  173. var buf [0x1000]byte
  174. if _, err := db.file.ReadAt(buf[:], 0); err == nil {
  175. m := db.pageInBuffer(buf[:], 0).meta()
  176. if err := m.validate(); err != nil {
  177. // If we can't read the page size, we can assume it's the same
  178. // as the OS -- since that's how the page size was chosen in the
  179. // first place.
  180. //
  181. // If the first page is invalid and this OS uses a different
  182. // page size than what the database was created with then we
  183. // are out of luck and cannot access the database.
  184. db.pageSize = os.Getpagesize()
  185. } else {
  186. db.pageSize = int(m.pageSize)
  187. }
  188. }
  189. }
  190. // Initialize page pool.
  191. db.pagePool = sync.Pool{
  192. New: func() interface{} {
  193. return make([]byte, db.pageSize)
  194. },
  195. }
  196. // Memory map the data file.
  197. if err := db.mmap(options.InitialMmapSize); err != nil {
  198. _ = db.close()
  199. return nil, err
  200. }
  201. // Read in the freelist.
  202. db.freelist = newFreelist()
  203. db.freelist.read(db.page(db.meta().freelist))
  204. // Mark the database as opened and return.
  205. return db, nil
  206. }
  207. // mmap opens the underlying memory-mapped file and initializes the meta references.
  208. // minsz is the minimum size that the new mmap can be.
  209. func (db *DB) mmap(minsz int) error {
  210. db.mmaplock.Lock()
  211. defer db.mmaplock.Unlock()
  212. info, err := db.file.Stat()
  213. if err != nil {
  214. return fmt.Errorf("mmap stat error: %s", err)
  215. } else if int(info.Size()) < db.pageSize*2 {
  216. return fmt.Errorf("file size too small")
  217. }
  218. // Ensure the size is at least the minimum size.
  219. var size = int(info.Size())
  220. if size < minsz {
  221. size = minsz
  222. }
  223. size, err = db.mmapSize(size)
  224. if err != nil {
  225. return err
  226. }
  227. // Dereference all mmap references before unmapping.
  228. if db.rwtx != nil {
  229. db.rwtx.root.dereference()
  230. }
  231. // Unmap existing data before continuing.
  232. if err := db.munmap(); err != nil {
  233. return err
  234. }
  235. // Memory-map the data file as a byte slice.
  236. if err := mmap(db, size); err != nil {
  237. return err
  238. }
  239. // Save references to the meta pages.
  240. db.meta0 = db.page(0).meta()
  241. db.meta1 = db.page(1).meta()
  242. // Validate the meta pages. We only return an error if both meta pages fail
  243. // validation, since meta0 failing validation means that it wasn't saved
  244. // properly -- but we can recover using meta1. And vice-versa.
  245. err0 := db.meta0.validate()
  246. err1 := db.meta1.validate()
  247. if err0 != nil && err1 != nil {
  248. return err0
  249. }
  250. return nil
  251. }
  252. // munmap unmaps the data file from memory.
  253. func (db *DB) munmap() error {
  254. if err := munmap(db); err != nil {
  255. return fmt.Errorf("unmap error: " + err.Error())
  256. }
  257. return nil
  258. }
  259. // mmapSize determines the appropriate size for the mmap given the current size
  260. // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
  261. // Returns an error if the new mmap size is greater than the max allowed.
  262. func (db *DB) mmapSize(size int) (int, error) {
  263. // Double the size from 32KB until 1GB.
  264. for i := uint(15); i <= 30; i++ {
  265. if size <= 1<<i {
  266. return 1 << i, nil
  267. }
  268. }
  269. // Verify the requested size is not above the maximum allowed.
  270. if size > maxMapSize {
  271. return 0, fmt.Errorf("mmap too large")
  272. }
  273. // If larger than 1GB then grow by 1GB at a time.
  274. sz := int64(size)
  275. if remainder := sz % int64(maxMmapStep); remainder > 0 {
  276. sz += int64(maxMmapStep) - remainder
  277. }
  278. // Ensure that the mmap size is a multiple of the page size.
  279. // This should always be true since we're incrementing in MBs.
  280. pageSize := int64(db.pageSize)
  281. if (sz % pageSize) != 0 {
  282. sz = ((sz / pageSize) + 1) * pageSize
  283. }
  284. // If we've exceeded the max size then only grow up to the max size.
  285. if sz > maxMapSize {
  286. sz = maxMapSize
  287. }
  288. return int(sz), nil
  289. }
  290. // init creates a new database file and initializes its meta pages.
  291. func (db *DB) init() error {
  292. // Set the page size to the OS page size.
  293. db.pageSize = os.Getpagesize()
  294. // Create two meta pages on a buffer.
  295. buf := make([]byte, db.pageSize*4)
  296. for i := 0; i < 2; i++ {
  297. p := db.pageInBuffer(buf[:], pgid(i))
  298. p.id = pgid(i)
  299. p.flags = metaPageFlag
  300. // Initialize the meta page.
  301. m := p.meta()
  302. m.magic = magic
  303. m.version = version
  304. m.pageSize = uint32(db.pageSize)
  305. m.freelist = 2
  306. m.root = bucket{root: 3}
  307. m.pgid = 4
  308. m.txid = txid(i)
  309. m.checksum = m.sum64()
  310. }
  311. // Write an empty freelist at page 3.
  312. p := db.pageInBuffer(buf[:], pgid(2))
  313. p.id = pgid(2)
  314. p.flags = freelistPageFlag
  315. p.count = 0
  316. // Write an empty leaf page at page 4.
  317. p = db.pageInBuffer(buf[:], pgid(3))
  318. p.id = pgid(3)
  319. p.flags = leafPageFlag
  320. p.count = 0
  321. // Write the buffer to our data file.
  322. if _, err := db.ops.writeAt(buf, 0); err != nil {
  323. return err
  324. }
  325. if err := fdatasync(db); err != nil {
  326. return err
  327. }
  328. return nil
  329. }
  330. // Close releases all database resources.
  331. // All transactions must be closed before closing the database.
  332. func (db *DB) Close() error {
  333. db.rwlock.Lock()
  334. defer db.rwlock.Unlock()
  335. db.metalock.Lock()
  336. defer db.metalock.Unlock()
  337. db.mmaplock.RLock()
  338. defer db.mmaplock.RUnlock()
  339. return db.close()
  340. }
  341. func (db *DB) close() error {
  342. if !db.opened {
  343. return nil
  344. }
  345. db.opened = false
  346. db.freelist = nil
  347. // Clear ops.
  348. db.ops.writeAt = nil
  349. // Close the mmap.
  350. if err := db.munmap(); err != nil {
  351. return err
  352. }
  353. // Close file handles.
  354. if db.file != nil {
  355. // No need to unlock read-only file.
  356. if !db.readOnly {
  357. // Unlock the file.
  358. if err := funlock(db); err != nil {
  359. log.Printf("bolt.Close(): funlock error: %s", err)
  360. }
  361. }
  362. // Close the file descriptor.
  363. if err := db.file.Close(); err != nil {
  364. return fmt.Errorf("db file close: %s", err)
  365. }
  366. db.file = nil
  367. }
  368. db.path = ""
  369. return nil
  370. }
  371. // Begin starts a new transaction.
  372. // Multiple read-only transactions can be used concurrently but only one
  373. // write transaction can be used at a time. Starting multiple write transactions
  374. // will cause the calls to block and be serialized until the current write
  375. // transaction finishes.
  376. //
  377. // Transactions should not be dependent on one another. Opening a read
  378. // transaction and a write transaction in the same goroutine can cause the
  379. // writer to deadlock because the database periodically needs to re-mmap itself
  380. // as it grows and it cannot do that while a read transaction is open.
  381. //
  382. // If a long running read transaction (for example, a snapshot transaction) is
  383. // needed, you might want to set DB.InitialMmapSize to a large enough value
  384. // to avoid potential blocking of write transaction.
  385. //
  386. // IMPORTANT: You must close read-only transactions after you are finished or
  387. // else the database will not reclaim old pages.
  388. func (db *DB) Begin(writable bool) (*Tx, error) {
  389. if writable {
  390. return db.beginRWTx()
  391. }
  392. return db.beginTx()
  393. }
  394. func (db *DB) beginTx() (*Tx, error) {
  395. // Lock the meta pages while we initialize the transaction. We obtain
  396. // the meta lock before the mmap lock because that's the order that the
  397. // write transaction will obtain them.
  398. db.metalock.Lock()
  399. // Obtain a read-only lock on the mmap. When the mmap is remapped it will
  400. // obtain a write lock so all transactions must finish before it can be
  401. // remapped.
  402. db.mmaplock.RLock()
  403. // Exit if the database is not open yet.
  404. if !db.opened {
  405. db.mmaplock.RUnlock()
  406. db.metalock.Unlock()
  407. return nil, ErrDatabaseNotOpen
  408. }
  409. // Create a transaction associated with the database.
  410. t := &Tx{}
  411. t.init(db)
  412. // Keep track of transaction until it closes.
  413. db.txs = append(db.txs, t)
  414. n := len(db.txs)
  415. // Unlock the meta pages.
  416. db.metalock.Unlock()
  417. // Update the transaction stats.
  418. db.statlock.Lock()
  419. db.stats.TxN++
  420. db.stats.OpenTxN = n
  421. db.statlock.Unlock()
  422. return t, nil
  423. }
  424. func (db *DB) beginRWTx() (*Tx, error) {
  425. // If the database was opened with Options.ReadOnly, return an error.
  426. if db.readOnly {
  427. return nil, ErrDatabaseReadOnly
  428. }
  429. // Obtain writer lock. This is released by the transaction when it closes.
  430. // This enforces only one writer transaction at a time.
  431. db.rwlock.Lock()
  432. // Once we have the writer lock then we can lock the meta pages so that
  433. // we can set up the transaction.
  434. db.metalock.Lock()
  435. defer db.metalock.Unlock()
  436. // Exit if the database is not open yet.
  437. if !db.opened {
  438. db.rwlock.Unlock()
  439. return nil, ErrDatabaseNotOpen
  440. }
  441. // Create a transaction associated with the database.
  442. t := &Tx{writable: true}
  443. t.init(db)
  444. db.rwtx = t
  445. // Free any pages associated with closed read-only transactions.
  446. var minid txid = 0xFFFFFFFFFFFFFFFF
  447. for _, t := range db.txs {
  448. if t.meta.txid < minid {
  449. minid = t.meta.txid
  450. }
  451. }
  452. if minid > 0 {
  453. db.freelist.release(minid - 1)
  454. }
  455. return t, nil
  456. }
  457. // removeTx removes a transaction from the database.
  458. func (db *DB) removeTx(tx *Tx) {
  459. // Release the read lock on the mmap.
  460. db.mmaplock.RUnlock()
  461. // Use the meta lock to restrict access to the DB object.
  462. db.metalock.Lock()
  463. // Remove the transaction.
  464. for i, t := range db.txs {
  465. if t == tx {
  466. last := len(db.txs) - 1
  467. db.txs[i] = db.txs[last]
  468. db.txs[last] = nil
  469. db.txs = db.txs[:last]
  470. break
  471. }
  472. }
  473. n := len(db.txs)
  474. // Unlock the meta pages.
  475. db.metalock.Unlock()
  476. // Merge statistics.
  477. db.statlock.Lock()
  478. db.stats.OpenTxN = n
  479. db.stats.TxStats.add(&tx.stats)
  480. db.statlock.Unlock()
  481. }
  482. // Update executes a function within the context of a read-write managed transaction.
  483. // If no error is returned from the function then the transaction is committed.
  484. // If an error is returned then the entire transaction is rolled back.
  485. // Any error that is returned from the function or returned from the commit is
  486. // returned from the Update() method.
  487. //
  488. // Attempting to manually commit or rollback within the function will cause a panic.
  489. func (db *DB) Update(fn func(*Tx) error) error {
  490. t, err := db.Begin(true)
  491. if err != nil {
  492. return err
  493. }
  494. // Make sure the transaction rolls back in the event of a panic.
  495. defer func() {
  496. if t.db != nil {
  497. t.rollback()
  498. }
  499. }()
  500. // Mark as a managed tx so that the inner function cannot manually commit.
  501. t.managed = true
  502. // If an error is returned from the function then rollback and return error.
  503. err = fn(t)
  504. t.managed = false
  505. if err != nil {
  506. _ = t.Rollback()
  507. return err
  508. }
  509. return t.Commit()
  510. }
  511. // View executes a function within the context of a managed read-only transaction.
  512. // Any error that is returned from the function is returned from the View() method.
  513. //
  514. // Attempting to manually rollback within the function will cause a panic.
  515. func (db *DB) View(fn func(*Tx) error) error {
  516. t, err := db.Begin(false)
  517. if err != nil {
  518. return err
  519. }
  520. // Make sure the transaction rolls back in the event of a panic.
  521. defer func() {
  522. if t.db != nil {
  523. t.rollback()
  524. }
  525. }()
  526. // Mark as a managed tx so that the inner function cannot manually rollback.
  527. t.managed = true
  528. // If an error is returned from the function then pass it through.
  529. err = fn(t)
  530. t.managed = false
  531. if err != nil {
  532. _ = t.Rollback()
  533. return err
  534. }
  535. if err := t.Rollback(); err != nil {
  536. return err
  537. }
  538. return nil
  539. }
  540. // Batch calls fn as part of a batch. It behaves similar to Update,
  541. // except:
  542. //
  543. // 1. concurrent Batch calls can be combined into a single Bolt
  544. // transaction.
  545. //
  546. // 2. the function passed to Batch may be called multiple times,
  547. // regardless of whether it returns error or not.
  548. //
  549. // This means that Batch function side effects must be idempotent and
  550. // take permanent effect only after a successful return is seen in
  551. // caller.
  552. //
  553. // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
  554. // and DB.MaxBatchDelay, respectively.
  555. //
  556. // Batch is only useful when there are multiple goroutines calling it.
  557. func (db *DB) Batch(fn func(*Tx) error) error {
  558. errCh := make(chan error, 1)
  559. db.batchMu.Lock()
  560. if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
  561. // There is no existing batch, or the existing batch is full; start a new one.
  562. db.batch = &batch{
  563. db: db,
  564. }
  565. db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
  566. }
  567. db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
  568. if len(db.batch.calls) >= db.MaxBatchSize {
  569. // wake up batch, it's ready to run
  570. go db.batch.trigger()
  571. }
  572. db.batchMu.Unlock()
  573. err := <-errCh
  574. if err == trySolo {
  575. err = db.Update(fn)
  576. }
  577. return err
  578. }
  579. type call struct {
  580. fn func(*Tx) error
  581. err chan<- error
  582. }
  583. type batch struct {
  584. db *DB
  585. timer *time.Timer
  586. start sync.Once
  587. calls []call
  588. }
  589. // trigger runs the batch if it hasn't already been run.
  590. func (b *batch) trigger() {
  591. b.start.Do(b.run)
  592. }
  593. // run performs the transactions in the batch and communicates results
  594. // back to DB.Batch.
  595. func (b *batch) run() {
  596. b.db.batchMu.Lock()
  597. b.timer.Stop()
  598. // Make sure no new work is added to this batch, but don't break
  599. // other batches.
  600. if b.db.batch == b {
  601. b.db.batch = nil
  602. }
  603. b.db.batchMu.Unlock()
  604. retry:
  605. for len(b.calls) > 0 {
  606. var failIdx = -1
  607. err := b.db.Update(func(tx *Tx) error {
  608. for i, c := range b.calls {
  609. if err := safelyCall(c.fn, tx); err != nil {
  610. failIdx = i
  611. return err
  612. }
  613. }
  614. return nil
  615. })
  616. if failIdx >= 0 {
  617. // take the failing transaction out of the batch. it's
  618. // safe to shorten b.calls here because db.batch no longer
  619. // points to us, and we hold the mutex anyway.
  620. c := b.calls[failIdx]
  621. b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
  622. // tell the submitter re-run it solo, continue with the rest of the batch
  623. c.err <- trySolo
  624. continue retry
  625. }
  626. // pass success, or bolt internal errors, to all callers
  627. for _, c := range b.calls {
  628. if c.err != nil {
  629. c.err <- err
  630. }
  631. }
  632. break retry
  633. }
  634. }
  // trySolo is a sentinel error telling the submitter of a batched function
  // that the function should be re-run by itself. It should never be seen by
  // callers.
  var trySolo = errors.New("batch function returned an error and should be re-run solo")

  // panicked wraps a value recovered from a panic so it can be returned as
  // an error.
  type panicked struct {
  	reason interface{}
  }

  // Error returns the wrapped error's message when the panic value is an
  // error, and "panic: <value>" otherwise.
  func (p panicked) Error() string {
  	switch r := p.reason.(type) {
  	case error:
  		return r.Error()
  	default:
  		return fmt.Sprintf("panic: %v", r)
  	}
  }
  648. func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
  649. defer func() {
  650. if p := recover(); p != nil {
  651. err = panicked{p}
  652. }
  653. }()
  654. return fn(tx)
  655. }
  656. // Sync executes fdatasync() against the database file handle.
  657. //
  658. // This is not necessary under normal operation, however, if you use NoSync
  659. // then it allows you to force the database file to sync against the disk.
  660. func (db *DB) Sync() error { return fdatasync(db) }
  661. // Stats retrieves ongoing performance stats for the database.
  662. // This is only updated when a transaction closes.
  663. func (db *DB) Stats() Stats {
  664. db.statlock.RLock()
  665. defer db.statlock.RUnlock()
  666. return db.stats
  667. }
  668. // This is for internal access to the raw data bytes from the C cursor, use
  669. // carefully, or not at all.
  670. func (db *DB) Info() *Info {
  671. return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
  672. }
  673. // page retrieves a page reference from the mmap based on the current page size.
  674. func (db *DB) page(id pgid) *page {
  675. pos := id * pgid(db.pageSize)
  676. return (*page)(unsafe.Pointer(&db.data[pos]))
  677. }
  678. // pageInBuffer retrieves a page reference from a given byte array based on the current page size.
  679. func (db *DB) pageInBuffer(b []byte, id pgid) *page {
  680. return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
  681. }
  682. // meta retrieves the current meta page reference.
  683. func (db *DB) meta() *meta {
  684. // We have to return the meta with the highest txid which doesn't fail
  685. // validation. Otherwise, we can cause errors when in fact the database is
  686. // in a consistent state. metaA is the one with the higher txid.
  687. metaA := db.meta0
  688. metaB := db.meta1
  689. if db.meta1.txid > db.meta0.txid {
  690. metaA = db.meta1
  691. metaB = db.meta0
  692. }
  693. // Use higher meta page if valid. Otherwise fallback to previous, if valid.
  694. if err := metaA.validate(); err == nil {
  695. return metaA
  696. } else if err := metaB.validate(); err == nil {
  697. return metaB
  698. }
  699. // This should never be reached, because both meta1 and meta0 were validated
  700. // on mmap() and we do fsync() on every write.
  701. panic("bolt.DB.meta(): invalid meta pages")
  702. }
  703. // allocate returns a contiguous block of memory starting at a given page.
  704. func (db *DB) allocate(count int) (*page, error) {
  705. // Allocate a temporary buffer for the page.
  706. var buf []byte
  707. if count == 1 {
  708. buf = db.pagePool.Get().([]byte)
  709. } else {
  710. buf = make([]byte, count*db.pageSize)
  711. }
  712. p := (*page)(unsafe.Pointer(&buf[0]))
  713. p.overflow = uint32(count - 1)
  714. // Use pages from the freelist if they are available.
  715. if p.id = db.freelist.allocate(count); p.id != 0 {
  716. return p, nil
  717. }
  718. // Resize mmap() if we're at the end.
  719. p.id = db.rwtx.meta.pgid
  720. var minsz = int((p.id+pgid(count))+1) * db.pageSize
  721. if minsz >= db.datasz {
  722. if err := db.mmap(minsz); err != nil {
  723. return nil, fmt.Errorf("mmap allocate error: %s", err)
  724. }
  725. }
  726. // Move the page id high water mark.
  727. db.rwtx.meta.pgid += pgid(count)
  728. return p, nil
  729. }
  730. // grow grows the size of the database to the given sz.
  731. func (db *DB) grow(sz int) error {
  732. // Ignore if the new size is less than available file size.
  733. if sz <= db.filesz {
  734. return nil
  735. }
  736. // If the data is smaller than the alloc size then only allocate what's needed.
  737. // Once it goes over the allocation size then allocate in chunks.
  738. if db.datasz < db.AllocSize {
  739. sz = db.datasz
  740. } else {
  741. sz += db.AllocSize
  742. }
  743. // Truncate and fsync to ensure file size metadata is flushed.
  744. // https://github.com/boltdb/bolt/issues/284
  745. if !db.NoGrowSync && !db.readOnly {
  746. if runtime.GOOS != "windows" {
  747. if err := db.file.Truncate(int64(sz)); err != nil {
  748. return fmt.Errorf("file resize error: %s", err)
  749. }
  750. }
  751. if err := db.file.Sync(); err != nil {
  752. return fmt.Errorf("file sync error: %s", err)
  753. }
  754. }
  755. db.filesz = sz
  756. return nil
  757. }
  758. func (db *DB) IsReadOnly() bool {
  759. return db.readOnly
  760. }
  // Options holds the settings that can be chosen when opening a database.
  type Options struct {
  	// Timeout is the amount of time to wait to obtain a file lock.
  	// Zero means wait indefinitely. This option is only available on
  	// Darwin and Linux.
  	Timeout time.Duration

  	// NoGrowSync sets the DB.NoGrowSync flag before memory mapping the
  	// file.
  	NoGrowSync bool

  	// ReadOnly opens the database in read-only mode. Uses
  	// flock(..., LOCK_SH |LOCK_NB) to grab a shared lock (UNIX).
  	ReadOnly bool

  	// MmapFlags sets the DB.MmapFlags flag before memory mapping the
  	// file.
  	MmapFlags int

  	// InitialMmapSize is the initial mmap size of the database in bytes.
  	// Read transactions won't block the write transaction if the
  	// InitialMmapSize is large enough to hold the database mmap size.
  	// (See DB.Begin for more information.)
  	//
  	// If <=0, the initial map size is 0.
  	// If InitialMmapSize is smaller than the previous database size,
  	// it takes no effect.
  	InitialMmapSize int
  }

  // DefaultOptions are the options used when nil options are passed into
  // Open(). Every field has its zero value; in particular no timeout is
  // set, which will cause Bolt to wait indefinitely for a lock.
  var DefaultOptions = &Options{}
  790. // Stats represents statistics about the database.
  791. type Stats struct {
  792. // Freelist stats
  793. FreePageN int // total number of free pages on the freelist
  794. PendingPageN int // total number of pending pages on the freelist
  795. FreeAlloc int // total bytes allocated in free pages
  796. FreelistInuse int // total bytes used by the freelist
  797. // Transaction stats
  798. TxN int // total number of started read transactions
  799. OpenTxN int // number of currently open read transactions
  800. TxStats TxStats // global, ongoing stats.
  801. }
  802. // Sub calculates and returns the difference between two sets of database stats.
  803. // This is useful when obtaining stats at two different points and time and
  804. // you need the performance counters that occurred within that time span.
  805. func (s *Stats) Sub(other *Stats) Stats {
  806. if other == nil {
  807. return *s
  808. }
  809. var diff Stats
  810. diff.FreePageN = s.FreePageN
  811. diff.PendingPageN = s.PendingPageN
  812. diff.FreeAlloc = s.FreeAlloc
  813. diff.FreelistInuse = s.FreelistInuse
  814. diff.TxN = s.TxN - other.TxN
  815. diff.TxStats = s.TxStats.Sub(&other.TxStats)
  816. return diff
  817. }
  818. func (s *Stats) add(other *Stats) {
  819. s.TxStats.add(&other.TxStats)
  820. }
  // Info carries the values returned by DB.Info.
  type Info struct {
  	// Data is the address of the first byte of the mapped data.
  	Data uintptr
  	// PageSize is the database page size.
  	PageSize int
  }
  825. type meta struct {
  826. magic uint32
  827. version uint32
  828. pageSize uint32
  829. flags uint32
  830. root bucket
  831. freelist pgid
  832. pgid pgid
  833. txid txid
  834. checksum uint64
  835. }
  836. // validate checks the marker bytes and version of the meta page to ensure it matches this binary.
  837. func (m *meta) validate() error {
  838. if m.magic != magic {
  839. return ErrInvalid
  840. } else if m.version != version {
  841. return ErrVersionMismatch
  842. } else if m.checksum != 0 && m.checksum != m.sum64() {
  843. return ErrChecksum
  844. }
  845. return nil
  846. }
  847. // copy copies one meta object to another.
  848. func (m *meta) copy(dest *meta) {
  849. *dest = *m
  850. }
  851. // write writes the meta onto a page.
  852. func (m *meta) write(p *page) {
  853. if m.root.root >= m.pgid {
  854. panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
  855. } else if m.freelist >= m.pgid {
  856. panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
  857. }
  858. // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
  859. p.id = pgid(m.txid % 2)
  860. p.flags |= metaPageFlag
  861. // Calculate the checksum.
  862. m.checksum = m.sum64()
  863. m.copy(p.meta())
  864. }
  865. // generates the checksum for the meta.
  866. func (m *meta) sum64() uint64 {
  867. var h = fnv.New64a()
  868. _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
  869. return h.Sum64()
  870. }
  // _assert panics with "assertion failed: " followed by the formatted
  // message when condition is false, and does nothing otherwise.
  func _assert(condition bool, msg string, v ...interface{}) {
  	if condition {
  		return
  	}
  	panic(fmt.Sprintf("assertion failed: "+msg, v...))
  }
  // warn writes its arguments to standard error in fmt.Fprintln format.
  func warn(v ...interface{}) {
  	fmt.Fprintln(os.Stderr, v...)
  }

  // warnf writes a formatted message followed by a newline to standard
  // error.
  func warnf(msg string, v ...interface{}) {
  	fmt.Fprintf(os.Stderr, msg+"\n", v...)
  }
  // printstack writes the current goroutine's stack trace to standard error,
  // with the first two lines of the debug.Stack() output removed.
  func printstack() {
  	lines := strings.Split(string(debug.Stack()), "\n")
  	trimmed := strings.Join(lines[2:], "\n")
  	fmt.Fprintln(os.Stderr, trimmed)
  }