You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

729 lines
16 KiB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "os"
  19. "sync"
  20. "sync/atomic"
  21. "time"
  22. "golang.org/x/net/context"
  23. "github.com/blevesearch/bleve/document"
  24. "github.com/blevesearch/bleve/index"
  25. "github.com/blevesearch/bleve/index/store"
  26. "github.com/blevesearch/bleve/index/upsidedown"
  27. "github.com/blevesearch/bleve/mapping"
  28. "github.com/blevesearch/bleve/registry"
  29. "github.com/blevesearch/bleve/search"
  30. "github.com/blevesearch/bleve/search/collector"
  31. "github.com/blevesearch/bleve/search/facet"
  32. "github.com/blevesearch/bleve/search/highlight"
  33. )
// indexImpl is the Index implementation defined in this file.  It pairs an
// underlying index.Index with the mapping used to turn objects into
// documents.
type indexImpl struct {
	// path is the location the index was created at or opened from.
	path string
	// name is initialised to path by the constructors and can be changed
	// with SetName.
	name string
	// meta carries the index type, storage name and store configuration.
	meta *indexMeta
	// i is the underlying index that operations are delegated to.
	i index.Index
	// m is the mapping used by Index and SearchInContext.
	m mapping.IndexMapping
	// mutex guards open: operations hold the read lock, Close takes the
	// write lock.
	mutex sync.RWMutex
	// open is set once construction succeeds and cleared by Close.
	open bool
	// stats holds the search counters updated by SearchInContext.
	stats *IndexStat
}
  44. const storePath = "store"
  45. var mappingInternalKey = []byte("_mapping")
  46. func indexStorePath(path string) string {
  47. return path + string(os.PathSeparator) + storePath
  48. }
  49. func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
  50. // first validate the mapping
  51. err := mapping.Validate()
  52. if err != nil {
  53. return nil, err
  54. }
  55. if kvconfig == nil {
  56. kvconfig = map[string]interface{}{}
  57. }
  58. if kvstore == "" {
  59. return nil, fmt.Errorf("bleve not configured for file based indexing")
  60. }
  61. rv := indexImpl{
  62. path: path,
  63. name: path,
  64. m: mapping,
  65. meta: newIndexMeta(indexType, kvstore, kvconfig),
  66. }
  67. rv.stats = &IndexStat{i: &rv}
  68. // at this point there is hope that we can be successful, so save index meta
  69. if path != "" {
  70. err = rv.meta.Save(path)
  71. if err != nil {
  72. return nil, err
  73. }
  74. kvconfig["create_if_missing"] = true
  75. kvconfig["error_if_exists"] = true
  76. kvconfig["path"] = indexStorePath(path)
  77. } else {
  78. kvconfig["path"] = ""
  79. }
  80. // open the index
  81. indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
  82. if indexTypeConstructor == nil {
  83. return nil, ErrorUnknownIndexType
  84. }
  85. rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue)
  86. if err != nil {
  87. return nil, err
  88. }
  89. err = rv.i.Open()
  90. if err != nil {
  91. if err == index.ErrorUnknownStorageType {
  92. return nil, ErrorUnknownStorageType
  93. }
  94. return nil, err
  95. }
  96. // now persist the mapping
  97. mappingBytes, err := json.Marshal(mapping)
  98. if err != nil {
  99. return nil, err
  100. }
  101. err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
  102. if err != nil {
  103. return nil, err
  104. }
  105. // mark the index as open
  106. rv.mutex.Lock()
  107. defer rv.mutex.Unlock()
  108. rv.open = true
  109. indexStats.Register(&rv)
  110. return &rv, nil
  111. }
  112. func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
  113. rv = &indexImpl{
  114. path: path,
  115. name: path,
  116. }
  117. rv.stats = &IndexStat{i: rv}
  118. rv.meta, err = openIndexMeta(path)
  119. if err != nil {
  120. return nil, err
  121. }
  122. // backwards compatibility if index type is missing
  123. if rv.meta.IndexType == "" {
  124. rv.meta.IndexType = upsidedown.Name
  125. }
  126. storeConfig := rv.meta.Config
  127. if storeConfig == nil {
  128. storeConfig = map[string]interface{}{}
  129. }
  130. storeConfig["path"] = indexStorePath(path)
  131. storeConfig["create_if_missing"] = false
  132. storeConfig["error_if_exists"] = false
  133. for rck, rcv := range runtimeConfig {
  134. storeConfig[rck] = rcv
  135. }
  136. // open the index
  137. indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
  138. if indexTypeConstructor == nil {
  139. return nil, ErrorUnknownIndexType
  140. }
  141. rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
  142. if err != nil {
  143. return nil, err
  144. }
  145. err = rv.i.Open()
  146. if err != nil {
  147. if err == index.ErrorUnknownStorageType {
  148. return nil, ErrorUnknownStorageType
  149. }
  150. return nil, err
  151. }
  152. // now load the mapping
  153. indexReader, err := rv.i.Reader()
  154. if err != nil {
  155. return nil, err
  156. }
  157. defer func() {
  158. if cerr := indexReader.Close(); cerr != nil && err == nil {
  159. err = cerr
  160. }
  161. }()
  162. mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
  163. if err != nil {
  164. return nil, err
  165. }
  166. var im *mapping.IndexMappingImpl
  167. err = json.Unmarshal(mappingBytes, &im)
  168. if err != nil {
  169. return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
  170. }
  171. // mark the index as open
  172. rv.mutex.Lock()
  173. defer rv.mutex.Unlock()
  174. rv.open = true
  175. // validate the mapping
  176. err = im.Validate()
  177. if err != nil {
  178. // note even if the mapping is invalid
  179. // we still return an open usable index
  180. return rv, err
  181. }
  182. rv.m = im
  183. indexStats.Register(rv)
  184. return rv, err
  185. }
  186. // Advanced returns implementation internals
  187. // necessary ONLY for advanced usage.
  188. func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) {
  189. s, err := i.i.Advanced()
  190. if err != nil {
  191. return nil, nil, err
  192. }
  193. return i.i, s, nil
  194. }
  195. // Mapping returns the IndexMapping in use by this
  196. // Index.
  197. func (i *indexImpl) Mapping() mapping.IndexMapping {
  198. return i.m
  199. }
  200. // Index the object with the specified identifier.
  201. // The IndexMapping for this index will determine
  202. // how the object is indexed.
  203. func (i *indexImpl) Index(id string, data interface{}) (err error) {
  204. if id == "" {
  205. return ErrorEmptyID
  206. }
  207. i.mutex.RLock()
  208. defer i.mutex.RUnlock()
  209. if !i.open {
  210. return ErrorIndexClosed
  211. }
  212. doc := document.NewDocument(id)
  213. err = i.m.MapDocument(doc, data)
  214. if err != nil {
  215. return
  216. }
  217. err = i.i.Update(doc)
  218. return
  219. }
  220. // Delete entries for the specified identifier from
  221. // the index.
  222. func (i *indexImpl) Delete(id string) (err error) {
  223. if id == "" {
  224. return ErrorEmptyID
  225. }
  226. i.mutex.RLock()
  227. defer i.mutex.RUnlock()
  228. if !i.open {
  229. return ErrorIndexClosed
  230. }
  231. err = i.i.Delete(id)
  232. return
  233. }
  234. // Batch executes multiple Index and Delete
  235. // operations at the same time. There are often
  236. // significant performance benefits when performing
  237. // operations in a batch.
  238. func (i *indexImpl) Batch(b *Batch) error {
  239. i.mutex.RLock()
  240. defer i.mutex.RUnlock()
  241. if !i.open {
  242. return ErrorIndexClosed
  243. }
  244. return i.i.Batch(b.internal)
  245. }
  246. // Document is used to find the values of all the
  247. // stored fields for a document in the index. These
  248. // stored fields are put back into a Document object
  249. // and returned.
  250. func (i *indexImpl) Document(id string) (doc *document.Document, err error) {
  251. i.mutex.RLock()
  252. defer i.mutex.RUnlock()
  253. if !i.open {
  254. return nil, ErrorIndexClosed
  255. }
  256. indexReader, err := i.i.Reader()
  257. if err != nil {
  258. return nil, err
  259. }
  260. defer func() {
  261. if cerr := indexReader.Close(); err == nil && cerr != nil {
  262. err = cerr
  263. }
  264. }()
  265. doc, err = indexReader.Document(id)
  266. if err != nil {
  267. return nil, err
  268. }
  269. return doc, nil
  270. }
  271. // DocCount returns the number of documents in the
  272. // index.
  273. func (i *indexImpl) DocCount() (count uint64, err error) {
  274. i.mutex.RLock()
  275. defer i.mutex.RUnlock()
  276. if !i.open {
  277. return 0, ErrorIndexClosed
  278. }
  279. // open a reader for this search
  280. indexReader, err := i.i.Reader()
  281. if err != nil {
  282. return 0, fmt.Errorf("error opening index reader %v", err)
  283. }
  284. defer func() {
  285. if cerr := indexReader.Close(); err == nil && cerr != nil {
  286. err = cerr
  287. }
  288. }()
  289. count, err = indexReader.DocCount()
  290. return
  291. }
  292. // Search executes a search request operation.
  293. // Returns a SearchResult object or an error.
  294. func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
  295. return i.SearchInContext(context.Background(), req)
  296. }
  297. // SearchInContext executes a search request operation within the provided
  298. // Context. Returns a SearchResult object or an error.
  299. func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
  300. i.mutex.RLock()
  301. defer i.mutex.RUnlock()
  302. searchStart := time.Now()
  303. if !i.open {
  304. return nil, ErrorIndexClosed
  305. }
  306. collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
  307. // open a reader for this search
  308. indexReader, err := i.i.Reader()
  309. if err != nil {
  310. return nil, fmt.Errorf("error opening index reader %v", err)
  311. }
  312. defer func() {
  313. if cerr := indexReader.Close(); err == nil && cerr != nil {
  314. err = cerr
  315. }
  316. }()
  317. searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain)
  318. if err != nil {
  319. return nil, err
  320. }
  321. defer func() {
  322. if serr := searcher.Close(); err == nil && serr != nil {
  323. err = serr
  324. }
  325. }()
  326. if req.Facets != nil {
  327. facetsBuilder := search.NewFacetsBuilder(indexReader)
  328. for facetName, facetRequest := range req.Facets {
  329. if facetRequest.NumericRanges != nil {
  330. // build numeric range facet
  331. facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
  332. for _, nr := range facetRequest.NumericRanges {
  333. facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
  334. }
  335. facetsBuilder.Add(facetName, facetBuilder)
  336. } else if facetRequest.DateTimeRanges != nil {
  337. // build date range facet
  338. facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
  339. dateTimeParser := i.m.DateTimeParserNamed("")
  340. for _, dr := range facetRequest.DateTimeRanges {
  341. start, end := dr.ParseDates(dateTimeParser)
  342. facetBuilder.AddRange(dr.Name, start, end)
  343. }
  344. facetsBuilder.Add(facetName, facetBuilder)
  345. } else {
  346. // build terms facet
  347. facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
  348. facetsBuilder.Add(facetName, facetBuilder)
  349. }
  350. }
  351. collector.SetFacetsBuilder(facetsBuilder)
  352. }
  353. err = collector.Collect(ctx, searcher, indexReader)
  354. if err != nil {
  355. return nil, err
  356. }
  357. hits := collector.Results()
  358. var highlighter highlight.Highlighter
  359. if req.Highlight != nil {
  360. // get the right highlighter
  361. highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
  362. if err != nil {
  363. return nil, err
  364. }
  365. if req.Highlight.Style != nil {
  366. highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
  367. if err != nil {
  368. return nil, err
  369. }
  370. }
  371. if highlighter == nil {
  372. return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
  373. }
  374. }
  375. for _, hit := range hits {
  376. if len(req.Fields) > 0 || highlighter != nil {
  377. doc, err := indexReader.Document(hit.ID)
  378. if err == nil && doc != nil {
  379. if len(req.Fields) > 0 {
  380. for _, f := range req.Fields {
  381. for _, docF := range doc.Fields {
  382. if f == "*" || docF.Name() == f {
  383. var value interface{}
  384. switch docF := docF.(type) {
  385. case *document.TextField:
  386. value = string(docF.Value())
  387. case *document.NumericField:
  388. num, err := docF.Number()
  389. if err == nil {
  390. value = num
  391. }
  392. case *document.DateTimeField:
  393. datetime, err := docF.DateTime()
  394. if err == nil {
  395. value = datetime.Format(time.RFC3339)
  396. }
  397. case *document.BooleanField:
  398. boolean, err := docF.Boolean()
  399. if err == nil {
  400. value = boolean
  401. }
  402. }
  403. if value != nil {
  404. hit.AddFieldValue(docF.Name(), value)
  405. }
  406. }
  407. }
  408. }
  409. }
  410. if highlighter != nil {
  411. highlightFields := req.Highlight.Fields
  412. if highlightFields == nil {
  413. // add all fields with matches
  414. highlightFields = make([]string, 0, len(hit.Locations))
  415. for k := range hit.Locations {
  416. highlightFields = append(highlightFields, k)
  417. }
  418. }
  419. for _, hf := range highlightFields {
  420. highlighter.BestFragmentsInField(hit, doc, hf, 1)
  421. }
  422. }
  423. } else if doc == nil {
  424. // unexpected case, a doc ID that was found as a search hit
  425. // was unable to be found during document lookup
  426. return nil, ErrorIndexReadInconsistency
  427. }
  428. }
  429. if i.name != "" {
  430. hit.Index = i.name
  431. }
  432. }
  433. atomic.AddUint64(&i.stats.searches, 1)
  434. searchDuration := time.Since(searchStart)
  435. atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
  436. if Config.SlowSearchLogThreshold > 0 &&
  437. searchDuration > Config.SlowSearchLogThreshold {
  438. logger.Printf("slow search took %s - %v", searchDuration, req)
  439. }
  440. return &SearchResult{
  441. Status: &SearchStatus{
  442. Total: 1,
  443. Failed: 0,
  444. Successful: 1,
  445. Errors: make(map[string]error),
  446. },
  447. Request: req,
  448. Hits: hits,
  449. Total: collector.Total(),
  450. MaxScore: collector.MaxScore(),
  451. Took: searchDuration,
  452. Facets: collector.FacetResults(),
  453. }, nil
  454. }
  455. // Fields returns the name of all the fields this
  456. // Index has operated on.
  457. func (i *indexImpl) Fields() (fields []string, err error) {
  458. i.mutex.RLock()
  459. defer i.mutex.RUnlock()
  460. if !i.open {
  461. return nil, ErrorIndexClosed
  462. }
  463. indexReader, err := i.i.Reader()
  464. if err != nil {
  465. return nil, err
  466. }
  467. defer func() {
  468. if cerr := indexReader.Close(); err == nil && cerr != nil {
  469. err = cerr
  470. }
  471. }()
  472. fields, err = indexReader.Fields()
  473. if err != nil {
  474. return nil, err
  475. }
  476. return fields, nil
  477. }
  478. func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
  479. i.mutex.RLock()
  480. if !i.open {
  481. i.mutex.RUnlock()
  482. return nil, ErrorIndexClosed
  483. }
  484. indexReader, err := i.i.Reader()
  485. if err != nil {
  486. i.mutex.RUnlock()
  487. return nil, err
  488. }
  489. fieldDict, err := indexReader.FieldDict(field)
  490. if err != nil {
  491. i.mutex.RUnlock()
  492. return nil, err
  493. }
  494. return &indexImplFieldDict{
  495. index: i,
  496. indexReader: indexReader,
  497. fieldDict: fieldDict,
  498. }, nil
  499. }
  500. func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
  501. i.mutex.RLock()
  502. if !i.open {
  503. i.mutex.RUnlock()
  504. return nil, ErrorIndexClosed
  505. }
  506. indexReader, err := i.i.Reader()
  507. if err != nil {
  508. i.mutex.RUnlock()
  509. return nil, err
  510. }
  511. fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
  512. if err != nil {
  513. i.mutex.RUnlock()
  514. return nil, err
  515. }
  516. return &indexImplFieldDict{
  517. index: i,
  518. indexReader: indexReader,
  519. fieldDict: fieldDict,
  520. }, nil
  521. }
  522. func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
  523. i.mutex.RLock()
  524. if !i.open {
  525. i.mutex.RUnlock()
  526. return nil, ErrorIndexClosed
  527. }
  528. indexReader, err := i.i.Reader()
  529. if err != nil {
  530. i.mutex.RUnlock()
  531. return nil, err
  532. }
  533. fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
  534. if err != nil {
  535. i.mutex.RUnlock()
  536. return nil, err
  537. }
  538. return &indexImplFieldDict{
  539. index: i,
  540. indexReader: indexReader,
  541. fieldDict: fieldDict,
  542. }, nil
  543. }
  544. func (i *indexImpl) Close() error {
  545. i.mutex.Lock()
  546. defer i.mutex.Unlock()
  547. indexStats.UnRegister(i)
  548. i.open = false
  549. return i.i.Close()
  550. }
  551. func (i *indexImpl) Stats() *IndexStat {
  552. return i.stats
  553. }
  554. func (i *indexImpl) StatsMap() map[string]interface{} {
  555. return i.stats.statsMap()
  556. }
  557. func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
  558. i.mutex.RLock()
  559. defer i.mutex.RUnlock()
  560. if !i.open {
  561. return nil, ErrorIndexClosed
  562. }
  563. reader, err := i.i.Reader()
  564. if err != nil {
  565. return nil, err
  566. }
  567. defer func() {
  568. if cerr := reader.Close(); err == nil && cerr != nil {
  569. err = cerr
  570. }
  571. }()
  572. val, err = reader.GetInternal(key)
  573. if err != nil {
  574. return nil, err
  575. }
  576. return val, nil
  577. }
  578. func (i *indexImpl) SetInternal(key, val []byte) error {
  579. i.mutex.RLock()
  580. defer i.mutex.RUnlock()
  581. if !i.open {
  582. return ErrorIndexClosed
  583. }
  584. return i.i.SetInternal(key, val)
  585. }
  586. func (i *indexImpl) DeleteInternal(key []byte) error {
  587. i.mutex.RLock()
  588. defer i.mutex.RUnlock()
  589. if !i.open {
  590. return ErrorIndexClosed
  591. }
  592. return i.i.DeleteInternal(key)
  593. }
  594. // NewBatch creates a new empty batch.
  595. func (i *indexImpl) NewBatch() *Batch {
  596. return &Batch{
  597. index: i,
  598. internal: index.NewBatch(),
  599. }
  600. }
  601. func (i *indexImpl) Name() string {
  602. return i.name
  603. }
  604. func (i *indexImpl) SetName(name string) {
  605. indexStats.UnRegister(i)
  606. i.name = name
  607. indexStats.Register(i)
  608. }
// indexImplFieldDict is the index.FieldDict handed out by the FieldDict
// methods of indexImpl.  It keeps the reader the dictionary was obtained
// from, and the owning index whose read lock is released on Close.
type indexImplFieldDict struct {
	// index is the owning index; Close releases its read lock.
	index *indexImpl
	// indexReader is the reader the dictionary was obtained from.
	indexReader index.IndexReader
	// fieldDict is the wrapped dictionary that Next delegates to.
	fieldDict index.FieldDict
}
  614. func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
  615. return f.fieldDict.Next()
  616. }
  617. func (f *indexImplFieldDict) Close() error {
  618. defer f.index.mutex.RUnlock()
  619. err := f.fieldDict.Close()
  620. if err != nil {
  621. return err
  622. }
  623. return f.indexReader.Close()
  624. }