You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

443 lines
13 KiB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mapping
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "github.com/blevesearch/bleve/analysis"
  19. "github.com/blevesearch/bleve/analysis/analyzer/standard"
  20. "github.com/blevesearch/bleve/analysis/datetime/optional"
  21. "github.com/blevesearch/bleve/document"
  22. "github.com/blevesearch/bleve/registry"
  23. )
  24. var MappingJSONStrict = false
  25. const defaultTypeField = "_type"
  26. const defaultType = "_default"
  27. const defaultField = "_all"
  28. const defaultAnalyzer = standard.Name
  29. const defaultDateTimeParser = optional.Name
  30. // An IndexMappingImpl controls how objects are placed
  31. // into an index.
  32. // First the type of the object is determined.
  33. // Once the type is know, the appropriate
  34. // DocumentMapping is selected by the type.
  35. // If no mapping was determined for that type,
  36. // a DefaultMapping will be used.
  37. type IndexMappingImpl struct {
  38. TypeMapping map[string]*DocumentMapping `json:"types,omitempty"`
  39. DefaultMapping *DocumentMapping `json:"default_mapping"`
  40. TypeField string `json:"type_field"`
  41. DefaultType string `json:"default_type"`
  42. DefaultAnalyzer string `json:"default_analyzer"`
  43. DefaultDateTimeParser string `json:"default_datetime_parser"`
  44. DefaultField string `json:"default_field"`
  45. StoreDynamic bool `json:"store_dynamic"`
  46. IndexDynamic bool `json:"index_dynamic"`
  47. DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"`
  48. CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
  49. cache *registry.Cache
  50. }
  51. // AddCustomCharFilter defines a custom char filter for use in this mapping
  52. func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error {
  53. _, err := im.cache.DefineCharFilter(name, config)
  54. if err != nil {
  55. return err
  56. }
  57. im.CustomAnalysis.CharFilters[name] = config
  58. return nil
  59. }
  60. // AddCustomTokenizer defines a custom tokenizer for use in this mapping
  61. func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error {
  62. _, err := im.cache.DefineTokenizer(name, config)
  63. if err != nil {
  64. return err
  65. }
  66. im.CustomAnalysis.Tokenizers[name] = config
  67. return nil
  68. }
  69. // AddCustomTokenMap defines a custom token map for use in this mapping
  70. func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error {
  71. _, err := im.cache.DefineTokenMap(name, config)
  72. if err != nil {
  73. return err
  74. }
  75. im.CustomAnalysis.TokenMaps[name] = config
  76. return nil
  77. }
  78. // AddCustomTokenFilter defines a custom token filter for use in this mapping
  79. func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error {
  80. _, err := im.cache.DefineTokenFilter(name, config)
  81. if err != nil {
  82. return err
  83. }
  84. im.CustomAnalysis.TokenFilters[name] = config
  85. return nil
  86. }
  87. // AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
  88. // config map must have a "type" string entry to resolve the analyzer
  89. // constructor. The constructor is invoked with the remaining entries and
  90. // returned analyzer is registered in the IndexMapping.
  91. //
  92. // bleve comes with predefined analyzers, like
  93. // github.com/blevesearch/bleve/analysis/analyzer/custom. They are
  94. // available only if their package is imported by client code. To achieve this,
  95. // use their metadata to fill configuration entries:
  96. //
  97. // import (
  98. // "github.com/blevesearch/bleve/analysis/analyzer/custom"
  99. // "github.com/blevesearch/bleve/analysis/char/html"
  100. // "github.com/blevesearch/bleve/analysis/token/lowercase"
  101. // "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
  102. // )
  103. //
  104. // m := bleve.NewIndexMapping()
  105. // err := m.AddCustomAnalyzer("html", map[string]interface{}{
  106. // "type": custom.Name,
  107. // "char_filters": []string{
  108. // html.Name,
  109. // },
  110. // "tokenizer": unicode.Name,
  111. // "token_filters": []string{
  112. // lowercase.Name,
  113. // ...
  114. // },
  115. // })
  116. func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error {
  117. _, err := im.cache.DefineAnalyzer(name, config)
  118. if err != nil {
  119. return err
  120. }
  121. im.CustomAnalysis.Analyzers[name] = config
  122. return nil
  123. }
  124. // AddCustomDateTimeParser defines a custom date time parser for use in this mapping
  125. func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error {
  126. _, err := im.cache.DefineDateTimeParser(name, config)
  127. if err != nil {
  128. return err
  129. }
  130. im.CustomAnalysis.DateTimeParsers[name] = config
  131. return nil
  132. }
  133. // NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
  134. func NewIndexMapping() *IndexMappingImpl {
  135. return &IndexMappingImpl{
  136. TypeMapping: make(map[string]*DocumentMapping),
  137. DefaultMapping: NewDocumentMapping(),
  138. TypeField: defaultTypeField,
  139. DefaultType: defaultType,
  140. DefaultAnalyzer: defaultAnalyzer,
  141. DefaultDateTimeParser: defaultDateTimeParser,
  142. DefaultField: defaultField,
  143. IndexDynamic: IndexDynamic,
  144. StoreDynamic: StoreDynamic,
  145. DocValuesDynamic: DocValuesDynamic,
  146. CustomAnalysis: newCustomAnalysis(),
  147. cache: registry.NewCache(),
  148. }
  149. }
  150. // Validate will walk the entire structure ensuring the following
  151. // explicitly named and default analyzers can be built
  152. func (im *IndexMappingImpl) Validate() error {
  153. _, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer)
  154. if err != nil {
  155. return err
  156. }
  157. _, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser)
  158. if err != nil {
  159. return err
  160. }
  161. err = im.DefaultMapping.Validate(im.cache)
  162. if err != nil {
  163. return err
  164. }
  165. for _, docMapping := range im.TypeMapping {
  166. err = docMapping.Validate(im.cache)
  167. if err != nil {
  168. return err
  169. }
  170. }
  171. return nil
  172. }
  173. // AddDocumentMapping sets a custom document mapping for the specified type
  174. func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
  175. im.TypeMapping[doctype] = dm
  176. }
  177. func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping {
  178. docMapping := im.TypeMapping[docType]
  179. if docMapping == nil {
  180. docMapping = im.DefaultMapping
  181. }
  182. return docMapping
  183. }
  184. // UnmarshalJSON offers custom unmarshaling with optional strict validation
  185. func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
  186. var tmp map[string]json.RawMessage
  187. err := json.Unmarshal(data, &tmp)
  188. if err != nil {
  189. return err
  190. }
  191. // set defaults for fields which might have been omitted
  192. im.cache = registry.NewCache()
  193. im.CustomAnalysis = newCustomAnalysis()
  194. im.TypeField = defaultTypeField
  195. im.DefaultType = defaultType
  196. im.DefaultAnalyzer = defaultAnalyzer
  197. im.DefaultDateTimeParser = defaultDateTimeParser
  198. im.DefaultField = defaultField
  199. im.DefaultMapping = NewDocumentMapping()
  200. im.TypeMapping = make(map[string]*DocumentMapping)
  201. im.StoreDynamic = StoreDynamic
  202. im.IndexDynamic = IndexDynamic
  203. im.DocValuesDynamic = DocValuesDynamic
  204. var invalidKeys []string
  205. for k, v := range tmp {
  206. switch k {
  207. case "analysis":
  208. err := json.Unmarshal(v, &im.CustomAnalysis)
  209. if err != nil {
  210. return err
  211. }
  212. case "type_field":
  213. err := json.Unmarshal(v, &im.TypeField)
  214. if err != nil {
  215. return err
  216. }
  217. case "default_type":
  218. err := json.Unmarshal(v, &im.DefaultType)
  219. if err != nil {
  220. return err
  221. }
  222. case "default_analyzer":
  223. err := json.Unmarshal(v, &im.DefaultAnalyzer)
  224. if err != nil {
  225. return err
  226. }
  227. case "default_datetime_parser":
  228. err := json.Unmarshal(v, &im.DefaultDateTimeParser)
  229. if err != nil {
  230. return err
  231. }
  232. case "default_field":
  233. err := json.Unmarshal(v, &im.DefaultField)
  234. if err != nil {
  235. return err
  236. }
  237. case "default_mapping":
  238. err := json.Unmarshal(v, &im.DefaultMapping)
  239. if err != nil {
  240. return err
  241. }
  242. case "types":
  243. err := json.Unmarshal(v, &im.TypeMapping)
  244. if err != nil {
  245. return err
  246. }
  247. case "store_dynamic":
  248. err := json.Unmarshal(v, &im.StoreDynamic)
  249. if err != nil {
  250. return err
  251. }
  252. case "index_dynamic":
  253. err := json.Unmarshal(v, &im.IndexDynamic)
  254. if err != nil {
  255. return err
  256. }
  257. case "docvalues_dynamic":
  258. err := json.Unmarshal(v, &im.DocValuesDynamic)
  259. if err != nil {
  260. return err
  261. }
  262. default:
  263. invalidKeys = append(invalidKeys, k)
  264. }
  265. }
  266. if MappingJSONStrict && len(invalidKeys) > 0 {
  267. return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
  268. }
  269. err = im.CustomAnalysis.registerAll(im)
  270. if err != nil {
  271. return err
  272. }
  273. return nil
  274. }
  275. func (im *IndexMappingImpl) determineType(data interface{}) string {
  276. // first see if the object implements bleveClassifier
  277. bleveClassifier, ok := data.(bleveClassifier)
  278. if ok {
  279. return bleveClassifier.BleveType()
  280. }
  281. // next see if the object implements Classifier
  282. classifier, ok := data.(Classifier)
  283. if ok {
  284. return classifier.Type()
  285. }
  286. // now see if we can find a type using the mapping
  287. typ, ok := mustString(lookupPropertyPath(data, im.TypeField))
  288. if ok {
  289. return typ
  290. }
  291. return im.DefaultType
  292. }
  293. func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
  294. docType := im.determineType(data)
  295. docMapping := im.mappingForType(docType)
  296. if docMapping.Enabled {
  297. walkContext := im.newWalkContext(doc, docMapping)
  298. docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
  299. // see if the _all field was disabled
  300. allMapping := docMapping.documentMappingForPath("_all")
  301. if allMapping == nil || allMapping.Enabled {
  302. field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
  303. doc.AddField(field)
  304. }
  305. }
  306. return nil
  307. }
  308. type walkContext struct {
  309. doc *document.Document
  310. im *IndexMappingImpl
  311. dm *DocumentMapping
  312. excludedFromAll []string
  313. }
  314. func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
  315. return &walkContext{
  316. doc: doc,
  317. im: im,
  318. dm: dm,
  319. excludedFromAll: []string{"_id"},
  320. }
  321. }
  322. // AnalyzerNameForPath attempts to find the best analyzer to use with only a
  323. // field name will walk all the document types, look for field mappings at the
  324. // provided path, if one exists and it has an explicit analyzer that is
  325. // returned.
  326. func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
  327. // first we look for explicit mapping on the field
  328. for _, docMapping := range im.TypeMapping {
  329. analyzerName := docMapping.analyzerNameForPath(path)
  330. if analyzerName != "" {
  331. return analyzerName
  332. }
  333. }
  334. // now try the default mapping
  335. pathMapping := im.DefaultMapping.documentMappingForPath(path)
  336. if pathMapping != nil {
  337. if len(pathMapping.Fields) > 0 {
  338. if pathMapping.Fields[0].Analyzer != "" {
  339. return pathMapping.Fields[0].Analyzer
  340. }
  341. }
  342. }
  343. // next we will try default analyzers for the path
  344. pathDecoded := decodePath(path)
  345. for _, docMapping := range im.TypeMapping {
  346. rv := docMapping.defaultAnalyzerName(pathDecoded)
  347. if rv != "" {
  348. return rv
  349. }
  350. }
  351. return im.DefaultAnalyzer
  352. }
  353. func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer {
  354. analyzer, err := im.cache.AnalyzerNamed(name)
  355. if err != nil {
  356. logger.Printf("error using analyzer named: %s", name)
  357. return nil
  358. }
  359. return analyzer
  360. }
  361. func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser {
  362. if name == "" {
  363. name = im.DefaultDateTimeParser
  364. }
  365. dateTimeParser, err := im.cache.DateTimeParserNamed(name)
  366. if err != nil {
  367. logger.Printf("error using datetime parser named: %s", name)
  368. return nil
  369. }
  370. return dateTimeParser
  371. }
  372. func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
  373. // first we look for explicit mapping on the field
  374. for _, docMapping := range im.TypeMapping {
  375. pathMapping := docMapping.documentMappingForPath(path)
  376. if pathMapping != nil {
  377. if len(pathMapping.Fields) > 0 {
  378. if pathMapping.Fields[0].Analyzer != "" {
  379. return pathMapping.Fields[0].Analyzer
  380. }
  381. }
  382. }
  383. }
  384. return im.DefaultDateTimeParser
  385. }
  386. func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
  387. analyzer, err := im.cache.AnalyzerNamed(analyzerName)
  388. if err != nil {
  389. return nil, err
  390. }
  391. return analyzer.Analyze(text), nil
  392. }
  393. // FieldAnalyzer returns the name of the analyzer used on a field.
  394. func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
  395. return im.AnalyzerNameForPath(field)
  396. }
  397. // wrapper to satisfy new interface
  398. func (im *IndexMappingImpl) DefaultSearchField() string {
  399. return im.DefaultField
  400. }