You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

490 lines
15 KiB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mapping
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "reflect"
  19. "time"
  20. "github.com/blevesearch/bleve/registry"
  21. )
  22. // A DocumentMapping describes how a type of document
  23. // should be indexed.
  24. // As documents can be hierarchical, named sub-sections
  25. // of documents are mapped using the same structure in
  26. // the Properties field.
  27. // Each value inside a document can be indexed 0 or more
  28. // ways. These index entries are called fields and
  29. // are stored in the Fields field.
  30. // Entire sections of a document can be ignored or
  31. // excluded by setting Enabled to false.
  32. // If not explicitly mapped, default mapping operations
  33. // are used. To disable this automatic handling, set
  34. // Dynamic to false.
  35. type DocumentMapping struct {
  36. Enabled bool `json:"enabled"`
  37. Dynamic bool `json:"dynamic"`
  38. Properties map[string]*DocumentMapping `json:"properties,omitempty"`
  39. Fields []*FieldMapping `json:"fields,omitempty"`
  40. DefaultAnalyzer string `json:"default_analyzer"`
  41. // StructTagKey overrides "json" when looking for field names in struct tags
  42. StructTagKey string `json:"struct_tag_key,omitempty"`
  43. }
  44. func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
  45. var err error
  46. if dm.DefaultAnalyzer != "" {
  47. _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
  48. if err != nil {
  49. return err
  50. }
  51. }
  52. for _, property := range dm.Properties {
  53. err = property.Validate(cache)
  54. if err != nil {
  55. return err
  56. }
  57. }
  58. for _, field := range dm.Fields {
  59. if field.Analyzer != "" {
  60. _, err = cache.AnalyzerNamed(field.Analyzer)
  61. if err != nil {
  62. return err
  63. }
  64. }
  65. if field.DateFormat != "" {
  66. _, err = cache.DateTimeParserNamed(field.DateFormat)
  67. if err != nil {
  68. return err
  69. }
  70. }
  71. switch field.Type {
  72. case "text", "datetime", "number", "boolean":
  73. default:
  74. return fmt.Errorf("unknown field type: '%s'", field.Type)
  75. }
  76. }
  77. return nil
  78. }
  79. // analyzerNameForPath attempts to first find the field
  80. // described by this path, then returns the analyzer
  81. // configured for that field
  82. func (dm *DocumentMapping) analyzerNameForPath(path string) string {
  83. field := dm.fieldDescribedByPath(path)
  84. if field != nil {
  85. return field.Analyzer
  86. }
  87. return ""
  88. }
  89. func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
  90. pathElements := decodePath(path)
  91. if len(pathElements) > 1 {
  92. // easy case, there is more than 1 path element remaining
  93. // the next path element must match a property name
  94. // at this level
  95. for propName, subDocMapping := range dm.Properties {
  96. if propName == pathElements[0] {
  97. return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
  98. }
  99. }
  100. } else {
  101. // just 1 path elememnt
  102. // first look for property name with empty field
  103. for propName, subDocMapping := range dm.Properties {
  104. if propName == pathElements[0] {
  105. // found property name match, now look at its fields
  106. for _, field := range subDocMapping.Fields {
  107. if field.Name == "" || field.Name == pathElements[0] {
  108. // match
  109. return field
  110. }
  111. }
  112. }
  113. }
  114. // next, walk the properties again, looking for field overriding the name
  115. for propName, subDocMapping := range dm.Properties {
  116. if propName != pathElements[0] {
  117. // property name isn't a match, but field name could override it
  118. for _, field := range subDocMapping.Fields {
  119. if field.Name == pathElements[0] {
  120. return field
  121. }
  122. }
  123. }
  124. }
  125. }
  126. return nil
  127. }
  128. // documentMappingForPath only returns EXACT matches for a sub document
  129. // or for an explicitly mapped field, if you want to find the
  130. // closest document mapping to a field not explicitly mapped
  131. // use closestDocMapping
  132. func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
  133. pathElements := decodePath(path)
  134. current := dm
  135. OUTER:
  136. for i, pathElement := range pathElements {
  137. for name, subDocMapping := range current.Properties {
  138. if name == pathElement {
  139. current = subDocMapping
  140. continue OUTER
  141. }
  142. }
  143. // no subDocMapping matches this pathElement
  144. // only if this is the last element check for field name
  145. if i == len(pathElements)-1 {
  146. for _, field := range current.Fields {
  147. if field.Name == pathElement {
  148. break
  149. }
  150. }
  151. }
  152. return nil
  153. }
  154. return current
  155. }
  156. // closestDocMapping findest the most specific document mapping that matches
  157. // part of the provided path
  158. func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
  159. pathElements := decodePath(path)
  160. current := dm
  161. OUTER:
  162. for _, pathElement := range pathElements {
  163. for name, subDocMapping := range current.Properties {
  164. if name == pathElement {
  165. current = subDocMapping
  166. continue OUTER
  167. }
  168. }
  169. }
  170. return current
  171. }
  172. // NewDocumentMapping returns a new document mapping
  173. // with all the default values.
  174. func NewDocumentMapping() *DocumentMapping {
  175. return &DocumentMapping{
  176. Enabled: true,
  177. Dynamic: true,
  178. }
  179. }
  180. // NewDocumentStaticMapping returns a new document
  181. // mapping that will not automatically index parts
  182. // of a document without an explicit mapping.
  183. func NewDocumentStaticMapping() *DocumentMapping {
  184. return &DocumentMapping{
  185. Enabled: true,
  186. }
  187. }
  188. // NewDocumentDisabledMapping returns a new document
  189. // mapping that will not perform any indexing.
  190. func NewDocumentDisabledMapping() *DocumentMapping {
  191. return &DocumentMapping{}
  192. }
  193. // AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
  194. // for the specified named subsection.
  195. func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
  196. if dm.Properties == nil {
  197. dm.Properties = make(map[string]*DocumentMapping)
  198. }
  199. dm.Properties[property] = sdm
  200. }
  201. // AddFieldMappingsAt adds one or more FieldMappings
  202. // at the named sub-document. If the named sub-document
  203. // doesn't yet exist it is created for you.
  204. // This is a convenience function to make most common
  205. // mappings more concise.
  206. // Otherwise, you would:
  207. // subMapping := NewDocumentMapping()
  208. // subMapping.AddFieldMapping(fieldMapping)
  209. // parentMapping.AddSubDocumentMapping(property, subMapping)
  210. func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
  211. if dm.Properties == nil {
  212. dm.Properties = make(map[string]*DocumentMapping)
  213. }
  214. sdm, ok := dm.Properties[property]
  215. if !ok {
  216. sdm = NewDocumentMapping()
  217. }
  218. for _, fm := range fms {
  219. sdm.AddFieldMapping(fm)
  220. }
  221. dm.Properties[property] = sdm
  222. }
  223. // AddFieldMapping adds the provided FieldMapping for this section
  224. // of the document.
  225. func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
  226. if dm.Fields == nil {
  227. dm.Fields = make([]*FieldMapping, 0)
  228. }
  229. dm.Fields = append(dm.Fields, fm)
  230. }
  231. // UnmarshalJSON offers custom unmarshaling with optional strict validation
  232. func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
  233. var tmp map[string]json.RawMessage
  234. err := json.Unmarshal(data, &tmp)
  235. if err != nil {
  236. return err
  237. }
  238. // set defaults for fields which might have been omitted
  239. dm.Enabled = true
  240. dm.Dynamic = true
  241. var invalidKeys []string
  242. for k, v := range tmp {
  243. switch k {
  244. case "enabled":
  245. err := json.Unmarshal(v, &dm.Enabled)
  246. if err != nil {
  247. return err
  248. }
  249. case "dynamic":
  250. err := json.Unmarshal(v, &dm.Dynamic)
  251. if err != nil {
  252. return err
  253. }
  254. case "default_analyzer":
  255. err := json.Unmarshal(v, &dm.DefaultAnalyzer)
  256. if err != nil {
  257. return err
  258. }
  259. case "properties":
  260. err := json.Unmarshal(v, &dm.Properties)
  261. if err != nil {
  262. return err
  263. }
  264. case "fields":
  265. err := json.Unmarshal(v, &dm.Fields)
  266. if err != nil {
  267. return err
  268. }
  269. case "struct_tag_key":
  270. err := json.Unmarshal(v, &dm.StructTagKey)
  271. if err != nil {
  272. return err
  273. }
  274. default:
  275. invalidKeys = append(invalidKeys, k)
  276. }
  277. }
  278. if MappingJSONStrict && len(invalidKeys) > 0 {
  279. return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
  280. }
  281. return nil
  282. }
  283. func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
  284. rv := ""
  285. current := dm
  286. for _, pathElement := range path {
  287. var ok bool
  288. current, ok = current.Properties[pathElement]
  289. if !ok {
  290. break
  291. }
  292. if current.DefaultAnalyzer != "" {
  293. rv = current.DefaultAnalyzer
  294. }
  295. }
  296. return rv
  297. }
  298. func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
  299. // allow default "json" tag to be overriden
  300. structTagKey := dm.StructTagKey
  301. if structTagKey == "" {
  302. structTagKey = "json"
  303. }
  304. val := reflect.ValueOf(data)
  305. typ := val.Type()
  306. switch typ.Kind() {
  307. case reflect.Map:
  308. // FIXME can add support for other map keys in the future
  309. if typ.Key().Kind() == reflect.String {
  310. for _, key := range val.MapKeys() {
  311. fieldName := key.String()
  312. fieldVal := val.MapIndex(key).Interface()
  313. dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
  314. }
  315. }
  316. case reflect.Struct:
  317. for i := 0; i < val.NumField(); i++ {
  318. field := typ.Field(i)
  319. fieldName := field.Name
  320. // anonymous fields of type struct can elide the type name
  321. if field.Anonymous && field.Type.Kind() == reflect.Struct {
  322. fieldName = ""
  323. }
  324. // if the field has a name under the specified tag, prefer that
  325. tag := field.Tag.Get(structTagKey)
  326. tagFieldName := parseTagName(tag)
  327. if tagFieldName == "-" {
  328. continue
  329. }
  330. // allow tag to set field name to empty, only if anonymous
  331. if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
  332. fieldName = tagFieldName
  333. }
  334. if val.Field(i).CanInterface() {
  335. fieldVal := val.Field(i).Interface()
  336. newpath := path
  337. if fieldName != "" {
  338. newpath = append(path, fieldName)
  339. }
  340. dm.processProperty(fieldVal, newpath, indexes, context)
  341. }
  342. }
  343. case reflect.Slice, reflect.Array:
  344. for i := 0; i < val.Len(); i++ {
  345. if val.Index(i).CanInterface() {
  346. fieldVal := val.Index(i).Interface()
  347. dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
  348. }
  349. }
  350. case reflect.Ptr:
  351. ptrElem := val.Elem()
  352. if ptrElem.IsValid() && ptrElem.CanInterface() {
  353. dm.processProperty(ptrElem.Interface(), path, indexes, context)
  354. }
  355. case reflect.String:
  356. dm.processProperty(val.String(), path, indexes, context)
  357. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  358. dm.processProperty(float64(val.Int()), path, indexes, context)
  359. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  360. dm.processProperty(float64(val.Uint()), path, indexes, context)
  361. case reflect.Float32, reflect.Float64:
  362. dm.processProperty(float64(val.Float()), path, indexes, context)
  363. case reflect.Bool:
  364. dm.processProperty(val.Bool(), path, indexes, context)
  365. }
  366. }
  367. func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
  368. pathString := encodePath(path)
  369. // look to see if there is a mapping for this field
  370. subDocMapping := dm.documentMappingForPath(pathString)
  371. closestDocMapping := dm.closestDocMapping(pathString)
  372. // check to see if we even need to do further processing
  373. if subDocMapping != nil && !subDocMapping.Enabled {
  374. return
  375. }
  376. propertyValue := reflect.ValueOf(property)
  377. if !propertyValue.IsValid() {
  378. // cannot do anything with the zero value
  379. return
  380. }
  381. propertyType := propertyValue.Type()
  382. switch propertyType.Kind() {
  383. case reflect.String:
  384. propertyValueString := propertyValue.String()
  385. if subDocMapping != nil {
  386. // index by explicit mapping
  387. for _, fieldMapping := range subDocMapping.Fields {
  388. fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
  389. }
  390. } else if closestDocMapping.Dynamic {
  391. // automatic indexing behavior
  392. // first see if it can be parsed by the default date parser
  393. dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
  394. if dateTimeParser != nil {
  395. parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
  396. if err != nil {
  397. // index as text
  398. fieldMapping := newTextFieldMappingDynamic(context.im)
  399. fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
  400. } else {
  401. // index as datetime
  402. fieldMapping := newDateTimeFieldMappingDynamic(context.im)
  403. fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
  404. }
  405. }
  406. }
  407. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  408. dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
  409. return
  410. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  411. dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
  412. return
  413. case reflect.Float64, reflect.Float32:
  414. propertyValFloat := propertyValue.Float()
  415. if subDocMapping != nil {
  416. // index by explicit mapping
  417. for _, fieldMapping := range subDocMapping.Fields {
  418. fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
  419. }
  420. } else if closestDocMapping.Dynamic {
  421. // automatic indexing behavior
  422. fieldMapping := newNumericFieldMappingDynamic(context.im)
  423. fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
  424. }
  425. case reflect.Bool:
  426. propertyValBool := propertyValue.Bool()
  427. if subDocMapping != nil {
  428. // index by explicit mapping
  429. for _, fieldMapping := range subDocMapping.Fields {
  430. fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
  431. }
  432. } else if closestDocMapping.Dynamic {
  433. // automatic indexing behavior
  434. fieldMapping := newBooleanFieldMappingDynamic(context.im)
  435. fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
  436. }
  437. case reflect.Struct:
  438. switch property := property.(type) {
  439. case time.Time:
  440. // don't descend into the time struct
  441. if subDocMapping != nil {
  442. // index by explicit mapping
  443. for _, fieldMapping := range subDocMapping.Fields {
  444. fieldMapping.processTime(property, pathString, path, indexes, context)
  445. }
  446. } else if closestDocMapping.Dynamic {
  447. fieldMapping := newDateTimeFieldMappingDynamic(context.im)
  448. fieldMapping.processTime(property, pathString, path, indexes, context)
  449. }
  450. default:
  451. dm.walkDocument(property, path, indexes, context)
  452. }
  453. default:
  454. dm.walkDocument(property, path, indexes, context)
  455. }
  456. }