|
|
@ -90,6 +90,7 @@ const ( |
|
|
|
}, |
|
|
|
"content": { |
|
|
|
"type": "text", |
|
|
|
"term_vector": "with_positions_offsets", |
|
|
|
"index": true |
|
|
|
}, |
|
|
|
"commit_id": { |
|
|
@ -251,6 +252,22 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error { |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
// indexPos locates the first occurrence of start in content and the first
// occurrence of end that follows it.
//
// It returns the byte offset at which that start begins and the byte offset
// just past that end, so content[startPos:endPos] spans from the start string
// through the end string inclusive.
// If either string cannot be found, it returns -1, -1.
func indexPos(content, start, end string) (int, int) {
	startIdx := strings.Index(content, start)
	if startIdx < 0 {
		return -1, -1
	}
	// Look for end only in the text after start, so an end that appears
	// before start (or overlaps it) is never matched.
	endIdx := strings.Index(content[startIdx+len(start):], end)
	if endIdx < 0 {
		return -1, -1
	}
	// endIdx is relative to the text after start: rebase it onto content and
	// step past end itself to get the exclusive end offset.
	return startIdx, startIdx + len(start) + endIdx + len(end)
}
|
|
|
|
|
|
|
func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) { |
|
|
|
hits := make([]*SearchResult, 0, pageSize) |
|
|
|
for _, hit := range searchResult.Hits.Hits { |
|
|
@ -260,18 +277,12 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) |
|
|
|
var startIndex, endIndex int = -1, -1 |
|
|
|
c, ok := hit.Highlight["content"] |
|
|
|
if ok && len(c) > 0 { |
|
|
|
var subStr = make([]rune, 0, len(kw)) |
|
|
|
startIndex = strings.IndexFunc(c[0], func(r rune) bool { |
|
|
|
if len(subStr) >= len(kw) { |
|
|
|
subStr = subStr[1:] |
|
|
|
} |
|
|
|
subStr = append(subStr, r) |
|
|
|
return strings.EqualFold(kw, string(subStr)) |
|
|
|
}) |
|
|
|
if startIndex > -1 { |
|
|
|
endIndex = startIndex + len(kw) |
|
|
|
} else { |
|
|
|
panic(fmt.Sprintf("1===%#v", hit.Highlight)) |
|
|
|
// FIXME: Since the high lighting content will include <em> and </em> for the keywords,
|
|
|
|
// now we should find the positions. But how to avoid html content which contains the
|
|
|
|
// <em> and </em> tags? Does Elasticsearch already handle that?
|
|
|
|
startIndex, endIndex = indexPos(c[0], "<em>", "</em>") |
|
|
|
if startIndex == -1 { |
|
|
|
panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0])) |
|
|
|
} |
|
|
|
} else { |
|
|
|
panic(fmt.Sprintf("2===%#v", hit.Highlight)) |
|
|
@ -293,7 +304,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) |
|
|
|
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)), |
|
|
|
Language: language, |
|
|
|
StartIndex: startIndex, |
|
|
|
EndIndex: endIndex, |
|
|
|
EndIndex: endIndex - 9, // remove the length <em></em> since we give Content the original data
|
|
|
|
Color: enry.GetColor(language), |
|
|
|
}) |
|
|
|
} |
|
|
@ -347,7 +358,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string, |
|
|
|
Index(b.indexerAliasName). |
|
|
|
Aggregation("language", aggregation). |
|
|
|
Query(query). |
|
|
|
Highlight(elastic.NewHighlight().Field("content")). |
|
|
|
Highlight( |
|
|
|
elastic.NewHighlight(). |
|
|
|
Field("content"). |
|
|
|
NumOfFragments(0). // 0 disables fragmenting: the whole field content is returned highlighted
|
|
|
|
HighlighterType("fvh"), |
|
|
|
). |
|
|
|
Sort("repo_id", true). |
|
|
|
From(start).Size(pageSize). |
|
|
|
Do(context.Background()) |
|
|
@ -373,7 +389,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string, |
|
|
|
searchResult, err := b.client.Search(). |
|
|
|
Index(b.indexerAliasName). |
|
|
|
Query(query). |
|
|
|
Highlight(elastic.NewHighlight().Field("content")). |
|
|
|
Highlight( |
|
|
|
elastic.NewHighlight(). |
|
|
|
Field("content"). |
|
|
|
NumOfFragments(0). // 0 disables fragmenting: the whole field content is returned highlighted
|
|
|
|
HighlighterType("fvh"), |
|
|
|
). |
|
|
|
Sort("repo_id", true). |
|
|
|
From(start).Size(pageSize). |
|
|
|
Do(context.Background()) |
|
|
|