|
|
- // Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
- // https://github.com/sergi/go-diff
- // See the included LICENSE file for license details.
- //
- // go-diff is a Go implementation of Google's Diff, Match, and Patch library
- // Original library is Copyright (c) 2006 Google Inc.
- // http://code.google.com/p/google-diff-match-patch/
-
- package diffmatchpatch
-
- import (
- "math"
- )
-
- // MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
- // Returns -1 if no match found.
- func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
- // Check for null inputs not needed since null can't be passed in C#.
-
- loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
- if text == pattern {
- // Shortcut (potentially not guaranteed by the algorithm)
- return 0
- } else if len(text) == 0 {
- // Nothing to match.
- return -1
- } else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
- // Perfect match at the perfect spot! (Includes case of null pattern)
- return loc
- }
- // Do a fuzzy compare.
- return dmp.MatchBitap(text, pattern, loc)
- }
-
- // MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
- // Returns -1 if no match was found.
- func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
- // Initialise the alphabet.
- s := dmp.MatchAlphabet(pattern)
-
- // Highest score beyond which we give up.
- scoreThreshold := dmp.MatchThreshold
- // Is there a nearby exact match? (speedup)
- bestLoc := indexOf(text, pattern, loc)
- if bestLoc != -1 {
- scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
- pattern), scoreThreshold)
- // What about in the other direction? (speedup)
- bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
- if bestLoc != -1 {
- scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
- pattern), scoreThreshold)
- }
- }
-
- // Initialise the bit arrays.
- matchmask := 1 << uint((len(pattern) - 1))
- bestLoc = -1
-
- var binMin, binMid int
- binMax := len(pattern) + len(text)
- lastRd := []int{}
- for d := 0; d < len(pattern); d++ {
- // Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
- binMin = 0
- binMid = binMax
- for binMin < binMid {
- if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
- binMin = binMid
- } else {
- binMax = binMid
- }
- binMid = (binMax-binMin)/2 + binMin
- }
- // Use the result from this iteration as the maximum for the next.
- binMax = binMid
- start := int(math.Max(1, float64(loc-binMid+1)))
- finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
-
- rd := make([]int, finish+2)
- rd[finish+1] = (1 << uint(d)) - 1
-
- for j := finish; j >= start; j-- {
- var charMatch int
- if len(text) <= j-1 {
- // Out of range.
- charMatch = 0
- } else if _, ok := s[text[j-1]]; !ok {
- charMatch = 0
- } else {
- charMatch = s[text[j-1]]
- }
-
- if d == 0 {
- // First pass: exact match.
- rd[j] = ((rd[j+1] << 1) | 1) & charMatch
- } else {
- // Subsequent passes: fuzzy match.
- rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
- }
- if (rd[j] & matchmask) != 0 {
- score := dmp.matchBitapScore(d, j-1, loc, pattern)
- // This match will almost certainly be better than any existing match. But check anyway.
- if score <= scoreThreshold {
- // Told you so.
- scoreThreshold = score
- bestLoc = j - 1
- if bestLoc > loc {
- // When passing loc, don't exceed our current distance from loc.
- start = int(math.Max(1, float64(2*loc-bestLoc)))
- } else {
- // Already passed loc, downhill from here on in.
- break
- }
- }
- }
- }
- if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
- // No hope for a (better) match at greater error levels.
- break
- }
- lastRd = rd
- }
- return bestLoc
- }
-
- // matchBitapScore computes and returns the score for a match with e errors and x location.
- func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
- accuracy := float64(e) / float64(len(pattern))
- proximity := math.Abs(float64(loc - x))
- if dmp.MatchDistance == 0 {
- // Dodge divide by zero error.
- if proximity == 0 {
- return accuracy
- }
-
- return 1.0
- }
- return accuracy + (proximity / float64(dmp.MatchDistance))
- }
-
- // MatchAlphabet initialises the alphabet for the Bitap algorithm.
- func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
- s := map[byte]int{}
- charPattern := []byte(pattern)
- for _, c := range charPattern {
- _, ok := s[c]
- if !ok {
- s[c] = 0
- }
- }
- i := 0
-
- for _, c := range charPattern {
- value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
- s[c] = value
- i++
- }
- return s
- }
|