main
Raw Download raw file
  1// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
  2// https://github.com/sergi/go-diff
  3// See the included LICENSE file for license details.
  4//
  5// go-diff is a Go implementation of Google's Diff, Match, and Patch library
  6// Original library is Copyright (c) 2006 Google Inc.
  7// http://code.google.com/p/google-diff-match-patch/
  8
  9package diffmatchpatch
 10
 11import (
 12	"math"
 13)
 14
 15// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
 16// Returns -1 if no match found.
 17func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
 18	// Check for null inputs not needed since null can't be passed in C#.
 19
 20	loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
 21	if text == pattern {
 22		// Shortcut (potentially not guaranteed by the algorithm)
 23		return 0
 24	} else if len(text) == 0 {
 25		// Nothing to match.
 26		return -1
 27	} else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
 28		// Perfect match at the perfect spot!  (Includes case of null pattern)
 29		return loc
 30	}
 31	// Do a fuzzy compare.
 32	return dmp.MatchBitap(text, pattern, loc)
 33}
 34
 35// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
 36// Returns -1 if no match was found.
 37func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
 38	// Initialise the alphabet.
 39	s := dmp.MatchAlphabet(pattern)
 40
 41	// Highest score beyond which we give up.
 42	scoreThreshold := dmp.MatchThreshold
 43	// Is there a nearby exact match? (speedup)
 44	bestLoc := indexOf(text, pattern, loc)
 45	if bestLoc != -1 {
 46		scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
 47			pattern), scoreThreshold)
 48		// What about in the other direction? (speedup)
 49		bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
 50		if bestLoc != -1 {
 51			scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
 52				pattern), scoreThreshold)
 53		}
 54	}
 55
 56	// Initialise the bit arrays.
 57	matchmask := 1 << uint((len(pattern) - 1))
 58	bestLoc = -1
 59
 60	var binMin, binMid int
 61	binMax := len(pattern) + len(text)
 62	lastRd := []int{}
 63	for d := 0; d < len(pattern); d++ {
 64		// Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
 65		binMin = 0
 66		binMid = binMax
 67		for binMin < binMid {
 68			if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
 69				binMin = binMid
 70			} else {
 71				binMax = binMid
 72			}
 73			binMid = (binMax-binMin)/2 + binMin
 74		}
 75		// Use the result from this iteration as the maximum for the next.
 76		binMax = binMid
 77		start := int(math.Max(1, float64(loc-binMid+1)))
 78		finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
 79
 80		rd := make([]int, finish+2)
 81		rd[finish+1] = (1 << uint(d)) - 1
 82
 83		for j := finish; j >= start; j-- {
 84			var charMatch int
 85			if len(text) <= j-1 {
 86				// Out of range.
 87				charMatch = 0
 88			} else if _, ok := s[text[j-1]]; !ok {
 89				charMatch = 0
 90			} else {
 91				charMatch = s[text[j-1]]
 92			}
 93
 94			if d == 0 {
 95				// First pass: exact match.
 96				rd[j] = ((rd[j+1] << 1) | 1) & charMatch
 97			} else {
 98				// Subsequent passes: fuzzy match.
 99				rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
100			}
101			if (rd[j] & matchmask) != 0 {
102				score := dmp.matchBitapScore(d, j-1, loc, pattern)
103				// This match will almost certainly be better than any existing match.  But check anyway.
104				if score <= scoreThreshold {
105					// Told you so.
106					scoreThreshold = score
107					bestLoc = j - 1
108					if bestLoc > loc {
109						// When passing loc, don't exceed our current distance from loc.
110						start = int(math.Max(1, float64(2*loc-bestLoc)))
111					} else {
112						// Already passed loc, downhill from here on in.
113						break
114					}
115				}
116			}
117		}
118		if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
119			// No hope for a (better) match at greater error levels.
120			break
121		}
122		lastRd = rd
123	}
124	return bestLoc
125}
126
127// matchBitapScore computes and returns the score for a match with e errors and x location.
128func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
129	accuracy := float64(e) / float64(len(pattern))
130	proximity := math.Abs(float64(loc - x))
131	if dmp.MatchDistance == 0 {
132		// Dodge divide by zero error.
133		if proximity == 0 {
134			return accuracy
135		}
136
137		return 1.0
138	}
139	return accuracy + (proximity / float64(dmp.MatchDistance))
140}
141
142// MatchAlphabet initialises the alphabet for the Bitap algorithm.
143func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
144	s := map[byte]int{}
145	charPattern := []byte(pattern)
146	for _, c := range charPattern {
147		_, ok := s[c]
148		if !ok {
149			s[c] = 0
150		}
151	}
152	i := 0
153
154	for _, c := range charPattern {
155		value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
156		s[c] = value
157		i++
158	}
159	return s
160}