main
1// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
2// https://github.com/sergi/go-diff
3// See the included LICENSE file for license details.
4//
5// go-diff is a Go implementation of Google's Diff, Match, and Patch library
6// Original library is Copyright (c) 2006 Google Inc.
7// http://code.google.com/p/google-diff-match-patch/
8
9package diffmatchpatch
10
11import (
12 "math"
13)
14
15// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
16// Returns -1 if no match found.
17func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
18 // Check for null inputs not needed since null can't be passed in C#.
19
20 loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
21 if text == pattern {
22 // Shortcut (potentially not guaranteed by the algorithm)
23 return 0
24 } else if len(text) == 0 {
25 // Nothing to match.
26 return -1
27 } else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
28 // Perfect match at the perfect spot! (Includes case of null pattern)
29 return loc
30 }
31 // Do a fuzzy compare.
32 return dmp.MatchBitap(text, pattern, loc)
33}
34
35// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
36// Returns -1 if no match was found.
37func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
38 // Initialise the alphabet.
39 s := dmp.MatchAlphabet(pattern)
40
41 // Highest score beyond which we give up.
42 scoreThreshold := dmp.MatchThreshold
43 // Is there a nearby exact match? (speedup)
44 bestLoc := indexOf(text, pattern, loc)
45 if bestLoc != -1 {
46 scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
47 pattern), scoreThreshold)
48 // What about in the other direction? (speedup)
49 bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
50 if bestLoc != -1 {
51 scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
52 pattern), scoreThreshold)
53 }
54 }
55
56 // Initialise the bit arrays.
57 matchmask := 1 << uint((len(pattern) - 1))
58 bestLoc = -1
59
60 var binMin, binMid int
61 binMax := len(pattern) + len(text)
62 lastRd := []int{}
63 for d := 0; d < len(pattern); d++ {
64 // Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
65 binMin = 0
66 binMid = binMax
67 for binMin < binMid {
68 if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
69 binMin = binMid
70 } else {
71 binMax = binMid
72 }
73 binMid = (binMax-binMin)/2 + binMin
74 }
75 // Use the result from this iteration as the maximum for the next.
76 binMax = binMid
77 start := int(math.Max(1, float64(loc-binMid+1)))
78 finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
79
80 rd := make([]int, finish+2)
81 rd[finish+1] = (1 << uint(d)) - 1
82
83 for j := finish; j >= start; j-- {
84 var charMatch int
85 if len(text) <= j-1 {
86 // Out of range.
87 charMatch = 0
88 } else if _, ok := s[text[j-1]]; !ok {
89 charMatch = 0
90 } else {
91 charMatch = s[text[j-1]]
92 }
93
94 if d == 0 {
95 // First pass: exact match.
96 rd[j] = ((rd[j+1] << 1) | 1) & charMatch
97 } else {
98 // Subsequent passes: fuzzy match.
99 rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
100 }
101 if (rd[j] & matchmask) != 0 {
102 score := dmp.matchBitapScore(d, j-1, loc, pattern)
103 // This match will almost certainly be better than any existing match. But check anyway.
104 if score <= scoreThreshold {
105 // Told you so.
106 scoreThreshold = score
107 bestLoc = j - 1
108 if bestLoc > loc {
109 // When passing loc, don't exceed our current distance from loc.
110 start = int(math.Max(1, float64(2*loc-bestLoc)))
111 } else {
112 // Already passed loc, downhill from here on in.
113 break
114 }
115 }
116 }
117 }
118 if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
119 // No hope for a (better) match at greater error levels.
120 break
121 }
122 lastRd = rd
123 }
124 return bestLoc
125}
126
127// matchBitapScore computes and returns the score for a match with e errors and x location.
128func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
129 accuracy := float64(e) / float64(len(pattern))
130 proximity := math.Abs(float64(loc - x))
131 if dmp.MatchDistance == 0 {
132 // Dodge divide by zero error.
133 if proximity == 0 {
134 return accuracy
135 }
136
137 return 1.0
138 }
139 return accuracy + (proximity / float64(dmp.MatchDistance))
140}
141
142// MatchAlphabet initialises the alphabet for the Bitap algorithm.
143func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
144 s := map[byte]int{}
145 charPattern := []byte(pattern)
146 for _, c := range charPattern {
147 _, ok := s[c]
148 if !ok {
149 s[c] = 0
150 }
151 }
152 i := 0
153
154 for _, c := range charPattern {
155 value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))
156 s[c] = value
157 i++
158 }
159 return s
160}