main
1// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
2// https://github.com/sergi/go-diff
3// See the included LICENSE file for license details.
4//
5// go-diff is a Go implementation of Google's Diff, Match, and Patch library
6// Original library is Copyright (c) 2006 Google Inc.
7// http://code.google.com/p/google-diff-match-patch/
8
9package diffmatchpatch
10
11import (
12 "bytes"
13 "errors"
14 "fmt"
15 "html"
16 "math"
17 "net/url"
18 "regexp"
19 "strconv"
20 "strings"
21 "time"
22 "unicode/utf8"
23)
24
25// Operation defines the operation of a diff item.
26type Operation int8
27
28//go:generate stringer -type=Operation -trimprefix=Diff
29
30const (
31 // DiffDelete item represents a delete diff.
32 DiffDelete Operation = -1
33 // DiffInsert item represents an insert diff.
34 DiffInsert Operation = 1
35 // DiffEqual item represents an equal diff.
36 DiffEqual Operation = 0
37)
38
39// Diff represents one diff operation
40type Diff struct {
41 Type Operation
42 Text string
43}
44
45// splice removes amount elements from slice at index index, replacing them with elements.
46func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
47 if len(elements) == amount {
48 // Easy case: overwrite the relevant items.
49 copy(slice[index:], elements)
50 return slice
51 }
52 if len(elements) < amount {
53 // Fewer new items than old.
54 // Copy in the new items.
55 copy(slice[index:], elements)
56 // Shift the remaining items left.
57 copy(slice[index+len(elements):], slice[index+amount:])
58 // Calculate the new end of the slice.
59 end := len(slice) - amount + len(elements)
60 // Zero stranded elements at end so that they can be garbage collected.
61 tail := slice[end:]
62 for i := range tail {
63 tail[i] = Diff{}
64 }
65 return slice[:end]
66 }
67 // More new items than old.
68 // Make room in slice for new elements.
69 // There's probably an even more efficient way to do this,
70 // but this is simple and clear.
71 need := len(slice) - amount + len(elements)
72 for len(slice) < need {
73 slice = append(slice, Diff{})
74 }
75 // Shift slice elements right to make room for new elements.
76 copy(slice[index+len(elements):], slice[index+amount:])
77 // Copy in new elements.
78 copy(slice[index:], elements)
79 return slice
80}
81
82// DiffMain finds the differences between two texts.
83// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
84func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff {
85 return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines)
86}
87
88// DiffMainRunes finds the differences between two rune sequences.
89// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
90func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff {
91 var deadline time.Time
92 if dmp.DiffTimeout > 0 {
93 deadline = time.Now().Add(dmp.DiffTimeout)
94 }
95 return dmp.diffMainRunes(text1, text2, checklines, deadline)
96}
97
98func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff {
99 if runesEqual(text1, text2) {
100 var diffs []Diff
101 if len(text1) > 0 {
102 diffs = append(diffs, Diff{DiffEqual, string(text1)})
103 }
104 return diffs
105 }
106 // Trim off common prefix (speedup).
107 commonlength := commonPrefixLength(text1, text2)
108 commonprefix := text1[:commonlength]
109 text1 = text1[commonlength:]
110 text2 = text2[commonlength:]
111
112 // Trim off common suffix (speedup).
113 commonlength = commonSuffixLength(text1, text2)
114 commonsuffix := text1[len(text1)-commonlength:]
115 text1 = text1[:len(text1)-commonlength]
116 text2 = text2[:len(text2)-commonlength]
117
118 // Compute the diff on the middle block.
119 diffs := dmp.diffCompute(text1, text2, checklines, deadline)
120
121 // Restore the prefix and suffix.
122 if len(commonprefix) != 0 {
123 diffs = append([]Diff{{DiffEqual, string(commonprefix)}}, diffs...)
124 }
125 if len(commonsuffix) != 0 {
126 diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)})
127 }
128
129 return dmp.DiffCleanupMerge(diffs)
130}
131
132// diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix.
133func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff {
134 diffs := []Diff{}
135 if len(text1) == 0 {
136 // Just add some text (speedup).
137 return append(diffs, Diff{DiffInsert, string(text2)})
138 } else if len(text2) == 0 {
139 // Just delete some text (speedup).
140 return append(diffs, Diff{DiffDelete, string(text1)})
141 }
142
143 var longtext, shorttext []rune
144 if len(text1) > len(text2) {
145 longtext = text1
146 shorttext = text2
147 } else {
148 longtext = text2
149 shorttext = text1
150 }
151
152 if i := runesIndex(longtext, shorttext); i != -1 {
153 op := DiffInsert
154 // Swap insertions for deletions if diff is reversed.
155 if len(text1) > len(text2) {
156 op = DiffDelete
157 }
158 // Shorter text is inside the longer text (speedup).
159 return []Diff{
160 Diff{op, string(longtext[:i])},
161 Diff{DiffEqual, string(shorttext)},
162 Diff{op, string(longtext[i+len(shorttext):])},
163 }
164 } else if len(shorttext) == 1 {
165 // Single character string.
166 // After the previous speedup, the character can't be an equality.
167 return []Diff{
168 {DiffDelete, string(text1)},
169 {DiffInsert, string(text2)},
170 }
171 // Check to see if the problem can be split in two.
172 } else if hm := dmp.diffHalfMatch(text1, text2); hm != nil {
173 // A half-match was found, sort out the return data.
174 text1A := hm[0]
175 text1B := hm[1]
176 text2A := hm[2]
177 text2B := hm[3]
178 midCommon := hm[4]
179 // Send both pairs off for separate processing.
180 diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
181 diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
182 // Merge the results.
183 diffs := diffsA
184 diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
185 diffs = append(diffs, diffsB...)
186 return diffs
187 } else if checklines && len(text1) > 100 && len(text2) > 100 {
188 return dmp.diffLineMode(text1, text2, deadline)
189 }
190 return dmp.diffBisect(text1, text2, deadline)
191}
192
193// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
194func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
195 // Scan the text on a line-by-line basis first.
196 text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
197
198 diffs := dmp.diffMainRunes(text1, text2, false, deadline)
199
200 // Convert the diff back to original text.
201 diffs = dmp.DiffCharsToLines(diffs, linearray)
202 // Eliminate freak matches (e.g. blank lines)
203 diffs = dmp.DiffCleanupSemantic(diffs)
204
205 // Rediff any replacement blocks, this time character-by-character.
206 // Add a dummy entry at the end.
207 diffs = append(diffs, Diff{DiffEqual, ""})
208
209 pointer := 0
210 countDelete := 0
211 countInsert := 0
212
213 // NOTE: Rune slices are slower than using strings in this case.
214 textDelete := ""
215 textInsert := ""
216
217 for pointer < len(diffs) {
218 switch diffs[pointer].Type {
219 case DiffInsert:
220 countInsert++
221 textInsert += diffs[pointer].Text
222 case DiffDelete:
223 countDelete++
224 textDelete += diffs[pointer].Text
225 case DiffEqual:
226 // Upon reaching an equality, check for prior redundancies.
227 if countDelete >= 1 && countInsert >= 1 {
228 // Delete the offending records and add the merged ones.
229 diffs = splice(diffs, pointer-countDelete-countInsert,
230 countDelete+countInsert)
231
232 pointer = pointer - countDelete - countInsert
233 a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline)
234 for j := len(a) - 1; j >= 0; j-- {
235 diffs = splice(diffs, pointer, 0, a[j])
236 }
237 pointer = pointer + len(a)
238 }
239
240 countInsert = 0
241 countDelete = 0
242 textDelete = ""
243 textInsert = ""
244 }
245 pointer++
246 }
247
248 return diffs[:len(diffs)-1] // Remove the dummy entry at the end.
249}
250
251// DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff.
252// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
253// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
254func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff {
255 // Unused in this code, but retained for interface compatibility.
256 return dmp.diffBisect([]rune(text1), []rune(text2), deadline)
257}
258
259// diffBisect finds the 'middle snake' of a diff, splits the problem in two and returns the recursively constructed diff.
260// See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations.
261func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff {
262 // Cache the text lengths to prevent multiple calls.
263 runes1Len, runes2Len := len(runes1), len(runes2)
264
265 maxD := (runes1Len + runes2Len + 1) / 2
266 vOffset := maxD
267 vLength := 2 * maxD
268
269 v1 := make([]int, vLength)
270 v2 := make([]int, vLength)
271 for i := range v1 {
272 v1[i] = -1
273 v2[i] = -1
274 }
275 v1[vOffset+1] = 0
276 v2[vOffset+1] = 0
277
278 delta := runes1Len - runes2Len
279 // If the total number of characters is odd, then the front path will collide with the reverse path.
280 front := (delta%2 != 0)
281 // Offsets for start and end of k loop. Prevents mapping of space beyond the grid.
282 k1start := 0
283 k1end := 0
284 k2start := 0
285 k2end := 0
286 for d := 0; d < maxD; d++ {
287 // Bail out if deadline is reached.
288 if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
289 break
290 }
291
292 // Walk the front path one step.
293 for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 {
294 k1Offset := vOffset + k1
295 var x1 int
296
297 if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) {
298 x1 = v1[k1Offset+1]
299 } else {
300 x1 = v1[k1Offset-1] + 1
301 }
302
303 y1 := x1 - k1
304 for x1 < runes1Len && y1 < runes2Len {
305 if runes1[x1] != runes2[y1] {
306 break
307 }
308 x1++
309 y1++
310 }
311 v1[k1Offset] = x1
312 if x1 > runes1Len {
313 // Ran off the right of the graph.
314 k1end += 2
315 } else if y1 > runes2Len {
316 // Ran off the bottom of the graph.
317 k1start += 2
318 } else if front {
319 k2Offset := vOffset + delta - k1
320 if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 {
321 // Mirror x2 onto top-left coordinate system.
322 x2 := runes1Len - v2[k2Offset]
323 if x1 >= x2 {
324 // Overlap detected.
325 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline)
326 }
327 }
328 }
329 }
330 // Walk the reverse path one step.
331 for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 {
332 k2Offset := vOffset + k2
333 var x2 int
334 if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) {
335 x2 = v2[k2Offset+1]
336 } else {
337 x2 = v2[k2Offset-1] + 1
338 }
339 var y2 = x2 - k2
340 for x2 < runes1Len && y2 < runes2Len {
341 if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] {
342 break
343 }
344 x2++
345 y2++
346 }
347 v2[k2Offset] = x2
348 if x2 > runes1Len {
349 // Ran off the left of the graph.
350 k2end += 2
351 } else if y2 > runes2Len {
352 // Ran off the top of the graph.
353 k2start += 2
354 } else if !front {
355 k1Offset := vOffset + delta - k2
356 if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 {
357 x1 := v1[k1Offset]
358 y1 := vOffset + x1 - k1Offset
359 // Mirror x2 onto top-left coordinate system.
360 x2 = runes1Len - x2
361 if x1 >= x2 {
362 // Overlap detected.
363 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline)
364 }
365 }
366 }
367 }
368 }
369 // Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all.
370 return []Diff{
371 {DiffDelete, string(runes1)},
372 {DiffInsert, string(runes2)},
373 }
374}
375
376func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
377 deadline time.Time) []Diff {
378 runes1a := runes1[:x]
379 runes2a := runes2[:y]
380 runes1b := runes1[x:]
381 runes2b := runes2[y:]
382
383 // Compute both diffs serially.
384 diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline)
385 diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline)
386
387 return append(diffs, diffsb...)
388}
389
390// DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line.
391// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
392func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
393 chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
394 return chars1, chars2, lineArray
395}
396
397// DiffLinesToRunes splits two texts into a list of runes.
398func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
399 chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
400 return []rune(chars1), []rune(chars2), lineArray
401}
402
403// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
404func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
405 hydrated := make([]Diff, 0, len(diffs))
406 for _, aDiff := range diffs {
407 runes := []rune(aDiff.Text)
408 text := make([]string, len(runes))
409
410 for i, r := range runes {
411 text[i] = lineArray[runeToInt(r)]
412 }
413
414 aDiff.Text = strings.Join(text, "")
415 hydrated = append(hydrated, aDiff)
416 }
417 return hydrated
418}
419
420// DiffCommonPrefix determines the common prefix length of two strings.
421func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int {
422 // Unused in this code, but retained for interface compatibility.
423 return commonPrefixLength([]rune(text1), []rune(text2))
424}
425
426// DiffCommonSuffix determines the common suffix length of two strings.
427func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {
428 // Unused in this code, but retained for interface compatibility.
429 return commonSuffixLength([]rune(text1), []rune(text2))
430}
431
432// commonPrefixLength returns the length of the common prefix of two rune slices.
433func commonPrefixLength(text1, text2 []rune) int {
434 // Linear search. See comment in commonSuffixLength.
435 n := 0
436 for ; n < len(text1) && n < len(text2); n++ {
437 if text1[n] != text2[n] {
438 return n
439 }
440 }
441 return n
442}
443
444// commonSuffixLength returns the length of the common suffix of two rune slices.
445func commonSuffixLength(text1, text2 []rune) int {
446 // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
447 // See discussion at https://github.com/sergi/go-diff/issues/54.
448 i1 := len(text1)
449 i2 := len(text2)
450 for n := 0; ; n++ {
451 i1--
452 i2--
453 if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
454 return n
455 }
456 }
457}
458
459// DiffCommonOverlap determines if the suffix of one string is the prefix of another.
460func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int {
461 // Cache the text lengths to prevent multiple calls.
462 text1Length := len(text1)
463 text2Length := len(text2)
464 // Eliminate the null case.
465 if text1Length == 0 || text2Length == 0 {
466 return 0
467 }
468 // Truncate the longer string.
469 if text1Length > text2Length {
470 text1 = text1[text1Length-text2Length:]
471 } else if text1Length < text2Length {
472 text2 = text2[0:text1Length]
473 }
474 textLength := int(math.Min(float64(text1Length), float64(text2Length)))
475 // Quick check for the worst case.
476 if text1 == text2 {
477 return textLength
478 }
479
480 // Start by looking for a single character match and increase length until no match is found. Performance analysis: http://neil.fraser.name/news/2010/11/04/
481 best := 0
482 length := 1
483 for {
484 pattern := text1[textLength-length:]
485 found := strings.Index(text2, pattern)
486 if found == -1 {
487 break
488 }
489 length += found
490 if found == 0 || text1[textLength-length:] == text2[0:length] {
491 best = length
492 length++
493 }
494 }
495
496 return best
497}
498
499// DiffHalfMatch checks whether the two texts share a substring which is at least half the length of the longer text. This speedup can produce non-minimal diffs.
500func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string {
501 // Unused in this code, but retained for interface compatibility.
502 runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2))
503 if runeSlices == nil {
504 return nil
505 }
506
507 result := make([]string, len(runeSlices))
508 for i, r := range runeSlices {
509 result[i] = string(r)
510 }
511 return result
512}
513
514func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune {
515 if dmp.DiffTimeout <= 0 {
516 // Don't risk returning a non-optimal diff if we have unlimited time.
517 return nil
518 }
519
520 var longtext, shorttext []rune
521 if len(text1) > len(text2) {
522 longtext = text1
523 shorttext = text2
524 } else {
525 longtext = text2
526 shorttext = text1
527 }
528
529 if len(longtext) < 4 || len(shorttext)*2 < len(longtext) {
530 return nil // Pointless.
531 }
532
533 // First check if the second quarter is the seed for a half-match.
534 hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4))
535
536 // Check again based on the third quarter.
537 hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2))
538
539 hm := [][]rune{}
540 if hm1 == nil && hm2 == nil {
541 return nil
542 } else if hm2 == nil {
543 hm = hm1
544 } else if hm1 == nil {
545 hm = hm2
546 } else {
547 // Both matched. Select the longest.
548 if len(hm1[4]) > len(hm2[4]) {
549 hm = hm1
550 } else {
551 hm = hm2
552 }
553 }
554
555 // A half-match was found, sort out the return data.
556 if len(text1) > len(text2) {
557 return hm
558 }
559
560 return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]}
561}
562
563// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext?
564// Returns a slice containing the prefix of longtext, the suffix of longtext, the prefix of shorttext, the suffix of shorttext and the common middle, or null if there was no match.
565func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
566 var bestCommonA []rune
567 var bestCommonB []rune
568 var bestCommonLen int
569 var bestLongtextA []rune
570 var bestLongtextB []rune
571 var bestShorttextA []rune
572 var bestShorttextB []rune
573
574 // Start with a 1/4 length substring at position i as a seed.
575 seed := l[i : i+len(l)/4]
576
577 for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) {
578 prefixLength := commonPrefixLength(l[i:], s[j:])
579 suffixLength := commonSuffixLength(l[:i], s[:j])
580
581 if bestCommonLen < suffixLength+prefixLength {
582 bestCommonA = s[j-suffixLength : j]
583 bestCommonB = s[j : j+prefixLength]
584 bestCommonLen = len(bestCommonA) + len(bestCommonB)
585 bestLongtextA = l[:i-suffixLength]
586 bestLongtextB = l[i+prefixLength:]
587 bestShorttextA = s[:j-suffixLength]
588 bestShorttextB = s[j+prefixLength:]
589 }
590 }
591
592 if bestCommonLen*2 < len(l) {
593 return nil
594 }
595
596 return [][]rune{
597 bestLongtextA,
598 bestLongtextB,
599 bestShorttextA,
600 bestShorttextB,
601 append(bestCommonA, bestCommonB...),
602 }
603}
604
605// DiffCleanupSemantic reduces the number of edits by eliminating semantically trivial equalities.
606func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
607 changes := false
608 // Stack of indices where equalities are found.
609 equalities := make([]int, 0, len(diffs))
610
611 var lastequality string
612 // Always equal to diffs[equalities[equalitiesLength - 1]][1]
613 var pointer int // Index of current position.
614 // Number of characters that changed prior to the equality.
615 var lengthInsertions1, lengthDeletions1 int
616 // Number of characters that changed after the equality.
617 var lengthInsertions2, lengthDeletions2 int
618
619 for pointer < len(diffs) {
620 if diffs[pointer].Type == DiffEqual {
621 // Equality found.
622 equalities = append(equalities, pointer)
623 lengthInsertions1 = lengthInsertions2
624 lengthDeletions1 = lengthDeletions2
625 lengthInsertions2 = 0
626 lengthDeletions2 = 0
627 lastequality = diffs[pointer].Text
628 } else {
629 // An insertion or deletion.
630
631 if diffs[pointer].Type == DiffInsert {
632 lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text)
633 } else {
634 lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text)
635 }
636 // Eliminate an equality that is smaller or equal to the edits on both sides of it.
637 difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1)))
638 difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2)))
639 if utf8.RuneCountInString(lastequality) > 0 &&
640 (utf8.RuneCountInString(lastequality) <= difference1) &&
641 (utf8.RuneCountInString(lastequality) <= difference2) {
642 // Duplicate record.
643 insPoint := equalities[len(equalities)-1]
644 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
645
646 // Change second copy to insert.
647 diffs[insPoint+1].Type = DiffInsert
648 // Throw away the equality we just deleted.
649 equalities = equalities[:len(equalities)-1]
650
651 if len(equalities) > 0 {
652 equalities = equalities[:len(equalities)-1]
653 }
654 pointer = -1
655 if len(equalities) > 0 {
656 pointer = equalities[len(equalities)-1]
657 }
658
659 lengthInsertions1 = 0 // Reset the counters.
660 lengthDeletions1 = 0
661 lengthInsertions2 = 0
662 lengthDeletions2 = 0
663 lastequality = ""
664 changes = true
665 }
666 }
667 pointer++
668 }
669
670 // Normalize the diff.
671 if changes {
672 diffs = dmp.DiffCleanupMerge(diffs)
673 }
674 diffs = dmp.DiffCleanupSemanticLossless(diffs)
675 // Find any overlaps between deletions and insertions.
676 // e.g: <del>abcxxx</del><ins>xxxdef</ins>
677 // -> <del>abc</del>xxx<ins>def</ins>
678 // e.g: <del>xxxabc</del><ins>defxxx</ins>
679 // -> <ins>def</ins>xxx<del>abc</del>
680 // Only extract an overlap if it is as big as the edit ahead or behind it.
681 pointer = 1
682 for pointer < len(diffs) {
683 if diffs[pointer-1].Type == DiffDelete &&
684 diffs[pointer].Type == DiffInsert {
685 deletion := diffs[pointer-1].Text
686 insertion := diffs[pointer].Text
687 overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion)
688 overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion)
689 if overlapLength1 >= overlapLength2 {
690 if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 ||
691 float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 {
692
693 // Overlap found. Insert an equality and trim the surrounding edits.
694 diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
695 diffs[pointer-1].Text =
696 deletion[0 : len(deletion)-overlapLength1]
697 diffs[pointer+1].Text = insertion[overlapLength1:]
698 pointer++
699 }
700 } else {
701 if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 ||
702 float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 {
703 // Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
704 overlap := Diff{DiffEqual, deletion[:overlapLength2]}
705 diffs = splice(diffs, pointer, 0, overlap)
706 diffs[pointer-1].Type = DiffInsert
707 diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
708 diffs[pointer+1].Type = DiffDelete
709 diffs[pointer+1].Text = deletion[overlapLength2:]
710 pointer++
711 }
712 }
713 pointer++
714 }
715 pointer++
716 }
717
718 return diffs
719}
720
721// Define some regex patterns for matching boundaries.
722var (
723 nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`)
724 whitespaceRegex = regexp.MustCompile(`\s`)
725 linebreakRegex = regexp.MustCompile(`[\r\n]`)
726 blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`)
727 blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`)
728)
729
730// diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries.
731// Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables.
732func diffCleanupSemanticScore(one, two string) int {
733 if len(one) == 0 || len(two) == 0 {
734 // Edges are the best.
735 return 6
736 }
737
738 // Each port of this function behaves slightly differently due to subtle differences in each language's definition of things like 'whitespace'. Since this function's purpose is largely cosmetic, the choice has been made to use each language's native features rather than force total conformity.
739 rune1, _ := utf8.DecodeLastRuneInString(one)
740 rune2, _ := utf8.DecodeRuneInString(two)
741 char1 := string(rune1)
742 char2 := string(rune2)
743
744 nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1)
745 nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2)
746 whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1)
747 whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2)
748 lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1)
749 lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2)
750 blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one)
751 blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two)
752
753 if blankLine1 || blankLine2 {
754 // Five points for blank lines.
755 return 5
756 } else if lineBreak1 || lineBreak2 {
757 // Four points for line breaks.
758 return 4
759 } else if nonAlphaNumeric1 && !whitespace1 && whitespace2 {
760 // Three points for end of sentences.
761 return 3
762 } else if whitespace1 || whitespace2 {
763 // Two points for whitespace.
764 return 2
765 } else if nonAlphaNumeric1 || nonAlphaNumeric2 {
766 // One point for non-alphanumeric.
767 return 1
768 }
769 return 0
770}
771
772// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities which can be shifted sideways to align the edit to a word boundary.
773// E.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
774func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
775 pointer := 1
776
777 // Intentionally ignore the first and last element (don't need checking).
778 for pointer < len(diffs)-1 {
779 if diffs[pointer-1].Type == DiffEqual &&
780 diffs[pointer+1].Type == DiffEqual {
781
782 // This is a single edit surrounded by equalities.
783 equality1 := diffs[pointer-1].Text
784 edit := diffs[pointer].Text
785 equality2 := diffs[pointer+1].Text
786
787 // First, shift the edit as far left as possible.
788 commonOffset := dmp.DiffCommonSuffix(equality1, edit)
789 if commonOffset > 0 {
790 commonString := edit[len(edit)-commonOffset:]
791 equality1 = equality1[0 : len(equality1)-commonOffset]
792 edit = commonString + edit[:len(edit)-commonOffset]
793 equality2 = commonString + equality2
794 }
795
796 // Second, step character by character right, looking for the best fit.
797 bestEquality1 := equality1
798 bestEdit := edit
799 bestEquality2 := equality2
800 bestScore := diffCleanupSemanticScore(equality1, edit) +
801 diffCleanupSemanticScore(edit, equality2)
802
803 for len(edit) != 0 && len(equality2) != 0 {
804 _, sz := utf8.DecodeRuneInString(edit)
805 if len(equality2) < sz || edit[:sz] != equality2[:sz] {
806 break
807 }
808 equality1 += edit[:sz]
809 edit = edit[sz:] + equality2[:sz]
810 equality2 = equality2[sz:]
811 score := diffCleanupSemanticScore(equality1, edit) +
812 diffCleanupSemanticScore(edit, equality2)
813 // The >= encourages trailing rather than leading whitespace on edits.
814 if score >= bestScore {
815 bestScore = score
816 bestEquality1 = equality1
817 bestEdit = edit
818 bestEquality2 = equality2
819 }
820 }
821
822 if diffs[pointer-1].Text != bestEquality1 {
823 // We have an improvement, save it back to the diff.
824 if len(bestEquality1) != 0 {
825 diffs[pointer-1].Text = bestEquality1
826 } else {
827 diffs = splice(diffs, pointer-1, 1)
828 pointer--
829 }
830
831 diffs[pointer].Text = bestEdit
832 if len(bestEquality2) != 0 {
833 diffs[pointer+1].Text = bestEquality2
834 } else {
835 diffs = append(diffs[:pointer+1], diffs[pointer+2:]...)
836 pointer--
837 }
838 }
839 }
840 pointer++
841 }
842
843 return diffs
844}
845
846// DiffCleanupEfficiency reduces the number of edits by eliminating operationally trivial equalities.
847func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
848 changes := false
849 // Stack of indices where equalities are found.
850 type equality struct {
851 data int
852 next *equality
853 }
854 var equalities *equality
855 // Always equal to equalities[equalitiesLength-1][1]
856 lastequality := ""
857 pointer := 0 // Index of current position.
858 // Is there an insertion operation before the last equality.
859 preIns := false
860 // Is there a deletion operation before the last equality.
861 preDel := false
862 // Is there an insertion operation after the last equality.
863 postIns := false
864 // Is there a deletion operation after the last equality.
865 postDel := false
866 for pointer < len(diffs) {
867 if diffs[pointer].Type == DiffEqual { // Equality found.
868 if len(diffs[pointer].Text) < dmp.DiffEditCost &&
869 (postIns || postDel) {
870 // Candidate found.
871 equalities = &equality{
872 data: pointer,
873 next: equalities,
874 }
875 preIns = postIns
876 preDel = postDel
877 lastequality = diffs[pointer].Text
878 } else {
879 // Not a candidate, and can never become one.
880 equalities = nil
881 lastequality = ""
882 }
883 postIns = false
884 postDel = false
885 } else { // An insertion or deletion.
886 if diffs[pointer].Type == DiffDelete {
887 postDel = true
888 } else {
889 postIns = true
890 }
891
892 // Five types to be split:
893 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
894 // <ins>A</ins>X<ins>C</ins><del>D</del>
895 // <ins>A</ins><del>B</del>X<ins>C</ins>
896 // <ins>A</del>X<ins>C</ins><del>D</del>
897 // <ins>A</ins><del>B</del>X<del>C</del>
898 var sumPres int
899 if preIns {
900 sumPres++
901 }
902 if preDel {
903 sumPres++
904 }
905 if postIns {
906 sumPres++
907 }
908 if postDel {
909 sumPres++
910 }
911 if len(lastequality) > 0 &&
912 ((preIns && preDel && postIns && postDel) ||
913 ((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) {
914
915 insPoint := equalities.data
916
917 // Duplicate record.
918 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
919
920 // Change second copy to insert.
921 diffs[insPoint+1].Type = DiffInsert
922 // Throw away the equality we just deleted.
923 equalities = equalities.next
924 lastequality = ""
925
926 if preIns && preDel {
927 // No changes made which could affect previous entry, keep going.
928 postIns = true
929 postDel = true
930 equalities = nil
931 } else {
932 if equalities != nil {
933 equalities = equalities.next
934 }
935 if equalities != nil {
936 pointer = equalities.data
937 } else {
938 pointer = -1
939 }
940 postIns = false
941 postDel = false
942 }
943 changes = true
944 }
945 }
946 pointer++
947 }
948
949 if changes {
950 diffs = dmp.DiffCleanupMerge(diffs)
951 }
952
953 return diffs
954}
955
956// DiffCleanupMerge reorders and merges like edit sections. Merge equalities.
957// Any edit section can move as long as it doesn't cross an equality.
958func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff {
959 // Add a dummy entry at the end.
960 diffs = append(diffs, Diff{DiffEqual, ""})
961 pointer := 0
962 countDelete := 0
963 countInsert := 0
964 commonlength := 0
965 textDelete := []rune(nil)
966 textInsert := []rune(nil)
967
968 for pointer < len(diffs) {
969 switch diffs[pointer].Type {
970 case DiffInsert:
971 countInsert++
972 textInsert = append(textInsert, []rune(diffs[pointer].Text)...)
973 pointer++
974 break
975 case DiffDelete:
976 countDelete++
977 textDelete = append(textDelete, []rune(diffs[pointer].Text)...)
978 pointer++
979 break
980 case DiffEqual:
981 // Upon reaching an equality, check for prior redundancies.
982 if countDelete+countInsert > 1 {
983 if countDelete != 0 && countInsert != 0 {
984 // Factor out any common prefixies.
985 commonlength = commonPrefixLength(textInsert, textDelete)
986 if commonlength != 0 {
987 x := pointer - countDelete - countInsert
988 if x > 0 && diffs[x-1].Type == DiffEqual {
989 diffs[x-1].Text += string(textInsert[:commonlength])
990 } else {
991 diffs = append([]Diff{{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
992 pointer++
993 }
994 textInsert = textInsert[commonlength:]
995 textDelete = textDelete[commonlength:]
996 }
997 // Factor out any common suffixies.
998 commonlength = commonSuffixLength(textInsert, textDelete)
999 if commonlength != 0 {
1000 insertIndex := len(textInsert) - commonlength
1001 deleteIndex := len(textDelete) - commonlength
1002 diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text
1003 textInsert = textInsert[:insertIndex]
1004 textDelete = textDelete[:deleteIndex]
1005 }
1006 }
1007 // Delete the offending records and add the merged ones.
1008 if countDelete == 0 {
1009 diffs = splice(diffs, pointer-countInsert,
1010 countDelete+countInsert,
1011 Diff{DiffInsert, string(textInsert)})
1012 } else if countInsert == 0 {
1013 diffs = splice(diffs, pointer-countDelete,
1014 countDelete+countInsert,
1015 Diff{DiffDelete, string(textDelete)})
1016 } else {
1017 diffs = splice(diffs, pointer-countDelete-countInsert,
1018 countDelete+countInsert,
1019 Diff{DiffDelete, string(textDelete)},
1020 Diff{DiffInsert, string(textInsert)})
1021 }
1022
1023 pointer = pointer - countDelete - countInsert + 1
1024 if countDelete != 0 {
1025 pointer++
1026 }
1027 if countInsert != 0 {
1028 pointer++
1029 }
1030 } else if pointer != 0 && diffs[pointer-1].Type == DiffEqual {
1031 // Merge this equality with the previous one.
1032 diffs[pointer-1].Text += diffs[pointer].Text
1033 diffs = append(diffs[:pointer], diffs[pointer+1:]...)
1034 } else {
1035 pointer++
1036 }
1037 countInsert = 0
1038 countDelete = 0
1039 textDelete = nil
1040 textInsert = nil
1041 break
1042 }
1043 }
1044
1045 if len(diffs[len(diffs)-1].Text) == 0 {
1046 diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end.
1047 }
1048
1049 // Second pass: look for single edits surrounded on both sides by equalities which can be shifted sideways to eliminate an equality. E.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1050 changes := false
1051 pointer = 1
1052 // Intentionally ignore the first and last element (don't need checking).
1053 for pointer < (len(diffs) - 1) {
1054 if diffs[pointer-1].Type == DiffEqual &&
1055 diffs[pointer+1].Type == DiffEqual {
1056 // This is a single edit surrounded by equalities.
1057 if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) {
1058 // Shift the edit over the previous equality.
1059 diffs[pointer].Text = diffs[pointer-1].Text +
1060 diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)]
1061 diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text
1062 diffs = splice(diffs, pointer-1, 1)
1063 changes = true
1064 } else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) {
1065 // Shift the edit over the next equality.
1066 diffs[pointer-1].Text += diffs[pointer+1].Text
1067 diffs[pointer].Text =
1068 diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text
1069 diffs = splice(diffs, pointer+1, 1)
1070 changes = true
1071 }
1072 }
1073 pointer++
1074 }
1075
1076 // If shifts were made, the diff needs reordering and another shift sweep.
1077 if changes {
1078 diffs = dmp.DiffCleanupMerge(diffs)
1079 }
1080
1081 return diffs
1082}
1083
1084// DiffXIndex returns the equivalent location in s2.
1085func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int {
1086 chars1 := 0
1087 chars2 := 0
1088 lastChars1 := 0
1089 lastChars2 := 0
1090 lastDiff := Diff{}
1091 for i := 0; i < len(diffs); i++ {
1092 aDiff := diffs[i]
1093 if aDiff.Type != DiffInsert {
1094 // Equality or deletion.
1095 chars1 += len(aDiff.Text)
1096 }
1097 if aDiff.Type != DiffDelete {
1098 // Equality or insertion.
1099 chars2 += len(aDiff.Text)
1100 }
1101 if chars1 > loc {
1102 // Overshot the location.
1103 lastDiff = aDiff
1104 break
1105 }
1106 lastChars1 = chars1
1107 lastChars2 = chars2
1108 }
1109 if lastDiff.Type == DiffDelete {
1110 // The location was deleted.
1111 return lastChars2
1112 }
1113 // Add the remaining character length.
1114 return lastChars2 + (loc - lastChars1)
1115}
1116
1117// DiffPrettyHtml converts a []Diff into a pretty HTML report.
1118// It is intended as an example from which to write one's own display functions.
1119func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string {
1120 var buff bytes.Buffer
1121 for _, diff := range diffs {
1122 text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶<br>", -1)
1123 switch diff.Type {
1124 case DiffInsert:
1125 _, _ = buff.WriteString("<ins style=\"background:#e6ffe6;\">")
1126 _, _ = buff.WriteString(text)
1127 _, _ = buff.WriteString("</ins>")
1128 case DiffDelete:
1129 _, _ = buff.WriteString("<del style=\"background:#ffe6e6;\">")
1130 _, _ = buff.WriteString(text)
1131 _, _ = buff.WriteString("</del>")
1132 case DiffEqual:
1133 _, _ = buff.WriteString("<span>")
1134 _, _ = buff.WriteString(text)
1135 _, _ = buff.WriteString("</span>")
1136 }
1137 }
1138 return buff.String()
1139}
1140
1141// DiffPrettyText converts a []Diff into a colored text report.
1142func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string {
1143 var buff bytes.Buffer
1144 for _, diff := range diffs {
1145 text := diff.Text
1146
1147 switch diff.Type {
1148 case DiffInsert:
1149 _, _ = buff.WriteString("\x1b[32m")
1150 _, _ = buff.WriteString(text)
1151 _, _ = buff.WriteString("\x1b[0m")
1152 case DiffDelete:
1153 _, _ = buff.WriteString("\x1b[31m")
1154 _, _ = buff.WriteString(text)
1155 _, _ = buff.WriteString("\x1b[0m")
1156 case DiffEqual:
1157 _, _ = buff.WriteString(text)
1158 }
1159 }
1160
1161 return buff.String()
1162}
1163
1164// DiffText1 computes and returns the source text (all equalities and deletions).
1165func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string {
1166 //StringBuilder text = new StringBuilder()
1167 var text bytes.Buffer
1168
1169 for _, aDiff := range diffs {
1170 if aDiff.Type != DiffInsert {
1171 _, _ = text.WriteString(aDiff.Text)
1172 }
1173 }
1174 return text.String()
1175}
1176
1177// DiffText2 computes and returns the destination text (all equalities and insertions).
1178func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string {
1179 var text bytes.Buffer
1180
1181 for _, aDiff := range diffs {
1182 if aDiff.Type != DiffDelete {
1183 _, _ = text.WriteString(aDiff.Text)
1184 }
1185 }
1186 return text.String()
1187}
1188
1189// DiffLevenshtein computes the Levenshtein distance that is the number of inserted, deleted or substituted characters.
1190func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int {
1191 levenshtein := 0
1192 insertions := 0
1193 deletions := 0
1194
1195 for _, aDiff := range diffs {
1196 switch aDiff.Type {
1197 case DiffInsert:
1198 insertions += utf8.RuneCountInString(aDiff.Text)
1199 case DiffDelete:
1200 deletions += utf8.RuneCountInString(aDiff.Text)
1201 case DiffEqual:
1202 // A deletion and an insertion is one substitution.
1203 levenshtein += max(insertions, deletions)
1204 insertions = 0
1205 deletions = 0
1206 }
1207 }
1208
1209 levenshtein += max(insertions, deletions)
1210 return levenshtein
1211}
1212
1213// DiffToDelta crushes the diff into an encoded string which describes the operations required to transform text1 into text2.
1214// E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. Operations are tab-separated. Inserted text is escaped using %xx notation.
1215func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string {
1216 var text bytes.Buffer
1217 for _, aDiff := range diffs {
1218 switch aDiff.Type {
1219 case DiffInsert:
1220 _, _ = text.WriteString("+")
1221 _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1))
1222 _, _ = text.WriteString("\t")
1223 break
1224 case DiffDelete:
1225 _, _ = text.WriteString("-")
1226 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text)))
1227 _, _ = text.WriteString("\t")
1228 break
1229 case DiffEqual:
1230 _, _ = text.WriteString("=")
1231 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text)))
1232 _, _ = text.WriteString("\t")
1233 break
1234 }
1235 }
1236 delta := text.String()
1237 if len(delta) != 0 {
1238 // Strip off trailing tab character.
1239 delta = delta[0 : utf8.RuneCountInString(delta)-1]
1240 delta = unescaper.Replace(delta)
1241 }
1242 return delta
1243}
1244
1245// DiffFromDelta given the original text1, and an encoded string which describes the operations required to transform text1 into text2, comAdde the full diff.
1246func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Diff, err error) {
1247 i := 0
1248 runes := []rune(text1)
1249
1250 for _, token := range strings.Split(delta, "\t") {
1251 if len(token) == 0 {
1252 // Blank tokens are ok (from a trailing \t).
1253 continue
1254 }
1255
1256 // Each token begins with a one character parameter which specifies the operation of this token (delete, insert, equality).
1257 param := token[1:]
1258
1259 switch op := token[0]; op {
1260 case '+':
1261 // Decode would Diff all "+" to " "
1262 param = strings.Replace(param, "+", "%2b", -1)
1263 param, err = url.QueryUnescape(param)
1264 if err != nil {
1265 return nil, err
1266 }
1267 if !utf8.ValidString(param) {
1268 return nil, fmt.Errorf("invalid UTF-8 token: %q", param)
1269 }
1270
1271 diffs = append(diffs, Diff{DiffInsert, param})
1272 case '=', '-':
1273 n, err := strconv.ParseInt(param, 10, 0)
1274 if err != nil {
1275 return nil, err
1276 } else if n < 0 {
1277 return nil, errors.New("Negative number in DiffFromDelta: " + param)
1278 }
1279
1280 i += int(n)
1281 // Break out if we are out of bounds, go1.6 can't handle this very well
1282 if i > len(runes) {
1283 break
1284 }
1285 // Remember that string slicing is by byte - we want by rune here.
1286 text := string(runes[i-int(n) : i])
1287
1288 if op == '=' {
1289 diffs = append(diffs, Diff{DiffEqual, text})
1290 } else {
1291 diffs = append(diffs, Diff{DiffDelete, text})
1292 }
1293 default:
1294 // Anything else is an error.
1295 return nil, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0]))
1296 }
1297 }
1298
1299 if i != len(runes) {
1300 return nil, fmt.Errorf("Delta length (%v) is different from source text length (%v)", i, len(text1))
1301 }
1302
1303 return diffs, nil
1304}
1305
1306// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
1307func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
1308 // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
1309 lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
1310
1311 lineHash := make(map[string]int)
1312 //Each string has the index of lineArray which it points to
1313 strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
1314 strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
1315
1316 return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
1317}
1318
1319// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
1320func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
1321 // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
1322 lineStart := 0
1323 lineEnd := -1
1324 strs := []uint32{}
1325
1326 for lineEnd < len(text)-1 {
1327 lineEnd = indexOf(text, "\n", lineStart)
1328
1329 if lineEnd == -1 {
1330 lineEnd = len(text) - 1
1331 }
1332
1333 line := text[lineStart : lineEnd+1]
1334 lineStart = lineEnd + 1
1335 lineValue, ok := lineHash[line]
1336
1337 if ok {
1338 strs = append(strs, uint32(lineValue))
1339 } else {
1340 *lineArray = append(*lineArray, line)
1341 lineHash[line] = len(*lineArray) - 1
1342 strs = append(strs, uint32(len(*lineArray)-1))
1343 }
1344 }
1345
1346 return strs
1347}