main
1// Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
2// https://github.com/sergi/go-diff
3// See the included LICENSE file for license details.
4//
5// go-diff is a Go implementation of Google's Diff, Match, and Patch library
6// Original library is Copyright (c) 2006 Google Inc.
7// http://code.google.com/p/google-diff-match-patch/
8
9package diffmatchpatch
10
11import (
12 "fmt"
13 "strings"
14 "unicode/utf8"
15)
16
17const UNICODE_INVALID_RANGE_START = 0xD800
18const UNICODE_INVALID_RANGE_END = 0xDFFF
19const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1
20const UNICODE_RANGE_MAX = 0x10FFFF
21
22// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
23// In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive. Thus "%3F" would not be unescaped. But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
24var unescaper = strings.NewReplacer(
25 "%21", "!", "%7E", "~", "%27", "'",
26 "%28", "(", "%29", ")", "%3B", ";",
27 "%2F", "/", "%3F", "?", "%3A", ":",
28 "%40", "@", "%26", "&", "%3D", "=",
29 "%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*")
30
31// indexOf returns the first index of pattern in str, starting at str[i].
32func indexOf(str string, pattern string, i int) int {
33 if i > len(str)-1 {
34 return -1
35 }
36 if i <= 0 {
37 return strings.Index(str, pattern)
38 }
39 ind := strings.Index(str[i:], pattern)
40 if ind == -1 {
41 return -1
42 }
43 return ind + i
44}
45
46// lastIndexOf returns the last index of pattern in str, starting at str[i].
47func lastIndexOf(str string, pattern string, i int) int {
48 if i < 0 {
49 return -1
50 }
51 if i >= len(str) {
52 return strings.LastIndex(str, pattern)
53 }
54 _, size := utf8.DecodeRuneInString(str[i:])
55 return strings.LastIndex(str[:i+size], pattern)
56}
57
58// runesIndexOf returns the index of pattern in target, starting at target[i].
59func runesIndexOf(target, pattern []rune, i int) int {
60 if i > len(target)-1 {
61 return -1
62 }
63 if i <= 0 {
64 return runesIndex(target, pattern)
65 }
66 ind := runesIndex(target[i:], pattern)
67 if ind == -1 {
68 return -1
69 }
70 return ind + i
71}
72
73func runesEqual(r1, r2 []rune) bool {
74 if len(r1) != len(r2) {
75 return false
76 }
77 for i, c := range r1 {
78 if c != r2[i] {
79 return false
80 }
81 }
82 return true
83}
84
85// runesIndex is the equivalent of strings.Index for rune slices.
86func runesIndex(r1, r2 []rune) int {
87 last := len(r1) - len(r2)
88 for i := 0; i <= last; i++ {
89 if runesEqual(r1[i:i+len(r2)], r2) {
90 return i
91 }
92 }
93 return -1
94}
95
96func intArrayToString(ns []uint32) string {
97 if len(ns) == 0 {
98 return ""
99 }
100
101 b := []rune{}
102 for _, n := range ns {
103 b = append(b, intToRune(n))
104 }
105 return string(b)
106}
107
108// These constants define the number of bits representable
109// in 1,2,3,4 byte utf8 sequences, respectively.
110const ONE_BYTE_BITS = 7
111const TWO_BYTE_BITS = 11
112const THREE_BYTE_BITS = 16
113const FOUR_BYTE_BITS = 21
114
115// Helper for getting a sequence of bits from an integer.
116func getBits(i uint32, cnt byte, from byte) byte {
117 return byte((i >> from) & ((1 << cnt) - 1))
118}
119
120// Converts an integer in the range 0~1112060 into a rune.
121// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
122func intToRune(i uint32) rune {
123 if i < (1 << ONE_BYTE_BITS) {
124 return rune(i)
125 }
126
127 if i < (1 << TWO_BYTE_BITS) {
128 r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
129 if size != 2 || r == utf8.RuneError {
130 panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", size, r, i))
131 }
132 return r
133 }
134
135 // Last -3 here needed because for some reason 3rd to last codepoint 65533 in this range
136 // was returning utf8.RuneError during encoding.
137 if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
138 if i >= UNICODE_INVALID_RANGE_START {
139 i += UNICODE_INVALID_RANGE_DELTA
140 }
141
142 r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
143 if size != 3 || r == utf8.RuneError {
144 panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", size, r, i))
145 }
146 return r
147 }
148
149 if i < (1<<FOUR_BYTE_BITS - UNICODE_INVALID_RANGE_DELTA - 3) {
150 i += UNICODE_INVALID_RANGE_DELTA + 3
151 r, size := utf8.DecodeRune([]byte{0b11110000 | getBits(i, 3, 18), 0b10000000 | getBits(i, 6, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
152 if size != 4 || r == utf8.RuneError {
153 panic(fmt.Sprintf("Error encoding an int %d with size 4, got rune %v and size %d", size, r, i))
154 }
155 return r
156 }
157 panic(fmt.Sprintf("The integer %d is too large for runeToInt()", i))
158}
159
160// Converts a rune generated by intToRune back to an integer
161func runeToInt(r rune) uint32 {
162 i := uint32(r)
163 if i < (1 << ONE_BYTE_BITS) {
164 return i
165 }
166
167 bytes := []byte{0, 0, 0, 0}
168
169 size := utf8.EncodeRune(bytes, r)
170
171 if size == 2 {
172 return uint32(bytes[0]&0b11111)<<6 | uint32(bytes[1]&0b111111)
173 }
174
175 if size == 3 {
176 result := uint32(bytes[0]&0b1111)<<12 | uint32(bytes[1]&0b111111)<<6 | uint32(bytes[2]&0b111111)
177 if result >= UNICODE_INVALID_RANGE_END {
178 return result - UNICODE_INVALID_RANGE_DELTA
179 }
180
181 return result
182 }
183
184 if size == 4 {
185 result := uint32(bytes[0]&0b111)<<18 | uint32(bytes[1]&0b111111)<<12 | uint32(bytes[2]&0b111111)<<6 | uint32(bytes[3]&0b111111)
186 return result - UNICODE_INVALID_RANGE_DELTA - 3
187 }
188
189 panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size))
190}