main
Raw Download raw file
  1package ansi
  2
  3import (
  4	"bytes"
  5	"unicode"
  6	"unicode/utf8"
  7
  8	"github.com/charmbracelet/x/ansi/parser"
  9	"github.com/mattn/go-runewidth"
 10	"github.com/rivo/uniseg"
 11)
 12
// nbsp is the code point of the Unicode no-break space (U+00A0). The word
// wrapping routines compare decoded runes against it so that a no-break space
// is kept as part of a word instead of being treated as breakable whitespace.
const nbsp = 0xA0
 15
 16// Hardwrap wraps a string or a block of text to a given line length, breaking
 17// word boundaries. This will preserve ANSI escape codes and will account for
 18// wide-characters in the string.
 19// When preserveSpace is true, spaces at the beginning of a line will be
 20// preserved.
 21// This treats the text as a sequence of graphemes.
 22func Hardwrap(s string, limit int, preserveSpace bool) string {
 23	return hardwrap(GraphemeWidth, s, limit, preserveSpace)
 24}
 25
 26// HardwrapWc wraps a string or a block of text to a given line length, breaking
 27// word boundaries. This will preserve ANSI escape codes and will account for
 28// wide-characters in the string.
 29// When preserveSpace is true, spaces at the beginning of a line will be
 30// preserved.
 31// This treats the text as a sequence of wide characters and runes.
 32func HardwrapWc(s string, limit int, preserveSpace bool) string {
 33	return hardwrap(WcWidth, s, limit, preserveSpace)
 34}
 35
 36func hardwrap(m Method, s string, limit int, preserveSpace bool) string {
 37	if limit < 1 {
 38		return s
 39	}
 40
 41	var (
 42		cluster      []byte
 43		buf          bytes.Buffer
 44		curWidth     int
 45		forceNewline bool
 46		pstate       = parser.GroundState // initial state
 47		b            = []byte(s)
 48	)
 49
 50	addNewline := func() {
 51		buf.WriteByte('\n')
 52		curWidth = 0
 53	}
 54
 55	i := 0
 56	for i < len(b) {
 57		state, action := parser.Table.Transition(pstate, b[i])
 58		if state == parser.Utf8State {
 59			var width int
 60			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
 61			if m == WcWidth {
 62				width = runewidth.StringWidth(string(cluster))
 63			}
 64			i += len(cluster)
 65
 66			if curWidth+width > limit {
 67				addNewline()
 68			}
 69			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
 70				// Skip spaces at the beginning of a line
 71				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
 72					pstate = parser.GroundState
 73					continue
 74				}
 75			}
 76
 77			buf.Write(cluster)
 78			curWidth += width
 79			pstate = parser.GroundState
 80			continue
 81		}
 82
 83		switch action {
 84		case parser.PrintAction, parser.ExecuteAction:
 85			if b[i] == '\n' {
 86				addNewline()
 87				forceNewline = false
 88				break
 89			}
 90
 91			if curWidth+1 > limit {
 92				addNewline()
 93				forceNewline = true
 94			}
 95
 96			// Skip spaces at the beginning of a line
 97			if curWidth == 0 {
 98				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
 99					break
100				}
101				forceNewline = false
102			}
103
104			buf.WriteByte(b[i])
105			if action == parser.PrintAction {
106				curWidth++
107			}
108		default:
109			buf.WriteByte(b[i])
110		}
111
112		// We manage the UTF8 state separately manually above.
113		if pstate != parser.Utf8State {
114			pstate = state
115		}
116		i++
117	}
118
119	return buf.String()
120}
121
122// Wordwrap wraps a string or a block of text to a given line length, not
123// breaking word boundaries. This will preserve ANSI escape codes and will
124// account for wide-characters in the string.
125// The breakpoints string is a list of characters that are considered
126// breakpoints for word wrapping. A hyphen (-) is always considered a
127// breakpoint.
128//
129// Note: breakpoints must be a string of 1-cell wide rune characters.
130//
131// This treats the text as a sequence of graphemes.
132func Wordwrap(s string, limit int, breakpoints string) string {
133	return wordwrap(GraphemeWidth, s, limit, breakpoints)
134}
135
136// WordwrapWc wraps a string or a block of text to a given line length, not
137// breaking word boundaries. This will preserve ANSI escape codes and will
138// account for wide-characters in the string.
139// The breakpoints string is a list of characters that are considered
140// breakpoints for word wrapping. A hyphen (-) is always considered a
141// breakpoint.
142//
143// Note: breakpoints must be a string of 1-cell wide rune characters.
144//
145// This treats the text as a sequence of wide characters and runes.
146func WordwrapWc(s string, limit int, breakpoints string) string {
147	return wordwrap(WcWidth, s, limit, breakpoints)
148}
149
150func wordwrap(m Method, s string, limit int, breakpoints string) string {
151	if limit < 1 {
152		return s
153	}
154
155	var (
156		cluster  []byte
157		buf      bytes.Buffer
158		word     bytes.Buffer
159		space    bytes.Buffer
160		curWidth int
161		wordLen  int
162		pstate   = parser.GroundState // initial state
163		b        = []byte(s)
164	)
165
166	addSpace := func() {
167		curWidth += space.Len()
168		buf.Write(space.Bytes())
169		space.Reset()
170	}
171
172	addWord := func() {
173		if word.Len() == 0 {
174			return
175		}
176
177		addSpace()
178		curWidth += wordLen
179		buf.Write(word.Bytes())
180		word.Reset()
181		wordLen = 0
182	}
183
184	addNewline := func() {
185		buf.WriteByte('\n')
186		curWidth = 0
187		space.Reset()
188	}
189
190	i := 0
191	for i < len(b) {
192		state, action := parser.Table.Transition(pstate, b[i])
193		if state == parser.Utf8State {
194			var width int
195			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
196			if m == WcWidth {
197				width = runewidth.StringWidth(string(cluster))
198			}
199			i += len(cluster)
200
201			r, _ := utf8.DecodeRune(cluster)
202			if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
203				addWord()
204				space.WriteRune(r)
205			} else if bytes.ContainsAny(cluster, breakpoints) {
206				addSpace()
207				addWord()
208				buf.Write(cluster)
209				curWidth++
210			} else {
211				word.Write(cluster)
212				wordLen += width
213				if curWidth+space.Len()+wordLen > limit &&
214					wordLen < limit {
215					addNewline()
216				}
217			}
218
219			pstate = parser.GroundState
220			continue
221		}
222
223		switch action {
224		case parser.PrintAction, parser.ExecuteAction:
225			r := rune(b[i])
226			switch {
227			case r == '\n':
228				if wordLen == 0 {
229					if curWidth+space.Len() > limit {
230						curWidth = 0
231					} else {
232						buf.Write(space.Bytes())
233					}
234					space.Reset()
235				}
236
237				addWord()
238				addNewline()
239			case unicode.IsSpace(r):
240				addWord()
241				space.WriteByte(b[i])
242			case r == '-':
243				fallthrough
244			case runeContainsAny(r, breakpoints):
245				addSpace()
246				addWord()
247				buf.WriteByte(b[i])
248				curWidth++
249			default:
250				word.WriteByte(b[i])
251				wordLen++
252				if curWidth+space.Len()+wordLen > limit &&
253					wordLen < limit {
254					addNewline()
255				}
256			}
257
258		default:
259			word.WriteByte(b[i])
260		}
261
262		// We manage the UTF8 state separately manually above.
263		if pstate != parser.Utf8State {
264			pstate = state
265		}
266		i++
267	}
268
269	addWord()
270
271	return buf.String()
272}
273
274// Wrap wraps a string or a block of text to a given line length, breaking word
275// boundaries if necessary. This will preserve ANSI escape codes and will
276// account for wide-characters in the string. The breakpoints string is a list
277// of characters that are considered breakpoints for word wrapping. A hyphen
278// (-) is always considered a breakpoint.
279//
280// Note: breakpoints must be a string of 1-cell wide rune characters.
281//
282// This treats the text as a sequence of graphemes.
283func Wrap(s string, limit int, breakpoints string) string {
284	return wrap(GraphemeWidth, s, limit, breakpoints)
285}
286
287// WrapWc wraps a string or a block of text to a given line length, breaking word
288// boundaries if necessary. This will preserve ANSI escape codes and will
289// account for wide-characters in the string. The breakpoints string is a list
290// of characters that are considered breakpoints for word wrapping. A hyphen
291// (-) is always considered a breakpoint.
292//
293// Note: breakpoints must be a string of 1-cell wide rune characters.
294//
295// This treats the text as a sequence of wide characters and runes.
296func WrapWc(s string, limit int, breakpoints string) string {
297	return wrap(WcWidth, s, limit, breakpoints)
298}
299
300func wrap(m Method, s string, limit int, breakpoints string) string {
301	if limit < 1 {
302		return s
303	}
304
305	var (
306		cluster  []byte
307		buf      bytes.Buffer
308		word     bytes.Buffer
309		space    bytes.Buffer
310		curWidth int                  // written width of the line
311		wordLen  int                  // word buffer len without ANSI escape codes
312		pstate   = parser.GroundState // initial state
313		b        = []byte(s)
314	)
315
316	addSpace := func() {
317		curWidth += space.Len()
318		buf.Write(space.Bytes())
319		space.Reset()
320	}
321
322	addWord := func() {
323		if word.Len() == 0 {
324			return
325		}
326
327		addSpace()
328		curWidth += wordLen
329		buf.Write(word.Bytes())
330		word.Reset()
331		wordLen = 0
332	}
333
334	addNewline := func() {
335		buf.WriteByte('\n')
336		curWidth = 0
337		space.Reset()
338	}
339
340	i := 0
341	for i < len(b) {
342		state, action := parser.Table.Transition(pstate, b[i])
343		if state == parser.Utf8State {
344			var width int
345			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
346			if m == WcWidth {
347				width = runewidth.StringWidth(string(cluster))
348			}
349			i += len(cluster)
350
351			r, _ := utf8.DecodeRune(cluster)
352			switch {
353			case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
354				addWord()
355				space.WriteRune(r)
356			case bytes.ContainsAny(cluster, breakpoints):
357				addSpace()
358				if curWidth+wordLen+width > limit {
359					word.Write(cluster)
360					wordLen += width
361				} else {
362					addWord()
363					buf.Write(cluster)
364					curWidth += width
365				}
366			default:
367				if wordLen+width > limit {
368					// Hardwrap the word if it's too long
369					addWord()
370				}
371
372				word.Write(cluster)
373				wordLen += width
374
375				if curWidth+wordLen+space.Len() > limit {
376					addNewline()
377				}
378			}
379
380			pstate = parser.GroundState
381			continue
382		}
383
384		switch action {
385		case parser.PrintAction, parser.ExecuteAction:
386			switch r := rune(b[i]); {
387			case r == '\n':
388				if wordLen == 0 {
389					if curWidth+space.Len() > limit {
390						curWidth = 0
391					} else {
392						// preserve whitespaces
393						buf.Write(space.Bytes())
394					}
395					space.Reset()
396				}
397
398				addWord()
399				addNewline()
400			case unicode.IsSpace(r):
401				addWord()
402				space.WriteRune(r)
403			case r == '-':
404				fallthrough
405			case runeContainsAny(r, breakpoints):
406				addSpace()
407				if curWidth+wordLen >= limit {
408					// We can't fit the breakpoint in the current line, treat
409					// it as part of the word.
410					word.WriteRune(r)
411					wordLen++
412				} else {
413					addWord()
414					buf.WriteRune(r)
415					curWidth++
416				}
417			default:
418				if curWidth == limit {
419					addNewline()
420				}
421				word.WriteRune(r)
422				wordLen++
423
424				if wordLen == limit {
425					// Hardwrap the word if it's too long
426					addWord()
427				}
428
429				if curWidth+wordLen+space.Len() > limit {
430					addNewline()
431				}
432			}
433
434		default:
435			word.WriteByte(b[i])
436		}
437
438		// We manage the UTF8 state separately manually above.
439		if pstate != parser.Utf8State {
440			pstate = state
441		}
442		i++
443	}
444
445	if wordLen == 0 {
446		if curWidth+space.Len() > limit {
447			curWidth = 0
448		} else {
449			// preserve whitespaces
450			buf.Write(space.Bytes())
451		}
452		space.Reset()
453	}
454
455	addWord()
456
457	return buf.String()
458}
459
// runeContainsAny reports whether r is one of the runes of s. It returns
// false when s is empty.
func runeContainsAny(r rune, s string) bool {
	found := false
	for _, candidate := range s {
		if candidate != r {
			continue
		}
		found = true
		break
	}
	return found
}