From bd6f1cf79e20f552c1a6c9403595fbabf96be290 Mon Sep 17 00:00:00 2001
From: "Luke I. Wilson" <thelukaswils@gmail.com>
Date: Mon, 22 Mar 2021 16:10:23 -0500
Subject: [PATCH] Added RopeBuffer Buffer implementation

---
 go.mod                 |   1 +
 go.sum                 |   2 +
 ui/buffer/buffer.go    |  56 +++++++++++
 ui/buffer/rope.go      | 213 +++++++++++++++++++++++++++++++++++++++++
 ui/buffer/rope_test.go |  47 +++++++++
 ui/textedit.go         |  19 ++--
 6 files changed, 329 insertions(+), 9 deletions(-)
 create mode 100644 ui/buffer/buffer.go
 create mode 100644 ui/buffer/rope.go
 create mode 100644 ui/buffer/rope_test.go

diff --git a/go.mod b/go.mod
index 536d30a..fbc32e1 100644
--- a/go.mod
+++ b/go.mod
@@ -6,4 +6,5 @@ require (
 	github.com/gdamore/tcell/v2 v2.2.0
 	github.com/mattn/go-runewidth v0.0.10 // indirect
 	github.com/zyedidia/clipboard v1.0.3 // indirect
+	github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a // indirect
 )
diff --git a/go.sum b/go.sum
index 49102a5..2b16a29 100644
--- a/go.sum
+++ b/go.sum
@@ -10,6 +10,8 @@ github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
 github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/zyedidia/clipboard v1.0.3 h1:F/nCDVYMdbDWTmY8s8cJl0tnwX32q96IF09JHM14bUI=
 github.com/zyedidia/clipboard v1.0.3/go.mod h1:zykFnZUXX0ErxqvYLUFEq7QDJKId8rmh2FgD0/Y8cjA=
+github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a h1:GqVrOhVdC792A34K1Fw1HN6RC0wr5yAJVdtt8xaxXBA=
+github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a/go.mod h1:IKo1js3O2gdESAUH9F3B33wlrRBnpJMJR/gXyY0a3ho=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf h1:MZ2shdL+ZM/XzY3ZGOnh4Nlpnxz5GSOhOmtHo3iPU6M=
diff --git a/ui/buffer/buffer.go b/ui/buffer/buffer.go
new file mode 100644
index 0000000..02f9d63
--- /dev/null
+++ b/ui/buffer/buffer.go
@@ -0,0 +1,56 @@
+package buffer
+
+import (
+	"io"
+)
+
+// A Buffer is a wrapper around any buffer data structure, like a rope or a gap
+// buffer, that can be used for text editors. One way this interface helps is by
+// making all API function parameters line and column indexes, so it is simple and
+// easy to index and use like a text editor. All lines and columns start at zero,
+// and all "end" ranges are inclusive.
+//
+// Any bounds out of range are panics! If you are unsure your position or range
+// may be out of bounds, use ClampLineCol() or compare with Lines() or RunesInLine().
+type Buffer interface {
+	// Line returns a slice of the data at the given line, including the ending line-
+	// delimiter. line starts from zero. Data returned may or may not be a copy: do not
+	// write to it.
+	Line(line int) []byte
+
+	// Slice returns a slice of the buffer from startLine, startCol, to endLine,
+	// endCol, inclusive bounds. The returned value may or may not be a copy of the
+	// data, so do not write to it.
+	Slice(startLine, startCol, endLine, endCol int) []byte
+
+	// Bytes returns all of the bytes in the buffer. This function is very likely to
+	// copy all of the data in the buffer, so prefer other methods where possible.
+	Bytes() []byte
+
+	// Insert copies a byte slice (inserting it) into the position at line, col.
+	Insert(line, col int, value []byte)
+
+	// Remove deletes any characters between startLine, startCol, and endLine,
+	// endCol, inclusive bounds.
+	Remove(startLine, startCol, endLine, endCol int)
+
+	// Len returns the number of bytes in the buffer.
+	Len() int
+
+	// Lines returns the number of lines in the buffer. If the buffer is empty,
+	// 1 is returned, because there is always at least one line. This function
+	// basically counts the number of newline ('\n') characters in a buffer.
+	Lines() int
+
+	// RunesInLine returns the number of columns in the given line. That is, the
+	// number of UTF-8 codepoints (or runes) in line, not bytes.
+	RunesInLine(line int) int
+
+	// ClampLineCol is a utility function to clamp any provided line and col to
+	// only possible values within the buffer. It first clamps the line, then clamps
+	// the column within that line.
+	ClampLineCol(line, col int) (int, int)
+
+	// WriteTo has the signature of io.WriterTo; presumably it writes the whole buffer to w.
+	WriteTo(w io.Writer) (int64, error)
+}
diff --git a/ui/buffer/rope.go b/ui/buffer/rope.go
new file mode 100644
index 0000000..e5c991f
--- /dev/null
+++ b/ui/buffer/rope.go
@@ -0,0 +1,213 @@
+package buffer
+
+import (
+	"io"
+	"unicode/utf8"
+
+	"github.com/zyedidia/rope"
+)
+
+type RopeBuffer rope.Node // RopeBuffer is a rope.Node exposed through line/column methods.
+
+func NewRopeBuffer(contents []byte) *RopeBuffer {
+	return (*RopeBuffer)(rope.New(contents)) // A pointer conversion: RopeBuffer's underlying type is rope.Node.
+}
+
+// pos returns the index of the first byte of the rune at line, col. col is counted
+// in runes, the result in bytes. If line is greater than or equal to the number of
+// lines in the buffer, getLineStartPos panics. If col is zero or negative, the start
+// of the line is returned. If col is past the end of the line, the index of the
+// line's '\n' (or the buffer length, on the last line) is returned, instead.
+func (b *RopeBuffer) pos(line, col int) int {
+	_rope := (*rope.Node)(b)
+
+	pos := b.getLineStartPos(line)
+	if col <= 0 {
+		return pos // Already at the requested column.
+	}
+
+	// Have to walk the line for safety: col is counted in runes, not bytes, and col
+	// may be greater than the number of runes in the line.
+	_, r := _rope.SplitAt(pos)
+	l, _ := r.SplitAt(_rope.Len() - pos)
+
+	l.EachLeaf(func(n *rope.Node) bool {
+		data := n.Value() // Reference; not a copy.
+		var i int
+		for i < len(data) {
+			if col == 0 || data[i] == '\n' {
+				return true // Found the position of the column
+			}
+
+			// Respect UTF-8 codepoint boundaries: a column is one rune, which may
+			// be several bytes, so advance the byte position by the rune's size.
+			_, size := utf8.DecodeRune(data[i:])
+			pos += size
+			col--
+			i += size
+		}
+		return false // Have not gotten to the appropriate position, yet
+	})
+
+	return pos
+}
+
+// Line returns the bytes of the given line (counting from zero), together with
+// its trailing line delimiter when it has one. If line is greater than or equal
+// to the number of lines in the buffer, getLineStartPos panics. The data returned
+// may or may not be a copy: do not write to it.
+func (b *RopeBuffer) Line(line int) []byte {
+	node := (*rope.Node)(b)
+	start := b.getLineStartPos(line)
+	length := 0 // Number of bytes in the line, delimiter included
+
+	// Narrow the rope down to everything from the start of the line onwards, so
+	// that the leaves visited below begin exactly at the line.
+	_, tail := node.SplitAt(start)
+	rest, _ := tail.SplitAt(node.Len() - start)
+
+	// Scanning bytes is sufficient here: the byte '\n' never occurs inside a
+	// multi-byte UTF-8 sequence, so no rune decoding is needed to find the
+	// delimiter.
+	rest.EachLeaf(func(leaf *rope.Node) bool {
+		data := leaf.Value() // Reference; not a copy.
+		for _, c := range data {
+			length++
+			if c == '\n' {
+				return true // Reached the end of the line; delimiter is counted
+			}
+		}
+		return false // The line continues into the next leaf
+	})
+
+	return node.Slice(start, start+length)
+}
+
+// Slice returns the bytes from startLine, startCol through endLine, endCol (the end
+// is included by extending it one byte). May or may not be a copy: do not write to it.
+func (b *RopeBuffer) Slice(startLine, startCol, endLine, endCol int) []byte {
+	from, to := b.pos(startLine, startCol), b.pos(endLine, endCol)+1
+	return (*rope.Node)(b).Slice(from, to)
+}
+
+// Bytes returns the entire contents of the buffer. This very likely copies all
+// of the data, so use it sparingly and prefer other methods where possible.
+func (b *RopeBuffer) Bytes() []byte {
+	node := (*rope.Node)(b)
+	return node.Value()
+}
+
+// Insert inserts value into the buffer at the byte index that pos reports for line, col.
+func (b *RopeBuffer) Insert(line, col int, value []byte) {
+	(*rope.Node)(b).Insert(b.pos(line, col), value) // pos converts line, col to a byte index
+}
+
+// Remove deletes the bytes from startLine, startCol through endLine, endCol, inclusive.
+func (b *RopeBuffer) Remove(startLine, startCol, endLine, endCol int) {
+	from, to := b.pos(startLine, startCol), b.pos(endLine, endCol)+1
+	(*rope.Node)(b).Remove(from, to)
+}
+
+// Len returns the number of bytes in the buffer.
+func (b *RopeBuffer) Len() int {
+	return (*rope.Node)(b).Len() // Delegates to the underlying rope.Node
+}
+
+// Lines returns the number of lines in the buffer, which is one more than the
+// number of newline ('\n') bytes it holds. An empty buffer therefore reports 1,
+// because there is always at least one line.
+func (b *RopeBuffer) Lines() int {
+	node := (*rope.Node)(b)
+	return 1 + node.Count(0, node.Len(), []byte{'\n'})
+}
+
+// runesInLine counts the runes from byte index pos up to, but not including, the
+// next newline (or the end of the buffer). pos is expected to be the first byte
+// index of a line. The result is 0 when pos points at a newline, or when pos is
+// greater than or equal to the number of bytes in the buffer.
+func (b *RopeBuffer) runesInLine(pos int) int {
+	node := (*rope.Node)(b)
+	total := node.Len()
+	if pos >= total {
+		return 0
+	}
+
+	// Narrow the rope down to everything from pos onwards.
+	_, tail := node.SplitAt(pos)
+	rest, _ := tail.SplitAt(total - pos)
+
+	runes := 0
+	rest.EachLeaf(func(leaf *rope.Node) bool {
+		// Consume the leaf one rune at a time. Value is a reference; not a copy,
+		// and reslicing it below does not modify the data.
+		for data := leaf.Value(); len(data) > 0; {
+			if data[0] == '\n' {
+				return true // Reached the end of the line
+			}
+
+			// Step over a whole codepoint so multi-byte runes count once
+			_, size := utf8.DecodeRune(data)
+			data = data[size:]
+			runes++
+		}
+		return false // The line continues into the next leaf
+	})
+
+	return runes
+}
+
+// getLineStartPos returns the byte index at which the given line (counting from
+// zero) begins. The result may equal the length of the buffer, pointing at no byte,
+// when the last line of the buffer is empty. A line of zero or less starts at index
+// 0. If line is greater than or equal to the number of lines in the buffer, the
+// function panics.
+func (b *RopeBuffer) getLineStartPos(line int) int {
+	if line <= 0 {
+		return 0
+	}
+
+	node := (*rope.Node)(b)
+	start, remaining := 0, line // remaining = newlines still to pass
+	// Visit each newline in order until enough of them have been passed.
+	node.IndexAllFunc(0, node.Len(), []byte{'\n'}, func(idx int) bool {
+		// The line after a delimiter starts one byte past it.
+		start = idx + 1
+		remaining--
+		return remaining <= 0 // Returning true stops the search
+	})
+
+	if remaining > 0 { // The buffer ran out of newlines before reaching line
+		panic("getLineStartPos: not enough lines in buffer to reach position")
+	}
+	return start
+}
+
+// RunesInLine reports how many runes the given line holds, not counting its line
+// delimiter. That is, the number of UTF-8 codepoints in the line, not bytes. It
+// panics, via getLineStartPos, when line is beyond the last line of the buffer.
+func (b *RopeBuffer) RunesInLine(line int) int {
+	return b.runesInLine(b.getLineStartPos(line))
+}
+
+// ClampLineCol clamps line and col to values that point to a rune in the buffer.
+// It first clamps the line, then clamps the column within that line. A line with
+// no runes clamps to column 0, never to a negative column.
+func (b *RopeBuffer) ClampLineCol(line, col int) (int, int) {
+	if line < 0 {
+		line = 0
+	} else if last := b.Lines() - 1; line > last {
+		line = last
+	}
+
+	if last := b.RunesInLine(line) - 1; col > last {
+		col = last // -1 on an empty line; corrected below
+	}
+	if col < 0 {
+		col = 0
+	}
+	return line, col
+}
+
+func (b *RopeBuffer) WriteTo(w io.Writer) (int64, error) {
+	return (*rope.Node)(b).WriteTo(w) // Delegates to the underlying rope.Node
+}
diff --git a/ui/buffer/rope_test.go b/ui/buffer/rope_test.go
new file mode 100644
index 0000000..1fa8aca
--- /dev/null
+++ b/ui/buffer/rope_test.go
@@ -0,0 +1,47 @@
+package buffer
+
+import "testing"
+
+// TestRopeInserting checks inserting at both ends of a line and removing across a line break.
+func TestRopeInserting(t *testing.T) {
+	var buf Buffer = NewRopeBuffer([]byte("some"))
+	buf.Insert(0, 4, []byte(" text")) // Insert " text" after "some"
+	buf.Insert(0, 0, []byte("with\n\t"))
+	// "with\n\tsome text"
+
+	buf.Remove(0, 4, 1, 5) // Delete from line 0, col 4, to line 1, col 5: "\n\tsome "
+
+	if str := string(buf.Bytes()); str != "withtext" {
+		t.Errorf("string does not match \"withtext\", got %#v", str)
+	}
+}
+
+// TestRopeBounds checks line counting, rune counting, clamping and line retrieval
+// on a buffer that contains a multi-byte rune and ends with an empty line.
+func TestRopeBounds(t *testing.T) {
+	var buf Buffer = NewRopeBuffer([]byte("this\nis (は)\n\tsome\ntext\n"))
+	//this
+	//is (は)
+	//	some
+	//text
+	//
+
+	if lines := buf.Lines(); lines != 5 {
+		t.Errorf("Expected buf.Lines() == 5, got %v", lines)
+	}
+
+	if runes := buf.RunesInLine(1); runes != 6 { // "is" in English and in Japanese
+		t.Errorf("Expected 6 runes in line 2, found %v", runes)
+	}
+
+	// Either coordinate being wrong is a failure, so the checks are joined with ||;
+	// with && a wrong column alone would go unnoticed.
+	line, col := buf.ClampLineCol(15, 5) // Should become last line, first column
+	if line != 4 || col != 0 {
+		t.Errorf("Expected to clamp line col to 4,0 got %v,%v", line, col)
+	}
+
+	if line := string(buf.Line(2)); line != "\tsome\n" {
+		t.Errorf("Expected line 3 to equal \"\\tsome\\n\", got %#v", line)
+	}
+}
diff --git a/ui/textedit.go b/ui/textedit.go
index 16157d6..7cec23d 100644
--- a/ui/textedit.go
+++ b/ui/textedit.go
@@ -5,7 +5,9 @@ import (
 	"math"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 
+	"github.com/fivemoreminix/diesel/ui/buffer"
 	"github.com/gdamore/tcell/v2"
 )
 
@@ -24,6 +26,7 @@ type Region struct {
 // tools, is autocomplete ready, and contains the various information about
 // content being edited.
 type TextEdit struct {
+	Buffer      buffer.Buffer
 	LineNumbers bool   // Whether to render line numbers (and therefore the column)
 	Dirty       bool   // Whether the buffer has been edited
 	UseHardTabs bool   // When true, tabs are '\t'
@@ -31,7 +34,6 @@ type TextEdit struct {
 	IsCRLF      bool   // Whether the file's line endings are CRLF (\r\n) or LF (\n)
 	FilePath    string // Will be empty if the file has not been saved yet
 
-	buffer           []string      // TODO: replace line-based buffer with gap buffer
 	screen           *tcell.Screen // We keep our own reference to the screen for cursor purposes.
 	x, y             int
 	width, height    int
@@ -50,11 +52,11 @@ type TextEdit struct {
 // it can be assumed that the TextEdit has no file association, or it is unsaved.
 func NewTextEdit(screen *tcell.Screen, filePath, contents string, theme *Theme) *TextEdit {
 	te := &TextEdit{
+		Buffer:      nil,
 		LineNumbers: true,
 		UseHardTabs: true,
 		TabSize:     4,
 		FilePath:    filePath,
-		buffer:      nil,
 		screen:      screen,
 		Theme:       theme,
 	}
@@ -65,8 +67,9 @@ func NewTextEdit(screen *tcell.Screen, filePath, contents string, theme *Theme)
 // SetContents applies the string to the internal buffer of the TextEdit component.
 // The string is determined to be either CRLF or LF based on line-endings.
 func (t *TextEdit) SetContents(contents string) {
+	var i int
 loop:
-	for _, r := range contents {
+	for i < len(contents) {
 		switch r {
 		case '\n':
 			t.IsCRLF = false
@@ -76,14 +79,10 @@ loop:
 			t.IsCRLF = true
 			break loop
 		}
+		i += utf8.DecodeRune(contents[i:])
 	}
 
-	delimiter := "\n"
-	if t.IsCRLF {
-		delimiter = "\r\n"
-	}
-
-	t.buffer = strings.Split(contents, delimiter) // Split contents into lines
+	t.Buffer = buffer.NewRopeBuffer(contents)
 }
 
 // GetLineDelimiter returns "\r\n" for a CRLF buffer, or "\n" for an LF buffer.
@@ -232,6 +231,8 @@ func (t *TextEdit) Insert(contents string) {
 // column of that new line. Text before the cursor on the current line remains on that line,
 // text at or after the cursor on the current line is moved to the new line.
 func (t *TextEdit) insertNewLine() {
+	t.Dirty = true
+
 	lineRunes := []rune(t.buffer[t.cury]) // A slice of runes of the old line
 	movedRunes := lineRunes[t.curx:]      // A slice of the old line containing runes to be moved
 	newLineRunes := make([]rune, len(movedRunes))