From bd6f1cf79e20f552c1a6c9403595fbabf96be290 Mon Sep 17 00:00:00 2001
From: "Luke I. Wilson"
Date: Mon, 22 Mar 2021 16:10:23 -0500
Subject: [PATCH] Added RopeBuffer Buffer implementation

---
 go.mod                 |   1 +
 go.sum                 |   2 +
 ui/buffer/buffer.go    |  56 +++++++++++
 ui/buffer/rope.go      | 213 +++++++++++++++++++++++++++++++++++++++++
 ui/buffer/rope_test.go |  47 +++++++++
 ui/textedit.go         |  19 ++--
 6 files changed, 329 insertions(+), 9 deletions(-)
 create mode 100644 ui/buffer/buffer.go
 create mode 100644 ui/buffer/rope.go
 create mode 100644 ui/buffer/rope_test.go

diff --git a/go.mod b/go.mod
index 536d30a..fbc32e1 100644
--- a/go.mod
+++ b/go.mod
@@ -6,4 +6,5 @@ require (
 	github.com/gdamore/tcell/v2 v2.2.0
 	github.com/mattn/go-runewidth v0.0.10 // indirect
 	github.com/zyedidia/clipboard v1.0.3 // indirect
+	github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a
 )
diff --git a/go.sum b/go.sum
index 49102a5..2b16a29 100644
--- a/go.sum
+++ b/go.sum
@@ -10,6 +10,8 @@ github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
 github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/zyedidia/clipboard v1.0.3 h1:F/nCDVYMdbDWTmY8s8cJl0tnwX32q96IF09JHM14bUI=
 github.com/zyedidia/clipboard v1.0.3/go.mod h1:zykFnZUXX0ErxqvYLUFEq7QDJKId8rmh2FgD0/Y8cjA=
+github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a h1:GqVrOhVdC792A34K1Fw1HN6RC0wr5yAJVdtt8xaxXBA=
+github.com/zyedidia/rope v0.0.0-20210117014038-fa241571635a/go.mod h1:IKo1js3O2gdESAUH9F3B33wlrRBnpJMJR/gXyY0a3ho=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf h1:MZ2shdL+ZM/XzY3ZGOnh4Nlpnxz5GSOhOmtHo3iPU6M=
diff --git a/ui/buffer/buffer.go b/ui/buffer/buffer.go
new file mode 100644
index 0000000..02f9d63
--- /dev/null
+++ b/ui/buffer/buffer.go
@@ -0,0 +1,56 @@
+package buffer
+
+import (
+	"io"
+)
+
+// A Buffer is a wrapper around any buffer data structure like ropes or a gap buffer
+// that can be used for text editors. One way this interface helps is by making
+// all API function parameters line and column indexes, so it is simple and easy
+// to index and use like a text editor. All lines and columns start at zero, and
+// all "end" ranges are inclusive.
+//
+// Any bounds out of range are panics! If you are unsure your position or range
+// may be out of bounds, use ClampLineCol() or compare with Lines() or RunesInLine().
+type Buffer interface {
+	// Line returns a slice of the data at the given line, including the ending line-
+	// delimiter. line starts from zero. Data returned may or may not be a copy: do not
+	// write it.
+	Line(line int) []byte
+
+	// Slice returns a slice of the buffer from startLine, startCol, to endLine, endCol,
+	// inclusive bounds. The returned value may or may not be a copy of the data,
+	// so do not write to it.
+	Slice(startLine, startCol, endLine, endCol int) []byte
+
+	// Bytes returns all of the bytes in the buffer. This function is very likely
+	// to copy all of the data in the buffer. Use sparingly. Try using other methods,
+	// where possible.
+	Bytes() []byte
+
+	// Insert copies a byte slice (inserting it) into the position at line, col.
+	Insert(line, col int, value []byte)
+
+	// Remove deletes any characters between startLine, startCol, and endLine,
+	// endCol, inclusive bounds.
+	Remove(startLine, startCol, endLine, endCol int)
+
+	// Len returns the number of bytes in the buffer.
+	Len() int
+
+	// Lines returns the number of lines in the buffer. If the buffer is empty,
+	// 1 is returned, because there is always at least one line. This function
+	// basically counts the number of newline ('\n') characters in a buffer.
+	Lines() int
+
+	// RunesInLine returns the number of runes in the given line. That is, the
+	// number of Utf-8 codepoints in line, not bytes.
+	RunesInLine(line int) int
+
+	// ClampLineCol is a utility function to clamp any provided line and col to
+	// only possible values within the buffer. It first clamps the line, then clamps
+	// the column within that line.
+	ClampLineCol(line, col int) (int, int)
+
+	WriteTo(w io.Writer) (int64, error)
+}
diff --git a/ui/buffer/rope.go b/ui/buffer/rope.go
new file mode 100644
index 0000000..e5c991f
--- /dev/null
+++ b/ui/buffer/rope.go
@@ -0,0 +1,213 @@
+package buffer
+
+import (
+	"io"
+	"unicode/utf8"
+
+	"github.com/zyedidia/rope"
+)
+
+type RopeBuffer rope.Node
+
+func NewRopeBuffer(contents []byte) *RopeBuffer {
+	return (*RopeBuffer)(rope.New(contents))
+}
+
+// pos returns the byte index of the rune at line, col (col counts runes, not bytes).
+// If line is not less than the number of lines, getLineStartPos panics. If col is
+// zero or negative, the start of the line is returned. If col is past the end of the
+// line, the index of the line delimiter (or the end of the buffer) is returned.
+func (b *RopeBuffer) pos(line, col int) int {
+	var pos int
+
+	_rope := (*rope.Node)(b)
+
+	pos = b.getLineStartPos(line)
+
+	// Have to do this algorithm for safety. If this function was declared to panic
+	// or index out of bounds memory, if col > the given line length, it would be
+	// more efficient and simpler. But unfortunately, I believe it is necessary.
+	if col > 0 {
+		_, r := _rope.SplitAt(pos)
+		l, _ := r.SplitAt(_rope.Len() - pos)
+
+		l.EachLeaf(func(n *rope.Node) bool {
+			data := n.Value() // Reference; not a copy.
+			var i int
+			for i < len(data) {
+				if col == 0 || data[i] == '\n' {
+					return true // Found the position of the column
+				}
+
+				// col counts runes but pos is a byte index: advance by the rune's width
+				_, size := utf8.DecodeRune(data[i:])
+				pos += size
+				i += size
+				col--
+			}
+			return false // Have not gotten to the appropriate position, yet
+		})
+	}
+
+	return pos
+}
+
+// Line returns a slice of the data at the given line, including the ending line-
+// delimiter. line starts from zero. Data returned may or may not be a copy: do not
+// write it.
+func (b *RopeBuffer) Line(line int) []byte {
+	pos := b.getLineStartPos(line)
+	bytes := 0
+
+	_rope := (*rope.Node)(b)
+	_, r := _rope.SplitAt(pos)
+	l, _ := r.SplitAt(_rope.Len() - pos)
+
+	l.EachLeaf(func(n *rope.Node) bool {
+		data := n.Value() // Reference; not a copy.
+		var i int
+		for i < len(data) {
+			if data[i] == '\n' {
+				bytes++ // Add the newline byte
+				return true // Read (past-tense) the whole line
+			}
+
+			// Respect Utf-8 codepoint boundaries
+			_, size := utf8.DecodeRune(data[i:])
+			bytes += size
+			i += size
+		}
+		return false // Have not read the whole line, yet
+	})
+
+	return _rope.Slice(pos, pos+bytes) // NOTE: may be faster to do it ourselves
+}
+
+// Slice returns the bytes from startLine, startCol, to endLine, endCol, inclusive. The
+// result may or may not be a copy of the data, so do not write to it. NOTE(review): the
+// end is pos+1 byte, which looks like it cuts a multi-byte rune at endCol; confirm.
+func (b *RopeBuffer) Slice(startLine, startCol, endLine, endCol int) []byte {
+	return (*rope.Node)(b).Slice(b.pos(startLine, startCol), b.pos(endLine, endCol)+1)
+}
+
+// Bytes returns all of the bytes in the buffer. This function is very likely
+// to copy all of the data in the buffer. Use sparingly. Try using other methods,
+// where possible.
+func (b *RopeBuffer) Bytes() []byte {
+	return (*rope.Node)(b).Value()
+}
+
+// Insert copies a byte slice (inserting it) into the position at line, col.
+func (b *RopeBuffer) Insert(line, col int, value []byte) {
+	(*rope.Node)(b).Insert(b.pos(line, col), value)
+}
+
+// Remove deletes any characters between startLine, startCol, and endLine, endCol,
+// inclusive. NOTE(review): end is pos+1 byte; a multi-byte rune at endCol looks cut.
+func (b *RopeBuffer) Remove(startLine, startCol, endLine, endCol int) {
+	(*rope.Node)(b).Remove(b.pos(startLine, startCol), b.pos(endLine, endCol)+1)
+}
+
+// Len returns the number of bytes in the buffer.
+func (b *RopeBuffer) Len() int {
+	return (*rope.Node)(b).Len()
+}
+
+// Lines returns the number of lines in the buffer. If the buffer is empty,
+// 1 is returned, because there is always at least one line. This function
+// basically counts the number of newline ('\n') characters in a buffer.
+func (b *RopeBuffer) Lines() int {
+	_rope := (*rope.Node)(b) // Named _rope so the imported rope package is not shadowed
+	return _rope.Count(0, _rope.Len(), []byte{'\n'}) + 1
+}
+
+// runesInLine counts the runes from byte index pos up to, but not including, the next
+// newline. pos can point to a newline or be greater than or equal to the number of bytes
+// in the buffer; 0 is returned in both cases.
+func (b *RopeBuffer) runesInLine(pos int) int {
+	_rope := (*rope.Node)(b)
+	ropeLen := _rope.Len()
+
+	if pos >= ropeLen {
+		return 0
+	}
+
+	var count int
+
+	_, r := _rope.SplitAt(pos)
+	l, _ := r.SplitAt(ropeLen - pos)
+
+	l.EachLeaf(func(n *rope.Node) bool {
+		data := n.Value() // Reference; not a copy.
+		var i int
+		for i < len(data) {
+			if data[i] == '\n' {
+				return true // Read (past-tense) the whole line
+			}
+			count++
+
+			// Respect Utf-8 codepoint boundaries
+			_, size := utf8.DecodeRune(data[i:])
+			i += size
+		}
+		return false // Have not read the whole line, yet
+	})
+
+	return count
+}
+
+// getLineStartPos returns the first byte index of the given line (starting from zero).
+// The returned index can be equal to the length of the buffer, not pointing to any byte,
+// which means the byte is on the last, and empty, line of the buffer. If line is greater
+// than or equal to the number of lines in the buffer, a panic is issued.
+func (b *RopeBuffer) getLineStartPos(line int) int {
+	_rope := (*rope.Node)(b)
+	var pos int
+
+	if line > 0 {
+		_rope.IndexAllFunc(0, _rope.Len(), []byte{'\n'}, func(idx int) bool {
+			line--
+			pos = idx + 1 // idx+1 = start of line after delimiter
+			if line <= 0 { // If pos is now the start of the line we're searching for...
+				return true // Stop indexing
+			}
+			return false
+		})
+	}
+
+	if line > 0 { // If there aren't enough lines to reach line...
+		panic("getLineStartPos: not enough lines in buffer to reach position")
+	}
+
+	return pos
+}
+
+// RunesInLine returns the number of runes in the given line. That is, the
+// number of Utf-8 codepoints in the line, not bytes.
+func (b *RopeBuffer) RunesInLine(line int) int {
+	linePos := b.getLineStartPos(line)
+	return b.runesInLine(linePos)
+}
+
+// ClampLineCol is a utility function to clamp any provided line and col to
+// only possible values within the buffer, pointing to runes. It first clamps
+// the line, then clamps the column.
+func (b *RopeBuffer) ClampLineCol(line, col int) (int, int) {
+	if line < 0 {
+		line = 0
+	} else if lines := b.Lines()-1; line > lines {
+		line = lines
+	}
+
+	if cols := b.RunesInLine(line); cols == 0 || col < 0 {
+		col = 0 // An empty line has no runes, so clamp to 0 rather than -1
+	} else if col > cols-1 {
+		col = cols - 1
+	}
+
+	return line, col
+}
+
+func (b *RopeBuffer) WriteTo(w io.Writer) (int64, error) {
+	return (*rope.Node)(b).WriteTo(w)
+}
diff --git a/ui/buffer/rope_test.go b/ui/buffer/rope_test.go
new file mode 100644
index 0000000..1fa8aca
--- /dev/null
+++ b/ui/buffer/rope_test.go
@@ -0,0 +1,47 @@
+package buffer
+
+import "testing"
+
+func TestRopeInserting(t *testing.T) {
+	var buf Buffer = NewRopeBuffer([]byte("some"))
+	buf.Insert(0, 4, []byte(" text")) // Insert " text" after "some"
+	buf.Insert(0, 0, []byte("with\n\t"))
+	// "with\n\tsome text"
+
+	buf.Remove(0, 4, 1, 5) // Delete from line 0, col 4, to line 1, col 5 "\n\tsome "
+
+	if str := string(buf.Bytes()); str != "withtext" {
+		t.Errorf("string does not match \"withtext\", got %#v", str)
+		t.Fail()
+	}
+}
+
+func TestRopeBounds(t *testing.T) {
+	var buf Buffer = NewRopeBuffer([]byte("this\nis (は)\n\tsome\ntext\n"))
+	//this
+	//is (は)
+	//	some
+	//text
+	//
+
+	if buf.Lines() != 5 {
+		t.Errorf("Expected buf.Lines() == 5")
+		t.Fail()
+	}
+
+	if n := buf.RunesInLine(1); n != 6 { // "is" in English and in japanese
+		t.Errorf("Expected 6 runes in line 2, found %v", n)
+		t.Fail()
+	}
+
+	line, col := buf.ClampLineCol(15, 5) // Should become last line, first column
+	if line != 4 || col != 0 {
+		t.Errorf("Expected to clamp line col to 4,0 got %v,%v", line, col)
+		t.Fail()
+	}
+
+	if line := string(buf.Line(2)); line != "\tsome\n" {
+		t.Errorf("Expected line 3 to equal \"\\tsome\\n\", got %#v", line)
+		t.Fail()
+	}
+}
diff --git a/ui/textedit.go b/ui/textedit.go
index 16157d6..7cec23d 100644
--- a/ui/textedit.go
+++ b/ui/textedit.go
@@ -5,7 +5,9 @@ import (
 	"math"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 
+	"github.com/fivemoreminix/diesel/ui/buffer"
 	"github.com/gdamore/tcell/v2"
 )
 
@@ -24,6 +26,7 @@ type Region struct {
 // tools, is autocomplete ready, and contains the various information about
 // content being edited.
 type TextEdit struct {
+	Buffer      buffer.Buffer
 	LineNumbers bool // Whether to render line numbers (and therefore the column)
 	Dirty       bool // Whether the buffer has been edited
 	UseHardTabs bool // When true, tabs are '\t'
@@ -31,7 +34,6 @@ type TextEdit struct {
 	IsCRLF      bool   // Whether the file's line endings are CRLF (\r\n) or LF (\n)
 	FilePath    string // Will be empty if the file has not been saved yet
 
-	buffer        []string // TODO: replace line-based buffer with gap buffer
 	screen        *tcell.Screen // We keep our own reference to the screen for cursor purposes.
 	x, y          int
 	width, height int
@@ -50,11 +52,11 @@ type TextEdit struct {
 // it can be assumed that the TextEdit has no file association, or it is unsaved.
 func NewTextEdit(screen *tcell.Screen, filePath, contents string, theme *Theme) *TextEdit {
 	te := &TextEdit{
+		Buffer:      nil,
 		LineNumbers: true,
 		UseHardTabs: true,
 		TabSize:     4,
 		FilePath:    filePath,
-		buffer:      nil,
 		screen:      screen,
 		Theme:       theme,
 	}
@@ -65,8 +67,10 @@ func NewTextEdit(screen *tcell.Screen, filePath, contents string, theme *Theme)
 // SetContents applies the string to the internal buffer of the TextEdit component.
 // The string is determined to be either CRLF or LF based on line-endings.
 func (t *TextEdit) SetContents(contents string) {
 loop:
-	for _, r := range contents {
+	for i, size := 0, 0; i < len(contents); i += size {
+		var r rune
+		r, size = utf8.DecodeRuneInString(contents[i:])
 		switch r {
 		case '\n':
 			t.IsCRLF = false
@@ -76,14 +80,9 @@ loop:
 			t.IsCRLF = true
 			break loop
 		}
 	}
 
-	delimiter := "\n"
-	if t.IsCRLF {
-		delimiter = "\r\n"
-	}
-
-	t.buffer = strings.Split(contents, delimiter) // Split contents into lines
+	t.Buffer = buffer.NewRopeBuffer([]byte(contents))
 }
 
 // GetLineDelimiter returns "\r\n" for a CRLF buffer, or "\n" for an LF buffer.
@@ -232,6 +231,8 @@ func (t *TextEdit) Insert(contents string) {
 // column of that new line. Text before the cursor on the current line remains on that line,
 // text at or after the cursor on the current line is moved to the new line.
 func (t *TextEdit) insertNewLine() {
+	t.Dirty = true
+
 	lineRunes := []rune(t.buffer[t.cury]) // A slice of runes of the old line
 	movedRunes := lineRunes[t.curx:] // A slice of the old line containing runes to be moved
 	newLineRunes := make([]rune, len(movedRunes))