Added Series statistical functions

This commit is contained in:
Luke I. Wilson 2023-05-14 14:46:24 -05:00
parent 9e5239c20a
commit 7899f3f509
2 changed files with 404 additions and 40 deletions

377
data.go
View File

@ -3,31 +3,44 @@ package autotrader
import (
"encoding/csv"
"errors"
"fmt"
"io"
"math"
"os"
"strconv"
"time"
df "github.com/rocketlaunchr/dataframe-go"
"golang.org/x/exp/slices"
)
// EasyIndex maps i onto an object of length n using Python-style negative indexing: a non-negative i is returned unchanged and a negative i counts back from the end, so EasyIndex(-1, 5) returns 4. The result is still negative when (-i) > n, so callers must bounds-check it.
func EasyIndex(i, n int) int {
	if i >= 0 {
		return i
	}
	return i + n
}
// Series is a single column of data that can be read by index and wrapped in a rolling window.
type Series interface {
// Copy returns a Series of the same kind; DataSeries wraps a copy of its underlying column.
Copy() Series
// Len returns the number of rows; DataSeries reports 0 when it holds no data.
Len() int
// Statistical functions.
// Rolling returns a RollingSeries that wraps this series with the given window period.
Rolling(period int) *RollingSeries
// Data access functions.
// Value returns the item at index i. Implementations in this file translate a negative i through EasyIndex.
Value(i int) interface{}
// Float, Int, String, and Time return the item at index i as that type. DataSeries returns the zero value of the type when the item is nil or holds a different type.
Float(i int) float64
Int(i int) int64
String(i int) string
Time(i int) time.Time
}
type Frame interface {
Copy() Frame
Len() int
// Comparison functions.
Equal(other Frame) bool
NotEqual(other Frame) bool
Less(other Frame) bool
LessEqual(other Frame) bool
Greater(other Frame) bool
GreaterEqual(other Frame) bool
// Easy access functions.
Date(i int) time.Time
Open(i int) float64
@ -43,27 +56,212 @@ type Frame interface {
Volumes() Series
// Custom data columns
Series(name string) Series
Value(column string, i int) interface{}
Float(column string, i int) float64
Int(column string, i int) int
Int(column string, i int) int64
String(column string, i int) string
// Time returns the value of the column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
Time(column string, i int) time.Time
}
// AppliedSeries is like Series, but it applies a function to each row of data before returning it.
// NOTE(review): in the code visible here only Value goes through apply; the other Series methods are promoted from the embedded Series and so return untransformed data — confirm that is intended.
type AppliedSeries struct {
Series
// apply receives the index resolved by EasyIndex(i, Len()) and the embedded Series' value at i, and returns the value that Value exposes.
apply func(i int, val interface{}) interface{}
}
// Value fetches the embedded Series' value at i and returns the result of passing it through the apply function. The index handed to apply has already been resolved with EasyIndex, so apply sees a non-negative index whenever i is within range.
func (s *AppliedSeries) Value(i int) interface{} {
	resolved := EasyIndex(i, s.Len())
	raw := s.Series.Value(i)
	return s.apply(resolved, raw)
}
// NewAppliedSeries wraps s so that every call to Value passes the underlying value through apply first. apply is called with the resolved index and the value of s at that index.
func NewAppliedSeries(s Series, apply func(i int, val interface{}) interface{}) *AppliedSeries {
	applied := new(AppliedSeries)
	applied.Series = s
	applied.apply = apply
	return applied
}
// RollingSeries is a Series whose Value(i) is the window of up to period items ending at index i rather than a single item. In the code visible here, the remaining Series methods are promoted from the embedded Series.
type RollingSeries struct {
Series
// period is the maximum number of items in each window.
period int
}
// Mean returns an AppliedSeries whose Value(i) is the arithmetic mean of the rolling window ending at index i.
//
// The element type is taken from the first item of the window: a float64 window yields a float64 mean, an int64 window yields an int64 mean computed with truncating integer division, and a window of any other type yields its last item unchanged. An empty window yields nil. The applied function panics if it is given anything other than a []interface{}, or if a numeric window contains an item of a different type.
func (s *RollingSeries) Mean() *AppliedSeries {
	return &AppliedSeries{
		Series: s,
		apply: func(_ int, v interface{}) interface{} {
			window, ok := v.([]interface{})
			if !ok {
				// %T prints the dynamic type of v; %t is the boolean verb and would render as "%!t(...)".
				panic(fmt.Sprintf("expected a slice of values, got %T", v))
			}
			if len(window) == 0 {
				return nil
			}
			switch window[0].(type) {
			case float64:
				var sum float64
				for _, item := range window {
					sum += item.(float64)
				}
				return sum / float64(len(window))
			case int64:
				var sum int64
				for _, item := range window {
					sum += item.(int64)
				}
				return sum / int64(len(window))
			default:
				return window[len(window)-1] // Non-numeric data: pass the latest item through.
			}
		},
	}
}
// EMA returns an AppliedSeries whose Value(i) is an exponential moving average over the rolling window ending at index i.
//
// The average is seeded with the first item of the window and then updated for each later item x as ema += (x - ema) * 2 / (period + 1). A float64 window yields a float64; an int64 window yields an int64 computed entirely in integer arithmetic (each step truncates); a window of any other type yields its last item unchanged. An empty window yields nil. The applied function panics if it is given anything other than a []interface{}, or if a numeric window contains an item of a different type.
func (s *RollingSeries) EMA() *AppliedSeries {
	return &AppliedSeries{
		Series: s,
		apply: func(_ int, v interface{}) interface{} {
			window, ok := v.([]interface{})
			if !ok {
				// %T prints the dynamic type of v; %t is the boolean verb and would render as "%!t(...)".
				panic(fmt.Sprintf("expected a slice of values, got %T", v))
			}
			if len(window) == 0 {
				return nil
			}
			switch first := window[0].(type) {
			case float64:
				ema := first
				denom := float64(s.period) + 1
				for _, item := range window[1:] {
					ema += (item.(float64) - ema) * 2 / denom
				}
				return ema
			case int64:
				ema := first
				denom := int64(s.period) + 1
				for _, item := range window[1:] {
					ema += (item.(int64) - ema) * 2 / denom
				}
				return ema
			default: // string, time.Time
				return window[len(window)-1] // Non-numeric data: pass the latest item through.
			}
		},
	}
}
// Median returns an AppliedSeries whose Value(i) is the median of the rolling window ending at index i.
//
// A float64 window is sorted ascending with NaN values ordered first; an int64 window is sorted ascending. For an odd-length window the middle item is returned; for an even-length window the two middle items are averaged (with truncating integer division for int64). A window of any other type yields its last item unchanged, and an empty window yields nil. The applied function panics if it is given anything other than a []interface{}, or if a numeric window contains an item of a different type.
//
// The window slice is sorted in place. That is safe here because RollingSeries.Value builds a fresh slice on every call.
func (s *RollingSeries) Median() *AppliedSeries {
	return &AppliedSeries{
		Series: s,
		apply: func(_ int, v interface{}) interface{} {
			window, ok := v.([]interface{})
			if !ok {
				// %T prints the dynamic type of v; %t is the boolean verb and would render as "%!t(...)".
				panic(fmt.Sprintf("expected a slice of values, got %T", v))
			}
			if len(window) == 0 {
				return nil
			}
			mid := len(window) / 2
			even := len(window)%2 == 0
			switch window[0].(type) {
			case float64:
				slices.SortFunc(window, func(a, b interface{}) bool {
					x, y := a.(float64), b.(float64)
					return x < y || (math.IsNaN(x) && !math.IsNaN(y))
				})
				if even {
					return (window[mid-1].(float64) + window[mid].(float64)) / 2
				}
				return window[mid]
			case int64:
				slices.SortFunc(window, func(a, b interface{}) bool {
					x, y := a.(int64), b.(int64)
					return x < y
				})
				if even {
					return (window[mid-1].(int64) + window[mid].(int64)) / 2
				}
				return window[mid]
			default: // string, time.Time
				return window[len(window)-1] // Non-numeric data: pass the latest item through.
			}
		},
	}
}
// StdDev returns an AppliedSeries whose Value(i) is the population standard deviation (divisor is the window length) of the rolling window ending at index i.
//
// A float64 window yields a float64. An int64 window uses an integer (truncated) mean and integer squared deviations, and the square root is truncated to int64. A window of any other type yields its last item unchanged, and an empty window yields nil. The applied function panics if it is given anything other than a []interface{}, or if a numeric window contains an item of a different type.
func (s *RollingSeries) StdDev() *AppliedSeries {
	return &AppliedSeries{
		Series: s,
		apply: func(_ int, v interface{}) interface{} {
			window, ok := v.([]interface{})
			if !ok {
				// %T prints the dynamic type of v; %t is the boolean verb and would render as "%!t(...)".
				panic(fmt.Sprintf("expected a slice of values, got %T", v))
			}
			if len(window) == 0 {
				return nil
			}
			switch window[0].(type) {
			case float64:
				// The mean comes from the window already in hand; going through s.Mean().Value(i) would rebuild the same window a second time.
				var total float64
				for _, item := range window {
					total += item.(float64)
				}
				mean := total / float64(len(window))
				var squares float64
				for _, item := range window {
					d := item.(float64) - mean
					squares += d * d
				}
				return math.Sqrt(squares / float64(len(window)))
			case int64:
				var total int64
				for _, item := range window {
					total += item.(int64)
				}
				mean := total / int64(len(window))
				var squares int64
				for _, item := range window {
					d := item.(int64) - mean
					squares += d * d
				}
				return int64(math.Sqrt(float64(squares) / float64(len(window))))
			default: // A slice of something else, just return the last value
				return window[len(window)-1]
			}
		},
	}
}
// Value returns a []interface{} holding up to `period` items in chronological order; the last item in the slice is the item at i. Fewer than `period` items are returned near the start of the series. A negative i counts back from the end, as in EasyIndex. If i is out of bounds, or the period is not positive, an empty (non-nil) slice is returned.
func (s *RollingSeries) Value(i int) interface{} {
	n := s.Len()
	i = EasyIndex(i, n)
	if i < 0 || i >= n || s.period <= 0 {
		return []interface{}{}
	}
	// Clamp the window start at the first row, then fill front to back so each item is appended once instead of being inserted at the head.
	start := i - s.period + 1
	if start < 0 {
		start = 0
	}
	items := make([]interface{}, 0, i-start+1)
	for j := start; j <= i; j++ {
		items = append(items, s.Series.Value(j))
	}
	return items
}
// DataSeries is a Series that wraps a column of data. The data can be of the following types: float64, int64, string, or time.Time.
type DataSeries struct {
// data is the wrapped dataframe-go column; Len and Value treat a nil data as an empty series.
data df.Series
}
// DataFrame wraps a dataframe-go DataFrame and exposes it through this package's accessor methods. The wrapped frame is held in an unexported field rather than embedded, so its methods are not promoted onto DataFrame.
type DataFrame struct {
	data *df.DataFrame // DataFrame with a Date, Open, High, Low, Close, and Volume column.
}
// Copy returns a new DataFrame wrapping a copy of the underlying data. A DataFrame that holds no data yields a new, empty DataFrame instead of dereferencing nil.
func (o *DataFrame) Copy() *DataFrame {
	if o.data == nil {
		return &DataFrame{}
	}
	return &DataFrame{data: o.data.Copy()}
}
// Len returns the number of rows in the DataFrame or 0 if the DataFrame is nil.
func (o *DataFrame) Len() int {
if o.DataFrame == nil {
if o.data == nil {
return 0
}
return o.NRows()
return o.data.NRows()
}
// Date returns the value of the Date column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
@ -102,21 +300,59 @@ func (o *DataFrame) Volume(i int) float64 {
return o.Float("Volume", i)
}
// Value returns the value of the column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, nil is returned.
func (o *DataFrame) Value(column string, i int) interface{} {
colIdx, err := o.DataFrame.NameToColumn(column)
// Dates returns the "Date" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Dates() Series {
	const column = "Date"
	return o.Series(column)
}
// Opens returns the "Open" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Opens() Series {
	const column = "Open"
	return o.Series(column)
}
// Highs returns the "High" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Highs() Series {
	const column = "High"
	return o.Series(column)
}
// Lows returns the "Low" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Lows() Series {
	const column = "Low"
	return o.Series(column)
}
// Closes returns the "Close" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Closes() Series {
	const column = "Close"
	return o.Series(column)
}
// Volumes returns the "Volume" column as a Series by delegating to Series, so it is nil when Series cannot find that column.
func (o *DataFrame) Volumes() Series {
	const column = "Volume"
	return o.Series(column)
}
// Series returns a Series of the column with the given name. If the DataFrame holds no data or the column does not exist, nil is returned.
func (o *DataFrame) Series(name string) Series {
	if o.data == nil {
		return nil
	}
	colIdx, err := o.data.NameToColumn(name)
	if err != nil {
		// Return a literal nil so callers comparing the interface against nil see it as missing.
		return nil
	}
	return &DataSeries{data: o.data.Series[colIdx]}
}
// Value returns the item stored in the named column at row i. Rows are numbered from 0; a negative i counts back from the latest row, like Python's negative indexing. nil is returned when the DataFrame holds no data, when the column cannot be resolved, or when i is out of bounds.
func (o *DataFrame) Value(column string, i int) interface{} {
	if o.data == nil {
		return nil
	}
	rows := o.Len()
	row := EasyIndex(i, rows) // Allow for negative indexing.
	colIdx, err := o.data.NameToColumn(column)
	switch {
	case err != nil:
		return nil
	case row < 0, row >= rows: // Prevent out of bounds access.
		return nil
	}
	return o.data.Series[colIdx].Value(row)
}
// Float returns the value of the column at index i cast to float64. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
@ -175,13 +411,83 @@ func (o *DataFrame) Time(column string, i int) time.Time {
}
}
func NewChartData(data *df.DataFrame) *DataFrame {
func NewDataFrame(data *df.DataFrame) *DataFrame {
return &DataFrame{data}
}
type RollingWindow struct {
DataFrame
Period int
// Copy returns a new DataSeries wrapping a copy of the underlying column. A DataSeries that holds no data yields a new, empty DataSeries, matching how Len and Value treat missing data.
func (s *DataSeries) Copy() Series {
	if s.data == nil {
		return &DataSeries{}
	}
	return &DataSeries{data: s.data.Copy()}
}
// Len returns the number of rows in the wrapped column, or 0 when the DataSeries holds no data.
func (s *DataSeries) Len() int {
	if col := s.data; col != nil {
		return col.NRows()
	}
	return 0
}
// Rolling returns a RollingSeries that wraps this series and yields windows of up to period items. The series is wrapped as-is, not copied.
func (s *DataSeries) Rolling(period int) *RollingSeries {
	window := new(RollingSeries)
	window.Series = s
	window.period = period
	return window
}
// Value returns the item at index i. The first item is at index 0; a negative i counts back from the latest item, like Python's negative indexing. nil is returned when the DataSeries holds no data or when i is out of bounds, mirroring DataFrame.Value.
func (s *DataSeries) Value(i int) interface{} {
	if s.data == nil {
		return nil
	}
	n := s.Len()
	i = EasyIndex(i, n)  // Allow for negative indexing.
	if i < 0 || i >= n { // Prevent out of bounds access on the underlying column.
		return nil
	}
	return s.data.Value(i)
}
// Float returns the item at index i when it is stored as a float64. It returns 0 when the item is nil or holds any other type; no numeric conversion is attempted.
func (s *DataSeries) Float(i int) float64 {
	if f, ok := s.Value(i).(float64); ok {
		return f
	}
	return 0
}
// Int returns the item at index i when it is stored as an int64. It returns 0 when the item is nil or holds any other type; no numeric conversion is attempted.
func (s *DataSeries) Int(i int) int64 {
	if n, ok := s.Value(i).(int64); ok {
		return n
	}
	return 0
}
// String returns the item at index i when it is stored as a string. It returns "" when the item is nil or holds any other type; other types are not formatted.
func (s *DataSeries) String(i int) string {
	if text, ok := s.Value(i).(string); ok {
		return text
	}
	return ""
}
// Time returns the item at index i when it is stored as a time.Time. It returns the zero time.Time when the item is nil or holds any other type; strings are not parsed.
func (s *DataSeries) Time(i int) time.Time {
	if stamp, ok := s.Value(i).(time.Time); ok {
		return stamp
	}
	return time.Time{}
}
type DataCSVLayout struct {
@ -204,6 +510,19 @@ func ReadDataCSV(path string, layout DataCSVLayout) (*df.DataFrame, error) {
return ReadDataCSVFromReader(f, layout)
}
// ReadEURUSDDataCSV loads "./EUR_USD Historical Data.csv" through ReadDataCSV with a layout fixed for that file: rows are marked latest-first, dates use the 01/02/2006 format, the close price is read from the "Price" column and the volume from "Vol.". The Date header is matched including a leading byte-order mark and its literal double quotes. The path is relative, so the result depends on the working directory.
func ReadEURUSDDataCSV() (*df.DataFrame, error) {
	layout := DataCSVLayout{
		LatestFirst: true,
		DateFormat:  "01/02/2006",
		Date:        "\ufeff\"Date\"",
		Open:        "Open",
		High:        "High",
		Low:         "Low",
		Close:       "Price",
		Volume:      "Vol.",
	}
	return ReadDataCSV("./EUR_USD Historical Data.csv", layout)
}
func ReadDataCSVFromReader(r io.Reader, layout DataCSVLayout) (*df.DataFrame, error) {
data, err := ReadCSVFromReader(r, layout.DateFormat, layout.LatestFirst)
if err != nil {

View File

@ -1,18 +1,63 @@
package autotrader
import "testing"
import (
"testing"
"time"
)
// newTestingDataFrame loads the EUR/USD CSV fixture and wraps it in a DataFrame. It returns nil when the fixture cannot be read; the read error itself is discarded, so callers only see that loading failed.
func newTestingDataFrame() *DataFrame {
	if raw, err := ReadEURUSDDataCSV(); err == nil {
		return NewDataFrame(raw)
	}
	return nil
}
// TestDataSeries checks that the Date and Close columns of the EUR/USD fixture expose the expected number of rows and that the 10-period rolling mean of the closes ends at the expected value.
func TestDataSeries(t *testing.T) {
	data := newTestingDataFrame()
	if data == nil {
		t.Fatal("Could not create DataFrame")
	}

	dates, closes := data.Dates(), data.Closes()
	if dates == nil || closes == nil {
		t.Fatal("Could not find the Date and Close columns")
	}
	if dates.Len() != 2610 {
		t.Fatalf("Expected 2610 rows, got %d", dates.Len())
	}
	if closes.Len() != 2610 {
		t.Fatalf("Expected 2610 rows, got %d", closes.Len())
	}

	sma10 := closes.Rolling(10).Mean()
	if sma10.Len() != 2610 {
		t.Fatalf("Expected 2610 rows, got %d", sma10.Len())
	}

	// The mean is computed in floating point, so compare within a tolerance instead of requiring bit-for-bit equality with the literal.
	const want, tolerance = 1.10039, 1e-9
	latest := sma10.Value(-1) // Latest closing price averaged over 10 periods.
	got, ok := latest.(float64)
	if !ok {
		t.Fatalf("Expected a float64, got %v", latest)
	}
	if diff := got - want; diff > tolerance || diff < -tolerance {
		t.Fatalf("Expected 1.10039, got %f", got)
	}
}
// TestDataFrame checks the row count of the EUR/USD fixture, its latest close price, and the date of its third-earliest row.
func TestDataFrame(t *testing.T) {
	data := newTestingDataFrame()
	if data == nil {
		t.Fatal("Could not create DataFrame")
	}

	if rows := data.Len(); rows != 2610 {
		t.Fatalf("Expected 2610 rows, got %d", rows)
	}
	if latest := data.Close(-1); latest != 1.0967 {
		t.Fatalf("Expected 1.0967, got %f", latest)
	}

	// Index 2 is the third-earliest row of the Date column.
	date := data.Date(2)
	year, month, day := date.Date()
	if year != 2013 || month != time.May || day != 13 {
		t.Fatalf("Expected 2013-05-13, got %s", date.Format(time.DateOnly))
	}
}
func TestReadDataCSV(t *testing.T) {
data, err := ReadDataCSV("./EUR_USD Historical Data.csv", DataCSVLayout{
LatestFirst: true,
DateFormat: "01/02/2006",
Date: "\ufeff\"Date\"",
Open: "Open",
High: "High",
Low: "Low",
Close: "Price",
Volume: "Vol.",
})
data, err := ReadEURUSDDataCSV()
if err != nil {
t.Fatal(err)
}