mirror of
https://github.com/lukewilson2002/autotrader.git
synced 2025-06-15 08:23:51 +00:00
Added Series statistical functions
This commit is contained in:
parent
9e5239c20a
commit
7899f3f509
377
data.go
377
data.go
@ -3,31 +3,44 @@ package autotrader
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
df "github.com/rocketlaunchr/dataframe-go"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// EasyIndex translates a possibly negative index i into a position within an
// object of length n, in the manner of Python's negative indexing. A
// non-negative i is returned unchanged; a negative i is counted back from the
// end, so EasyIndex(-1, 5) returns 4. No bounds checking is performed: the
// result is still negative when -i > n.
func EasyIndex(i, n int) int {
	if i >= 0 {
		return i
	}
	return i + n
}
|
||||
|
||||
type Series interface {
|
||||
Copy() Series
|
||||
Len() int
|
||||
|
||||
// Statistical functions.
|
||||
Rolling(period int) *RollingSeries
|
||||
|
||||
// Data access functions.
|
||||
Value(i int) interface{}
|
||||
Float(i int) float64
|
||||
Int(i int) int64
|
||||
String(i int) string
|
||||
Time(i int) time.Time
|
||||
}
|
||||
|
||||
type Frame interface {
|
||||
Copy() Frame
|
||||
Len() int
|
||||
|
||||
// Comparison functions.
|
||||
Equal(other Frame) bool
|
||||
NotEqual(other Frame) bool
|
||||
Less(other Frame) bool
|
||||
LessEqual(other Frame) bool
|
||||
Greater(other Frame) bool
|
||||
GreaterEqual(other Frame) bool
|
||||
|
||||
// Easy access functions.
|
||||
Date(i int) time.Time
|
||||
Open(i int) float64
|
||||
@ -43,27 +56,212 @@ type Frame interface {
|
||||
Volumes() Series
|
||||
|
||||
// Custom data columns
|
||||
Series(name string) Series
|
||||
Value(column string, i int) interface{}
|
||||
Float(column string, i int) float64
|
||||
Int(column string, i int) int
|
||||
Int(column string, i int) int64
|
||||
String(column string, i int) string
|
||||
// Time returns the value of the column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
|
||||
Time(column string, i int) time.Time
|
||||
}
|
||||
|
||||
// AppliedSeries is like Series, but it applies a function to each row of data before returning it.
|
||||
type AppliedSeries struct {
|
||||
Series
|
||||
apply func(i int, val interface{}) interface{}
|
||||
}
|
||||
|
||||
func (s *AppliedSeries) Value(i int) interface{} {
|
||||
return s.apply(EasyIndex(i, s.Len()), s.Series.Value(i))
|
||||
}
|
||||
|
||||
func NewAppliedSeries(s Series, apply func(i int, val interface{}) interface{}) *AppliedSeries {
|
||||
return &AppliedSeries{
|
||||
Series: s,
|
||||
apply: apply,
|
||||
}
|
||||
}
|
||||
|
||||
type RollingSeries struct {
|
||||
Series
|
||||
period int
|
||||
}
|
||||
|
||||
func (s *RollingSeries) Mean() *AppliedSeries {
|
||||
return &AppliedSeries{
|
||||
Series: s,
|
||||
apply: func(_ int, v interface{}) interface{} {
|
||||
switch v := v.(type) {
|
||||
case []interface{}:
|
||||
if len(v) == 0 {
|
||||
return nil
|
||||
}
|
||||
switch v[0].(type) {
|
||||
case float64:
|
||||
var sum float64
|
||||
for _, v := range v {
|
||||
sum += v.(float64)
|
||||
}
|
||||
return sum / float64(len(v))
|
||||
case int64:
|
||||
var sum int64
|
||||
for _, v := range v {
|
||||
sum += v.(int64)
|
||||
}
|
||||
return sum / int64(len(v))
|
||||
default:
|
||||
return v[len(v)-1] // Do nothing
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("expected a slice of values, got %t", v))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *RollingSeries) EMA() *AppliedSeries {
|
||||
return &AppliedSeries{
|
||||
Series: s,
|
||||
apply: func(i int, v interface{}) interface{} {
|
||||
switch v := v.(type) {
|
||||
case []interface{}:
|
||||
if len(v) == 0 {
|
||||
return nil
|
||||
}
|
||||
switch v[0].(type) {
|
||||
case float64:
|
||||
ema := v[0].(float64)
|
||||
for _, v := range v[1:] {
|
||||
ema += (v.(float64) - ema) * 2 / (float64(s.period) + 1)
|
||||
}
|
||||
return ema
|
||||
case int64:
|
||||
ema := v[0].(int64)
|
||||
for _, v := range v[1:] {
|
||||
ema += (v.(int64) - ema) * 2 / (int64(s.period) + 1)
|
||||
}
|
||||
return ema
|
||||
default: // string, time.Time
|
||||
return v[len(v)-1] // Do nothing
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("expected a slice of values, got %t", v))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *RollingSeries) Median() *AppliedSeries {
|
||||
return &AppliedSeries{
|
||||
Series: s,
|
||||
apply: func(_ int, v interface{}) interface{} {
|
||||
switch v := v.(type) {
|
||||
case []interface{}:
|
||||
if len(v) == 0 {
|
||||
return nil
|
||||
}
|
||||
switch v[0].(type) {
|
||||
case float64:
|
||||
if len(v) == 0 {
|
||||
return float64(0)
|
||||
}
|
||||
slices.SortFunc(v, func(a, b interface{}) bool {
|
||||
x, y := a.(float64), b.(float64)
|
||||
return x < y || (math.IsNaN(x) && !math.IsNaN(y))
|
||||
})
|
||||
if len(v)%2 == 0 {
|
||||
return (v[len(v)/2-1].(float64) + v[len(v)/2].(float64)) / 2
|
||||
}
|
||||
return v[len(v)/2]
|
||||
case int64:
|
||||
if len(v) == 0 {
|
||||
return int64(0)
|
||||
}
|
||||
slices.SortFunc(v, func(a, b interface{}) bool {
|
||||
x, y := a.(int64), b.(int64)
|
||||
return x < y
|
||||
})
|
||||
if len(v)%2 == 0 {
|
||||
return (v[len(v)/2-1].(int64) + v[len(v)/2].(int64)) / 2
|
||||
}
|
||||
return v[len(v)/2]
|
||||
default: // string, time.Time
|
||||
return v[len(v)-1] // Do nothing
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("expected a slice of values, got %t", v))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *RollingSeries) StdDev() *AppliedSeries {
|
||||
return &AppliedSeries{
|
||||
Series: s,
|
||||
apply: func(i int, v interface{}) interface{} {
|
||||
switch v := v.(type) {
|
||||
case []interface{}:
|
||||
if len(v) == 0 {
|
||||
return nil
|
||||
}
|
||||
switch v[0].(type) {
|
||||
case float64:
|
||||
mean := s.Mean().Value(i).(float64) // Take the mean of the last period values for the current index
|
||||
var sum float64
|
||||
for _, v := range v {
|
||||
sum += (v.(float64) - mean) * (v.(float64) - mean)
|
||||
}
|
||||
return math.Sqrt(sum / float64(len(v)))
|
||||
case int64:
|
||||
mean := s.Mean().Value(i).(int64)
|
||||
var sum int64
|
||||
for _, v := range v {
|
||||
sum += (v.(int64) - mean) * (v.(int64) - mean)
|
||||
}
|
||||
return int64(math.Sqrt(float64(sum) / float64(len(v))))
|
||||
default: // A slice of something else, just return the last value
|
||||
return v[len(v)-1] // Do nothing
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("expected a slice of values, got %t", v))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Value returns []interface{} up to `period` long. The last item in the slice is the item at i. If i is out of bounds, nil is returned.
|
||||
func (s *RollingSeries) Value(i int) interface{} {
|
||||
items := make([]interface{}, 0, s.period)
|
||||
i = EasyIndex(i, s.Len())
|
||||
if i < 0 || i >= s.Len() {
|
||||
return items
|
||||
}
|
||||
for j := i; j > i-s.period && j >= 0; j-- {
|
||||
// items = append(items, s.Series.Value(j))
|
||||
items = slices.Insert(items, 0, s.Series.Value(j))
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// DataSeries is a Series that wraps a column of data. The data can be of the following types: float64, int64, string, or time.Time.
|
||||
type DataSeries struct {
|
||||
data df.Series
|
||||
}
|
||||
|
||||
type DataFrame struct {
|
||||
*df.DataFrame // DataFrame with a Date, Open, High, Low, Close, and Volume column.
|
||||
data *df.DataFrame // DataFrame with a Date, Open, High, Low, Close, and Volume column.
|
||||
}
|
||||
|
||||
func (o *DataFrame) Copy() *DataFrame {
|
||||
return &DataFrame{o.DataFrame.Copy()}
|
||||
return &DataFrame{o.data.Copy()}
|
||||
}
|
||||
|
||||
// Len returns the number of rows in the DataFrame or 0 if the DataFrame is nil.
|
||||
func (o *DataFrame) Len() int {
|
||||
if o.DataFrame == nil {
|
||||
if o.data == nil {
|
||||
return 0
|
||||
}
|
||||
return o.NRows()
|
||||
return o.data.NRows()
|
||||
}
|
||||
|
||||
// Date returns the value of the Date column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
|
||||
@ -102,21 +300,59 @@ func (o *DataFrame) Volume(i int) float64 {
|
||||
return o.Float("Volume", i)
|
||||
}
|
||||
|
||||
// Value returns the value of the column at index i. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, nil is returned.
|
||||
func (o *DataFrame) Value(column string, i int) interface{} {
|
||||
colIdx, err := o.DataFrame.NameToColumn(column)
|
||||
// Dates returns a Series of all the dates in the DataFrame.
|
||||
func (o *DataFrame) Dates() Series {
|
||||
return o.Series("Date")
|
||||
}
|
||||
|
||||
// Opens returns a Series of all the open prices in the DataFrame.
|
||||
func (o *DataFrame) Opens() Series {
|
||||
return o.Series("Open")
|
||||
}
|
||||
|
||||
// Highs returns a Series of all the high prices in the DataFrame.
|
||||
func (o *DataFrame) Highs() Series {
|
||||
return o.Series("High")
|
||||
}
|
||||
|
||||
// Lows returns a Series of all the low prices in the DataFrame.
|
||||
func (o *DataFrame) Lows() Series {
|
||||
return o.Series("Low")
|
||||
}
|
||||
|
||||
// Closes returns a Series of all the close prices in the DataFrame.
|
||||
func (o *DataFrame) Closes() Series {
|
||||
return o.Series("Close")
|
||||
}
|
||||
|
||||
// Volumes returns a Series of all the volumes in the DataFrame.
|
||||
func (o *DataFrame) Volumes() Series {
|
||||
return o.Series("Volume")
|
||||
}
|
||||
|
||||
// Series returns a Series of the column with the given name. If the column does not exist, nil is returned.
|
||||
func (o *DataFrame) Series(name string) Series {
|
||||
if o.data == nil {
|
||||
return nil
|
||||
}
|
||||
colIdx, err := o.data.NameToColumn(name)
|
||||
if err != nil {
|
||||
return nil
|
||||
} else if o.DataFrame == nil || i >= o.Len() {
|
||||
return 0
|
||||
} else if i < 0 {
|
||||
i = o.Len() - i
|
||||
if i < 0 {
|
||||
return 0
|
||||
}
|
||||
return o.Series[colIdx].Value(i)
|
||||
}
|
||||
return o.Series[colIdx].Value(i)
|
||||
return &DataSeries{o.data.Series[colIdx]}
|
||||
}
|
||||
|
||||
// Value returns the value of the column at index i. The first value is at index 0. A negative value for i can be used to get i values from the latest, like Python's negative indexing. If i is out of bounds, nil is returned.
|
||||
func (o *DataFrame) Value(column string, i int) interface{} {
|
||||
if o.data == nil {
|
||||
return nil
|
||||
}
|
||||
i = EasyIndex(i, o.Len()) // Allow for negative indexing.
|
||||
colIdx, err := o.data.NameToColumn(column)
|
||||
if err != nil || i < 0 || i >= o.Len() { // Prevent out of bounds access.
|
||||
return nil
|
||||
}
|
||||
return o.data.Series[colIdx].Value(i)
|
||||
}
|
||||
|
||||
// Float returns the value of the column at index i casted to float64. The first value is at index 0. A negative value for i (-n) can be used to get n values from the latest, like Python's negative indexing. If i is out of bounds, 0 is returned.
|
||||
@ -175,13 +411,83 @@ func (o *DataFrame) Time(column string, i int) time.Time {
|
||||
}
|
||||
}
|
||||
|
||||
func NewChartData(data *df.DataFrame) *DataFrame {
|
||||
func NewDataFrame(data *df.DataFrame) *DataFrame {
|
||||
return &DataFrame{data}
|
||||
}
|
||||
|
||||
type RollingWindow struct {
|
||||
DataFrame
|
||||
Period int
|
||||
func (s *DataSeries) Copy() Series {
|
||||
return &DataSeries{s.data.Copy()}
|
||||
}
|
||||
|
||||
func (s *DataSeries) Len() int {
|
||||
if s.data == nil {
|
||||
return 0
|
||||
}
|
||||
return s.data.NRows()
|
||||
}
|
||||
|
||||
func (s *DataSeries) Rolling(period int) *RollingSeries {
|
||||
return &RollingSeries{s, period}
|
||||
}
|
||||
|
||||
func (s *DataSeries) Value(i int) interface{} {
|
||||
if s.data == nil {
|
||||
return nil
|
||||
}
|
||||
i = EasyIndex(i, s.Len()) // Allow for negative indexing.
|
||||
return s.data.Value(i)
|
||||
}
|
||||
|
||||
func (s *DataSeries) Float(i int) float64 {
|
||||
val := s.Value(i)
|
||||
if val == nil {
|
||||
return 0
|
||||
}
|
||||
switch val := val.(type) {
|
||||
case float64:
|
||||
return val
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DataSeries) Int(i int) int64 {
|
||||
val := s.Value(i)
|
||||
if val == nil {
|
||||
return 0
|
||||
}
|
||||
switch val := val.(type) {
|
||||
case int64:
|
||||
return val
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DataSeries) String(i int) string {
|
||||
val := s.Value(i)
|
||||
if val == nil {
|
||||
return ""
|
||||
}
|
||||
switch val := val.(type) {
|
||||
case string:
|
||||
return val
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DataSeries) Time(i int) time.Time {
|
||||
val := s.Value(i)
|
||||
if val == nil {
|
||||
return time.Time{}
|
||||
}
|
||||
switch val := val.(type) {
|
||||
case time.Time:
|
||||
return val
|
||||
default:
|
||||
return time.Time{}
|
||||
}
|
||||
}
|
||||
|
||||
type DataCSVLayout struct {
|
||||
@ -204,6 +510,19 @@ func ReadDataCSV(path string, layout DataCSVLayout) (*df.DataFrame, error) {
|
||||
return ReadDataCSVFromReader(f, layout)
|
||||
}
|
||||
|
||||
func ReadEURUSDDataCSV() (*df.DataFrame, error) {
|
||||
return ReadDataCSV("./EUR_USD Historical Data.csv", DataCSVLayout{
|
||||
LatestFirst: true,
|
||||
DateFormat: "01/02/2006",
|
||||
Date: "\ufeff\"Date\"",
|
||||
Open: "Open",
|
||||
High: "High",
|
||||
Low: "Low",
|
||||
Close: "Price",
|
||||
Volume: "Vol.",
|
||||
})
|
||||
}
|
||||
|
||||
func ReadDataCSVFromReader(r io.Reader, layout DataCSVLayout) (*df.DataFrame, error) {
|
||||
data, err := ReadCSVFromReader(r, layout.DateFormat, layout.LatestFirst)
|
||||
if err != nil {
|
||||
|
67
data_test.go
67
data_test.go
@ -1,18 +1,63 @@
|
||||
package autotrader
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func newTestingDataFrame() *DataFrame {
|
||||
_dataframe, err := ReadEURUSDDataCSV()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return NewDataFrame(_dataframe)
|
||||
}
|
||||
|
||||
func TestDataSeries(t *testing.T) {
|
||||
data := newTestingDataFrame()
|
||||
if data == nil {
|
||||
t.Fatal("Could not create DataFrame")
|
||||
}
|
||||
|
||||
dates, closes := data.Dates(), data.Closes()
|
||||
|
||||
if dates.Len() != 2610 {
|
||||
t.Fatalf("Expected 2610 rows, got %d", dates.Len())
|
||||
}
|
||||
if closes.Len() != 2610 {
|
||||
t.Fatalf("Expected 2610 rows, got %d", closes.Len())
|
||||
}
|
||||
|
||||
sma10 := closes.Rolling(10).Mean()
|
||||
if sma10.Len() != 2610 {
|
||||
t.Fatalf("Expected 2610 rows, got %d", sma10.Len())
|
||||
}
|
||||
if sma10.Value(-1) != 1.10039 { // Latest closing price averaged over 10 periods.
|
||||
t.Fatalf("Expected 1.10039, got %f", sma10.Value(-1))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDataFrame(t *testing.T) {
|
||||
data := newTestingDataFrame()
|
||||
if data == nil {
|
||||
t.Fatal("Could not create DataFrame")
|
||||
}
|
||||
|
||||
if data.Len() != 2610 {
|
||||
t.Fatalf("Expected 2610 rows, got %d", data.Len())
|
||||
}
|
||||
if data.Close(-1) != 1.0967 {
|
||||
t.Fatalf("Expected 1.0967, got %f", data.Close(-1))
|
||||
}
|
||||
|
||||
date := data.Date(2) // Get the 3rd earliest date from the Date column.
|
||||
if date.Year() != 2013 || date.Month() != 5 || date.Day() != 13 {
|
||||
t.Fatalf("Expected 2013-05-13, got %s", date.Format(time.DateOnly))
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadDataCSV(t *testing.T) {
|
||||
data, err := ReadDataCSV("./EUR_USD Historical Data.csv", DataCSVLayout{
|
||||
LatestFirst: true,
|
||||
DateFormat: "01/02/2006",
|
||||
Date: "\ufeff\"Date\"",
|
||||
Open: "Open",
|
||||
High: "High",
|
||||
Low: "Low",
|
||||
Close: "Price",
|
||||
Volume: "Vol.",
|
||||
})
|
||||
data, err := ReadEURUSDDataCSV()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user