-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathgrouper.go
125 lines (107 loc) · 3.79 KB
/
grouper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package qframe
import (
"github.com/tobgu/qframe/internal/grouper"
"github.com/tobgu/qframe/internal/icolumn"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// GroupStats contains internal statistics for grouping.
// It is a distinct named type defined on top of the internal grouper.GroupStats type.
//
// Clients should not depend on this for any type of decision making. It is strictly "for info".
// The layout may change if the underlying grouping mechanisms change.
type GroupStats grouper.GroupStats
// Grouper contains groups of rows produced by the QFrame.GroupBy function.
//
// Use Aggregate to reduce every group to a single row, or QFrames to get
// one QFrame per group.
type Grouper struct {
// indices holds one row index per group. Aggregate reads the first entry of each
// index to populate the group-by columns, so every index is expected to be non-empty.
indices []index.Int
// groupedColumns lists the names of the columns that the rows were grouped by.
groupedColumns []string
// columns is the column slice handed to every frame produced by QFrames.
columns []namedColumn
// columnsByName gives access to the same kind of column data keyed by column name.
// Aggregate uses it to look up both group-by columns and aggregation source columns.
columnsByName map[string]namedColumn
// Err, when non-nil, is returned by Aggregate (in QFrame.Err) and by QFrames instead
// of a result. Presumably set by GroupBy when grouping failed; confirm against the caller.
Err error
// Stats holds informational statistics about the grouping, see GroupStats.
Stats GroupStats
}
// Aggregation represents a function to apply to a column.
// It is the argument type accepted by Grouper.Aggregate.
type Aggregation struct {
// Fn is the aggregation function to apply.
//
// IMPORTANT: For pointer and reference types you must not assume that the data passed as
// argument to this function is valid after the function returns. If you plan to keep it
// around you need to take a copy of the data.
Fn types.SliceFuncOrBuiltInId
// Column is the name of the column to apply the aggregation to.
Column string
// As can be used to specify the destination column name. If not given (empty string)
// it defaults to the value of Column.
As string
}
// Aggregate applies the given aggregations to all row groups in the Grouper.
//
// The resulting QFrame has one row per group. Its columns are the group-by
// columns, in the order they were grouped on, followed by one column per
// aggregation in the order the aggregations were given. Each aggregated column
// is named Aggregation.As, or Aggregation.Column if As is empty.
//
// Failures are reported through the Err field of the returned QFrame:
//   - the Grouper itself carries an error (g.Err),
//   - an aggregation refers to a column that does not exist,
//   - the destination name is a group-by column or is already used by an
//     earlier aggregation,
//   - the aggregation function fails on the column.
//
// Time complexity O(m*n) where m = number of aggregations, n = number of rows.
func (g Grouper) Aggregate(aggs ...Aggregation) QFrame {
	if g.Err != nil {
		return QFrame{Err: g.Err}
	}

	// Loop over all groups and pick the first row in each of the groups.
	// This index will be used to populate the grouped by columns below.
	firstElementIx := make(index.Int, len(g.indices))
	for i, ix := range g.indices {
		firstElementIx[i] = ix[0]
	}

	newColumnsByName := make(map[string]namedColumn, len(g.groupedColumns)+len(aggs))
	newColumns := make([]namedColumn, 0, len(g.groupedColumns)+len(aggs))
	for i, colName := range g.groupedColumns {
		col := g.columnsByName[colName]
		col.pos = i
		col.Column = col.Subset(firstElementIx)
		newColumnsByName[colName] = col
		newColumns = append(newColumns, col)
	}

	var err error
	for _, agg := range aggs {
		col, ok := g.columnsByName[agg.Column]
		if !ok {
			return QFrame{Err: qerrors.New("Aggregate", unknownCol(agg.Column))}
		}

		newColumnName := agg.Column
		if agg.As != "" {
			newColumnName = agg.As
		}

		if _, exists := newColumnsByName[newColumnName]; exists {
			return QFrame{Err: qerrors.New(
				"Aggregate",
				"cannot aggregate on column that is part of group by or is already an aggregate: %s", newColumnName)}
		}

		col.name = newColumnName
		// Record the column's position in the new frame, exactly as is done for
		// the group-by columns above. Without this the column would keep the
		// position it had in the source frame, which need not match its slot in
		// newColumns.
		col.pos = len(newColumns)

		if agg.Fn == "count" {
			// Special convenience case for "count" which would normally require a cast from
			// any other type of column to int before being executed.
			counts := make([]int, len(g.indices))
			for i, ix := range g.indices {
				counts[i] = len(ix)
			}
			col.Column = icolumn.New(counts)
		} else {
			col.Column, err = col.Aggregate(g.indices, agg.Fn)
			if err != nil {
				return QFrame{Err: qerrors.Propagate("Aggregate", err)}
			}
		}

		newColumnsByName[newColumnName] = col
		newColumns = append(newColumns, col)
	}

	return QFrame{columns: newColumns, columnsByName: newColumnsByName, index: index.NewAscending(uint32(len(g.indices)))}
}
// QFrames splits the Grouper into one QFrame per group. Every returned frame
// shares the Grouper's columns and differs only in the row index it exposes.
//
// If the Grouper carries an error, that error is returned together with a nil slice.
//
// Time complexity O(n) where n = number of groups.
func (g Grouper) QFrames() ([]QFrame, error) {
	if g.Err != nil {
		return nil, g.Err
	}

	// Template frame holding the shared column data; each group only swaps in its own index.
	template := QFrame{
		columns:       g.columns,
		columnsByName: g.columnsByName,
		index:         index.Int{},
	}

	frames := make([]QFrame, 0, len(g.indices))
	for _, groupIx := range g.indices {
		frames = append(frames, template.withIndex(groupIx))
	}

	return frames, nil
}