add logical plan distributed optimizer to query frontend #6974
base: master
@@ -0,0 +1,40 @@

package distributed_execution

import (
	"fmt"

	"github.com/prometheus/prometheus/util/annotations"
	"github.com/thanos-io/promql-engine/logicalplan"
)

// This is a simplified implementation that only handles binary aggregation cases.
// Future versions of the distributed optimizer are expected to:
// - Support more complex query patterns
// - Incorporate diverse optimization strategies
// - Extend support to node types beyond binary operations
type DistributedOptimizer struct{}

func (d *DistributedOptimizer) Optimize(root logicalplan.Node) (logicalplan.Node, annotations.Annotations, error) {
	warns := annotations.New()

	if root == nil {
		return nil, *warns, fmt.Errorf("nil root node")
	}

	logicalplan.TraverseBottomUp(nil, &root, func(parent, current *logicalplan.Node) bool {
		if (*current).Type() == logicalplan.BinaryNode {
			ch := (*current).Children()

			for _, child := range ch {
				temp := (*child).Clone()
				*child = NewRemoteNode()
				*(*child).Children()[0] = temp
			}
		}

		return false
	})
	return root, *warns, nil
}
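For context, the pass can be exercised end to end by building a logical plan from a PromQL expression and handing its root to the optimizer. The sketch below assumes the CreateTestLogicalPlan helper defined later in this diff; the query string and time range are illustrative only, not part of the PR.

// Sketch only: exercising the optimizer on a plan built from a PromQL query.
// CreateTestLogicalPlan is the test helper defined later in this diff; the
// query string and times are illustrative.
lp, _, err := CreateTestLogicalPlan(
	"sum(rate(http_requests_total[5m])) + sum(rate(http_errors_total[5m]))",
	time.Now(), time.Now(), time.Minute,
)
if err != nil {
	panic(err)
}

d := DistributedOptimizer{}
newRoot, _, err := d.Optimize((*lp).Root())
if err != nil {
	panic(err)
}

// Each child of the top-level binary expression is now wrapped in a remote node.
fmt.Println(newRoot.String())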
@@ -0,0 +1,125 @@

package distributed_execution

import (
	"testing"
	"time"

	"github.com/prometheus/prometheus/promql/parser"
	"github.com/stretchr/testify/require"
	"github.com/thanos-io/promql-engine/logicalplan"
	"github.com/thanos-io/promql-engine/query"
)

func TestDistributedOptimizer(t *testing.T) {
	now := time.Now()
	testCases := []struct {
		name  string
		query string
		start time.Time
		end   time.Time
		step  time.Duration
Reviewer comment: Nit. We don't have to parameterize this.
		remoteExecCount int
Reviewer comment (on `remoteExecCount`): Let's compare the result logical plan instead.
	}{
		{
			name:            "binary operation with aggregations",
			query:           "sum(rate(node_cpu_seconds_total{mode!=\"idle\"}[5m])) + sum(rate(node_memory_Active_bytes[5m]))",
			start:           now,
			end:             now,
			step:            time.Minute,
			remoteExecCount: 2,
		},
		{
			name:            "multiple binary operations with aggregations",
			query:           "sum(rate(http_requests_total{job=\"api\"}[5m])) + sum(rate(http_requests_total{job=\"web\"}[5m])) - sum(rate(http_requests_total{job=\"cache\"}[5m]))",
			start:           now,
			end:             now,
			step:            time.Minute,
			remoteExecCount: 4,
		},
		{
			name:            "subquery with aggregation",
			query:           "sum(rate(container_network_transmit_bytes_total[5m:1m]))",
			start:           now,
			end:             now,
			step:            time.Minute,
			remoteExecCount: 0,
		},
		{
			name:            "function applied on binary operation",
			query:           "rate(http_requests_total[5m]) + rate(http_errors_total[5m]) > bool 0",
			start:           now,
			end:             now,
			step:            time.Minute,
			remoteExecCount: 4,
		},
		{
			name:            "numerical binary query",
			query:           "(1 + 1) + (1 + 1)",
			start:           now,
			end:             now,
			step:            time.Minute,
			remoteExecCount: 0,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			lp, _, err := CreateTestLogicalPlan(tc.query, tc.start, tc.end, tc.step)
			require.NoError(t, err)

			d := DistributedOptimizer{}
			newRoot, _, err := d.Optimize((*lp).Root())
			require.NoError(t, err)

			remoteNodeCount := 0
			logicalplan.TraverseBottomUp(nil, &newRoot, func(parent, current *logicalplan.Node) bool {
				if RemoteNode == (*current).Type() {
					remoteNodeCount++
				}
				return false
			})
			require.Equal(t, tc.remoteExecCount, remoteNodeCount)
		})
	}
}

func getStartAndEnd(start time.Time, end time.Time, step time.Duration) (time.Time, time.Time) {
	if step == 0 {
		return start, start
	}
	return start, end
}

func CreateTestLogicalPlan(qs string, start time.Time, end time.Time, step time.Duration) (*logicalplan.Plan, query.Options, error) {
	start, end = getStartAndEnd(start, end, step)

	qOpts := query.Options{
		Start:      start,
		End:        end,
		Step:       step,
		StepsBatch: 10,
		NoStepSubqueryIntervalFn: func(duration time.Duration) time.Duration {
			return 0
		},
		LookbackDelta:      0,
		EnablePerStepStats: false,
	}

	expr, err := parser.NewParser(qs, parser.WithFunctions(parser.Functions)).ParseExpr()
	if err != nil {
		return nil, qOpts, err
	}

	planOpts := logicalplan.PlanOptions{
		DisableDuplicateLabelCheck: false,
	}

	logicalPlan, err := logicalplan.NewFromAST(expr, &qOpts, planOpts)
	if err != nil {
		return nil, qOpts, err
	}
	optimizedPlan, _ := logicalPlan.Optimize(logicalplan.DefaultOptimizers)

	return &optimizedPlan, qOpts, nil
}
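Following the review note on `remoteExecCount`, the assertion could instead compare the optimized plan's rendered form. The fragment below is a sketch only: `tc.expectedPlan` is a hypothetical test-case field, and the exact rendering depends on each node's String() implementation.

// Sketch only: asserting on the rendered logical plan instead of counting
// remote nodes. tc.expectedPlan is a hypothetical field, not part of this PR.
newRoot, _, err := d.Optimize((*lp).Root())
require.NoError(t, err)
require.Equal(t, tc.expectedPlan, newRoot.String())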
@@ -0,0 +1,21 @@

package distributed_execution

type FragmentKey struct {
	queryID    uint64
	fragmentID uint64
}

func MakeFragmentKey(queryID uint64, fragmentID uint64) *FragmentKey {
	return &FragmentKey{
		queryID:    queryID,
		fragmentID: fragmentID,
	}
}

func (f FragmentKey) GetQueryID() uint64 {
	return f.queryID
}

func (f FragmentKey) GetFragmentID() uint64 {
	return f.fragmentID
}
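Because FragmentKey is a small value type with only comparable fields, it can be used directly as a map key for tracking fragments. The snippet below is a hypothetical illustration, not code from this PR; the map name and address are made up.

// Sketch only: tracking the address serving each plan fragment.
fragmentAddrs := map[FragmentKey]string{}

key := *MakeFragmentKey(42, 1) // queryID=42, fragmentID=1
fragmentAddrs[key] = "127.0.0.1:9095"

// The same (queryID, fragmentID) pair looks up the same entry.
addr := fragmentAddrs[*MakeFragmentKey(42, 1)]
_ = addr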
@@ -0,0 +1,72 @@

package distributed_execution

import (
	"encoding/json"
	"fmt"

	"github.com/prometheus/prometheus/promql/parser"
	"github.com/thanos-io/promql-engine/logicalplan"
)

type NodeType = logicalplan.NodeType
type Node = logicalplan.Node
Reviewer comment: These seem not needed. When you return in the function you can just specify the return type to be `logicalplan.Node`.
const (
	RemoteNode = "RemoteNode"
)

// (to verify interface implementations)
var _ logicalplan.Node = (*Remote)(nil)

type Remote struct {
	Op   parser.ItemType
	Expr Node `json:"-"`
Reviewer comment (on the `json:"-"` tag): Do we need this?
	FragmentKey  FragmentKey
	FragmentAddr string
}

func NewRemoteNode() Node {
Reviewer comment: This might need to take expr as a parameter.
	return &Remote{
		// initialize the fragment key pointer first
		FragmentKey: FragmentKey{},
	}
}

func (r *Remote) Clone() Node {
	return &Remote{Op: r.Op, Expr: r.Expr.Clone(), FragmentKey: r.FragmentKey, FragmentAddr: r.FragmentAddr}
}

func (r *Remote) Children() []*Node {
	return []*Node{&r.Expr}
}

func (r *Remote) String() string {
	return fmt.Sprintf("%s%s", r.Op.String(), r.Expr.String())
Reviewer comment: We need to mention the node name.
}

func (r *Remote) ReturnType() parser.ValueType {
	return r.Expr.ReturnType()
}

func (r *Remote) Type() NodeType { return RemoteNode }

type remote struct {
	QueryID      uint64
	FragmentID   uint64
	FragmentAddr string
}

func (r *Remote) MarshalJSON() ([]byte, error) {
	return json.Marshal(remote{
		QueryID:      r.FragmentKey.queryID,
		FragmentID:   r.FragmentKey.fragmentID,
		FragmentAddr: r.FragmentAddr,
	})
}

func (r *Remote) UnmarshalJSON(data []byte) error {
	re := remote{}
	if err := json.Unmarshal(data, &re); err != nil {
		return err
	}

	r.FragmentKey = *MakeFragmentKey(re.QueryID, re.FragmentID)
	r.FragmentAddr = re.FragmentAddr
	return nil
}
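Picking up the review notes on NewRemoteNode and String, one possible shape is sketched below. The constructor variant, the helper name, and the "remote(...)" prefix are assumptions for illustration, not part of this PR.

// Sketch only: a constructor that takes the wrapped expression, per review.
func NewRemoteNodeWithExpr(expr Node) Node {
	return &Remote{
		Expr:        expr,
		FragmentKey: FragmentKey{},
	}
}

// Sketch only: rendering that mentions the node name, so optimized plans are
// distinguishable from the plain child expression when printed.
func remoteNodeString(r *Remote) string {
	return fmt.Sprintf("remote(%s)", r.Expr.String())
}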
@@ -0,0 +1,76 @@

package distributed_execution

import (
	"encoding/json"
	"testing"

	"github.com/prometheus/prometheus/promql/parser"
	"github.com/stretchr/testify/require"
	"github.com/thanos-io/promql-engine/logicalplan"
)

func TestRemoteNode(t *testing.T) {
	t.Run("NewRemoteNode creates valid node", func(t *testing.T) {
		node := NewRemoteNode()
		require.NotNil(t, node)
		require.IsType(t, &Remote{}, node)
		require.Equal(t, (&Remote{}).Type(), node.Type())
	})

	t.Run("Clone creates correct copy", func(t *testing.T) {
		original := &Remote{
			Op:           parser.ADD,
			FragmentKey:  FragmentKey{queryID: 1, fragmentID: 2},
			FragmentAddr: "[IP_ADDRESS]:9090",
			Expr:         &logicalplan.NumberLiteral{Val: 42},
		}

		cloned := original.Clone()
		require.NotNil(t, cloned)

		remote, ok := cloned.(*Remote)
		require.True(t, ok)
		require.Equal(t, original.Op, remote.Op)
		require.Equal(t, original.FragmentKey, remote.FragmentKey)
		require.Equal(t, original.FragmentAddr, remote.FragmentAddr)
		require.Equal(t, original.Expr.String(), remote.Expr.String())
	})

	t.Run("JSON marshaling/unmarshaling", func(t *testing.T) {
		original := &Remote{
			FragmentKey:  *MakeFragmentKey(1, 2),
			FragmentAddr: "[IP_ADDRESS]:9090",
		}

		data, err := json.Marshal(original)
		require.NoError(t, err)

		var unmarshaled Remote
		err = json.Unmarshal(data, &unmarshaled)
		require.NoError(t, err)

		require.Equal(t, original.FragmentKey.queryID, unmarshaled.FragmentKey.queryID)
		require.Equal(t, original.FragmentKey.fragmentID, unmarshaled.FragmentKey.fragmentID)
		require.Equal(t, original.FragmentAddr, unmarshaled.FragmentAddr)
	})

	t.Run("Children returns correct nodes", func(t *testing.T) {
		expr := &logicalplan.NumberLiteral{Val: 42}
		node := &Remote{
			Expr: expr,
		}

		children := node.Children()
		require.Len(t, children, 1)
		require.Equal(t, expr, *children[0])
	})

	t.Run("ReturnType matches expression type", func(t *testing.T) {
		expr := &logicalplan.NumberLiteral{Val: 42}
		node := &Remote{
			Expr: expr,
		}

		require.Equal(t, expr.ReturnType(), node.ReturnType())
	})
}
Reviewer comment: Even though it is just a dummy optimizer, we should probably add constraints to only mark a child as a remote node if it has an aggregation. We don't want to optimize queries like `up + up`, as each child returns raw data instead of aggregated data.
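One way to express that constraint is sketched below. It assumes the engine exposes an AggregationNode node type constant alongside BinaryNode, and the helper name is made up; this is not code from the PR.

// Sketch only: guard used before wrapping a binary node's child in a remote
// node, so queries like `up + up` are left untouched. Assumes the engine
// defines logicalplan.AggregationNode as a node type.
func containsAggregation(node logicalplan.Node) bool {
	found := false
	logicalplan.TraverseBottomUp(nil, &node, func(parent, current *logicalplan.Node) bool {
		if (*current).Type() == logicalplan.AggregationNode {
			found = true
		}
		return false
	})
	return found
}

// In Optimize, the wrapping loop would then become:
//   for _, child := range ch {
//       if !containsAggregation(*child) {
//           continue
//       }
//       // ...wrap the child in a remote node as before...
//   }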