From 2df33d95f8e906672cb00456c02728985a4155b3 Mon Sep 17 00:00:00 2001 From: Mihael Konjevic Date: Fri, 23 Aug 2024 17:28:36 +0200 Subject: [PATCH] refactor: rewrite query planning code --- src/__tests__/clone.test.ts | 2 +- src/__tests__/index.test.ts | 10 +- ...ocess-query-and-expand-to-segments.test.ts | 135 ------- .../query-builder/query-plan.test.ts | 213 +++++++++++ src/lib/query-builder.ts | 59 +-- src/lib/query-builder/build-query.ts | 320 ++++++---------- src/lib/query-builder/filter-builder.ts | 59 +-- .../process-query-and-expand-to-segments.ts | 246 ------------- src/lib/query-builder/query-plan.ts | 343 ++++++++++++++++++ src/lib/types.ts | 23 -- 10 files changed, 695 insertions(+), 715 deletions(-) delete mode 100644 src/__tests__/query-builder/process-query-and-expand-to-segments.test.ts create mode 100644 src/__tests__/query-builder/query-plan.test.ts delete mode 100644 src/lib/query-builder/process-query-and-expand-to-segments.ts create mode 100644 src/lib/query-builder/query-plan.ts diff --git a/src/__tests__/clone.test.ts b/src/__tests__/clone.test.ts index 37c7019..5188897 100644 --- a/src/__tests__/clone.test.ts +++ b/src/__tests__/clone.test.ts @@ -102,7 +102,7 @@ describe("clone", async () => { assert.equal( query.sql, - 'select "q0"."customer___user_id" as "customer___user_id", "q0"."employee___user_id" as "employee___user_id", "q0"."invoice___invoice_id" as "invoice___invoice_id", "q0"."customer___count" as "customer___count", "q1"."employee___count" as "employee___count" from (select "customer_query"."customer___user_id" as "customer___user_id", "customer_query"."employee___user_id" as "employee___user_id", "customer_query"."invoice___invoice_id" as "invoice___invoice_id", COUNT(DISTINCT "count___metric_ref_0") as "customer___count" from (select distinct "User"."UserId" as "count___metric_ref_0", "User"."UserId" as "customer___user_id", "Invoice"."InvoiceId" as "invoice___invoice_id", "User"."UserId" as "employee___user_id" from "User" left join "Invoice" on "User"."UserId" = "Invoice"."CustomerId" right join "User" on "User"."UserId" = "Invoice"."EmployeeId") as "customer_query" group by "customer_query"."customer___user_id", "customer_query"."employee___user_id", "customer_query"."invoice___invoice_id") as "q0" inner join (select "employee_query"."customer___user_id" as "customer___user_id", "employee_query"."employee___user_id" as "employee___user_id", "employee_query"."invoice___invoice_id" as "invoice___invoice_id", COUNT(DISTINCT "count___metric_ref_0") as "employee___count" from (select distinct "User"."UserId" as "count___metric_ref_0", "User"."UserId" as "employee___user_id", "Invoice"."InvoiceId" as "invoice___invoice_id", "User"."UserId" as "customer___user_id" from "User" left join "Invoice" on "User"."UserId" = "Invoice"."EmployeeId" right join "User" on "User"."UserId" = "Invoice"."CustomerId") as "employee_query" group by "employee_query"."customer___user_id", "employee_query"."employee___user_id", "employee_query"."invoice___invoice_id") as "q1" on "q0"."customer___user_id" = "q1"."customer___user_id" and "q0"."employee___user_id" = "q1"."employee___user_id" and "q0"."invoice___invoice_id" = "q1"."invoice___invoice_id" order by "customer___count" desc limit $1 offset $2', + 'select "q0"."customer___user_id" as "customer___user_id", "q0"."employee___user_id" as "employee___user_id", "q0"."invoice___invoice_id" as "invoice___invoice_id", "q0"."customer___count" as "customer___count", "q1"."employee___count" as "employee___count" from (select "s0"."customer___user_id" as "customer___user_id", "s0"."employee___user_id" as "employee___user_id", "s0"."invoice___invoice_id" as "invoice___invoice_id", COUNT(DISTINCT "count___metric_ref_0") as "customer___count" from (select distinct "User"."UserId" as "customer___user_id", "User"."UserId" as "employee___user_id", "Invoice"."InvoiceId" as "invoice___invoice_id", "User"."UserId" as "count___metric_ref_0" from "User" left join "Invoice" on "User"."UserId" = "Invoice"."CustomerId" right join "User" on "User"."UserId" = "Invoice"."EmployeeId") as "s0" group by "s0"."customer___user_id", "s0"."employee___user_id", "s0"."invoice___invoice_id") as "q0" inner join (select "s1"."customer___user_id" as "customer___user_id", "s1"."employee___user_id" as "employee___user_id", "s1"."invoice___invoice_id" as "invoice___invoice_id", COUNT(DISTINCT "count___metric_ref_0") as "employee___count" from (select distinct "User"."UserId" as "customer___user_id", "User"."UserId" as "employee___user_id", "Invoice"."InvoiceId" as "invoice___invoice_id", "User"."UserId" as "count___metric_ref_0" from "User" left join "Invoice" on "User"."UserId" = "Invoice"."EmployeeId" right join "User" on "User"."UserId" = "Invoice"."CustomerId") as "s1" group by "s1"."customer___user_id", "s1"."employee___user_id", "s1"."invoice___invoice_id") as "q1" on "q0"."customer___user_id" = "q1"."customer___user_id" and "q0"."employee___user_id" = "q1"."employee___user_id" and "q0"."invoice___invoice_id" = "q1"."invoice___invoice_id" order by "customer___count" desc limit $1 offset $2', ); }); }); diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index 97b3a33..b95d3a0 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -448,7 +448,7 @@ describe("semantic layer", async () => { assert.equal( query.sql, - `select "q0"."customers___customer_id" as "customers___customer_id", "q0"."customers___full_name" as "customers___full_name", "q0"."invoice_lines___invoice_id" as "invoice_lines___invoice_id" from (select "InvoiceLine"."InvoiceId" as "invoice_lines___invoice_id", "Customer"."CustomerId" as "customers___customer_id", "Customer"."FirstName" || ' ' || "Customer"."LastName" as "customers___full_name" from "InvoiceLine" right join "Invoice" on "Invoice"."InvoiceId" = "InvoiceLine"."InvoiceId" right join "Customer" on "Customer"."CustomerId" = "Invoice"."CustomerId" where "Customer"."CustomerId" = $1) as "q0" group by "q0"."customers___customer_id", "q0"."customers___full_name", "q0"."invoice_lines___invoice_id" order by "customers___customer_id" asc limit $2 offset $3`, + `select "q0"."customers___customer_id" as "customers___customer_id", "q0"."customers___full_name" as "customers___full_name", "q0"."invoice_lines___invoice_id" as "invoice_lines___invoice_id" from (select distinct "Customer"."CustomerId" as "customers___customer_id", "Customer"."FirstName" || ' ' || "Customer"."LastName" as "customers___full_name", "InvoiceLine"."InvoiceId" as "invoice_lines___invoice_id" from "Customer" left join "Invoice" on "Customer"."CustomerId" = "Invoice"."CustomerId" left join "InvoiceLine" on "Invoice"."InvoiceId" = "InvoiceLine"."InvoiceId" where "Customer"."CustomerId" = $1) as "q0" group by "q0"."customers___customer_id", "q0"."customers___full_name", "q0"."invoice_lines___invoice_id" order by "customers___customer_id" asc limit $2 offset $3`, ); const result = await client.query>( @@ -456,7 +456,7 @@ describe("semantic layer", async () => { query.bindings, ); - // Sort the rows on the client, because they will be sorted by customer id which is shared by all rows + // Sort the rows on the client, because they will be sorted by customer id which is same for all rows assert.deepEqual( result.rows.sort(), [ @@ -2210,7 +2210,7 @@ describe("semantic layer", async () => { assert.equal( query.sql, - 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id" from (select "Invoice"."InvoiceId" as "invoices___invoice_id", "customers"."CustomerId" || cast($1 as text) as "customers___customer_id" from "Invoice" right join (select * from "Customer" where "CustomerId" = $2) as "customers" on "customers"."CustomerId" || cast($3 as text) = "Invoice"."CustomerId" and $4 = $5) as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id" order by "customers___customer_id" asc limit $6 offset $7', + 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id" from (select distinct "customers"."CustomerId" || cast($1 as text) as "customers___customer_id", "Invoice"."InvoiceId" as "invoices___invoice_id" from (select * from "Customer" where "CustomerId" = $2) as "customers" left join "Invoice" on "customers"."CustomerId" || cast($3 as text) = "Invoice"."CustomerId" and $4 = $5) as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id" order by "customers___customer_id" asc limit $6 offset $7', ); // First 5 bindings are for the customerId, last one is for the limit @@ -2244,7 +2244,7 @@ describe("semantic layer", async () => { assert.equal( query.sql, - 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id" from (select "Invoice"."InvoiceId" as "invoices___invoice_id", "customers"."CustomerId" || cast($1 as text) as "customers___customer_id" from "Invoice" right join (select * from "Customer" where "CustomerId" = $2) as "customers" on "customers"."CustomerId" || cast($3 as text) = "Invoice"."CustomerId" and $4 = $5 where "customers"."CustomerId" || cast($6 as text) in (select "q0"."customers___customer_id" as "customers___customer_id" from (select "customers"."CustomerId" || cast($7 as text) as "customers___customer_id" from (select * from "Customer" where "CustomerId" = $8) as "customers" where "customers"."CustomerId" || cast($9 as text) = $10) as "q0" group by "q0"."customers___customer_id" order by "customers___customer_id" asc limit $11 offset $12)) as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id" order by "customers___customer_id" asc limit $13 offset $14', + 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id" from (select distinct "customers"."CustomerId" || cast($1 as text) as "customers___customer_id", "Invoice"."InvoiceId" as "invoices___invoice_id" from (select * from "Customer" where "CustomerId" = $2) as "customers" left join "Invoice" on "customers"."CustomerId" || cast($3 as text) = "Invoice"."CustomerId" and $4 = $5 where "customers"."CustomerId" || cast($6 as text) in (select "q0"."customers___customer_id" as "customers___customer_id" from (select "customers"."CustomerId" || cast($7 as text) as "customers___customer_id" from (select * from "Customer" where "CustomerId" = $8) as "customers" where "customers"."CustomerId" || cast($9 as text) = $10) as "q0" group by "q0"."customers___customer_id" order by "customers___customer_id" asc limit $11 offset $12)) as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id" order by "customers___customer_id" asc limit $13 offset $14', ); assert.deepEqual( @@ -2342,7 +2342,7 @@ describe("semantic layer", async () => { assert.equal( query.sql, - 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id", "q0"."invoice_lines___invoice_line_id" as "invoice_lines___invoice_line_id" from (select "public"."InvoiceLine"."InvoiceLineId" as "invoice_lines___invoice_line_id", "invoices"."InvoiceId" as "invoices___invoice_id", "public"."Customer"."CustomerId" as "customers___customer_id" from "public"."InvoiceLine" right join (select * from "public"."Invoice") as "invoices" on "invoices"."InvoiceId" = "public"."InvoiceLine"."InvoiceId" right join "public"."Customer" on "public"."Customer"."CustomerId" = "invoices"."CustomerId") as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id", "q0"."invoice_lines___invoice_line_id" order by "customers___customer_id" asc limit $1 offset $2', + 'select "q0"."customers___customer_id" as "customers___customer_id", "q0"."invoices___invoice_id" as "invoices___invoice_id", "q0"."invoice_lines___invoice_line_id" as "invoice_lines___invoice_line_id" from (select distinct "public"."Customer"."CustomerId" as "customers___customer_id", "invoices"."InvoiceId" as "invoices___invoice_id", "public"."InvoiceLine"."InvoiceLineId" as "invoice_lines___invoice_line_id" from "public"."Customer" left join (select * from "public"."Invoice") as "invoices" on "public"."Customer"."CustomerId" = "invoices"."CustomerId" left join "public"."InvoiceLine" on "invoices"."InvoiceId" = "public"."InvoiceLine"."InvoiceId") as "q0" group by "q0"."customers___customer_id", "q0"."invoices___invoice_id", "q0"."invoice_lines___invoice_line_id" order by "customers___customer_id" asc limit $1 offset $2', ); assert.deepEqual(query.bindings, [5000, 0]); diff --git a/src/__tests__/query-builder/process-query-and-expand-to-segments.test.ts b/src/__tests__/query-builder/process-query-and-expand-to-segments.test.ts deleted file mode 100644 index fdcb1c9..0000000 --- a/src/__tests__/query-builder/process-query-and-expand-to-segments.test.ts +++ /dev/null @@ -1,135 +0,0 @@ -import * as semanticLayer from "../../index.js"; -import * as fullRepository from "../full-repository.js"; - -import { assert, it } from "vitest"; - -import { processQueryAndExpandToSegments } from "../../lib/query-builder/process-query-and-expand-to-segments.js"; - -it("can process query and expand to segments", () => { - const { queryBuilder } = fullRepository; - const query: semanticLayer.Query = { - dimensions: ["artists.name"], - metrics: ["tracks.unit_price", "invoices.total"], - filters: [ - { - operator: "equals", - member: "genres.name", - value: ["Rock"], - }, - { operator: "gt", member: "invoices.total", value: [100] }, - ], - order: [{ member: "artists.name", direction: "asc" }], - }; - - const processed = processQueryAndExpandToSegments( - queryBuilder.repository, - query, - ); - - assert.deepEqual(processed, { - query: { - dimensions: ["artists.name"], - metrics: ["tracks.unit_price", "invoices.total"], - filters: [ - { operator: "equals", member: "genres.name", value: ["Rock"] }, - { - member: "invoices.total", - operator: "gt", - value: [100], - }, - ], - order: [{ member: "artists.name", direction: "asc" }], - }, - referencedModels: { - all: ["artists", "tracks", "invoices", "genres"], - dimensions: ["artists"], - metrics: ["tracks", "invoices"], - }, - segments: [ - { - query: { - dimensions: ["artists.name"], - metrics: ["tracks.unit_price"], - filters: [ - { operator: "equals", member: "genres.name", value: ["Rock"] }, - { - member: "invoices.total", - operator: "gt", - value: [100], - }, - ], - }, - projectedQuery: { - dimensions: ["artists.name"], - metrics: ["tracks.unit_price"], - filters: [ - { operator: "equals", member: "genres.name", value: ["Rock"] }, - { - member: "invoices.total", - operator: "gt", - value: [100], - }, - ], - }, - referencedModels: { - all: ["artists", "tracks", "invoices", "genres"], - dimensions: ["artists"], - metrics: ["tracks"], - }, - modelQueries: { - artists: { - dimensions: new Set(["artists.name"]), - metrics: new Set(), - }, - tracks: { - dimensions: new Set(), - metrics: new Set(["tracks.unit_price"]), - }, - }, - metricModel: "tracks", - }, - { - query: { - dimensions: ["artists.name"], - metrics: ["invoices.total"], - filters: [ - { operator: "equals", member: "genres.name", value: ["Rock"] }, - { - member: "invoices.total", - operator: "gt", - value: [100], - }, - ], - }, - projectedQuery: { - dimensions: ["artists.name"], - metrics: ["invoices.total"], - filters: [ - { operator: "equals", member: "genres.name", value: ["Rock"] }, - { - member: "invoices.total", - operator: "gt", - value: [100], - }, - ], - }, - referencedModels: { - all: ["artists", "tracks", "invoices", "genres"], - dimensions: ["artists"], - metrics: ["invoices"], - }, - modelQueries: { - artists: { - dimensions: new Set(["artists.name"]), - metrics: new Set(), - }, - invoices: { - dimensions: new Set(), - metrics: new Set(["invoices.total"]), - }, - }, - metricModel: "invoices", - }, - ], - }); -}); diff --git a/src/__tests__/query-builder/query-plan.test.ts b/src/__tests__/query-builder/query-plan.test.ts new file mode 100644 index 0000000..d5a2a73 --- /dev/null +++ b/src/__tests__/query-builder/query-plan.test.ts @@ -0,0 +1,213 @@ +import * as fullRepository from "../full-repository.js"; + +import { expect, it } from "vitest"; + +import { getQueryPlan } from "../../lib/query-builder/query-plan.js"; + +it("can crate a query plan", () => { + const { queryBuilder } = fullRepository; + + const queryPlan = getQueryPlan(queryBuilder.repository, { + members: [ + "artists.name", + "tracks.name", + "albums.title", + "tracks.unit_price", + "invoice_lines.quantity", + ], + filters: [ + { + operator: "equals", + member: "genres.name", + value: ["Rock"], + }, + { operator: "gt", member: "invoice_lines.unit_price", value: [0] }, + { operator: "gt", member: "invoice_lines.quantity", value: [0] }, + { operator: "gt", member: "tracks.unit_price", value: [0] }, + { operator: "gt", member: "invoices.total", value: [100] }, + ], + order: [{ member: "artists.name", direction: "asc" }], + }); + + expect(queryPlan).toMatchObject({ + segments: [ + { + models: ["artists", "tracks", "albums", "genres"], + modelQuery: { + dimensions: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "tracks.track_id", + ], + metrics: ["tracks.unit_price"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "tracks.track_id", + "tracks.unit_price", + ], + }, + segmentQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: ["tracks.unit_price"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "tracks.unit_price", + ], + }, + rootQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: ["tracks.unit_price"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "tracks.unit_price", + ], + }, + alias: "s0", + initialModel: "tracks", + filters: [ + { + operator: "equals", + member: "genres.name", + value: ["Rock"], + }, + ], + }, + { + models: ["artists", "tracks", "albums", "genres", "invoice_lines"], + modelQuery: { + dimensions: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "invoice_lines.invoice_line_id", + ], + metrics: ["invoice_lines.quantity", "invoice_lines.unit_price"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "invoice_lines.invoice_line_id", + "invoice_lines.quantity", + "invoice_lines.unit_price", + ], + }, + segmentQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: ["invoice_lines.quantity", "invoice_lines.unit_price"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "invoice_lines.quantity", + "invoice_lines.unit_price", + ], + }, + rootQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: ["invoice_lines.quantity"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "invoice_lines.quantity", + ], + }, + alias: "s1", + initialModel: "invoice_lines", + filters: [ + { + operator: "equals", + member: "genres.name", + value: ["Rock"], + }, + ], + }, + { + models: ["artists", "tracks", "albums", "genres", "invoices"], + modelQuery: { + dimensions: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "invoices.invoice_id", + ], + metrics: ["invoices.total"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "genres.name", + "invoices.invoice_id", + "invoices.total", + ], + }, + segmentQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: ["invoices.total"], + members: [ + "artists.name", + "tracks.name", + "albums.title", + "invoices.total", + ], + }, + rootQuery: { + dimensions: ["artists.name", "tracks.name", "albums.title"], + metrics: [], + members: ["artists.name", "tracks.name", "albums.title"], + }, + alias: "s2", + initialModel: "invoices", + filters: [ + { + operator: "equals", + member: "genres.name", + value: ["Rock"], + }, + ], + }, + ], + filters: [ + { + operator: "gt", + member: "invoice_lines.unit_price", + value: [0], + }, + { + operator: "gt", + member: "invoice_lines.quantity", + value: [0], + }, + { + operator: "gt", + member: "tracks.unit_price", + value: [0], + }, + { + operator: "gt", + member: "invoices.total", + value: [100], + }, + ], + projectedDimensions: ["artists.name", "tracks.name", "albums.title"], + projectedMetrics: ["tracks.unit_price", "invoice_lines.quantity"], + order: [ + { + member: "artists.name", + direction: "asc", + }, + ], + }); +}); diff --git a/src/lib/query-builder.ts b/src/lib/query-builder.ts index 8976ed3..3dc8584 100644 --- a/src/lib/query-builder.ts +++ b/src/lib/query-builder.ts @@ -1,12 +1,10 @@ import { AnyInputQuery, AnyMemberFormat, - FilterType, HierarchyConfig, InputQuery, IntrospectionResult, MemberNameToType, - Query, QueryMemberName, QueryReturnType, SqlQueryResult, @@ -18,8 +16,7 @@ import { AnyBaseDialect } from "./dialect/base.js"; import { HierarchyElementConfig } from "./hierarchy.js"; import { buildQuery } from "./query-builder/build-query.js"; import { FilterBuilder } from "./query-builder/filter-builder.js"; -import { findOptimalJoinGraph } from "./query-builder/optimal-join-graph.js"; -import { processQueryAndExpandToSegments } from "./query-builder/process-query-and-expand-to-segments.js"; +import { getQueryPlan } from "./query-builder/query-plan.js"; import { QuerySchema, buildQuerySchema } from "./query-schema.js"; import type { AnyRepository } from "./repository.js"; import { SqlQuery } from "./sql-builder/to-sql.js"; @@ -30,33 +27,6 @@ function isValidGranularityConfigElements( return elements.length > 0; } -function transformInputQueryToQuery( - queryBuilder: AnyQueryBuilder, - parsedQuery: AnyInputQuery, -) { - const { members, ...restQuery } = parsedQuery; - const dimensionsAndMetrics = members.reduce<{ - dimensions: string[]; - metrics: string[]; - }>( - (acc, memberName) => { - const member = queryBuilder.repository.getMember(memberName); - if (member.isDimension()) { - acc.dimensions.push(memberName); - } else { - acc.metrics.push(memberName); - } - - return acc; - }, - { dimensions: [], metrics: [] }, - ); - - return { - ...dimensionsAndMetrics, - ...restQuery, - } as Query; -} export class QueryBuilder< C, D extends MemberNameToType, @@ -159,23 +129,8 @@ export class QueryBuilder< parsedQuery: AnyInputQuery, context: unknown, ): SqlQuery { - const transformedQuery = transformInputQueryToQuery(this, parsedQuery); - const { query, referencedModels, segments } = - processQueryAndExpandToSegments(this.repository, transformedQuery); - - const joinGraph = findOptimalJoinGraph( - this.repository.graph, - referencedModels.all, - ); - - const sqlQuery = buildQuery( - this, - context, - query, - referencedModels, - joinGraph, - segments, - ); + const queryPlan = getQueryPlan(this.repository, parsedQuery); + const sqlQuery = buildQuery(this, context, queryPlan); return sqlQuery.toSQL(); } @@ -220,14 +175,10 @@ export class QueryBuilder< return result; } - getFilterBuilder( - filterType: FilterType, - referencedModels: string[], - metricPrefixes?: Record, - ): FilterBuilder { + getFilterBuilder(): FilterBuilder { return this.repository .getFilterFragmentBuilderRegistry() - .getFilterBuilder(this, filterType, referencedModels, metricPrefixes); + .getFilterBuilder(this); } introspect(query: AnyInputQuery): IntrospectionResult { diff --git a/src/lib/query-builder/build-query.ts b/src/lib/query-builder/build-query.ts index eb9d853..4c5fb32 100644 --- a/src/lib/query-builder/build-query.ts +++ b/src/lib/query-builder/build-query.ts @@ -1,23 +1,19 @@ -import * as graphlib from "@dagrejs/graphlib"; - -import { ModelQuery, Order, Query, QuerySegment } from "../types.js"; - import invariant from "tiny-invariant"; import type { AnyJoin } from "../join.js"; import { AnyQueryBuilder } from "../query-builder.js"; import type { AnyRepository } from "../repository.js"; - -interface ReferencedModels { - all: string[]; - dimensions: string[]; - metrics: string[]; -} - -function getDefaultOrderBy(repository: AnyRepository, query: Query): Order[] { - const firstDimensionName = query.dimensions?.[0]; - const firstMetricName = query.metrics?.[0]; - - for (const dimensionName of query.dimensions ?? []) { +import { SqlQueryBuilder } from "../sql-builder.js"; +import { Order } from "../types.js"; +import { QueryPlan } from "./query-plan.js"; + +function getDefaultOrderBy( + repository: AnyRepository, + query: QueryPlan, +): Order[] { + const firstDimensionName = query.projectedDimensions?.[0]; + const firstMetricName = query.projectedMetrics?.[0]; + + for (const dimensionName of query.projectedDimensions ?? []) { const dimension = repository.getDimension(dimensionName); if (dimension.getGranularity()) { return [{ member: dimensionName, direction: "asc" }]; @@ -35,16 +31,22 @@ function getDefaultOrderBy(repository: AnyRepository, query: Query): Order[] { return []; } -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Essential complexity +function getAlias(index: number) { + return `q${index}`; +} + +function arrayHasAtLeastOneElement(value: T[]): value is [T, ...T[]] { + return value.length > 0; +} + +// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: function buildModelQuery( queryBuilder: AnyQueryBuilder, context: unknown, - joinGraph: graphlib.Graph, - modelQueries: Record, - source: string, + segment: QueryPlan["segments"][number], ) { const visitedModels = new Set(); - const model = queryBuilder.repository.getModel(source); + const model = queryBuilder.repository.getModel(segment.initialModel); const sqlQuery = queryBuilder.dialect.from( model.getTableNameOrSql( queryBuilder.repository, @@ -53,30 +55,33 @@ function buildModelQuery( ), ); - const modelStack: { modelName: string; join?: AnyJoin }[] = [ - { modelName: source }, + for (const memberName of segment.modelQuery.members) { + const metric = queryBuilder.repository.getMember(memberName); + const modelQueryProjection = metric.getModelQueryProjection( + queryBuilder.repository, + queryBuilder.dialect, + context, + ); + + for (const fragment of modelQueryProjection) { + sqlQuery.select(fragment); + } + } + + const modelsToProcess: { modelName: string; join?: AnyJoin }[] = [ + { modelName: segment.initialModel }, ]; - while (modelStack.length > 0) { - const { modelName, join } = modelStack.pop()!; + while (modelsToProcess.length > 0) { + const { modelName, join } = modelsToProcess.pop()!; if (visitedModels.has(modelName)) { continue; } visitedModels.add(modelName); - const modelQuery = modelQueries[modelName]; - const model = queryBuilder.repository.getModel(modelName); - const hasMetrics = modelQuery?.metrics && modelQuery.metrics.size > 0; - const unvisitedNeighbors = (joinGraph.neighbors(modelName) ?? []).filter( - (modelName) => !visitedModels.has(modelName), - ); - const dimensionNames = new Set(modelQuery?.dimensions || []); - - if (hasMetrics) { - for (const d of model.getPrimaryKeyDimensions()) { - dimensionNames.add(d.getPath()); - } - } + const unvisitedNeighbors = ( + segment.joinGraph.neighbors(modelName) ?? [] + ).filter((modelName) => !visitedModels.has(modelName)); if (join) { const joinType = join.reversed ? "rightJoin" : "leftJoin"; @@ -101,33 +106,7 @@ function buildModelQuery( } } - for (const metricName of modelQuery?.metrics || []) { - const metric = queryBuilder.repository.getMetric(metricName); - const modelQueryProjection = metric.getModelQueryProjection( - queryBuilder.repository, - queryBuilder.dialect, - context, - ); - - for (const fragment of modelQueryProjection) { - sqlQuery.select(fragment); - } - } - - for (const dimensionName of dimensionNames) { - const dimension = queryBuilder.repository.getDimension(dimensionName); - const modelQueryProjection = dimension.getModelQueryProjection( - queryBuilder.repository, - queryBuilder.dialect, - context, - ); - - for (const fragment of modelQueryProjection) { - sqlQuery.select(fragment); - } - } - - modelStack.push( + modelsToProcess.push( ...unvisitedNeighbors.map((unvisitedModelName) => ({ modelName: unvisitedModelName, join: queryBuilder.repository.getJoin(modelName, unvisitedModelName), @@ -138,37 +117,20 @@ function buildModelQuery( return sqlQuery; } -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Essential complexity function buildSegmentQuery( queryBuilder: AnyQueryBuilder, context: unknown, - joinGraph: graphlib.Graph, - segment: QuerySegment, - overrideModelQueryAlias?: string, -) { - const sources = joinGraph.sources(); - - const source = - segment.referencedModels.metrics.length > 0 - ? segment.referencedModels.metrics[0] - : sources[0]; + segment: QueryPlan["segments"][number], + alias?: string, +): SqlQueryBuilder { + const modelQueryAlias = alias ?? segment.alias; - invariant(source, "No source found for segment"); + const initialSqlQuery = buildModelQuery(queryBuilder, context, segment); - const modelQueryAlias = overrideModelQueryAlias ?? `${source}_query`; - - const initialSqlQuery = buildModelQuery( - queryBuilder, - context, - joinGraph, - segment.modelQueries, - source, - ); - - if (segment.query.filters) { + if (segment.filters) { const filter = queryBuilder - .getFilterBuilder("dimension", segment.referencedModels.all) - .buildFilters(segment.query.filters, "and", context); + .getFilterBuilder() + .buildFilters(segment.filters, "and", context); if (filter) { initialSqlQuery.where( @@ -184,35 +146,9 @@ function buildSegmentQuery( initialSqlQuery.as(modelQueryAlias), ); - for (const dimensionName of segment.query.dimensions || []) { - const dimension = queryBuilder.repository.getDimension(dimensionName); - const segmentQueryProjection = dimension.getSegmentQueryProjection( - queryBuilder.repository, - queryBuilder.dialect, - context, - modelQueryAlias, - ); - - for (const fragment of segmentQueryProjection) { - sqlQuery.select(fragment); - } - - // Always GROUP BY by the dimensions, if there are no metrics, it will behave as DISTINCT - const segmentQueryGroupBy = dimension.getSegmentQueryGroupBy( - queryBuilder.repository, - queryBuilder.dialect, - context, - modelQueryAlias, - ); - - for (const fragment of segmentQueryGroupBy) { - sqlQuery.groupBy(fragment); - } - } - - for (const metricName of segment.query.metrics || []) { - const metric = queryBuilder.repository.getMetric(metricName); - const segmentQueryProjection = metric.getSegmentQueryProjection( + for (const memberName of segment.segmentQuery.members) { + const member = queryBuilder.repository.getMember(memberName); + const segmentQueryProjection = member.getSegmentQueryProjection( queryBuilder.repository, queryBuilder.dialect, context, @@ -223,7 +159,9 @@ function buildSegmentQuery( sqlQuery.select(fragment); } - const segmentQueryGroupBy = metric.getSegmentQueryGroupBy( + // We always GROUP BY the dimensions, if there are no metrics, it will behave as DISTINCT + // For metrics, this is currently NOOP because Metric returns an empty array + const segmentQueryGroupBy = member.getSegmentQueryGroupBy( queryBuilder.repository, queryBuilder.dialect, context, @@ -235,53 +173,57 @@ function buildSegmentQuery( } } - return { ...segment, sqlQuery }; -} - -function getAlias(index: number) { - return `q${index}`; + return sqlQuery; } -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Essential complexity -export function buildRootQuery( +function buildRootQuery( queryBuilder: AnyQueryBuilder, context: unknown, - query: Query, - joinGraph: graphlib.Graph, - segments: QuerySegment[], -) { + queryPlan: QueryPlan, +): SqlQueryBuilder { + const segments = queryPlan.segments; + + invariant(arrayHasAtLeastOneElement(segments), "No query segments found"); + if (segments.length === 1) { - const sqlQuerySegment = buildSegmentQuery( + const sqlQuery = buildSegmentQuery( queryBuilder, context, - joinGraph, segments[0]!, getAlias(0), ); - return sqlQuerySegment.sqlQuery; + return sqlQuery; } - const sqlQuerySegments = segments.map((segment) => - buildSegmentQuery(queryBuilder, context, joinGraph, segment), + const segmentsWithSqlQuery = segments.map((segment) => ({ + segment, + sqlQuery: buildSegmentQuery(queryBuilder, context, segment), + })); + + invariant( + arrayHasAtLeastOneElement(segmentsWithSqlQuery), + "No segments with sql query found", ); - const [initialSqlQuerySegment, ...restSqlQuerySegments] = sqlQuerySegments; - invariant(initialSqlQuerySegment, "No initial sql query segment found"); + const [initialSegmentWithSqlQuery, ...restSegmentsWithSqlQuery] = + segmentsWithSqlQuery; - const joinOnDimensions = query.dimensions?.map((dimensionName) => { - return queryBuilder.repository.getDimension(dimensionName); - }); + const joinOnDimensions = queryPlan.projectedDimensions.map( + (dimensionName) => { + return queryBuilder.repository.getDimension(dimensionName); + }, + ); const rootQueryAlias = getAlias(0); const rootSqlQuery = queryBuilder.dialect.from( - initialSqlQuerySegment.sqlQuery.as(rootQueryAlias), + initialSegmentWithSqlQuery.sqlQuery.as(rootQueryAlias), ); - for (const dimensionName of initialSqlQuerySegment.projectedQuery - .dimensions || []) { - const dimension = queryBuilder.repository.getDimension(dimensionName); - const rootQueryProjection = dimension.getRootQueryProjection( + for (const memberName of initialSegmentWithSqlQuery.segment.rootQuery + .members) { + const member = queryBuilder.repository.getMember(memberName); + const rootQueryProjection = member.getRootQueryProjection( queryBuilder.repository, queryBuilder.dialect, context, @@ -293,24 +235,8 @@ export function buildRootQuery( } } - for (const metricName of initialSqlQuerySegment.projectedQuery.metrics || - []) { - const metric = queryBuilder.repository.getMetric(metricName); - - const rootQueryProjection = metric.getRootQueryProjection( - queryBuilder.repository, - queryBuilder.dialect, - context, - rootQueryAlias, - ); - - for (const fragment of rootQueryProjection) { - rootSqlQuery.select(fragment); - } - } - - for (let i = 0; i < restSqlQuerySegments.length; i++) { - const segment = restSqlQuerySegments[i]!; + for (let i = 0; i < restSegmentsWithSqlQuery.length; i++) { + const segmentWithSqlQuery = restSegmentsWithSqlQuery[i]!; const segmentQueryAlias = getAlias(i + 1); const joinOn = joinOnDimensions && joinOnDimensions.length > 0 @@ -330,23 +256,21 @@ export function buildRootQuery( : "1 = 1"; rootSqlQuery.innerJoin( - segment.sqlQuery.as(segmentQueryAlias), + segmentWithSqlQuery.sqlQuery.as(segmentQueryAlias), queryBuilder.dialect.fragment(joinOn), ); - for (const metricName of segment.projectedQuery.metrics || []) { - if ((query.metrics ?? []).includes(metricName)) { - const metric = queryBuilder.repository.getMetric(metricName); - const rootQueryProjection = metric.getRootQueryProjection( - queryBuilder.repository, - queryBuilder.dialect, - context, - segmentQueryAlias, - ); - - for (const fragment of rootQueryProjection) { - rootSqlQuery.select(fragment); - } + for (const metricName of segmentWithSqlQuery.segment.rootQuery.metrics) { + const metric = queryBuilder.repository.getMetric(metricName); + const rootQueryProjection = metric.getRootQueryProjection( + queryBuilder.repository, + queryBuilder.dialect, + context, + segmentQueryAlias, + ); + + for (const fragment of rootQueryProjection) { + rootSqlQuery.select(fragment); } } } @@ -356,42 +280,24 @@ export function buildRootQuery( export function buildQuery( queryBuilder: AnyQueryBuilder, context: unknown, - query: Query, - referencedModels: ReferencedModels, - joinGraph: graphlib.Graph, - segments: QuerySegment[], + queryPlan: QueryPlan, ) { - const rootSqlQuery = buildRootQuery( - queryBuilder, - context, - query, - joinGraph, - segments, - ); + const rootQuery = buildRootQuery(queryBuilder, context, queryPlan); - if (query.filters) { - const metricPrefixes = segments.reduce>( - (acc, segment, idx) => { - if (segment.metricModel) { - acc[segment.metricModel] = getAlias(idx); - } - return acc; - }, - {}, - ); + if (queryPlan.filters) { const filter = queryBuilder - .getFilterBuilder("metric", referencedModels.metrics, metricPrefixes) - .buildFilters(query.filters, "and", context); + .getFilterBuilder() + .buildFilters(queryPlan.filters, "and", context); if (filter) { - rootSqlQuery.where( + rootQuery.where( queryBuilder.dialect.fragment(filter.sql, filter.bindings), ); } } const orderBy = ( - query.order || getDefaultOrderBy(queryBuilder.repository, query) + queryPlan.order || getDefaultOrderBy(queryBuilder.repository, queryPlan) ).map(({ member, direction }) => { const quotedMemberAlias = queryBuilder.dialect.asIdentifier( queryBuilder.repository.getMember(member).getAlias(), @@ -400,11 +306,11 @@ export function buildQuery( }); if (orderBy.length > 0) { - rootSqlQuery.orderBy(orderBy.join(", ")); + rootQuery.orderBy(orderBy.join(", ")); } - rootSqlQuery.limit(query.limit ?? 5000); - rootSqlQuery.offset(query.offset ?? 0); + rootQuery.limit(queryPlan.limit ?? 5000); + rootQuery.offset(queryPlan.offset ?? 0); - return rootSqlQuery; + return rootQuery; } diff --git a/src/lib/query-builder/filter-builder.ts b/src/lib/query-builder/filter-builder.ts index 8d733fd..adec6dd 100644 --- a/src/lib/query-builder/filter-builder.ts +++ b/src/lib/query-builder/filter-builder.ts @@ -1,9 +1,4 @@ -import { - AndConnective, - AnyQueryFilter, - FilterType, - OrConnective, -} from "../types.js"; +import { AndConnective, AnyQueryFilter, OrConnective } from "../types.js"; import { afterDate as filterAfterDate, beforeDate as filterBeforeDate, @@ -48,39 +43,26 @@ import { AnyQueryBuilder } from "../query-builder.js"; import { SqlFragment } from "../sql-builder.js"; export class FilterBuilder { - private readonly referencedModels: Set; - constructor( private readonly filterFragmentBuilders: Record< string, AnyFilterFragmentBuilder >, public readonly queryBuilder: AnyQueryBuilder, - private readonly filterType: FilterType, - referencedModels: string[], - private readonly metricPrefixes?: Record, - ) { - this.referencedModels = new Set(referencedModels); - } + ) {} getMemberSql(memberName: string, context: unknown): SqlFragment | undefined { const member = this.queryBuilder.repository.getMember(memberName); - if (this.referencedModels.has(member.model.name)) { - if (this.filterType === "dimension" && member.isDimension()) { - return member.getSql( - this.queryBuilder.repository, - this.queryBuilder.dialect, - context, - ); - } - if (this.filterType === "metric" && member.isMetric()) { - const prefix = this.metricPrefixes?.[member.model.name]; - const sql = this.queryBuilder.dialect.asIdentifier(member.getAlias()); - return SqlFragment.fromSql( - prefix - ? `${this.queryBuilder.dialect.asIdentifier(prefix)}.${sql}` - : sql, - ); - } + + if (member.isDimension()) { + return member.getSql( + this.queryBuilder.repository, + this.queryBuilder.dialect, + context, + ); + } + if (member.isMetric()) { + const sql = this.queryBuilder.dialect.asIdentifier(member.getAlias()); + return SqlFragment.fromSql(sql); } } @@ -159,19 +141,8 @@ export class FilterFragmentBuilderRegistry { getFilterFragmentBuilders() { return Object.values(this.filterFragmentBuilders); } - getFilterBuilder( - queryBuilder: AnyQueryBuilder, - filterType: FilterType, - referencedModels: string[], - metricPrefixes?: Record, - ): FilterBuilder { - return new FilterBuilder( - this.filterFragmentBuilders, - queryBuilder, - filterType, - referencedModels, - metricPrefixes, - ); + getFilterBuilder(queryBuilder: AnyQueryBuilder): FilterBuilder { + return new FilterBuilder(this.filterFragmentBuilders, queryBuilder); } } diff --git a/src/lib/query-builder/process-query-and-expand-to-segments.ts b/src/lib/query-builder/process-query-and-expand-to-segments.ts deleted file mode 100644 index 37cf802..0000000 --- a/src/lib/query-builder/process-query-and-expand-to-segments.ts +++ /dev/null @@ -1,246 +0,0 @@ -import { - AnyQueryFilter, - ModelQuery, - Order, - Query, - QuerySegment, -} from "../types.js"; - -import { AnyRepository } from "../repository.js"; - -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: -function analyzeQuery(repository: AnyRepository, query: Query) { - const allModels = new Set(); - const dimensionModels = new Set(); - const metricModels = new Set(); - const projectedDimensionsByModel: Record> = {}; - const dimensionsByModel: Record> = {}; - const projectedMetricsByModel: Record> = {}; - const metricsByModel: Record> = {}; - const allMemberNames = new Set(); - const allProjectedMemberNames = new Set(); - - for (const dimension of query.dimensions || []) { - const modelName = repository.getDimension(dimension).model.name; - allModels.add(modelName); - allMemberNames.add(dimension); - allProjectedMemberNames.add(dimension); - dimensionModels.add(modelName); - dimensionsByModel[modelName] ||= new Set(); - dimensionsByModel[modelName]!.add(dimension); - projectedDimensionsByModel[modelName] ||= new Set(); - projectedDimensionsByModel[modelName]!.add(dimension); - } - - for (const metricName of query.metrics || []) { - const modelName = repository.getMetric(metricName).model.name; - allModels.add(modelName); - allMemberNames.add(metricName); - allProjectedMemberNames.add(metricName); - metricModels.add(modelName); - metricsByModel[modelName] ||= new Set(); - metricsByModel[modelName]!.add(metricName); - projectedMetricsByModel[modelName] ||= new Set(); - projectedMetricsByModel[modelName]!.add(metricName); - } - - const filterStack: AnyQueryFilter[] = [...(query.filters || [])]; - - while (filterStack.length > 0) { - const filter = filterStack.pop()!; - if (filter.operator === "and" || filter.operator === "or") { - filterStack.push(...filter.filters); - } else { - const member = repository.getMember(filter.member); - const modelName = member.model.name; - - allModels.add(modelName); - allMemberNames.add(filter.member); - - if (member.isDimension()) { - // dimensionModels are used for join of query segments - // so we're not adding them here, because we don't have - // a guarantee that join on dimensions will be projected - // (and if we projected them automatically, we'd get wrong results) - // In the segment query allModels are used to join models, which - // means that any dimension filters will work - dimensionsByModel[modelName] ||= new Set(); - dimensionsByModel[modelName]!.add(filter.member); - } else { - metricModels.add(modelName); - metricsByModel[modelName] ||= new Set(); - metricsByModel[modelName]!.add(filter.member); - } - } - } - - const orderByWithoutNonProjectedMembers = (query.order ?? []).reduce( - (acc, { member, direction }) => { - if (allProjectedMemberNames.has(member)) { - acc.push({ member, direction }); - } - return acc; - }, - [], - ); - - return { - allModels, - dimensionModels, - metricModels, - dimensionsByModel, - projectedDimensionsByModel, - metricsByModel, - projectedMetricsByModel, - order: - Object.keys(orderByWithoutNonProjectedMembers).length > 0 - ? orderByWithoutNonProjectedMembers - : undefined, - }; -} - -interface PreparedQuery { - dimensions: Set; - metrics: Set; - filters: []; -} - -// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: -function getQuerySegment( - queryAnalysis: ReturnType, - metricModel: string | null, -): QuerySegment { - const queries: { - query: PreparedQuery; - projectedQuery: PreparedQuery; - } = { - query: { - dimensions: new Set(), - metrics: new Set(), - filters: [], - }, - projectedQuery: { - dimensions: new Set(), - metrics: new Set(), - filters: [], - }, - }; - - const queriesKeys = Object.keys(queries) as (keyof typeof queries)[]; - - const referencedModels = { - all: new Set(queryAnalysis.allModels), - dimensions: new Set(), - metrics: new Set(), - }; - - const modelQueries: Record = {}; - - for (const q of queriesKeys) { - for (const [modelName, dimensions] of Object.entries( - queryAnalysis.projectedDimensionsByModel, - )) { - referencedModels.all.add(modelName); - referencedModels.dimensions.add(modelName); - - for (const dimension of dimensions) { - queries[q].dimensions.add(dimension); - } - - modelQueries[modelName] = { - dimensions: new Set(dimensions), - metrics: new Set(), - }; - } - } - - if (metricModel) { - referencedModels.all.add(metricModel); - referencedModels.metrics.add(metricModel); - modelQueries[metricModel] ||= { - dimensions: new Set(), - metrics: new Set(), - }; - - for (const q of queriesKeys) { - const metrics = metricModel - ? queryAnalysis[ - q === "query" ? "metricsByModel" : "projectedMetricsByModel" - ][metricModel] ?? new Set() - : new Set(); - for (const metric of metrics) { - queries[q].metrics.add(metric); - modelQueries[metricModel]!.metrics.add(metric); - } - } - } - - return { - query: { - ...queries.query, - dimensions: Array.from(queries.query.dimensions), - metrics: Array.from(queries.query.metrics), - }, - projectedQuery: { - ...queries.projectedQuery, - dimensions: Array.from(queries.projectedQuery.dimensions), - metrics: Array.from(queries.projectedQuery.metrics), - }, - referencedModels: { - all: Array.from(referencedModels.all), - dimensions: Array.from(referencedModels.dimensions), - metrics: Array.from(referencedModels.metrics), - }, - modelQueries: modelQueries, - metricModel: metricModel, - }; -} - -function mergeQuerySegmentWithFilters( - segment: QuerySegment, - filters: AnyQueryFilter[] | undefined, -): QuerySegment { - return { - ...segment, - query: { - ...segment.query, - filters: filters || [], - }, - projectedQuery: { - ...segment.projectedQuery, - filters: filters || [], - }, - }; -} - -export function processQueryAndExpandToSegments( - repository: AnyRepository, - query: Query, -) { - const queryAnalysis = analyzeQuery(repository, query); - const metricModels = Array.from(queryAnalysis.metricModels); - const segments = - metricModels.length === 0 - ? [ - mergeQuerySegmentWithFilters( - getQuerySegment(queryAnalysis, null), - query.filters, - ), - ] - : metricModels.map((model) => - mergeQuerySegmentWithFilters( - getQuerySegment(queryAnalysis, model), - query.filters, - ), - ); - - return { - query: { ...query, order: queryAnalysis.order }, - referencedModels: { - all: Array.from(queryAnalysis.allModels), - dimensions: Array.from(queryAnalysis.dimensionModels), - metrics: Array.from(queryAnalysis.metricModels), - }, - segments, - }; -} diff --git a/src/lib/query-builder/query-plan.ts b/src/lib/query-builder/query-plan.ts new file mode 100644 index 0000000..c5a25b5 --- /dev/null +++ b/src/lib/query-builder/query-plan.ts @@ -0,0 +1,343 @@ +import { + AnyModel, + BasicDimension, + BasicMetric, + Member, +} from "../semantic-layer.js"; +import { AnyInputQuery, Order } from "../types.js"; + +import invariant from "tiny-invariant"; +import { AnyRepository } from "../repository.js"; +import { findOptimalJoinGraph } from "./optimal-join-graph.js"; + +export type QueryFilterConnective = { + operator: "and" | "or"; + filters: QueryFilter[]; +}; + +export type QueryFilter = + | QueryFilterConnective + | { + operator: string; + member: string; + value: any; + }; + +function filterIsConnective( + filter: QueryFilter, +): filter is QueryFilterConnective { + return filter.operator === "and" || filter.operator === "or"; +} + +function getFirstMemberFilter(filter: QueryFilter) { + if (filterIsConnective(filter)) { + return getFirstMemberFilter(filter.filters[0]!); + } + return filter; +} + +function getDimensionAndMetricFilters( + repository: AnyRepository, + filters: QueryFilter[] | undefined, +) { + return (filters ?? []).reduce<{ + dimensionFilters: QueryFilter[]; + metricFilters: QueryFilter[]; + }>( + (acc, filter) => { + const memberFilter = getFirstMemberFilter(filter); + const member = repository.getMember(memberFilter.member); + if (member.isDimension()) { + acc.dimensionFilters.push(filter); + } else { + acc.metricFilters.push(filter); + } + return acc; + }, + { dimensionFilters: [], metricFilters: [] }, + ); +} + +function getFiltersMembers(repository: AnyRepository, filters: QueryFilter[]) { + const members: Member[] = []; + const filtersToProcess = [...filters]; + while (filtersToProcess.length > 0) { + const filter = filtersToProcess.pop()!; + if (filterIsConnective(filter)) { + filtersToProcess.push(...filter.filters); + } else { + const member = repository.getMember(filter.member); + members.push(member); + } + } + return members; +} + +function getMembersDimensionsAndMetrics( + repository: AnyRepository, + members: string[], +) { + return members.reduce<{ + dimensions: BasicDimension[]; + metrics: BasicMetric[]; + }>( + (acc, memberName) => { + const member = repository.getMember(memberName); + if (member.isDimension()) { + acc.dimensions.push(member); + } else { + acc.metrics.push(member); + } + return acc; + }, + { dimensions: [], metrics: [] }, + ); +} + +function getSegmentAlias(index: number) { + return `s${index}`; +} + +function getSegmentQueryModelsAndMembers({ + dimensions, + metrics, +}: { + dimensions: { projected: BasicDimension[]; filter: BasicDimension[] }; + metrics?: { + projected: BasicMetric[]; + filter: BasicMetric[]; + model: string; + }; +}) { + const models = new Set(); + const modelQueryDimensions = new Set(); + const modelQueryMetrics = new Set(); + const segmentQueryDimensions = new Set(); + const segmentQueryMetrics = new Set(); + const rootQueryDimensions = new Set(); + const rootQueryMetrics = new Set(); + + for (const dimension of dimensions.projected) { + modelQueryDimensions.add(dimension); + segmentQueryDimensions.add(dimension); + rootQueryDimensions.add(dimension); + models.add(dimension.model); + } + + for (const dimension of dimensions.filter) { + modelQueryDimensions.add(dimension); + models.add(dimension.model); + } + + for (const metric of metrics?.projected ?? []) { + modelQueryMetrics.add(metric); + segmentQueryMetrics.add(metric); + rootQueryMetrics.add(metric); + + const metricModel = metric.model; + const primaryKeyDimensions = metricModel.getPrimaryKeyDimensions(); + for (const primaryKeyDimension of primaryKeyDimensions) { + modelQueryDimensions.add(primaryKeyDimension); + } + models.add(metricModel); + } + + for (const metric of metrics?.filter ?? []) { + modelQueryMetrics.add(metric); + segmentQueryMetrics.add(metric); + + const metricModel = metric.model; + const primaryKeyDimensions = metricModel.getPrimaryKeyDimensions(); + for (const primaryKeyDimension of primaryKeyDimensions) { + modelQueryDimensions.add(primaryKeyDimension); + } + models.add(metricModel); + } + + const modelQueryDimensionsArray = Array.from(modelQueryDimensions).map((d) => + d.getPath(), + ); + const segmentQueryDimensionsArray = Array.from(segmentQueryDimensions).map( + (d) => d.getPath(), + ); + const rootQueryDimensionsArray = Array.from(rootQueryDimensions).map((d) => + d.getPath(), + ); + const modelQueryMetricsArray = Array.from(modelQueryMetrics).map((m) => + m.getPath(), + ); + const segmentQueryMetricsArray = Array.from(segmentQueryMetrics).map((m) => + m.getPath(), + ); + const rootQueryMetricsArray = Array.from(rootQueryMetrics).map((m) => + m.getPath(), + ); + + return { + models: Array.from(models).map((m) => m.name), + modelQuery: { + dimensions: modelQueryDimensionsArray, + metrics: modelQueryMetricsArray, + members: [...modelQueryDimensionsArray, ...modelQueryMetricsArray], + }, + segmentQuery: { + dimensions: segmentQueryDimensionsArray, + metrics: segmentQueryMetricsArray, + members: [...segmentQueryDimensionsArray, ...segmentQueryMetricsArray], + }, + rootQuery: { + dimensions: rootQueryDimensionsArray, + metrics: rootQueryMetricsArray, + members: [...rootQueryDimensionsArray, ...rootQueryMetricsArray], + }, + }; +} + +function getSegmentQuery( + repository: AnyRepository, + { + dimensions, + metrics, + filters, + }: { + dimensions: { projected: BasicDimension[]; filter: BasicDimension[] }; + metrics?: { + projected: BasicMetric[]; + filter: BasicMetric[]; + model: string; + }; + filters: QueryFilter[]; + }, + alias: string, +) { + const initialModel = + metrics?.model ?? + dimensions.projected[0]?.model.name ?? + dimensions.filter[0]?.model.name; + + invariant(initialModel, "Initial model name not found"); + + const segmentModelsAndMembers = getSegmentQueryModelsAndMembers({ + dimensions, + metrics, + }); + + const joinGraph = findOptimalJoinGraph( + repository.graph, + segmentModelsAndMembers.models, + ); + + return { + ...segmentModelsAndMembers, + alias, + joinGraph, + initialModel, + filters, + }; +} + +function orderWithOnlyProjectedMembers( + order: Order[] | undefined, + projectedMembers: string[], +) { + if (!order) { + return; + } + const newOrder = order.filter(({ member }) => + projectedMembers.includes(member), + ); + if (newOrder.length > 0) { + return newOrder; + } +} + +function getMetricsByModel( + projectedMetrics: BasicMetric[], + filtersMetrics: BasicMetric[], +) { + const metricsByModel: Record< + string, + { projected: BasicMetric[]; filter: BasicMetric[] } + > = {}; + + for (const m of projectedMetrics) { + metricsByModel[m.model.name] ||= { projected: [], filter: [] }; + metricsByModel[m.model.name]!.projected.push(m); + } + + for (const m of filtersMetrics) { + metricsByModel[m.model.name] ||= { projected: [], filter: [] }; + metricsByModel[m.model.name]!.filter.push(m); + } + + return Object.entries(metricsByModel); +} + +export function getQueryPlan(repository: AnyRepository, query: AnyInputQuery) { + const { dimensions: projectedDimensions, metrics: projectedMetrics } = + getMembersDimensionsAndMetrics(repository, query.members); + const { dimensionFilters, metricFilters } = getDimensionAndMetricFilters( + repository, + query.filters, + ); + const filtersDimensions = ( + getFiltersMembers(repository, dimensionFilters) as BasicDimension[] + ).filter((dimension) => !projectedDimensions.includes(dimension)); + const filtersMetrics = ( + getFiltersMembers(repository, metricFilters) as BasicMetric[] + ).filter((metric) => !projectedMetrics.includes(metric)); + + const metricsByModel = getMetricsByModel(projectedMetrics, filtersMetrics); + + const segments = + metricsByModel.length > 0 + ? metricsByModel.map(([modelName, metrics], index) => + getSegmentQuery( + repository, + { + dimensions: { + projected: projectedDimensions, + filter: filtersDimensions, + }, + metrics: { + projected: metrics.projected, + filter: metrics.filter, + model: modelName, + }, + filters: dimensionFilters, + }, + getSegmentAlias(index), + ), + ) + : [ + getSegmentQuery( + repository, + { + dimensions: { + projected: projectedDimensions, + filter: filtersDimensions, + }, + filters: dimensionFilters, + }, + getSegmentAlias(0), + ), + ]; + + const projectedDimensionPaths = projectedDimensions.map((d) => d.getPath()); + const projectedMetricPaths = projectedMetrics.map((m) => m.getPath()); + + return { + segments, + filters: metricFilters, + projectedDimensions: projectedDimensionPaths, + projectedMetrics: projectedMetricPaths, + limit: query.limit, + offset: query.offset, + order: orderWithOnlyProjectedMembers(query.order, [ + ...projectedDimensionPaths, + ...projectedMetricPaths, + ]), + }; +} + +export type QueryPlan = ReturnType; diff --git a/src/lib/types.ts b/src/lib/types.ts index 5291ec5..fc8c8d7 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -59,29 +59,6 @@ export type Query = { offset?: number; }; -export interface ModelQuery { - dimensions: Set; - metrics: Set; -} - -export interface QuerySegmentQuery { - dimensions: string[]; - metrics: string[]; - filters: AnyQueryFilter[]; -} - -export interface QuerySegment { - query: QuerySegmentQuery; - projectedQuery: QuerySegmentQuery; - referencedModels: { - all: string[]; - dimensions: string[]; - metrics: string[]; - }; - modelQueries: Record; - metricModel: string | null; -} - export const TemporalGranularityIndex = { time: { description: "Time of underlying field. Example output: 00:00:00",