From e37b2f785e78fe828ca3b4f06e2d4eac839abca9 Mon Sep 17 00:00:00 2001 From: Ravi Suhag Date: Wed, 5 Jun 2024 15:55:35 -0500 Subject: [PATCH] feat(bigquery): store total rows when profiling (#491) --- plugins/extractors/bigquery/bigquery.go | 2 +- plugins/extractors/bigquery/profile.go | 8 +- plugins/extractors/bigquery/profile_test.go | 10 +- .../testdata/expected-assets-mixed.json | 451 +++++++++--------- .../bigquery/testdata/expected-assets.json | 389 ++++++++------- 5 files changed, 417 insertions(+), 443 deletions(-) diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index 1d92b6bad..d3826b737 100755 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -374,7 +374,7 @@ func (e *Extractor) buildAsset(ctx context.Context, t *bigquery.Table, md *bigqu tableFQN := t.FullyQualifiedName() tableURN := plugins.BigQueryURN(t.ProjectID, t.DatasetID, t.TableID) - tableProfile := e.buildTableProfile(tableURN, tableStats) + tableProfile := e.buildTableProfile(tableURN, tableStats, md) var partitionField string partitionData := make(map[string]interface{}) if md.TimePartitioning != nil { diff --git a/plugins/extractors/bigquery/profile.go b/plugins/extractors/bigquery/profile.go index c13e4d3e7..c4b156ebc 100644 --- a/plugins/extractors/bigquery/profile.go +++ b/plugins/extractors/bigquery/profile.go @@ -1,11 +1,12 @@ package bigquery import ( + "cloud.google.com/go/bigquery" v1beta2 "github.com/raystack/meteor/models/raystack/assets/v1beta2" "github.com/raystack/meteor/plugins/extractors/bigquery/auditlog" ) -func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.TableStats) (tp *v1beta2.TableProfile) { +func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.TableStats, md *bigquery.TableMetadata) *v1beta2.TableProfile { var tableUsage int64 var commonJoins []*v1beta2.TableCommonJoin var filterConditions []string @@ -37,11 +38,10 @@ func (e *Extractor) buildTableProfile(tableURN string, tableStats *auditlog.Tabl } } - tp = &v1beta2.TableProfile{ + return &v1beta2.TableProfile{ UsageCount: tableUsage, CommonJoins: commonJoins, Filters: filterConditions, + TotalRows: int64(md.NumRows), } - - return } diff --git a/plugins/extractors/bigquery/profile_test.go b/plugins/extractors/bigquery/profile_test.go index c69dbfcfa..e2a025795 100755 --- a/plugins/extractors/bigquery/profile_test.go +++ b/plugins/extractors/bigquery/profile_test.go @@ -6,6 +6,7 @@ package bigquery import ( "testing" + "cloud.google.com/go/bigquery" v1beta2 "github.com/raystack/meteor/models/raystack/assets/v1beta2" "github.com/raystack/meteor/plugins" "github.com/raystack/meteor/plugins/extractors/bigquery/auditlog" @@ -22,7 +23,7 @@ func TestBuildTableProfile(t *testing.T) { }, } - tp := extr.buildTableProfile(tableURN, tableStats) + tp := extr.buildTableProfile(tableURN, tableStats, &bigquery.TableMetadata{}) assert.Empty(t, tp.UsageCount) assert.Empty(t, tp.CommonJoins) @@ -35,7 +36,7 @@ func TestBuildTableProfile(t *testing.T) { }, } - tp := extr.buildTableProfile(tableURN, nil) + tp := extr.buildTableProfile(tableURN, nil, &bigquery.TableMetadata{}) assert.Empty(t, tp.UsageCount) assert.Empty(t, tp.CommonJoins) @@ -77,7 +78,9 @@ func TestBuildTableProfile(t *testing.T) { }, } - tp := extr.buildTableProfile(tableURN, tableStats) + tp := extr.buildTableProfile(tableURN, tableStats, &bigquery.TableMetadata{ + NumRows: 42, + }) assert.EqualValues(t, 5, tp.UsageCount) assert.Contains(t, tp.CommonJoins, &v1beta2.TableCommonJoin{ @@ -94,5 +97,6 @@ func TestBuildTableProfile(t *testing.T) { Conditions: []string{"ON t1.somefield = t2.anotherfield"}, }) assert.Contains(t, tp.Filters, "WHERE t1.somefield = 'somevalue'") + assert.Equal(t, tp.TotalRows, int64(42)) }) } diff --git a/plugins/extractors/bigquery/testdata/expected-assets-mixed.json b/plugins/extractors/bigquery/testdata/expected-assets-mixed.json index aaf914b09..2fdd1f3eb 100644 --- a/plugins/extractors/bigquery/testdata/expected-assets-mixed.json +++ b/plugins/extractors/bigquery/testdata/expected-assets-mixed.json @@ -1,253 +1,228 @@ [ - { - "create_time": null, - "data": { - "@type": "type.googleapis.com/raystack.assets.v1beta2.Table", - "attributes": { - "clustering_fields": [], - "dataset": "dataset1", - "full_qualified_name": "test-project-id:dataset1.table_a", - "partition_data": { - "require_partition_filter": false + { + "create_time": null, + "data": { + "@type": "type.googleapis.com/raystack.assets.v1beta2.Table", + "attributes": { + "clustering_fields": [], + "dataset": "dataset1", + "full_qualified_name": "test-project-id:dataset1.table_a", + "partition_data": { + "require_partition_filter": false + }, + "project": "test-project-id", + "type": "TABLE" + }, + "columns": [ + { + "attributes": { + "mode": "REQUIRED" }, - "project": "test-project-id", - "type": "TABLE" + "columns": [], + "data_type": "INTEGER", + "description": "", + "is_nullable": false, + "length": "0", + "name": "id", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } }, - "columns": [ - { - "attributes": { - "mode": "REQUIRED" - }, - "columns": [], - "data_type": "INTEGER", - "description": "", - "is_nullable": false, - "length": "0", - "name": "id", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } + { + "attributes": { + "mode": "REQUIRED" }, - { - "attributes": { - "mode": "REQUIRED" - }, - "columns": [], - "data_type": "STRING", - "description": "", - "is_nullable": false, - "length": "0", - "name": "name", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } + "columns": [], + "data_type": "STRING", + "description": "", + "is_nullable": false, + "length": "0", + "name": "name", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "REPEATED" }, - { - "attributes": { - "mode": "REPEATED" - }, - "columns": [ - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "STRING", - "description": "", - "is_nullable": true, - "length": "0", - "name": "key", - "profile": null + "columns": [ + { + "attributes": { + "mode": "NULLABLE" }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "JSON", - "description": "", - "is_nullable": true, - "length": "0", - "name": "value", - "profile": null - } - ], - "data_type": "RECORD", - "description": "", - "is_nullable": false, - "length": "0", - "name": "structarr", - "profile": null - }, - { - "attributes": { - "mode": "NULLABLE" + "columns": [], + "data_type": "STRING", + "description": "", + "is_nullable": true, + "length": "0", + "name": "key", + "profile": null }, - "columns": [], - "data_type": "DATE", - "description": "", - "is_nullable": true, - "length": "0", - "name": "birthday", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "JSON", + "description": "", + "is_nullable": true, + "length": "0", + "name": "value", + "profile": null } + ], + "data_type": "RECORD", + "description": "", + "is_nullable": false, + "length": "0", + "name": "structarr", + "profile": null + }, + { + "attributes": { + "mode": "NULLABLE" }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "NUMERIC", - "description": "", - "is_nullable": true, - "length": "0", - "name": "skillNum", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } + "columns": [], + "data_type": "DATE", + "description": "", + "is_nullable": true, + "length": "0", + "name": "birthday", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "NULLABLE" }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "TIMESTAMP", - "description": "", - "is_nullable": true, - "length": "0", - "name": "created_at", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } + "columns": [], + "data_type": "NUMERIC", + "description": "", + "is_nullable": true, + "length": "0", + "name": "skillNum", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "TIMESTAMP", + "description": "", + "is_nullable": true, + "length": "0", + "name": "created_at", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" } + } + ], + "create_time": "2023-06-13T03:46:12.372974Z", + "preview_fields": [ + "id", + "name", + "structarr", + "birthday", + "skillNum", + "created_at" + ], + "preview_rows": [ + [ + 1, + "alice", + [["profile", "{\"age\": 15}"]], + "2007-02-01", + "9", + "2022-01-02T18:00:00Z" ], - "create_time": "2023-06-13T03:46:12.372974Z", - "preview_fields": [ - "id", - "name", - "structarr", - "birthday", - "skillNum", - "created_at" + [ + 4, + "carol", + [["profile", "{\"age\": 25}"]], + "1992-05-01", + "7", + "2022-01-01T12:00:00Z" ], - "preview_rows": [ - [ - 1, - "alice", - [ - [ - "profile", - "{\"age\": 15}" - ] - ], - "2007-02-01", - "9", - "2022-01-02T18:00:00Z" - ], - [ - 4, - "carol", - [ - [ - "profile", - "{\"age\": 25}" - ] - ], - "1992-05-01", - "7", - "2022-01-01T12:00:00Z" - ], - [ - 5, - "bob", - [ - [ - "profile", - "{\"age\": 10}" - ] - ], - "2012-01-01", - "3", - "2022-01-05T12:00:00Z" - ], - [ - 3, - "dave", - [ - [ - "profile", - "{\"age\": 20}" - ] - ], - "1997-04-01", - "11", - "2022-01-04T00:00:00Z" - ], - [ - 2, - "eve", - [ - [ - "profile", - "{\"age\": 30}" - ] - ], - "2002-03-01", - "5", - "2022-01-03T06:00:00Z" - ] + [ + 5, + "bob", + [["profile", "{\"age\": 10}"]], + "2012-01-01", + "3", + "2022-01-05T12:00:00Z" ], - "profile": { - "common_joins": [], - "filters": [], - "partition_key": "", - "partition_value": "", - "total_rows": "0", - "usage_count": "0" - }, - "update_time": "2023-06-13T03:46:12.372974Z" + [ + 3, + "dave", + [["profile", "{\"age\": 20}"]], + "1997-04-01", + "11", + "2022-01-04T00:00:00Z" + ], + [ + 2, + "eve", + [["profile", "{\"age\": 30}"]], + "2002-03-01", + "5", + "2022-01-03T06:00:00Z" + ] + ], + "profile": { + "common_joins": [], + "filters": [], + "partition_key": "", + "partition_value": "", + "total_rows": "5", + "usage_count": "0" }, - "description": "", - "event": null, - "labels": {}, - "lineage": null, - "name": "table_a", - "owners": [], - "service": "bigquery", - "type": "table", - "update_time": null, - "url": "", - "urn": "urn:bigquery:test-project-id:table:test-project-id:dataset1.table_a" - } - ] \ No newline at end of file + "update_time": "2023-06-13T03:46:12.372974Z" + }, + "description": "", + "event": null, + "labels": {}, + "lineage": null, + "name": "table_a", + "owners": [], + "service": "bigquery", + "type": "table", + "update_time": null, + "url": "", + "urn": "urn:bigquery:test-project-id:table:test-project-id:dataset1.table_a" + } +] diff --git a/plugins/extractors/bigquery/testdata/expected-assets.json b/plugins/extractors/bigquery/testdata/expected-assets.json index 9bf98c273..f163f07a1 100755 --- a/plugins/extractors/bigquery/testdata/expected-assets.json +++ b/plugins/extractors/bigquery/testdata/expected-assets.json @@ -1,201 +1,196 @@ [ - { - "create_time": null, - "data": { - "@type": "type.googleapis.com/raystack.assets.v1beta2.Table", - "attributes": { - "clustering_fields": [], - "dataset": "dataset1", - "full_qualified_name": "test-project-id:dataset1.table_a", - "partition_data": { - "require_partition_filter": false - }, - "project": "test-project-id", - "type": "TABLE" - }, - "columns": [ - { - "attributes": { - "mode": "REQUIRED" - }, - "columns": [], - "data_type": "INTEGER", - "description": "", - "is_nullable": false, - "length": "0", - "name": "id", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } - }, - { - "attributes": { - "mode": "REQUIRED" - }, - "columns": [], - "data_type": "STRING", - "description": "", - "is_nullable": false, - "length": "0", - "name": "name", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } - }, - { - "attributes": { - "mode": "REPEATED" - }, - "columns": [ - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "STRING", - "description": "", - "is_nullable": true, - "length": "0", - "name": "key", - "profile": null - }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "JSON", - "description": "", - "is_nullable": true, - "length": "0", - "name": "value", - "profile": null - } - ], - "data_type": "RECORD", - "description": "", - "is_nullable": false, - "length": "0", - "name": "structarr", - "profile": null - }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "DATE", - "description": "", - "is_nullable": true, - "length": "0", - "name": "birthday", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } - }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "NUMERIC", - "description": "", - "is_nullable": true, - "length": "0", - "name": "skillNum", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } - }, - { - "attributes": { - "mode": "NULLABLE" - }, - "columns": [], - "data_type": "TIMESTAMP", - "description": "", - "is_nullable": true, - "length": "0", - "name": "created_at", - "profile": { - "avg": 0, - "count": "0", - "max": "", - "med": 0, - "min": "", - "top": "", - "unique": "0" - } - } - ], - "create_time": "2023-06-13T03:46:12.372974Z", - "preview_fields": [ - "id", - "name", - "structarr", - "birthday", - "skillNum", - "created_at" - ], - "preview_rows": [ - [ - 1, - "alice", - [ - [ - "profile", - "{\"age\": 10}" - ] - ], - "2012-01-01", - "3", - "2022-01-01T12:00:00Z" - ] - ], - "profile": { - "common_joins": [], - "filters": [], - "partition_key": "", - "partition_value": "", - "total_rows": "0", - "usage_count": "0" + { + "create_time": null, + "data": { + "@type": "type.googleapis.com/raystack.assets.v1beta2.Table", + "attributes": { + "clustering_fields": [], + "dataset": "dataset1", + "full_qualified_name": "test-project-id:dataset1.table_a", + "partition_data": { + "require_partition_filter": false + }, + "project": "test-project-id", + "type": "TABLE" + }, + "columns": [ + { + "attributes": { + "mode": "REQUIRED" + }, + "columns": [], + "data_type": "INTEGER", + "description": "", + "is_nullable": false, + "length": "0", + "name": "id", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "REQUIRED" + }, + "columns": [], + "data_type": "STRING", + "description": "", + "is_nullable": false, + "length": "0", + "name": "name", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "REPEATED" + }, + "columns": [ + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "STRING", + "description": "", + "is_nullable": true, + "length": "0", + "name": "key", + "profile": null }, - "update_time": "2023-06-13T03:46:12.372974Z" + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "JSON", + "description": "", + "is_nullable": true, + "length": "0", + "name": "value", + "profile": null + } + ], + "data_type": "RECORD", + "description": "", + "is_nullable": false, + "length": "0", + "name": "structarr", + "profile": null + }, + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "DATE", + "description": "", + "is_nullable": true, + "length": "0", + "name": "birthday", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + }, + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "NUMERIC", + "description": "", + "is_nullable": true, + "length": "0", + "name": "skillNum", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } }, - "description": "", - "event": null, - "labels": {}, - "lineage": null, - "name": "table_a", - "owners": [], - "service": "bigquery", - "type": "table", - "update_time": null, - "url": "", - "urn": "urn:bigquery:test-project-id:table:test-project-id:dataset1.table_a" - } + { + "attributes": { + "mode": "NULLABLE" + }, + "columns": [], + "data_type": "TIMESTAMP", + "description": "", + "is_nullable": true, + "length": "0", + "name": "created_at", + "profile": { + "avg": 0, + "count": "0", + "max": "", + "med": 0, + "min": "", + "top": "", + "unique": "0" + } + } + ], + "create_time": "2023-06-13T03:46:12.372974Z", + "preview_fields": [ + "id", + "name", + "structarr", + "birthday", + "skillNum", + "created_at" + ], + "preview_rows": [ + [ + 1, + "alice", + [["profile", "{\"age\": 10}"]], + "2012-01-01", + "3", + "2022-01-01T12:00:00Z" + ] + ], + "profile": { + "common_joins": [], + "filters": [], + "partition_key": "", + "partition_value": "", + "total_rows": "5", + "usage_count": "0" + }, + "update_time": "2023-06-13T03:46:12.372974Z" + }, + "description": "", + "event": null, + "labels": {}, + "lineage": null, + "name": "table_a", + "owners": [], + "service": "bigquery", + "type": "table", + "update_time": null, + "url": "", + "urn": "urn:bigquery:test-project-id:table:test-project-id:dataset1.table_a" + } ]