From 01de388c1f7702b9fc530fea6f25a82148c76edf Mon Sep 17 00:00:00 2001 From: Maxime Armstrong <46797220+maximearmstrong@users.noreply.github.com> Date: Fri, 4 Oct 2024 17:42:14 -0400 Subject: [PATCH] [dagster-tableau][fix] Update workbooks and view endpoint (#25085) ## Summary & Motivation Previous implementation worked well when multiple workbooks exist in a workspace, but could not be loaded in Dagster when only one workbook exists. Parsing a XML response with only one workbook would create a JSON object like `{"workbooks": {"workbook": {"id": "my_id"}}}` but one with many workbooks would be `{"workbooks": {"workbook": [{"id": "my_id_1"}, ..., {"id": "my_id_n"}]}}` Updates the code to use `tableauserverclient.WorkbookItem` and `tableauserverclient.ViewItem` instead of XML and JSON objects - Tableau handles parsing their API response in these classes. ## How I Tested These Changes BK with updated tests. ## Changelog NOCHANGELOG - [ ] `NEW` _(added new feature or capability)_ - [ ] `BUGFIX` _(fixed a bug)_ - [ ] `DOCS` _(added or updated documentation)_ --- .../dagster_tableau/resources.py | 64 +++++++++---------- .../dagster_tableau_tests/conftest.py | 64 +++++++++++++++++-- .../libraries/dagster-tableau/setup.py | 1 - 3 files changed, 89 insertions(+), 40 deletions(-) diff --git a/python_modules/libraries/dagster-tableau/dagster_tableau/resources.py b/python_modules/libraries/dagster-tableau/dagster_tableau/resources.py index 7451269ca54ed..99e63f7b51038 100644 --- a/python_modules/libraries/dagster-tableau/dagster_tableau/resources.py +++ b/python_modules/libraries/dagster-tableau/dagster_tableau/resources.py @@ -1,5 +1,4 @@ import datetime -import json import logging import time import uuid @@ -10,7 +9,6 @@ import jwt import requests import tableauserverclient as TSC -import xmltodict from dagster import ( AssetsDefinition, ConfigurableResource, @@ -72,11 +70,10 @@ def _log(self) -> logging.Logger: return get_dagster_logger() @cached_method - def get_workbooks(self) -> Mapping[str, object]: + def get_workbooks(self) -> List[TSC.WorkbookItem]: """Fetches a list of all Tableau workbooks in the workspace.""" - return self._response_to_dict( - self._server.workbooks.get_request(self._server.workbooks.baseurl) - ) + workbooks, _ = self._server.workbooks.get() + return workbooks @cached_method def get_workbook(self, workbook_id) -> Mapping[str, object]: @@ -89,11 +86,9 @@ def get_workbook(self, workbook_id) -> Mapping[str, object]: def get_view( self, view_id: str, - ) -> Mapping[str, object]: + ) -> TSC.ViewItem: """Fetches information for a given view.""" - return self._response_to_dict( - self._server.views.get_request(f"{self._server.views.baseurl}/{view_id}") - ) + return self._server.views.get_by_id(view_id) def get_job( self, @@ -105,9 +100,9 @@ def get_job( def cancel_job( self, job_id: str, - ) -> Mapping[str, object]: - """Fetches information for a given job.""" - return self._response_to_dict(self._server.jobs.cancel(job_id)) + ) -> requests.Response: + """Cancels a given job.""" + return self._server.jobs.cancel(job_id) def refresh_workbook(self, workbook_id) -> TSC.JobItem: """Refreshes all extracts for a given workbook and return the JobItem object.""" @@ -216,12 +211,6 @@ def sign_in(self) -> Auth.contextmgr: tableau_auth = TSC.JWTAuth(jwt_token, site_id=self.site_name) # pyright: ignore (reportAttributeAccessIssue) return self._server.auth.sign_in(tableau_auth) - @staticmethod - def _response_to_dict(response: requests.Response): - return json.loads( - json.dumps(xmltodict.parse(response.text, attr_prefix="", cdata_key="")["tsResponse"]) - ) - @property def workbook_graphql_query(self) -> str: return """ @@ -362,8 +351,7 @@ def fetch_tableau_workspace_data( TableauWorkspaceData: A snapshot of the Tableau workspace's content. """ with self.get_client() as client: - workbooks_data = client.get_workbooks()["workbooks"] - workbook_ids = [workbook["id"] for workbook in workbooks_data["workbook"]] + workbook_ids = [workbook.id for workbook in client.get_workbooks()] workbooks_by_id = {} sheets_by_id = {} @@ -598,7 +586,7 @@ def _assets(tableau: BaseTableauWorkspace): *workspace_data.sheets_by_id.items(), *workspace_data.dashboards_by_id.items(), ]: - data = client.get_view(view_id)["view"] + data = client.get_view(view_id) if view_content_data.content_type == TableauContentType.SHEET: asset_key = translator.get_sheet_asset_key(view_content_data) elif view_content_data.content_type == TableauContentType.DASHBOARD: @@ -610,24 +598,32 @@ def _assets(tableau: BaseTableauWorkspace): value=None, output_name="__".join(asset_key.path), metadata={ - "workbook_id": data["workbook"]["id"], - "owner_id": data["owner"]["id"], - "name": data["name"], - "contentUrl": data["contentUrl"], - "createdAt": data["createdAt"], - "updatedAt": data["updatedAt"], + "workbook_id": data.workbook_id, + "owner_id": data.owner_id, + "name": data.name, + "contentUrl": data.content_url, + "createdAt": data.created_at.strftime("%Y-%m-%dT%H:%M:%S") + if data.created_at + else None, + "updatedAt": data.updated_at.strftime("%Y-%m-%dT%H:%M:%S") + if data.updated_at + else None, }, ) else: yield ObserveResult( asset_key=asset_key, metadata={ - "workbook_id": data["workbook"]["id"], - "owner_id": data["owner"]["id"], - "name": data["name"], - "contentUrl": data["contentUrl"], - "createdAt": data["createdAt"], - "updatedAt": data["updatedAt"], + "workbook_id": data.workbook_id, + "owner_id": data.owner_id, + "name": data.name, + "contentUrl": data.content_url, + "createdAt": data.created_at.strftime("%Y-%m-%dT%H:%M:%S") + if data.created_at + else None, + "updatedAt": data.updated_at.strftime("%Y-%m-%dT%H:%M:%S") + if data.updated_at + else None, }, ) diff --git a/python_modules/libraries/dagster-tableau/dagster_tableau_tests/conftest.py b/python_modules/libraries/dagster-tableau/dagster_tableau_tests/conftest.py index a875c7b61adba..ef1d7ed3cd139 100644 --- a/python_modules/libraries/dagster-tableau/dagster_tableau_tests/conftest.py +++ b/python_modules/libraries/dagster-tableau/dagster_tableau_tests/conftest.py @@ -1,7 +1,7 @@ # ruff: noqa: SLF001 import uuid -from unittest.mock import PropertyMock, patch +from unittest.mock import MagicMock, PropertyMock, patch import pytest from dagster_tableau.translator import TableauContentData, TableauContentType, TableauWorkspaceData @@ -141,9 +141,9 @@ def sign_in_fixture(): @pytest.fixture(name="get_workbooks", autouse=True) -def get_workbooks_fixture(): +def get_workbooks_fixture(build_workbook_item): with patch("dagster_tableau.resources.BaseTableauClient.get_workbooks") as mocked_function: - mocked_function.return_value = SAMPLE_WORKBOOKS + mocked_function.return_value = [build_workbook_item()] yield mocked_function @@ -155,9 +155,9 @@ def get_workbook_fixture(): @pytest.fixture(name="get_view", autouse=True) -def get_view_fixture(): +def get_view_fixture(build_view_item): with patch("dagster_tableau.resources.BaseTableauClient.get_view") as mocked_function: - mocked_function.side_effect = [SAMPLE_VIEW_SHEET, SAMPLE_VIEW_DASHBOARD] + mocked_function.return_value = build_view_item() yield mocked_function @@ -185,6 +185,60 @@ def cancel_job_fixture(): yield mocked_function +@pytest.fixture(name="build_workbook_item", autouse=True) +def build_workbook_item_fixture(): + with patch("dagster_tableau.resources.TSC.WorkbookItem") as mocked_class: + type(mocked_class.return_value).id = PropertyMock( + return_value=SAMPLE_WORKBOOKS["workbooks"]["workbook"][0]["id"] + ) + yield mocked_class + + +@pytest.fixture(name="build_view_item", autouse=True) +def build_view_item_fixture(): + with patch("dagster_tableau.resources.TSC.ViewItem") as mocked_class: + mock_sheet = MagicMock() + type(mock_sheet.return_value).workbook_id = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["workbook"]["id"] + ) + type(mock_sheet.return_value).owner_id = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["owner"]["id"] + ) + type(mock_sheet.return_value).name = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["name"] + ) + type(mock_sheet.return_value).content_url = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["contentUrl"] + ) + type(mock_sheet.return_value).created_at = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["createdAt"] + ) + type(mock_sheet.return_value).updated_at = PropertyMock( + return_value=SAMPLE_VIEW_SHEET["view"]["updatedAt"] + ) + mock_dashboard = MagicMock() + type(mock_dashboard.return_value).workbook_id = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["workbook"]["id"] + ) + type(mock_dashboard.return_value).owner_id = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["owner"]["id"] + ) + type(mock_dashboard.return_value).name = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["name"] + ) + type(mock_dashboard.return_value).content_url = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["contentUrl"] + ) + type(mock_dashboard.return_value).created_at = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["createdAt"] + ) + type(mock_dashboard.return_value).updated_at = PropertyMock( + return_value=SAMPLE_VIEW_DASHBOARD["view"]["updatedAt"] + ) + mocked_class.side_effect = [mock_sheet, mock_dashboard] + yield mocked_class + + @pytest.fixture(name="get_data_source_by_id", autouse=True) def get_data_source_by_id_fixture(): with patch( diff --git a/python_modules/libraries/dagster-tableau/setup.py b/python_modules/libraries/dagster-tableau/setup.py index fcb852e6ebe72..aa22d9be1993f 100644 --- a/python_modules/libraries/dagster-tableau/setup.py +++ b/python_modules/libraries/dagster-tableau/setup.py @@ -39,7 +39,6 @@ def get_version() -> str: f"dagster{pin}", "pyjwt[crypto]", "tableauserverclient", - "xmltodict", ], include_package_data=True, python_requires=">=3.8,<3.13",