From d94b2f3090897890aa5aa1baa0133cdd1e57f83b Mon Sep 17 00:00:00 2001 From: Steven Matson <23409221+smats0n@users.noreply.github.com> Date: Tue, 19 Mar 2024 06:42:12 -0700 Subject: [PATCH] [dagster-databricks] Fix setting databricks cluster node configuration (#20000) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary & Motivation When submitting a Databricks Job, the DatabricksJobRunner is not setting the cluster's `driver_instance_pool_id` node configuration correctly. As a result, it is not possible to launch a Databricks Job from Dagster to run on Databricks instance pools. This change sets the cluster's node configuration in accordance with the Databricks Jobs API to allow launching Databricks Jobs on instance pools. (Image: Screenshot 2024-02-28 at 8 49 36 PM) The code is currently attempting to access `driver_node_type_id` from `cluster.new.nodes`. However, the step launcher's `run_config` spec states that the `cluster.new.nodes` field expects `driver_node_type_id` and `node_type_id` to be nested within an object called `node_types`. The fields `driver_instance_pool_id` and `instance_pool_id` are first-class properties alongside `node_types` (see `run_config` spec [here](https://github.com/dagster-io/dagster/blob/2cabe8733cb517c8caaa21e0b323f46b944ef3ef/python_modules/libraries/dagster-databricks/dagster_databricks/configs.py#L362)). ## How I Tested These Changes I launched Dagster runs in a Databricks workspace from a local Dagster deployment. 
--- .../dagster-databricks/dagster_databricks/databricks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py b/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py index 9cd64f125b2cf..a15bb4b3e1954 100644 --- a/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py +++ b/python_modules/libraries/dagster-databricks/dagster_databricks/databricks.py @@ -564,7 +564,7 @@ def submit_run(self, run_config: Mapping[str, Any], task: Mapping[str, Any]) -> new_cluster["instance_pool_id"] = nodes["instance_pool_id"] if "driver_instance_pool_id" in nodes: - new_cluster["driver_node_type_id"] = nodes["driver_node_type_id"] + new_cluster["driver_instance_pool_id"] = nodes["driver_instance_pool_id"] else: node_types = nodes["node_types"] new_cluster["node_type_id"] = node_types["node_type_id"]