From de7fc9b07557b0dfcd94a5fd0ff5aba3837003d1 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Mon, 25 Mar 2024 07:05:13 -0400 Subject: [PATCH] [batch-inserts] Toy for generating batch insert events (#20592) ## Summary & Motivation Toy definitions for generating large numbers of batch insert events from a single-run backfill. ## How I Tested These Changes Manual execution locally. --- .../toys/many_partitions_ranged_backfill.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 python_modules/dagster-test/dagster_test/toys/many_partitions_ranged_backfill.py diff --git a/python_modules/dagster-test/dagster_test/toys/many_partitions_ranged_backfill.py b/python_modules/dagster-test/dagster_test/toys/many_partitions_ranged_backfill.py new file mode 100644 index 0000000000000..b6738b762eec5 --- /dev/null +++ b/python_modules/dagster-test/dagster_test/toys/many_partitions_ranged_backfill.py @@ -0,0 +1,37 @@ +"""An asset with a large number of partitions (hourly over 1 year = 8760 partitions) and a +single-run backfill policy. When backfilled, will generate a large number of `store_event` or +`store_event_batch` calls, depending on whether batching is enabled. Use a DummyIOManager to avoid +unnecessary writes. +""" + +from dagster import ( + AssetExecutionContext, + BackfillPolicy, + Definitions, + HourlyPartitionsDefinition, + IOManager, + asset, +) + +partitions_def = HourlyPartitionsDefinition( + start_date="2023-01-01-00:00", end_date="2024-01-01-00:00" +) + + +@asset(partitions_def=partitions_def, backfill_policy=BackfillPolicy.single_run()) +def foo(context: AssetExecutionContext): + return {k: 1 for k in context.partition_keys} + + +class DummyIOManager(IOManager): + def load_input(self, context, obj): + return 1 + + def handle_output(self, context, obj): + pass + + +defs = Definitions( + assets=[foo], + resources={"io_manager": DummyIOManager()}, +)