From 67d3ec5626a4077746e48d85d32010a79e49f384 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Fri, 14 Jun 2024 10:11:57 -0500 Subject: [PATCH] update tpch examples for new pyarrow interval Fixes #665 --- examples/tpch/q01_pricing_summary_report.py | 4 +--- examples/tpch/q04_order_priority_checking.py | 4 +--- examples/tpch/q05_local_supplier_volume.py | 4 +--- examples/tpch/q06_forecasting_revenue_change.py | 4 +--- examples/tpch/q10_returned_item_reporting.py | 4 +--- examples/tpch/q12_ship_mode_order_priority.py | 4 +--- examples/tpch/q14_promotion_effect.py | 5 ++--- examples/tpch/q15_top_supplier.py | 5 ++--- examples/tpch/q20_potential_part_promotion.py | 4 +--- 9 files changed, 11 insertions(+), 27 deletions(-) diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py index 7e86055d9..cb9485a7a 100644 --- a/examples/tpch/q01_pricing_summary_report.py +++ b/examples/tpch/q01_pricing_summary_report.py @@ -48,9 +48,7 @@ # want to report results for. It should be between 60-120 days before the end. DAYS_BEFORE_FINAL = 90 -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, DAYS_BEFORE_FINAL), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, DAYS_BEFORE_FINAL, 0), type=pa.month_day_nano_interval()) print("Final date in database:", greatest_ship_date) diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 40eab6970..9dbd81674 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -49,9 +49,7 @@ # Create a date object from the string date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Limit results to cases where commitment date before receipt date # Aggregate the results so we only get one row to join with the order table. diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 27b4b84c7..f17f600a4 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -41,9 +41,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Load the dataframes we need diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index 3f58c5ec0..ec98aaf5e 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -45,9 +45,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, INTERVAL_DAYS), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) # Load the dataframes we need diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index ed88c2995..78327c3ad 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -38,9 +38,7 @@ date_start_of_quarter = lit(datetime.strptime(DATE_START_OF_QUARTER, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_one_quarter = lit(pa.scalar((0, 0, 92), type=pa.month_day_nano_interval())) +interval_one_quarter = lit(pa.scalar((0, 92, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index d3dd7d283..150870c64 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -51,9 +51,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) df = df_lineitem.filter(col("l_receiptdate") >= lit(date)).filter( diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index 333398c17..75fa363ad 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -34,9 +34,8 @@ DATE = "1995-09-01" date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_one_month = lit(pa.scalar((0, 0, 30), type=pa.month_day_nano_interval())) + +interval_one_month = lit(pa.scalar((0, 30, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 91af34a9f..4b9e4c1dd 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -34,9 +34,8 @@ DATE = "1996-01-01" date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval_3_months = lit(pa.scalar((0, 0, 91), type=pa.month_day_nano_interval())) + +interval_3_months = lit(pa.scalar((0, 91, 0), type=pa.month_day_nano_interval())) # Load the dataframes we need diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index 4a602846f..85e7226f7 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -56,9 +56,7 @@ date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() -# Note: this is a hack on setting the values. It should be set differently once -# https://github.com/apache/datafusion-python/issues/665 is resolved. -interval = pa.scalar((0, 0, 365), type=pa.month_day_nano_interval()) +interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) # Filter down dataframes df_nation = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST))