Replace the dataset used by the h2o sample-script.py to fix tests (#1258)

GoogleCloudDataproc · Nov 11, 2024 · c063e5f
1 parent 6f517b2
commit c063e5f
Showing 1 changed file with 3 additions and 8 deletions.
diff --git a/h2o/sample-script.py b/h2o/sample-script.py
@@ -5,10 +5,9 @@
 spark = SparkSession.builder.appName("SparklingWaterApp").getOrCreate()
 hc = H2OContext.getOrCreate()
 
-bucket = "h2o-bq-large-dataset"
-train_path = "demos/cc_train.csv"
-test_path = "demos/cc_test.csv"
-y = "DEFAULT_PAYMENT_NEXT_MONTH"
+bucket = "h2o-bq-large-dataset-1"
+train_path = "demos/prostate.csv"
+y = "CAPSULE"
 is_classification = True
 
 drop_cols = []
@@ -17,13 +16,9 @@
 train_data = spark.read\
                   .options(header='true', inferSchema='true')\
                   .csv("gs://{}/{}".format(bucket, train_path))
-test_data = spark.read\
-                 .options(header='true', inferSchema='true')\
-                 .csv("gs://{}/{}".format(bucket, test_path))
 
 print("CREATING H2O FRAME")
 training_frame = hc.asH2OFrame(train_data)
-test_frame = hc.asH2OFrame(test_data)
 
 x = training_frame.columns
 x.remove(y)