Skip to content

Commit

Permalink
Replacing the dataset for h2o sample_script to fix tests (#1258)
Browse files (browse the repository at this point in the history)
  • Loading branch information
prince-cs authored Nov 11, 2024
1 parent 6f517b2 commit c063e5f
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions h2o/sample-script.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,10 +5,9 @@
spark = SparkSession.builder.appName("SparklingWaterApp").getOrCreate()
hc = H2OContext.getOrCreate()

- bucket = "h2o-bq-large-dataset"
- train_path = "demos/cc_train.csv"
- test_path = "demos/cc_test.csv"
- y = "DEFAULT_PAYMENT_NEXT_MONTH"
+ bucket = "h2o-bq-large-dataset-1"
+ train_path = "demos/prostate.csv"
+ y = "CAPSULE"
is_classification = True

drop_cols = []
Expand All @@ -17,13 +16,9 @@
train_data = spark.read\
.options(header='true', inferSchema='true')\
.csv("gs://{}/{}".format(bucket, train_path))
- test_data = spark.read\
- .options(header='true', inferSchema='true')\
- .csv("gs://{}/{}".format(bucket, test_path))

print("CREATING H2O FRAME")
training_frame = hc.asH2OFrame(train_data)
- test_frame = hc.asH2OFrame(test_data)

x = training_frame.columns
x.remove(y)
Expand Down

0 comments on commit c063e5f

Please sign in to comment.