Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Expose stripe_size_rows setting for ORCWriterOptions #17927

19 changes: 18 additions & 1 deletion java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,17 +23,34 @@
* that will be used by the ORC writer to write the file.
*/
public class ORCWriterOptions extends CompressionMetadataWriterOptions {
private int stripeSizeRows;

/**
 * Creates the options from a builder. Common settings are handled by the
 * superclass constructor; the stripe size in rows is copied from the builder.
 *
 * @param builder the builder holding the configured values
 */
private ORCWriterOptions(Builder builder) {
super(builder);
this.stripeSizeRows = builder.stripeSizeRows;
}

/**
 * Creates a new {@link Builder} for assembling ORC writer options.
 *
 * @return a new builder instance
 */
public static Builder builder() {
return new Builder();
}

/**
 * Returns the stripe size, in rows, that was configured through the builder.
 *
 * @return the stripe size in rows
 */
public int getStripeSizeRows() {
return stripeSizeRows;
}

public static class Builder extends CompressionMetadataWriterOptions.Builder
<Builder, ORCWriterOptions> {
// Default ORC stripe size in rows (1M), as defined in cudf/cpp/include/cudf/io/orc.hpp
private int stripeSizeRows = 1000000;

/**
 * Sets the stripe size, in rows, for the ORC writer options being built.
 *
 * @param stripeSizeRows stripe size in rows; must be at least 512
 * @return this builder, to allow call chaining
 * @throws IllegalArgumentException if {@code stripeSizeRows} is less than 512
 */
public Builder withStripeSizeRows(int stripeSizeRows) {
  // Accept only values at or above the 512-row floor; anything lower is rejected.
  if (stripeSizeRows >= 512) {
    this.stripeSizeRows = stripeSizeRows;
    return this;
  }
  throw new IllegalArgumentException("Maximum stripe size cannot be smaller than 512");
}

public ORCWriterOptions build() {
return new ORCWriterOptions(this);
Expand Down
4 changes: 4 additions & 0 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ private static native long writeORCFileBegin(String[] columnNames,
int compression,
int[] precisions,
boolean[] isMapValues,
int stripeSizeRows,
String filename) throws CudfException;

/**
Expand All @@ -501,6 +502,7 @@ private static native long writeORCBufferBegin(String[] columnNames,
int compression,
int[] precisions,
boolean[] isMapValues,
int stripeSizeRows,
HostBufferConsumer consumer,
HostMemoryAllocator hostMemoryAllocator
) throws CudfException;
Expand Down Expand Up @@ -1823,6 +1825,7 @@ private ORCTableWriter(ORCWriterOptions options, File outputFile) {
options.getCompressionType().nativeId,
options.getFlatPrecision(),
options.getFlatIsMap(),
options.getStripeSizeRows(),
outputFile.getAbsolutePath()));
this.consumer = null;
}
Expand All @@ -1838,6 +1841,7 @@ private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer,
options.getCompressionType().nativeId,
options.getFlatPrecision(),
options.getFlatIsMap(),
options.getStripeSizeRows(),
consumer, hostMemoryAllocator));
this.consumer = consumer;
}
Expand Down
4 changes: 4 additions & 0 deletions java/src/main/native/src/TableJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2480,6 +2480,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
jint j_compression,
jintArray j_precisions,
jbooleanArray j_is_map,
jint j_stripe_size_rows,
jobject consumer,
jobject host_memory_allocator)
{
Expand Down Expand Up @@ -2535,6 +2536,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
.enable_statistics(ORC_STATISTICS_ROW_GROUP)
.key_value_metadata(kv_metadata)
.compression_statistics(stats)
.stripe_size_rows(j_stripe_size_rows)
.build();
auto writer_ptr = std::make_unique<cudf::io::orc_chunked_writer>(opts);
cudf::jni::native_orc_writer_handle* ret = new cudf::jni::native_orc_writer_handle(
Expand All @@ -2555,6 +2557,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
jint j_compression,
jintArray j_precisions,
jbooleanArray j_is_map,
jint j_stripe_size_rows,
jstring j_output_path)
{
JNI_NULL_CHECK(env, j_col_names, "null columns", 0);
Expand Down Expand Up @@ -2606,6 +2609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
.enable_statistics(ORC_STATISTICS_ROW_GROUP)
.key_value_metadata(kv_metadata)
.compression_statistics(stats)
.stripe_size_rows(j_stripe_size_rows)
.build();
auto writer_ptr = std::make_unique<cudf::io::orc_chunked_writer>(opts);
cudf::jni::native_orc_writer_handle* ret =
Expand Down
Loading