[FEA] Expose stripe_size_rows setting for ORCWriterOptions #17927

Draft: wants to merge 5 commits into branch-25.04
Changes from 3 commits
java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java
@@ -24,11 +24,13 @@
public class CompressionMetadataWriterOptions extends ColumnWriterOptions.StructColumnWriterOptions {
private final CompressionType compressionType;
private final Map<String, String> metadata;
private final int stripeSizeRows;

protected CompressionMetadataWriterOptions(Builder builder) {
super(builder);
this.compressionType = builder.compressionType;
this.metadata = builder.metadata;
this.stripeSizeRows = builder.stripeSizeRows;
}

@Override
@@ -96,10 +98,15 @@ public int getTopLevelChildren() {
return childColumnOptions.length;
}

public int getStripeSizeRows() {
return stripeSizeRows;
}

public abstract static class Builder<T extends Builder,
V extends CompressionMetadataWriterOptions> extends AbstractStructBuilder<T, V> {
final Map<String, String> metadata = new LinkedHashMap<>();
CompressionType compressionType = CompressionType.AUTO;
int stripeSizeRows = 1000000;

/**
* Add a metadata key and a value
@@ -124,5 +131,10 @@ public T withCompressionType(CompressionType compression) {
this.compressionType = compression;
return (T) this;
}

public T withStripeSizeRows(int stripeSizeRows) {
this.stripeSizeRows = stripeSizeRows;
return (T) this;
}
}
}
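
For reviewers, a minimal sketch of how a caller would pick up the new option through the builder chain. This is a hedged example: ORCWriterOptions.builder(), withColumns(...), and the column names are assumed from the existing public API and are not part of this diff.

import ai.rapids.cudf.CompressionType;
import ai.rapids.cudf.ORCWriterOptions;

public class StripeSizeOptionSketch {
  static ORCWriterOptions smallStripeOptions() {
    return ORCWriterOptions.builder()
        .withColumns(true, "id", "name")              // assumed column setup for a two-column table
        .withCompressionType(CompressionType.SNAPPY)  // existing option on this builder
        .withStripeSizeRows(100_000)                  // new option: cap each stripe at 100k rows (default 1,000,000)
        .build();
  }
}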
4 changes: 4 additions & 0 deletions java/src/main/java/ai/rapids/cudf/Table.java
@@ -475,6 +475,7 @@ private static native long writeORCFileBegin(String[] columnNames,
int compression,
int[] precisions,
boolean[] isMapValues,
int stripe_size_rows,
String filename) throws CudfException;

/**
@@ -501,6 +502,7 @@ private static native long writeORCBufferBegin(String[] columnNames,
int compression,
int[] precisions,
boolean[] isMapValues,
int stripe_size_rows,
HostBufferConsumer consumer,
HostMemoryAllocator hostMemoryAllocator
) throws CudfException;
@@ -1823,6 +1825,7 @@ private ORCTableWriter(ORCWriterOptions options, File outputFile) {
options.getCompressionType().nativeId,
options.getFlatPrecision(),
options.getFlatIsMap(),
options.getStripeSizeRows(),
outputFile.getAbsolutePath()));
this.consumer = null;
}
@@ -1838,6 +1841,7 @@ private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer,
options.getCompressionType().nativeId,
options.getFlatPrecision(),
options.getFlatIsMap(),
options.getStripeSizeRows(),
consumer, hostMemoryAllocator));
this.consumer = consumer;
}
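
A companion sketch of the write path that reaches these constructors. Hedged example: Table.writeORCChunked and TableWriter are assumed from the existing public API, and the wrapper method shown here is only illustrative.

import java.io.File;

import ai.rapids.cudf.ORCWriterOptions;
import ai.rapids.cudf.Table;
import ai.rapids.cudf.TableWriter;

public class OrcWriteSketch {
  // Writes `table` to `output`; the stripe row limit configured on `options`
  // is forwarded to the native writer via options.getStripeSizeRows() above.
  static void writeOrc(Table table, ORCWriterOptions options, File output) {
    try (TableWriter writer = Table.writeORCChunked(options, output)) {
      writer.write(table);
    }
  }
}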
4 changes: 4 additions & 0 deletions java/src/main/native/src/TableJni.cpp
@@ -2480,6 +2480,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
jint j_compression,
jintArray j_precisions,
jbooleanArray j_is_map,
jint j_stripe_size_rows,
jobject consumer,
jobject host_memory_allocator)
{
@@ -2535,6 +2536,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
.enable_statistics(ORC_STATISTICS_ROW_GROUP)
.key_value_metadata(kv_metadata)
.compression_statistics(stats)
.stripe_size_rows(j_stripe_size_rows)
.build();
auto writer_ptr = std::make_unique<cudf::io::orc_chunked_writer>(opts);
cudf::jni::native_orc_writer_handle* ret = new cudf::jni::native_orc_writer_handle(
@@ -2555,6 +2557,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
jint j_compression,
jintArray j_precisions,
jbooleanArray j_is_map,
jint j_stripe_size_rows,
jstring j_output_path)
{
JNI_NULL_CHECK(env, j_col_names, "null columns", 0);
@@ -2606,6 +2609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
.enable_statistics(ORC_STATISTICS_ROW_GROUP)
.key_value_metadata(kv_metadata)
.compression_statistics(stats)
.stripe_size_rows(j_stripe_size_rows)
.build();
auto writer_ptr = std::make_unique<cudf::io::orc_chunked_writer>(opts);
cudf::jni::native_orc_writer_handle* ret =