From 1edcd425a40e41df3bcb7b12c4ed17d4ed290c9a Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Tue, 31 Oct 2023 17:37:09 -0400 Subject: [PATCH 1/7] Add same input/output state buffer option --- protobuf/model_config.proto | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index 35d4e82..caa4a1e 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1382,6 +1382,13 @@ message ModelSequenceBatching //@@ The optional field to specify the initial state for the model. //@@ repeated InitialState initial_state = 5; + + //@@ .. cpp:var:: bool use_single_buffer + //@@ + //@@ The optional field to use a single buffer for both input and output state. + //@@ The default value is false. + //@@ + bool use_single_buffer = 6; } //@@ .. cpp:var:: message StrategyDirect From 9246bbef6f7dcc2b0a8b3faeea718cca3b26f777 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 2 Nov 2023 11:28:02 -0400 Subject: [PATCH 2/7] Add an option for using GrowableMemory --- protobuf/model_config.proto | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index caa4a1e..011cec5 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1389,6 +1389,17 @@ message ModelSequenceBatching //@@ The default value is false. //@@ bool use_single_buffer = 6; + + //@@ .. cpp:var:: bool use_growable_memory + //@@ + //@@ The optional field to allow an implicit state buffer to grow or shrink + //@@ when the size changes during a sequence. When using this option Triton + //@@ guarantess that it will use the same buffer even if the state size changes. + //@@ Currently, this option only applies for implicit state that uses CUDA and + //@@ use_single_buffer must be enabled. + //@@ The default value is false. + //@@ + bool use_growable_memory = 7; } //@@ ..
cpp:var:: message StrategyDirect From 7575c56d110a1b4c2979f7b09b9270c429bbba8f Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 9 Nov 2023 11:42:56 -0500 Subject: [PATCH 3/7] Review comments --- protobuf/model_config.proto | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index 011cec5..d68250f 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1383,20 +1383,28 @@ message ModelSequenceBatching //@@ repeated InitialState initial_state = 5; - //@@ .. cpp:var:: bool use_single_buffer + //@@ .. cpp:var:: bool use_same_buffer_for_input_output //@@ //@@ The optional field to use a single buffer for both input and output state. + //@@ Without this option, Triton uses separate buffers for input and output state + //@@ which can be problematic if the state size is large. This option can help + //@@ reduce the memory usage when the state size is large. There is no harm + //@@ in always enabling this option if the output state will be written after + //@@ the input state is read by the framework. Note that when using this + //@@ option, `TRITONBACKEND_StateUpdate` has no effect and the state will + //@@ be always updated. //@@ The default value is false. //@@ - bool use_single_buffer = 6; + bool use_same_buffer_for_input_output = 6; //@@ .. cpp:var:: bool use_growable_memory //@@ - //@@ The optional field to allow an implicit state buffer to grow or shrink + //@@ The optional field to allow an implicit state buffer to grow the buffer //@@ when the size changes during a sequence. When using this option Triton - //@@ guarantess that it will use the same buffer even if the state size changes. - //@@ Currently, this option only applies for implicit state that uses CUDA and - //@@ use_single_buffer must be enabled. + //@@ guarantess that it will use the same allocations even if the state size changes. 
+ //@@ The added size will be appended to the end of the buffer and the existing data + //@@ will be preserved. Currently, this option only applies for + //@@ implicit state that uses CUDA memory and use_single_buffer must be enabled. //@@ The default value is false. //@@ bool use_growable_memory = 7; From 43bba9ee92d882d1c15e815468142f73207fce62 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 9 Nov 2023 13:05:15 -0500 Subject: [PATCH 4/7] Format --- protobuf/model_config.proto | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index d68250f..efe4f03 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1385,26 +1385,30 @@ message ModelSequenceBatching //@@ .. cpp:var:: bool use_same_buffer_for_input_output //@@ - //@@ The optional field to use a single buffer for both input and output state. - //@@ Without this option, Triton uses separate buffers for input and output state - //@@ which can be problematic if the state size is large. This option can help - //@@ reduce the memory usage when the state size is large. There is no harm - //@@ in always enabling this option if the output state will be written after + //@@ The optional field to use a single buffer for both input and output + //@@ state. Without this option, Triton uses separate buffers for input + //@@ and output state which can be problematic if the state size is + //@@ large. This option can help reduce the memory usage when + //@@ the state size is large. There is no harm in always enabling + //@@ this option if the output state will be written after //@@ the input state is read by the framework. Note that when using this - //@@ option, `TRITONBACKEND_StateUpdate` has no effect and the state will - //@@ be always updated. + //@@ option, `TRITONBACKEND_StateUpdate` has no effect and the state + //@@ will be always updated. //@@ The default value is false. 
//@@ bool use_same_buffer_for_input_output = 6; //@@ .. cpp:var:: bool use_growable_memory //@@ - //@@ The optional field to allow an implicit state buffer to grow the buffer - //@@ when the size changes during a sequence. When using this option Triton - //@@ guarantess that it will use the same allocations even if the state size changes. - //@@ The added size will be appended to the end of the buffer and the existing data + //@@ The optional field to allow an implicit state buffer to grow the + //@@ buffer when the size changes during a sequence. When using this + //@@ option Triton guarantess that it will use the same allocations + //@@ even if the state size changes. + //@@ The added size will be appended to the end of the buffer and the //@@ will be preserved. Currently, this option only applies for - //@@ implicit state that uses CUDA memory and use_single_buffer must be enabled. + //@@ implicit state that uses CUDA memory and use_single_buffer must be + //@@ existing data enabled. + //@@ //@@ The default value is false. //@@ bool use_growable_memory = 7; From b9399f5282701513bf87c1de3a00f495ea43442f Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Mon, 13 Nov 2023 01:24:17 -0500 Subject: [PATCH 5/7] Review comments --- protobuf/model_config.proto | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index efe4f03..b00d006 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1386,28 +1386,31 @@ message ModelSequenceBatching //@@ .. cpp:var:: bool use_same_buffer_for_input_output //@@ //@@ The optional field to use a single buffer for both input and output - //@@ state. Without this option, Triton uses separate buffers for input - //@@ and output state which can be problematic if the state size is - //@@ large. This option can help reduce the memory usage when - //@@ the state size is large. 
There is no harm in always enabling - //@@ this option if the output state will be written after - //@@ the input state is read by the framework. Note that when using this - //@@ option, `TRITONBACKEND_StateUpdate` has no effect and the state - //@@ will be always updated. + //@@ state. Without this option, Triton allocates separate buffers + //@@ for input and output state + //@@ which can be problematic if the state size is + //@@ large. This option reduces the memory usage by allocating a single + //@@ buffer. Enabling this option is recommended whenever + //@@ the input state is processed before the output state is written. + //@@ When enabled the state + //@@ will always be updated independent of whether + //@@ TRITONBACKEND_StateUpdate is called + //@@ (however TRITONBACKEND_StateUpdate should still be called for + //@@ completeness). + //@@ //@@ The default value is false. //@@ bool use_same_buffer_for_input_output = 6; //@@ .. cpp:var:: bool use_growable_memory //@@ - //@@ The optional field to allow an implicit state buffer to grow the - //@@ buffer when the size changes during a sequence. When using this - //@@ option Triton guarantess that it will use the same allocations - //@@ even if the state size changes. - //@@ The added size will be appended to the end of the buffer and the - //@@ will be preserved. Currently, this option only applies for - //@@ implicit state that uses CUDA memory and use_single_buffer must be - //@@ existing data enabled. + //@@ The optional field to enable an implicit state buffer to grow + // without + //@@ reallocating or copying existing memory. + //@@ Additional memory will be appended to the end of the buffer and + //@@ existing data will be preserved. + //@@ This option is only available for CUDA memory and requires enabling + //@@ use_same_buffer_for_input_output. //@@ //@@ The default value is false. 
//@@ From c8e8c8b361bb7add1133872d628e3b9e0d9124ce Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Tue, 14 Nov 2023 00:48:49 -0500 Subject: [PATCH 6/7] Review comment --- protobuf/model_config.proto | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index b00d006..654ca05 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1405,12 +1405,13 @@ message ModelSequenceBatching //@@ .. cpp:var:: bool use_growable_memory //@@ //@@ The optional field to enable an implicit state buffer to grow - // without - //@@ reallocating or copying existing memory. + //@@ without reallocating or copying existing memory. //@@ Additional memory will be appended to the end of the buffer and //@@ existing data will be preserved. //@@ This option is only available for CUDA memory and requires enabling - //@@ use_same_buffer_for_input_output. + //@@ use_same_buffer_for_input_output. When using this option, + //@@ StateBuffer call will always return CUDA memory even if CPU memory + //@@ is provided. //@@ //@@ The default value is false. //@@ From 591e745a4952ae567860adb2f46d065d6347f15a Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 15 Nov 2023 11:29:06 -0500 Subject: [PATCH 7/7] Fix description --- protobuf/model_config.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protobuf/model_config.proto b/protobuf/model_config.proto index 654ca05..0773fa9 100644 --- a/protobuf/model_config.proto +++ b/protobuf/model_config.proto @@ -1411,7 +1411,7 @@ message ModelSequenceBatching //@@ This option is only available for CUDA memory and requires enabling //@@ use_same_buffer_for_input_output. When using this option, //@@ StateBuffer call will always return CUDA memory even if CPU memory - //@@ is provided. + //@@ is requested. //@@ //@@ The default value is false. //@@