Skip to content

Rename Casual to Causal #7484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/gen-ai/CausalLMPipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ public abstract class CausalLMPipeline
bool echo = false); // echo the input token ids in the output token ids
}

public CasualLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
public CausalLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
where TTokenizer : ITokenizer
where TCausalLM : nn.Module<CausalLanguageModelInput, CausalLanguageModelOutput>
{
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> Create(LLama2Tokenizer tokenizer, Phi3ForCasualLM model);
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> Create(LLama2Tokenizer tokenizer, Phi3ForCausalLM model);

}
```
Expand Down Expand Up @@ -105,7 +105,7 @@ The extension `Generate` method provides an even easier way to generate text with

```C#
public static string Generate(
this CasualLMPipeline pipeline,
this CausalLMPipeline pipeline,
string prompt,
int maxLen = 128,
float temperature = 0.7f,
Expand Down
14 changes: 7 additions & 7 deletions docs/gen-ai/Usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ This document shows how to use the causal language model API for text generation
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);

CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);

var prompt = "<|user|>Once upon a time<|end|><assistant>";
var output = pipeline.Generate(
Expand All @@ -24,16 +24,16 @@ In most cases, developers would like to consume the model in a uniform way. In
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var kernel = Kernel.CreateBuilder()
// the type of the tokenizer and the model are explicitly specified
// here for clarity, but the compiler can infer them
    // The typed pipeline prevents developers from passing an arbitrary CausalLMPipeline
// The reason why we don't want to allow developers to pass an arbitrary CausalLMPipeline is because
// - the model and the tokenizer must be compatible
// - the chat template must be compatible with the model. e.g. In `AddPhi3AsChatCompletionService`, the chat template is fixed to "<|user|>{prompt}<|end|><assistant>"
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCasualLM>(pipeline)
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCausalLM>(pipeline)
.Build();
```

Expand All @@ -42,7 +42,7 @@ Similarly, developers would also like to consume the language model like an agent.
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var agent = new Phi3MiniAgent(pipeline, name: "assistant");

Expand All @@ -59,7 +59,7 @@ If the model is deployed as a service, developers can consume the model similar
// server.cs
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var agent = new Phi3MiniAgent(pipeline, name: "assistant");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
using var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole());

// create logger
var logger = loggerFactory.CreateLogger<CasualLMSupervisedFineTuningTrainer>();
var logger = loggerFactory.CreateLogger<CausalLMSupervisedFineTuningTrainer>();

var device = "cuda";

Expand All @@ -46,10 +46,10 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
var input = CreateDataset(dataset, pipeline.TypedTokenizer, Llama3_1ChatTemplateBuilder.Instance);

// create trainer
var sftTrainer = new CasualLMSupervisedFineTuningTrainer(pipeline, logger: logger);
var sftTrainer = new CausalLMSupervisedFineTuningTrainer(pipeline, logger: logger);

// Train the model
var option = new CasualLMSupervisedFineTuningTrainer.Option
var option = new CausalLMSupervisedFineTuningTrainer.Option
{
BatchSize = 1,
Device = device,
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public static async Task RunAsync(string weightFolder)
torch.set_default_dtype(defaultType);
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
var client = new Phi3CausalLMChatClient(pipeline);

var task = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ public static async Task RunAsync()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
var question = @"write a C# program to calculate the factorial of a number";

// agent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ public static async Task RunChatCompletionSample()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);

var kernel = Kernel.CreateBuilder()
.AddGenAIChatCompletion(pipeline)
Expand Down Expand Up @@ -56,8 +56,8 @@ public static async Task RunTextGenerationSample()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);

var kernel = Kernel.CreateBuilder()
.AddGenAITextGeneration(pipeline)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

namespace Microsoft.ML.GenAI.Core.Trainer;

public class CasualLMSupervisedFineTuningTrainer
public class CausalLMSupervisedFineTuningTrainer
{
private readonly ILogger<CasualLMSupervisedFineTuningTrainer>? _logger;
private readonly ILogger<CausalLMSupervisedFineTuningTrainer>? _logger;
private readonly ICausalLMPipeline _pipeline;

public CasualLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CasualLMSupervisedFineTuningTrainer>? logger = null)
public CausalLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CausalLMSupervisedFineTuningTrainer>? logger = null)
{
_logger = logger;
_pipeline = pipeline;
Expand Down
26 changes: 13 additions & 13 deletions src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ namespace Microsoft.ML.GenAI.Core;

public class AttentionMaskConverter
{
private readonly bool _isCasual;
private readonly bool _isCausal;
private readonly int? _slidingWindow;

public AttentionMaskConverter(bool isCausal, int? slidingWindow)
{
this._isCasual = isCausal;
this._isCausal = isCausal;
this._slidingWindow = slidingWindow;
}

Expand All @@ -42,42 +42,42 @@ public Tensor To4D(

// create causal mask
// [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
Tensor? casual4dMask = null;
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCasual)
Tensor? causal4dMask = null;
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCausal)
{
if (keyValueLength is null)
{
throw new ArgumentException("key_value_length should be provided when attention_mask is causal");
}

var pastKeyValuesLength = keyValueLength.Value - queryLength;
casual4dMask = MakeCasualMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
causal4dMask = MakeCausalMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
}
else if (this._slidingWindow is not null)
{
throw new NotImplementedException("Sliding window is not supported for non-causal masks");
}

var expandedAttnMask = ExpandMask(attentionMask2d, dType, queryLength).to(attentionMask2d.device);
if (casual4dMask is not null)
if (causal4dMask is not null)
{
var min = torch.finfo(dType).min;
expandedAttnMask = casual4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
expandedAttnMask = causal4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
}

return expandedAttnMask;
}

public Tensor? ToCasual4D(
public Tensor? ToCausal4D(
int batchSize,
int queryLength,
int keyValueLength,
ScalarType dType,
Device device)
{
if (!_isCasual)
if (!_isCausal)
{
throw new ArgumentException("This is not a casual mask");
throw new ArgumentException("This is not a causal mask");
}

long[] inputShape = [batchSize, queryLength];
Expand All @@ -88,13 +88,13 @@ public Tensor To4D(
Tensor? causal4DMask = null;
if (queryLength > 1 || this._slidingWindow is int)
{
causal4DMask = MakeCasualMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
causal4DMask = MakeCausalMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
}

return causal4DMask;
}

public static Tensor MakeCasualMask(
public static Tensor MakeCausalMask(
long[] inputIdsShape,
ScalarType dType,
Device device,
Expand Down Expand Up @@ -158,7 +158,7 @@ public static Tensor MakeCasualMask(
return converter.To4D(attentionMask, (int)inputShape[1], dType, keyValueLength);
}

return converter.ToCasual4D(batchSize, queryLength, keyValueLength, dType, device);
return converter.ToCausal4D(batchSize, queryLength, keyValueLength, dType, device);
}

public static Tensor ExpandMask(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static class SemanticKernelExtension
{
public static IKernelBuilder AddGenAIChatCompletion(
this IKernelBuilder builder,
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
{
builder.Services.AddSingleton<IChatCompletionService>(new Phi3CausalLMChatCompletionService(pipeline));

Expand All @@ -24,7 +24,7 @@ public static IKernelBuilder AddGenAIChatCompletion(

public static IKernelBuilder AddGenAITextGeneration(
this IKernelBuilder builder,
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
{
builder.Services.AddSingleton<ITextGenerationService>(new Phi3CausalLMTextGenerationService(pipeline));

Expand Down
12 changes: 6 additions & 6 deletions src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
// use 4d attention mask
if (attentionMask is not null)
{
attentionMask = this.Prepare4DCasualAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
attentionMask = this.Prepare4DCausalAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
}

var hiddenStates = inputEmbeddings;
Expand All @@ -100,7 +100,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
return (hiddenStates, null, null);
}

private Tensor Prepare4DCasualAttentionMask(
private Tensor Prepare4DCausalAttentionMask(
Tensor attentionMask,
int queryLength,
int pastKeyValueLength,
Expand All @@ -110,11 +110,11 @@ private Tensor Prepare4DCasualAttentionMask(
var seqLen = attentionMask.shape[1];
Contract.Assert(seqLen == queryLength, "seqLen must be equal to queryLength");
var targetLength = queryLength + pastKeyValueLength;
var casual4DMask = this.MakeCasualAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
var causal4DMask = this.MakeCausalAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
var expandedMask = this.ExpandMask(attentionMask, dtype, queryLength).to(attentionMask.device);

casual4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
return casual4DMask;
causal4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
return causal4DMask;
}

private Tensor ExpandMask(
Expand All @@ -132,7 +132,7 @@ private Tensor ExpandMask(

return invertedMask.masked_fill(invertedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
}
private Tensor MakeCasualAttentionMask(
private Tensor MakeCausalAttentionMask(
int batchSize,
int targetLen,
int pastKeyValueLength,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@

namespace Microsoft.ML.GenAI.Phi;

public class Phi2ForCasualLM : nn.Module<CausalLMModelInput, CausalLMModelOutput>
public class Phi2ForCausalLM : nn.Module<CausalLMModelInput, CausalLMModelOutput>
{
#pragma warning disable MSML_PrivateFieldName // Private field name not in: _camelCase format
private readonly Phi2Model model;
private readonly GenAILinear lm_head;
#pragma warning restore MSML_PrivateFieldName // Private field name not in: _camelCase format

public Phi2ForCasualLM(Phi2Config config)
: base(nameof(Phi2ForCasualLM))
public Phi2ForCausalLM(Phi2Config config)
: base(nameof(Phi2ForCausalLM))
{
this.model = new Phi2Model(config);
this.lm_head = new GenAILinear(config.HiddenSize, config.VocabSize, dtype: config.Dtype);
Expand All @@ -47,7 +47,7 @@ public override CausalLMModelOutput forward(CausalLMModelInput input) // use_cac
return new CausalLMModelOutput(lastHiddenState: hiddenState, logits: lmLogits);
}

public static Phi2ForCasualLM FromPretrained(
public static Phi2ForCausalLM FromPretrained(
string modelFolder,
string configName = "config.json",
string checkPointName = "model.safetensors.index.json",
Expand All @@ -58,7 +58,7 @@ public static Phi2ForCasualLM FromPretrained(
var config = Path.Join(modelFolder, configName);
var modelConfig = JsonSerializer.Deserialize<Phi2Config>(File.ReadAllText(config)) ?? throw new ArgumentNullException(nameof(config));
modelConfig.Dtype = torchDtype;
var wrapper = new Phi2ForCasualLM(modelConfig);
var wrapper = new Phi2ForCausalLM(modelConfig);
var loadedParameters = new Dictionary<string, bool>();
wrapper.load_checkpoint(path: modelFolder, checkpointName: checkPointName, strict: true, loadedParameters: loadedParameters, useTqdm: useTqdm);
wrapper = wrapper.to(device);
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ namespace Microsoft.ML.GenAI.Phi;
public class Phi3Agent : IStreamingAgent
{
private const char Newline = '\n';
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> _pipeline;
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> _pipeline;
private readonly string? _systemMessage;
private readonly IAutoGenChatTemplateBuilder _templateBuilder;

public Phi3Agent(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
string name,
string? systemMessage = "you are a helpful assistant",
IAutoGenChatTemplateBuilder? templateBuilder = null)
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@

namespace Microsoft.ML.GenAI.Phi;

public class Phi3CausalLMChatClient : CausalLMPipelineChatClient<Tokenizer, Phi3ForCasualLM>
public class Phi3CausalLMChatClient : CausalLMPipelineChatClient<Tokenizer, Phi3ForCausalLM>
{
private readonly string _eotToken = "<|end|>";

public Phi3CausalLMChatClient(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
IMEAIChatTemplateBuilder? templateBuilder = null,
ChatClientMetadata? metadata = null)
: base(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ namespace Microsoft.ML.GenAI.Phi;

public class Phi3CausalLMChatCompletionService : IChatCompletionService
{
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> _pipeline;
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> _pipeline;
private readonly Phi3CausalLMTextGenerationService _textGenerationService;
private readonly ISemanticKernelChatTemplateBuilder _templateBuilder;

public Phi3CausalLMChatCompletionService(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
ISemanticKernelChatTemplateBuilder? templateBuilder = null)
{
_pipeline = pipeline;
Expand Down
Loading