Skip to content

Rename Casual to Causal #7484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/gen-ai/CausalLMPipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ public abstract class CausalLMPipeline
bool echo = false); // echo the input token ids in the output token ids
}

public CasualLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
public CausalLMPipeline<TTokenizer, TCausalLM> : CausalLMPipeline
where TTokenizer : ITokenizer
where TCausalLM : nn.Module<CausalLanguageModelInput, CausalLanguageModelOutput>
{
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> Create(LLama2Tokenizer tokenizer, Phi3ForCasualLM model);
public CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> Create(LLama2Tokenizer tokenizer, Phi3ForCausalLM model);

}
```
Expand Down Expand Up @@ -105,7 +105,7 @@ The extension `Generate` method provides an even easier way to generate text with

```C#
public static string Generate(
this CasualLMPipeline pipeline,
this CausalLMPipeline pipeline,
string prompt,
int maxLen = 128,
float temperature = 0.7f,
Expand Down
14 changes: 7 additions & 7 deletions docs/gen-ai/Usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ This document shows how to use the causal language model API for text generation
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);

CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);

var prompt = "<|user|>Once upon a time<|end|><assistant>";
var output = pipeline.Generate(
Expand All @@ -24,16 +24,16 @@ In most cases, developers would like to consume the model in a uniform way. In
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCasualLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
CausalLMPipeline<LLama2Tokenizer, Phi3ForCausalLM> pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var kernel = Kernel.CreateBuilder()
// the type of the tokenizer and the model are explicitly specified
// here for clarity, but the compiler can infer them
    // The typed pipeline prevents developers from passing an arbitrary CausalLMPipeline
// The reason why we don't want to allow developers to pass an arbitrary CausalLMPipeline is because
// - the model and the tokenizer must be compatible
// - the chat template must be compatible with the model. e.g. In `AddPhi3AsChatCompletionService`, the chat template is fixed to "<|user|>{prompt}<|end|><assistant>"
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCasualLM>(pipeline)
.AddPhi3AsChatCompletionService<LLama2Tokenizer, Phi3ForCausalLM>(pipeline)
.Build();
```

Expand All @@ -42,7 +42,7 @@ Similarly, developers would also like to consume the language model like an agent.
```C#
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var agent = new Phi3MiniAgent(pipeline, name: "assistant");

Expand All @@ -59,7 +59,7 @@ If the model is deployed as a service, developers can consume the model similar
// server.cs
var pathToPhi3 = "path/to/phi3";
var tokenizer = LLama2Tokenizer.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCasualLM.FromPretrained(pathToPhi3);
var phi3CausalModel = Phi3ForCausalLM.FromPretrained(pathToPhi3);
var pipeline = new CausalLMPipeline(tokenizer, phi3CausalModel);
var agent = new Phi3MiniAgent(pipeline, name: "assistant");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
using var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole());

// create logger
var logger = loggerFactory.CreateLogger<CasualLMSupervisedFineTuningTrainer>();
var logger = loggerFactory.CreateLogger<CausalLMSupervisedFineTuningTrainer>();

var device = "cuda";

Expand All @@ -46,10 +46,10 @@ public static async Task Train(string weightFolder, string checkPointName = "mod
var input = CreateDataset(dataset, pipeline.TypedTokenizer, Llama3_1ChatTemplateBuilder.Instance);

// create trainer
var sftTrainer = new CasualLMSupervisedFineTuningTrainer(pipeline, logger: logger);
var sftTrainer = new CausalLMSupervisedFineTuningTrainer(pipeline, logger: logger);

// Train the model
var option = new CasualLMSupervisedFineTuningTrainer.Option
var option = new CausalLMSupervisedFineTuningTrainer.Option
{
BatchSize = 1,
Device = device,
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.GenAI.Samples/MEAI/Phi3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public static async Task RunAsync(string weightFolder)
torch.set_default_dtype(defaultType);
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
var client = new Phi3CausalLMChatClient(pipeline);

var task = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ public static async Task RunAsync()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);
var question = @"write a C# program to calculate the factorial of a number";

// agent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ public static async Task RunChatCompletionSample()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);

var kernel = Kernel.CreateBuilder()
.AddGenAIChatCompletion(pipeline)
Expand Down Expand Up @@ -56,8 +56,8 @@ public static async Task RunTextGenerationSample()
var weightFolder = @"C:\Users\xiaoyuz\source\repos\Phi-3-mini-4k-instruct";
var tokenizerPath = Path.Combine(weightFolder, "tokenizer.model");
var tokenizer = Phi3TokenizerHelper.FromPretrained(tokenizerPath);
var model = Phi3ForCasualLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCasualLM>(tokenizer, model, device);
var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt8: true);
var pipeline = new CausalLMPipeline<LlamaTokenizer, Phi3ForCausalLM>(tokenizer, model, device);

var kernel = Kernel.CreateBuilder()
.AddGenAITextGeneration(pipeline)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

namespace Microsoft.ML.GenAI.Core.Trainer;

public class CasualLMSupervisedFineTuningTrainer
public class CausalLMSupervisedFineTuningTrainer
{
private readonly ILogger<CasualLMSupervisedFineTuningTrainer>? _logger;
private readonly ILogger<CausalLMSupervisedFineTuningTrainer>? _logger;
private readonly ICausalLMPipeline _pipeline;

public CasualLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CasualLMSupervisedFineTuningTrainer>? logger = null)
public CausalLMSupervisedFineTuningTrainer(ICausalLMPipeline pipeline, ILogger<CausalLMSupervisedFineTuningTrainer>? logger = null)
{
_logger = logger;
_pipeline = pipeline;
Expand Down
26 changes: 13 additions & 13 deletions src/Microsoft.ML.GenAI.Core/Utility/AttentionMaskConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ namespace Microsoft.ML.GenAI.Core;

public class AttentionMaskConverter
{
private readonly bool _isCasual;
private readonly bool _isCausal;
private readonly int? _slidingWindow;

public AttentionMaskConverter(bool isCausal, int? slidingWindow)
{
this._isCasual = isCausal;
this._isCausal = isCausal;
this._slidingWindow = slidingWindow;
}

Expand All @@ -42,42 +42,42 @@ public Tensor To4D(

// create causal mask
// [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
Tensor? casual4dMask = null;
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCasual)
Tensor? causal4dMask = null;
if ((inputShape[^1] > 1 || this._slidingWindow is not null) && this._isCausal)
{
if (keyValueLength is null)
{
throw new ArgumentException("key_value_length should be provided when attention_mask is causal");
}

var pastKeyValuesLength = keyValueLength.Value - queryLength;
casual4dMask = MakeCasualMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
causal4dMask = MakeCausalMask(inputShape, dType, attentionMask2d.device, pastKeyValuesLength, this._slidingWindow);
}
else if (this._slidingWindow is not null)
{
throw new NotImplementedException("Sliding window is not supported for non-causal masks");
}

var expandedAttnMask = ExpandMask(attentionMask2d, dType, queryLength).to(attentionMask2d.device);
if (casual4dMask is not null)
if (causal4dMask is not null)
{
var min = torch.finfo(dType).min;
expandedAttnMask = casual4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
expandedAttnMask = causal4dMask.masked_fill(expandedAttnMask.to(ScalarType.Bool), min);
}

return expandedAttnMask;
}

public Tensor? ToCasual4D(
public Tensor? ToCausal4D(
int batchSize,
int queryLength,
int keyValueLength,
ScalarType dType,
Device device)
{
if (!_isCasual)
if (!_isCausal)
{
throw new ArgumentException("This is not a casual mask");
throw new ArgumentException("This is not a causal mask");
}

long[] inputShape = [batchSize, queryLength];
Expand All @@ -88,13 +88,13 @@ public Tensor To4D(
Tensor? causal4DMask = null;
if (queryLength > 1 || this._slidingWindow is int)
{
causal4DMask = MakeCasualMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
causal4DMask = MakeCausalMask(inputShape, dType, device, pastKeyValueLength, this._slidingWindow);
}

return causal4DMask;
}

public static Tensor MakeCasualMask(
public static Tensor MakeCausalMask(
long[] inputIdsShape,
ScalarType dType,
Device device,
Expand Down Expand Up @@ -158,7 +158,7 @@ public static Tensor MakeCasualMask(
return converter.To4D(attentionMask, (int)inputShape[1], dType, keyValueLength);
}

return converter.ToCasual4D(batchSize, queryLength, keyValueLength, dType, device);
return converter.ToCausal4D(batchSize, queryLength, keyValueLength, dType, device);
}

public static Tensor ExpandMask(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static class SemanticKernelExtension
{
public static IKernelBuilder AddGenAIChatCompletion(
this IKernelBuilder builder,
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
{
builder.Services.AddSingleton<IChatCompletionService>(new Phi3CausalLMChatCompletionService(pipeline));

Expand All @@ -24,7 +24,7 @@ public static IKernelBuilder AddGenAIChatCompletion(

public static IKernelBuilder AddGenAITextGeneration(
this IKernelBuilder builder,
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline)
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline)
{
builder.Services.AddSingleton<ITextGenerationService>(new Phi3CausalLMTextGenerationService(pipeline));

Expand Down
12 changes: 6 additions & 6 deletions src/Microsoft.ML.GenAI.Phi/Module/Phi2Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
// use 4d attention mask
if (attentionMask is not null)
{
attentionMask = this.Prepare4DCasualAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
attentionMask = this.Prepare4DCausalAttentionMask(attentionMask, seqLen, pastKeyValueLength, inputEmbeddings.dtype);
}

var hiddenStates = inputEmbeddings;
Expand All @@ -100,7 +100,7 @@ public override (Tensor, Tensor?, Tensor?) forward(
return (hiddenStates, null, null);
}

private Tensor Prepare4DCasualAttentionMask(
private Tensor Prepare4DCausalAttentionMask(
Tensor attentionMask,
int queryLength,
int pastKeyValueLength,
Expand All @@ -110,11 +110,11 @@ private Tensor Prepare4DCasualAttentionMask(
var seqLen = attentionMask.shape[1];
Contract.Assert(seqLen == queryLength, "seqLen must be equal to queryLength");
var targetLength = queryLength + pastKeyValueLength;
var casual4DMask = this.MakeCasualAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
var causal4DMask = this.MakeCausalAttentionMask(batchSize, queryLength, pastKeyValueLength, attentionMask.device, dtype);
var expandedMask = this.ExpandMask(attentionMask, dtype, queryLength).to(attentionMask.device);

casual4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
return casual4DMask;
causal4DMask.masked_fill_(expandedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
return causal4DMask;
}

private Tensor ExpandMask(
Expand All @@ -132,7 +132,7 @@ private Tensor ExpandMask(

return invertedMask.masked_fill(invertedMask.to_type(ScalarType.Bool), torch.finfo(dtype).min);
}
private Tensor MakeCasualAttentionMask(
private Tensor MakeCausalAttentionMask(
int batchSize,
int targetLen,
int pastKeyValueLength,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@

namespace Microsoft.ML.GenAI.Phi;

public class Phi2ForCasualLM : nn.Module<CausalLMModelInput, CausalLMModelOutput>
public class Phi2ForCausalLM : nn.Module<CausalLMModelInput, CausalLMModelOutput>
{
#pragma warning disable MSML_PrivateFieldName // Private field name not in: _camelCase format
private readonly Phi2Model model;
private readonly GenAILinear lm_head;
#pragma warning restore MSML_PrivateFieldName // Private field name not in: _camelCase format

public Phi2ForCasualLM(Phi2Config config)
: base(nameof(Phi2ForCasualLM))
public Phi2ForCausalLM(Phi2Config config)
: base(nameof(Phi2ForCausalLM))
{
this.model = new Phi2Model(config);
this.lm_head = new GenAILinear(config.HiddenSize, config.VocabSize, dtype: config.Dtype);
Expand All @@ -47,7 +47,7 @@ public override CausalLMModelOutput forward(CausalLMModelInput input) // use_cac
return new CausalLMModelOutput(lastHiddenState: hiddenState, logits: lmLogits);
}

public static Phi2ForCasualLM FromPretrained(
public static Phi2ForCausalLM FromPretrained(
string modelFolder,
string configName = "config.json",
string checkPointName = "model.safetensors.index.json",
Expand All @@ -58,7 +58,7 @@ public static Phi2ForCasualLM FromPretrained(
var config = Path.Join(modelFolder, configName);
var modelConfig = JsonSerializer.Deserialize<Phi2Config>(File.ReadAllText(config)) ?? throw new ArgumentNullException(nameof(config));
modelConfig.Dtype = torchDtype;
var wrapper = new Phi2ForCasualLM(modelConfig);
var wrapper = new Phi2ForCausalLM(modelConfig);
var loadedParameters = new Dictionary<string, bool>();
wrapper.load_checkpoint(path: modelFolder, checkpointName: checkPointName, strict: true, loadedParameters: loadedParameters, useTqdm: useTqdm);
wrapper = wrapper.to(device);
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMAgent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ namespace Microsoft.ML.GenAI.Phi;
public class Phi3Agent : IStreamingAgent
{
private const char Newline = '\n';
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> _pipeline;
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> _pipeline;
private readonly string? _systemMessage;
private readonly IAutoGenChatTemplateBuilder _templateBuilder;

public Phi3Agent(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
string name,
string? systemMessage = "you are a helpful assistant",
IAutoGenChatTemplateBuilder? templateBuilder = null)
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.GenAI.Phi/Phi3/Phi3CausalLMChatClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@

namespace Microsoft.ML.GenAI.Phi;

public class Phi3CausalLMChatClient : CausalLMPipelineChatClient<Tokenizer, Phi3ForCasualLM>
public class Phi3CausalLMChatClient : CausalLMPipelineChatClient<Tokenizer, Phi3ForCausalLM>
{
private readonly string _eotToken = "<|end|>";

public Phi3CausalLMChatClient(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
IMEAIChatTemplateBuilder? templateBuilder = null,
ChatClientMetadata? metadata = null)
: base(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ namespace Microsoft.ML.GenAI.Phi;

public class Phi3CausalLMChatCompletionService : IChatCompletionService
{
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> _pipeline;
private readonly ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> _pipeline;
private readonly Phi3CausalLMTextGenerationService _textGenerationService;
private readonly ISemanticKernelChatTemplateBuilder _templateBuilder;

public Phi3CausalLMChatCompletionService(
ICausalLMPipeline<Tokenizer, Phi3ForCasualLM> pipeline,
ICausalLMPipeline<Tokenizer, Phi3ForCausalLM> pipeline,
ISemanticKernelChatTemplateBuilder? templateBuilder = null)
{
_pipeline = pipeline;
Expand Down
Loading