Skip to content

Commit

Permalink
1. Support vision transformer model and add an application for image …
Browse files Browse the repository at this point in the history
…caption.

2. Update the base of rotary positional embedding (RoPE) from 10000 to 500000
3. Fix the memory usage spike when using a larger update frequency and validation
4. Add a new parameter (SaveModelEveryUpdates) to save a checkpoint every given number of updates.
5. Add cosine decay learning rate
  • Loading branch information
zhongkaifu committed Oct 10, 2023
1 parent bd152bf commit f61642f
Show file tree
Hide file tree
Showing 77 changed files with 2,083 additions and 2,187 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,5 @@ Temporary Items
/Tools/GPTConsole/bin/Release/net7.0
/Tools/GPTConsole/obj
/Tools/GPTConsole/bin/Debug/net7.0
/Tools/ImgSeqConsole/bin
/Tools/ImgSeqConsole/obj
12 changes: 12 additions & 0 deletions AdvUtils/Properties/launchSettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"profiles": {
"AdvUtils": {
"commandName": "Project",
"launchBrowser": true,
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"applicationUrl": "https://localhost:49915;http://localhost:49916"
}
}
}
27 changes: 9 additions & 18 deletions Seq2SeqSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SeqLabelConsole", "Tools\Se
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SeqClassificationConsole", "Tools\SeqClassificationConsole\SeqClassificationConsole.csproj", "{0FE4AB2B-DD9A-4BB5-B1BE-825F05D26998}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Seq2SeqClassificationConsole", "Tools\Seq2SeqClassificationConsole\Seq2SeqClassificationConsole.csproj", "{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SeqSimilarityConsole", "Tools\SeqSimilarityConsole\SeqSimilarityConsole.csproj", "{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tools", "Tools", "{C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SeqWebAPIs", "Tools\SeqWebAPIs\SeqWebAPIs.csproj", "{26272325-E3CD-4B9B-9A6D-043607E7DA6C}"
Expand Down Expand Up @@ -74,6 +70,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PythonPackage", "PythonPack
PyPackage\Seq2SeqSharp\__init__.py = PyPackage\Seq2SeqSharp\__init__.py
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImgSeqConsole", "Tools\ImgSeqConsole\ImgSeqConsole.csproj", "{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -120,18 +118,6 @@ Global
{0FE4AB2B-DD9A-4BB5-B1BE-825F05D26998}.Release|Any CPU.Build.0 = Release|Any CPU
{0FE4AB2B-DD9A-4BB5-B1BE-825F05D26998}.ReleaseCpuOnly|Any CPU.ActiveCfg = Release|Any CPU
{0FE4AB2B-DD9A-4BB5-B1BE-825F05D26998}.ReleaseCpuOnly|Any CPU.Build.0 = Release|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.Release|Any CPU.Build.0 = Release|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.ReleaseCpuOnly|Any CPU.ActiveCfg = Release|Any CPU
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74}.ReleaseCpuOnly|Any CPU.Build.0 = Release|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.Release|Any CPU.Build.0 = Release|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.ReleaseCpuOnly|Any CPU.ActiveCfg = Release|Any CPU
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B}.ReleaseCpuOnly|Any CPU.Build.0 = Release|Any CPU
{26272325-E3CD-4B9B-9A6D-043607E7DA6C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{26272325-E3CD-4B9B-9A6D-043607E7DA6C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{26272325-E3CD-4B9B-9A6D-043607E7DA6C}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -192,6 +178,12 @@ Global
{4DBA9DDA-569C-4F31-9C98-84837D2F8148}.Release|Any CPU.Build.0 = Release|Any CPU
{4DBA9DDA-569C-4F31-9C98-84837D2F8148}.ReleaseCpuOnly|Any CPU.ActiveCfg = Release|Any CPU
{4DBA9DDA-569C-4F31-9C98-84837D2F8148}.ReleaseCpuOnly|Any CPU.Build.0 = Release|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.Release|Any CPU.Build.0 = Release|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.ReleaseCpuOnly|Any CPU.ActiveCfg = Release|Any CPU
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B}.ReleaseCpuOnly|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -200,8 +192,6 @@ Global
{01D50B42-415C-4E85-860D-DBBC4A759649} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{D54B896D-B353-4D1D-8449-83A38F0097AD} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{0FE4AB2B-DD9A-4BB5-B1BE-825F05D26998} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{AA0B87F0-F5FF-4AA2-B481-124F426EAE74} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{2ABB8409-AA2B-46E9-9B7F-87C548201B4B} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{26272325-E3CD-4B9B-9A6D-043607E7DA6C} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{8EAD509F-0740-4435-AC3A-11E6AE6864C9} = {22BAD359-B5F0-4A7C-A313-CC8F01B49FE9}
{0E14AD11-A23C-45EC-8C3C-165E9B6B07E3} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
Expand All @@ -211,6 +201,7 @@ Global
{2DDDB48D-F912-4362-BF00-91F438009EAE} = {C15C991E-2657-4CF3-A976-84334A25DBD2}
{DFEE8ACE-4935-40D1-8B9B-1E9F7FFC6FAE} = {C15C991E-2657-4CF3-A976-84334A25DBD2}
{4DBA9DDA-569C-4F31-9C98-84837D2F8148} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
{D5B59E92-8BFF-4B30-844B-E95E67D5A68B} = {C2DFE174-7167-41D4-A8D2-EC8DC54AA71E}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CAE1535E-6AF4-4CD0-8E90-EBACD99D865A}
Expand Down
22 changes: 1 addition & 21 deletions Seq2SeqSharp/Applications/Encoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static MultiProcessorNetworkWrapper<IEncoder> CreateEncoders(IModel model
return encoder;
}

static public IWeightTensor Run(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, IEncoder encoder, IModel modelMetaData, ShuffleEnums shuffleType,
static public IWeightTensor Run(IComputeGraph computeGraph, IEncoder encoder, IModel modelMetaData, ShuffleEnums shuffleType,
IWeightTensor srcEmbedding, IWeightTensor posEmbeddings, IWeightTensor segmentEmbedding, List<List<int>> srcSntsIds, float[] originalSrcLengths, bool amp = false)
{
// Reset networks
Expand All @@ -68,26 +68,6 @@ static public IWeightTensor Run(IComputeGraph computeGraph, ISntPairBatch sntPai
return encOutput;
}

public static IWeightTensor BuildTensorForSourceTokenGroupAt(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, ShuffleEnums shuffleType, IEncoder encoder, IModel modelMetaData, IWeightTensor srcEmbedding, IWeightTensor posEmbeddings, IWeightTensor segmentEmbedding, int groupId)
{
var contextTokens = InsertCLSToken(sntPairBatch.GetSrcTokens(groupId));
var originalSrcContextLength = BuildInTokens.PadSentences(contextTokens);
var contextTokenIds = modelMetaData.SrcVocab.GetWordIndex(contextTokens);

IWeightTensor encContextOutput = InnerRunner(computeGraph, contextTokenIds, originalSrcContextLength, shuffleType, encoder, modelMetaData, srcEmbedding, posEmbeddings, segmentEmbedding);

int contextPaddedLen = contextTokens[0].Count;
float[] contextCLSIdxs = new float[sntPairBatch.BatchSize];
for (int j = 0; j < sntPairBatch.BatchSize; j++)
{
contextCLSIdxs[j] = j * contextPaddedLen;
}

var indice = computeGraph.CreateTensorWeights(new long[] { contextCLSIdxs.Length, 1 }, contextCLSIdxs);
IWeightTensor contextCLSOutput = computeGraph.IndexSelect(encContextOutput, indice);
return contextCLSOutput;
}

static private IWeightTensor InnerRunner(IComputeGraph computeGraph, List<List<int>> srcTokensList, float[] originalSrcLengths, ShuffleEnums shuffleType, IEncoder encoder, IModel modelMetaData,
IWeightTensor srcEmbedding, IWeightTensor posEmbedding, IWeightTensor segmentEmbedding, bool amp = false)
{
Expand Down
4 changes: 2 additions & 2 deletions Seq2SeqSharp/Applications/GPT.cs
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,13 @@ private List<List<List<string>>> CombineInputOutput(List<List<string>> input, Li
/// <param name="tgtSnts">A batch of output tokenized sentences in target side</param>
/// <param name="deviceIdIdx">The index of current device</param>
/// <returns>The cost of forward part</returns>
public override List<NetworkResult> RunForwardOnSingleDevice(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, DecodingOptions decodingOptions, bool isTraining)
public override List<NetworkResult> RunForwardOnSingleDevice(IComputeGraph computeGraph, IPairBatch sntPairBatch, DecodingOptions decodingOptions, bool isTraining)
{
(var decoder, var decoderFFLayer, var tgtEmbedding, var segmentEmbedding, var posEmbeddings) = GetNetworksOnDeviceAt(computeGraph.DeviceId);
List<NetworkResult> nrs = new List<NetworkResult>();

// Generate output decoder sentences
var tgtSnts = sntPairBatch.GetSrcTokens(0);
var tgtSnts = sntPairBatch.GetSrcTokens();
int batchSize = tgtSnts.Count;
var tgtTokensList = m_modelMetaData.TgtVocab.GetWordIndex(tgtSnts);
NetworkResult nr = new NetworkResult();
Expand Down
Loading

0 comments on commit f61642f

Please sign in to comment.