Skip to content

Commit

Permalink
#1. Support model vector quantization. Model size will be reduced to 1…
Browse files Browse the repository at this point in the history
…/4 the size of the original model

#2. Remove NO_SUPPORT_PARALLEL_LIB tag
#3. Change L-BFGS history parameter array type from double to FixedBigArray in order to support larger feature size
  • Loading branch information
zhongkaifu committed Feb 13, 2016
1 parent e0fbe8a commit f329ca1
Show file tree
Hide file tree
Showing 22 changed files with 233 additions and 791 deletions.
5 changes: 4 additions & 1 deletion CRFSharpConsole/CRFSharpConsole.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="AdvUtils, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\dll\AdvUtils.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
Expand All @@ -61,7 +65,6 @@
<Compile Include="DecoderConsole.cs" />
<Compile Include="EncoderConsole.cs" />
<Compile Include="Program.cs" />
<Compile Include="ShrinkConsole.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Core\CRFSharpWrapper\CRFSharpWrapper.csproj">
Expand Down
5 changes: 4 additions & 1 deletion CRFSharpConsole/CRFSharpConsole.csproj.user
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<StartArguments>-encode -modelfile model.dat -trainfile input.txt -template template.txt -thread 4</StartArguments>
<StartArguments>-encode -template template.1 -trainfile train.data -modelfile ner.model -maxiter 100 -minfeafreq 1 -mindiff 0.001 -thread 4 -debug 1 -slotrate 0.95</StartArguments>
<StartWorkingDirectory>C:\Users\Admin\Source\Repos\CRFSharp2\CRFSharpConsole\bin\Release\</StartWorkingDirectory>
</PropertyGroup>
<PropertyGroup>
Expand All @@ -14,4 +14,7 @@
<FallbackCulture>en-US</FallbackCulture>
<VerifyUploadedFiles>false</VerifyUploadedFiles>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<StartArguments>-encode -template template.txt -trainfile tcorpus.txt -modelfile pos.model -maxiter 100 -minfeafreq 1 -mindiff 0.0001 -thread 4 -debug 1 -slotrate 0.95</StartArguments>
</PropertyGroup>
</Project>
14 changes: 7 additions & 7 deletions CRFSharpConsole/DecoderConsole.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using CRFSharp;
using System.Threading.Tasks;
using System.Collections.Concurrent;
using AdvUtils;

namespace CRFSharpConsole
{
Expand Down Expand Up @@ -113,13 +114,13 @@ bool Decode(CRFSharpWrapper.DecoderArgs options)

if (File.Exists(options.strInputFileName) == false)
{
Console.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
Logger.WriteLine("FAILED: Open {0} file failed.", options.strInputFileName);
return false;
}

if (File.Exists(options.strModelFileName) == false)
{
Console.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
Logger.WriteLine("FAILED: Open {0} file failed.", options.strModelFileName);
return false;
}

Expand All @@ -137,11 +138,10 @@ bool Decode(CRFSharpWrapper.DecoderArgs options)

//Create CRFSharp wrapper instance. It's a global instance
var crfWrapper = new CRFSharpWrapper.Decoder();
//Load model from file
if (crfWrapper.LoadModel(options.strModelFileName) == false)
{
return false;
}

//Load encoded model from file
Logger.WriteLine("Loading model from {0}", options.strModelFileName);
crfWrapper.LoadModel(options.strModelFileName);

var queueRecords = new ConcurrentQueue<List<List<string>>>();
var queueSegRecords = new ConcurrentQueue<List<List<string>>>();
Expand Down
16 changes: 9 additions & 7 deletions CRFSharpConsole/EncoderConsole.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using CRFSharpWrapper;
using AdvUtils;

namespace CRFSharpConsole
{
Expand Down Expand Up @@ -79,6 +77,9 @@ public void Run(string [] args)
case "retrainmodel":
options.strRetrainModelFileName = value;
break;
case "vq":
options.bVQ = (int.Parse(value) != 0) ? true : false;
break;
case "regtype":
if (value.ToLower().Trim() == "l1")
{
Expand All @@ -90,15 +91,15 @@ public void Run(string [] args)
}
else
{
Console.WriteLine("Invalidated regularization type");
Logger.WriteLine("Invalidated regularization type");
Usage();
return;
}
break;
default:
var cc = Console.ForegroundColor;
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("No supported {0} parameter, exit", key);
Logger.WriteLine("No supported {0} parameter, exit", key);
Console.ForegroundColor = cc;
Usage();
return;
Expand All @@ -108,7 +109,7 @@ public void Run(string [] args)
{
var cc = Console.ForegroundColor;
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("{0} is invalidated parameter.", key);
Logger.WriteLine("{0} is invalidated parameter.", key);
Console.ForegroundColor = cc;
Usage();
return;
Expand Down Expand Up @@ -146,6 +147,7 @@ private static void Usage()
Console.WriteLine("\t-regtype <string> : regularization type (L1 and L2). L1 will generate a sparse model. Default is L2");
Console.WriteLine("\t-hugelexmem <int> : build lexical dictionary in huge mode and shrinking start when used memory reaches this value. This mode can build more lexical items, but slowly. Value ranges [1,100] and default is disabled.");
Console.WriteLine("\t-retrainmodel <string> : the existed model for re-training.");
Console.WriteLine("\t-vq <int> : vector quantization value (0/1). Default value is 1");
Console.WriteLine("\t-debug <int> : debug level, default value is 1");
Console.WriteLine("\t 0 - no debug information output");
Console.WriteLine("\t 1 - only output raw lexical dictionary for feature set");
Expand All @@ -155,7 +157,7 @@ private static void Usage()
Console.WriteLine("Note: -hugelexmem is only used for special task, and it is not recommended for common task, since it costs lots of time for memory shrink in order to load more lexical features into memory");
Console.WriteLine();
Console.WriteLine("A command line example as follows:");
Console.WriteLine("\tCRFSharpConsole.exe -encode -template template.1 -trainfile ner.train -modelfile ner.model -maxiter 100 -minfeafreq 1 -mindiff 0.0001 -thread 4 -debug 1 -slotrate 0.95");
Console.WriteLine("\tCRFSharpConsole.exe -encode -template template.1 -trainfile ner.train -modelfile ner.model -maxiter 100 -minfeafreq 1 -mindiff 0.0001 -thread 4 -debug 2 -vq 1 -slotrate 0.95");
}
}
}
43 changes: 7 additions & 36 deletions CRFSharpConsole/Program.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using AdvUtils;

namespace CRFSharpConsole
{
Expand All @@ -10,14 +8,15 @@ class Program
static void Usage()
{
Console.WriteLine("Linear-chain CRF encoder & decoder by Zhongkai Fu ([email protected])");
Console.WriteLine("CRFSharpConsole [parameter list...]");
Console.WriteLine(" -encode [parameter list...] - Encode CRF model from given training corpus");
Console.WriteLine(" -decode [parameter list...] - Decode CRF model to label text");
Console.WriteLine(" -shrink [parameter list...] - Shrink encoded CRF model");
Console.WriteLine("CRFSharpConsole.exe [parameters list...]");
Console.WriteLine(" -encode [parameters list...] - Encode CRF model from training corpus");
Console.WriteLine(" -decode [parameters list...] - Decode CRF model on test corpus");
}

static void Main(string[] args)
{
Logger.LogFile = "CRFSharpConsole.log";

if (args.Length < 1)
{
Usage();
Expand All @@ -26,7 +25,6 @@ static void Main(string[] args)

var bEncoder = false;
var bDecoder = false;
var bShrink = false;

for (int index = 0; index < args.Length; index++)
{
Expand All @@ -50,21 +48,15 @@ static void Main(string[] args)
{
bDecoder = true;
}
if (strType == "shrink")
{
bShrink = true;
}
}

//Invalidated parameter
if (bEncoder == false && bDecoder == false && bShrink == false)
if (bEncoder == false && bDecoder == false)
{
Usage();
return;
}

//try
//{
if (bEncoder == true)
{
var encoderConsole = new EncoderConsole();
Expand All @@ -75,31 +67,10 @@ static void Main(string[] args)
var decoderConsole = new DecoderConsole();
decoderConsole.Run(args);
}
else if (bShrink == true)
{
var shrinkConsole = new ShrinkConsole();
shrinkConsole.Run(args);
}
else
{
Usage();
}
// }
//catch (System.AggregateException err)
//{
// Console.WriteLine("Error Message : {0}", err.Message);
// Console.WriteLine("Call stack : {0}", err.StackTrace);
// Console.WriteLine("Inner Exception : {0}", err.InnerException);
// foreach (Exception exp in err.InnerExceptions)
// {
// Console.WriteLine("Inner Exception in Collect: {0}", exp);
// }
//}
//catch (System.Exception err)
//{
// Console.WriteLine("Error Message : {0}", err.Message);
// Console.WriteLine("Call stack : {0}", err.StackTrace);
//}
}
}
}
23 changes: 0 additions & 23 deletions CRFSharpConsole/ShrinkConsole.cs

This file was deleted.

1 change: 0 additions & 1 deletion Core/CRFSharp/CRFSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
<Compile Include="encoder\Mcsrch.cs" />
<Compile Include="encoder\ModelWritter.cs" />
<Compile Include="encoder\DefaultFeatureLexicalDict.cs" />
<Compile Include="encoder\OrderableListPartitioner.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
Expand Down
26 changes: 0 additions & 26 deletions Core/CRFSharp/base/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public class Utils
public const int DEFAULT_CRF_MAX_WORD_NUM = 100;

public const int MODEL_TYPE_NORM = 100;
public const int MODEL_TYPE_SHRINKED = 101;


public const int ERROR_INVALIDATED_FEATURE = -8;
Expand Down Expand Up @@ -123,31 +122,6 @@ public static bool is_heap_empty(Heap H)
return H.size == 0;
}

/// <summary>
/// Returns the element stored at index 1 of the heap's <c>elem_ptr_list</c>.
/// </summary>
/// <param name="H">Heap to read from. It is dereferenced without a null check.</param>
/// <returns>The entry at <c>H.elem_ptr_list[1]</c>.</returns>
/// <remarks>
/// NOTE(review): index 1 is presumably the root of a 1-based heap layout, i.e. the
/// minimum element - confirm against the Heap insert/delete routines. No emptiness
/// check is performed here.
/// </remarks>
public static QueueElement find_min(Heap H)
{
    var slots = H.elem_ptr_list;
    return slots[1];
}

/// <summary>
/// Drops the heap's references to its element arrays and then clears the caller's
/// own reference to the heap.
/// </summary>
/// <param name="H">
/// Heap reference, passed by <c>ref</c> so that the caller's variable is set to
/// <c>null</c> on return. A <c>null</c> heap is accepted and left untouched.
/// </param>
public static void heap_clear(ref Heap H)
{
    if (H != null)
    {
        // Assigning null to a member that is already null is a no-op, so the
        // per-member null checks are not needed to get the same end state.
        H.elem_list = null;
        H.elem_ptr_list = null;

        // Finally detach the caller's variable from the heap instance itself.
        H = null;
    }
}

public static void heap_reset(Heap H)
{
if (H != null)
Expand Down
Loading

0 comments on commit f329ca1

Please sign in to comment.