From c172f02afbecc3a8e624d706caedfc69838e7ed8 Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 21:46:34 +0100 Subject: [PATCH 01/12] Search for globally available dictionaries (on non-windows systems) --- src/spellchecker_hunspell.cc | 124 ++++++++++++++++++++++++++++++++++- src/spellchecker_hunspell.h | 3 + 2 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/spellchecker_hunspell.cc b/src/spellchecker_hunspell.cc index 0ca629b..cef1d7f 100644 --- a/src/spellchecker_hunspell.cc +++ b/src/spellchecker_hunspell.cc @@ -1,9 +1,32 @@ #include #include #include +#include #include "../vendor/hunspell/src/hunspell/hunspell.hxx" #include "spellchecker_hunspell.h" +#ifdef WIN32 + +#define SEARCH_PATHS "C:\\Hunspell\\" +#define DIR_SEPARATOR "\\" +#define PATH_SEPARATOR ";" + +#else + +// Not windows +#include +#include + +#define SEARCH_PATHS \ + "/usr/share/hunspell:" \ + "/usr/share/myspell:" \ + "/usr/share/myspell/dicts:" \ + "/Library/Spelling" +#define DIR_SEPARATOR "/" +#define PATH_SEPARATOR ":" + +#endif + namespace spellchecker { HunspellSpellchecker::HunspellSpellchecker() : hunspell(NULL), transcoder(NewTranscoder()) { } @@ -29,8 +52,13 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std: std::string lang = language; std::replace(lang.begin(), lang.end(), '-', '_'); - std::string affixpath = dirname + "/" + lang + ".aff"; - std::string dpath = dirname + "/" + lang + ".dic"; + std::string search_path; + search_path.append(dirname); + search_path.append(PATH_SEPARATOR); + search_path.append(SEARCH_PATHS); + + std::string affixpath = FindDictionary(search_path, lang, ".aff"); + std::string dpath = FindDictionary(search_path, lang, ".dic"); // TODO: This code is almost certainly jacked on Win32 for non-ASCII paths FILE* handle = fopen(dpath.c_str(), "r"); @@ -44,7 +72,12 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std: } std::vector HunspellSpellchecker::GetAvailableDictionaries(const std::string& path) { - return std::vector(); + std::string search_path; + search_path.assign(path); + search_path.append(PATH_SEPARATOR); + search_path.append(SEARCH_PATHS); + + return SearchAvailableDictionaries(search_path); } bool HunspellSpellchecker::IsMisspelled(const std::string& word) { @@ -141,4 +174,89 @@ std::vector HunspellSpellchecker::GetCorrectionsForMisspelling(cons return corrections; } +std::vector HunspellSpellchecker::SearchAvailableDictionaries(const std::string& path) { + const char * c_path = path.c_str(); + char * begin = const_cast(c_path); // TODO: Do we need this? + char * end = begin; + + std::vector my_list; + + while (1) { + while ( ! ((*end == *PATH_SEPARATOR) || (*end == '\0'))) { + end++; + } + + std::string search_path; + search_path.assign(begin, end - begin); + search_path.append(DIR_SEPARATOR); + +#ifdef WIN32 + // TODO: Windows compatibility? +#else + DIR* dir = opendir(search_path.c_str()); + + if (dir) { + struct dirent* de; + while ((de = readdir(dir))) { + std::string filename(de->d_name); + + if (filename.size() > 4 && filename.compare(filename.size() - 4, 4, ".dic") == 0) { + my_list.push_back(filename.substr(0, filename.size() - 4)); + } + else if (filename.size() > 7 && filename.compare(filename.size() - 7, 7, ".dic.hz") == 0) { + my_list.push_back(filename.substr(0, filename.size() - 7)); + } + } + + closedir(dir); + } +#endif + + if (*end == '\0') { + return my_list; + } + + end++; + begin = end; + } +} + +std::string HunspellSpellchecker::FindDictionary(const std::string& path, const std::string& language, const std::string& extension) { + const char * c_path = path.c_str(); + char * begin = const_cast(c_path); // TODO: Do we need this? + char * end = begin; + + while (1) { + while ( ! ((*end == *PATH_SEPARATOR) || (*end == '\0'))) { + end++; + } + + std::string file_path; + file_path.assign(begin, end - begin); + file_path.append(DIR_SEPARATOR); + file_path.append(language); + file_path.append(extension); + + std::ifstream f; + f.open(file_path, std::ios_base::in); + if (f.is_open()) { + return file_path; + } + + file_path.append(".hz"); + + f.open(file_path, std::ios_base::in); + if (f.is_open()) { + return file_path; + } + + if (*end == '\0') { + return NULL; + } + + end++; + begin = end; + } +} + } // namespace spellchecker diff --git a/src/spellchecker_hunspell.h b/src/spellchecker_hunspell.h index bcc9d65..8114b39 100644 --- a/src/spellchecker_hunspell.h +++ b/src/spellchecker_hunspell.h @@ -24,6 +24,9 @@ class HunspellSpellchecker : public SpellcheckerImplementation { private: Hunspell* hunspell; Transcoder *transcoder; + + std::vector SearchAvailableDictionaries(const std::string& path); + std::string FindDictionary(const std::string& path, const std::string& language, const std::string& extension); }; } // namespace spellchecker From f458574ed4cc2e295d3d155bc30d8ad4e9f1cc90 Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 22:12:32 +0100 Subject: [PATCH 02/12] Simplify string concatenation --- src/spellchecker_hunspell.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/spellchecker_hunspell.cc b/src/spellchecker_hunspell.cc index cef1d7f..a5e7160 100644 --- a/src/spellchecker_hunspell.cc +++ b/src/spellchecker_hunspell.cc @@ -52,10 +52,7 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std: std::string lang = language; std::replace(lang.begin(), lang.end(), '-', '_'); - std::string search_path; - search_path.append(dirname); - search_path.append(PATH_SEPARATOR); - search_path.append(SEARCH_PATHS); + std::string search_path = dirname + PATH_SEPARATOR + SEARCH_PATHS; std::string affixpath = FindDictionary(search_path, lang, ".aff"); std::string dpath = FindDictionary(search_path, lang, ".dic"); @@ -72,10 +69,7 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std: } std::vector HunspellSpellchecker::GetAvailableDictionaries(const std::string& path) { - std::string search_path; - search_path.assign(path); - search_path.append(PATH_SEPARATOR); - search_path.append(SEARCH_PATHS); + std::string search_path = path + PATH_SEPARATOR + SEARCH_PATHS; return SearchAvailableDictionaries(search_path); } From 1d86e8bf32d4021099b980b8be9a3cef66bc36fd Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 22:21:58 +0100 Subject: [PATCH 03/12] Fix: Correctly handle dictionaries which cannot be found --- src/spellchecker_hunspell.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/spellchecker_hunspell.cc b/src/spellchecker_hunspell.cc index a5e7160..660cf15 100644 --- a/src/spellchecker_hunspell.cc +++ b/src/spellchecker_hunspell.cc @@ -57,12 +57,9 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std: std::string affixpath = FindDictionary(search_path, lang, ".aff"); std::string dpath = FindDictionary(search_path, lang, ".dic"); - // TODO: This code is almost certainly jacked on Win32 for non-ASCII paths - FILE* handle = fopen(dpath.c_str(), "r"); - if (!handle) { + if (dpath.compare("") == 0) { return false; } - fclose(handle); hunspell = new Hunspell(affixpath.c_str(), dpath.c_str()); return true; @@ -245,7 +242,7 @@ std::string HunspellSpellchecker::FindDictionary(const std::string& path, const } if (*end == '\0') { - return NULL; + return ""; } end++; From ea396b997ee5514d094a7f844b10602dc31740fd Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 22:27:33 +0100 Subject: [PATCH 04/12] Remove guard in spec which disables getAvailableDictionaries test on linux, CI or when hunspell is preferred --- spec/spellchecker-spec.coffee | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index cc1013c..ca706f7 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -158,10 +158,6 @@ describe "SpellChecker", -> @fixture.setDictionary defaultLanguage, dictionaryDirectory it "returns an array of string dictionary names", -> - # NB: getAvailableDictionaries is nop'ped in hunspell and it also doesn't - # work inside Appveyor's CI environment - return if process.platform is 'linux' or process.env.CI or process.env.SPELLCHECKER_PREFER_HUNSPELL - dictionaries = @fixture.getAvailableDictionaries() expect(Array.isArray(dictionaries)).toBe true From 83535d4283f7c43dd27cc423bddc75ecaed4905d Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 22:29:35 +0100 Subject: [PATCH 05/12] Fix typo in spec --- spec/spellchecker-spec.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index ca706f7..547cf31 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -164,7 +164,7 @@ describe "SpellChecker", -> expect(dictionaries.length).toBeGreaterThan 0 for dictionary in dictionaries.length expect(typeof dictionary).toBe 'string' - expect(diction.length).toBeGreaterThan 0 + expect(dictionary.length).toBeGreaterThan 0 describe ".setDictionary(lang, dictDirectory)", -> it "sets the spell checker's language, and dictionary directory", -> From f3210bdb47d038c43bee7f39e4622a0666d6c97d Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 22:48:48 +0100 Subject: [PATCH 06/12] Fix bug which prevented the path argument to be passed through; Add some tests for getAvailableDictionaries; Add missing semicolon --- lib/spellchecker.js | 2 +- spec/spellchecker-spec.coffee | 7 +++++-- src/main.cc | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/spellchecker.js b/lib/spellchecker.js index 90c72e0..3f754fd 100644 --- a/lib/spellchecker.js +++ b/lib/spellchecker.js @@ -69,7 +69,7 @@ var getDictionaryPath = function() { } catch (error) { } return dict; -} +}; module.exports = { setDictionary: setDictionary, diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index 547cf31..4ba79fd 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -158,10 +158,13 @@ describe "SpellChecker", -> @fixture.setDictionary defaultLanguage, dictionaryDirectory it "returns an array of string dictionary names", -> - dictionaries = @fixture.getAvailableDictionaries() + dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true - expect(dictionaries.length).toBeGreaterThan 0 + expect(dictionaries.length).toBeGreaterThan 3 + expect(dictionaries).toContain('en_US'); + expect(dictionaries).toContain('de_DE_frami'); + expect(dictionaries).toContain('fr'); for dictionary in dictionaries.length expect(typeof dictionary).toBe 'string' expect(dictionary.length).toBeGreaterThan 0 diff --git a/src/main.cc b/src/main.cc index 673837e..6c75dea 100644 --- a/src/main.cc +++ b/src/main.cc @@ -120,7 +120,7 @@ class Spellchecker : public Nan::ObjectWrap { std::string path = "."; if (info.Length() > 0) { - std::string path = *String::Utf8Value(info[0]); + path = *String::Utf8Value(info[0]); } std::vector dictionaries = From 433afd3f774618c28590121edde52f82a1ea2b6a Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 23:02:32 +0100 Subject: [PATCH 07/12] Make spec for getAvailableDictionaries when using hunspell/linux more specific --- spec/spellchecker-spec.coffee | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index 4ba79fd..3c0fa41 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -161,13 +161,21 @@ describe "SpellChecker", -> dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true + expect(dictionaries.length).toBeGreaterThan 0 + for dictionary in dictionaries.length + expect(typeof dictionary).toBe 'string' + expect(dictionary.length).toBeGreaterThan 0 + + it "returns the right dictionary names when using hunspell on linux", -> + return if process.platform is not 'linux' + + dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory + expect(Array.isArray(dictionaries)).toBe true + expect(dictionaries.length).toBeGreaterThan 3 expect(dictionaries).toContain('en_US'); expect(dictionaries).toContain('de_DE_frami'); expect(dictionaries).toContain('fr'); - for dictionary in dictionaries.length - expect(typeof dictionary).toBe 'string' - expect(dictionary.length).toBeGreaterThan 0 describe ".setDictionary(lang, dictDirectory)", -> it "sets the spell checker's language, and dictionary directory", -> From a91be38d8055ccef939a105e9cd5a13c438a8185 Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 23:07:12 +0100 Subject: [PATCH 08/12] Fix linux/hunspell spec --- spec/spellchecker-spec.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index 3c0fa41..3c6b1b3 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -167,7 +167,7 @@ describe "SpellChecker", -> expect(dictionary.length).toBeGreaterThan 0 it "returns the right dictionary names when using hunspell on linux", -> - return if process.platform is not 'linux' + return if not (process.platform is 'linux') dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true From 6d7275f405235806d3cd1b2e2a1f674b76eac3ba Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Thu, 9 Feb 2017 23:11:05 +0100 Subject: [PATCH 09/12] Disable getAvailableDictionaries on windows with hunspell preferred --- spec/spellchecker-spec.coffee | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index 3c6b1b3..c5fc68e 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -158,6 +158,9 @@ describe "SpellChecker", -> @fixture.setDictionary defaultLanguage, dictionaryDirectory it "returns an array of string dictionary names", -> + # NB: getAvailableDictionaries is nop'ped in hunspell on windows + return if process.platform is 'win32' and process.env.SPELLCHECKER_PREFER_HUNSPELL + dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true From 21b5ed9926865ac839830d7217646ff8f01ad728 Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Fri, 10 Feb 2017 00:20:51 +0100 Subject: [PATCH 10/12] Add windows compatibility when using getAvailableDictionaries in hunspell --- spec/spellchecker-spec.coffee | 5 +---- src/spellchecker_hunspell.cc | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index c5fc68e..f20b1e2 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -158,9 +158,6 @@ describe "SpellChecker", -> @fixture.setDictionary defaultLanguage, dictionaryDirectory it "returns an array of string dictionary names", -> - # NB: getAvailableDictionaries is nop'ped in hunspell on windows - return if process.platform is 'win32' and process.env.SPELLCHECKER_PREFER_HUNSPELL - dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true @@ -170,7 +167,7 @@ describe "SpellChecker", -> expect(dictionary.length).toBeGreaterThan 0 it "returns the right dictionary names when using hunspell on linux", -> - return if not (process.platform is 'linux') + return if not (process.platform is 'linux') and not (process.platform is 'win32' and process.env.SPELLCHECKER_PREFER_HUNSPELL) dictionaries = @fixture.getAvailableDictionaries dictionaryDirectory expect(Array.isArray(dictionaries)).toBe true diff --git a/src/spellchecker_hunspell.cc b/src/spellchecker_hunspell.cc index 660cf15..4bf6f8e 100644 --- a/src/spellchecker_hunspell.cc +++ b/src/spellchecker_hunspell.cc @@ -7,6 +7,8 @@ #ifdef WIN32 +#include + #define SEARCH_PATHS "C:\\Hunspell\\" #define DIR_SEPARATOR "\\" #define PATH_SEPARATOR ";" @@ -182,7 +184,29 @@ std::vector HunspellSpellchecker::SearchAvailableDictionaries(const search_path.append(DIR_SEPARATOR); #ifdef WIN32 - // TODO: Windows compatibility? + search_path.append("*"); + + WIN32_FIND_DATA search_data; + memset(&search_data, 0, sizeof(WIN32_FIND_DATA)); + + HANDLE handle = FindFirstFile(search_path.c_str(), &search_data); + + while (handle != INVALID_HANDLE_VALUE) { + std::string filename(search_data.cFileName); + + if (filename.size() > 4 && filename.compare(filename.size() - 4, 4, ".dic") == 0) { + my_list.push_back(filename.substr(0, filename.size() - 4)); + } + else if (filename.size() > 7 && filename.compare(filename.size() - 7, 7, ".dic.hz") == 0) { + my_list.push_back(filename.substr(0, filename.size() - 7)); + } + + if (FindNextFile(handle, &search_data) == FALSE) { + break; + } + } + + FindClose(handle); #else DIR* dir = opendir(search_path.c_str()); From 08c0acd49dbb02bcbf502ddba2db594f75d7077f Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Fri, 10 Feb 2017 16:32:11 +0100 Subject: [PATCH 11/12] Force en_US when testing corrections (should fix tests on Mac) --- spec/spellchecker-spec.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/spellchecker-spec.coffee b/spec/spellchecker-spec.coffee index f20b1e2..03d2262 100644 --- a/spec/spellchecker-spec.coffee +++ b/spec/spellchecker-spec.coffee @@ -107,7 +107,7 @@ describe "SpellChecker", -> describe ".getCorrectionsForMisspelling(word)", -> beforeEach -> @fixture = new Spellchecker() - @fixture.setDictionary defaultLanguage, dictionaryDirectory + @fixture.setDictionary 'en_US', dictionaryDirectory it "returns an array of possible corrections", -> corrections = @fixture.getCorrectionsForMisspelling('worrd') From e2dcdb4b72b25ebf189025aa4b67465b323b303d Mon Sep 17 00:00:00 2001 From: Bert Hekman Date: Sat, 11 Feb 2017 16:36:09 +0100 Subject: [PATCH 12/12] Use std::istringstream and std::getline to iterate through the paths --- src/spellchecker_hunspell.cc | 48 +++++++++--------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/src/spellchecker_hunspell.cc b/src/spellchecker_hunspell.cc index 4bf6f8e..5e98fdf 100644 --- a/src/spellchecker_hunspell.cc +++ b/src/spellchecker_hunspell.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "../vendor/hunspell/src/hunspell/hunspell.hxx" #include "spellchecker_hunspell.h" @@ -11,7 +12,7 @@ #define SEARCH_PATHS "C:\\Hunspell\\" #define DIR_SEPARATOR "\\" -#define PATH_SEPARATOR ";" +#define PATH_SEPARATOR ';' #else @@ -25,7 +26,7 @@ "/usr/share/myspell/dicts:" \ "/Library/Spelling" #define DIR_SEPARATOR "/" -#define PATH_SEPARATOR ":" +#define PATH_SEPARATOR ':' #endif @@ -168,19 +169,10 @@ std::vector HunspellSpellchecker::GetCorrectionsForMisspelling(cons } std::vector HunspellSpellchecker::SearchAvailableDictionaries(const std::string& path) { - const char * c_path = path.c_str(); - char * begin = const_cast(c_path); // TODO: Do we need this? - char * end = begin; - std::vector my_list; + std::istringstream path_stream(path); - while (1) { - while ( ! ((*end == *PATH_SEPARATOR) || (*end == '\0'))) { - end++; - } - - std::string search_path; - search_path.assign(begin, end - begin); + for (std::string search_path; std::getline(path_stream, search_path, PATH_SEPARATOR); ) { search_path.append(DIR_SEPARATOR); #ifdef WIN32 @@ -226,28 +218,15 @@ std::vector HunspellSpellchecker::SearchAvailableDictionaries(const closedir(dir); } #endif - - if (*end == '\0') { - return my_list; - } - - end++; - begin = end; } + + return my_list; } std::string HunspellSpellchecker::FindDictionary(const std::string& path, const std::string& language, const std::string& extension) { - const char * c_path = path.c_str(); - char * begin = const_cast(c_path); // TODO: Do we need this? - char * end = begin; + std::istringstream path_stream(path); - while (1) { - while ( ! ((*end == *PATH_SEPARATOR) || (*end == '\0'))) { - end++; - } - - std::string file_path; - file_path.assign(begin, end - begin); + for (std::string file_path; std::getline(path_stream, file_path, PATH_SEPARATOR); ) { file_path.append(DIR_SEPARATOR); file_path.append(language); file_path.append(extension); @@ -264,14 +243,9 @@ std::string HunspellSpellchecker::FindDictionary(const std::string& path, const if (f.is_open()) { return file_path; } - - if (*end == '\0') { - return ""; - } - - end++; - begin = end; } + + return ""; } } // namespace spellchecker