Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-irbis committed Jan 30, 2019
1 parent df35781 commit 0a953b5
Show file tree
Hide file tree
Showing 71 changed files with 709 additions and 554 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ env:
- DEADLINKS_VERS=0.3.0
- RUSTFLAGS="-C link-dead-code"
- RUST_LOG=off
- TARPAULIN_VERS=0.6.8
- TARPAULIN_VERS=0.7.0

install:
- cargo install --list
Expand All @@ -52,7 +52,7 @@ script: skip
jobs:
include:
- stage: test
rust: 1.27.0
rust: 1.31.0
script:
- cargo test --verbose --all -- --test-threads=1

Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
- cargo clippy -- -D warnings

- stage: quality
rust: nightly-2018-11-18
rust: nightly-2019-01-29
env:
- FEATURE=cov
install:
Expand Down
6 changes: 4 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ readme = "README.md"
homepage = "https://github.com/irbis-labs/rsmorphy"
repository = "https://github.com/irbis-labs/rsmorphy"
documentation = "https://docs.rs/rsmorphy/"
edition = "2018"
publish = true

[badges]
Expand Down Expand Up @@ -41,9 +42,10 @@ serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
string_cache = "0.7"
unicode_categories = "0.1"
uc = { version = "0.1", package = "unicode_categories" }

rsmorphy-dict-ru = { version = "0.1", path = "./dict/ru" }
dict-ru = { version = "0.1", package = "rsmorphy-dict-ru", path = "./dict/ru" }
#dict-uk = { version = "0.1", package = "rsmorphy-dict-uk", path = "./dict/uk" }


[dev-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

[![Build Status](https://travis-ci.org/irbis-labs/rsmorphy.svg)](https://travis-ci.org/irbis-labs/rsmorphy)
[![Coverage Status](https://coveralls.io/repos/github/irbis-labs/rsmorphy/badge.svg?branch=master)](https://coveralls.io/github/irbis-labs/rsmorphy?branch=master)
![Minimal rust version 1.27](https://img.shields.io/badge/rustc-1.27+-green.svg)
![Nightly rust version from August 25, 2018](https://img.shields.io/badge/rustc-nightly_2018--08--25-yellow.svg)
![Minimal rust version 1.32](https://img.shields.io/badge/rustc-1.32+-green.svg)
![Nightly rust version from January 29, 2019](https://img.shields.io/badge/rustc-nightly_2019--01--29-yellow.svg)

[![Join the chat at https://gitter.im/rsmorphy/Lobby](https://badges.gitter.im/rsmorphy/Lobby.svg)](https://gitter.im/rsmorphy/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Waffle.io - Columns and their card count](https://badge.waffle.io/irbis-labs/rsmorphy.svg?columns=inbox,backlog,in%20progress,done)](https://waffle.io/irbis-labs/rsmorphy)
Expand Down
14 changes: 1 addition & 13 deletions dict/ru/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,7 @@ fn main() -> io::Result<()> {
let mut f = File::create("src/release.rs")
.expect("Can't create a file");

let res = Command::new("rustc")
.arg("--version")
.output()
.expect("Can't get rustc version");

let version = from_utf8(&res.stdout)
.expect("Can't convert from utf-8");

writeln!(f, r"pub static RUSTC_VERSION: &str = {:?};", version)
.expect("Can't write to a file");

writeln!(f, r"pub const DICT_PATH: &str = {:?};", dict_path)
.expect("Can't write to a file");
writeln!(f, r"pub const DICT_PATH: &str = {:?};", dict_path)?;

Ok(())
}
7 changes: 4 additions & 3 deletions examples/enc-dec.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
extern crate rsmorphy;

use rsmorphy::{prelude::*, rsmorphy_dict_ru};
use dict_ru;
use rsmorphy::prelude::*;

pub mod util;

use util::print_row_parsed;
use crate::util::print_row_parsed;

fn table(morph: &MorphAnalyzer, s: &str) {
for (i, parsed) in morph.parse(s).into_iter().enumerate() {
Expand All @@ -22,7 +23,7 @@ fn table(morph: &MorphAnalyzer, s: &str) {
}

fn main() {
let morph_ru = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
let morph_ru = MorphAnalyzer::from_file(dict_ru::DICT_PATH);

// table(&morph_ru, "яблоко");
// table(&morph_ru, "хлеб");
Expand Down
5 changes: 3 additions & 2 deletions examples/inflect.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
extern crate rsmorphy;

use rsmorphy::{prelude::*, rsmorphy_dict_ru};
use dict_ru;
use rsmorphy::prelude::*;

pub mod util;

//use util::print_row_lex;

fn main() {
let morph_ru = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
let morph_ru = MorphAnalyzer::from_file(dict_ru::DICT_PATH);

//let lex = Lex::from_id(&morph_ru, "ru:d:стали,388,4").unwrap();
//print_row_lex(&morph_ru, 0, &lex.inflect(&morph_ru, &GrammemeSet::from_str("plur,ablt")).unwrap());
Expand Down
7 changes: 4 additions & 3 deletions examples/lexeme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ extern crate rsmorphy;
use std::collections::BTreeSet;
use std::iter::FromIterator;

use rsmorphy::{prelude::*, rsmorphy_dict_ru};
use dict_ru;
use rsmorphy::prelude::*;

pub mod util;

use util::{input_loop, print_row_lex};
use crate::util::{input_loop, print_row_lex};

fn print_lexeme(morph: &MorphAnalyzer, lex: &Lex) {
for (i, lex) in lex.iter_lexeme(morph).enumerate() {
Expand All @@ -30,7 +31,7 @@ fn list(morph: &MorphAnalyzer, s: &str) {
}

fn main() {
let morph_ru = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
let morph_ru = MorphAnalyzer::from_file(dict_ru::DICT_PATH);

input_loop(|word| list(&morph_ru, word));
}
7 changes: 4 additions & 3 deletions examples/parse.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//extern crate rustyline;
extern crate rsmorphy;

use rsmorphy::{prelude::*, rsmorphy_dict_ru};
use dict_ru;
use rsmorphy::prelude::*;

pub mod util;

use util::{input_loop, print_row_parsed};
use crate::util::{input_loop, print_row_parsed};

fn table(morph: &MorphAnalyzer, s: &str) {
for (i, parsed) in morph.parse(s).into_iter().enumerate() {
Expand All @@ -14,7 +15,7 @@ fn table(morph: &MorphAnalyzer, s: &str) {
}

fn main() {
let morph_ru = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
let morph_ru = MorphAnalyzer::from_file(dict_ru::DICT_PATH);

input_loop(|word| table(&morph_ru, word))
}
18 changes: 10 additions & 8 deletions src/analyzer/morph.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::path::Path;

use container::{ParseResult, SeenSet};
use opencorpora::dictionary::Dictionary;

use analyzer::units::*;
use estimator::SingleTagProbabilityEstimator;
use crate::{
analyzer::units::*,
container::{ParseResult, SeenSet},
estimator::SingleTagProbabilityEstimator,
opencorpora::dictionary::Dictionary,
};

#[derive(Debug, Default, Clone)]
pub struct Units {
Expand Down Expand Up @@ -107,10 +108,11 @@ impl MorphAnalyzer {
#[cfg(test)]
mod tests {
use env_logger;
use {rsmorphy_dict_ru, MorphAnalyzer};

lazy_static! {
static ref RU: MorphAnalyzer = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
use crate::MorphAnalyzer;

lazy_static::lazy_static! {
static ref RU: MorphAnalyzer = MorphAnalyzer::from_file(dict_ru::DICT_PATH);
}

#[test]
Expand Down
28 changes: 13 additions & 15 deletions src/analyzer/units/abbr/initials.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::{borrow::Cow, collections::BTreeSet, iter::FromIterator};

use analyzer::units::abc::AnalyzerUnit;
use analyzer::MorphAnalyzer;
use container::stack::StackSource;
use container::{Initials, InitialsKind};
use container::{Lex, Score};
use container::{ParseResult, Parsed, SeenSet};
use opencorpora::OpencorporaTagReg;
use crate::{
analyzer::{units::abc::AnalyzerUnit, MorphAnalyzer},
container::{
stack::StackSource, Initials, InitialsKind, Lex, ParseResult, Parsed, Score, SeenSet,
},
opencorpora::OpencorporaTagReg,
};

lazy_static! {
lazy_static::lazy_static! {
#[derive(Debug)]
pub static ref LETTERS: BTreeSet<&'static str> = {
let set = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЭЮЯ".split("").filter(|v| !v.is_empty());
Expand Down Expand Up @@ -63,13 +61,13 @@ impl AnalyzerUnit for InitialsAnalyzer {
word_lower: &str,
_seen_parses: &mut SeenSet,
) {
trace!("AbbreviatedFirstNameAnalyzer::parse()");
trace!(r#" word: "{}", word_lower: "{}" "#, word, word_lower);
trace!(
log::trace!("AbbreviatedFirstNameAnalyzer::parse()");
log::trace!(r#" word: "{}", word_lower: "{}" "#, word, word_lower);
log::trace!(
r#" LETTERS: "{:?}" "#,
LETTERS.iter().cloned().collect::<Vec<&str>>().join(", ")
);
trace!(r#" LETTERS contains word: "{}" "#, LETTERS.contains(word));
log::trace!(r#" LETTERS contains word: "{}" "#, LETTERS.contains(word));

if let Some(&letter) = LETTERS.get(word) {
for (tag_idx, &(_, kind)) in self.tags.iter().enumerate() {
Expand Down
7 changes: 4 additions & 3 deletions src/analyzer/units/abc.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use analyzer::MorphAnalyzer;
use container::ParseResult;
use container::SeenSet;
use crate::{
analyzer::MorphAnalyzer,
container::{ParseResult, SeenSet},
};

pub trait AnalyzerUnit {
fn parse(
Expand Down
17 changes: 8 additions & 9 deletions src/analyzer/units/by_analogy/kp.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use analyzer::units::abc::AnalyzerUnit;
use analyzer::MorphAnalyzer;
use container::abc::*;
use container::stack::StackAffix;
use container::{Affix, AffixKind, Lex, ParseResult, Parsed, SeenSet};
use util::add_parsed_if_not_seen;
use crate::{
analyzer::{units::abc::AnalyzerUnit, MorphAnalyzer},
container::{abc::*, stack::StackAffix, Affix, AffixKind, Lex, ParseResult, Parsed, SeenSet},
util::add_parsed_if_not_seen,
};

/// Parse the word by checking if it starts with a known prefix
/// and parsing the remainder.
Expand Down Expand Up @@ -34,8 +33,8 @@ impl AnalyzerUnit for KnownPrefixAnalyzer {
word_lower: &str,
seen_parses: &mut SeenSet,
) {
trace!("KnownPrefixAnalyzer::parse()");
trace!(r#" word = "{}", word_lower = "{}" "#, word, word_lower);
log::trace!("KnownPrefixAnalyzer::parse()");
log::trace!(r#" word = "{}", word_lower = "{}" "#, word, word_lower);

// This analyzer only works on longer words
if word_lower.chars().count() < self.min_reminder_length {
Expand Down Expand Up @@ -93,7 +92,7 @@ impl KnownPrefixAnalyzer {
debug_assert!(word_len >= self.min_reminder_length);
let limit = word_len - self.min_reminder_length;
let word_prefixes = morph.dict.prediction_prefixes.sorted_prefixes(word);
trace!("word_prefixes: {}", word_prefixes.join(", "));
log::trace!("word_prefixes: {}", word_prefixes.join(", "));
word_prefixes
.into_iter()
.map(move |prefix| (prefix.chars().count(), prefix))
Expand Down
38 changes: 26 additions & 12 deletions src/analyzer/units/by_analogy/ks.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use std::borrow::Cow;

use analyzer::units::abc::AnalyzerUnit;
use analyzer::MorphAnalyzer;
use container::stack::StackAffix;
use container::{Affix, Dictionary, Lex, ParseResult, Parsed, Score, Seen, SeenSet, WordStruct};
use opencorpora::dictionary::{PredictionSuffixesDawg, HHH};
use crate::{
analyzer::{units::abc::AnalyzerUnit, MorphAnalyzer},
container::{
stack::StackAffix, Affix, Dictionary, Lex, ParseResult, Parsed, Score, Seen, SeenSet,
WordStruct,
},
opencorpora::dictionary::{PredictionSuffixesDawg, HHH},
};

/// Parse the word by checking how the words with similar suffixes
/// are parsed.
Expand Down Expand Up @@ -35,8 +38,8 @@ impl AnalyzerUnit for KnownSuffixAnalyzer {
word_lower: &str,
seen_parses: &mut SeenSet,
) {
trace!("KnownSuffixAnalyzer::parse()");
trace!(r#" word: "{}", word_lower: "{}" "#, word, word_lower);
log::trace!("KnownSuffixAnalyzer::parse()");
log::trace!(r#" word: "{}", word_lower: "{}" "#, word, word_lower);

let char_len: usize = word_lower.chars().count();

Expand All @@ -50,7 +53,7 @@ impl AnalyzerUnit for KnownSuffixAnalyzer {
let mut total_counts: Vec<u16> = vec![1; morph.dict.paradigm_prefixes.len()];

for (prefix_id, prefix, suffixes_dawg) in self.possible_prefixes(morph, word_lower) {
trace!(r#" prefix_id: {}, prefix: "{}" "#, prefix_id, prefix);
log::trace!(r#" prefix_id: {}, prefix: "{}" "#, prefix_id, prefix);

'iter_splits: for &i in &morph.dict.prediction_splits {
if i >= char_len {
Expand All @@ -62,28 +65,39 @@ impl AnalyzerUnit for KnownSuffixAnalyzer {
.take(char_len - i)
.map(char::len_utf8)
.sum();
trace!("i: {}, pos: {}", i, pos);
log::trace!("i: {}, pos: {}", i, pos);

let (word_start, word_end) = (&word_lower[..pos], &word_lower[pos..]);
trace!("word_start: {}, word_end: {}", word_start, word_end);
log::trace!("word_start: {}, word_end: {}", word_start, word_end);

let para_data = suffixes_dawg.similar_items(word_end, &morph.dict.char_substitutes);
for (fixed_suffix, parses) in para_data {
trace!("fixed_suffix: {}", fixed_suffix);

let fixed_word: Cow<str> = if fixed_suffix == word_end {
Cow::from(word_lower)
} else {
Cow::from(format!("{}{}", word_start, fixed_suffix))
};
log::trace!(
"fixed_suffix: {:?}, fixed_word: {:?}",
fixed_suffix,
fixed_word
);

log::trace!("parses: {:?}", parses);
'iter_parses: for HHH(cnt, para_id, idx) in parses {
let tag = morph.dict.get_tag(para_id.into(), idx.into());

if !tag.is_productive() {
continue 'iter_parses;
}

log::trace!("tc: {:?}", total_counts);
log::trace!(
"prefix_id: {:?}, tc[prefix_id]: {:?}, cnt: {:?}",
prefix_id,
total_counts[prefix_id as usize],
cnt
);
total_counts[prefix_id as usize] += cnt;

let seen = Seen::new(fixed_word.clone(), tag, para_id);
Expand Down
Loading

0 comments on commit 0a953b5

Please sign in to comment.