Cleaner code: `Word` now lives in its own module, and grammatical number suffixes (paucal, plural) are now handled properly

parent 4ceee997
Pipeline #228 passed with stage
in 1 minute and 17 seconds
......@@ -305,6 +305,7 @@ yðou,pron.3pau.n.voc
yþa,pron.3pau.m.voc
yþu,pron.3pau.f.voc
yþu,pron.3pl.f.voc
Éþtrið,np
Ðeberget,np
Þor,n
æn,conj
......
......@@ -7,29 +7,8 @@ use std::fs;
use std::io::Write;
use xml::writer::{EmitterConfig, EventWriter, XmlEvent};
/// A single word of the text together with its analysis.
struct Word {
// The word's text, passed to `Word::new`.
word: String,
// The morpheme matched for the word; empty until one is assigned.
morpheme: String,
// The gloss belonging to the morpheme; empty until one is assigned.
gloss: String,
}
impl Word {
    /// Wraps `word` in a `Word` whose morpheme and gloss are still empty.
    fn new(word: String) -> Word {
        let morpheme = String::new();
        let gloss = String::new();
        Word {
            word,
            morpheme,
            gloss,
        }
    }

    /// Builds the opening `<word>` element, exposing the text, the morpheme
    /// and the gloss as the `text`, `morpheme` and `gloss` attributes.
    fn xml_event(&self) -> XmlEvent {
        let element = XmlEvent::start_element("word")
            .attr("text", self.word.as_str())
            .attr("morpheme", self.morpheme.as_str())
            .attr("gloss", self.gloss.as_str());
        element.into()
    }
}
mod word;
use word::Word;
fn import_morphemes() -> HashMap<String, String> {
let contents = fs::read_to_string("matter-dict.csv")
......@@ -41,8 +20,8 @@ fn import_morphemes() -> HashMap<String, String> {
for result in rdr.records() {
let record = result.unwrap();
morphemes_collection.insert(
String::from(record.get(0).unwrap()),
String::from(record.get(1).unwrap()),
String::from(record.get(0).unwrap().to_lowercase()),
String::from(record.get(1).unwrap().to_lowercase()),
);
}
morphemes_collection
......@@ -63,10 +42,22 @@ fn read_text(filename: &str) -> Vec<String> {
}
/// Splits the affixes off `word` and tries to consume a number suffix.
///
/// Calls `Word::make_affixes`, returns early when no affixes remain, and
/// otherwise tries the paucal marker first and the plural marker only if the
/// paucal check did not match. The morpheme/affix segmentation is printed to
/// stdout before and after the number check.
fn process_suffixes(word: &mut Word) {
// NOTE(review): the next three lines look like the pre-change body that the
// diff view shows next to its replacement (they read `Word`'s fields
// directly and leave an `if` block unclosed) — confirm against the real file.
let (_, affixes) = word.word.split_at(word.morpheme.len());
if affixes != "" {
println!("{}-{}", word.morpheme, affixes);
// Compute the affix string that follows the morpheme.
word.make_affixes();
// Nothing follows the morpheme: there is no suffix to analyse.
if word.get_affixes() == "" {
return;
}
println!("Original:\t{}-{}", word.get_morpheme(), word.get_affixes());
// Check number suffix: paucal first, plural only when paucal did not match.
if !word.check_paucal() {
word.check_plural();
}
println!("Number:\t\t{}-{}", word.get_morpheme(), word.get_affixes());
// The number suffix may have used up every remaining affix.
if word.get_affixes() == "" {
return;
}
// Check possessive suffix (no code for this step is visible here).
}
fn process_word<W: Write>(
......@@ -74,22 +65,11 @@ fn process_word<W: Write>(
word: &str,
morphemes: &HashMap<String, String>,
) {
let mut word = Word::new(word.to_string());
for (morpheme, gloss) in morphemes {
if word.word.starts_with(morpheme.as_str()) &&
morpheme.len() >= word.morpheme.len()
{
word.morpheme = morpheme.clone();
word.gloss = gloss.clone();
}
}
if word.morpheme != "" {
process_suffixes(&mut word);
} else {
word.morpheme = word.word.clone();
word.gloss = String::from("unknown");
let mut word = Word::new(word.to_string(), &morphemes);
if word.get_morpheme() != "unknown" {
process_suffixes(&mut word)
}
let event : XmlEvent = word.xml_event();
let event: XmlEvent = word.xml_event();
w.write(event).unwrap();
let event: XmlEvent = XmlEvent::end_element().into();
w.write(event).unwrap();
......
use xml::writer::XmlEvent;
use std::collections::HashMap;
/// A word of the text together with the result of its morphological analysis.
///
/// The fields are private; they are read and written through the accessor
/// methods of `Word`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Word {
    /// The word's text, as handed to `Word::new`.
    word: String,
    /// The dictionary morpheme matched at the start of `word`, or the whole
    /// word when nothing matched.
    morpheme: String,
    /// The gloss of `morpheme`; number tags such as `.pl` / `.pau` get
    /// appended to it, and it is `"unknown"` when nothing matched.
    gloss: String,
    /// The part of `word` that follows `morpheme` and is still unanalysed.
    affixes: String,
    /// Whether `morpheme` ends with a vowel (set by `make_affixes`).
    ends_vowel: bool,
}
impl Word {
    /// Creates a `Word` for `word`, looking its stem up in `morphemes`.
    ///
    /// `morphemes` maps a morpheme to its gloss. The longest non-empty key
    /// that `word` starts with becomes the morpheme and its value becomes the
    /// gloss. When no key matches, the morpheme is the whole word and the
    /// gloss is the literal `"unknown"`.
    ///
    /// The affixes start out empty and the vowel flag `false`; call
    /// [`Word::make_affixes`] to compute them.
    pub fn new(word: String, morphemes: &HashMap<String, String>) -> Word {
        // Two distinct keys of the same length cannot both be prefixes of the
        // same word, so the longest match is unique whatever the map order.
        let best = morphemes
            .iter()
            .filter(|(morpheme, _)| !morpheme.is_empty() && word.starts_with(morpheme.as_str()))
            .max_by_key(|(morpheme, _)| morpheme.len());

        let (morpheme, gloss) = match best {
            Some((morpheme, gloss)) => (morpheme.clone(), gloss.clone()),
            None => (word.clone(), String::from("unknown")),
        };

        Word {
            word,
            morpheme,
            gloss,
            affixes: String::new(),
            ends_vowel: false,
        }
    }

    // The accessors below keep their `dead_code` allowance: not all of them
    // are called by the code that uses this type.

    /// Returns the word's text.
    #[allow(dead_code)]
    pub fn get_word(&self) -> &String {
        &self.word
    }

    /// Replaces the word's text. Morpheme, gloss and affixes are left as-is.
    #[allow(dead_code)]
    pub fn set_word(&mut self, val: String) {
        self.word = val;
    }

    /// Returns the morpheme currently assigned to the word.
    #[allow(dead_code)]
    pub fn get_morpheme(&self) -> &String {
        &self.morpheme
    }

    /// Replaces the morpheme. The affixes are not recomputed automatically.
    #[allow(dead_code)]
    pub fn set_morpheme(&mut self, val: String) {
        self.morpheme = val;
    }

    /// Returns the gloss, including any number tag appended so far.
    #[allow(dead_code)]
    pub fn get_gloss(&self) -> &String {
        &self.gloss
    }

    /// Replaces the gloss.
    #[allow(dead_code)]
    pub fn set_gloss(&mut self, val: String) {
        self.gloss = val;
    }

    /// Returns the affixes that have not been analysed yet.
    #[allow(dead_code)]
    pub fn get_affixes(&self) -> &String {
        &self.affixes
    }

    /// Replaces the remaining affixes.
    #[allow(dead_code)]
    pub fn set_affixes(&mut self, val: String) {
        self.affixes = val;
    }

    /// Returns the vowel flag computed by the last [`Word::make_affixes`] call.
    #[allow(dead_code)]
    pub fn ends_with_vowel(&self) -> bool {
        self.ends_vowel
    }

    /// Builds the opening `<word>` element with the `text`, `morpheme` and
    /// `gloss` attributes taken from the corresponding fields.
    pub fn xml_event(&self) -> XmlEvent {
        XmlEvent::start_element("word")
            .attr("text", &self.word)
            .attr("morpheme", &self.morpheme)
            .attr("gloss", &self.gloss)
            .into()
    }

    /// Reports whether the morpheme's last character is one of the vowels
    /// `i y u é e o a æ`. An empty morpheme has no last character and yields
    /// `false` instead of panicking.
    fn make_end_with_vowel(&self) -> bool {
        const VOWELS: [char; 8] = ['i', 'y', 'u', 'é', 'e', 'o', 'a', 'æ'];
        self.morpheme
            .chars()
            .last()
            .map_or(false, |last| VOWELS.contains(&last))
    }

    /// Stores everything in the word after the first `morpheme.len()` bytes as
    /// the affixes and refreshes the vowel flag.
    ///
    /// If that offset is past the end of the word or not on a character
    /// boundary (possible after the setters were used), the affixes are set to
    /// the empty string rather than panicking.
    pub fn make_affixes(&mut self) {
        self.affixes = self
            .word
            .get(self.morpheme.len()..)
            .unwrap_or("")
            .to_string();
        self.ends_vowel = self.make_end_with_vowel();
    }

    /// Shared implementation of the number checks.
    ///
    /// Accepts `after_vowel` at the start of the affixes when the morpheme
    /// ends with a vowel, otherwise (or failing that) `linked`. Exactly one
    /// occurrence of the matched marker is removed, so an immediately
    /// following suffix that starts with the same letters is kept for later
    /// analysis. On a match `tag` is appended to the gloss.
    fn take_number_suffix(&mut self, after_vowel: &str, linked: &str, tag: &str) -> bool {
        let consumed = if self.ends_vowel && self.affixes.starts_with(after_vowel) {
            after_vowel.len()
        } else if self.affixes.starts_with(linked) {
            linked.len()
        } else {
            return false;
        };
        // `consumed` is the byte length of a prefix that was just matched, so
        // it always falls on a character boundary.
        self.affixes.drain(..consumed);
        self.gloss.push_str(tag);
        true
    }

    /// Consumes a plural marker from the front of the affixes: `þ` when the
    /// morpheme ends with a vowel, `eþ` otherwise (or when `þ` does not match).
    ///
    /// Returns `true` and appends `.pl` to the gloss when a marker was found,
    /// `false` (leaving the word untouched) otherwise.
    pub fn check_plural(&mut self) -> bool {
        self.take_number_suffix("þ", "eþ", ".pl")
    }

    /// Consumes a paucal marker from the front of the affixes: `t` when the
    /// morpheme ends with a vowel, `et` otherwise (or when `t` does not match).
    ///
    /// Returns `true` and appends `.pau` to the gloss when a marker was found,
    /// `false` (leaving the word untouched) otherwise.
    pub fn check_paucal(&mut self) -> bool {
        self.take_number_suffix("t", "et", ".pau")
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment