Browse Source

some more improvements

master
Phuntsok Drak-pa 9 months ago
parent
commit
8efcf35aca
3 changed files with 35 additions and 22 deletions
  1. +1
    -0
      matter-dict.csv
  2. +1
    -1
      matter.txt
  3. +33
    -21
      src/main.rs

+ 1
- 0
matter-dict.csv View File

@@ -324,6 +324,7 @@ yþu,pron.3pl.f.voc
þcyrm,n
þean,nbr
þeg,vt
þror,np
þett,vt
þi,pron.1pau.nom
þid,pron.1pau.acc


+ 1
- 1
matter.txt View File

@@ -1 +1 @@
Em meþ Gunnarac annéðant þynea. An aenant caupage, ar annéð caupe. Fe en eppelant etano Éþtrið fent etano? Thror eppelant feþ geffo? Du feċ gei? Hint fec fém gér? Fon landytoċ beþt bƿand? Feren Mattérant frégei? Ferve Mattérant frégei? Eppeleþant eða curþabérant, fertið y caupei? Ferden urbyþ gon? Fertið bryðdegynant haþt? Fertiðoċ Mattérant frégei? Fertiðac y ċilde?
Em meþ Gunnarac annéðant þynea. An ænant caupage, ar annéð caupe. Fe en eppelant etano Éþtrið fent etano? Þror eppelant feþ geffo? Du feċ gei? Hint fec fém gér? Fon landytoċ beþt bƿand? Feren Mattérant frégei? Ferve Mattérant frégei? Eppeleþant eða curþabérant, fertið y caupei? Ferden urbyþ gon? Fertið bryðdegynant haþt? Fertiðoċ Mattérant frégei? Fertiðac y ċilde?

+ 33
- 21
src/main.rs View File

@@ -25,10 +25,11 @@ fn import_morphemes() -> HashMap<String, String> {
}

fn read_text(filename: &str) -> Vec<String> {
let text = fs::read_to_string(filename)
let mut text = fs::read_to_string(filename)
.expect("Something went wrong reading the input text.");
text.retain(|c| c != ',');
let mut sentences: Vec<_> = text
.split(|s| s == '.' || s == '?' || s == '!')
.split(|s| s == '.' || s == '?' || s == '!' || s == ';')
.collect::<Vec<_>>()
.iter()
.map(|s| s.trim().to_lowercase().to_string())
@@ -37,6 +38,35 @@ fn read_text(filename: &str) -> Vec<String> {
sentences
}

/// Emits one `word` element for `word` on the XML writer `w`.
///
/// The longest key of `morphemes` that is a prefix of `word` is written as
/// the `morpheme` attribute and its mapped value as the `gloss` attribute;
/// both attributes are empty strings when no key matches. When a non-empty
/// morpheme leaves a non-empty remainder of the word, the split is printed
/// to stdout as `morpheme-remainder`.
///
/// # Panics
///
/// Panics if writing either the start or the end event fails.
fn process_word<W: Write>(
    w: &mut EventWriter<W>,
    word: &str,
    morphemes: &HashMap<String, String>,
) {
    // Map keys are unique, so two distinct prefixes of `word` can never share
    // a length: the longest matching key is unambiguous.
    let (stem, gloss) = morphemes
        .iter()
        .filter(|(candidate, _)| word.starts_with(candidate.as_str()))
        .max_by_key(|(candidate, _)| candidate.len())
        .map(|(candidate, meaning)| (candidate.as_str(), meaning.as_str()))
        .unwrap_or(("", ""));

    let opening: XmlEvent = XmlEvent::start_element("word")
        .attr("text", word)
        .attr("morpheme", stem)
        .attr("gloss", gloss)
        .into();

    // `stem` is a prefix of `word`, so its byte length is a valid split point.
    let suffix = &word[stem.len()..];
    if !stem.is_empty() && !suffix.is_empty() {
        println!("{}-{}", stem, suffix);
    }

    w.write(opening).unwrap();
    let closing: XmlEvent = XmlEvent::end_element().into();
    w.write(closing).unwrap();
}

fn process_sentence<W: Write>(
w: &mut EventWriter<W>,
sentence: &String,
@@ -44,26 +74,8 @@ fn process_sentence<W: Write>(
) {
let event: XmlEvent = XmlEvent::start_element("sentence").into();
w.write(event).unwrap();
println!("{:?}", sentence);
for word in sentence.split_whitespace() {
println!("{:?}", word);
let mut details = String::new();
let mut morpheme = String::new();
for (key, value) in morphemes {
if word.starts_with(key.as_str()) && key.len() >= morpheme.len() {
morpheme = key.clone();
details = value.clone();
println!("{}", value);
}
}
let event: XmlEvent = XmlEvent::start_element("word")
.attr("text", word)
.attr("morpheme", &morpheme)
.attr("gloss", &details)
.into();
w.write(event).unwrap();
let event: XmlEvent = XmlEvent::end_element().into();
w.write(event).unwrap();
process_word(w, word, morphemes);
}
let event: XmlEvent = XmlEvent::end_element().into();
w.write(event).unwrap();


Loading…
Cancel
Save