some more improvements

parent 05828931
Pipeline #226 passed with stage
in 1 minute and 17 seconds
......@@ -324,6 +324,7 @@ yþu,pron.3pl.f.voc
þcyrm,n
þean,nbr
þeg,vt
þror,np
þett,vt
þi,pron.1pau.nom
þid,pron.1pau.acc
......
Em meþ Gunnarac annéðant þynea. An aenant caupage, ar annéð caupe. Fe en eppelant etano Éþtrið fent etano? Thror eppelant feþ geffo? Du feċ gei? Hint fec fém gér? Fon landytoċ beþt bƿand? Feren Mattérant frégei? Ferve Mattérant frégei? Eppeleþant eða curþabérant, fertið y caupei? Ferden urbyþ gon? Fertið bryðdegynant haþt? Fertiðoċ Mattérant frégei? Fertiðac y ċilde?
Em meþ Gunnarac annéðant þynea. An ænant caupage, ar annéð caupe. Fe en eppelant etano Éþtrið fent etano? Þror eppelant feþ geffo? Du feċ gei? Hint fec fém gér? Fon landytoċ beþt bƿand? Feren Mattérant frégei? Ferve Mattérant frégei? Eppeleþant eða curþabérant, fertið y caupei? Ferden urbyþ gon? Fertið bryðdegynant haþt? Fertiðoċ Mattérant frégei? Fertiðac y ċilde?
......@@ -25,10 +25,11 @@ fn import_morphemes() -> HashMap<String, String> {
}
fn read_text(filename: &str) -> Vec<String> {
let text = fs::read_to_string(filename)
let mut text = fs::read_to_string(filename)
.expect("Something went wrong reading the input text.");
text.retain(|c| c != ',');
let mut sentences: Vec<_> = text
.split(|s| s == '.' || s == '?' || s == '!')
.split(|s| s == '.' || s == '?' || s == '!' || s == ';')
.collect::<Vec<_>>()
.iter()
.map(|s| s.trim().to_lowercase().to_string())
......@@ -37,6 +38,35 @@ fn read_text(filename: &str) -> Vec<String> {
sentences
}
/// Emits one `<word>` element for `word` through the XML writer `w`.
///
/// The longest key of `morphemes` that is a prefix of `word` is stored in the
/// element's `morpheme` attribute and the value mapped to it in `gloss`; both
/// attributes are empty strings when no key matches. The `text` attribute
/// always carries `word` itself.
///
/// When a non-empty morpheme leaves a non-empty remainder of the word, the
/// split is printed to stdout as `morpheme-remainder`.
///
/// # Panics
///
/// Panics if writing either the start or the end element fails.
fn process_word<W: Write>(
    w: &mut EventWriter<W>,
    word: &str,
    morphemes: &HashMap<String, String>,
) {
    // Two distinct keys of equal length cannot both be a prefix of the same
    // word, so the longest match is unique whatever order the map iterates in.
    let (stem, gloss) = morphemes
        .iter()
        .filter(|(key, _)| word.starts_with(key.as_str()))
        .max_by_key(|(key, _)| key.len())
        .map(|(key, value)| (key.as_str(), value.as_str()))
        .unwrap_or(("", ""));

    let opening: XmlEvent = XmlEvent::start_element("word")
        .attr("text", word)
        .attr("morpheme", stem)
        .attr("gloss", gloss)
        .into();

    if !stem.is_empty() {
        // `stem` is a prefix of `word`, so its length is a valid char boundary.
        let remainder = &word[stem.len()..];
        if !remainder.is_empty() {
            println!("{}-{}", stem, remainder);
        }
    }

    w.write(opening).unwrap();
    let closing: XmlEvent = XmlEvent::end_element().into();
    w.write(closing).unwrap();
}
fn process_sentence<W: Write>(
w: &mut EventWriter<W>,
sentence: &String,
......@@ -44,26 +74,8 @@ fn process_sentence<W: Write>(
) {
let event: XmlEvent = XmlEvent::start_element("sentence").into();
w.write(event).unwrap();
println!("{:?}", sentence);
for word in sentence.split_whitespace() {
println!("{:?}", word);
let mut details = String::new();
let mut morpheme = String::new();
for (key, value) in morphemes {
if word.starts_with(key.as_str()) && key.len() >= morpheme.len() {
morpheme = key.clone();
details = value.clone();
println!("{}", value);
}
}
let event: XmlEvent = XmlEvent::start_element("word")
.attr("text", word)
.attr("morpheme", &morpheme)
.attr("gloss", &details)
.into();
w.write(event).unwrap();
let event: XmlEvent = XmlEvent::end_element().into();
w.write(event).unwrap();
process_word(w, word, morphemes);
}
let event: XmlEvent = XmlEvent::end_element().into();
w.write(event).unwrap();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment