neat output for now

parent 99100fcf
/target
**/*.rs.bk
*.xml
......@@ -34,6 +34,7 @@ name = "matter-parser"
version = "0.1.0"
dependencies = [
"csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
......@@ -54,6 +55,11 @@ name = "serde"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "xml-rs"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9044e25afb0924b5a5fc5511689b0918629e85d68ea591e5e87fbf1e85ea1b3b"
"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65"
......@@ -62,3 +68,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "b96a9549dc8d48f2c283938303c4b5a77aa29bfbc5b54b084fb1630408899a8f"
"checksum serde 1.0.91 (registry+https://github.com/rust-lang/crates.io-index)" = "a72e9b96fa45ce22a4bc23da3858dfccfd60acd28a25bcd328a98fdd6bea43fd"
"checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5"
......@@ -6,3 +6,4 @@ edition = "2018"
[dependencies]
csv = "1"
xml-rs = "0.8.0"
Alfén,n
Deccalfén,n
Fréyia,n
Fréyr,n
Hyrfialþær,np
Leþþalfén,n
Odin,n
Valalla,n
a,art.def.sg.n
al,ad
al,art.def.sg.f
altið,adv
an,art.dem.sg.near
annéð,adj
anrad,nbr
ar,conj
areð,n
armér,n
at,art.dem.pau.near
að,art.dem.pl.near
að,conj
aċt,nbr
aċteig,nbr
bac,n
berg,n
bern,n
bevit,adj
bevityt,n
boccé,n
breif,n
bruðyr,n
bryð,n
bryðdeg,n
bæn,n
bér,n
béta,adv
bƿ,vi
calfér,n
canal,n
cat,n
caup,vt
cirþabér,n
comm,vi
cycvénd,n
cyn,vt
cyng,n
cyngyt,n
céveð,n
daun,pron.2sg.acc
deccar,adj
den,n
den,vi
deten,nbr
dottyr,n
dran,nbr
du,pron.2sg.nom
dur,n
duð,pron.2sg.dat
dyn,pron.2sg.gen
dyr,n
dyrc,n
dæg,n
ealant,n
ed,art.dem.pau
ein,adj
eint,pron.2pau.acc
eldyr,n
eldyr,n
ella,conj
em,art.dem.sg
en,art.def.sg.nhum
eny,pron.2pau.voc
eppel,n
et,vt
ev,art.def.pl.nhum
eð,art.dem.pl
eða,adv
eþ,art.def.pl.nhum
faðcyng,n
faðcyngyt,n
faðmoð,n
faðmoð,vi
faðér,n
faðérr,vi
fe,pron.q.nom
fec,pron.q.abl
feioð,n
fent,pron.q.acc
ferden,pron.q.instr
feren,pron.q.goal
fertið,pron.q.loc.temp
fertiðac,pron.q.limit.temp
fertiðoċ,pron.q.abl.temp
ferve,pron.q.motivation
fervid,pron.q.choice
feþ,pron.q.dat
feċ,pron.q.loc
fial,n
fici,vt
fician,n
fingér,n
flytt,vi
fobror,n
fogel,n
fon,pron.q.gen
foþtyr,n
freði,n
fri,adj
frie,nbr
frihyt,n
fro,nbr
félag,n
fém,pron.q.limit
g,vi
galm,adj
geff,vt
gelty,n
gemmel,adj
gilðar,n
gilðaryt,n
gunnar,np
guð,n
guþ,n
gyi,vi
gymmur,n
gyrneg,n
gærn,n
gæt,n
gér,vt
hae,pron.3sg.m.nom
haed,pron.3sg.m.acc
haen,pron.3sg.m.gen
haeð,pron.3sg.m.dat
hauþtér,n
hauþtér,vt
hea,pron.3pl.n.nom
heam,pron.3pl.n.gen
heaþ,pron.3pl.n.dat
heaþy,pron.3pl.n.voc
hei,pron.3pau.n.nom
heill,vt
hein,pron.3pau.n.gen
heit,pron.3pau.n.acc
heiþ,pron.3pau.n.dat
heþ,pron.3sg.n.dat
him,pron.3sg.n.gen
hint,pron.3pl.n.acc
hint,pron.3sg.n.acc
hit,pron.3sg.n.nom
hiþ,pron.3sg.n.dat
hond,n
hot,pron.3pau.n.acc
hou,pron.3pau.n.nom
hou,pron.3pl.n.nom
hou,pron.3sg.f.nom
houn,pron.3sg.f.gen
hound,pron.3sg.f.acc
hout,pron.3pl.n.acc
houþ,pron.3pl.n.dat
houþy,pron.3pl.n.voc
hoþ,pron.3pau.n.dat
hryþ,n
hultar,n
huð,pron.3sg.f.dat
hynd,n
hyr,n
hyrfial,n
hyþ,n
hé,pron.3sg.n.nom
hégri,adj
hén,pron.3sg.n.gen
hérðér,n
hét,pron.3sg.n.acc
héþir,n
héþtyr,n
orn,n
ulv,n
wyrm,n
ylgar,n
éþƿér,n
ċuðar,n
den,n
freði,n
lyþt,n
þéga,n
ƿen,adj
boccé,n
breif,n
ryn,n
þteinér,n
céveð,n
myþic,n
iara,n
ie,adv
iea,adv
im,pron.2pau.nom
iurd,n
iurþtiern,n
gymmur,n
mény,n
þtiern,n
þynna,n
gæt,n
urby,n
bér,n
hyþ,n
tere,n
marcéð,n
fri,adj
armér,n
bac,n
fingér,n
hond,n
hérðér,n
lycam,n
néf,n
ræð,adj
lyn,n
meccil,adj
þmoð,adj
þtor,adj
mænd,adj
vend,adj
vinþtri,adj
hégri,adj
eldyr,n
hyr,n
eldyr,n
lycce,adj
ƿille,n
ru,n
ċild,n
þtikyl,n
ċlið,n
hultar,n
cyng,n
cyngyt,n
faðcyng,n
faðcyngyt,n
iéral,n
landyt,n
ðengil,n
annéð,adj
béta,adv
eða,adv
ja,adv
jæ,adv
landyt,n
lant,n
leiþ,n
leiþér,adj
licca,adv
méllém,adv
na,adv
neiċ,adv
næ,adv
ov,adv
velgi,adv
þiv,conj
þém,inter
a,art.def.sg.n
é,art.def.sg.m
al,art.def.sg.f
en,art.def.sg.nhum
od,art.def.pau.n.m.f
yt,art.def.pau.nhum
eþ,art.def.pl.nhum
ev,art.def.pl.nhum
an,art.dem.sg.near
at,art.dem.pau.near
að,art.dem.pl.near
em,art.dem.sg
ed,art.dem.pau
eð,art.dem.pl
un,art.dem.sg.far
ut,art.dem.pau.far
uð,art.dem.pl.far
fe,pron.q.nom
fent,pron.q.acc
feþ,pron.q.dat
feċ,pron.q.loc
fec,pron.q.abl
fém,pron.q.limit
fon,pron.q.gen
feren,pron.q.goal
ferve,pron.q.motivation
fervid,pron.q.choice
ferden,pron.q.instr
fertið,pron.q.loc.temp
fertiðoċ,pron.q.abl.temp
fertiðac,pron.q.limit.temp
ar,conj
að,conj
ella,conj
men,conj
némmé,conj
og,conj
æn,conj
ér,conj
ðea,conj
tél,prep
þiv,prep
ċeg,pron.1sg.nom
ċent,pron.1sg.acc
ċeið,pron.1sg.dat
ċyn,pron.1sg.gen
þi,pron.1pau.nom
þid,pron.1pau.acc
þið,pron.1pau.dat
þyn,pron.1pau.gen
non,pron.1pl.nom
nound,pron.1pl.acc
nonþ,pron.1pl.dat
nun,pron.1pl.gen
du,pron.2sg.nom
daun,pron.2sg.acc
duð,pron.2sg.dat
dyn,pron.2sg.gen
udy,pron.2sg.voc
im,pron.2pau.nom
eint,pron.2pau.acc
ði,pron.2pau.dat
nim,pron.2pau.gen
eny,pron.2pau.voc
min,pron.2pl.nom
liegg,vi
lit,vt
logar,n
lycam,n
lycce,adj
lyf,n
lyn,n
lyvér,adj
lyþt,n
lætt,vi
léc,adj
léþ,vt
marcéð,n
maþtyr,n
meccil,adj
mein,n
meint,pron.2pl.acc
minþ,pron.2pl.dat
meinƿaċ,n
men,conj
menyþ,n
meun,pron.2pl.gen
meþ,n
min,pron.2pl.nom
miny,pron.2pl.voc
hé,pron.3sg.n.nom
hét,pron.3sg.n.acc
heþ,pron.3sg.n.dat
hén,pron.3sg.n.gen
yhé,pron.3sg.n.voc
hae,pron.3sg.m.nom
haed,pron.3sg.m.acc
haeð,pron.3sg.m.dat
haen,pron.3sg.m.gen
yhae,pron.3sg.m.voc
hou,pron.3sg.f.nom
hound,pron.3sg.f.acc
huð,pron.3sg.f.dat
houn,pron.3sg.f.gen
yhou,pron.3sg.f.voc
hit,pron.3sg.n.nom
hint,pron.3sg.n.acc
hiþ,pron.3sg.n.dat
him,pron.3sg.n.gen
yhi,pron.3sg.n.voc
hei,pron.3pau.n.nom
heit,pron.3pau.n.acc
heiþ,pron.3pau.n.dat
hein,pron.3pau.n.gen
yhei,pron.3pau.n.voc
þa,pron.3pau.m.nom
þad,pron.3pau.m.acc
þaið,pron.3pau.m.dat
þan,pron.3pau.m.gen
yþa,pron.3pau.m.voc
þu,pron.3pau.f.nom
þunt,pron.3pau.f.acc
þou,pron.3pau.f.dat
þun,pron.3pau.f.gen
yþu,pron.3pau.f.voc
hou,pron.3pau.n.nom
hot,pron.3pau.n.acc
hoþ,pron.3pau.n.dat
ðo,pron.3pau.n.gen
yðou,pron.3pau.n.voc
hea,pron.3pl.n.nom
hint,pron.3pl.n.acc
heaþ,pron.3pl.n.dat
heam,pron.3pl.n.gen
heaþy,pron.3pl.n.voc
þa,pron.3pl.m.nom
þat,pron.3pl.m.acc
þaið,pron.3pl.m.dat
þan,pron.3pl.m.gen
þaiðy,pron.3pl.m.voc
þu,pron.3pl.f.nom
þunt,pron.3pl.f.acc
þou,pron.3pl.f.dat
þun,pron.3pl.f.gen
yþu,pron.3pl.f.voc
hou,pron.3pl.n.nom
hout,pron.3pl.n.acc
houþ,pron.3pl.n.dat
ðo,pron.3pl.n.gen
houþy,pron.3pl.n.voc
iara,n
retty,n
Hyrfialþær,np
Ðeberget,np
deccar,adj
leiþ,n
leiþér,adj
myrcér,n
frihyt,n
léc,adj
þoc,n
canal,n
þcort,adj
berg,n
ealant,n
fial,n
hyrfial,n
lant,n
logar,n
pyl,n
þær,n
træ,n
þær,n
al,ad
norm,n
nyn,nbr
æn,nbr
tƿéa,nbr
ðe,nbr
fro,nbr
ðeif,nbr
ċcæc,nbr
þean,nbr
aċt,nbr
onnén,nbr
dran,nbr
tieg,nbr
ðiea,nbr
frie,nbr
ðeig,nbr
ċciag,nbr
þieg,nbr
aċteig,nbr
onneg,nbr
anrad,nbr
tanþen,nbr
deten,nbr
minþ,pron.2pl.dat
mollen,nbr
vrelien,nbr
ƿaċen,n
feioð,n
meþ,n
yld,n
bruðyr,n
dottyr,n
faðmoð,n
faðér,n
fobror,n
foþtyr,n
maþtyr,n
morg,adv
moðér,n
moðérr,vi
myrcér,n
myþic,n
mæbror,n
þon,n
þyþter,n
mænd,adj
méllém,adv
mény,n
mérc,vt
na,adv
nam,n
ryd,n
Alfén,n
Deccalfén,n
Leþþalfén,n
éccċi,on
Fréyia,n
Fréyr,n
Odin,n
Valalla,n
bæn,n
Þor,n
guð,n
guþ,n
bevit,adj
bevityt,n
þyn,n
areð,n
dyrc,n
gilðar,n
gilðaryt,n
félag,n
ie,adv
iea,adv
ne,adv
nea,adv
altið,adv
dæg,n
menyþ,n
morg,adv
neiċ,adv
neþty,adj
nim,pron.2pau.gen
non,pron.1pl.nom
nonþ,pron.1pl.dat
norm,n
nound,pron.1pl.acc
nu,adv
nun,pron.1pl.gen
nyn,nbr
næ,adv
néf,n
némmé,conj
od,art.def.pau.n.m.f
og,conj
onneg,nbr
onnén,nbr
orn,n
ov,adv
pyl,n
retty,n
rinn,vi
ritt,vt
ru,n
ryd,n
ryn,n
ræð,adj
tanþen,nbr
tebyr,vt
tere,n
tieg,nbr
tið,n
træ,n
tynn,vt
tél,prep
tƿéa,nbr
udy,pron.2sg.voc
ulv,n
un,art.dem.sg.far
urby,n
ut,art.dem.pau.far
uð,art.dem.pl.far
velgi,adv
vend,adj
verd,vt
vinþtri,adj
vitt,vt
voc,n
þcyrm,n
galm,adj
gyrneg,n
gærn,n
hauþtér,n
vrelien,nbr
vér,n
vérr,vt
vétter,n
þymmér,n
cirþabér,n
eppel,n
bryð,n
bryðdeg,n
ein,adj
gemmel,adj
lyf,n
lyvér,adj
vétter,vi
wyrm,n
yhae,pron.3sg.m.voc
yhei,pron.3pau.n.voc
yhi,pron.3sg.n.voc
yhou,pron.3sg.f.voc
yhé,pron.3sg.n.voc
yld,n
ylgar,n
yt,art.def.pau.nhum
yðou,pron.3pau.n.voc
yþa,pron.3pau.m.voc
yþu,pron.3pau.f.voc
yþu,pron.3pl.f.voc
Ðeberget,np
Þor,n
æn,conj
æn,nbr
ævi,n
mein,n
meinƿaċ,n
fici,vt
gyi,vi
den,vi
léþ,vt
ritt,vt
gér,vt
mérc,vt
þtein,vt
þong,vi
flytt,vi
þæll,vt
geff,vt
caup,vt
ƿili,vt
ċild,vi
verd,vt
lætt,vi
comm,vi
liegg,vi
rinn,vi
g,vi
et,vt
þpiċ,vt
faðmoð,vi
faðérr,vi
moðérr,vi
é,art.def.sg.m
éccċi,on
ér,conj
éþƿér,n
ðe,nbr
ðea,conj
ðeif,nbr
ðeig,nbr
ðengil,n
ði,pron.2pau.dat
ðiea,nbr
ðo,pron.3pau.n.gen
ðo,pron.3pl.n.gen
þa,pron.3pau.m.nom
þa,pron.3pl.m.nom
þad,pron.3pau.m.acc
þaið,pron.3pau.m.dat
þaið,pron.3pl.m.dat
þaiðy,pron.3pl.m.voc
þan,pron.3pau.m.gen
þan,pron.3pl.m.gen
þat,pron.3pl.m.acc
þcort,adj
þcyrm,n
þean,nbr
þeg,vt
tynn,vt
þett,vt
cyn,vt
vitt,vt
þi,pron.1pau.nom
þid,pron.1pau.acc
þie,vt
lit,vt
þyn,vt
heill,vt
tebyr,vt
hauþtér,vt
vérr,vt
vétter,vi
þieg,nbr
þiv,conj
þiv,prep
þið,pron.1pau.dat
þmoð,adj
þoc,n
þon,n
þong,vi
þou,pron.3pau.f.dat
þou,pron.3pl.f.dat
þpiċ,vt
þtein,vt
þteinér,n
þtiern,n
þtikyl,n
þtor,adj
þu,pron.3pau.f.nom
þu,pron.3pl.f.nom
þun,pron.3pau.f.gen
þun,pron.3pl.f.gen
þunt,pron.3pau.f.acc
þunt,pron.3pl.f.acc
þymmér,n
þymmér,vt
bƿ,vi
þyn,n
þyn,pron.1pau.gen
þyn,vt
þynna,n
þyþter,n
þæll,vt
þær,n
þær,n
þéga,n
þém,inter
ċciag,nbr
ċcæc,nbr
ċeg,pron.1sg.nom
ċeið,pron.1sg.dat
ċent,pron.1sg.acc
ċild,n
ċild,vi
ċlið,n
ċuðar,n
ċyn,pron.1sg.gen
ƿaċen,n
ƿen,adj
ƿili,vt
ƿille,n
Em meþ Gunnarac annéðant þynea. An aenant caupage, ar annéð caupe. Fe en eppelant etano Éþtrið fent etano? Thror eppelant feþ geffo? Du feċ gei? Hint fec fém gér? Fon landytoċ beþt bƿand? Feren Mattérant frégei? Ferve Mattérant frégei? Eppeleþant eða curþabérant, fertið y caupei? Ferden urbyþ gon? Fertið bryðdegynant haþt? Fertiðoċ Mattérant frégei? Fertiðac y ċilde?
extern crate csv;
extern crate xml;
use fs::File;
use std::collections::HashMap;
use std::error::Error;
use std::fs;
use std::io::Write;
use xml::writer::{EmitterConfig, EventWriter, XmlEvent};
fn import_morphemes() -> Result<HashMap<String, String>, Box<Error>> {
fn import_morphemes() -> HashMap<String, String> {
let contents = fs::read_to_string("matter-dict.csv")
.expect("Something went wrong reading the file");
.expect("Something went wrong reading the dictionary");
let mut rdr = csv::ReaderBuilder::new()
.delimiter(b',')
.from_reader(contents.as_bytes());
let mut morphemes_collection = HashMap::new();
for result in rdr.records() {
let record = result?;
let record = result.unwrap();
morphemes_collection.insert(
String::from(record.get(0).unwrap()),
String::from(record.get(1).unwrap()),
);
}
Ok(morphemes_collection)
morphemes_collection
}
/// Reads the text file at `filename` and breaks it into sentences.
///
/// The text is cut at every `.`, `?` and `!`. Each piece is stripped of surrounding
/// whitespace and lower-cased; pieces that end up empty are discarded. The remaining
/// pieces are returned in their original order.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_text(filename: &str) -> Vec<String> {
    let raw = fs::read_to_string(filename)
        .expect("Something went wrong reading the input text.");
    // Sentence terminators; the terminator itself is not kept in the output.
    let terminators = ['.', '?', '!'];
    raw.split(&terminators[..])
        .map(|piece| piece.trim().to_lowercase())
        .filter(|sentence| !sentence.is_empty())
        .collect()
}
fn process_sentence<W: Write>(
w: &mut EventWriter<W>,
sentence: &String,
morphemes: &HashMap