@@ -21,3 +21,7 @@ crate-type = ["dylib"]
 lalr = "0.0.1"
 redfa = "0.0.2"
+[dev-dependencies]
+serde_json = "0.8"
\ No newline at end of file
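[Aside, not part of the diff: serde_json is pinned at 0.8, whose `Value` enum still exposes the `I64`/`U64`/`F64` variants that the new test's parser actions construct directly.]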
@@ -116,6 +116,14 @@ fn expected_one_of<S: fmt::Display>(xs: &[S]) -> String {
     err_msg
 }
 
+fn rule_name<T, N, A>(lhs: &N, rhs: &Rhs<T, N, A>) -> String where T: Display, N: Display {
+    let mut s = format!("{}_", lhs);
+    for symbol in &rhs.syms {
+        write!(&mut s, "_{}", symbol).unwrap();
+    }
+    s
+}
+
 pub fn lr1_machine<'a, T, N, A, FM, FA, FR, FO>(
     cx: &mut base::ExtCtxt,
     grammar: &'a Grammar<T, N, A>,
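[Aside, not part of the diff: the naming scheme is easier to see with the grammar types stripped away. A minimal sketch, with plain strings standing in for the symbol types and `demo_rule_name` a hypothetical name:]

use std::fmt::Write;

// Mirrors rule_name above: "<lhs>_" plus "_<symbol>" per RHS symbol, so the
// double underscore separates the LHS from the RHS.
fn demo_rule_name(lhs: &str, syms: &[&str]) -> String {
    let mut s = format!("{}_", lhs);
    for symbol in syms {
        write!(&mut s, "_{}", symbol).unwrap();
    }
    s
}

fn main() {
    // A rule `Expr -> Term Plus Expr` now yields a reduction function named
    // `reduce_Expr__Term_Plus_Expr` instead of an opaque `reduce_7`.
    assert_eq!(demo_rule_name("Expr", &["Term", "Plus", "Expr"]),
               "Expr__Term_Plus_Expr");
}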
@@ -184,14 +192,15 @@ where T: Ord + fmt::Debug + fmt::Display,
             id: DUMMY_NODE_ID,
         }
     ];
-    let rule_fn_ids: BTreeMap<_, _> = grammar.rules.iter()
-        .filter(|&(lhs, _)| *lhs != grammar.start)
-        .flat_map(|(_, rhss)| {
-            // Identify rules by their RHS, which should have unique addresses
-            rhss.iter().map(|rhs| rhs as *const _)
+    let rule_fn_ids: Vec<Vec<_>> = grammar.rules.iter()
+        .map(|(lhs, rhss)| {
+            if *lhs == grammar.start {
+                return vec![];
+            }
+            rhss.iter()
+                .map(|rhs| gensym(&format!("reduce_{}", rule_name(lhs, rhs))))
+                .collect()
         })
-        .enumerate()
-        .map(|(i, k)| (k, gensym(&format!("reduce_{}", i))))
         .collect();
     let goto_fn_ids: BTreeMap<_, _> = grammar.rules.keys()
         .filter(|&lhs| *lhs != grammar.start)
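[Aside, not part of the diff: the old map was keyed by RHS address (`rhs as *const _`) and named functions after a global counter; the new `Vec<Vec<_>>` is positional, so rules and their generated function ids can simply be walked in lockstep with `zip`, as the loops below do. A standalone sketch of that lookup strategy, with toy types and hypothetical names:]

fn main() {
    // Rules in grammar order: (lhs, list of RHS labels).
    let rules = vec![("Expr", vec!["Term_Plus_Expr", "Term"]),
                     ("Term", vec!["Num"])];
    // One generated-function name per RHS, stored positionally.
    let ids: Vec<Vec<String>> = rules.iter()
        .map(|&(lhs, ref rhss)| {
            rhss.iter().map(|rhs| format!("reduce_{}__{}", lhs, rhs)).collect()
        })
        .collect();
    // Lockstep iteration replaces the pointer-keyed map lookup.
    for (&(lhs, ref rhss), sub_ids) in rules.iter().zip(&ids) {
        for (rhs, id) in rhss.iter().zip(sub_ids) {
            println!("{} -> {}: fn {}", lhs, rhs, id);
        }
    }
}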
@@ -223,13 +232,13 @@ where T: Ord + fmt::Debug + fmt::Display,
     let stack_ty = quote_ty!(cx, Vec<Box<::std::any::Any> >);
     let span_stack_ty = quote_ty!(cx, Vec<Option<$span_ty> >);
     let state_stack_ty = quote_ty!(cx, Vec<u32>);
-    for (lhs, rhss) in grammar.rules.iter() {
+    for ((lhs, rhss), sub_rule_fn_ids) in grammar.rules.iter().zip(&rule_fn_ids) {
         if *lhs == grammar.start {
             continue;
         }
         let goto_fn = *goto_fn_ids.get(lhs).unwrap();
         let lhs_ty = types.get(lhs).unwrap();
-        for rhs in rhss.iter() {
+        for (rhs, rule_fn_id) in rhss.iter().zip(sub_rule_fn_ids) {
             let (result, arg_pats, span) = to_expr(lhs, &rhs.act, cx, &rhs.syms);
             let args = vec![ast::Arg {
                 ty: cx.ty_rptr(DUMMY_SP, stack_ty.clone(), None, ast::Mutability::Mutable),
@@ -253,7 +262,7 @@ where T: Ord + fmt::Debug + fmt::Display,
             if rhs.syms.len() > 0 {
                 reduce_stmts.push(quote_stmt!(cx, let $span_id = $range_array_fn_id(&$span_stack_id[($span_stack_id.len() - $len)..]);).unwrap());
                 // XXX: Annoying syntax :(
-                reduce_stmts.push(quote_stmt!(cx, match $span_stack_id.len() - $len { x => $span_stack_id.truncate(x) };).unwrap());
+                reduce_stmts.push(quote_stmt!(cx, { let x = $span_stack_id.len() - $len; $span_stack_id.truncate(x); };).unwrap());
                 // Make the current_span available to the user by exposing it through a macro
                 reduce_stmts.push(quote_stmt!(cx, macro_rules! span {
                     () => { $span_id.unwrap() }
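[Aside, not part of the diff: both the old `match` trick and the new block dodge the same borrow-check complaint. On the compilers this plugin targeted (before two-phase borrows), writing the call directly was rejected, so the length has to be hoisted into a temporary first. Roughly, with `spans` standing in for the interpolated `$span_stack_id`:]

fn truncate_spans(spans: &mut Vec<Option<u32>>, len: usize) {
    // spans.truncate(spans.len() - len); // old rustc: E0502, cannot borrow
    // `*spans` as immutable because it is also borrowed as mutable
    let x = spans.len() - len; // hoist the length computation first
    spans.truncate(x);
}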
@@ -307,8 +316,7 @@ where T: Ord + fmt::Debug + fmt::Display,
             reduce_stmts.push(quote_stmt!(cx, $span_stack_id.push($span_id);).unwrap());
             let block = cx.block(rspan, reduce_stmts);
-            let fn_id = rule_fn_ids.get(&(rhs as *const _)).unwrap().clone();
-            let f = cx.item_fn(span, fn_id, args, quote_ty!(cx, ()), block);
+            let f = cx.item_fn(span, rule_fn_id, args, quote_ty!(cx, ()), block);
             stmts.push(cx.stmt_item(span, f));
         }
     }
@@ -356,21 +364,22 @@ where T: Ord + fmt::Debug + fmt::Display,
             for (&tok, action) in state.lookahead.iter() {
                 expected.push(format!("`{}`", tok));
                 let pat = cx.pat_some(DUMMY_SP, cx.pat_tuple(DUMMY_SP, vec![to_pat(tok, cx), cx.pat_wild(DUMMY_SP)]));
-                let arm_expr = match *action {
-                    LRAction::Shift(dest) => lit_u32(cx, dest as u32),
+                match *action {
+                    LRAction::Shift(dest) => {
+                        let arm_expr = lit_u32(cx, dest as u32);
+                        arms.push(cx.arm(DUMMY_SP, vec![pat], arm_expr));
+                    }
                     LRAction::Reduce(_, rhs) => {
                         reduce_arms.entry(rhs as *const _).or_insert(vec![]).push(pat);
-                        continue;
                     }
-                    LRAction::Accept => unreachable!(),
-                };
-                arms.push(cx.arm(DUMMY_SP, vec![pat], arm_expr))
+                    LRAction::Accept => panic!("bug: accepting on a token other than EOF?"),
+                }
             }
             if let Some(ref action) = state.eof {
                 expected.push("end of file".to_string());
                 let pat = cx.pat_none(DUMMY_SP);
                 match *action {
-                    LRAction::Shift(_) => unreachable!(),
+                    LRAction::Shift(_) => panic!("bug: shifting EOF?"),
                     LRAction::Reduce(_, rhs) => {
                         reduce_arms.entry(rhs as *const _).or_insert(vec![]).push(pat);
                     }
@@ -393,8 +402,7 @@ where T: Ord + fmt::Debug + fmt::Display,
             arms.push(quote_arm!(cx, _ => return ::std::result::Result::Err(($token_span_id, $err_msg_lit)),));
             cx.arm(DUMMY_SP,
                    vec![pat_u32(cx, ix as u32)],
-                   cx.expr_match(DUMMY_SP, cx.expr_ident(DUMMY_SP, token_span_id),
-                       arms))
+                   cx.expr_match(DUMMY_SP, cx.expr_ident(DUMMY_SP, token_span_id), arms))
         }).chain(Some(quote_arm!(cx, _ => $unreachable,)).into_iter()).collect())),
         quote_stmt!(cx, match $token_span_id {
             Some(($token_id, $span_id)) => {
@@ -500,7 +508,8 @@ fn parse_parser<'a>(
     let range_fn_id = gensym("range");
     let range_fn =
-        if !parser.check(&token::OpenDelim(token::Paren)) && span_ty.node == ast::TyKind::Tup(vec![]) {
+        if !parser.check(&token::OpenDelim(token::Paren)) && span_ty.node ==
+            ast::TyKind::Tup(vec![]) {
             cx.item_fn(DUMMY_SP, range_fn_id, vec![
                 cx.arg(DUMMY_SP, gensym("_a"), span_ty.clone()),
                 cx.arg(DUMMY_SP, gensym("_b"), span_ty.clone()),
tests/json.rs (new file):
#![feature(plugin)]
#![plugin(plex)]

extern crate serde_json;

use std::io::Read;

mod lexer {
    use std::char;

    #[derive(Debug, Clone)]
    pub enum Token {
        LBrace,
        RBrace,
        LBracket,
        RBracket,
        Colon,
        Comma,
        Null,
        Bool(bool),
        I64(i64),
        U64(u64),
        F64(f64),
        Str(String),
        Whitespace,
        Error(String),
    }
    fn parse_escape<'a>(s: &'a str) -> (u16, &'a str) {
        let mut it = s.chars();
        let backslash = it.next();
        debug_assert!(backslash == Some('\\'));
        let c = match it.next().expect("impossible: dangling escape") {
            '\\' => '\\',
            '/' => '/',
            '"' => '"',
            'b' => '\x08',
            'f' => '\x0c',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => {
                let (hex, rest) = it.as_str().split_at(4);
                return (u16::from_str_radix(hex, 16).expect("impossible: invalid hex escape"), rest);
            },
            x => panic!("impossible: unknown escape char {}", x),
        };
        (c as u16, it.as_str())
    }
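    // [Aside, not part of the diff: a unit check one could add here to pin
    // down parse_escape's contract; note the `\u` arm returns the codepoint
    // plus whatever follows the four hex digits.]
    #[test]
    fn parse_escape_examples() {
        assert_eq!(parse_escape(r"\u0041bc"), (0x41, "bc"));
        assert_eq!(parse_escape(r"\nx"), (0x0a, "x"));
    }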
    fn unescape_string(mut escaped: &str) -> Result<String, String> {
        let mut unescaped = String::with_capacity(escaped.len());
        while let Some(index) = escaped.find('\\') {
            let (prefix, escape) = escaped.split_at(index);
            unescaped.push_str(prefix);
            let (escaped_codepoint, rest) = parse_escape(escape);
            if escaped_codepoint & 0xFC00 == 0xD800 {
                // High surrogate: it must be immediately followed by a low surrogate.
                if !rest.starts_with("\\u") {
                    return Err("unpaired surrogate".into());
                }
                let (next_codepoint, rest) = parse_escape(rest);
                if next_codepoint & 0xFC00 != 0xDC00 {
                    return Err("unpaired surrogate".into());
                }
                // Combine the pair; the 0x10000 offset is part of UTF-16 decoding
                // (without it, every astral codepoint lands in the wrong plane).
                let cp = 0x10000 + ((((escaped_codepoint & 0x3FF) as u32) << 10) | ((next_codepoint & 0x3FF) as u32));
                if let Some(c) = char::from_u32(cp) {
                    unescaped.push(c);
                } else {
                    return Err(format!("invalid Unicode codepoint: \\u{:04x}\\u{:04x}", escaped_codepoint, next_codepoint));
                }
                escaped = rest;
            } else if let Some(c) = char::from_u32(escaped_codepoint as u32) {
                unescaped.push(c);
                escaped = rest;
            } else {
                return Err(format!("invalid Unicode codepoint: \\u{:04x}", escaped_codepoint));
            }
        }
        unescaped.push_str(escaped);
        Ok(unescaped)
    }
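    // [Aside, not part of the diff: worked example of the surrogate-pair
    // arithmetic above. For hi = 0xD83D, lo = 0xDE00:
    //   0x10000 + ((0xD83D & 0x3FF) << 10 | (0xDE00 & 0x3FF))
    //     = 0x10000 + (0x3D << 10 | 0x200) = 0x1F600, i.e. U+1F600.]
    #[test]
    fn decodes_surrogate_pairs() {
        assert_eq!(unescape_string(r"\uD83D\uDE00").unwrap(), "\u{1F600}");
        assert!(unescape_string(r"\uD83D oops").is_err()); // unpaired surrogate
    }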
    lexer! {
        fn next_token(text: 'a) -> Token;

        r#"[ \t\r\n]+"# => Token::Whitespace,
        // integers
        r#"-?(0|[1-9][0-9]*)"# => {
            if let Ok(num) = text.parse() {
                Token::I64(num)
            } else if let Ok(num) = text.parse() {
                Token::U64(num)
            } else if let Ok(num) = text.parse() {
                // possible loss of precision... ok?
                Token::F64(num)
            } else {
                Token::Error(format!("integer {} is out of range", text))
            }
        },
        // all numbers
        r#"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?"# => {
            if let Ok(num) = text.parse() {
                Token::F64(num)
            } else {
                Token::Error(format!("number {} is out of range", text))
            }
        },
        r#""(\\(["\\/bfnrt]|u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])|[^\\"])*""# => {
            match unescape_string(&text[1..(text.len()-1)]) {
                Ok(s) => Token::Str(s),
                Err(s) => Token::Error(s),
            }
        }
        r#"{"# => Token::LBrace,
        r#"}"# => Token::RBrace,
        r#"\["# => Token::LBracket,
        r#"\]"# => Token::RBracket,
        r#":"# => Token::Colon,
        r#","# => Token::Comma,
        r#"null"# => Token::Null,
        r#"true"# => Token::Bool(true),
        r#"false"# => Token::Bool(false),
        r#"."# => Token::Error(format!("unexpected character: {}", text)),
    }
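    // [Aside, not part of the diff, and my reading of plex's matching rules,
    // so treat it as an assumption: the longest match wins, and among rules
    // matching the same length the one listed first is taken. That is why the
    // integer rule precedes the general number rule: "123" matches both at
    // the same length and lexes as I64, while "2.5" or "1e9" only fully
    // matches the number rule and lexes as F64.]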
    pub struct Lexer<'a> {
        remaining: &'a str,
    }

    impl<'a> Lexer<'a> {
        pub fn new(s: &'a str) -> Lexer<'a> {
            Lexer { remaining: s }
        }
    }

    impl<'a> Iterator for Lexer<'a> {
        type Item = Token;
        fn next(&mut self) -> Option<Token> {
            loop {
                let tok = if let Some(tok) = next_token(&mut self.remaining) {
                    tok
                } else {
                    return None
                };
                match tok {
                    Token::Whitespace => {
                        continue;
                    }
                    tok => {
                        return Some(tok);
                    }
                }
            }
        }
    }
}
mod parser {
    use serde_json::value::{Value, Map};
    use ::lexer::Token::*;
    use ::lexer::*;

    parser! {
        fn parse_(Token, ());

        value: Value {
            Null => Value::Null,
            Bool(b) => Value::Bool(b),
            I64(n) => Value::I64(n),
            U64(n) => Value::U64(n),
            F64(n) => Value::F64(n),
            Str(s) => Value::String(s),
            LBracket RBracket => Value::Array(vec![]),
            LBracket values[vals] RBracket => Value::Array(vals),
            LBrace RBrace => Value::Object(Map::new()),
            LBrace pairs[vals] RBrace => Value::Object(vals),
        }

        values: Vec<Value> {
            value[v] => vec![v],
            values[mut vs] Comma value[v] => {
                vs.push(v);
                vs
            }
        }

        pairs: Map<String, Value> {
            Str(k) Colon value[v] => {
                let mut m = Map::new();
                m.insert(k, v);
                m
            }
            pairs[mut m] Comma Str(k) Colon value[v] => {
                m.insert(k, v);
                m
            }
        }
    }

    pub fn parse<I: Iterator<Item=Token>>(i: I) -> Result<Value, (Option<Token>, &'static str)> {
        parse_(i.map(|x| (x, ()))).map_err(|(tok, expected)| (tok.map(|(tok, ())| tok), expected))
    }
}
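// [Aside, not part of the diff: a minimal smoke test exercising the whole
// pipeline, using only items defined in this file.]
#[test]
fn parses_simple_document() {
    let input = r#"{"a": [1, 2.5, null], "b": "\u00e9", "c": true}"#;
    let lexer = lexer::Lexer::new(input);
    assert!(parser::parse(lexer).is_ok());
}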
fn main() {
    let mut s = String::new();
    std::io::stdin().read_to_string(&mut s).unwrap();
    let lexer = lexer::Lexer::new(&s);
    match parser::parse(lexer) {
        Err((Some(lexer::Token::Error(s)), _)) => {
            println!("Lexer error: {}", s);
        }
        Err((Some(tok), msg)) => {
            println!("Parse error: {}, but got {:?}", msg, tok);
        }
        Err((None, msg)) => {
            println!("Parse error: {}, but got EOF", msg);
        }
        Ok(json) => {
            println!("{}", json);
        }
    }
}