Add parser

This commit is contained in:
xenofem 2020-06-19 01:39:08 -04:00
parent 36e33d59cb
commit bb9c1fd885
4 changed files with 152 additions and 2 deletions

View file

@ -8,4 +8,5 @@ license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
nom = "5.1.2"
rand = "0.7.3" rand = "0.7.3"

View file

@ -5,7 +5,7 @@ Purrchance is an unofficial Rust implementation of the
## Features I might implement eventually ## Features I might implement eventually
- [ ] Parsing grammars from text format - [x] Parsing grammars from text format
- [x] Basic lists - [x] Basic lists
- [x] Probability weights - [x] Probability weights
- [ ] Single-item lists - [ ] Single-item lists

View file

@ -1,5 +1,8 @@
extern crate nom;
extern crate rand; extern crate rand;
pub mod parser;
use rand::{seq::SliceRandom, thread_rng}; use rand::{seq::SliceRandom, thread_rng};
use std::collections::HashMap; use std::collections::HashMap;
@ -15,7 +18,7 @@ pub enum Symbol {
impl Purrchance for Symbol { impl Purrchance for Symbol {
fn eval(&self, g: &Grammar) -> Option<String> { fn eval(&self, g: &Grammar) -> Option<String> {
match self { match self {
Symbol::Terminal(s) => Some(s.to_string()), Symbol::Terminal(s) => Some(String::from(s)),
Symbol::NonTerminal(label) => g.0.get(label)?.eval(g), Symbol::NonTerminal(label) => g.0.get(label)?.eval(g),
} }
} }
@ -42,6 +45,7 @@ pub struct Grammar(HashMap<String,List>);
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use parser::*;
#[test] #[test]
fn eval_terminal() { fn eval_terminal() {
@ -104,4 +108,42 @@ mod tests {
let nt = Symbol::NonTerminal("missing".to_string()); let nt = Symbol::NonTerminal("missing".to_string());
assert_eq!(nt.eval(&g), None); assert_eq!(nt.eval(&g), None);
} }
/// Loads a grammar from text containing a single list named `test` with one
/// entry, then checks that evaluating the nonterminal `test` yields `foo`.
#[test]
fn eval_loaded_grammar() {
let g = load_grammar("test\n foo\n");
let nt = Symbol::NonTerminal(String::from("test"));
assert_eq!(nt.eval(&g), Some(String::from("foo")));
}
/// Loads a grammar whose text contains `//` comments before the list, after
/// the list entry, and on the final line, then checks that evaluating the
/// nonterminal `test` still yields `foo`.
#[test]
fn eval_loaded_grammar_comments() {
let g = load_grammar("// testing
test
foo // blah blah
// isn't this fun?");
let nt = Symbol::NonTerminal(String::from("test"));
assert_eq!(nt.eval(&g), Some(String::from("foo")));
}
/// Loads a grammar containing comments and a single entry carrying an explicit
/// `^100` weight, then checks that evaluating the nonterminal `test` yields
/// `foo` (the weight marker must not leak into the output).
#[test]
fn eval_loaded_grammar_comments_weights() {
let g = load_grammar("// testing
test
foo ^100
// isn't this fun?");
let nt = Symbol::NonTerminal(String::from("test"));
assert_eq!(nt.eval(&g), Some(String::from("foo")));
}
/// Loads a grammar with two entries, `foo` weighted `^1000000` and `bar`
/// weighted with the fraction `^1/1000000`, and expects evaluation of the
/// nonterminal `test` to yield `foo`.
///
/// NOTE(review): if both entries are loaded the result is chosen at random, so
/// this assertion presumably relies on `bar` being vanishingly unlikely rather
/// than impossible — confirm that is the intent.
#[test]
fn eval_loaded_grammar_comments_fraction_weights_tabs() {
let g = load_grammar("
test
foo ^1000000
bar ^1/1000000
");
let nt = Symbol::NonTerminal(String::from("test"));
assert_eq!(nt.eval(&g), Some(String::from("foo")));
}
} }

107
src/parser.rs Normal file
View file

@ -0,0 +1,107 @@
use super::*;
use std::iter::FromIterator;
use nom::{Err, error::{ErrorKind, ParseError}};
use nom::{FindSubstring, InputLength, InputTake, IResult};
use nom::branch::*;
use nom::bytes::complete::*;
use nom::combinator::*;
use nom::multi::*;
use nom::number::complete::*;
use nom::sequence::*;
fn nonterminal(input: &str) -> IResult<&str, Symbol> {
map(delimited(tag("["), nonterminal_name, tag("]")), Symbol::NonTerminal)(input)
}
/// Parses a probability weight: a `^` followed by either a fraction
/// (`^1/3`, handled by `rat`) or a plain number (`^2.5`).
///
/// Returns the weight as an `f64`.
fn weight(input: &str) -> IResult<&str, f64> {
    // `alt` returns the first alternative that succeeds, so `rat` has to come
    // first: `double` on its own happily parses just the numerator of "1/3",
    // leaving "/3" behind and making the fraction form unreachable.
    preceded(tag("^"), alt((rat, double)))(input)
}
fn comment(input: &str) -> IResult<&str, ()> {
map(tuple((tag("//"), is_not("\n"))), |_| ())(input)
}
/// Skips any run (possibly empty) of spaces and tabs. Never consumes newlines.
fn whitespace(input: &str) -> IResult<&str, ()> {
    let (rest, _blanks) = take_while(|c: char| c == ' ' || c == '\t')(input)?;
    Ok((rest, ()))
}
/// Consumes one line that holds nothing but an optional comment, including
/// its terminating `\n`.
fn empty_line(input: &str) -> IResult<&str, ()> {
    let (rest, _) = opt(comment)(input)?;
    let (rest, _) = tag("\n")(rest)?;
    Ok((rest, ()))
}
pub fn take_until_any<T, Input, Error: ParseError<Input>>(tags: Vec<T>) -> impl Fn(Input) -> IResult<Input, Input, Error>
where
Input: InputTake + FindSubstring<T>,
T: InputLength + Clone,
{
move |i: Input| {
let min_index = tags.iter().filter_map(|tag| i.find_substring(tag.clone())).min();
let res: IResult<_, _, Error> = match min_index {
None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
Some(index) => Ok(i.take_split(index)),
};
res
}
}
/// Parses a non-empty run of literal text into a `Symbol::Terminal`.
///
/// The text ends at the first `[`, `//`, `^` or newline. Trailing whitespace
/// is trimmed unless the text is immediately followed by `[`, in which case
/// it is kept verbatim.
fn terminal(input: &str) -> IResult<&str, Symbol> {
    let stop_tokens = vec!["[", "//", "^", "\n"];
    let (rest, raw) = verify(take_until_any(stop_tokens), |text: &str| !text.is_empty())(input)?;
    let text = if rest.starts_with('[') {
        raw
    } else {
        raw.trim_end()
    };
    Ok((rest, Symbol::Terminal(text.to_owned())))
}
/// Parses a list name: one alphabetic character or `_`, followed by any
/// number of alphanumeric characters or `_`. Returns the name as an owned
/// `String`.
fn nonterminal_name(input: &str) -> IResult<&str, String> {
    let is_head = |c: char| c == '_' || c.is_alphabetic();
    let is_tail = |c: char| c == '_' || c.is_alphanumeric();

    let (rest, first) = take_while_m_n(1, 1, is_head)(input)?;
    let (rest, remainder) = take_while(is_tail)(rest)?;

    let mut name = String::with_capacity(first.len() + remainder.len());
    name.push_str(first);
    name.push_str(remainder);
    Ok((rest, name))
}
fn rat(input: &str) -> IResult<&str, f64> {
let (input, num) = double(input)?;
let (input, _) = tag("/")(input)?;
let (input, denom) = verify(double, |&f| f != 0.0)(input)?;
Ok((input, num / denom))
}
fn expr(input: &str) -> IResult<&str, (Expr, f64)> {
let (input, (_, _, syms, _, weight, _, _, _, _)) = tuple((
many0(empty_line),
alt((tag(" "), tag("\t"))),
many1(alt((terminal, nonterminal))),
whitespace,
opt(weight),
whitespace,
opt(comment),
tag("\n"),
many0(empty_line),
))(input)?;
Ok((input, (Expr(syms), weight.unwrap_or(1.0))))
}
fn list(input: &str) -> IResult<&str, (String, List)> {
let (input, (_, name, _, _, _, exprs)) = tuple((
many0(empty_line),
nonterminal_name,
whitespace,
opt(comment),
tag("\n"),
many1(expr),
))(input)?;
Ok((input, (name, List(exprs))))
}
fn grammar(input: &str) -> IResult<&str, Grammar> {
let (input, lists) = many1(list)(input)?;
Ok((input, Grammar(HashMap::from_iter(lists.into_iter()))))
}
/// Parses `input` as a grammar in text format and returns the result.
///
/// Only the parsed value is kept; any input left over after the last list
/// that parsed successfully is discarded.
///
/// # Panics
///
/// Panics if the `grammar` parser returns an error.
pub fn load_grammar(input: &str) -> Grammar {
    let (_unparsed, parsed) = grammar(input).unwrap();
    parsed
}