diff --git a/Cargo.toml b/Cargo.toml
index d5a5d28..f162376 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,4 +8,5 @@ license = "MIT"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+nom = "5.1.2"
 rand = "0.7.3"
diff --git a/README.md b/README.md
index c74c901..0207cab 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Purrchance is an unofficial Rust implementation of the
 
 ## Features I might implement eventually
 
-- [ ] Parsing grammars from text format
+- [x] Parsing grammars from text format
 - [x] Basic lists
 - [x] Probability weights
 - [ ] Single-item lists
diff --git a/src/lib.rs b/src/lib.rs
index ba147b8..b2c529a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,8 @@
+extern crate nom;
 extern crate rand;
 
+pub mod parser;
+
 use rand::{seq::SliceRandom, thread_rng};
 use std::collections::HashMap;
 
@@ -15,7 +18,7 @@ pub enum Symbol {
 impl Purrchance for Symbol {
     fn eval(&self, g: &Grammar) -> Option<String> {
         match self {
-            Symbol::Terminal(s) => Some(s.to_string()),
+            Symbol::Terminal(s) => Some(String::from(s)),
             Symbol::NonTerminal(label) => g.0.get(label)?.eval(g),
         }
     }
@@ -42,6 +45,7 @@ pub struct Grammar(HashMap<String, List>);
 #[cfg(test)]
 mod tests {
     use super::*;
+    use parser::*;
 
     #[test]
     fn eval_terminal() {
@@ -104,4 +108,42 @@ mod tests {
         let nt = Symbol::NonTerminal("missing".to_string());
         assert_eq!(nt.eval(&g), None);
     }
+
+    #[test]
+    fn eval_loaded_grammar() {
+        let g = load_grammar("test\n foo\n");
+        let nt = Symbol::NonTerminal(String::from("test"));
+        assert_eq!(nt.eval(&g), Some(String::from("foo")));
+    }
+
+    #[test]
+    fn eval_loaded_grammar_comments() {
+        let g = load_grammar("// testing
+test
+ foo // blah blah
+// isn't this fun?");
+        let nt = Symbol::NonTerminal(String::from("test"));
+        assert_eq!(nt.eval(&g), Some(String::from("foo")));
+    }
+
+    #[test]
+    fn eval_loaded_grammar_comments_weights() {
+        let g = load_grammar("// testing
+test
+ foo ^100
+// isn't this fun?");
+        let nt = Symbol::NonTerminal(String::from("test"));
+        assert_eq!(nt.eval(&g), Some(String::from("foo")));
+    }
+
+    #[test]
+    fn eval_loaded_grammar_comments_fraction_weights_tabs() {
+        let g = load_grammar("
+test
+ foo ^1000000
+ bar ^1/1000000
+");
+        let nt = Symbol::NonTerminal(String::from("test"));
+        assert_eq!(nt.eval(&g), Some(String::from("foo")));
+    }
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..57ef599
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,135 @@
+//! Parser for the text grammar format, built on `nom` combinators.
+
+use super::*;
+
+use std::iter::FromIterator;
+
+use nom::{Err, error::{ErrorKind, ParseError}};
+use nom::{FindSubstring, InputLength, InputTake, IResult};
+use nom::branch::*;
+use nom::bytes::complete::*;
+use nom::combinator::*;
+use nom::multi::*;
+use nom::number::complete::*;
+use nom::sequence::*;
+
+/// Parses a bracketed list reference such as `[name]` into a `Symbol::NonTerminal`.
+fn nonterminal(input: &str) -> IResult<&str, Symbol> {
+    map(delimited(tag("["), nonterminal_name, tag("]")), Symbol::NonTerminal)(input)
+}
+
+/// Parses a weight suffix: `^` followed by either a `num/denom` ratio or a plain number.
+fn weight(input: &str) -> IResult<&str, f64> {
+    // `rat` must be tried first: `double` on its own accepts the numerator of `1/4`
+    // and leaves `/4` unconsumed, so with the other order the ratio branch never runs.
+    preceded(tag("^"), alt((rat, double)))(input)
+}
+
+/// Parses a `//` comment up to (but not including) the end of the line.
+fn comment(input: &str) -> IResult<&str, ()> {
+    // `take_while` (unlike `is_not`) also accepts an empty comment body, i.e. a bare `//`.
+    map(tuple((tag("//"), take_while(|c: char| c != '\n'))), |_| ())(input)
+}
+
+/// Consumes a (possibly empty) run of spaces and tabs.
+fn whitespace(input: &str) -> IResult<&str, ()> {
+    map(take_while(|c: char| c == ' ' || c == '\t'), |_| ())(input)
+}
+
+/// Parses a line that holds nothing but an optional comment.
+fn empty_line(input: &str) -> IResult<&str, ()> {
+    map(tuple((opt(comment), tag("\n"))), |_| ())(input)
+}
+
+/// Returns a parser that takes input up to the earliest occurrence of any of `tags`.
+///
+/// The parser fails with `ErrorKind::TakeUntil` when none of the tags occurs in the input.
+pub fn take_until_any<T, Input, Error: ParseError<Input>>(tags: Vec<T>) -> impl Fn(Input) -> IResult<Input, Input, Error>
+where
+    Input: InputTake + FindSubstring<T>,
+    T: InputLength + Clone,
+{
+    move |i: Input| {
+        let min_index = tags.iter().filter_map(|tag| i.find_substring(tag.clone())).min();
+        let res: IResult<_, _, Error> = match min_index {
+            None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
+            Some(index) => Ok(i.take_split(index)),
+        };
+        res
+    }
+}
+
+/// Parses non-empty literal text up to the next `[`, `//`, `^` or newline into a `Symbol::Terminal`.
+fn terminal(input: &str) -> IResult<&str, Symbol> {
+    let (input, mut terminal_val) = verify(take_until_any(vec!["[", "//", "^", "\n"]), |s: &str| !s.is_empty())(input)?;
+    // Trailing whitespace is kept only when a `[` reference follows directly.
+    if !input.starts_with('[') {
+        terminal_val = terminal_val.trim_end();
+    }
+
+    Ok((input, Symbol::Terminal(String::from(terminal_val))))
+}
+
+/// Parses a list name: one alphabetic character or `_`, then any alphanumerics or `_`.
+fn nonterminal_name(input: &str) -> IResult<&str, String> {
+    let (input, head) = take_while_m_n(1, 1, |c: char| c.is_alphabetic() || c == '_')(input)?;
+    let (input, tail) = take_while(|c: char| c.is_alphanumeric() || c == '_')(input)?;
+
+    Ok((input, head.to_owned() + tail))
+}
+
+/// Parses a ratio `num/denom` and returns the quotient; a zero denominator is rejected.
+fn rat(input: &str) -> IResult<&str, f64> {
+    let (input, num) = double(input)?;
+    let (input, _) = tag("/")(input)?;
+    let (input, denom) = verify(double, |&f| f != 0.0)(input)?;
+
+    Ok((input, num / denom))
+}
+
+/// Parses one indented list item with its optional weight (default `1.0`) and trailing comment.
+fn expr(input: &str) -> IResult<&str, (Expr, f64)> {
+    let (input, (_, _, syms, _, weight, _, _, _, _)) = tuple((
+        many0(empty_line),
+        alt((tag(" "), tag("\t"))),
+        many1(alt((terminal, nonterminal))),
+        whitespace,
+        opt(weight),
+        whitespace,
+        opt(comment),
+        tag("\n"),
+        many0(empty_line),
+    ))(input)?;
+
+    Ok((input, (Expr(syms), weight.unwrap_or(1.0))))
+}
+
+/// Parses a list: a name line followed by one or more items.
+fn list(input: &str) -> IResult<&str, (String, List)> {
+    let (input, (_, name, _, _, _, exprs)) = tuple((
+        many0(empty_line),
+        nonterminal_name,
+        whitespace,
+        opt(comment),
+        tag("\n"),
+        many1(expr),
+    ))(input)?;
+
+    Ok((input, (name, List(exprs))))
+}
+
+/// Parses one or more lists into a `Grammar` keyed by list name.
+fn grammar(input: &str) -> IResult<&str, Grammar> {
+    let (input, lists) = many1(list)(input)?;
+
+    Ok((input, Grammar(HashMap::from_iter(lists))))
+}
+
+/// Parses `input` into a `Grammar`; any input left over after the last list is ignored.
+///
+/// # Panics
+///
+/// Panics if not even one list can be parsed from `input`.
+pub fn load_grammar(input: &str) -> Grammar {
+    grammar(input).expect("failed to parse grammar").1
+}