Add parser
This commit is contained in:
parent
36e33d59cb
commit
bb9c1fd885
|
@ -8,4 +8,5 @@ license = "MIT"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nom = "5.1.2"
|
||||
rand = "0.7.3"
|
||||
|
|
|
@ -5,7 +5,7 @@ Purrchance is an unofficial Rust implementation of the
|
|||
|
||||
## Features I might implement eventually
|
||||
|
||||
- [ ] Parsing grammars from text format
|
||||
- [x] Parsing grammars from text format
|
||||
- [x] Basic lists
|
||||
- [x] Probability weights
|
||||
- [ ] Single-item lists
|
||||
|
|
44
src/lib.rs
44
src/lib.rs
|
@ -1,5 +1,8 @@
|
|||
extern crate nom;
|
||||
extern crate rand;
|
||||
|
||||
pub mod parser;
|
||||
|
||||
use rand::{seq::SliceRandom, thread_rng};
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
@ -15,7 +18,7 @@ pub enum Symbol {
|
|||
impl Purrchance for Symbol {
|
||||
fn eval(&self, g: &Grammar) -> Option<String> {
|
||||
match self {
|
||||
Symbol::Terminal(s) => Some(s.to_string()),
|
||||
Symbol::Terminal(s) => Some(String::from(s)),
|
||||
Symbol::NonTerminal(label) => g.0.get(label)?.eval(g),
|
||||
}
|
||||
}
|
||||
|
@ -42,6 +45,7 @@ pub struct Grammar(HashMap<String,List>);
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use parser::*;
|
||||
|
||||
#[test]
|
||||
fn eval_terminal() {
|
||||
|
@ -104,4 +108,42 @@ mod tests {
|
|||
let nt = Symbol::NonTerminal("missing".to_string());
|
||||
assert_eq!(nt.eval(&g), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eval_loaded_grammar() {
|
||||
let g = load_grammar("test\n foo\n");
|
||||
let nt = Symbol::NonTerminal(String::from("test"));
|
||||
assert_eq!(nt.eval(&g), Some(String::from("foo")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eval_loaded_grammar_comments() {
|
||||
let g = load_grammar("// testing
|
||||
test
|
||||
foo // blah blah
|
||||
// isn't this fun?");
|
||||
let nt = Symbol::NonTerminal(String::from("test"));
|
||||
assert_eq!(nt.eval(&g), Some(String::from("foo")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eval_loaded_grammar_comments_weights() {
|
||||
let g = load_grammar("// testing
|
||||
test
|
||||
foo ^100
|
||||
// isn't this fun?");
|
||||
let nt = Symbol::NonTerminal(String::from("test"));
|
||||
assert_eq!(nt.eval(&g), Some(String::from("foo")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn eval_loaded_grammar_comments_fraction_weights_tabs() {
|
||||
let g = load_grammar("
|
||||
test
|
||||
foo ^1000000
|
||||
bar ^1/1000000
|
||||
");
|
||||
let nt = Symbol::NonTerminal(String::from("test"));
|
||||
assert_eq!(nt.eval(&g), Some(String::from("foo")));
|
||||
}
|
||||
}
|
||||
|
|
107
src/parser.rs
Normal file
107
src/parser.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use super::*;
|
||||
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use nom::{Err, error::{ErrorKind, ParseError}};
|
||||
use nom::{FindSubstring, InputLength, InputTake, IResult};
|
||||
use nom::branch::*;
|
||||
use nom::bytes::complete::*;
|
||||
use nom::combinator::*;
|
||||
use nom::multi::*;
|
||||
use nom::number::complete::*;
|
||||
use nom::sequence::*;
|
||||
|
||||
fn nonterminal(input: &str) -> IResult<&str, Symbol> {
|
||||
map(delimited(tag("["), nonterminal_name, tag("]")), Symbol::NonTerminal)(input)
|
||||
}
|
||||
fn weight(input: &str) -> IResult<&str, f64> {
|
||||
preceded(tag("^"), alt((double, rat)))(input)
|
||||
}
|
||||
fn comment(input: &str) -> IResult<&str, ()> {
|
||||
map(tuple((tag("//"), is_not("\n"))), |_| ())(input)
|
||||
}
|
||||
fn whitespace(input: &str) -> IResult<&str, ()> {
|
||||
map(take_while(|c| (c == ' ' || c == '\t')), |_| ())(input)
|
||||
}
|
||||
|
||||
fn empty_line(input: &str) -> IResult<&str, ()> {
|
||||
map(tuple((opt(comment), tag("\n"))), |_| ())(input)
|
||||
}
|
||||
|
||||
pub fn take_until_any<T, Input, Error: ParseError<Input>>(tags: Vec<T>) -> impl Fn(Input) -> IResult<Input, Input, Error>
|
||||
where
|
||||
Input: InputTake + FindSubstring<T>,
|
||||
T: InputLength + Clone,
|
||||
{
|
||||
move |i: Input| {
|
||||
let min_index = tags.iter().filter_map(|tag| i.find_substring(tag.clone())).min();
|
||||
let res: IResult<_, _, Error> = match min_index {
|
||||
None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))),
|
||||
Some(index) => Ok(i.take_split(index)),
|
||||
};
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
fn terminal(input: &str) -> IResult<&str, Symbol> {
|
||||
let (input, mut terminal_val) = verify(take_until_any(vec!["[", "//", "^", "\n"]), |s: &str| s.len() > 0)(input)?;
|
||||
if !input.starts_with("[") {
|
||||
terminal_val = terminal_val.trim_end();
|
||||
}
|
||||
|
||||
Ok((input, Symbol::Terminal(String::from(terminal_val))))
|
||||
}
|
||||
|
||||
fn nonterminal_name(input: &str) -> IResult<&str, String> {
|
||||
let (input, head) = take_while_m_n(1, 1, |c: char| (c.is_alphabetic() || c == '_'))(input)?;
|
||||
let (input, tail) = take_while(|c: char| (c.is_alphanumeric() || c == '_'))(input)?;
|
||||
|
||||
Ok((input, String::from(head.to_owned() + tail)))
|
||||
}
|
||||
|
||||
fn rat(input: &str) -> IResult<&str, f64> {
|
||||
let (input, num) = double(input)?;
|
||||
let (input, _) = tag("/")(input)?;
|
||||
let (input, denom) = verify(double, |&f| f != 0.0)(input)?;
|
||||
|
||||
Ok((input, num / denom))
|
||||
}
|
||||
|
||||
fn expr(input: &str) -> IResult<&str, (Expr, f64)> {
|
||||
let (input, (_, _, syms, _, weight, _, _, _, _)) = tuple((
|
||||
many0(empty_line),
|
||||
alt((tag(" "), tag("\t"))),
|
||||
many1(alt((terminal, nonterminal))),
|
||||
whitespace,
|
||||
opt(weight),
|
||||
whitespace,
|
||||
opt(comment),
|
||||
tag("\n"),
|
||||
many0(empty_line),
|
||||
))(input)?;
|
||||
|
||||
Ok((input, (Expr(syms), weight.unwrap_or(1.0))))
|
||||
}
|
||||
|
||||
fn list(input: &str) -> IResult<&str, (String, List)> {
|
||||
let (input, (_, name, _, _, _, exprs)) = tuple((
|
||||
many0(empty_line),
|
||||
nonterminal_name,
|
||||
whitespace,
|
||||
opt(comment),
|
||||
tag("\n"),
|
||||
many1(expr),
|
||||
))(input)?;
|
||||
|
||||
Ok((input, (name, List(exprs))))
|
||||
}
|
||||
|
||||
fn grammar(input: &str) -> IResult<&str, Grammar> {
|
||||
let (input, lists) = many1(list)(input)?;
|
||||
|
||||
Ok((input, Grammar(HashMap::from_iter(lists.into_iter()))))
|
||||
}
|
||||
|
||||
pub fn load_grammar(input: &str) -> Grammar {
|
||||
grammar(input).unwrap().1
|
||||
}
|
Loading…
Reference in a new issue