Simplified FSM for Lexer, lexer tests passing.

This commit is contained in:
Benjamin Fry 2015-09-02 20:11:06 -07:00
parent aec63c5dfe
commit 09c857b57f
3 changed files with 262 additions and 254 deletions

View File

@ -31,6 +31,7 @@ pub enum LexerError {
UnclosedList,
UnrecognizedDollar(String),
EOF,
IllegalState(&'static str),
}
impl fmt::Display for LexerError {
@ -47,6 +48,7 @@ impl fmt::Display for LexerError {
LexerError::UnclosedList => write!(f, "Unclosed list, missing ')'"),
LexerError::UnrecognizedDollar(ref s) => write!(f, "Unrecognized dollar content: {}", s),
LexerError::EOF => write!(f, "End of input reached before next read could complete"),
LexerError::IllegalState(s) => write!(f, "Illegal state: {}", s),
}
}
}
@ -65,6 +67,7 @@ impl Error for LexerError {
LexerError::UnclosedList => "Unclosed list",
LexerError::UnrecognizedDollar(..) => "Unrecognized dollar content",
LexerError::EOF => "End of input",
LexerError::IllegalState(..) => "Illegal state",
}
}

View File

@ -1,4 +1,3 @@
use std::cell::{Cell,RefCell};
use std::iter::Peekable;
use std::str::Chars;
use std::char;
@ -7,113 +6,156 @@ use ::error::{LexerResult,LexerError};
pub struct Lexer<'a> {
txt: Peekable<Chars<'a>>,
is_first_line: bool,
in_list: bool,
state: State,
}
impl<'a> Lexer<'a> {
pub fn new(txt: &str) -> Lexer {
Lexer { txt: txt.chars().peekable(), is_first_line: true, in_list: false }
Lexer { txt: txt.chars().peekable(), state: State::StartLine }
}
pub fn next_token(&mut self) -> LexerResult<Option<Token>> {
let cur_token: Cell<Option<State>> = Cell::new(None);
let cur_string: RefCell<Option<String>> = RefCell::new(None);
let mut char_data_vec: Option<Vec<String>> = None;
let mut char_data: Option<String> = None;
//while let Some(ch) = self.txt.by_ref().peekable().peek() {
'out: for i in 0..4096 { // max chars in a single lex, helps with issues in the lexer...
assert!(i < 4095); // keeps the bounds of the loop defined (nothing lasts forever)
// This is to get around mutability rules such that we can peek at the iter without moving next...
let ch: char = if let Some(c) = self.peek() { c } else { break 'out };
let ch: Option<char> = self.peek();
// collectors
if let Some(t) = cur_token.get() {
match t {
State::Comment => {
// handy line for debugging
println!("ch = {:?}; state = {:?}(c: {:?}, v: {:?})", ch, self.state, char_data, char_data_vec);
// continuing states should pass back the state as the last statement,
// terminal states should set the state internally and return the proper Token::*.
// TODO: there is some non-ideal copying going on in here...
match self.state {
State::StartLine => {
match ch {
'\n' => cur_token.set(None), // out of the comment
_ => { self.txt.next(); }, // advance the token by default
Some('\r') | Some('\n') => { self.state = State::EOL; },
// white space at the start of line is a Blank
Some(ch) if ch.is_whitespace() => { self.state = State::Blank },
Some(_) => { self.state = State::RestOfLine },
None => { self.state = State::EOF; },
}
},
State::RestOfLine => {
match ch {
Some('@') => { self.state = State::At },
Some('(') => { self.txt.next(); char_data_vec = Some(Vec::new()); self.state = State::StartList; },
Some(')') => { return Err(LexerError::IllegalCharacter(ch.unwrap_or(')'))) },
Some('$') => { self.txt.next(); char_data = Some(String::new()); self.state = State::Dollar; },
Some('\r') | Some('\n') => { self.state = State::EOL; },
Some('"') => { self.txt.next(); char_data = Some(String::new()); self.state = State::Quote; },
Some(';') => { self.state = State::Comment{ is_list: false } },
Some(ch) if ch.is_whitespace() => { self.txt.next(); }, // gobble other whitespace
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { char_data = Some(String::new()); self.state = State::CharData{ is_list: false }; },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { self.state = State::EOF; },
}
}
State::Blank => {
// consume the whitespace
self.txt.next();
self.state = State::RestOfLine;
return Ok(Some(Token::Blank))
}
State::Comment{ is_list } => {
match ch {
Some('\r') | Some('\n') => {
if is_list { self.state = State::StartList; }
else { self.state = State::EOL; }
}, // out of the comment
Some(_) => { self.txt.next(); }, // advance the token by default and maintain state
None => { self.state = State::EOF; },
}
continue 'out;
},
State::Quote => {
match ch {
'"' => { cur_token.set(Some(State::Quoted)); self.txt.next() ; break 'out },
'\\' => try!(self.escape_seq().and_then(|ch|Ok(self.push(State::Quote, &cur_token, &cur_string, ch)))),
_ => self.push(State::Quote, &cur_token, &cur_string, ch),
// end and gobble the '"'
Some('"') => { self.state = State::RestOfLine; self.txt.next() ; return Ok(Some(Token::CharData(char_data.take().unwrap_or("".into())))) },
Some('\\') => { try!(Self::push_to_str(&mut char_data, try!(self.escape_seq()))); },
Some(ch) => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch)); },
None => { return Err(LexerError::UnclosedQuotedString) },
}
continue 'out; // skipping rest of processing for quoted strings.
}
},
State::Dollar => {
match ch {
'A' ... 'Z' => { self.push(State::Dollar, &cur_token, &cur_string, ch); continue 'out },
_ => { break 'out},
// even this is a little broad for what's actually possible in a dollar...
Some('A' ... 'Z') => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch.unwrap())); },
// finishes the Dollar...
Some(_) | None => {
self.state = State::RestOfLine;
let dollar: String = try!(char_data.take().ok_or(LexerError::IllegalState("char_data is None")));
if "INCLUDE" == dollar { return Ok(Some(Token::Include)) }
else if "ORIGIN" == dollar { return Ok(Some(Token::Origin)) }
else { return Err(LexerError::UnrecognizedDollar(char_data.take().unwrap_or("".into()))) }
},
}
},
State::StartList => {
match ch {
Some(';') => { self.txt.next(); self.state = State::Comment{ is_list: true } },
Some(')') => { self.txt.next(); self.state = State::RestOfLine; return char_data_vec.take().ok_or(LexerError::IllegalState("char_data_vec is None")).map(|v|Some(Token::List(v))); }
Some(ch) if ch.is_whitespace() => { self.txt.next(); },
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { char_data = Some(String::new()); self.state = State::CharData{ is_list: true } },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { return Err(LexerError::UnclosedList) },
}
},
State::CharData{ is_list } => {
match ch {
Some(')') if !is_list => { return Err(LexerError::IllegalCharacter(ch.unwrap_or(')'))) },
Some(ch) if ch.is_whitespace() || ch == ')' || ch == ';' => {
if is_list {
try!(char_data_vec.as_mut().ok_or(LexerError::IllegalState("char_data_vec is None")).and_then(|v|Ok(v.push(try!(char_data.take().ok_or(LexerError::IllegalState("char_data is None")))))));
self.state = State::StartList;
} else {
self.state = State::RestOfLine;
let result = char_data.take().ok_or(LexerError::IllegalState("char_data is None"));
println!("result: {:?}", result);
let opt = result.map(|s|Some(Token::CharData(s)));
println!("opt: {:?}", opt);
return opt;
// return char_data.take().ok_or(LexerError::IllegalState("char_data is None")).map(|s|Some(Token::CharData(s)))
}
},
Some('\\') => { try!(Self::push_to_str(&mut char_data, try!(self.escape_seq()))); },
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch)); },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { self.state = State::EOF; return char_data.take().ok_or(LexerError::IllegalState("char_data is None")).map(|s|Some(Token::CharData(s))) },
}
},
State::At => {
self.txt.next();
self.state = State::RestOfLine;
return Ok(Some(Token::At))
},
State::EOL => {
match ch {
Some('\r') => { self.txt.next(); },
Some('\n') => { self.txt.next(); self.state = State::StartLine; return Ok(Some(Token::EOL)) },
Some(_) => { return Err(LexerError::IllegalCharacter(ch.unwrap())) },
None => { return Err(LexerError::EOF) },
}
},
// to exhaust all cases, this should never be run...
State::EOF => {
self.txt.next(); // making sure we consume the last... it will always return None after.
return Ok(None)
}
_ => (),// do nothing
}
}
// general case match for all other states...
match ch {
' '|'\t' => {
if self.is_first_line { self.set_token_if_not(State::Blank, &cur_token); break } // need the first blank on a line
if cur_token.get().is_some() { break } else { self.txt.next(); continue } // gobble all whitespace
},
'a' ... 'z' | 'A' ... 'Z' | '-' | '.' | '0' ... '9' => { self.push(State::CharData, &cur_token, &cur_string, ch); },
'\r' => if cur_token.get().is_some() { break } else { self.txt.next(); continue },
'\n' => {
if self.in_list {
// in a list act like a standard whitespace.
if cur_token.get().is_some() {
break
} else {
self.txt.next(); continue
}
} else {
self.set_token_if_not(State::EOL, &cur_token);
self.is_first_line = true;
break
}
},
'@' => { self.set_token_if_not(State::At, &cur_token); break },
'$' => if self.set_token_if_not(State::Dollar, &cur_token) { continue } else { break },
'(' => {
if self.set_token_if_not(State::StartList, &cur_token) {
if self.in_list { return Err(LexerError::IllegalCharacter(ch)) }
else { self.in_list = true; }
}
break
},
')' => {
if self.set_token_if_not(State::EndList, &cur_token) {
if !self.in_list { return Err(LexerError::IllegalCharacter(ch)) }
else { self.in_list = false; }
}
break
},
'"' => if self.set_token_if_not(State::Quote, &cur_token) { continue } else { break },
';' => if self.set_token_if_not(State::Comment, &cur_token) { continue } else { break },
'\\' => {
try!(self.escape_seq().and_then(|c|Ok(self.push(State::CharData, &cur_token, &cur_string, c))));
}
continue;
},
_ if !ch.is_control() && !ch.is_whitespace() => { self.push(State::CharData, &cur_token, &cur_string, ch); },
_ => return Err(LexerError::UnrecognizedChar(ch)),
}
}
unreachable!("The above match statement should have found a terminal state");
}
// if the token is unset, then we are at end of stream, aka None
match cur_token.get() {
Some(State::Quote) => Err(LexerError::UnclosedQuotedString),
None if self.in_list => Err(LexerError::UnclosedList),
None => Ok(None),
Some(s) => Token::from(s, cur_string.into_inner()),
}
fn push_to_str(collect: &mut Option<String>, ch: char) -> LexerResult<()> {
collect.as_mut().ok_or(LexerError::IllegalState("collect is None")).and_then(|s|Ok(s.push(ch)))
}
fn escape_seq(&mut self) -> LexerResult<char> {
@ -126,7 +168,7 @@ impl<'a> Lexer<'a> {
// in this case it's an escaped octal: \DDD
let d1 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble
let d2 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble
let d3 = try!(self.peek().ok_or(LexerError::EOF)); // peek b/c the push will advance
let d3 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble
// let ddd: [u8; 3] = [d1.unwrap() as u8, d2.unwrap() as u8, *d3.unwrap() as u8];
// let ch: char = try!(u32::from_str_radix(&ddd.into(), 8)
@ -140,6 +182,7 @@ impl<'a> Lexer<'a> {
return Ok(ch);
} else {
// this is an escaped char: \X
self.txt.next(); // gobble the char
return Ok(ch);
}
} else {
@ -151,47 +194,22 @@ impl<'a> Lexer<'a> {
fn peek(&mut self) -> Option<char> {
self.txt.peek().map(|c|*c)
}
/// sets the token if it's not set; if it is successful it advances the txt iter
fn set_token_if_not(&mut self, next_state: State, cur_token: &Cell<Option<State>>) -> bool {
self.is_first_line = false;
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
self.txt.next(); // if we set a new state, it means we can consume the char
true
} else {
false
}
}
fn push(&mut self, next_state: State, cur_token: &Cell<Option<State>>, cell_string: &RefCell<Option<String>>, ch: char) {
self.is_first_line = false;
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
}
let mut cur_string = cell_string.borrow_mut();
if cur_string.is_none() { *cur_string = Some(String::new()); }
if let Some(s) = cur_string.as_mut() {
s.push(ch);
}
self.txt.next();
}
}
#[derive(Copy, Clone, PartialEq)]
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum State {
StartLine,
RestOfLine,
Blank, // only if the first part of the line
StartList, // (
EndList, // )
CharData, // [a-zA-Z, non-control utf8]+
Comment, // ;.*
StartList, // (..)
CharData{ is_list: bool }, // [a-zA-Z, non-control utf8]+
// Name, // CharData + '.' + CharData
Comment{ is_list: bool }, // ;.*
At, // @
Quote, // ".*"
Quoted, // finish the quoted sequence
Dollar, // $
EOL, // \n or \r\n
EOF,
}
#[derive(PartialEq, Debug, Clone)]
@ -199,6 +217,7 @@ pub enum Token {
Blank, // only if the first part of the line
StartList, // (
EndList, // )
List(Vec<String>), // (..)
CharData(String), // [a-zA-Z, non-control utf8, ., -, 0-9]+, ".*"
At, // @
Include, // $INCLUDE
@ -206,68 +225,73 @@ pub enum Token {
EOL, // \n or \r\n
}
impl Token {
pub fn from(state: State, value: Option<String>) -> LexerResult<Option<Token>> {
Ok(Some(match state {
State::Blank => Token::Blank,
State::StartList => Token::StartList,
State::EndList => Token::EndList,
State::CharData => Token::CharData(value.unwrap()),
State::Comment => Token::EOL, // comments can't end a sequence, so must be EOF/EOL
State::At => Token::At,
State::Quote => return Err(LexerError::UnclosedQuotedString),
State::Quoted => Token::CharData(value.unwrap_or_default()),
State::Dollar => {
let s = value.unwrap_or_default();
if "INCLUDE".to_string() == s { Token::Include }
else if "ORIGIN".to_string() == s { Token::Origin }
else { return Err(LexerError::UnrecognizedDollar(s)) }
},
State::EOL => Token::EOL,
}))
}
}
// impl Token {
// pub fn from(state: State, value: Option<String>) -> LexerResult<Option<Token>> {
// Ok(Some(match state {
// State::Blank => Token::Blank,
// State::StartList => Token::StartList,
// State::EndList => Token::EndList,
// State::CharData => Token::CharData(value.unwrap()),
// State::Comment => Token::EOL, // comments can't end a sequence, so must be EOF/EOL
// State::At => Token::At,
// State::Quote => return Err(LexerError::UnclosedQuotedString),
// State::Quoted => Token::CharData(value.unwrap_or()),
// State::Dollar => {
// let s = value.unwrap_or();
// if "INCLUDE".to_string() == s { Token::Include }
// else if "ORIGIN".to_string() == s { Token::Origin }
// else { return Err(LexerError::UnrecognizedDollar(s)) }
// },
// State::EOL => Token::EOL,
// }))
// }
// }
#[cfg(test)]
mod lex_test {
use super::*;
fn next_token(lexer: &mut Lexer) -> Option<Token> {
let result = lexer.next_token();
if result.is_err() { panic!("{:?}", result) }
result.unwrap()
}
#[test]
fn blank() {
// first blank
let mut lexer = Lexer::new(" dead beef");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
// not the second blank
let mut lexer = Lexer::new("dead beef");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
let mut lexer = Lexer::new("dead beef\r\n after");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("after".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("after".to_string()));
let mut lexer = Lexer::new("dead beef ();comment
after");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("after".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec![]));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("after".to_string()));
}
#[test]
fn escape() {
assert_eq!(Lexer::new("a\\A").next_token().unwrap().unwrap(), Token::CharData("aA".to_string()));
assert_eq!(Lexer::new("a\\Aa").next_token().unwrap().unwrap(), Token::CharData("aAa".to_string()));
assert_eq!(Lexer::new("a\\$").next_token().unwrap().unwrap(), Token::CharData("a$".to_string()));
assert_eq!(Lexer::new("a\\077").next_token().unwrap().unwrap(), Token::CharData("a?".to_string()));
assert!(Lexer::new("a\\").next_token().is_err());
@ -283,14 +307,14 @@ mod lex_test {
let mut lexer = Lexer::new("\"multi\nline\ntext\"");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("multi\nline\ntext".to_string()));
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("multi\nline\ntext".to_string()));
assert_eq!(next_token(&mut lexer), None);
let mut lexer = Lexer::new("\"multi\r\nline\r\ntext\"\r\n");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("multi\r\nline\r\ntext".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("multi\r\nline\r\ntext".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer), None);
assert!(Lexer::new("\"multi").next_token().is_err());
}
@ -303,63 +327,46 @@ mod lex_test {
// fun with tests!!! lots of options
#[test]
fn lex() {
assert_eq!(Lexer::new(".").next_token().unwrap().unwrap(), Token::CharData(".".to_string()));
assert_eq!(Lexer::new(" .").next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(Lexer::new("abc").next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(Lexer::new("abc.").next_token().unwrap().unwrap(), Token::CharData("abc.".to_string()));
assert_eq!(Lexer::new(";abc").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new(";;@$-\"").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new("@").next_token().unwrap().unwrap(), Token::At);
assert_eq!(Lexer::new("123").next_token().unwrap().unwrap(), Token::CharData("123".to_string()));
assert_eq!(Lexer::new("$INCLUDE").next_token().unwrap().unwrap(), Token::Include);
assert_eq!(Lexer::new("$ORIGIN").next_token().unwrap().unwrap(), Token::Origin);
assert_eq!(Lexer::new("\n").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new("\r\n").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(next_token(&mut Lexer::new(".")).unwrap(), Token::CharData(".".to_string()));
assert_eq!(next_token(&mut Lexer::new(" .")).unwrap(), Token::Blank);
assert_eq!(next_token(&mut Lexer::new("abc")).unwrap(), Token::CharData("abc".to_string()));
assert_eq!(next_token(&mut Lexer::new("abc.")).unwrap(), Token::CharData("abc.".to_string()));
assert_eq!(next_token(&mut Lexer::new(";abc")), None);
assert_eq!(next_token(&mut Lexer::new(";;@$-\"")), None);
assert_eq!(next_token(&mut Lexer::new("@")).unwrap(), Token::At);
assert_eq!(next_token(&mut Lexer::new("123")).unwrap(), Token::CharData("123".to_string()));
assert_eq!(next_token(&mut Lexer::new("$INCLUDE")).unwrap(), Token::Include);
assert_eq!(next_token(&mut Lexer::new("$ORIGIN")).unwrap(), Token::Origin);
assert_eq!(next_token(&mut Lexer::new("\n")), Some(Token::EOL));
assert_eq!(next_token(&mut Lexer::new("\r\n")), Some(Token::EOL));
}
#[test]
fn list() {
let mut lexer = Lexer::new("(");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert!(lexer.next_token().is_err());
assert!(Lexer::new(")").next_token().is_err());
let mut lexer = Lexer::new("()");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec![]));
assert_eq!(next_token(&mut lexer), None);
let mut lexer = Lexer::new("(abc)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);
let mut lexer = Lexer::new("(\nabc\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);
let mut lexer = Lexer::new("(\nabc\nabc)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string(), "abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);
let mut lexer = Lexer::new("(\n\"abc\"\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
let mut lexer = Lexer::new("(\n\"abc\";comment\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
let mut lexer = Lexer::new("(\nabc;comment\n)");
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);
}
#[test]
@ -384,59 +391,57 @@ VENERA A 10.1.0.52
$INCLUDE <SUBSYS>ISI-MAILBOXES.TXT");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::At);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("IN".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("SOA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("Action.domains".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("20".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("7200".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("600".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("3600000".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("60".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A.ISI.EDU.".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("10".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("20".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("26.3.0.103".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("10.1.0.52".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("128.9.0.32".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Include);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("<SUBSYS>ISI-MAILBOXES.TXT".to_string()));
assert!(lexer.next_token().unwrap().is_none());
assert_eq!(next_token(&mut lexer).unwrap(), Token::At);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("IN".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("SOA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("Action.domains".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["20".to_string(),
"7200".to_string(),
"600".to_string(),
"3600000".to_string(),
"60".to_string()]));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A.ISI.EDU.".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("MX".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("10".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("MX".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("20".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("26.3.0.103".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("10.1.0.52".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("128.9.0.32".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Include);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("<SUBSYS>ISI-MAILBOXES.TXT".to_string()));
assert!(next_token(&mut lexer).is_none());
}
}

View File

@ -49,7 +49,7 @@ VENERA A 10.1.0.52
// this should all be lowercased
assert_eq!(&Name::new().label("venera").label("isi").label("edu"), mname);
// TODO: this is broken, need to build names directly into the lexer I think.
//assert_eq!(&Name::new().label("Action.domains").label("isi").label("edu"), rname);
assert_eq!(&Name::new().label("action.domains").label("isi").label("edu"), rname);
assert_eq!(&Name::new().label("action").label("domains").label("isi").label("edu"), rname);
assert_eq!(20, serial);
assert_eq!(7200, refresh);