Simplified FSM for Lexer, lexer tests passing.
parent aec63c5dfe
commit 09c857b57f
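The diff below drops the Cell/RefCell token accumulators and the is_first_line/in_list flags from Lexer, and instead drives next_token from a single state: State field. As a rough illustration of that pattern only (a minimal, self-contained sketch; MiniLexer and MiniState are illustrative names, not part of this crate), the shape is: peek one character, then either mutate the state or return a finished token.

// Illustrative only: a toy word-splitter written in the same single-state-field
// style the new Lexer uses (match on `self.state`, then either transition or
// return a finished token). Not part of this commit.
#[derive(Clone, Copy, PartialEq, Debug)]
enum MiniState { Start, Word, Eof }

struct MiniLexer<'a> {
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    state: MiniState,
}

impl<'a> MiniLexer<'a> {
    fn new(txt: &'a str) -> Self {
        MiniLexer { chars: txt.chars().peekable(), state: MiniState::Start }
    }

    fn next_word(&mut self) -> Option<String> {
        let mut buf = String::new();
        loop {
            let ch = self.chars.peek().map(|c| *c);
            match self.state {
                // skip leading whitespace, then decide where to go next
                MiniState::Start => match ch {
                    Some(c) if c.is_whitespace() => { self.chars.next(); }
                    Some(_) => self.state = MiniState::Word,
                    None => self.state = MiniState::Eof,
                },
                // accumulate until whitespace or end of input, then emit
                MiniState::Word => match ch {
                    Some(c) if !c.is_whitespace() => { buf.push(c); self.chars.next(); }
                    _ => { self.state = MiniState::Start; return Some(buf); }
                },
                MiniState::Eof => return None,
            }
        }
    }
}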
@@ -31,6 +31,7 @@ pub enum LexerError {
UnclosedList,
UnrecognizedDollar(String),
EOF,
IllegalState(&'static str),
}

impl fmt::Display for LexerError {
@@ -47,6 +48,7 @@ impl fmt::Display for LexerError {
LexerError::UnclosedList => write!(f, "Unclosed list, missing ')'"),
LexerError::UnrecognizedDollar(ref s) => write!(f, "Unrecognized dollar content: {}", s),
LexerError::EOF => write!(f, "End of input reached before next read could complete"),
LexerError::IllegalState(s) => write!(f, "Illegal state: {}", s),
}
}
}
@@ -65,6 +67,7 @@ impl Error for LexerError {
LexerError::UnclosedList => "Unclosed list",
LexerError::UnrecognizedDollar(..) => "Unrecognized dollar content",
LexerError::EOF => "End of input",
LexerError::IllegalState(..) => "Illegal state",
}
}

@@ -1,4 +1,3 @@
use std::cell::{Cell,RefCell};
use std::iter::Peekable;
use std::str::Chars;
use std::char;
@@ -7,113 +6,156 @@ use ::error::{LexerResult,LexerError};

pub struct Lexer<'a> {
txt: Peekable<Chars<'a>>,
is_first_line: bool,
in_list: bool,
state: State,
}

impl<'a> Lexer<'a> {
pub fn new(txt: &str) -> Lexer {
Lexer { txt: txt.chars().peekable(), is_first_line: true, in_list: false }
Lexer { txt: txt.chars().peekable(), state: State::StartLine }
}

pub fn next_token(&mut self) -> LexerResult<Option<Token>> {
let cur_token: Cell<Option<State>> = Cell::new(None);
let cur_string: RefCell<Option<String>> = RefCell::new(None);
let mut char_data_vec: Option<Vec<String>> = None;
let mut char_data: Option<String> = None;

//while let Some(ch) = self.txt.by_ref().peekable().peek() {
'out: for i in 0..4096 { // max chars in a single lex, helps with issues in the lexer...
assert!(i < 4095); // keeps the bounds of the loop defined (nothing lasts forever)

// This is to get around mutibility rules such that we can peek at the iter without moving next...
let ch: char = if let Some(c) = self.peek() { c } else { break 'out };
let ch: Option<char> = self.peek();

// collectors
if let Some(t) = cur_token.get() {
match t {
State::Comment => {
// handy line for debugging
println!("ch = {:?}; state = {:?}(c: {:?}, v: {:?})", ch, self.state, char_data, char_data_vec);

// continuing states should pass back the state as the last statement,
// terminal states should set the state internally and return the proper Token::*.
// TODO: there is some non-ideal copying going on in here...
match self.state {
State::StartLine => {
match ch {
'\n' => cur_token.set(None), // out of the comment
_ => { self.txt.next(); }, // advance the token by default
Some('\r') | Some('\n') => { self.state = State::EOL; },
// white space at the start of line is a Blank
Some(ch) if ch.is_whitespace() => { self.state = State::Blank },
Some(_) => { self.state = State::RestOfLine },
None => { self.state = State::EOF; },
}
},
State::RestOfLine => {
match ch {
Some('@') => { self.state = State::At },
Some('(') => { self.txt.next(); char_data_vec = Some(Vec::new()); self.state = State::StartList; },
Some(')') => { return Err(LexerError::IllegalCharacter(ch.unwrap_or(')'))) },
Some('$') => { self.txt.next(); char_data = Some(String::new()); self.state = State::Dollar; },
Some('\r') | Some('\n') => { self.state = State::EOL; },
Some('"') => { self.txt.next(); char_data = Some(String::new()); self.state = State::Quote; },
Some(';') => { self.state = State::Comment{ is_list: false } },
Some(ch) if ch.is_whitespace() => { self.txt.next(); }, // gobble other whitespace
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { char_data = Some(String::new()); self.state = State::CharData{ is_list: false }; },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { self.state = State::EOF; },
}
}
State::Blank => {
// consume the whitespace
self.txt.next();
self.state = State::RestOfLine;
return Ok(Some(Token::Blank))
}
State::Comment{ is_list } => {
match ch {
Some('\r') | Some('\n') => {
if is_list { self.state = State::StartList; }
else { self.state = State::EOL; }
}, // out of the comment
Some(_) => { self.txt.next(); }, // advance the token by default and maintain state
None => { self.state = State::EOF; },
}
continue 'out;
},
State::Quote => {
match ch {
'"' => { cur_token.set(Some(State::Quoted)); self.txt.next() ; break 'out },
'\\' => try!(self.escape_seq().and_then(|ch|Ok(self.push(State::Quote, &cur_token, &cur_string, ch)))),
_ => self.push(State::Quote, &cur_token, &cur_string, ch),
// end and gobble the '"'
Some('"') => { self.state = State::RestOfLine; self.txt.next() ; return Ok(Some(Token::CharData(char_data.take().unwrap_or("".into())))) },
Some('\\') => { try!(Self::push_to_str(&mut char_data, try!(self.escape_seq()))); },
Some(ch) => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch)); },
None => { return Err(LexerError::UnclosedQuotedString) },
}

continue 'out; // skipping rest of processing for quoted strings.
}
},
State::Dollar => {
match ch {
'A' ... 'Z' => { self.push(State::Dollar, &cur_token, &cur_string, ch); continue 'out },
_ => { break 'out},
// even this is a little broad for what's actually possible in a dollar...
Some('A' ... 'Z') => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch.unwrap())); },
// finishes the Dollar...
Some(_) | None => {
self.state = State::RestOfLine;
let dollar: String = try!(char_data.take().ok_or(LexerError::IllegalState("char_data is None")));

if "INCLUDE" == dollar { return Ok(Some(Token::Include)) }
else if "ORIGIN" == dollar { return Ok(Some(Token::Origin)) }
else { return Err(LexerError::UnrecognizedDollar(char_data.take().unwrap_or("".into()))) }
},
}
},
State::StartList => {
match ch {
Some(';') => { self.txt.next(); self.state = State::Comment{ is_list: true } },
Some(')') => { self.txt.next(); self.state = State::RestOfLine; return char_data_vec.take().ok_or(LexerError::IllegalState("char_data_vec is None")).map(|v|Some(Token::List(v))); }
Some(ch) if ch.is_whitespace() => { self.txt.next(); },
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { char_data = Some(String::new()); self.state = State::CharData{ is_list: true } },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { return Err(LexerError::UnclosedList) },
}
},
State::CharData{ is_list } => {
match ch {
Some(')') if !is_list => { return Err(LexerError::IllegalCharacter(ch.unwrap_or(')'))) },
Some(ch) if ch.is_whitespace() || ch == ')' || ch == ';' => {
if is_list {
try!(char_data_vec.as_mut().ok_or(LexerError::IllegalState("char_data_vec is None")).and_then(|v|Ok(v.push(try!(char_data.take().ok_or(LexerError::IllegalState("char_data is None")))))));
self.state = State::StartList;
} else {
self.state = State::RestOfLine;
let result = char_data.take().ok_or(LexerError::IllegalState("char_data is None"));
println!("result: {:?}", result);
let opt = result.map(|s|Some(Token::CharData(s)));
println!("opt: {:?}", opt);
return opt;

// return char_data.take().ok_or(LexerError::IllegalState("char_data is None")).map(|s|Some(Token::CharData(s)))
}
},
Some('\\') => { try!(Self::push_to_str(&mut char_data, try!(self.escape_seq()))); },
Some(ch) if !ch.is_control() && !ch.is_whitespace() => { self.txt.next(); try!(Self::push_to_str(&mut char_data, ch)); },
Some(ch) => return Err(LexerError::UnrecognizedChar(ch)),
None => { self.state = State::EOF; return char_data.take().ok_or(LexerError::IllegalState("char_data is None")).map(|s|Some(Token::CharData(s))) },
}
},
State::At => {
self.txt.next();
self.state = State::RestOfLine;
return Ok(Some(Token::At))
},
State::EOL => {
match ch {
Some('\r') => { self.txt.next(); },
Some('\n') => { self.txt.next(); self.state = State::StartLine; return Ok(Some(Token::EOL)) },
Some(_) => { return Err(LexerError::IllegalCharacter(ch.unwrap())) },
None => { return Err(LexerError::EOF) },
}
},
// to exhaust all cases, this should never be run...
State::EOF => {
self.txt.next(); // making sure we consume the last... it will always return None after.
return Ok(None)
}
_ => (),// do nothing
}
}

// general case match for all other states...
match ch {
' '|'\t' => {
if self.is_first_line { self.set_token_if_not(State::Blank, &cur_token); break } // need the first blank on a line
if cur_token.get().is_some() { break } else { self.txt.next(); continue } // gobble all whitespace
},
'a' ... 'z' | 'A' ... 'Z' | '-' | '.' | '0' ... '9' => { self.push(State::CharData, &cur_token, &cur_string, ch); },
'\r' => if cur_token.get().is_some() { break } else { self.txt.next(); continue },
'\n' => {
if self.in_list {
// in a list act like a standard whitespace.
if cur_token.get().is_some() {
break
} else {
self.txt.next(); continue
}
} else {
self.set_token_if_not(State::EOL, &cur_token);
self.is_first_line = true;
break
}
},
'@' => { self.set_token_if_not(State::At, &cur_token); break },
'$' => if self.set_token_if_not(State::Dollar, &cur_token) { continue } else { break },
'(' => {
if self.set_token_if_not(State::StartList, &cur_token) {
if self.in_list { return Err(LexerError::IllegalCharacter(ch)) }
else { self.in_list = true; }
}
break
},
')' => {
if self.set_token_if_not(State::EndList, &cur_token) {
if !self.in_list { return Err(LexerError::IllegalCharacter(ch)) }
else { self.in_list = false; }
}
break
},
'"' => if self.set_token_if_not(State::Quote, &cur_token) { continue } else { break },
';' => if self.set_token_if_not(State::Comment, &cur_token) { continue } else { break },
'\\' => {
try!(self.escape_seq().and_then(|c|Ok(self.push(State::CharData, &cur_token, &cur_string, c))));
}

continue;
},
_ if !ch.is_control() && !ch.is_whitespace() => { self.push(State::CharData, &cur_token, &cur_string, ch); },
_ => return Err(LexerError::UnrecognizedChar(ch)),
}
}
unreachable!("The above match statement should have found a terminal state");
}

// if the token is unset, then we are at end of stream, aka None
match cur_token.get() {
Some(State::Quote) => Err(LexerError::UnclosedQuotedString),
None if self.in_list => Err(LexerError::UnclosedList),
None => Ok(None),
Some(s) => Token::from(s, cur_string.into_inner()),
}
fn push_to_str(collect: &mut Option<String>, ch: char) -> LexerResult<()> {
collect.as_mut().ok_or(LexerError::IllegalState("collect is None")).and_then(|s|Ok(s.push(ch)))
}

fn escape_seq(&mut self) -> LexerResult<char> {
@@ -126,7 +168,7 @@ impl<'a> Lexer<'a> {
// in this case it's an excaped octal: \DDD
let d1 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble
let d2 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble
let d3 = try!(self.peek().ok_or(LexerError::EOF)); // peek b/c the push will advance
let d3 = try!(self.txt.next().ok_or(LexerError::EOF)); // gobble

// let ddd: [u8; 3] = [d1.unwrap() as u8, d2.unwrap() as u8, *d3.unwrap() as u8];
// let ch: char = try!(u32::from_str_radix(&ddd.into(), 8)
@@ -140,6 +182,7 @@ impl<'a> Lexer<'a> {
return Ok(ch);
} else {
// this is an excaped char: \X
self.txt.next(); // gobble the char
return Ok(ch);
}
} else {
@@ -151,47 +194,22 @@ impl<'a> Lexer<'a> {
fn peek(&mut self) -> Option<char> {
self.txt.peek().map(|c|*c)
}

/// set's the token if it's not set, if it is succesul it advances the txt iter
fn set_token_if_not(&mut self, next_state: State, cur_token: &Cell<Option<State>>) -> bool {
self.is_first_line = false;
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
self.txt.next(); // if we set a new state, it means we can consume the char
true
} else {
false
}
}

fn push(&mut self, next_state: State, cur_token: &Cell<Option<State>>, cell_string: &RefCell<Option<String>>, ch: char) {
self.is_first_line = false;
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
}

let mut cur_string = cell_string.borrow_mut();
if cur_string.is_none() { *cur_string = Some(String::new()); }
if let Some(s) = cur_string.as_mut() {
s.push(ch);
}

self.txt.next();
}
}

#[derive(Copy, Clone, PartialEq)]
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum State {
StartLine,
RestOfLine,
Blank, // only if the first part of the line
StartList, // (
EndList, // )
CharData, // [a-zA-Z, non-control utf8]+
Comment, // ;.*
StartList, // (..)
CharData{ is_list: bool }, // [a-zA-Z, non-control utf8]+
// Name, // CharData + '.' + CharData
Comment{ is_list: bool }, // ;.*
At, // @
Quote, // ".*"
Quoted, // finish the quoted sequence
Dollar, // $
EOL, // \n or \r\n
EOF,
}

#[derive(PartialEq, Debug, Clone)]
@@ -199,6 +217,7 @@ pub enum Token {
Blank, // only if the first part of the line
StartList, // (
EndList, // )
List(Vec<String>), // (..)
CharData(String), // [a-zA-Z, non-control utf8, ., -, 0-9]+, ".*"
At, // @
Include, // $INCLUDE
@@ -206,68 +225,73 @@ pub enum Token {
EOL, // \n or \r\n
}

impl Token {
pub fn from(state: State, value: Option<String>) -> LexerResult<Option<Token>> {
Ok(Some(match state {
State::Blank => Token::Blank,
State::StartList => Token::StartList,
State::EndList => Token::EndList,
State::CharData => Token::CharData(value.unwrap()),
State::Comment => Token::EOL, // comments can't end a sequence, so must be EOF/EOL
State::At => Token::At,
State::Quote => return Err(LexerError::UnclosedQuotedString),
State::Quoted => Token::CharData(value.unwrap_or_default()),
State::Dollar => {
let s = value.unwrap_or_default();
if "INCLUDE".to_string() == s { Token::Include }
else if "ORIGIN".to_string() == s { Token::Origin }
else { return Err(LexerError::UnrecognizedDollar(s)) }
},
State::EOL => Token::EOL,
}))
}
}
// impl Token {
// pub fn from(state: State, value: Option<String>) -> LexerResult<Option<Token>> {
// Ok(Some(match state {
// State::Blank => Token::Blank,
// State::StartList => Token::StartList,
// State::EndList => Token::EndList,
// State::CharData => Token::CharData(value.unwrap()),
// State::Comment => Token::EOL, // comments can't end a sequence, so must be EOF/EOL
// State::At => Token::At,
// State::Quote => return Err(LexerError::UnclosedQuotedString),
// State::Quoted => Token::CharData(value.unwrap_or()),
// State::Dollar => {
// let s = value.unwrap_or();
// if "INCLUDE".to_string() == s { Token::Include }
// else if "ORIGIN".to_string() == s { Token::Origin }
// else { return Err(LexerError::UnrecognizedDollar(s)) }
// },
// State::EOL => Token::EOL,
// }))
// }
// }

#[cfg(test)]
mod lex_test {
use super::*;

fn next_token(lexer: &mut Lexer) -> Option<Token> {
let result = lexer.next_token();
if result.is_err() { panic!("{:?}", result) }
result.unwrap()
}

#[test]
fn blank() {
// first blank
let mut lexer = Lexer::new(" dead beef");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));

// not the second blank
let mut lexer = Lexer::new("dead beef");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));

let mut lexer = Lexer::new("dead beef\r\n after");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("after".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("after".to_string()));

let mut lexer = Lexer::new("dead beef ();comment
after");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("dead".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("beef".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("after".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("dead".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("beef".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec![]));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("after".to_string()));

}

#[test]
fn escape() {
assert_eq!(Lexer::new("a\\A").next_token().unwrap().unwrap(), Token::CharData("aA".to_string()));
assert_eq!(Lexer::new("a\\Aa").next_token().unwrap().unwrap(), Token::CharData("aAa".to_string()));
assert_eq!(Lexer::new("a\\$").next_token().unwrap().unwrap(), Token::CharData("a$".to_string()));
assert_eq!(Lexer::new("a\\077").next_token().unwrap().unwrap(), Token::CharData("a?".to_string()));
assert!(Lexer::new("a\\").next_token().is_err());
@@ -283,14 +307,14 @@ mod lex_test {

let mut lexer = Lexer::new("\"multi\nline\ntext\"");

assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("multi\nline\ntext".to_string()));
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("multi\nline\ntext".to_string()));
assert_eq!(next_token(&mut lexer), None);

let mut lexer = Lexer::new("\"multi\r\nline\r\ntext\"\r\n");

assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("multi\r\nline\r\ntext".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("multi\r\nline\r\ntext".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer), None);

assert!(Lexer::new("\"multi").next_token().is_err());
}
@@ -303,63 +327,46 @@ mod lex_test {
// fun with tests!!! lots of options
#[test]
fn lex() {
assert_eq!(Lexer::new(".").next_token().unwrap().unwrap(), Token::CharData(".".to_string()));
assert_eq!(Lexer::new(" .").next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(Lexer::new("abc").next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(Lexer::new("abc.").next_token().unwrap().unwrap(), Token::CharData("abc.".to_string()));
assert_eq!(Lexer::new(";abc").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new(";;@$-\"").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new("@").next_token().unwrap().unwrap(), Token::At);
assert_eq!(Lexer::new("123").next_token().unwrap().unwrap(), Token::CharData("123".to_string()));
assert_eq!(Lexer::new("$INCLUDE").next_token().unwrap().unwrap(), Token::Include);
assert_eq!(Lexer::new("$ORIGIN").next_token().unwrap().unwrap(), Token::Origin);
assert_eq!(Lexer::new("\n").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(Lexer::new("\r\n").next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(next_token(&mut Lexer::new(".")).unwrap(), Token::CharData(".".to_string()));
assert_eq!(next_token(&mut Lexer::new(" .")).unwrap(), Token::Blank);
assert_eq!(next_token(&mut Lexer::new("abc")).unwrap(), Token::CharData("abc".to_string()));
assert_eq!(next_token(&mut Lexer::new("abc.")).unwrap(), Token::CharData("abc.".to_string()));
assert_eq!(next_token(&mut Lexer::new(";abc")), None);
assert_eq!(next_token(&mut Lexer::new(";;@$-\"")), None);
assert_eq!(next_token(&mut Lexer::new("@")).unwrap(), Token::At);
assert_eq!(next_token(&mut Lexer::new("123")).unwrap(), Token::CharData("123".to_string()));
assert_eq!(next_token(&mut Lexer::new("$INCLUDE")).unwrap(), Token::Include);
assert_eq!(next_token(&mut Lexer::new("$ORIGIN")).unwrap(), Token::Origin);
assert_eq!(next_token(&mut Lexer::new("\n")), Some(Token::EOL));
assert_eq!(next_token(&mut Lexer::new("\r\n")), Some(Token::EOL));
}

#[test]
fn list() {
let mut lexer = Lexer::new("(");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert!(lexer.next_token().is_err());

assert!(Lexer::new(")").next_token().is_err());

let mut lexer = Lexer::new("()");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec![]));
assert_eq!(next_token(&mut lexer), None);

let mut lexer = Lexer::new("(abc)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);

let mut lexer = Lexer::new("(\nabc\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);

let mut lexer = Lexer::new("(\nabc\nabc)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string(), "abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);

let mut lexer = Lexer::new("(\n\"abc\"\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
let mut lexer = Lexer::new("(\n\"abc\";comment\n)");
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap(), None);
let mut lexer = Lexer::new("(\nabc;comment\n)");
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["abc".to_string()]));
assert_eq!(next_token(&mut lexer), None);
}

#[test]
@@ -384,59 +391,57 @@ VENERA A 10.1.0.52

$INCLUDE <SUBSYS>ISI-MAILBOXES.TXT");

assert_eq!(lexer.next_token().unwrap().unwrap(), Token::At);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("IN".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("SOA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("Action.domains".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::StartList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("20".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("7200".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("600".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("3600000".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("60".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EndList);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A.ISI.EDU.".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("10".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("20".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("26.3.0.103".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("10.1.0.52".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Blank);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("128.9.0.32".to_string()));
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Include);
assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("<SUBSYS>ISI-MAILBOXES.TXT".to_string()));
assert!(lexer.next_token().unwrap().is_none());
assert_eq!(next_token(&mut lexer).unwrap(), Token::At);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("IN".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("SOA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("Action.domains".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::List(vec!["20".to_string(),
"7200".to_string(),
"600".to_string(),
"3600000".to_string(),
"60".to_string()]));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A.ISI.EDU.".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("NS".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("MX".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("10".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("MX".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("20".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VAXA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("26.3.0.103".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("VENERA".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("10.1.0.52".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Blank);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("A".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("128.9.0.32".to_string()));
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::EOL);
assert_eq!(next_token(&mut lexer).unwrap(), Token::Include);
assert_eq!(next_token(&mut lexer).unwrap(), Token::CharData("<SUBSYS>ISI-MAILBOXES.TXT".to_string()));
assert!(next_token(&mut lexer).is_none());
}
}

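For completeness, the calling pattern the reworked next_token implies is to pull tokens until it yields Ok(None), bailing out on the first LexerError. A minimal sketch of that loop (collect_tokens is a hypothetical helper, not part of this commit, and assumes Lexer, Token, and LexerResult are in scope):

// Hypothetical helper, not in this commit: drain a Lexer into a Vec<Token>.
fn collect_tokens(input: &str) -> LexerResult<Vec<Token>> {
    let mut lexer = Lexer::new(input);
    let mut tokens = Vec::new();
    // next_token() returns Ok(Some(token)) until the input is exhausted,
    // then Ok(None); any lexing failure is propagated by try!.
    while let Some(token) = try!(lexer.next_token()) {
        tokens.push(token);
    }
    Ok(tokens)
}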
@@ -49,7 +49,7 @@ VENERA A 10.1.0.52
// this should all be lowercased
assert_eq!(&Name::new().label("venera").label("isi").label("edu"), mname);
// TODO: this is broken, need to build names directly into the lexer I think.
//assert_eq!(&Name::new().label("Action.domains").label("isi").label("edu"), rname);
assert_eq!(&Name::new().label("action.domains").label("isi").label("edu"), rname);
assert_eq!(&Name::new().label("action").label("domains").label("isi").label("edu"), rname);
assert_eq!(20, serial);
assert_eq!(7200, refresh);