Lexer for text files

This commit is contained in:
Benjamin Fry 2015-08-25 23:15:37 -07:00
parent 1495ca4dd3
commit 28f38c6425
12 changed files with 534 additions and 11 deletions

48
Cargo.lock generated
View File

@ -1,12 +1,52 @@
[root]
name = "trust-dns"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"rustc-serialize 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc-serialize"
version = "0.3.16"
name = "aho-corasick"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "log"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

View File

@ -1,6 +1,6 @@
[package]
name = "trust-dns"
version = "0.1.0"
version = "0.1.1"
authors = ["Benjamin Fry <benjaminfry@me.com>"]
# A short blurb about the package. This is not rendered in any format when
@ -27,5 +27,6 @@ keywords = ["DNS", "dns", "host", "BIND", "dig", "named", "dnssec"]
license = "Apache-2.0"
[dependencies]
log = "^0.3.1"
regex = "^0.1.41"
# rustc-serialize = "^0.3.16"

View File

@ -55,7 +55,7 @@ impl Error for DecodeError {
DecodeError::UnknownRecordTypeStr(..) => "RecordType string unknown",
DecodeError::NoRecordDataType => "RecordType unspecified",
DecodeError::NoRecordDataLength => "RecordData length unspecified",
DecodeError::EOF => "End of file",
DecodeError::EOF => "End of input",
}
}

85
src/error/lexer_error.rs Normal file
View File

@ -0,0 +1,85 @@
/*
* Copyright (C) 2015 Benjamin Fry <benjaminfry@me.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::error::Error;
use std::fmt;
use std::num;
use std::string::FromUtf8Error;
/// Errors raised while lexing DNS master-file (zone file) text.
#[derive(Debug)]
pub enum LexerError {
    /// Collected escape bytes were not valid UTF-8.
    ParseUtf8Error(FromUtf8Error),
    /// An escape sequence appeared where character data is not allowed.
    EscapedCharOutsideCharData,
    /// A control (or otherwise disallowed) character followed an escape.
    IllegalCharacter(char),
    /// A character that no lexer rule recognizes.
    UnrecognizedChar(char),
    /// An escape sequence (e.g. `\DDD`) that could not be decoded as octal.
    BadEscapedData(String),
    /// An octal escape decoded to a value that is not a valid Unicode scalar.
    UnrecognizedOctet(u32),
    /// A numeric field failed to parse as an integer.
    ParseIntError(num::ParseIntError),
    /// Input ended inside a quoted string before the closing `"`.
    UnclosedQuotedString,
    /// Input ended before the current read could complete.
    EOF,
}
// Human-readable rendering of each lexer error, for logs and user messages.
impl fmt::Display for LexerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            // Wrapped errors render through their own Display impls.
            LexerError::ParseUtf8Error(ref err) => err.fmt(f),
            LexerError::EscapedCharOutsideCharData => write!(f, "Escaped character outside character data"),
            LexerError::IllegalCharacter(ch) => write!(f, "Illegal input character: {}", ch),
            LexerError::UnrecognizedChar(ch) => write!(f, "Did not recognize the input character: {}", ch),
            // FIX: previously reused the IllegalCharacter message ("Illegal input
            // character: {}"), which made the two errors indistinguishable in output.
            LexerError::BadEscapedData(ref s) => write!(f, "Bad escaped data: {}", s),
            LexerError::UnrecognizedOctet(o) => write!(f, "Unrecognized octet: {}", o),
            LexerError::ParseIntError(ref err) => err.fmt(f),
            LexerError::UnclosedQuotedString => write!(f, "Unclosed quoted string"),
            LexerError::EOF => write!(f, "End of input reached before next read could complete"),
        }
    }
}
// std::error::Error implementation so LexerError composes with other error types.
impl Error for LexerError {
    /// A short, static description of the error kind.
    fn description(&self) -> &str {
        match *self {
            // Wrapped errors supply their own descriptions.
            LexerError::ParseUtf8Error(ref err) => err.description(),
            LexerError::ParseIntError(ref err) => err.description(),
            // Static descriptions for the lexer's own error kinds.
            LexerError::EscapedCharOutsideCharData => "Escaped character outside character data",
            LexerError::IllegalCharacter(..) => "Illegal character input",
            LexerError::UnrecognizedChar(..) => "Unrecognized character input",
            LexerError::BadEscapedData(..) => "Escaped data not recognized",
            LexerError::UnrecognizedOctet(..) => "Unrecognized octet",
            LexerError::UnclosedQuotedString => "Unclosed quoted string",
            LexerError::EOF => "End of input",
        }
    }

    /// The underlying error, for the two variants that wrap one.
    fn cause(&self) -> Option<&Error> {
        if let LexerError::ParseUtf8Error(ref err) = *self {
            Some(err)
        } else if let LexerError::ParseIntError(ref err) = *self {
            Some(err)
        } else {
            None
        }
    }
}
// Lets `try!` convert UTF-8 decoding failures into LexerError automatically.
impl From<FromUtf8Error> for LexerError {
    fn from(err: FromUtf8Error) -> LexerError {
        LexerError::ParseUtf8Error(err)
    }
}
// Lets `try!` convert integer-parse failures into LexerError automatically.
impl From<num::ParseIntError> for LexerError {
    fn from(err: num::ParseIntError) -> LexerError {
        LexerError::ParseIntError(err)
    }
}

View File

@ -13,15 +13,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
mod decode_error;
mod encode_error;
mod client_error;
mod lexer_error;
pub use self::decode_error::DecodeError;
pub use self::encode_error::EncodeError;
pub use self::client_error::ClientError;
pub use self::lexer_error::LexerError;
pub type DecodeResult<T> = Result<T, DecodeError>;
pub type EncodeResult = Result<(), EncodeError>;
pub type ClientResult<T> = Result<T, ClientError>;
pub type LexerResult<T> = Result<T, LexerError>;

View File

@ -18,3 +18,6 @@ pub mod op;
pub mod udp;
pub mod error;
pub mod serialize;
#[macro_use] extern crate log;
extern crate regex;

View File

@ -46,7 +46,7 @@ impl BinEncoder {
/// matches description from above.
///
/// ```
/// use trust_dns::serialize::binary::encoder::BinEncoder;
/// use trust_dns::serialize::binary::BinEncoder;
///
/// let mut encoder: BinEncoder = BinEncoder::new();
/// encoder.emit_character_data("abc");

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
pub mod decoder;
pub mod encoder;
mod decoder;
mod encoder;
pub use self::decoder::BinDecoder;
pub use self::encoder::BinEncoder;

View File

@ -14,3 +14,4 @@
* limitations under the License.
*/
pub mod binary;
pub mod txt;

View File

@ -0,0 +1,31 @@
/*
* Copyright (C) 2015 Benjamin Fry <benjaminfry@me.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use ::error::{DecodeError, DecodeResult};
use ::rr::record_type::RecordType;
/// This is non-destructive to the inner buffer, b/c for pointer types we need to perform a reverse
/// seek to lookup names
///
/// A note on serialization, there was a thought to have this implement the rustc-serialization,
/// but given that this is such a small subset of all the serialization which that performs
/// this is a simpler implementation without the cruft, at least for serializing to/from the
/// binary DNS protocols. rustc-serialization will be used for other coms, e.g. json over http
pub struct TxtDecoder {
    buffer: Vec<u8>,                 // input bytes; kept intact per the non-destructive note above
    index: usize,                    // presumably the read cursor into `buffer` — no methods visible here to confirm
    record_type: Option<RecordType>, // NOTE(review): looks like the record type once parsed — confirm against decoder methods
    rdata_length: Option<u16>,       // NOTE(review): looks like the declared RDATA length once parsed — confirm
}

View File

@ -0,0 +1,325 @@
use std::cell::{Cell,RefCell};
use std::iter::Peekable;
use std::str::Chars;
use std::char;
use ::error::{LexerResult,LexerError};
/// A lexer for DNS master-file (zone file) text.
pub struct Lexer<'a> {
    txt: Peekable<Chars<'a>>, // peekable character stream over the source text
}
impl<'a> Lexer<'a> {
pub fn new(txt: &str) -> Lexer {
Self::with_chars(txt.chars())
}
pub fn with_chars(chars: Chars) -> Lexer {
Lexer { txt: chars.peekable() }
}
pub fn next_token(&mut self) -> LexerResult<Option<Token>> {
let mut cur_token: Cell<Option<State>> = Cell::new(None);
let mut cur_string: RefCell<Option<String>> = RefCell::new(None);
//while let Some(ch) = self.txt.by_ref().peekable().peek() {
'out: for i in 0..4096 { // max chars in a single lex, helps with issues in the lexer...
assert!(i < 4095); // keeps the bounds of the loop defined (nothing lasts forever)
// This is to get around mutibility rules such that we can peek at the iter without moving next...
let ch: char = {
//let mut peekable = self.txt.by_ref().peekable();
let next_ch: Option<&char> = self.txt.peek();
if next_ch.is_some() { *next_ch.unwrap() } else { break 'out }
};
if let Some(t) = cur_token.get() {
if let State::Comment = t {
let ch = self.txt.next();
if ch.is_none() || ch.unwrap() == '\n' { return Ok(Some(Token::EOL)); } // special case for comments
else { continue 'out } // gobbling rest of line for comment
} else if let State::Quote = t {
match ch {
'"' => { cur_token.set(Some(State::Quoted)) ; break 'out },
'/' => try!(self.escape_seq().and_then(|ch|Ok(self.push(State::Quote, &cur_token, &cur_string, ch)))),
_ => self.push(State::Quote, &cur_token, &cur_string, ch),
}
continue 'out; // skipping rest of processing for quoted strings.
}
}
match ch {
' '|'\t'|'\r' => {
match cur_token.get() {
None => {self.txt.next(); continue}, // gobble all whitespace
Some(..) => break, // end previous thing...
}
},
'a' ... 'z' | 'A' ... 'Z' | '-' => { self.push(State::CharData, &cur_token, &cur_string, ch); },
'0' ... '9' => { self.push(State::Number, &cur_token, &cur_string, ch); },
'\u{E000}' ... '\u{10FFFF}' if ch.is_alphanumeric() => { self.push(State::CharData, &cur_token, &cur_string, ch); },
'\n' => { self.set_token_if_not(State::EOL, &cur_token); break },
'@' => { self.set_token_if_not(State::At, &cur_token); break },
'$' => if self.set_token_if_not(State::Dollar, &cur_token) { continue } else { break },
'(' => { self.set_token_if_not(State::LeftParen, &cur_token); break },
')' => { self.set_token_if_not(State::RightParen, &cur_token); break },
'"' => if self.set_token_if_not(State::Quote, &cur_token) { continue } else { break },
';' => if self.set_token_if_not(State::Comment, &cur_token) { continue } else { break },
'.' => { self.set_token_if_not(State::Dot, &cur_token) ; break },
'\\' => {
try!(self.escape_seq().and_then(|c|Ok(self.push(State::CharData, &cur_token, &cur_string, c))));
continue;
},
_ => return Err(LexerError::UnrecognizedChar(ch)),
}
}
// if the token is unset, then we are at end of stream, aka None
if cur_token.get().is_none() { return Ok(None); }
Token::from(cur_token.get().unwrap(), cur_string.into_inner())
}
fn escape_seq(&mut self) -> LexerResult<char> {
// escaped character, let's decode it.
self.txt.next(); // consume the escape
let ch = {
let ch_opt = self.txt.peek(); // the next character
if ch_opt.is_none() { return Err(LexerError::EOF) }
*ch_opt.unwrap()
};
if (!ch.is_control()) {
if (ch.is_numeric()) {
// in this case it's an excaped octal: \DDD
let d1 = self.txt.next(); // gobble
let d2 = self.txt.next(); // gobble
let d3 = try!(self.peek()); // peek b/c the push will advance
if d2.is_none() { return Err(LexerError::EOF) }
// let ddd: [u8; 3] = [d1.unwrap() as u8, d2.unwrap() as u8, *d3.unwrap() as u8];
// let ch: char = try!(u32::from_str_radix(&ddd.into(), 8)
let ddd: String = try!(String::from_utf8(vec![d1.unwrap() as u8, d2.unwrap() as u8, d3 as u8]));
let ch: char = try!(u32::from_str_radix(&ddd, 8)
.or(Err(LexerError::BadEscapedData(ddd)))
.and_then(|o|char::from_u32(o).ok_or(LexerError::UnrecognizedOctet(o))));
//let ch: char = try!(char::from_digit(try!(u32::from_str_radix(&ddd as &str, 8)), 8).ok_or(Err(LexerError::BadEscapedData(ddd)))); // octal parsing
return Ok(ch);
} else {
// this is an excaped char: \X
return Ok(ch);
}
} else {
return Err(LexerError::IllegalCharacter(ch));
}
}
fn peek(&mut self) -> LexerResult<char> {
let ch_opt = self.txt.peek(); // the next character
if ch_opt.is_none() { return Err(LexerError::EOF) }
Ok(*ch_opt.unwrap())
}
/// set's the token if it's not set, if it is succesul it advances the txt iter
fn set_token_if_not(&mut self, next_state: State, cur_token: &Cell<Option<State>>) -> bool {
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
self.txt.next();
true
} else {
false
}
}
fn push(&mut self, next_state: State, cur_token: &Cell<Option<State>>, cell_string: &RefCell<Option<String>>, ch: char) {
if cur_token.get().is_none() {
cur_token.set(Some(next_state));
}
let mut cur_string = cell_string.borrow_mut();
if cur_string.is_none() { *cur_string = Some(String::new()); }
if let Some(s) = cur_string.as_mut() {
s.push(ch);
}
self.txt.next();
}
}
/// Internal lexer state: the kind of token currently being accumulated.
#[derive(Copy, Clone, PartialEq)]
pub enum State {
    Dot,        // .
    LeftParen,  // (
    RightParen, // )
    CharData,   // [a-zA-Z, non-control utf8]+
    Comment,    // ;.*
    At,         // @
    Number,     // [0-9]+
    Quote,      // ".*"  (still open)
    Quoted,     // finish the quoted sequence (closing quote seen)
    Dollar,     // $
    EOL,        // \n or \r\n
}
/// A token produced by the lexer; string-carrying variants own their text.
#[derive(PartialEq, Debug)]
pub enum Token {
    Dot,              // .
    LeftParen,        // (
    RightParen,       // )
    CharData(String), // [a-zA-Z, non-control utf8]+
    At,               // @
    Number(i32),      // [0-9]+
    Quote(String),    // ".*"
    Dollar(String),   // $
    EOL,              // \n or \r\n
}
impl Token {
pub fn from(state: State, value: Option<String>) -> LexerResult<Option<Token>> {
Ok(Some(match state {
State::Dot => Token::Dot,
State::LeftParen => Token::LeftParen,
State::RightParen => Token::RightParen,
State::CharData => Token::CharData(value.unwrap()),
State::Comment => Token::EOL, // comments can't end a sequence, so must be EOF/EOL
State::At => Token::At,
State::Number => Token::Number(value.unwrap().parse().unwrap()),
State::Quote => return Err(LexerError::UnclosedQuotedString),
State::Quoted => Token::Quote(value.unwrap_or_default()),
State::Dollar => Token::Dollar(value.unwrap_or_default()),
State::EOL => Token::EOL,
}))
}
}
#[cfg(test)]
mod lex_test {
    use super::*;

    // fun with tests!!! lots of options
    #[test]
    fn lex() {
        // Single-token inputs: one per token kind, plus escape handling.
        assert_eq!(Lexer::new(".").next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(Lexer::new(" .").next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(Lexer::new("(").next_token().unwrap().unwrap(), Token::LeftParen);
        assert_eq!(Lexer::new(")").next_token().unwrap().unwrap(), Token::RightParen);
        assert_eq!(Lexer::new("abc").next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
        assert_eq!(Lexer::new("abc.").next_token().unwrap().unwrap(), Token::CharData("abc".to_string()));
        // \X yields the literal next character; \077 is octal for '?'.
        assert_eq!(Lexer::new("a\\A").next_token().unwrap().unwrap(), Token::CharData("aA".to_string()));
        assert_eq!(Lexer::new("a\\$").next_token().unwrap().unwrap(), Token::CharData("a$".to_string()));
        assert_eq!(Lexer::new("a\\077").next_token().unwrap().unwrap(), Token::CharData("a?".to_string()));
        // Comments gobble the rest of the line and surface as EOL.
        assert_eq!(Lexer::new(";abc").next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(Lexer::new(";;@$-\"").next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(Lexer::new("@").next_token().unwrap().unwrap(), Token::At);
        assert_eq!(Lexer::new("123").next_token().unwrap().unwrap(), Token::Number(123));
        assert_eq!(Lexer::new("\"Quoted\"").next_token().unwrap().unwrap(), Token::Quote("Quoted".to_string()));
        assert_eq!(Lexer::new("\";@$\"").next_token().unwrap().unwrap(), Token::Quote(";@$".to_string()));
        assert_eq!(Lexer::new("$Bill").next_token().unwrap().unwrap(), Token::Dollar("Bill".to_string()));
        assert_eq!(Lexer::new("$$Bill").next_token().unwrap().unwrap(), Token::Dollar("".to_string()));
        assert_eq!(Lexer::new("\n").next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(Lexer::new("\r\n").next_token().unwrap().unwrap(), Token::EOL);

        // Full token stream over a realistic SOA zone snippet (modeled on the
        // RFC 1035 example); the trailing \ continuations strip following
        // whitespace, so the string content is exactly what is shown.
        let mut lexer = Lexer::new("@ IN SOA VENERA Action\\.domains (\n\
                                    20 ; SERIAL\n\
                                    7200 ; REFRESH\n\
                                    600 ; RETRY\n\
                                    3600000; EXPIRE\n\
                                    60) ; MINIMUM\n\
                                    \n\
                                    NS A.ISI.EDU.\n\
                                    NS VENERA\n\
                                    NS VAXA\n\
                                    MX 10 VENERA\n\
                                    MX 20 VAXA\n\
                                    \n\
                                    A A 26.3.0.103\n\
                                    \n\
                                    VENERA A 10.1.0.52\n\
                                    A 128.9.0.32\n\
                                    \n\
                                    $INCLUDE \\<SUBSYS\\>ISI-MAILBOXES.TXT");
        // SOA header line; note Action\.domains lexes as one CharData.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::At);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("IN".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("SOA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("Action.domains".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::LeftParen);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        // SOA numeric fields; each trailing comment collapses to an EOL.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(20));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(7200));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(600));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(3600000));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(60));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::RightParen);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        // NS records; dotted names lex as CharData/Dot alternations.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("ISI".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("EDU".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("NS".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        // MX records with numeric preferences.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(10));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("MX".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(20));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VAXA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        // A records; IPv4 addresses lex as Number/Dot alternations.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(26));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(3));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(0));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(103));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("VENERA".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(10));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(1));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(0));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(52));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("A".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(128));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(9));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(0));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Number(32));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::EOL);
        // $INCLUDE directive; \< and \> are literal escaped characters.
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dollar("INCLUDE".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("<SUBSYS>ISI-MAILBOXES".to_string()));
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::Dot);
        assert_eq!(lexer.next_token().unwrap().unwrap(), Token::CharData("TXT".to_string()));
        // End of input: None, not an error.
        assert!(lexer.next_token().unwrap().is_none());
    }
}

34
src/serialize/txt/mod.rs Normal file
View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2015 Benjamin Fry <benjaminfry@me.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// mod master;
mod master_lex;
mod decoder;
//mod encoder;
pub use self::decoder::TxtDecoder;
//pub use self::encoder::TxtEncoder;
//#[cfg(test)]
//pub mod txt_tests;
//use ::error::*;
// pub trait TxtSerializable {
// fn read(decoder: &mut TxtDecoder) -> DecodeResult<Self>;
// fn emit(&self, encoder: &mut TxtEncoder) -> EncodeResult;
// }