proto: handle includes in the parser

This commit is contained in:
Dirkjan Ochtman 2023-08-03 14:02:46 +02:00 committed by Benjamin Fry
parent 4b334aee36
commit 77e06525c1
6 changed files with 229 additions and 253 deletions

View File

@ -335,7 +335,7 @@ mod tests {
// TODO: make Parser return an iterator over all records in a stream.
fn parse_record<D: RecordData>(txt: &str) -> D {
let lex = Lexer::new(txt);
let records = Parser::new(lex, Some(Name::root()))
let records = Parser::new(lex, None, Some(Name::root()))
.parse()
.expect("failed to parse record")
.1;

View File

@ -5,7 +5,12 @@
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
use std::{collections::BTreeMap, str::FromStr};
use std::{
collections::BTreeMap,
fs, mem,
path::{Path, PathBuf},
str::FromStr,
};
use crate::{
rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
@ -117,14 +122,20 @@ use crate::{
/// the line is ignored.
/// ```
pub struct Parser<'a> {
lexer: Lexer<'a>,
lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
origin: Option<Name>,
}
impl<'a> Parser<'a> {
/// Returns a new Zone file parser
pub fn new(lexer: Lexer<'a>, origin: Option<Name>) -> Self {
Self { lexer, origin }
///
/// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
/// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
pub fn new(lexer: Lexer<'a>, path: Option<PathBuf>, origin: Option<Name>) -> Self {
Self {
lexers: vec![(lexer, path)],
origin,
}
}
/// Parse a file from the Lexer
@ -132,146 +143,196 @@ impl<'a> Parser<'a> {
/// # Return
///
/// A pair of the Zone origin name and a map of all Keys to RecordSets
pub fn parse(self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
let Self {
mut lexer,
mut origin,
} = self;
pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
let mut origin = self.origin;
let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
let mut class: DNSClass = DNSClass::IN;
let mut current_name: Option<Name> = None;
let mut rtype: Option<RecordType> = None;
let mut ttl: Option<u32> = None;
let mut state = State::StartLine;
let mut stack = self.lexers.len();
while let Some(t) = lexer.next_token()? {
state = match state {
State::StartLine => {
// current_name is not reset on the next line b/c it might be needed from the previous
rtype = None;
'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
while let Some(t) = lexer.next_token()? {
state = match state {
State::StartLine => {
// current_name is not reset on the next line b/c it might be needed from the previous
rtype = None;
match t {
// if Dollar, then $INCLUDE or $ORIGIN
Token::Include => {
return Err(ParseError::from(ParseErrorKind::Message("The parser does not support $INCLUDE. Consider inlining file before parsing")))
},
Token::Origin => State::Origin,
Token::Ttl => State::Ttl,
match t {
// if Dollar, then $INCLUDE or $ORIGIN
Token::Include => State::Include(None),
Token::Origin => State::Origin,
Token::Ttl => State::Ttl,
// if CharData, then Name then ttl_class_type
Token::CharData(data) => {
current_name = Some(Name::parse(&data, origin.as_ref())?);
State::TtlClassType
// if CharData, then Name then ttl_class_type
Token::CharData(data) => {
current_name = Some(Name::parse(&data, origin.as_ref())?);
State::TtlClassType
}
// @ is a placeholder for specifying the current origin
Token::At => {
current_name = origin.clone(); // TODO a COW or RC would reduce copies...
State::TtlClassType
}
// if blank, then nothing or ttl_class_type
Token::Blank => State::TtlClassType,
Token::EOL => State::StartLine, // probably a comment
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
// @ is a placeholder for specifying the current origin
Token::At => {
current_name = origin.clone(); // TODO a COW or RC would reduce copies...
State::TtlClassType
}
// if blank, then nothing or ttl_class_type
Token::Blank => State::TtlClassType,
Token::EOL => State::StartLine, // probably a comment
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
}
State::Ttl => match t {
Token::CharData(data) => {
ttl = Some(Self::parse_time(&data)?);
State::StartLine
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
},
State::Origin => {
match t {
State::Ttl => match t {
Token::CharData(data) => {
// TODO an origin was specified, should this be legal? definitely confusing...
origin = Some(Name::parse(&data, None)?);
ttl = Some(Self::parse_time(&data)?);
State::StartLine
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
},
State::Origin => {
match t {
Token::CharData(data) => {
// TODO an origin was specified, should this be legal? definitely confusing...
origin = Some(Name::parse(&data, None)?);
State::StartLine
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
}
}
State::Include => return Err(ParseError::from(ParseErrorKind::Message(
"The parser does not support $INCLUDE. Consider inlining file before parsing",
))),
State::TtlClassType => {
match t {
// if number, TTL
// Token::Number(ref num) => ttl = Some(*num),
// One of Class or Type (these cannot be overlapping!)
Token::CharData(mut data) => {
// if it's a number it's a ttl
let result: ParseResult<u32> = Self::parse_time(&data);
if result.is_ok() {
ttl = result.ok();
State::TtlClassType // hm, should this go to just ClassType?
} else {
// if can parse DNSClass, then class
data.make_ascii_uppercase();
let result = DNSClass::from_str(&data);
if let Ok(parsed) = result {
class = parsed;
State::TtlClassType
State::Include(include_path) => match (t, include_path) {
(Token::CharData(data), None) => State::Include(Some(data)),
(Token::EOL, Some(include_path)) => {
// RFC1035 (section 5) does not specify how filename for $INCLUDE
// should be resolved into file path. The underlying code implements the
// following:
// * if the path is absolute (relies on Path::is_absolute), it uses normalized path
// * otherwise, it joins the path with parent root of the current file
//
// TODO: Inlining files specified using non-relative path might potentially introduce
// security issue in some cases (e.g. when working with zone files from untrusted sources)
// and should probably be configurable by user.
if stack > MAX_INCLUDE_LEVEL {
return Err(ParseErrorKind::Message(
"Max depth level for nested $INCLUDE is reached",
)
.into());
}
let include = Path::new(&include_path);
let include = match (include.is_absolute(), path) {
(true, _) => include.to_path_buf(),
(false, Some(path)) => path
.parent()
.expect("file has to have parent folder")
.join(include),
(false, None) => {
return Err(ParseErrorKind::Message(
"Relative $INCLUDE is not supported",
)
.into());
}
};
let input = fs::read_to_string(&include)?;
let lexer = Lexer::new(input);
self.lexers.push((lexer, Some(include)));
stack += 1;
state = State::StartLine;
continue 'outer;
}
(Token::CharData(_), Some(_)) => {
return Err(ParseErrorKind::Message(
"Domain name for $INCLUDE is not supported",
)
.into());
}
(t, _) => {
return Err(ParseErrorKind::UnexpectedToken(t).into());
}
},
State::TtlClassType => {
match t {
// if number, TTL
// Token::Number(ref num) => ttl = Some(*num),
// One of Class or Type (these cannot be overlapping!)
Token::CharData(mut data) => {
// if it's a number it's a ttl
let result: ParseResult<u32> = Self::parse_time(&data);
if result.is_ok() {
ttl = result.ok();
State::TtlClassType // hm, should this go to just ClassType?
} else {
// if can parse RecordType, then RecordType
rtype = Some(RecordType::from_str(&data)?);
State::Record(vec![])
// if can parse DNSClass, then class
data.make_ascii_uppercase();
let result = DNSClass::from_str(&data);
if let Ok(parsed) = result {
class = parsed;
State::TtlClassType
} else {
// if can parse RecordType, then RecordType
rtype = Some(RecordType::from_str(&data)?);
State::Record(vec![])
}
}
}
// could be nothing if started with blank and is a comment, i.e. EOL
Token::EOL => {
State::StartLine // next line
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
// could be nothing if started with blank and is a comment, i.e. EOL
Token::EOL => {
State::StartLine // next line
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
}
State::Record(record_parts) => {
// b/c of ownership rules, perhaps, just collect all the RData components as a list of
// tokens to pass into the processor
match t {
Token::EOL => {
Self::flush_record(
record_parts,
&origin,
&current_name,
rtype,
&mut ttl,
class,
&mut records,
)?;
State::StartLine
State::Record(record_parts) => {
// b/c of ownership rules, perhaps, just collect all the RData components as a list of
// tokens to pass into the processor
match t {
Token::EOL => {
Self::flush_record(
record_parts,
&origin,
&current_name,
rtype,
&mut ttl,
class,
&mut records,
)?;
State::StartLine
}
Token::CharData(part) => {
let mut record_parts = record_parts;
record_parts.push(part);
State::Record(record_parts)
}
// TODO: we should not tokenize the list...
Token::List(list) => {
let mut record_parts = record_parts;
record_parts.extend(list);
State::Record(record_parts)
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
Token::CharData(part) => {
let mut record_parts = record_parts;
record_parts.push(part);
State::Record(record_parts)
}
// TODO: we should not tokenize the list...
Token::List(list) => {
let mut record_parts = record_parts;
record_parts.extend(list);
State::Record(record_parts)
}
_ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
}
}
};
}
}
//Extra flush at the end for the case of missing endline
if let State::Record(record_parts) = state {
Self::flush_record(
record_parts,
&origin,
&current_name,
rtype,
&mut ttl,
class,
&mut records,
)?;
// Extra flush at the end for the case of missing endline
if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
Self::flush_record(
record_parts,
&origin,
&current_name,
rtype,
&mut ttl,
class,
&mut records,
)?;
}
stack -= 1;
self.lexers.pop();
}
//
@ -455,10 +516,13 @@ enum State {
TtlClassType, // [<TTL>] [<class>] <type>,
Ttl, // $TTL <time>
Record(Vec<String>),
Include, // $INCLUDE <filename>
Include(Option<String>), // $INCLUDE <filename>
Origin,
}
/// Max traversal depth for nested `$INCLUDE` directives.
///
/// Exceeding this depth aborts parsing with an error; since a cyclic
/// `$INCLUDE` chain keeps growing the lexer stack, this bound also
/// terminates include cycles (with a less specific error message).
const MAX_INCLUDE_LEVEL: usize = 256;
#[cfg(test)]
mod tests {
use super::*;
@ -473,7 +537,7 @@ mod tests {
"#;
let lexer = Lexer::new(zone_data);
let result = Parser::new(lexer, Some(domain)).parse();
let result = Parser::new(lexer, None, Some(domain)).parse();
assert!(
result.is_err()
& result

View File

@ -5,23 +5,26 @@
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
use std::char;
use std::iter::Peekable;
use std::str::Chars;
use std::borrow::Cow;
use std::{char, iter::Peekable};
use crate::serialize::txt::errors::{LexerError, LexerErrorKind, LexerResult};
/// A Lexer for Zone files
pub struct Lexer<'a> {
txt: Peekable<Chars<'a>>,
txt: Peekable<CowChars<'a>>,
state: State,
}
impl<'a> Lexer<'a> {
/// Creates a new lexer with the given data to parse
pub fn new(txt: &str) -> Lexer<'_> {
pub fn new(txt: impl Into<Cow<'a, str>>) -> Lexer<'a> {
Lexer {
txt: txt.chars().peekable(),
txt: CowChars {
data: txt.into(),
offset: 0,
}
.peekable(),
state: State::StartLine,
}
}
@ -327,7 +330,27 @@ impl<'a> Lexer<'a> {
}
fn peek(&mut self) -> Option<char> {
self.txt.peek().cloned()
self.txt.peek().copied()
}
}
/// A `char` iterator over a `Cow<'a, str>`.
///
/// `str::chars()` borrows from the string it iterates, so a `Lexer` holding
/// owned input (e.g. a freshly read `$INCLUDE` file) could not use it
/// directly. `CowChars` instead tracks a byte `offset` into its own `data`,
/// allowing the same lexer type to work over borrowed or owned text.
struct CowChars<'a> {
    data: Cow<'a, str>,
    offset: usize,
}

impl<'a> Iterator for CowChars<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Decode the character at `offset`, then advance by its UTF-8
        // encoded length so `offset` always stays on a char boundary.
        let ch = self.data[self.offset..].chars().next()?;
        self.offset += ch.len_utf8();
        Some(ch)
    }
}

View File

@ -9,8 +9,7 @@
use std::{
collections::BTreeMap,
fs::File,
io::{BufRead, BufReader},
fs,
ops::{Deref, DerefMut},
path::{Path, PathBuf},
};
@ -25,7 +24,7 @@ use crate::{
use crate::{
authority::{Authority, LookupError, LookupOptions, MessageRequest, UpdateResult, ZoneType},
proto::rr::{LowerName, Name, RecordSet, RecordType, RrKey},
proto::serialize::txt::{Lexer, Parser, Token},
proto::serialize::txt::{Lexer, Parser},
server::RequestInfo,
store::{file::FileConfig, in_memory::InMemoryAuthority},
};
@ -36,36 +35,6 @@ use crate::{
/// start of authority for the zone, is a Secondary, or a cached zone.
pub struct FileAuthority(InMemoryAuthority);
/// Max traversal depth for $INCLUDE files
const MAX_INCLUDE_LEVEL: u16 = 256;
/// Inner state of zone file loader, tracks depth of $INCLUDE
/// loads as well as visited previously files, so the loader
/// is able to abort e.g. when cycle is detected
///
/// Note, that tracking max depth level explicitly covers also
/// cycles in $INCLUDEs. The error description in this case would
/// not be very helpful to detect the root cause of the problem
/// though. The way to improve diagnose experience would be to
/// traverse $INCLUDE files in topologically sorted order which
/// requires quite some re-arrangements in the code and in the
/// way loader is currently implemented.
struct FileReaderState {
level: u16,
}
impl FileReaderState {
fn new() -> Self {
Self { level: 0 }
}
fn next_level(&self) -> Self {
Self {
level: self.level + 1,
}
}
}
impl FileAuthority {
/// Creates a new Authority.
///
@ -91,85 +60,6 @@ impl FileAuthority {
InMemoryAuthority::new(origin, records, zone_type, allow_axfr).map(Self)
}
/// Read given file line by line and recursively invokes reader for
/// $INCLUDE directives
///
/// TODO: it looks hacky as far we effectively duplicate parser's functionality
/// (at least partially) and performing lexing twice.
/// Better solution requires us to change lexer to deal
/// with Lines-like iterator instead of String buf (or capability to combine a few
/// lexer instances into a single lexer).
///
/// TODO: $INCLUDE could specify domain name -- to support on-flight swap for Origin
/// value we definitely need to rethink and rework loader/parser/lexer
fn read_file(
zone_path: PathBuf,
buf: &mut String,
state: FileReaderState,
) -> Result<(), String> {
let file = File::open(&zone_path)
.map_err(|e| format!("failed to read {}: {:?}", zone_path.display(), e))?;
let reader = BufReader::new(file);
for line in reader.lines() {
let content = line.map_err(|err| format!("failed to read line: {err:?}"))?;
let mut lexer = Lexer::new(&content);
match (lexer.next_token(), lexer.next_token(), lexer.next_token()) {
(
Ok(Some(Token::Include)),
Ok(Some(Token::CharData(include_path))),
Ok(Some(Token::CharData(_domain))),
) => {
return Err(format!(
"Domain name for $INCLUDE is not supported at {}, trying to include {}",
zone_path.display(),
include_path
));
}
(Ok(Some(Token::Include)), Ok(Some(Token::CharData(include_path))), _) => {
// RFC1035 (section 5) does not specify how filename for $INCLUDE
// should be resolved into file path. The underlying code implements the
// following:
// * if the path is absolute (relies on Path::is_absolute), it uses normalized path
// * otherwise, it joins the path with parent root of the current file
//
// TODO: Inlining files specified using non-relative path might potentially introduce
// security issue in some cases (e.g. when working with zone files from untrusted sources)
// and should probably be configurable by user.
let include_path = Path::new(&include_path);
let include_zone_path = if include_path.is_absolute() {
include_path.to_path_buf()
} else {
let parent_dir =
zone_path.parent().expect("file has to have parent folder");
parent_dir.join(include_path)
};
if state.level >= MAX_INCLUDE_LEVEL {
return Err(format!("Max depth level for nested $INCLUDE is reached at {}, trying to include {}", zone_path.display(), include_zone_path.display()));
}
let mut include_buf = String::new();
info!(
"including file {} into {}",
include_zone_path.display(),
zone_path.display()
);
Self::read_file(include_zone_path, &mut include_buf, state.next_level())?;
buf.push_str(&include_buf);
}
_ => {
buf.push_str(&content);
}
}
buf.push('\n');
}
Ok(())
}
/// Read the Authority for the origin from the specified configuration
pub fn try_from_config(
origin: Name,
@ -183,15 +73,13 @@ impl FileAuthority {
info!("loading zone file: {:?}", zone_path);
let mut buf = String::new();
// TODO: this should really use something to read line by line or some other method to
// keep the usage down. and be a custom lexer...
Self::read_file(zone_path, &mut buf, FileReaderState::new())
let buf = fs::read_to_string(&zone_path)
.map_err(|e| format!("failed to read {}: {:?}", &config.zone_file_path, e))?;
let lexer = Lexer::new(&buf);
let (origin, records) = Parser::new(lexer, Some(origin))
let lexer = Lexer::new(buf);
let (origin, records) = Parser::new(lexer, Some(zone_path), Some(origin))
.parse()
.map_err(|e| format!("failed to parse {}: {:?}", config.zone_file_path, e))?;

View File

@ -44,8 +44,9 @@ impl RecursiveConfig {
let mut roots_str = String::new();
roots.read_to_string(&mut roots_str)?;
let lexer = Lexer::new(&roots_str);
let (_zone, roots_zone) = Parser::new(lexer, Some(Name::root())).parse()?;
let lexer = Lexer::new(roots_str);
let (_zone, roots_zone) =
Parser::new(lexer, Some(path.into_owned()), Some(Name::root())).parse()?;
// TODO: we may want to deny some of the root nameservers, for reasons...
Ok(roots_zone

View File

@ -57,7 +57,7 @@ tech. 3600 in soa ns0.centralnic.net. hostmaster.centralnic.ne
"#,
);
let records = Parser::new(lexer, Some(Name::from_str("isi.edu").unwrap())).parse();
let records = Parser::new(lexer, None, Some(Name::from_str("isi.edu").unwrap())).parse();
if records.is_err() {
panic!("failed to parse: {:?}", records.err())
}
@ -420,7 +420,7 @@ a A 127.0.0.1
",
);
let records = Parser::new(lexer, Some(Name::from_str("isi.edu").unwrap())).parse();
let records = Parser::new(lexer, None, Some(Name::from_str("isi.edu").unwrap())).parse();
if records.is_err() {
panic!("failed to parse: {:?}", records.err())
@ -448,7 +448,7 @@ b A 127.0.0.2
",
);
let records = Parser::new(lexer, Some(Name::from_str("isi.edu").unwrap())).parse();
let records = Parser::new(lexer, None, Some(Name::from_str("isi.edu").unwrap())).parse();
if records.is_err() {
panic!("failed to parse: {:?}", records.err())
@ -475,7 +475,7 @@ a A 127.0.0.1
",
);
let records = Parser::new(lexer, Some(Name::from_str("isi.edu").unwrap())).parse();
let records = Parser::new(lexer, None, Some(Name::from_str("isi.edu").unwrap())).parse();
if records.is_err() {
panic!("failed to parse: {:?}", records.err())
@ -494,7 +494,7 @@ fn test_named_root() {
"###,
);
let records = Parser::new(lexer, Some(Name::root())).parse();
let records = Parser::new(lexer, None, Some(Name::root())).parse();
if records.is_err() {
panic!("failed to parse: {:?}", records.err())