// Copyright (c) 2021 Soni L.
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
// Documentation and comments licensed under CC BY-SA 4.0.
//! String Cursor (sorta).
use ::std::io::Cursor;
use ::std::str::FromStr;
use crate::error::ReadError;
/// Extension trait on [`Cursor`]s to help with command parsing.
///
/// Every `read_*` method puts the cursor back where it started when it
/// fails.
///
/// Unlike Brigadier, there is no `getRemainingLength` (use
/// `get_remaining().len()` or `remaining_slice().len()` instead) and no
/// `getTotalLength` (use `get_ref().len()` instead).
pub trait StringReader<'a>: Sized {
    /// Returns the total length of the string.
    //#[inline]
    fn total_len(&self) -> usize;

    /// Returns the portion of the string that has already been consumed.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_read(&self) -> &'a str;

    /// Returns the portion of the string that has not been consumed yet.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_remaining(&self) -> &'a str;

    /// Returns whether anything is left to read.
    ///
    /// Shorthand for `can_read_n(1)`.
    #[inline]
    fn can_read(&self) -> bool {
        self.can_read_n(1)
    }

    /// Returns whether at least `len` more units are left to read.
    //#[inline]
    fn can_read_n(&self, len: usize) -> bool;

    /// Returns the next char without consuming it.
    ///
    /// Shorthand for `peek_n(0)`.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    #[inline]
    fn peek(&self) -> char {
        self.peek_n(0)
    }

    /// Returns the char that starts at the next nth **byte** (decoding any
    /// subsequent bytes that belong to it), without consuming anything.
    ///
    /// # Panics
    ///
    /// Panics if the offset is beyond the boundaries of the buffer, or if the
    /// offset is not on an UTF-8 character boundary.
    //#[inline]
    fn peek_n(&self, offset: usize) -> char;

    /// Moves the cursor past the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    //#[inline]
    fn skip(&mut self);

    /// Tries to consume and return the next char.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn read_char(&mut self) -> Option<char>;

    /// Consumes the next char if, and only if, it is `c`.
    ///
    /// When the input is exhausted or the next char differs from `c`, the
    /// cursor is left untouched and an "expected symbol" error naming `c` is
    /// returned.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn expect<E: ReadError<'a, Self>>(&mut self, c: char) -> Result<(), E> {
        if self.can_read() && self.peek() == c {
            self.skip();
            return Ok(());
        }
        // The error constructors take `&str`, so encode the char into a
        // stack buffer rather than allocating a `String`.
        let mut utf8 = [0u8; 4];
        let symbol = c.encode_utf8(&mut utf8);
        Err(E::expected_symbol(self, symbol))
    }

    /// Advances the cursor past any leading whitespace.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn skip_whitespace(&mut self) {
        // FIXME figure out if we wanna use the same whitespace rules as
        // brigadier, because rust uses unicode whereas java uses java rules.
        loop {
            if !self.can_read() || !self.peek().is_whitespace() {
                break;
            }
            self.skip();
        }
    }

    /// Reads an integer.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr<Err = ::std::num::ParseIntError>;

    /// Reads a float.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr<Err = ::std::num::ParseFloatError>;

    /// Reads a bool.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E>;

    /// Reads an unquoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    // The odd one out among the `read_*` methods: it has no failure mode.
    fn read_unquoted_str(&mut self) -> &'a str;

    /// Reads a quoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E>;

    /// Reads either a quoted or an unquoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;
}
/// [`StringReader`] implementation for a cursor over an in-memory `&str`.
///
/// The cursor position is a **byte** offset into the underlying string.
/// Methods that slice the string panic when that offset is out of bounds or
/// does not fall on a UTF-8 character boundary.
impl<'a> StringReader<'a> for Cursor<&'a str> {
    #[inline]
    fn total_len(&self) -> usize {
        self.get_ref().len()
    }

    #[inline]
    fn get_read(&self) -> &'a str {
        &self.get_ref()[..(self.position() as usize)]
    }

    #[inline]
    fn get_remaining(&self) -> &'a str {
        &self.get_ref()[(self.position() as usize)..]
    }

    #[inline]
    fn can_read_n(&self, len: usize) -> bool {
        // Do the arithmetic in `u64` (widening `usize` is lossless) and use
        // `checked_add`, so that a huge `len` or position yields `false`
        // instead of panicking in debug builds or wrapping around in
        // release builds.
        self.position()
            .checked_add(len as u64)
            .map_or(false, |end| end <= self.total_len() as u64)
    }

    #[inline]
    fn peek_n(&self, offset: usize) -> char {
        // An overflowing offset is by definition beyond the buffer; panic
        // explicitly rather than wrapping around to an unrelated index in
        // release builds.
        let index = (self.position() as usize)
            .checked_add(offset)
            .expect("peek offset is beyond the boundaries of the buffer");
        self.get_ref()[index..]
            .chars()
            .next()
            .expect("nothing left to read at the requested offset")
    }

    #[inline]
    fn skip(&mut self) {
        self.set_position(self.position() + self.peek().len_utf8() as u64);
    }

    #[inline]
    fn read_char(&mut self) -> Option<char> {
        let next = self.get_remaining().chars().next()?;
        self.set_position(self.position() + next.len_utf8() as u64);
        Some(next)
    }

    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr<Err = ::std::num::ParseIntError>,
    {
        let start = self.position();
        let number = take_prefix_matching(self, number_chars);
        if number.is_empty() {
            // Nothing was consumed, so the cursor is still at `start`.
            return Err(E::expected_integer(self));
        }
        number.parse().map_err(|_| {
            // Rewind before building the error.
            self.set_position(start);
            E::invalid_integer(self, number)
        })
    }

    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr<Err = ::std::num::ParseFloatError>,
    {
        let start = self.position();
        let number = take_prefix_matching(self, number_chars);
        if number.is_empty() {
            // Nothing was consumed, so the cursor is still at `start`.
            return Err(E::expected_float(self));
        }
        number.parse().map_err(|_| {
            // Rewind before building the error.
            self.set_position(start);
            E::invalid_float(self, number)
        })
    }

    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E> {
        let start = self.position();
        // NOTE: brigadier also allows quoted strings for bools.
        // we consider that a bug, so we don't.
        let word = self.read_unquoted_str();
        match word {
            "true" => Ok(true),
            "false" => Ok(false),
            // Nothing was consumed, so there is nothing to rewind.
            "" => Err(E::expected_bool(self)),
            value => {
                self.set_position(start);
                Err(E::invalid_bool(self, value))
            }
        }
    }

    fn read_unquoted_str(&mut self) -> &'a str {
        take_prefix_matching(self, unquoted_chars)
    }

    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E> {
        // Exhausted input reads as an empty string rather than an error.
        if !self.can_read() {
            return Ok(String::new());
        }
        if !quote_chars(self.peek()) {
            return Err(E::expected_start_of_quote(self));
        }
        let start = self.position();
        // The opening quote doubles as the terminator to look for.
        let terminator = self
            .read_char()
            .expect("can_read() guarantees a next char");
        let result = read_string_until(self, terminator);
        // The error (if any) has already been built with the cursor at the
        // point of failure; only now rewind to before the opening quote.
        if result.is_err() {
            self.set_position(start);
        }
        result
    }

    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
        if !self.can_read() {
            Ok(String::new())
        } else if quote_chars(self.peek()) {
            // Same handling as a mandatory quoted string from here on.
            self.read_quoted_string()
        } else {
            Ok(self.read_unquoted_str().to_owned())
        }
    }
}

/// Consumes the longest prefix of the unread input whose chars all satisfy
/// `allowed`, advancing `cursor` past it, and returns that prefix.
///
/// Returns an empty string (and leaves the cursor where it was) when the
/// very first unread char is not allowed or nothing is left to read.
///
/// # Panics
///
/// Panics if the cursor is out of bounds or not on a UTF-8 character
/// boundary.
fn take_prefix_matching<'a>(
    cursor: &mut Cursor<&'a str>,
    allowed: fn(char) -> bool,
) -> &'a str {
    let source: &'a str = *cursor.get_ref();
    let start = cursor.position() as usize;
    let remaining = &source[start..];
    // There's no direct "length of matching prefix" API, so measure how
    // much `trim_start_matches` removed.
    let taken = remaining.len() - remaining.trim_start_matches(allowed).len();
    cursor.set_position((start + taken) as u64);
    &remaining[..taken]
}
/// Reads chars from `this` up to (and including) the first unescaped
/// `terminator`, returning the text before it with escapes resolved.
///
/// An escape char may only be followed by the terminator or by another
/// escape char; anything else yields an "invalid escape" error. Running out
/// of input before the terminator yields an "expected end of quote" error.
/// The cursor is NOT rewound on error; it is left wherever reading stopped.
fn read_string_until<'a, E: ReadError<'a, Cursor<&'a str>>>(
    this: &mut Cursor<&'a str>,
    terminator: char,
) -> Result<String, E> {
    let mut unescaped = String::new();
    let mut pending_escape = false;
    while let Some(c) = this.read_char() {
        if pending_escape {
            if c != terminator && !escape_char(c) {
                let mut utf8 = [0u8; 4];
                // NOTE: brigadier unskips the escape. we don't bother.
                return Err(E::invalid_escape(this, c.encode_utf8(&mut utf8)));
            }
            unescaped.push(c);
            pending_escape = false;
            continue;
        }
        // The escape check deliberately comes before the terminator check.
        if escape_char(c) {
            pending_escape = true;
            continue;
        }
        if c == terminator {
            return Ok(unescaped);
        }
        unescaped.push(c);
    }
    Err(E::expected_end_of_quote(this))
}
/// Returns whether `c` may appear in an unquoted string: an ASCII letter or
/// digit, or one of `_`, `-`, `.`, `+`.
#[inline]
fn unquoted_chars(c: char) -> bool {
    c.is_ascii_alphanumeric() || ['_', '-', '.', '+'].contains(&c)
}
/// Returns whether `c` may appear in a number: an ASCII digit, `-` or `.`.
#[inline]
fn number_chars(c: char) -> bool {
    c.is_ascii_digit() || c == '-' || c == '.'
}
/// Returns whether `c` can open/close a quoted string, i.e. whether it is a
/// double quote or a single quote.
#[inline]
fn quote_chars(c: char) -> bool {
    c == '"' || c == '\''
}
/// Returns whether `c` is the escape symbol (a backslash).
#[inline]
fn escape_char(c: char) -> bool {
    c == '\\'
}