// Copyright (c) 2021 Soni L.
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
//
// Recovered from commit d4724b4734776d32fb86cd3c932e18fc41b68316
// ("Start porting Brigadier to Rust"), file src/strcursor.rs.

//! String Cursor (sorta).

use ::std::io::Cursor;
use ::std::str::FromStr;

/// Built-in `StringReader` errors.
///
/// Every constructor receives the reader (`context`) in the state it was in
/// when the problem was detected. Constructors that take `from` additionally
/// receive the offending (or expected) text.
pub trait ReadError<'a, C: StringReader<'a>>: Sized + ::std::error::Error {
    /// Creates an error that indicates an invalid integer was found.
    fn invalid_integer(context: &C, from: &str) -> Self;
    /// Creates an error that indicates an integer was expected.
    fn expected_integer(context: &C) -> Self;
    /// Creates an error that indicates an invalid float was found.
    fn invalid_float(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a float was expected.
    fn expected_float(context: &C) -> Self;
    /// Creates an error that indicates an invalid bool was found.
    fn invalid_bool(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a bool was expected.
    fn expected_bool(context: &C) -> Self;
    /// Creates an error that indicates the start of a quote was expected.
    fn expected_start_of_quote(context: &C) -> Self;
    /// Creates an error that indicates the end of a quote was expected.
    fn expected_end_of_quote(context: &C) -> Self;
    /// Creates an error that indicates an invalid escape was found.
    fn invalid_escape(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a symbol was expected.
    fn expected_symbol(context: &C, from: &str) -> Self;
}

/// Extension trait on [`Cursor`]s to help with command parsing.
///
/// All `read_*` methods reset the cursor on error.
///
/// Note that, compared to Brigadier, this lacks methods such as
/// `getRemainingLength` (use `get_remaining().len()` or
/// `remaining_slice().len()`) and `getTotalLength` (use `get_ref().len()`).
pub trait StringReader<'a>: Sized {
    /// Returns the part of the string that has been read so far.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn get_read(&self) -> &'a str;
    /// Returns the part of the string that has yet to be read.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn get_remaining(&self) -> &'a str;
    /// Returns whether there's anything left to read.
    #[inline]
    fn can_read(&self) -> bool {
        self.can_read_n(1)
    }
    /// Returns whether there's enough left to read, based on the passed
    /// length (in bytes).
    fn can_read_n(&self, len: usize) -> bool;
    /// Returns the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    #[inline]
    fn peek(&self) -> char {
        self.peek_n(0)
    }
    /// Returns the next nth **byte** (and, if needed, subsequent bytes) as a
    /// char.
    ///
    /// # Panics
    ///
    /// Panics if the offset is beyond the boundaries of the buffer, or if the
    /// offset is not on an UTF-8 character boundary.
    fn peek_n(&self, offset: usize) -> char;

    /// Advances to the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    fn skip(&mut self);
    /// Attempts to read the next char, returning `None` at the end of input.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_char(&mut self) -> Option<char>;
    /// Checks that the next char is `c` and consumes it.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_symbol` (and leaves the cursor untouched) if the
    /// input is exhausted or the next char differs from `c`.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn expect<E: ReadError<'a, Self>>(&mut self, c: char) -> Result<(), E> {
        if self.can_read() && self.peek() == c {
            self.skip();
            Ok(())
        } else {
            // The error constructors take `&str`, so encode the char into a
            // stack buffer instead of allocating.
            let mut buf = [0u8; 4];
            Err(E::expected_symbol(self, c.encode_utf8(&mut buf)))
        }
    }
    /// Skips whitespace.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn skip_whitespace(&mut self) {
        // FIXME figure out if we wanna use the same whitespace rules as
        // brigadier, because rust uses unicode whereas java uses java rules.
        while self.can_read() && self.peek().is_whitespace() {
            self.skip();
        }
    }

    /// Reads an integer.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_integer` if no number characters are available,
    /// or `E::invalid_integer` if the number token fails to parse as `T`.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr;
    /// Reads a float.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_float` if no number characters are available,
    /// or `E::invalid_float` if the number token fails to parse as `T`.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr;
    /// Reads a bool.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_bool` if no unquoted token is available, or
    /// `E::invalid_bool` if the token is neither `true` nor `false`.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E>;
    /// Reads an unquoted string.
    ///
    /// This cannot fail: when the next char is not allowed in an unquoted
    /// string, the result is simply empty.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_unquoted_str(&mut self) -> &'a str;
    /// Reads a quoted string.
    ///
    /// Returns an empty string if there is nothing left to read.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_start_of_quote` if the next char is not a quote,
    /// `E::invalid_escape` on an unsupported escape sequence, or
    /// `E::expected_end_of_quote` if the closing quote is missing.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E>;
    /// Reads a quoted or an unquoted string.
    ///
    /// # Errors
    ///
    /// Fails like [`StringReader::read_quoted_string`] when the input starts
    /// with a quote; unquoted input never fails.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;
}

impl<'a> StringReader<'a> for Cursor<&'a str> {
    #[inline]
    fn get_read(&self) -> &'a str {
        &self.get_ref()[..(self.position() as usize)]
    }
    #[inline]
    fn get_remaining(&self) -> &'a str {
        &self.get_ref()[(self.position() as usize)..]
    }
    #[inline]
    fn can_read_n(&self, len: usize) -> bool {
        // Compare against what is left instead of computing `position + len`,
        // which could overflow. A cursor positioned past the end has nothing
        // left to read.
        (self.get_ref().len() as u64)
            .checked_sub(self.position())
            .map_or(false, |remaining| len as u64 <= remaining)
    }
    #[inline]
    fn peek_n(&self, offset: usize) -> char {
        let text: &str = self.get_ref();
        // Checked arithmetic and slicing so that an overflowing offset panics
        // instead of silently wrapping around.
        (self.position() as usize)
            .checked_add(offset)
            .and_then(|index| text.get(index..))
            .and_then(|rest| rest.chars().next())
            .expect("peek_n: offset out of bounds or not on a UTF-8 character boundary")
    }

    #[inline]
    fn skip(&mut self) {
        let width = self.peek().len_utf8() as u64;
        self.set_position(self.position() + width);
    }
    #[inline]
    fn read_char(&mut self) -> Option<char> {
        let next = self.get_remaining().chars().next()?;
        self.set_position(self.position() + next.len_utf8() as u64);
        Some(next)
    }

    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr,
    {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // No need to reset the position here, we haven't moved.
            return Err(E::expected_integer(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_integer(self, number)
        })
    }
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr,
    {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // No need to reset the position here, we haven't moved.
            return Err(E::expected_float(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_float(self, number)
        })
    }
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E> {
        let start = self.position();
        // NOTE: brigadier also allows quoted strings for bools.
        // we consider that a bug, so we don't.
        match self.read_unquoted_str() {
            "true" => Ok(true),
            "false" => Ok(false),
            // Nothing was consumed, so there is nothing to reset.
            "" => Err(E::expected_bool(self)),
            other => {
                self.set_position(start);
                Err(E::invalid_bool(self, other))
            }
        }
    }
    fn read_unquoted_str(&mut self) -> &'a str {
        read_while(self, unquoted_chars)
    }
    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E> {
        if !self.can_read() {
            return Ok(String::new());
        }
        let terminator = self.peek();
        if !quote_chars(terminator) {
            return Err(E::expected_start_of_quote(self));
        }
        let start = self.position();
        self.skip();
        // The error is built while the cursor still points at the failure;
        // only afterwards is the cursor rewound to the opening quote.
        read_string_until(self, terminator).map_err(|err| {
            self.set_position(start);
            err
        })
    }
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
        if self.can_read() && !quote_chars(self.peek()) {
            Ok(self.read_unquoted_str().to_owned())
        } else {
            // Either the input is exhausted (yields an empty string) or it
            // starts with a quote.
            self.read_quoted_string()
        }
    }
}

/// Consumes the longest prefix of the remaining input whose chars all satisfy
/// `allowed`, and returns it (possibly empty).
fn read_while<'a>(
    this: &mut Cursor<&'a str>,
    allowed: fn(char) -> bool,
) -> &'a str {
    let remaining = this.get_remaining();
    let len = remaining
        .find(|c: char| !allowed(c))
        .unwrap_or(remaining.len());
    this.set_position(this.position() + len as u64);
    &remaining[..len]
}

/// Reads the body of a quoted string, up to and including `terminator`.
///
/// The opening quote must already have been consumed. A backslash may escape
/// the terminator or another backslash; any other escape is an error. The
/// cursor is NOT reset on error; callers are expected to do that.
fn read_string_until<'a, E: ReadError<'a, Cursor<&'a str>>>(
    this: &mut Cursor<&'a str>,
    terminator: char,
) -> Result<String, E> {
    let mut result = String::new();
    let mut escaped = false;

    while let Some(c) = this.read_char() {
        if escaped {
            if c == terminator || escape_char(c) {
                result.push(c);
                escaped = false;
            } else {
                let mut buf = [0u8; 4];
                // NOTE: brigadier unskips the escape. we don't bother.
                return Err(E::invalid_escape(this, c.encode_utf8(&mut buf)));
            }
        } else if escape_char(c) {
            escaped = true;
        } else if c == terminator {
            return Ok(result);
        } else {
            result.push(c);
        }
    }

    // Ran out of input before seeing the closing quote.
    Err(E::expected_end_of_quote(this))
}

/// Symbols allowed in unquoted strings.
#[inline]
fn unquoted_chars(c: char) -> bool {
    matches!(
        c,
        '0' ..= '9' | 'A' ..= 'Z' | 'a' ..= 'z' | '_' | '-' | '.' | '+',
    )
}

/// Symbols allowed in numbers.
#[inline]
fn number_chars(c: char) -> bool {
    matches!(
        c,
        '0' ..= '9' | '-' | '.',
    )
}

/// Symbols allowed to start/end a quoted string.
#[inline]
fn quote_chars(c: char) -> bool {
    matches!(
        c,
        '"' | '\'',
    )
}

/// Symbol allowed to escape other symbols.
#[inline]
fn escape_char(c: char) -> bool {
    matches!(
        c,
        '\\',
    )
}