// Copyright (c) 2021 Soni L.
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
// Documentation and comments licensed under CC BY-SA 4.0.

//! String Cursor (sorta).

use ::std::io::Cursor;
use ::std::str::FromStr;

/// Built-in `StringReader` errors.
///
/// Every constructor receives the reader (`context`) so that the error can
/// record where in the input the problem was found. Constructors that take a
/// `from` argument additionally receive the text the error is about.
pub trait ReadError<'a, C: StringReader<'a>>: Sized + std::error::Error {
    /// Creates an error that indicates an invalid integer was found.
    ///
    /// `from` is the text that failed to parse.
    fn invalid_integer(context: &C, from: &str) -> Self;
    /// Creates an error that indicates an integer was expected.
    fn expected_integer(context: &C) -> Self;
    /// Creates an error that indicates an invalid float was found.
    ///
    /// `from` is the text that failed to parse.
    fn invalid_float(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a float was expected.
    fn expected_float(context: &C) -> Self;
    /// Creates an error that indicates an invalid bool was found.
    ///
    /// `from` is the text that was read instead of `true`/`false`.
    fn invalid_bool(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a bool was expected.
    fn expected_bool(context: &C) -> Self;
    /// Creates an error that indicates the start of a quote was expected.
    fn expected_start_of_quote(context: &C) -> Self;
    /// Creates an error that indicates the end of a quote was expected.
    fn expected_end_of_quote(context: &C) -> Self;
    /// Creates an error that indicates an invalid escape was found.
    ///
    /// `from` is the character that followed the escape character.
    fn invalid_escape(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a symbol was expected.
    ///
    /// `from` is the symbol that was expected.
    fn expected_symbol(context: &C, from: &str) -> Self;
}

/// Extension trait on [`Cursor`]s to help with command parsing.
///
/// All `read_*` methods reset the cursor on error.
///
/// Note that, compared to Brigadier, this lacks methods such as
/// `getRemainingLength` (use `get_remaining().len()` or
/// `remaining_slice().len()`) and `getTotalLength` (use `get_ref().len()`).
pub trait StringReader<'a>: Sized {
    /// Returns the part of the string that has been read so far.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_read(&self) -> &'a str;

    /// Returns the part of the string that has yet to be read.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_remaining(&self) -> &'a str;

    /// Returns whether there's anything left to read.
    #[inline]
    fn can_read(&self) -> bool {
        self.can_read_n(1)
    }

    /// Returns whether there's enough left to read, based on the passed length.
    //#[inline]
    fn can_read_n(&self, len: usize) -> bool;

    /// Returns the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    #[inline]
    fn peek(&self) -> char {
        self.peek_n(0)
    }

    /// Returns the next nth **byte** (and, if needed, subsequent bytes) as a
    /// char.
    ///
    /// # Panics
    ///
    /// Panics if the offset is beyond the boundaries of the buffer, or if the
    /// offset is not on an UTF-8 character boundary.
    //#[inline]
    fn peek_n(&self, offset: usize) -> char;

    /// Advances to the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    //#[inline]
    fn skip(&mut self);

    /// Attempts to read the next char.
    ///
    /// Returns `None`, without moving, if there is nothing left to read.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn read_char(&mut self) -> Option<char>;

    /// Checks the next char, consuming it if it is `c`.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_symbol` (and does not move) if there is nothing
    /// left to read or the next char is not `c`.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn expect<E: ReadError<'a, Self>>(&mut self, c: char) -> Result<(), E> {
        if !self.can_read() || self.peek() != c {
            // because we want the error constructors to take &str.
            let mut buf = [0u8; 4];
            Err(E::expected_symbol(self, c.encode_utf8(&mut buf)))
        } else {
            self.skip();
            Ok(())
        }
    }

    /// Skips whitespace.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn skip_whitespace(&mut self) {
        // FIXME figure out if we wanna use the same whitespace rules as
        // brigadier, because rust uses unicode whereas java uses java rules.
        while self.can_read() && self.peek().is_whitespace() {
            self.skip();
        }
    }

    /// Reads an integer.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr;

    /// Reads a float.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr;

    /// Reads a bool.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E>;

    /// Reads an unquoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    // this is a bit of a weird one in that it can't error.
    fn read_unquoted_str(&mut self) -> &'a str;

    /// Reads a quoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_quoted_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;

    /// Reads a quoted or an unquoted string.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;
}

impl<'a> StringReader<'a> for Cursor<&'a str> {
    #[inline]
    fn get_read(&self) -> &'a str {
        // Copy the inner `&'a str` out so the slice is not tied to `&self`.
        let source: &'a str = *self.get_ref();
        &source[..(self.position() as usize)]
    }

    #[inline]
    fn get_remaining(&self) -> &'a str {
        let source: &'a str = *self.get_ref();
        &source[(self.position() as usize)..]
    }

    #[inline]
    fn can_read_n(&self, len: usize) -> bool {
        // checked_add: a huge `len` must answer "no", not overflow.
        (self.position() as usize)
            .checked_add(len)
            .map_or(false, |end| end <= self.get_ref().len())
    }

    #[inline]
    fn peek_n(&self, offset: usize) -> char {
        // An overflowing offset is necessarily out of bounds, so panic as
        // documented instead of wrapping around to an unrelated position.
        let index = (self.position() as usize)
            .checked_add(offset)
            .expect("peek offset is beyond the boundaries of the buffer");
        self.get_ref()[index..]
            .chars()
            .next()
            .expect("nothing left to read")
    }

    #[inline]
    fn skip(&mut self) {
        self.set_position(self.position() + self.peek().len_utf8() as u64);
    }

    #[inline]
    fn read_char(&mut self) -> Option<char> {
        let next = self.get_remaining().chars().next()?;
        self.set_position(self.position() + next.len_utf8() as u64);
        Some(next)
    }

    /// Reads the longest run of number characters and parses it as `T`.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_integer` if no number characters are present, or
    /// `E::invalid_integer` (after resetting the cursor) if parsing fails.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr,
    {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // no need to reset the position here, we haven't moved
            return Err(E::expected_integer(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_integer(self, number)
        })
    }

    /// Reads the longest run of number characters and parses it as `T`.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_float` if no number characters are present, or
    /// `E::invalid_float` (after resetting the cursor) if parsing fails.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where
        T: FromStr,
    {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // no need to reset the position here, we haven't moved
            return Err(E::expected_float(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_float(self, number)
        })
    }

    /// Reads an unquoted string and interprets it as `true` or `false`.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_bool` if the unquoted string is empty, or
    /// `E::invalid_bool` (after resetting the cursor) for any other text.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E> {
        let start = self.position();
        // NOTE: brigadier also allows quoted strings for bools.
        // we consider that a bug, so we don't.
        match self.read_unquoted_str() {
            "true" => Ok(true),
            "false" => Ok(false),
            // nothing was consumed, so there is nothing to reset
            "" => Err(E::expected_bool(self)),
            value => {
                self.set_position(start);
                Err(E::invalid_bool(self, value))
            }
        }
    }

    fn read_unquoted_str(&mut self) -> &'a str {
        read_while(self, unquoted_chars)
    }

    /// Reads a string delimited by `"` or `'`, processing escapes.
    ///
    /// Returns an empty string if there is nothing left to read.
    ///
    /// # Errors
    ///
    /// Returns `E::expected_start_of_quote` if the next char is not a quote
    /// char; otherwise forwards `E::invalid_escape` / `E::expected_end_of_quote`
    /// from the body of the string. The cursor is reset in the latter cases.
    fn read_quoted_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
        if !self.can_read() {
            return Ok(String::new());
        }
        let terminator = self.peek();
        if !quote_chars(terminator) {
            return Err(E::expected_start_of_quote(self));
        }
        let start = self.position();
        self.skip();
        let res = read_string_until(self, terminator);
        if res.is_err() {
            // the error was built at the failure position; now rewind.
            self.set_position(start);
        }
        res
    }

    /// Reads a quoted string if the next char is a quote char, otherwise an
    /// unquoted string.
    ///
    /// Returns an empty string if there is nothing left to read.
    ///
    /// # Errors
    ///
    /// Only the quoted form can fail; see `read_quoted_string`.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
        if !self.can_read() {
            Ok(String::new())
        } else if quote_chars(self.peek()) {
            self.read_quoted_string()
        } else {
            Ok(self.read_unquoted_str().into())
        }
    }
}

/// Consumes the longest prefix of the unread input whose chars all satisfy
/// `allowed`, and returns it. Returns `""` (without moving) if none match.
fn read_while<'a>(this: &mut Cursor<&'a str>, allowed: fn(char) -> bool) -> &'a str {
    // there's no easy way to grab start matches, so we measure how much
    // trimming the start removes instead.
    let remaining = this.get_remaining();
    let len = remaining.len() - remaining.trim_start_matches(allowed).len();
    this.set_position(this.position() + len as u64);
    &remaining[..len]
}

/// Reads the body of a quoted string, up to and including `terminator`.
///
/// The opening quote must already have been consumed. Only the terminator and
/// the escape char itself may be escaped. On error the cursor is left where
/// the error was detected; callers are expected to reset it.
fn read_string_until<'a, E: ReadError<'a, Cursor<&'a str>>>(
    this: &mut Cursor<&'a str>,
    terminator: char,
) -> Result<String, E> {
    let mut result = String::new();
    let mut escaped = false;
    while let Some(c) = this.read_char() {
        if escaped {
            if c != terminator && !escape_char(c) {
                let mut buf = [0u8; 4];
                // NOTE: brigadier unskips the escape. we don't bother.
                return Err(E::invalid_escape(this, c.encode_utf8(&mut buf)));
            }
            result.push(c);
            escaped = false;
        } else if escape_char(c) {
            escaped = true;
        } else if c == terminator {
            return Ok(result);
        } else {
            result.push(c);
        }
    }
    // ran out of input before seeing the terminator
    Err(E::expected_end_of_quote(this))
}

/// Symbols allowed in unquoted strings.
#[inline]
fn unquoted_chars(c: char) -> bool {
    matches!(
        c,
        '0'..='9' | 'A'..='Z' | 'a'..='z' | '_' | '-' | '.' | '+'
    )
}

/// Symbols allowed in numbers.
#[inline]
fn number_chars(c: char) -> bool {
    matches!(c, '0'..='9' | '-' | '.')
}

/// Symbols allowed to start/end a quoted string.
#[inline]
fn quote_chars(c: char) -> bool {
    matches!(c, '"' | '\'')
}

/// Symbol allowed to escape other symbols.
#[inline]
fn escape_char(c: char) -> bool {
    matches!(c, '\\')
}