summary refs log tree commit diff stats
path: root/src/strcursor.rs
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2021-11-11 20:29:55 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2021-11-11 20:29:55 -0300
commit d4724b4734776d32fb86cd3c932e18fc41b68316 (patch)
tree f289b70f1aa1b971d76737d702ea112a67d9c8c2 /src/strcursor.rs
Start porting Brigadier to Rust
Diffstat (limited to 'src/strcursor.rs')
-rw-r--r-- src/strcursor.rs 368
1 files changed, 368 insertions, 0 deletions
diff --git a/src/strcursor.rs b/src/strcursor.rs
new file mode 100644
index 0000000..f8b95c6
--- /dev/null
+++ b/src/strcursor.rs
@@ -0,0 +1,368 @@
+// Copyright (c) 2021 Soni L.
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+//! String Cursor (sorta).
+
+use ::std::io::Cursor;
+use ::std::str::FromStr;
+
+/// Constructors for the errors a `StringReader` can raise on its own.
+///
+/// Every constructor receives the reader as `context`. The ones that also
+/// take `from` are handed the piece of text the error is about (the text that
+/// failed to parse, or the symbol that was expected).
+pub trait ReadError<'a, C>: Sized + std::error::Error
+where
+    C: StringReader<'a>,
+{
+    /// Builds the error for text that looked like an integer but did not
+    /// parse as one.
+    fn invalid_integer(context: &C, from: &str) -> Self;
+    /// Builds the error for a missing integer.
+    fn expected_integer(context: &C) -> Self;
+    /// Builds the error for text that looked like a float but did not parse
+    /// as one.
+    fn invalid_float(context: &C, from: &str) -> Self;
+    /// Builds the error for a missing float.
+    fn expected_float(context: &C) -> Self;
+    /// Builds the error for text that is not a valid bool.
+    fn invalid_bool(context: &C, from: &str) -> Self;
+    /// Builds the error for a missing bool.
+    fn expected_bool(context: &C) -> Self;
+    /// Builds the error for a missing opening quote.
+    fn expected_start_of_quote(context: &C) -> Self;
+    /// Builds the error for a missing closing quote.
+    fn expected_end_of_quote(context: &C) -> Self;
+    /// Builds the error for an escape sequence that is not allowed.
+    fn invalid_escape(context: &C, from: &str) -> Self;
+    /// Builds the error for a missing symbol; `from` is the expected symbol.
+    fn expected_symbol(context: &C, from: &str) -> Self;
+}
+
+/// Command-parsing helpers layered on top of [`Cursor`] as an extension
+/// trait.
+///
+/// Every `read_*` method puts the cursor back where it was when it fails.
+///
+/// Unlike Brigadier, there is no `getRemainingLength` (use
+/// `get_remaining().len()` or `remaining_slice().len()` instead) and no
+/// `getTotalLength` (use `get_ref().len()` instead).
+pub trait StringReader<'a>: Sized {
+    /// Returns the portion of the string that lies before the cursor.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn get_read(&self) -> &'a str;
+
+    /// Returns the portion of the string that lies at and after the cursor.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn get_remaining(&self) -> &'a str;
+
+    /// Tells whether at least one more unit can be read.
+    ///
+    /// Shorthand for `can_read_n(1)`.
+    #[inline]
+    fn can_read(&self) -> bool {
+        self.can_read_n(1)
+    }
+
+    /// Tells whether at least `len` more units can be read.
+    fn can_read_n(&self, len: usize) -> bool;
+
+    /// Returns the char under the cursor without consuming it.
+    ///
+    /// Shorthand for `peek_n(0)`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there's nothing left to read, or if this cursor is not on an
+    /// UTF-8 character boundary.
+    #[inline]
+    fn peek(&self) -> char {
+        self.peek_n(0)
+    }
+
+    /// Returns the char that starts `offset` **bytes** past the cursor,
+    /// decoding as many following bytes as that char needs.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the offset is beyond the boundaries of the buffer, or if the
+    /// offset is not on an UTF-8 character boundary.
+    fn peek_n(&self, offset: usize) -> char;
+
+    /// Moves the cursor past the next char.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there's nothing left to read, or if this cursor is not on an
+    /// UTF-8 character boundary.
+    fn skip(&mut self);
+
+    /// Consumes and returns the next char, if there is one.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_char(&mut self) -> Option<char>;
+
+    /// Consumes the next char if it is `c`.
+    ///
+    /// When input is exhausted or the next char differs, the cursor is left
+    /// untouched and `E::expected_symbol` is returned, with `c` passed as the
+    /// expected text.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn expect<E: ReadError<'a, Self>>(&mut self, c: char) -> Result<(), E> {
+        if self.can_read() && self.peek() == c {
+            self.skip();
+            return Ok(());
+        }
+        // The error constructors take `&str`, so encode the char on the
+        // stack rather than allocating a `String`.
+        let mut utf8 = [0u8; 4];
+        let symbol: &str = c.encode_utf8(&mut utf8);
+        Err(E::expected_symbol(self, symbol))
+    }
+
+    /// Advances the cursor past any run of whitespace.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn skip_whitespace(&mut self) {
+        // FIXME figure out if we wanna use the same whitespace rules as
+        // brigadier, because rust uses unicode whereas java uses java rules.
+        loop {
+            if !self.can_read() || !self.peek().is_whitespace() {
+                break;
+            }
+            self.skip();
+        }
+    }
+
+    /// Reads an integer of type `T`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
+    where T: FromStr<Err=std::num::ParseIntError>;
+
+    /// Reads a float of type `T`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
+    where T: FromStr<Err=std::num::ParseFloatError>;
+
+    /// Reads a bool.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E>;
+
+    /// Reads an unquoted string.
+    ///
+    /// Unlike the other `read_*` methods this one has no failure mode.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_unquoted_str(&mut self) -> &'a str;
+
+    /// Reads a quoted string.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_quoted_string<E: ReadError<'a, Self>>(
+        &mut self,
+    ) -> Result<String, E>;
+
+    /// Reads a string that may be either quoted or unquoted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this cursor is not on an UTF-8 character boundary.
+    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;
+}
+
+/// [`StringReader`] for a [`Cursor`] over a borrowed string.
+///
+/// The cursor position is interpreted as a byte offset into the string.
+impl<'a> StringReader<'a> for Cursor<&'a str> {
+    /// Returns the slice before the cursor.
+    ///
+    /// Slicing panics if the position is past the end of the string or not
+    /// on a UTF-8 character boundary.
+    #[inline]
+    fn get_read(&self) -> &'a str {
+        &self.get_ref()[..(self.position() as usize)]
+    }
+
+    /// Returns the slice from the cursor to the end of the string.
+    ///
+    /// Slicing panics if the position is past the end of the string or not
+    /// on a UTF-8 character boundary.
+    #[inline]
+    fn get_remaining(&self) -> &'a str {
+        &self.get_ref()[(self.position() as usize)..]
+    }
+
+    /// Returns whether at least `len` more bytes are available.
+    ///
+    /// A `len` so large that `position + len` does not fit in `usize` can
+    /// never be satisfied, so it yields `false` instead of overflowing.
+    #[inline]
+    fn can_read_n(&self, len: usize) -> bool {
+        (self.position() as usize)
+            .checked_add(len)
+            .map_or(false, |end| end <= self.get_ref().len())
+    }
+
+    /// Returns the char starting `offset` bytes past the cursor.
+    ///
+    /// Panics if `position + offset` overflows, is past the last char, or is
+    /// not on a UTF-8 character boundary.
+    #[inline]
+    fn peek_n(&self, offset: usize) -> char {
+        // Checked so that a huge offset panics, as documented on the trait,
+        // instead of wrapping around to an unrelated index in release
+        // builds.
+        let index = (self.position() as usize)
+            .checked_add(offset)
+            .expect("peek offset overflows usize");
+        self.get_ref()[index..]
+            .chars()
+            .next()
+            .expect("nothing left to peek at")
+    }
+
+    /// Advances the cursor by the UTF-8 width of the next char.
+    #[inline]
+    fn skip(&mut self) {
+        let width = self.peek().len_utf8() as u64;
+        self.set_position(self.position() + width);
+    }
+
+    /// Consumes and returns the next char, or returns `None` (without
+    /// moving) when nothing is left.
+    #[inline]
+    fn read_char(&mut self) -> Option<char> {
+        let next = self.get_remaining().chars().next()?;
+        self.set_position(self.position() + next.len_utf8() as u64);
+        Some(next)
+    }
+
+    /// Consumes the longest run of `number_chars` and parses it as `T`.
+    ///
+    /// Returns `expected_integer` when the run is empty. When parsing fails,
+    /// the cursor is rewound to where it started and `invalid_integer` is
+    /// returned with the consumed text.
+    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
+    where T: FromStr<Err=std::num::ParseIntError> {
+        let start = self.position();
+        let number = read_while(self, number_chars);
+        if number.is_empty() {
+            // Nothing was consumed, so there is nothing to rewind.
+            return Err(E::expected_integer(self));
+        }
+        number.parse().map_err(|_| {
+            self.set_position(start);
+            E::invalid_integer(self, number)
+        })
+    }
+
+    /// Consumes the longest run of `number_chars` and parses it as `T`.
+    ///
+    /// Returns `expected_float` when the run is empty. When parsing fails,
+    /// the cursor is rewound to where it started and `invalid_float` is
+    /// returned with the consumed text.
+    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
+    where T: FromStr<Err=std::num::ParseFloatError> {
+        let start = self.position();
+        let number = read_while(self, number_chars);
+        if number.is_empty() {
+            // Nothing was consumed, so there is nothing to rewind.
+            return Err(E::expected_float(self));
+        }
+        number.parse().map_err(|_| {
+            self.set_position(start);
+            E::invalid_float(self, number)
+        })
+    }
+
+    /// Reads an unquoted token and accepts exactly `true` or `false`.
+    ///
+    /// An empty token yields `expected_bool`. Any other token rewinds the
+    /// cursor and yields `invalid_bool`.
+    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E> {
+        let start = self.position();
+        // NOTE: brigadier also allows quoted strings for bools.
+        // we consider that a bug, so we don't.
+        let token = self.read_unquoted_str();
+        match token {
+            "true" => Ok(true),
+            "false" => Ok(false),
+            // An empty token means nothing was consumed; no rewind needed.
+            "" => Err(E::expected_bool(self)),
+            other => {
+                self.set_position(start);
+                Err(E::invalid_bool(self, other))
+            },
+        }
+    }
+
+    /// Consumes and returns the longest run of `unquoted_chars`, which may
+    /// be empty.
+    fn read_unquoted_str(&mut self) -> &'a str {
+        read_while(self, unquoted_chars)
+    }
+
+    /// Reads a string delimited by a matching pair of `quote_chars`.
+    ///
+    /// Returns an empty string when there is nothing left to read, and
+    /// `expected_start_of_quote` when the next char is not a quote. If the
+    /// quoted body fails to parse, the cursor is rewound to the opening
+    /// quote before the error is returned.
+    fn read_quoted_string<E: ReadError<'a, Self>>(
+        &mut self,
+    ) -> Result<String, E> {
+        if !self.can_read() {
+            return Ok(String::new());
+        }
+        let terminator = self.peek();
+        if !quote_chars(terminator) {
+            return Err(E::expected_start_of_quote(self));
+        }
+        let start = self.position();
+        // Step over the opening quote; the same char closes the string.
+        self.skip();
+        let res = read_string_until(self, terminator);
+        if res.is_err() {
+            self.set_position(start);
+        }
+        res
+    }
+
+    /// Reads a quoted string if the next char is a quote, otherwise an
+    /// unquoted one. Returns an empty string when nothing is left to read.
+    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
+        if self.can_read() && !quote_chars(self.peek()) {
+            Ok(self.read_unquoted_str().into())
+        } else {
+            // Exhausted input and quoted input are both handled by
+            // `read_quoted_string`.
+            self.read_quoted_string()
+        }
+    }
+}
+
+/// Consumes the longest prefix of the remaining input whose chars all
+/// satisfy `allowed`, and returns that prefix.
+fn read_while<'a>(
+    this: &mut Cursor<&'a str>,
+    allowed: impl FnMut(char) -> bool,
+) -> &'a str {
+    // There is no direct "take matching prefix" on `str`, so trim the prefix
+    // off and measure how much shorter the string got.
+    let remaining = this.get_remaining();
+    let rest = remaining.trim_start_matches(allowed);
+    let taken = &remaining[..remaining.len() - rest.len()];
+    this.set_position(this.position() + taken.len() as u64);
+    taken
+}
+
+/// Collects chars from `this` up to (and consuming) an unescaped
+/// `terminator`, resolving escapes along the way.
+///
+/// After an escape char, only the terminator or another escape char is
+/// accepted; anything else returns `invalid_escape`. Running out of input
+/// before the terminator returns `expected_end_of_quote`. The cursor is not
+/// rewound here on failure.
+fn read_string_until<'a, E: ReadError<'a, Cursor<&'a str>>>(
+    this: &mut Cursor<&'a str>,
+    terminator: char,
+) -> Result<String, E> {
+    let mut collected = String::new();
+    // True while the previous char was an escape that is still waiting for
+    // the char it applies to.
+    let mut pending_escape = false;
+
+    while let Some(c) = this.read_char() {
+        match (pending_escape, c) {
+            (true, c) if c == terminator || escape_char(c) => {
+                collected.push(c);
+                pending_escape = false;
+            }
+            (true, c) => {
+                let mut utf8 = [0u8; 4];
+                // NOTE: brigadier unskips the escape. we don't bother.
+                return Err(E::invalid_escape(this, c.encode_utf8(&mut utf8)));
+            }
+            (false, c) if escape_char(c) => pending_escape = true,
+            (false, c) if c == terminator => return Ok(collected),
+            (false, c) => collected.push(c),
+        }
+    }
+
+    Err(E::expected_end_of_quote(this))
+}
+
+/// Tells whether `c` may appear in an unquoted string: ASCII letters, ASCII
+/// digits, and the symbols `_`, `-`, `.` and `+`.
+#[inline]
+fn unquoted_chars(c: char) -> bool {
+    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | '+')
+}
+
+/// Tells whether `c` may appear in a number: an ASCII digit, `-` or `.`.
+#[inline]
+fn number_chars(c: char) -> bool {
+    c.is_ascii_digit() || c == '-' || c == '.'
+}
+
+/// Tells whether `c` can open (and therefore close) a quoted string: a
+/// double quote or a single quote.
+#[inline]
+fn quote_chars(c: char) -> bool {
+    c == '"' || c == '\''
+}
+
+/// Tells whether `c` is the escape symbol, i.e. a backslash.
+#[inline]
+fn escape_char(c: char) -> bool {
+    c == '\\'
+}