summary refs log blame commit diff stats
path: root/src/strcursor.rs
blob: f8b95c62a6e0cce5c9cdb2867f4907c738b6dfcb (plain) (tree)















































































































































































































































































































































































                                                                                
// Copyright (c) 2021 Soni L.
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

//! String Cursor (sorta).

use ::std::io::Cursor;
use ::std::str::FromStr;

/// Built-in `StringReader` errors.
///
/// One constructor exists per failure a [`StringReader`] can report. Every
/// constructor is handed the reader itself as `context`; constructors with a
/// `from` parameter additionally receive the offending (or expected) text.
///
/// The notes below about where `context` is positioned describe how the
/// `Cursor<&str>` implementation in this file invokes each constructor.
pub trait ReadError<'a, C: StringReader<'a>>: Sized + std::error::Error {
    /// Creates an error that indicates an invalid integer was found.
    ///
    /// `from` is the run of number symbols that failed to parse. `context`
    /// has already been rewound to the start of that run.
    fn invalid_integer(context: &C, from: &str) -> Self;
    /// Creates an error that indicates an integer was expected.
    ///
    /// Raised when no number symbols were found; `context` has not moved.
    fn expected_integer(context: &C) -> Self;
    /// Creates an error that indicates an invalid float was found.
    ///
    /// `from` is the run of number symbols that failed to parse. `context`
    /// has already been rewound to the start of that run.
    fn invalid_float(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a float was expected.
    ///
    /// Raised when no number symbols were found; `context` has not moved.
    fn expected_float(context: &C) -> Self;
    /// Creates an error that indicates an invalid bool was found.
    ///
    /// `from` is the unquoted string that was neither `true` nor `false`.
    /// `context` has already been rewound to the start of that string.
    fn invalid_bool(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a bool was expected.
    ///
    /// Raised when the unquoted string read at the cursor was empty.
    fn expected_bool(context: &C) -> Self;
    /// Creates an error that indicates the start of a quote was expected.
    ///
    /// Raised when the next char is not a quote symbol; `context` has not
    /// moved.
    fn expected_start_of_quote(context: &C) -> Self;
    /// Creates an error that indicates the end of a quote was expected.
    ///
    /// Raised when the input ran out before the closing quote was seen. The
    /// error is built before the reader is rewound to the opening quote.
    fn expected_end_of_quote(context: &C) -> Self;
    /// Creates an error that indicates an invalid escape was found.
    ///
    /// `from` is the char that followed the escape symbol, encoded as a
    /// string. `context` is positioned just past that char when the error is
    /// built; the reader is only rewound afterwards.
    fn invalid_escape(context: &C, from: &str) -> Self;
    /// Creates an error that indicates a symbol was expected.
    ///
    /// `from` is the expected char, encoded as a string; `context` has not
    /// moved.
    fn expected_symbol(context: &C, from: &str) -> Self;
}

/// Command-parsing helpers layered on top of [`Cursor`]s.
///
/// Every `read_*` method puts the cursor back where it started when it
/// returns an error.
///
/// Unlike Brigadier there is no `getRemainingLength` (use
/// `get_remaining().len()` or `remaining_slice().len()`) and no
/// `getTotalLength` (use `get_ref().len()`).
pub trait StringReader<'a>: Sized {
    /// Returns everything before the cursor, i.e. what was already consumed.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_read(&self) -> &'a str;
    /// Returns everything from the cursor onwards, i.e. what is still unread.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn get_remaining(&self) -> &'a str;
    /// Returns `true` while at least one more byte can be read.
    #[inline]
    fn can_read(&self) -> bool {
        self.can_read_n(1)
    }
    /// Returns `true` if at least `len` more bytes can be read.
    //#[inline]
    fn can_read_n(&self, len: usize) -> bool;
    /// Returns the char under the cursor without consuming it.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    #[inline]
    fn peek(&self) -> char {
        self.peek_n(0)
    }
    /// Returns the char that starts `offset` **bytes** after the cursor
    /// (decoding as many subsequent bytes as that char needs).
    ///
    /// # Panics
    ///
    /// Panics if the offset is beyond the boundaries of the buffer, or if the
    /// offset is not on an UTF-8 character boundary.
    //#[inline]
    fn peek_n(&self, offset: usize) -> char;

    /// Moves the cursor past the next char.
    ///
    /// # Panics
    ///
    /// Panics if there's nothing left to read, or if this cursor is not on an
    /// UTF-8 character boundary.
    //#[inline]
    fn skip(&mut self);
    /// Consumes and returns the next char, or `None` at the end of input.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    //#[inline]
    fn read_char(&mut self) -> Option<char>;
    /// Consumes the next char if it is `c`; otherwise reports
    /// [`ReadError::expected_symbol`] and leaves the cursor untouched.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn expect<E: ReadError<'a, Self>>(&mut self, c: char) -> Result<(), E> {
        if self.can_read() && self.peek() == c {
            self.skip();
            return Ok(());
        }
        // The error constructors take &str, so the char has to be encoded
        // into a scratch buffer first.
        let mut utf8 = [0u8; 4];
        let symbol: &str = c.encode_utf8(&mut utf8);
        Err(E::expected_symbol(self, symbol))
    }
    /// Advances the cursor past any run of whitespace.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn skip_whitespace(&mut self) {
        // FIXME decide whether to mirror brigadier's whitespace rules: rust
        // classifies whitespace per unicode, java uses its own rules.
        loop {
            if !self.can_read() || !self.peek().is_whitespace() {
                break;
            }
            self.skip();
        }
    }

    /// Parses an integer of type `T` starting at the cursor.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where T: FromStr<Err=std::num::ParseIntError>;
    /// Parses a float of type `T` starting at the cursor.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where T: FromStr<Err=std::num::ParseFloatError>;
    /// Parses a bool starting at the cursor.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E>;
    /// Consumes and returns an unquoted string starting at the cursor.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    // the odd one out among the read_* methods: it has no way to fail.
    fn read_unquoted_str(&mut self) -> &'a str;
    /// Consumes a quoted string starting at the cursor and returns its
    /// contents.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E>;
    /// Consumes either a quoted or an unquoted string starting at the cursor.
    ///
    /// # Panics
    ///
    /// Panics if this cursor is not on an UTF-8 character boundary.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E>;
}

/// [`StringReader`] for a [`Cursor`] over a borrowed string.
///
/// The cursor position is used as a byte offset into the string returned by
/// `get_ref()`.
impl<'a> StringReader<'a> for Cursor<&'a str> {
    /// Returns the slice of the underlying string before the cursor.
    #[inline]
    fn get_read(&self) -> &'a str {
        &self.get_ref()[..(self.position() as usize)]
    }
    /// Returns the slice of the underlying string from the cursor onwards.
    #[inline]
    fn get_remaining(&self) -> &'a str {
        &self.get_ref()[(self.position() as usize)..]
    }
    /// Returns whether at least `len` bytes remain after the cursor.
    ///
    /// Returns `false` (rather than overflowing) for huge `len` values and
    /// for cursors positioned past the end of the string.
    #[inline]
    fn can_read_n(&self, len: usize) -> bool {
        // Work in u64 (the cursor's native width) and subtract instead of
        // adding, so neither a narrowing cast nor an overflowing addition can
        // produce a wrong answer.
        let total = self.get_ref().len() as u64;
        let pos = self.position();
        pos <= total && len as u64 <= total - pos
    }
    /// Returns the char starting `offset` bytes after the cursor.
    ///
    /// # Panics
    ///
    /// Panics if `position + offset` overflows, lies at or beyond the end of
    /// the string, or is not on an UTF-8 character boundary.
    #[inline]
    fn peek_n(&self, offset: usize) -> char {
        // A wrapped sum could land back inside the string and silently yield
        // an unrelated char, so overflow is turned into an explicit panic.
        let index = (self.position() as usize)
            .checked_add(offset)
            .expect("peek offset overflows usize");
        self.get_ref()[index..]
            .chars()
            .next()
            .expect("nothing left to peek at")
    }

    /// Advances the cursor by the UTF-8 length of the next char.
    #[inline]
    fn skip(&mut self) {
        self.set_position(self.position() + self.peek().len_utf8() as u64);
    }
    /// Consumes the next char, if any, advancing by its UTF-8 length.
    #[inline]
    fn read_char(&mut self) -> Option<char> {
        let next = self.get_remaining().chars().next()?;
        self.set_position(self.position() + next.len_utf8() as u64);
        Some(next)
    }

    /// Consumes the leading run of number symbols and parses it as `T`.
    ///
    /// Fails with `expected_integer` when the run is empty and with
    /// `invalid_integer` when it does not parse; the cursor ends up at its
    /// starting position in both cases.
    fn read_integer<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where T: FromStr<Err=std::num::ParseIntError> {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // nothing was consumed, so there is nothing to rewind
            return Err(E::expected_integer(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_integer(self, number)
        })
    }
    /// Consumes the leading run of number symbols and parses it as `T`.
    ///
    /// Fails with `expected_float` when the run is empty and with
    /// `invalid_float` when it does not parse; the cursor ends up at its
    /// starting position in both cases.
    fn read_float<T, E: ReadError<'a, Self>>(&mut self) -> Result<T, E>
    where T: FromStr<Err=std::num::ParseFloatError> {
        let start = self.position();
        let number = read_while(self, number_chars);
        if number.is_empty() {
            // nothing was consumed, so there is nothing to rewind
            return Err(E::expected_float(self));
        }
        number.parse().map_err(|_| {
            self.set_position(start);
            E::invalid_float(self, number)
        })
    }
    /// Reads an unquoted string and interprets it as `true` or `false`.
    ///
    /// Fails with `expected_bool` when the unquoted string is empty and with
    /// `invalid_bool` (after rewinding) for any other value.
    fn read_bool<E: ReadError<'a, Self>>(&mut self) -> Result<bool, E> {
        let start = self.position();
        // NOTE: brigadier also allows quoted strings for bools.
        // we consider that a bug, so we don't.
        match self.read_unquoted_str() {
            "true" => Ok(true),
            "false" => Ok(false),
            // empty means nothing was consumed: no rewind needed
            "" => Err(E::expected_bool(self)),
            value => {
                self.set_position(start);
                Err(E::invalid_bool(self, value))
            },
        }
    }
    /// Consumes and returns the leading run of unquoted-string symbols
    /// (possibly empty).
    fn read_unquoted_str(&mut self) -> &'a str {
        read_while(self, unquoted_chars)
    }
    /// Reads a string delimited by a quote symbol, resolving escapes.
    ///
    /// Returns an empty string at the end of input. Fails with
    /// `expected_start_of_quote` when the next char is not a quote symbol;
    /// errors from inside the quoted string rewind the cursor to the opening
    /// quote.
    fn read_quoted_string<E: ReadError<'a, Self>>(
        &mut self,
    ) -> Result<String, E> {
        if !self.can_read() {
            return Ok(String::new());
        }
        let terminator = self.peek();
        if !quote_chars(terminator) {
            return Err(E::expected_start_of_quote(self));
        }
        let start = self.position();
        // step over the opening quote; the same symbol closes the string
        self.skip();
        let res = read_string_until(self, terminator);
        if res.is_err() {
            self.set_position(start);
        }
        res
    }
    /// Reads a quoted string if the next char is a quote symbol, and an
    /// unquoted string otherwise. Returns an empty string at the end of
    /// input.
    fn read_string<E: ReadError<'a, Self>>(&mut self) -> Result<String, E> {
        if self.can_read() && !quote_chars(self.peek()) {
            Ok(self.read_unquoted_str().into())
        } else {
            // end of input or an opening quote: both are handled there
            self.read_quoted_string()
        }
    }
}

/// Consumes the longest prefix of the remaining input whose chars all satisfy
/// `allowed`, advances the cursor past it and returns it.
fn read_while<'a>(
    this: &mut Cursor<&'a str>,
    allowed: fn(char) -> bool,
) -> &'a str {
    // there's no direct "take matching prefix" on str, so measure how much
    // trim_start_matches strips off instead.
    let remaining = this.get_remaining();
    let len = remaining.len() - remaining.trim_start_matches(allowed).len();
    this.set_position(this.position() + len as u64);
    &remaining[..len]
}

/// Collects chars from `this` up to (and consuming) the first unescaped
/// `terminator`, returning the collected text with escapes resolved.
///
/// An escape symbol may only be followed by the terminator or by another
/// escape symbol; anything else yields `invalid_escape`. Running out of input
/// before the terminator yields `expected_end_of_quote`. The cursor is not
/// rewound here on error.
fn read_string_until<'a, E: ReadError<'a, Cursor<&'a str>>>(
    this: &mut Cursor<&'a str>,
    terminator: char,
) -> Result<String, E> {
    let mut out = String::new();
    // set right after an escape symbol has been consumed
    let mut pending_escape = false;

    loop {
        let c = match this.read_char() {
            Some(c) => c,
            None => return Err(E::expected_end_of_quote(this)),
        };

        if pending_escape {
            if c != terminator && !escape_char(c) {
                let mut utf8 = [0u8; 4];
                // NOTE: brigadier unskips the escape. we don't bother.
                return Err(E::invalid_escape(this, c.encode_utf8(&mut utf8)));
            }
            out.push(c);
            pending_escape = false;
        } else if escape_char(c) {
            pending_escape = true;
        } else if c == terminator {
            return Ok(out);
        } else {
            out.push(c);
        }
    }
}

/// Returns whether `c` may appear in an unquoted string: an ASCII letter or
/// digit, or one of `_`, `-`, `.`, `+`.
#[inline]
fn unquoted_chars(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | '+')
}

/// Returns whether `c` may appear in a number: an ASCII digit, `-` or `.`.
#[inline]
fn number_chars(c: char) -> bool {
    c.is_ascii_digit() || c == '-' || c == '.'
}

/// Returns whether `c` can open/close a quoted string: `"` or `'`.
#[inline]
fn quote_chars(c: char) -> bool {
    c == '"' || c == '\''
}

/// Returns whether `c` is the escape symbol, i.e. a backslash.
#[inline]
fn escape_char(c: char) -> bool {
    c == '\\'
}