alloy_sol_type_parser/
ident.rs

1use winnow::{
2    error::{ErrMode, ParserError},
3    stream::{AsBStr, Stream},
4    ModalResult,
5};
6
/// Regular expression describing a single Solidity identifier: one character
/// from `[a-zA-Z$_]` followed by zero or more characters from
/// `[a-zA-Z0-9$_]`.
///
/// Solidity grammar reference:
/// <https://docs.soliditylang.org/en/latest/grammar.html#a4.SolidityLexer.Identifier>
pub const IDENT_REGEX: &str = "[a-zA-Z$_][a-zA-Z0-9$_]*";
11
/// Reports whether `c` may appear as the first character of a Solidity
/// identifier.
///
/// Accepted characters are the ASCII letters, `_` and `$`. Everything else,
/// including ASCII digits and all non-ASCII characters, is rejected.
#[inline]
pub const fn is_id_start(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_' || c == '$'
}
18
/// Reports whether `c` may appear anywhere inside a Solidity identifier.
///
/// Accepted characters are the ASCII letters, the ASCII digits, `_` and `$`.
/// Everything else, including all non-ASCII characters, is rejected.
#[inline]
pub const fn is_id_continue(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '$'
}
24
25/// Returns `true` if the given string is a valid Solidity identifier.
26///
27/// An identifier in Solidity has to start with a letter, a dollar-sign or
28/// an underscore and may additionally contain numbers after the first
29/// symbol.
30///
31/// Solidity reference:
32/// <https://docs.soliditylang.org/en/latest/grammar.html#a4.SolidityLexer.Identifier>
33pub const fn is_valid_identifier(s: &str) -> bool {
34    // Note: valid idents can only contain ASCII characters, so we can
35    // use the byte representation here.
36    let [first, rest @ ..] = s.as_bytes() else {
37        return false;
38    };
39
40    if !is_id_start(*first as char) {
41        return false;
42    }
43
44    let mut i = 0;
45    while i < rest.len() {
46        if !is_id_continue(rest[i] as char) {
47            return false;
48        }
49        i += 1;
50    }
51
52    true
53}
54
55/// Parses a Solidity identifier.
56#[inline]
57pub fn identifier<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
58    identifier_parser(input)
59}
60
61#[inline]
62pub(crate) fn identifier_parser<'a, I>(input: &mut I) -> ModalResult<&'a str>
63where
64    I: Stream<Slice = &'a str> + AsBStr,
65{
66    // See note in `is_valid_identifier` above.
67    // Use the faster `slice::Iter` instead of `str::Chars`.
68    let mut chars = input.as_bstr().iter().map(|b| *b as char);
69
70    let Some(true) = chars.next().map(is_id_start) else {
71        return Err(ErrMode::from_input(input));
72    };
73
74    // 1 for the first character, we know it's ASCII
75    let len = 1 + chars.take_while(|c| is_id_continue(*c)).count();
76    Ok(input.next_slice(len))
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs [`identifier`] on `input` and checks the parse result as well as
    /// what is left of the input afterwards (`output`). Every expected
    /// success value is additionally cross-checked with
    /// [`is_valid_identifier`].
    #[track_caller]
    fn ident_test(mut input: &str, expected: Result<&str, ()>, output: &str) {
        let parsed = identifier(&mut input).map_err(drop);
        assert_eq!(parsed, expected, "result mismatch");
        assert!(expected.map_or(true, is_valid_identifier), "expected is not a valid ident");
        assert_eq!(input, output, "output mismatch");
    }

    #[test]
    fn test_parse_identifier() {
        // Plain ASCII identifiers, with `$` / `_` in various positions and
        // trailing input that must be left unconsumed.
        ident_test("foo", Ok("foo"), "");
        ident_test("foo ", Ok("foo"), " ");
        ident_test("$foo", Ok("$foo"), "");
        ident_test("foo$", Ok("foo$"), "");
        ident_test("foo2$", Ok("foo2$"), "");
        ident_test("foo 2$", Ok("foo"), " 2$");
        ident_test("_foo 2$", Ok("_foo"), " 2$");

        // A non-ASCII character is an error at the start and otherwise ends
        // the identifier.
        ident_test("èfoo", Err(()), "èfoo");
        ident_test("fèoo", Ok("f"), "èoo");
        ident_test("foèo", Ok("fo"), "èo");
        ident_test("fooè", Ok("foo"), "è");

        // Digits are allowed anywhere except the first position.
        ident_test("3foo", Err(()), "3foo");
        ident_test("f3oo", Ok("f3oo"), "");
        ident_test("fo3o", Ok("fo3o"), "");
        ident_test("foo3", Ok("foo3"), "");
    }
}