ide_db/syntax_helpers/
format_string_exprs.rs

1//! Tools to work with expressions present in format string literals for the `format_args!` family of macros.
2//! Primarily meant for assists and completions.
3
/// Enum for representing extracted format string args.
/// Can either be extracted expressions (which includes identifiers),
/// or placeholders `{}`.
///
/// The payload of [`Arg::Ident`] and [`Arg::Expr`] is the source text of the argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg {
    /// An empty `{}` slot that has no expression attached to it yet.
    Placeholder,
    /// An argument that is a plain identifier.
    Ident(String),
    /// An argument that is an arbitrary expression.
    Expr(String),
}
13
14/// Add placeholders like `$1` and `$2` in place of [`Arg::Placeholder`],
15/// and unwraps the [`Arg::Ident`] and [`Arg::Expr`] enums.
16/// ```rust
17/// # use ide_db::syntax_helpers::format_string_exprs::*;
18/// assert_eq!(with_placeholders(vec![Arg::Ident("ident".to_owned()), Arg::Placeholder, Arg::Expr("expr + 2".to_owned())]), vec!["ident".to_owned(), "$1".to_owned(), "expr + 2".to_owned()])
19/// ```
20pub fn with_placeholders(args: Vec<Arg>) -> Vec<String> {
21    let mut placeholder_id = 1;
22    args.into_iter()
23        .map(move |a| match a {
24            Arg::Expr(s) | Arg::Ident(s) => s,
25            Arg::Placeholder => {
26                let s = format!("${placeholder_id}");
27                placeholder_id += 1;
28                s
29            }
30        })
31        .collect()
32}
33
34// FIXME Remove this, we have this information in the HIR now
35/// Parser for a format-like string. It is more allowing in terms of string contents,
36/// as we expect variable placeholders to be filled with expressions.
37///
38/// Splits a format string that may contain expressions
39/// like
40/// ```rust
41/// # use ide_db::syntax_helpers::format_string_exprs::*;
42/// assert_eq!(parse_format_exprs("{ident} {} {expr + 42} ").unwrap(), ("{ident} {} {} ".to_owned(), vec![Arg::Placeholder, Arg::Expr("expr + 42".to_owned())]));
43/// ```
44pub fn parse_format_exprs(input: &str) -> Result<(String, Vec<Arg>), ()> {
45    #[derive(Debug, Clone, Copy, PartialEq)]
46    enum State {
47        NotArg,
48        MaybeArg,
49        Expr,
50        Ident,
51        MaybeIncorrect,
52        FormatOpts,
53    }
54
55    let mut state = State::NotArg;
56    let mut current_expr = String::new();
57    let mut extracted_expressions = Vec::new();
58    let mut output = String::new();
59
60    // Count of open braces inside of an expression.
61    // We assume that user knows what they're doing, thus we treat it like a correct pattern, e.g.
62    // "{MyStruct { val_a: 0, val_b: 1 }}".
63    let mut inexpr_open_count = 0;
64
65    let mut chars = input.chars().peekable();
66    while let Some(chr) = chars.next() {
67        match (state, chr) {
68            (State::NotArg, '{') => {
69                output.push(chr);
70                state = State::MaybeArg;
71            }
72            (State::NotArg, '}') => {
73                output.push(chr);
74                state = State::MaybeIncorrect;
75            }
76            (State::NotArg, _) => {
77                output.push(chr);
78            }
79            (State::MaybeIncorrect, '}') => {
80                // It's okay, we met "}}".
81                output.push(chr);
82                state = State::NotArg;
83            }
84            (State::MaybeIncorrect, _) => {
85                // Error in the string.
86                return Err(());
87            }
88            // Escaped braces `{{`
89            (State::MaybeArg, '{') => {
90                output.push(chr);
91                state = State::NotArg;
92            }
93            (State::MaybeArg, '}') => {
94                // This is an empty sequence '{}'.
95                output.push(chr);
96                extracted_expressions.push(Arg::Placeholder);
97                state = State::NotArg;
98            }
99            (State::MaybeArg, ':') => {
100                output.push(chr);
101                extracted_expressions.push(Arg::Placeholder);
102                state = State::FormatOpts;
103            }
104            (State::MaybeArg, _) => {
105                current_expr.push(chr);
106
107                // While Rust uses the unicode sets of XID_start and XID_continue for Identifiers
108                // this is probably the best we can do to avoid a false positive
109                if chr.is_alphabetic() || chr == '_' {
110                    state = State::Ident;
111                } else {
112                    state = State::Expr;
113                }
114            }
115            (State::Ident | State::Expr, ':') if matches!(chars.peek(), Some(':')) => {
116                // path separator
117                state = State::Expr;
118                current_expr.push_str("::");
119                chars.next();
120            }
121            (State::Ident | State::Expr, ':' | '}') => {
122                if inexpr_open_count == 0 {
123                    let trimmed = current_expr.trim();
124
125                    // if the expression consists of a single number, like "0" or "12", it can refer to
126                    // format args in the order they are specified.
127                    // see: https://doc.rust-lang.org/std/fmt/#positional-parameters
128                    if trimmed.chars().fold(true, |only_num, c| c.is_ascii_digit() && only_num) {
129                        output.push_str(trimmed);
130                    } else if matches!(state, State::Expr) {
131                        extracted_expressions.push(Arg::Expr(trimmed.into()));
132                    } else if matches!(state, State::Ident) {
133                        output.push_str(trimmed);
134                    }
135
136                    output.push(chr);
137                    current_expr.clear();
138                    state = if chr == ':' {
139                        State::FormatOpts
140                    } else if chr == '}' {
141                        State::NotArg
142                    } else {
143                        unreachable!()
144                    };
145                } else if chr == '}' {
146                    // We're closing one brace met before inside of the expression.
147                    current_expr.push(chr);
148                    inexpr_open_count -= 1;
149                } else if chr == ':' {
150                    // We're inside of braced expression, assume that it's a struct field name/value delimiter.
151                    current_expr.push(chr);
152                }
153            }
154            (State::Ident | State::Expr, '{') => {
155                state = State::Expr;
156                current_expr.push(chr);
157                inexpr_open_count += 1;
158            }
159            (State::Ident | State::Expr, _) => {
160                if !(chr.is_alphanumeric() || chr == '_' || chr == '#') {
161                    state = State::Expr;
162                }
163
164                current_expr.push(chr);
165            }
166            (State::FormatOpts, '}') => {
167                output.push(chr);
168                state = State::NotArg;
169            }
170            (State::FormatOpts, _) => {
171                output.push(chr);
172            }
173        }
174    }
175
176    if state != State::NotArg {
177        return Err(());
178    }
179
180    Ok((output, extracted_expressions))
181}
182
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{Expect, expect};

    /// Parses `input` and compares a textual rendering of the outcome with `expect`.
    ///
    /// The rendering is `-` when parsing fails, just the rewritten format string when no
    /// arguments were extracted, and `<format string>; <arg>, <arg>, ...` otherwise.
    fn check(input: &str, expect: &Expect) {
        let rendered = match parse_format_exprs(input) {
            Err(()) => "-".to_owned(),
            Ok((fmt, args)) if args.is_empty() => fmt,
            Ok((fmt, args)) => format!("{fmt}; {}", with_placeholders(args).join(", ")),
        };

        expect.assert_eq(&rendered);
    }

    /// Table-driven test: every entry is an input string and its expected rendering.
    #[test]
    fn format_str_parser() {
        let cases = [
            ("no expressions", expect![["no expressions"]]),
            (r"no expressions with \$0$1", expect![r"no expressions with \$0$1"]),
            ("{expr} is {2 + 2}", expect![["{expr} is {}; 2 + 2"]]),
            ("{expr:?}", expect![["{expr:?}"]]),
            ("{expr:1$}", expect![[r"{expr:1$}"]]),
            ("{:1$}", expect![[r"{:1$}; $1"]]),
            ("{:>padding$}", expect![[r"{:>padding$}; $1"]]),
            ("{}, {}, {0}", expect![[r"{}, {}, {0}; $1, $2"]]),
            ("{}, {}, {0:b}", expect![[r"{}, {}, {0:b}; $1, $2"]]),
            ("{$0}", expect![[r"{}; $0"]]),
            ("{malformed", expect![["-"]]),
            ("malformed}", expect![["-"]]),
            ("{{correct", expect![["{{correct"]]),
            ("correct}}", expect![["correct}}"]]),
            ("{correct}}}", expect![["{correct}}}"]]),
            ("{correct}}}}}", expect![["{correct}}}}}"]]),
            ("{incorrect}}", expect![["-"]]),
            ("placeholders {} {}", expect![["placeholders {} {}; $1, $2"]]),
            ("mixed {} {2 + 2} {}", expect![["mixed {} {} {}; $1, 2 + 2, $2"]]),
            (
                "{SomeStruct { val_a: 0, val_b: 1 }}",
                expect![["{}; SomeStruct { val_a: 0, val_b: 1 }"]],
            ),
            ("{expr:?} is {2.32f64:.5}", expect![["{expr:?} is {:.5}; 2.32f64"]]),
            (
                "{SomeStruct { val_a: 0, val_b: 1 }:?}",
                expect![["{:?}; SomeStruct { val_a: 0, val_b: 1 }"]],
            ),
            ("{     2 + 2        }", expect![["{}; 2 + 2"]]),
            ("{strsim::jaro_winkle(a)}", expect![["{}; strsim::jaro_winkle(a)"]]),
            ("{foo::bar::baz()}", expect![["{}; foo::bar::baz()"]]),
            ("{foo::bar():?}", expect![["{:?}; foo::bar()"]]),
        ];

        for (input, expected) in &cases {
            check(input, expected);
        }
    }

    /// Identifiers (including raw ones) stay inline and yield no argument, everything else
    /// is extracted as an expression or a placeholder.
    #[test]
    fn arg_type() {
        let (_, args) =
            parse_format_exprs("{_ident} {r#raw_ident} {expr.obj} {name {thing: 42} } {}")
                .unwrap();
        let expected = vec![
            Arg::Expr("expr.obj".to_owned()),
            Arg::Expr("name {thing: 42}".to_owned()),
            Arg::Placeholder,
        ];

        assert_eq!(args, expected);
    }
}