syntax/
validation.rs

1//! This module implements syntax validation that the parser doesn't handle.
2//!
3//! A failed validation emits a diagnostic.
4
5mod block;
6
7use itertools::Itertools;
8use rowan::Direction;
9use rustc_literal_escaper::{
10    EscapeError, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
11};
12
13use crate::{
14    AstNode, SyntaxError,
15    SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS},
16    SyntaxNode, SyntaxToken, T, TextSize, algo,
17    ast::{self, HasAttrs, HasVisibility, IsString, RangeItem},
18    match_ast,
19};
20
/// Runs the extra syntax checks that the parser itself does not perform.
///
/// Every descendant node of `root` is visited and handed to the check that matches its
/// AST type; node types without a dedicated check are ignored. Each failed check appends
/// a [`SyntaxError`] to `errors`.
pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec<SyntaxError>) {
    let _p = tracing::info_span!("parser::validate").entered();
    // FIXME:
    // * Add unescape validation of raw string literals and raw byte string literals
    // * Add validation that doc comments are attached to nodes

    for node in root.descendants() {
        match_ast! {
            match node {
                ast::Literal(it) => validate_literal(it, errors),
                ast::Const(it) => validate_const(it, errors),
                ast::BlockExpr(it) => block::validate_block_expr(it, errors),
                ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), errors),
                ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), errors),
                ast::Visibility(it) => validate_visibility(it, errors),
                ast::RangeExpr(it) => validate_range_expr(it, errors),
                ast::PathSegment(it) => validate_path_keywords(it, errors),
                ast::RefType(it) => validate_trait_object_ref_ty(it, errors),
                ast::PtrType(it) => validate_trait_object_ptr_ty(it, errors),
                ast::FnPtrType(it) => validate_trait_object_fn_ptr_ret_ty(it, errors),
                ast::MacroRules(it) => validate_macro_rules(it, errors),
                ast::LetExpr(it) => validate_let_expr(it, errors),
                // These two checks return `Option<SyntaxError>`; `extend` adds the error if any.
                ast::DynTraitType(it) => errors.extend(validate_trait_object_ty(it)),
                ast::ImplTraitType(it) => errors.extend(validate_impl_object_ty(it)),
                _ => (),
            }
        }
    }
}
50
51fn rustc_unescape_error_to_string(err: EscapeError) -> (&'static str, bool) {
52    use EscapeError as EE;
53
54    #[rustfmt::skip]
55    let err_message = match err {
56        EE::ZeroChars => {
57            "Literal must not be empty"
58        }
59        EE::MoreThanOneChar => {
60            "Literal must be one character long"
61        }
62        EE::LoneSlash => {
63            "Character must be escaped: `\\`"
64        }
65        EE::InvalidEscape => {
66            "Invalid escape"
67        }
68        EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
69            "Character must be escaped: `\r`"
70        }
71        EE::EscapeOnlyChar => {
72            "Escape character `\\` must be escaped itself"
73        }
74        EE::TooShortHexEscape => {
75            "ASCII hex escape code must have exactly two digits"
76        }
77        EE::InvalidCharInHexEscape => {
78            "ASCII hex escape code must contain only hex characters"
79        }
80        EE::OutOfRangeHexEscape => {
81            "ASCII hex escape code must be at most 0x7F"
82        }
83        EE::NoBraceInUnicodeEscape => {
84            "Missing `{` to begin the unicode escape"
85        }
86        EE::InvalidCharInUnicodeEscape => {
87            "Unicode escape must contain only hex characters and underscores"
88        }
89        EE::EmptyUnicodeEscape => {
90            "Unicode escape must not be empty"
91        }
92        EE::UnclosedUnicodeEscape => {
93            "Missing `}` to terminate the unicode escape"
94        }
95        EE::LeadingUnderscoreUnicodeEscape => {
96            "Unicode escape code must not begin with an underscore"
97        }
98        EE::OverlongUnicodeEscape => {
99            "Unicode escape code must have at most 6 digits"
100        }
101        EE::LoneSurrogateUnicodeEscape => {
102            "Unicode escape code must not be a surrogate"
103        }
104        EE::OutOfRangeUnicodeEscape => {
105            "Unicode escape code must be at most 0x10FFFF"
106        }
107        EE::UnicodeEscapeInByte => {
108            "Byte literals must not contain unicode escapes"
109        }
110        EE::NonAsciiCharInByte  => {
111            "Byte literals must not contain non-ASCII characters"
112        }
113        EE::NulInCStr  => {
114            "C strings literals must not contain null characters"
115        }
116        EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
117        EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",
118
119    };
120
121    (err_message, err.is_fatal())
122}
123
124fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
125    // FIXME: move this function to outer scope (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366196658)
126    fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
127        text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
128    }
129
130    let token = literal.token();
131    let text = token.text();
132
133    // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205)
134    let mut push_err = |prefix_len, off, err: EscapeError| {
135        let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
136        let (message, is_err) = rustc_unescape_error_to_string(err);
137        // FIXME: Emit lexer warnings
138        if is_err {
139            acc.push(SyntaxError::new_at_offset(message, off));
140        }
141    };
142
143    match literal.kind() {
144        ast::LiteralKind::String(s) => {
145            if !s.is_raw()
146                && let Some(without_quotes) = unquote(text, 1, '"')
147            {
148                unescape_str(without_quotes, |range, char| {
149                    if let Err(err) = char {
150                        push_err(1, range.start, err);
151                    }
152                });
153            }
154        }
155        ast::LiteralKind::ByteString(s) => {
156            if !s.is_raw()
157                && let Some(without_quotes) = unquote(text, 2, '"')
158            {
159                unescape_byte_str(without_quotes, |range, char| {
160                    if let Err(err) = char {
161                        push_err(1, range.start, err);
162                    }
163                });
164            }
165        }
166        ast::LiteralKind::CString(s) => {
167            if !s.is_raw()
168                && let Some(without_quotes) = unquote(text, 2, '"')
169            {
170                unescape_c_str(without_quotes, |range, char| {
171                    if let Err(err) = char {
172                        push_err(1, range.start, err);
173                    }
174                });
175            }
176        }
177        ast::LiteralKind::Char(_) => {
178            if let Some(without_quotes) = unquote(text, 1, '\'')
179                && let Err(err) = unescape_char(without_quotes)
180            {
181                push_err(1, 0, err);
182            }
183        }
184        ast::LiteralKind::Byte(_) => {
185            if let Some(without_quotes) = unquote(text, 2, '\'')
186                && let Err(err) = unescape_byte(without_quotes)
187            {
188                push_err(2, 0, err);
189            }
190        }
191        ast::LiteralKind::IntNumber(_)
192        | ast::LiteralKind::FloatNumber(_)
193        | ast::LiteralKind::Bool(_) => {}
194    }
195}
196
197pub(crate) fn validate_block_structure(root: &SyntaxNode) {
198    let mut stack = Vec::new();
199    for node in root.descendants_with_tokens() {
200        match node.kind() {
201            T!['{'] => stack.push(node),
202            T!['}'] => {
203                if let Some(pair) = stack.pop() {
204                    assert_eq!(
205                        node.parent(),
206                        pair.parent(),
207                        "\nunpaired curlies:\n{}\n{:#?}\n",
208                        root.text(),
209                        root,
210                    );
211                    assert!(
212                        node.next_sibling_or_token().is_none()
213                            && pair.prev_sibling_or_token().is_none(),
214                        "\nfloating curlies at {:?}\nfile:\n{}\nerror:\n{}\n",
215                        node,
216                        root.text(),
217                        node,
218                    );
219                }
220            }
221            _ => (),
222        }
223    }
224}
225
226fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
227    if let Some(int_token) = int_token(name_ref)
228        && int_token.text().chars().any(|c| !c.is_ascii_digit())
229    {
230        errors.push(SyntaxError::new(
231            "Tuple (struct) field access is only allowed through \
232                decimal integers with no underscores or suffix",
233            int_token.text_range(),
234        ));
235    }
236
237    fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
238        name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
239    }
240}
241
242fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
243    let path_without_in_token = vis.in_token().is_none()
244        && vis.path().and_then(|p| p.as_single_name_ref()).and_then(|n| n.ident_token()).is_some();
245    if path_without_in_token {
246        errors.push(SyntaxError::new("incorrect visibility restriction", vis.syntax.text_range()));
247    }
248    let parent = match vis.syntax().parent() {
249        Some(it) => it,
250        None => return,
251    };
252    match parent.kind() {
253        FN | CONST | TYPE_ALIAS => (),
254        _ => return,
255    }
256
257    let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) {
258        Some(it) => it,
259        None => return,
260    };
261    // FIXME: disable validation if there's an attribute, since some proc macros use this syntax.
262    // ideally the validation would run only on the fully expanded code, then this wouldn't be necessary.
263    if impl_def.trait_().is_some() && impl_def.attrs().next().is_none() {
264        errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
265    }
266}
267
268fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
269    if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
270        errors.push(SyntaxError::new(
271            "An inclusive range must have an end expression",
272            expr.syntax().text_range(),
273        ));
274    }
275}
276
/// Checks the placement of the `self` and `crate` keywords inside a path.
///
/// * `self` is reported when the segment is not the start of its path.
/// * `crate` is reported when the segment is not the start of its path, or when
///   `use_prefix` finds an enclosing use tree path that differs from this one.
fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
    let path = segment.parent_path();
    // A segment starts the path when it has no leading `::` and its path has no qualifier.
    let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();

    if let Some(token) = segment.self_token() {
        if !is_path_start {
            errors.push(SyntaxError::new(
                "The `self` keyword is only allowed as the first segment of a path",
                token.text_range(),
            ));
        }
    } else if let Some(token) = segment.crate_token()
        && (!is_path_start || use_prefix(path).is_some())
    {
        errors.push(SyntaxError::new(
            "The `crate` keyword is only allowed as the first segment of a path",
            token.text_range(),
        ));
    }

    /// Walks the ancestors of `path` and returns the path of the first enclosing `UseTree`
    /// whose path is not the (outermost) path we climbed up from. Returns `None` as soon as
    /// an ancestor is neither a `Path`, a `UseTree` nor a `UseTreeList`, or when the
    /// ancestors run out.
    fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
        for node in path.syntax().ancestors().skip(1) {
            match_ast! {
                match node {
                    ast::UseTree(it) => if let Some(tree_path) = it.path() {
                        // Even a top-level path exists within a `UseTree` so we must explicitly
                        // allow our path but disallow anything else
                        if tree_path != path {
                            return Some(tree_path);
                        }
                    },
                    ast::UseTreeList(_) => continue,
                    // Keep track of the enclosing path so the comparison above is made
                    // against the whole path, not just the part we started in.
                    ast::Path(parent) => path = parent,
                    _ => return None,
                }
            };
        }
        None
    }
}
317
318fn validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>) {
319    match ty.ty() {
320        Some(ast::Type::DynTraitType(ty)) => {
321            if let Some(err) = validate_trait_object_ty_plus(ty) {
322                errors.push(err);
323            }
324        }
325        Some(ast::Type::ImplTraitType(ty)) => {
326            if let Some(err) = validate_impl_object_ty_plus(ty) {
327                errors.push(err);
328            }
329        }
330        _ => (),
331    }
332}
333
334fn validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>) {
335    match ty.ty() {
336        Some(ast::Type::DynTraitType(ty)) => {
337            if let Some(err) = validate_trait_object_ty_plus(ty) {
338                errors.push(err);
339            }
340        }
341        Some(ast::Type::ImplTraitType(ty)) => {
342            if let Some(err) = validate_impl_object_ty_plus(ty) {
343                errors.push(err);
344            }
345        }
346        _ => (),
347    }
348}
349
350fn validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>) {
351    match ty.ret_type().and_then(|ty| ty.ty()) {
352        Some(ast::Type::DynTraitType(ty)) => {
353            if let Some(err) = validate_trait_object_ty_plus(ty) {
354                errors.push(err);
355            }
356        }
357        Some(ast::Type::ImplTraitType(ty)) => {
358            if let Some(err) = validate_impl_object_ty_plus(ty) {
359                errors.push(err);
360            }
361        }
362        _ => (),
363    }
364}
365
366fn validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError> {
367    let tbl = ty.type_bound_list()?;
368    let no_bounds = tbl.bounds().filter_map(|it| it.ty()).next().is_none();
369
370    match no_bounds {
371        true => Some(SyntaxError::new(
372            "At least one trait is required for an object type",
373            ty.syntax().text_range(),
374        )),
375        false => None,
376    }
377}
378
379fn validate_impl_object_ty(ty: ast::ImplTraitType) -> Option<SyntaxError> {
380    let tbl = ty.type_bound_list()?;
381    let no_bounds = tbl.bounds().filter_map(|it| it.ty()).next().is_none();
382
383    match no_bounds {
384        true => Some(SyntaxError::new(
385            "At least one trait is required for an object type",
386            ty.syntax().text_range(),
387        )),
388        false => None,
389    }
390}
391
392// FIXME: This is not a validation error, this is a context dependent parse error
393fn validate_trait_object_ty_plus(ty: ast::DynTraitType) -> Option<SyntaxError> {
394    let dyn_token = ty.dyn_token()?;
395    let preceding_token = algo::skip_trivia_token(dyn_token.prev_token()?, Direction::Prev)?;
396    let tbl = ty.type_bound_list()?;
397    let more_than_one_bound = tbl.bounds().next_tuple::<(_, _)>().is_some();
398
399    if more_than_one_bound && !matches!(preceding_token.kind(), T!['('] | T![<] | T![=]) {
400        Some(SyntaxError::new("ambiguous `+` in a type", ty.syntax().text_range()))
401    } else {
402        None
403    }
404}
405
406// FIXME: This is not a validation error, this is a context dependent parse error
407fn validate_impl_object_ty_plus(ty: ast::ImplTraitType) -> Option<SyntaxError> {
408    let dyn_token = ty.impl_token()?;
409    let preceding_token = algo::skip_trivia_token(dyn_token.prev_token()?, Direction::Prev)?;
410    let tbl = ty.type_bound_list()?;
411    let more_than_one_bound = tbl.bounds().next_tuple::<(_, _)>().is_some();
412
413    if more_than_one_bound && !matches!(preceding_token.kind(), T!['('] | T![<] | T![=]) {
414        Some(SyntaxError::new("ambiguous `+` in a type", ty.syntax().text_range()))
415    } else {
416        None
417    }
418}
419
420fn validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>) {
421    if let Some(vis) = mac.visibility() {
422        errors.push(SyntaxError::new(
423            "visibilities are not allowed on `macro_rules!` items",
424            vis.syntax().text_range(),
425        ));
426    }
427}
428
429fn validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>) {
430    if let Some(mut_token) = const_
431        .const_token()
432        .and_then(|t| t.next_token())
433        .and_then(|t| algo::skip_trivia_token(t, Direction::Next))
434        .filter(|t| t.kind() == T![mut])
435    {
436        errors.push(SyntaxError::new("const globals cannot be mutable", mut_token.text_range()));
437    }
438}
439
440fn validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>) {
441    let mut token = let_.syntax().clone();
442    loop {
443        token = match token.parent() {
444            Some(it) => it,
445            None => break,
446        };
447
448        if ast::ParenExpr::can_cast(token.kind()) {
449            continue;
450        } else if let Some(it) = ast::BinExpr::cast(token.clone()) {
451            if it.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And)) {
452                continue;
453            }
454        } else if ast::IfExpr::can_cast(token.kind())
455            || ast::WhileExpr::can_cast(token.kind())
456            || ast::MatchGuard::can_cast(token.kind())
457        {
458            // It must be part of the condition since the expressions are inside a block.
459            return;
460        }
461
462        break;
463    }
464    errors.push(SyntaxError::new(
465        "`let` expressions are not supported here",
466        let_.syntax().text_range(),
467    ));
468}