ide/
syntax_highlighting.rs

1pub(crate) mod tags;
2
3mod highlights;
4mod injector;
5
6mod escape;
7mod format;
8mod highlight;
9mod inject;
10
11mod html;
12#[cfg(test)]
13mod tests;
14
15use std::ops::ControlFlow;
16
17use either::Either;
18use hir::{DefWithBody, EditionedFileId, InFile, InRealFile, MacroKind, Name, Semantics};
19use ide_db::{FxHashMap, FxHashSet, Ranker, RootDatabase, SymbolKind, base_db::salsa};
20use syntax::{
21    AstNode, AstToken, NodeOrToken,
22    SyntaxKind::*,
23    SyntaxNode, SyntaxToken, T, TextRange, WalkEvent,
24    ast::{self, IsString},
25};
26
27use crate::{
28    FileId, HlMod, HlOperator, HlPunct, HlTag,
29    syntax_highlighting::{
30        escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
31        format::highlight_format_string,
32        highlights::Highlights,
33        tags::Highlight,
34    },
35};
36
37pub(crate) use html::highlight_as_html;
38
39#[derive(Debug, Clone, Copy)]
40pub struct HlRange {
41    pub range: TextRange,
42    pub highlight: Highlight,
43    pub binding_hash: Option<u64>,
44}
45
/// Switches controlling which highlights are emitted and how specific their tags are.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HighlightConfig {
    /// Emit highlights for string literals; when `false` they are dropped.
    pub strings: bool,
    /// Emit highlights for punctuation; when `false`, punctuation without modifiers is dropped.
    pub punctuation: bool,
    /// Keep specific punctuation tags; when `false` they collapse into the generic one.
    pub specialize_punctuation: bool,
    /// Emit highlights for operators; when `false`, operators without modifiers are dropped.
    pub operator: bool,
    /// Keep specific operator tags; when `false` they collapse into the generic one.
    pub specialize_operator: bool,
    /// Inject highlights into doc comments.
    pub inject_doc_comment: bool,
    /// Highlight the `!` of a macro call as punctuation; when `false` it is tagged as part
    /// of the macro instead.
    pub macro_bang: bool,
    /// Highlight unresolved things by their syntax.
    pub syntactic_name_ref_highlighting: bool,
}
65
66// Feature: Semantic Syntax Highlighting
67//
68// rust-analyzer highlights the code semantically.
69// For example, `Bar` in `foo::Bar` might be colored differently depending on whether `Bar` is an enum or a trait.
70// rust-analyzer does not specify colors directly, instead it assigns a tag (like `struct`) and a set of modifiers (like `declaration`) to each token.
71// It's up to the client to map those to specific colors.
72//
73// The general rule is that a reference to an entity gets colored the same way as the entity itself.
// We also give a special modifier to `mut` and `&mut` local variables.
75//
76//
77// #### Token Tags
78//
79// Rust-analyzer currently emits the following token tags:
80//
81// - For items:
82//
83// |           |                                |
84// |-----------|--------------------------------|
85// | attribute |  Emitted for attribute macros. |
86// |enum| Emitted for enums. |
87// |function| Emitted for free-standing functions. |
88// |derive| Emitted for derive macros. |
89// |macro| Emitted for function-like macros. |
// |method| Emitted for associated functions, also known as methods. |
91// |namespace| Emitted for modules. |
92// |struct| Emitted for structs.|
93// |trait| Emitted for traits.|
94// |typeAlias| Emitted for type aliases and `Self` in `impl`s.|
95// |union| Emitted for unions.|
96//
97// - For literals:
98//
99// |           |                                |
100// |-----------|--------------------------------|
101// | boolean|  Emitted for the boolean literals `true` and `false`.|
102// | character| Emitted for character literals.|
103// | number| Emitted for numeric literals.|
104// | string| Emitted for string literals.|
105// | escapeSequence| Emitted for escaped sequences inside strings like `\n`.|
106// | formatSpecifier| Emitted for format specifiers `{:?}` in `format!`-like macros.|
107//
108// - For operators:
109//
110// |           |                                |
111// |-----------|--------------------------------|
112// |operator| Emitted for general operators.|
113// |arithmetic| Emitted for the arithmetic operators `+`, `-`, `*`, `/`, `+=`, `-=`, `*=`, `/=`.|
114// |bitwise| Emitted for the bitwise operators `|`, `&`, `!`, `^`, `|=`, `&=`, `^=`.|
// |comparison| Emitted for the comparison operators `>`, `<`, `==`, `>=`, `<=`, `!=`.|
// |logical| Emitted for the logical operators `||`, `&&`, `!`.|
117//
118// - For punctuation:
119//
120// |           |                                |
121// |-----------|--------------------------------|
122// |punctuation| Emitted for general punctuation.|
123// |attributeBracket| Emitted for attribute invocation brackets, that is the `#[` and `]` tokens.|
124// |angle| Emitted for `<>` angle brackets.|
125// |brace| Emitted for `{}` braces.|
126// |bracket| Emitted for `[]` brackets.|
127// |parenthesis| Emitted for `()` parentheses.|
128// |colon| Emitted for the `:` token.|
129// |comma| Emitted for the `,` token.|
130// |dot| Emitted for the `.` token.|
131// |semi| Emitted for the `;` token.|
132// |macroBang| Emitted for the `!` token in macro calls.|
133//
// - For everything else:
135//
136// |           |                                |
137// |-----------|--------------------------------|
138// |builtinAttribute| Emitted for names to builtin attributes in attribute path, the `repr` in `#[repr(u8)]` for example.|
139// |builtinType| Emitted for builtin types like `u32`, `str` and `f32`.|
140// |comment| Emitted for comments.|
141// |constParameter| Emitted for const parameters.|
142// |deriveHelper| Emitted for derive helper attributes.|
143// |enumMember| Emitted for enum variants.|
144// |generic| Emitted for generic tokens that have no mapping.|
145// |keyword| Emitted for keywords.|
146// |label| Emitted for labels.|
147// |lifetime| Emitted for lifetimes.|
148// |parameter| Emitted for non-self function parameters.|
149// |property| Emitted for struct and union fields.|
150// |selfKeyword| Emitted for the self function parameter and self path-specifier.|
151// |selfTypeKeyword| Emitted for the Self type parameter.|
152// |toolModule| Emitted for tool modules.|
153// |typeParameter| Emitted for type parameters.|
154// |unresolvedReference| Emitted for unresolved references, names that rust-analyzer can't find the definition of.|
155// |variable| Emitted for locals, constants and statics.|
156//
157//
158// #### Token Modifiers
159//
160// Token modifiers allow to style some elements in the source code more precisely.
161//
162// Rust-analyzer currently emits the following token modifiers:
163//
164// |           |                                |
165// |-----------|--------------------------------|
166// |async| Emitted for async functions and the `async` and `await` keywords.|
167// |attribute| Emitted for tokens inside attributes.|
// |callable| Emitted for locals whose types implement one of the `Fn*` traits.|
169// |constant| Emitted for const.|
// |consuming| Emitted for locals that are being consumed when used in a function call.|
// |controlFlow| Emitted for control-flow related tokens, this includes the `?` operator.|
// |crateRoot| Emitted for crate names, like `serde` and `crate`.|
173// |declaration| Emitted for names of definitions, like `foo` in `fn foo(){}`.|
// |defaultLibrary| Emitted for items from built-in crates (std, core, alloc, test and proc_macro).|
175// |documentation| Emitted for documentation comment.|
176// |injected| Emitted for doc-string injected highlighting like rust source blocks in documentation.|
177// |intraDocLink| Emitted for intra doc links in doc-string.|
// |library| Emitted for items that are defined outside of the current crate.|
179// |macro|  Emitted for tokens inside macro call.|
180// |mutable| Emitted for mutable locals and statics as well as functions taking `&mut self`.|
// |public| Emitted for items that are from the current crate and are `pub`.|
// |reference| Emitted for locals behind a reference and functions taking `self` by reference.|
// |static| Emitted for "static" functions, also known as functions that do not take a `self` param, as well as statics and consts.|
184// |trait| Emitted for associated trait item.|
// |unsafe| Emitted for unsafe operations, like unsafe function calls, as well as the `unsafe` token.|
186//
187// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113164457-06cfb980-9239-11eb-819b-0f93e646acf8.png)
188// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113187625-f7f50100-9250-11eb-825e-91c58f236071.png)
189pub(crate) fn highlight(
190    db: &RootDatabase,
191    config: HighlightConfig,
192    file_id: FileId,
193    range_to_highlight: Option<TextRange>,
194) -> Vec<HlRange> {
195    let _p = tracing::info_span!("highlight").entered();
196    let sema = Semantics::new(db);
197    let file_id = sema
198        .attach_first_edition(file_id)
199        .unwrap_or_else(|| EditionedFileId::current_edition(db, file_id));
200
201    // Determine the root based on the given range.
202    let (root, range_to_highlight) = {
203        let file = sema.parse(file_id);
204        let source_file = file.syntax();
205        match range_to_highlight {
206            Some(range) => {
207                let node = match source_file.covering_element(range) {
208                    NodeOrToken::Node(it) => it,
209                    NodeOrToken::Token(it) => it.parent().unwrap_or_else(|| source_file.clone()),
210                };
211                (node, range)
212            }
213            None => (source_file.clone(), source_file.text_range()),
214        }
215    };
216
217    let mut hl = highlights::Highlights::new(root.text_range());
218    let krate = sema.scope(&root).map(|it| it.krate());
219    traverse(&mut hl, &sema, config, InRealFile::new(file_id, &root), krate, range_to_highlight);
220    hl.to_vec()
221}
222
223fn traverse(
224    hl: &mut Highlights,
225    sema: &Semantics<'_, RootDatabase>,
226    config: HighlightConfig,
227    InRealFile { file_id, value: root }: InRealFile<&SyntaxNode>,
228    krate: Option<hir::Crate>,
229    range_to_highlight: TextRange,
230) {
231    let is_unlinked = sema.file_to_module_def(file_id.file_id(sema.db)).is_none();
232
233    enum AttrOrDerive {
234        Attr(ast::Item),
235        Derive(ast::Item),
236    }
237
238    impl AttrOrDerive {
239        fn item(&self) -> &ast::Item {
240            match self {
241                AttrOrDerive::Attr(item) | AttrOrDerive::Derive(item) => item,
242            }
243        }
244    }
245
246    let empty = FxHashSet::default();
247
248    // FIXME: accommodate range highlighting
249    let mut tt_level = 0;
250    // FIXME: accommodate range highlighting
251    let mut attr_or_derive_item = None;
252
253    // FIXME: these are not perfectly accurate, we determine them by the real file's syntax tree
254    // an attribute nested in a macro call will not emit `inside_attribute`
255    let mut inside_attribute = false;
256
257    // FIXME: accommodate range highlighting
258    let mut body_stack: Vec<Option<DefWithBody>> = vec![];
259    let mut per_body_cache: FxHashMap<DefWithBody, (FxHashSet<_>, FxHashMap<Name, u32>)> =
260        FxHashMap::default();
261
262    // Walk all nodes, keeping track of whether we are inside a macro or not.
263    // If in macro, expand it first and highlight the expanded code.
264    let mut preorder = root.preorder_with_tokens();
265    while let Some(event) = preorder.next() {
266        use WalkEvent::{Enter, Leave};
267
268        let range = match &event {
269            Enter(it) | Leave(it) => it.text_range(),
270        };
271
272        // Element outside of the viewport, no need to highlight
273        if range_to_highlight.intersect(range).is_none() {
274            continue;
275        }
276
277        match event.clone() {
278            Enter(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
279                tt_level += 1;
280            }
281            Leave(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
282                tt_level -= 1;
283            }
284            Enter(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
285                inside_attribute = true
286            }
287            Leave(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
288                inside_attribute = false
289            }
290            Enter(NodeOrToken::Node(node)) => {
291                if let Some(item) = <Either<ast::Item, ast::Variant>>::cast(node.clone()) {
292                    match item {
293                        Either::Left(item) => {
294                            match &item {
295                                ast::Item::Fn(it) => {
296                                    body_stack.push(sema.to_def(it).map(Into::into))
297                                }
298                                ast::Item::Const(it) => {
299                                    body_stack.push(sema.to_def(it).map(Into::into))
300                                }
301                                ast::Item::Static(it) => {
302                                    body_stack.push(sema.to_def(it).map(Into::into))
303                                }
304                                _ => (),
305                            }
306
307                            if attr_or_derive_item.is_none() {
308                                if sema.is_attr_macro_call(InFile::new(file_id.into(), &item)) {
309                                    attr_or_derive_item = Some(AttrOrDerive::Attr(item));
310                                } else {
311                                    let adt = match item {
312                                        ast::Item::Enum(it) => Some(ast::Adt::Enum(it)),
313                                        ast::Item::Struct(it) => Some(ast::Adt::Struct(it)),
314                                        ast::Item::Union(it) => Some(ast::Adt::Union(it)),
315                                        _ => None,
316                                    };
317                                    match adt {
318                                        Some(adt)
319                                            if sema.is_derive_annotated(InFile::new(
320                                                file_id.into(),
321                                                &adt,
322                                            )) =>
323                                        {
324                                            attr_or_derive_item =
325                                                Some(AttrOrDerive::Derive(ast::Item::from(adt)));
326                                        }
327                                        _ => (),
328                                    }
329                                }
330                            }
331                        }
332                        Either::Right(it) => body_stack.push(sema.to_def(&it).map(Into::into)),
333                    }
334                }
335            }
336            Leave(NodeOrToken::Node(node))
337                if <Either<ast::Item, ast::Variant>>::can_cast(node.kind()) =>
338            {
339                match ast::Item::cast(node.clone()) {
340                    Some(item) => {
341                        if attr_or_derive_item.as_ref().is_some_and(|it| *it.item() == item) {
342                            attr_or_derive_item = None;
343                        }
344                        if matches!(
345                            item,
346                            ast::Item::Fn(_) | ast::Item::Const(_) | ast::Item::Static(_)
347                        ) {
348                            body_stack.pop();
349                        }
350                    }
351                    None => _ = body_stack.pop(),
352                }
353            }
354            _ => (),
355        }
356
357        let element = match event {
358            Enter(NodeOrToken::Token(tok)) if tok.kind() == WHITESPACE => continue,
359            Enter(it) => it,
360            Leave(NodeOrToken::Token(_)) => continue,
361            Leave(NodeOrToken::Node(node)) => {
362                if config.inject_doc_comment {
363                    // Doc comment highlighting injection, we do this when leaving the node
364                    // so that we overwrite the highlighting of the doc comment itself.
365                    inject::doc_comment(hl, sema, config, file_id, &node);
366                }
367                continue;
368            }
369        };
370
371        let element = match element.clone() {
372            NodeOrToken::Node(n) => match ast::NameLike::cast(n) {
373                Some(n) => NodeOrToken::Node(n),
374                None => continue,
375            },
376            NodeOrToken::Token(t) => NodeOrToken::Token(t),
377        };
378        let original_token = element.as_token().cloned();
379
380        // Descending tokens into macros is expensive even if no descending occurs, so make sure
381        // that we actually are in a position where descending is possible.
382        let in_macro = tt_level > 0
383            || match attr_or_derive_item {
384                Some(AttrOrDerive::Attr(_)) => true,
385                Some(AttrOrDerive::Derive(_)) => inside_attribute,
386                None => false,
387            };
388
389        let (descended_element, current_body) = match element {
390            // Attempt to descend tokens into macro-calls.
391            NodeOrToken::Token(token) if in_macro => {
392                let descended = descend_token(sema, InRealFile::new(file_id, token));
393                let body = match &descended.value {
394                    NodeOrToken::Node(n) => {
395                        sema.body_for(InFile::new(descended.file_id, n.syntax()))
396                    }
397                    NodeOrToken::Token(t) => {
398                        t.parent().and_then(|it| sema.body_for(InFile::new(descended.file_id, &it)))
399                    }
400                };
401                (descended, body)
402            }
403            n => (InFile::new(file_id.into(), n), body_stack.last().copied().flatten()),
404        };
405        // string highlight injections
406        if let (Some(original_token), Some(descended_token)) =
407            (original_token, descended_element.value.as_token())
408        {
409            let control_flow = string_injections(
410                hl,
411                sema,
412                config,
413                file_id,
414                krate,
415                original_token,
416                descended_token,
417            );
418            if control_flow.is_break() {
419                continue;
420            }
421        }
422
423        let edition = descended_element.file_id.edition(sema.db);
424        let (unsafe_ops, bindings_shadow_count) = match current_body {
425            Some(current_body) => {
426                let (ops, bindings) = per_body_cache
427                    .entry(current_body)
428                    .or_insert_with(|| (sema.get_unsafe_ops(current_body), Default::default()));
429                (&*ops, Some(bindings))
430            }
431            None => (&empty, None),
432        };
433        let is_unsafe_node =
434            |node| unsafe_ops.contains(&InFile::new(descended_element.file_id, node));
435        let element = match descended_element.value {
436            NodeOrToken::Node(name_like) => {
437                let hl = salsa::attach(sema.db, || {
438                    highlight::name_like(
439                        sema,
440                        krate,
441                        bindings_shadow_count,
442                        &is_unsafe_node,
443                        config.syntactic_name_ref_highlighting,
444                        name_like,
445                        edition,
446                    )
447                });
448                if hl.is_some() && !in_macro {
449                    // skip highlighting the contained token of our name-like node
450                    // as that would potentially overwrite our result
451                    preorder.skip_subtree();
452                }
453                hl
454            }
455            NodeOrToken::Token(token) => salsa::attach(sema.db, || {
456                highlight::token(sema, token, edition, &is_unsafe_node, tt_level > 0)
457                    .zip(Some(None))
458            }),
459        };
460        if let Some((mut highlight, binding_hash)) = element {
461            if is_unlinked && highlight.tag == HlTag::UnresolvedReference {
462                // do not emit unresolved references if the file is unlinked
463                // let the editor do its highlighting for these tokens instead
464                continue;
465            }
466
467            // apply config filtering
468            if !filter_by_config(&mut highlight, config) {
469                continue;
470            }
471
472            if inside_attribute {
473                highlight |= HlMod::Attribute
474            }
475            if let Some(m) = descended_element.file_id.macro_file() {
476                if let MacroKind::ProcMacro | MacroKind::Attr | MacroKind::Derive = m.kind(sema.db)
477                {
478                    highlight |= HlMod::ProcMacro
479                }
480                highlight |= HlMod::Macro
481            }
482
483            hl.add(HlRange { range, highlight, binding_hash });
484        }
485    }
486}
487
488fn string_injections(
489    hl: &mut Highlights,
490    sema: &Semantics<'_, RootDatabase>,
491    config: HighlightConfig,
492    file_id: EditionedFileId,
493    krate: Option<hir::Crate>,
494    token: SyntaxToken,
495    descended_token: &SyntaxToken,
496) -> ControlFlow<()> {
497    if !matches!(token.kind(), STRING | BYTE_STRING | BYTE | CHAR | C_STRING) {
498        return ControlFlow::Continue(());
499    }
500    if let Some(string) = ast::String::cast(token.clone()) {
501        if let Some(descended_string) = ast::String::cast(descended_token.clone()) {
502            if string.is_raw()
503                && inject::ra_fixture(hl, sema, config, &string, &descended_string).is_some()
504            {
505                return ControlFlow::Break(());
506            }
507            highlight_format_string(
508                hl,
509                sema,
510                krate,
511                &string,
512                &descended_string,
513                file_id.edition(sema.db),
514            );
515
516            if !string.is_raw() {
517                highlight_escape_string(hl, &string);
518            }
519        }
520    } else if let Some(byte_string) = ast::ByteString::cast(token.clone()) {
521        if !byte_string.is_raw() {
522            highlight_escape_string(hl, &byte_string);
523        }
524    } else if let Some(c_string) = ast::CString::cast(token.clone()) {
525        if !c_string.is_raw() {
526            highlight_escape_string(hl, &c_string);
527        }
528    } else if let Some(char) = ast::Char::cast(token.clone()) {
529        highlight_escape_char(hl, &char)
530    } else if let Some(byte) = ast::Byte::cast(token) {
531        highlight_escape_byte(hl, &byte)
532    }
533    ControlFlow::Continue(())
534}
535
536fn descend_token(
537    sema: &Semantics<'_, RootDatabase>,
538    token: InRealFile<SyntaxToken>,
539) -> InFile<NodeOrToken<ast::NameLike, SyntaxToken>> {
540    if token.value.kind() == COMMENT {
541        return token.map(NodeOrToken::Token).into();
542    }
543    let ranker = Ranker::from_token(&token.value);
544
545    let mut t = None;
546    let mut r = 0;
547    sema.descend_into_macros_breakable(token.clone().into(), |tok, _ctx| {
548        // FIXME: Consider checking ctx transparency for being opaque?
549        let my_rank = ranker.rank_token(&tok.value);
550
551        if my_rank >= Ranker::MAX_RANK {
552            // a rank of 0b1110 means that we have found a maximally interesting
553            // token so stop early.
554            t = Some(tok);
555            return ControlFlow::Break(());
556        }
557
558        // r = r.max(my_rank);
559        // t = Some(t.take_if(|_| r < my_rank).unwrap_or(tok));
560        match &mut t {
561            Some(prev) if r < my_rank => {
562                *prev = tok;
563                r = my_rank;
564            }
565            Some(_) => (),
566            None => {
567                r = my_rank;
568                t = Some(tok)
569            }
570        }
571        ControlFlow::Continue(())
572    });
573
574    let token = t.unwrap_or_else(|| token.into());
575    token.map(|token| match token.parent().and_then(ast::NameLike::cast) {
576        // Remap the token into the wrapping single token nodes
577        Some(parent) => match (token.kind(), parent.syntax().kind()) {
578            (T![ident] | T![self], NAME)
579            | (T![ident] | T![self] | T![super] | T![crate] | T![Self], NAME_REF)
580            | (INT_NUMBER, NAME_REF)
581            | (LIFETIME_IDENT, LIFETIME) => NodeOrToken::Node(parent),
582            _ => NodeOrToken::Token(token),
583        },
584        None => NodeOrToken::Token(token),
585    })
586}
587
588fn filter_by_config(highlight: &mut Highlight, config: HighlightConfig) -> bool {
589    match &mut highlight.tag {
590        HlTag::StringLiteral if !config.strings => return false,
591        // If punctuation is disabled, make the macro bang part of the macro call again.
592        tag @ HlTag::Punctuation(HlPunct::MacroBang) => {
593            if !config.macro_bang {
594                *tag = HlTag::Symbol(SymbolKind::Macro);
595            } else if !config.specialize_punctuation {
596                *tag = HlTag::Punctuation(HlPunct::Other);
597            }
598        }
599        HlTag::Punctuation(_) if !config.punctuation && highlight.mods.is_empty() => return false,
600        tag @ HlTag::Punctuation(_) if !config.specialize_punctuation => {
601            *tag = HlTag::Punctuation(HlPunct::Other);
602        }
603        HlTag::Operator(_) if !config.operator && highlight.mods.is_empty() => return false,
604        tag @ HlTag::Operator(_) if !config.specialize_operator => {
605            *tag = HlTag::Operator(HlOperator::Other);
606        }
607        _ => (),
608    }
609    true
610}