ide/
syntax_highlighting.rs

1pub(crate) mod tags;
2
3mod highlights;
4
5mod escape;
6mod format;
7mod highlight;
8mod inject;
9
10mod html;
11#[cfg(test)]
12mod tests;
13
14use std::ops::ControlFlow;
15
16use either::Either;
17use hir::{DefWithBody, EditionedFileId, InFile, InRealFile, MacroKind, Semantics};
18use ide_db::{FxHashMap, FxHashSet, MiniCore, Ranker, RootDatabase, SymbolKind};
19use syntax::{
20    AstNode, AstToken, NodeOrToken,
21    SyntaxKind::*,
22    SyntaxNode, SyntaxToken, T, TextRange, WalkEvent,
23    ast::{self, IsString},
24};
25
26use crate::{
27    FileId, HlMod, HlOperator, HlPunct, HlTag,
28    syntax_highlighting::{
29        escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
30        format::highlight_format_string,
31        highlights::Highlights,
32        tags::Highlight,
33    },
34};
35
36pub(crate) use html::highlight_as_html;
37pub(crate) use html::highlight_as_html_with_config;
38
39#[derive(Debug, Clone, Copy)]
40pub struct HlRange {
41    pub range: TextRange,
42    pub highlight: Highlight,
43    pub binding_hash: Option<u64>,
44}
45
46#[derive(Copy, Clone, Debug)]
47pub struct HighlightConfig<'a> {
48    /// Whether to highlight strings
49    pub strings: bool,
50    /// Whether to highlight comments
51    pub comments: bool,
52    /// Whether to highlight punctuation
53    pub punctuation: bool,
54    /// Whether to specialize punctuation highlights
55    pub specialize_punctuation: bool,
56    /// Whether to highlight operator
57    pub operator: bool,
58    /// Whether to specialize operator highlights
59    pub specialize_operator: bool,
60    /// Whether to inject highlights into doc comments
61    pub inject_doc_comment: bool,
62    /// Whether to highlight the macro call bang
63    pub macro_bang: bool,
64    /// Whether to highlight unresolved things be their syntax
65    pub syntactic_name_ref_highlighting: bool,
66    pub minicore: MiniCore<'a>,
67}
68
69// Feature: Semantic Syntax Highlighting
70//
71// rust-analyzer highlights the code semantically.
72// For example, `Bar` in `foo::Bar` might be colored differently depending on whether `Bar` is an enum or a trait.
73// rust-analyzer does not specify colors directly, instead it assigns a tag (like `struct`) and a set of modifiers (like `declaration`) to each token.
74// It's up to the client to map those to specific colors.
75//
76// The general rule is that a reference to an entity gets colored the same way as the entity itself.
// We also give a special modifier to `mut` and `&mut` local variables.
78//
79//
80// #### Token Tags
81//
82// Rust-analyzer currently emits the following token tags:
83//
84// - For items:
85//
86// |           |                                |
87// |-----------|--------------------------------|
88// | attribute |  Emitted for attribute macros. |
89// |enum| Emitted for enums. |
90// |function| Emitted for free-standing functions. |
91// |derive| Emitted for derive macros. |
92// |macro| Emitted for function-like macros. |
// |method| Emitted for associated functions, also known as methods. |
94// |namespace| Emitted for modules. |
95// |struct| Emitted for structs.|
96// |trait| Emitted for traits.|
97// |typeAlias| Emitted for type aliases and `Self` in `impl`s.|
98// |union| Emitted for unions.|
99//
100// - For literals:
101//
102// |           |                                |
103// |-----------|--------------------------------|
104// | boolean|  Emitted for the boolean literals `true` and `false`.|
105// | character| Emitted for character literals.|
106// | number| Emitted for numeric literals.|
107// | string| Emitted for string literals.|
108// | escapeSequence| Emitted for escaped sequences inside strings like `\n`.|
109// | formatSpecifier| Emitted for format specifiers `{:?}` in `format!`-like macros.|
110//
111// - For operators:
112//
113// |           |                                |
114// |-----------|--------------------------------|
115// |operator| Emitted for general operators.|
116// |arithmetic| Emitted for the arithmetic operators `+`, `-`, `*`, `/`, `+=`, `-=`, `*=`, `/=`.|
117// |bitwise| Emitted for the bitwise operators `\|`, `&`, `!`, `^`, `\|=`, `&=`, `^=`.|
// |comparison| Emitted for the comparison operators `>`, `<`, `==`, `>=`, `<=`, `!=`.|
119// |logical| Emitted for the logical operators `\|\|`, `&&`, `!`.|
120//
121// - For punctuation:
122//
123// |           |                                |
124// |-----------|--------------------------------|
125// |punctuation| Emitted for general punctuation.|
126// |attributeBracket| Emitted for attribute invocation brackets, that is the `#[` and `]` tokens.|
127// |angle| Emitted for `<>` angle brackets.|
128// |brace| Emitted for `{}` braces.|
129// |bracket| Emitted for `[]` brackets.|
130// |parenthesis| Emitted for `()` parentheses.|
131// |colon| Emitted for the `:` token.|
132// |comma| Emitted for the `,` token.|
133// |dot| Emitted for the `.` token.|
134// |semi| Emitted for the `;` token.|
135// |macroBang| Emitted for the `!` token in macro calls.|
136//
// - For everything else:
138//
139// |           |                                |
140// |-----------|--------------------------------|
141// |builtinAttribute| Emitted for names to builtin attributes in attribute path, the `repr` in `#[repr(u8)]` for example.|
142// |builtinType| Emitted for builtin types like `u32`, `str` and `f32`.|
143// |comment| Emitted for comments.|
144// |constParameter| Emitted for const parameters.|
145// |deriveHelper| Emitted for derive helper attributes.|
146// |enumMember| Emitted for enum variants.|
147// |generic| Emitted for generic tokens that have no mapping.|
148// |keyword| Emitted for keywords.|
149// |label| Emitted for labels.|
150// |lifetime| Emitted for lifetimes.|
151// |parameter| Emitted for non-self function parameters.|
152// |property| Emitted for struct and union fields.|
153// |selfKeyword| Emitted for the self function parameter and self path-specifier.|
154// |selfTypeKeyword| Emitted for the Self type parameter.|
155// |toolModule| Emitted for tool modules.|
156// |typeParameter| Emitted for type parameters.|
157// |unresolvedReference| Emitted for unresolved references, names that rust-analyzer can't find the definition of.|
158// |variable| Emitted for locals, constants and statics.|
159//
160//
161// #### Token Modifiers
162//
// Token modifiers allow some elements in the source code to be styled more precisely.
164//
165// Rust-analyzer currently emits the following token modifiers:
166//
167// |           |                                |
168// |-----------|--------------------------------|
169// |async| Emitted for async functions and the `async` and `await` keywords.|
170// |attribute| Emitted for tokens inside attributes.|
// |callable| Emitted for locals whose types implement one of the `Fn*` traits.|
172// |constant| Emitted for const.|
// |consuming| Emitted for locals that are being consumed when used in a function call.|
// |controlFlow| Emitted for control-flow related tokens, this includes the `?` operator.|
175// |crateRoot| Emitted for crate names, like `serde` and `crate`.|
176// |declaration| Emitted for names of definitions, like `foo` in `fn foo(){}`.|
177// |defaultLibrary| Emitted for items from built-in crates (std, core, alloc, test and proc_macro).|
// |documentation| Emitted for documentation comments.|
179// |injected| Emitted for doc-string injected highlighting like rust source blocks in documentation.|
180// |intraDocLink| Emitted for intra doc links in doc-string.|
181// |library| Emitted for items that are defined outside of the current crate.|
182// |macro|  Emitted for tokens inside macro call.|
183// |mutable| Emitted for mutable locals and statics as well as functions taking `&mut self`.|
184// |public| Emitted for items that are from the current crate and are `pub`.|
185// |reference| Emitted for locals behind a reference and functions taking `self` by reference.|
186// |static| Emitted for "static" functions, also known as functions that do not take a `self` param, as well as statics and consts.|
187// |trait| Emitted for associated trait item.|
188// |unsafe| Emitted for unsafe operations, like unsafe function calls, as well as the `unsafe` token.|
189//
190// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113164457-06cfb980-9239-11eb-819b-0f93e646acf8.png)
191// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113187625-f7f50100-9250-11eb-825e-91c58f236071.png)
192pub(crate) fn highlight(
193    db: &RootDatabase,
194    config: &HighlightConfig<'_>,
195    file_id: FileId,
196    range_to_highlight: Option<TextRange>,
197) -> Vec<HlRange> {
198    let _p = tracing::info_span!("highlight").entered();
199    let sema = Semantics::new(db);
200    let file_id = sema.attach_first_edition(file_id);
201
202    // Determine the root based on the given range.
203    let (root, range_to_highlight) = {
204        let file = sema.parse(file_id);
205        let source_file = file.syntax();
206        match range_to_highlight {
207            Some(range) => {
208                let node = match source_file.covering_element(range) {
209                    NodeOrToken::Node(it) => it,
210                    NodeOrToken::Token(it) => it.parent().unwrap_or_else(|| source_file.clone()),
211                };
212                (node, range)
213            }
214            None => (source_file.clone(), source_file.text_range()),
215        }
216    };
217
218    let mut hl = highlights::Highlights::new(root.text_range());
219    let krate = sema.scope(&root).map(|it| it.krate());
220    traverse(&mut hl, &sema, config, InRealFile::new(file_id, &root), krate, range_to_highlight);
221    hl.to_vec()
222}
223
224fn traverse(
225    hl: &mut Highlights,
226    sema: &Semantics<'_, RootDatabase>,
227    config: &HighlightConfig<'_>,
228    InRealFile { file_id, value: root }: InRealFile<&SyntaxNode>,
229    krate: Option<hir::Crate>,
230    range_to_highlight: TextRange,
231) {
232    let is_unlinked = sema.file_to_module_def(file_id.file_id(sema.db)).is_none();
233
234    enum AttrOrDerive {
235        Attr(ast::Item),
236        Derive(ast::Item),
237    }
238
239    impl AttrOrDerive {
240        fn item(&self) -> &ast::Item {
241            match self {
242                AttrOrDerive::Attr(item) | AttrOrDerive::Derive(item) => item,
243            }
244        }
245    }
246
247    let empty = FxHashSet::default();
248
249    // FIXME: accommodate range highlighting
250    let mut tt_level = 0;
251    // FIXME: accommodate range highlighting
252    let mut attr_or_derive_item = None;
253
254    // FIXME: these are not perfectly accurate, we determine them by the real file's syntax tree
255    // an attribute nested in a macro call will not emit `inside_attribute`
256    let mut inside_attribute = false;
257
258    // FIXME: accommodate range highlighting
259    let mut body_stack: Vec<Option<DefWithBody>> = vec![];
260    let mut per_body_cache: FxHashMap<DefWithBody, FxHashSet<_>> = FxHashMap::default();
261
262    // Walk all nodes, keeping track of whether we are inside a macro or not.
263    // If in macro, expand it first and highlight the expanded code.
264    let mut preorder = root.preorder_with_tokens();
265    while let Some(event) = preorder.next() {
266        use WalkEvent::{Enter, Leave};
267
268        let range = match &event {
269            Enter(it) | Leave(it) => it.text_range(),
270        };
271
272        // Element outside of the viewport, no need to highlight
273        if range_to_highlight.intersect(range).is_none() {
274            continue;
275        }
276
277        match event.clone() {
278            Enter(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
279                tt_level += 1;
280            }
281            Leave(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
282                tt_level -= 1;
283            }
284            Enter(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
285                inside_attribute = true
286            }
287            Leave(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
288                inside_attribute = false
289            }
290            Enter(NodeOrToken::Node(node)) => {
291                if let Some(item) = <Either<ast::Item, ast::Variant>>::cast(node.clone()) {
292                    match item {
293                        Either::Left(item) => {
294                            match &item {
295                                ast::Item::Fn(it) => {
296                                    body_stack.push(sema.to_def(it).map(Into::into))
297                                }
298                                ast::Item::Const(it) => {
299                                    body_stack.push(sema.to_def(it).map(Into::into))
300                                }
301                                ast::Item::Static(it) => {
302                                    body_stack.push(sema.to_def(it).map(Into::into))
303                                }
304                                _ => (),
305                            }
306
307                            if attr_or_derive_item.is_none() {
308                                if sema.is_attr_macro_call(InFile::new(file_id.into(), &item)) {
309                                    attr_or_derive_item = Some(AttrOrDerive::Attr(item));
310                                } else {
311                                    let adt = match item {
312                                        ast::Item::Enum(it) => Some(ast::Adt::Enum(it)),
313                                        ast::Item::Struct(it) => Some(ast::Adt::Struct(it)),
314                                        ast::Item::Union(it) => Some(ast::Adt::Union(it)),
315                                        _ => None,
316                                    };
317                                    match adt {
318                                        Some(adt)
319                                            if sema.is_derive_annotated(InFile::new(
320                                                file_id.into(),
321                                                &adt,
322                                            )) =>
323                                        {
324                                            attr_or_derive_item =
325                                                Some(AttrOrDerive::Derive(ast::Item::from(adt)));
326                                        }
327                                        _ => (),
328                                    }
329                                }
330                            }
331                        }
332                        Either::Right(it) => body_stack.push(sema.to_def(&it).map(Into::into)),
333                    }
334                }
335            }
336            Leave(NodeOrToken::Node(node))
337                if <Either<ast::Item, ast::Variant>>::can_cast(node.kind()) =>
338            {
339                match ast::Item::cast(node.clone()) {
340                    Some(item) => {
341                        if attr_or_derive_item.as_ref().is_some_and(|it| *it.item() == item) {
342                            attr_or_derive_item = None;
343                        }
344                        if matches!(
345                            item,
346                            ast::Item::Fn(_) | ast::Item::Const(_) | ast::Item::Static(_)
347                        ) {
348                            body_stack.pop();
349                        }
350                    }
351                    None => _ = body_stack.pop(),
352                }
353            }
354            _ => (),
355        }
356
357        let element = match event {
358            Enter(NodeOrToken::Token(tok)) if tok.kind() == WHITESPACE => continue,
359            Enter(it) => it,
360            Leave(NodeOrToken::Token(_)) => continue,
361            Leave(NodeOrToken::Node(node)) => {
362                if config.inject_doc_comment {
363                    // Doc comment highlighting injection, we do this when leaving the node
364                    // so that we overwrite the highlighting of the doc comment itself.
365                    inject::doc_comment(hl, sema, config, file_id, &node);
366                }
367                continue;
368            }
369        };
370
371        let element = match element.clone() {
372            NodeOrToken::Node(n) => match ast::NameLike::cast(n) {
373                Some(n) => NodeOrToken::Node(n),
374                None => continue,
375            },
376            NodeOrToken::Token(t) => NodeOrToken::Token(t),
377        };
378        let original_token = element.as_token().cloned();
379
380        // Descending tokens into macros is expensive even if no descending occurs, so make sure
381        // that we actually are in a position where descending is possible.
382        let in_macro = tt_level > 0
383            || match attr_or_derive_item {
384                Some(AttrOrDerive::Attr(_)) => true,
385                Some(AttrOrDerive::Derive(_)) => inside_attribute,
386                None => false,
387            };
388
389        let (descended_element, current_body) = match element {
390            // Attempt to descend tokens into macro-calls.
391            NodeOrToken::Token(token) if in_macro => {
392                let descended = descend_token(sema, InRealFile::new(file_id, token));
393                let body = match &descended.value {
394                    NodeOrToken::Node(n) => {
395                        sema.body_for(InFile::new(descended.file_id, n.syntax()))
396                    }
397                    NodeOrToken::Token(t) => {
398                        t.parent().and_then(|it| sema.body_for(InFile::new(descended.file_id, &it)))
399                    }
400                };
401                (descended, body)
402            }
403            n => (InFile::new(file_id.into(), n), body_stack.last().copied().flatten()),
404        };
405        // string highlight injections
406        if let (Some(original_token), Some(descended_token)) =
407            (original_token, descended_element.value.as_token())
408        {
409            let control_flow = string_injections(
410                hl,
411                sema,
412                config,
413                file_id,
414                krate,
415                original_token,
416                descended_token,
417            );
418            if control_flow.is_break() {
419                continue;
420            }
421        }
422
423        let edition = descended_element.file_id.edition(sema.db);
424        let unsafe_ops = match current_body {
425            Some(current_body) => per_body_cache
426                .entry(current_body)
427                .or_insert_with(|| sema.get_unsafe_ops(current_body)),
428            None => &empty,
429        };
430        let is_unsafe_node =
431            |node| unsafe_ops.contains(&InFile::new(descended_element.file_id, node));
432        let element = match descended_element.value {
433            NodeOrToken::Node(name_like) => {
434                let hl = highlight::name_like(
435                    sema,
436                    krate,
437                    &is_unsafe_node,
438                    config.syntactic_name_ref_highlighting,
439                    name_like,
440                    edition,
441                );
442                if hl.is_some() && !in_macro {
443                    // skip highlighting the contained token of our name-like node
444                    // as that would potentially overwrite our result
445                    preorder.skip_subtree();
446                }
447                hl
448            }
449            NodeOrToken::Token(token) => {
450                highlight::token(sema, token, edition, &is_unsafe_node, tt_level > 0)
451                    .zip(Some(None))
452            }
453        };
454        if let Some((mut highlight, binding_hash)) = element {
455            if is_unlinked && highlight.tag == HlTag::UnresolvedReference {
456                // do not emit unresolved references if the file is unlinked
457                // let the editor do its highlighting for these tokens instead
458                continue;
459            }
460
461            // apply config filtering
462            if !filter_by_config(&mut highlight, config) {
463                continue;
464            }
465
466            if inside_attribute {
467                highlight |= HlMod::Attribute
468            }
469            if let Some(m) = descended_element.file_id.macro_file() {
470                if let MacroKind::ProcMacro | MacroKind::Attr | MacroKind::Derive = m.kind(sema.db)
471                {
472                    highlight |= HlMod::ProcMacro
473                }
474                highlight |= HlMod::Macro
475            }
476
477            hl.add(HlRange { range, highlight, binding_hash });
478        }
479    }
480}
481
482fn string_injections(
483    hl: &mut Highlights,
484    sema: &Semantics<'_, RootDatabase>,
485    config: &HighlightConfig<'_>,
486    file_id: EditionedFileId,
487    krate: Option<hir::Crate>,
488    token: SyntaxToken,
489    descended_token: &SyntaxToken,
490) -> ControlFlow<()> {
491    if !matches!(token.kind(), STRING | BYTE_STRING | BYTE | CHAR | C_STRING) {
492        return ControlFlow::Continue(());
493    }
494    if let Some(string) = ast::String::cast(token.clone()) {
495        if let Some(descended_string) = ast::String::cast(descended_token.clone()) {
496            if string.is_raw()
497                && inject::ra_fixture(hl, sema, config, &string, &descended_string).is_some()
498            {
499                return ControlFlow::Break(());
500            }
501            highlight_format_string(
502                hl,
503                sema,
504                krate,
505                &string,
506                &descended_string,
507                file_id.edition(sema.db),
508            );
509
510            if !string.is_raw() {
511                highlight_escape_string(hl, config, &string);
512            }
513        }
514    } else if let Some(byte_string) = ast::ByteString::cast(token.clone()) {
515        if !byte_string.is_raw() {
516            highlight_escape_string(hl, config, &byte_string);
517        }
518    } else if let Some(c_string) = ast::CString::cast(token.clone()) {
519        if !c_string.is_raw() {
520            highlight_escape_string(hl, config, &c_string);
521        }
522    } else if let Some(char) = ast::Char::cast(token.clone()) {
523        highlight_escape_char(hl, config, &char)
524    } else if let Some(byte) = ast::Byte::cast(token) {
525        highlight_escape_byte(hl, config, &byte)
526    }
527    ControlFlow::Continue(())
528}
529
530fn descend_token(
531    sema: &Semantics<'_, RootDatabase>,
532    token: InRealFile<SyntaxToken>,
533) -> InFile<NodeOrToken<ast::NameLike, SyntaxToken>> {
534    if token.value.kind() == COMMENT {
535        return token.map(NodeOrToken::Token).into();
536    }
537    let ranker = Ranker::from_token(&token.value);
538
539    let mut t = None;
540    let mut r = 0;
541    sema.descend_into_macros_breakable(token.clone().into(), |tok, _ctx| {
542        // FIXME: Consider checking ctx transparency for being opaque?
543        let my_rank = ranker.rank_token(&tok.value);
544
545        if my_rank >= Ranker::MAX_RANK {
546            // a rank of 0b1110 means that we have found a maximally interesting
547            // token so stop early.
548            t = Some(tok);
549            return ControlFlow::Break(());
550        }
551
552        // r = r.max(my_rank);
553        // t = Some(t.take_if(|_| r < my_rank).unwrap_or(tok));
554        match &mut t {
555            Some(prev) if r < my_rank => {
556                *prev = tok;
557                r = my_rank;
558            }
559            Some(_) => (),
560            None => {
561                r = my_rank;
562                t = Some(tok)
563            }
564        }
565        ControlFlow::Continue(())
566    });
567
568    let token = t.unwrap_or_else(|| token.into());
569    token.map(|token| match token.parent().and_then(ast::NameLike::cast) {
570        // Remap the token into the wrapping single token nodes
571        Some(parent) => match (token.kind(), parent.syntax().kind()) {
572            (T![ident] | T![self], NAME)
573            | (T![ident] | T![self] | T![super] | T![crate] | T![Self], NAME_REF)
574            | (INT_NUMBER, NAME_REF)
575            | (LIFETIME_IDENT, LIFETIME) => NodeOrToken::Node(parent),
576            _ => NodeOrToken::Token(token),
577        },
578        None => NodeOrToken::Token(token),
579    })
580}
581
582fn filter_by_config(highlight: &mut Highlight, config: &HighlightConfig<'_>) -> bool {
583    match &mut highlight.tag {
584        HlTag::StringLiteral | HlTag::EscapeSequence | HlTag::InvalidEscapeSequence
585            if !config.strings =>
586        {
587            return false;
588        }
589        HlTag::Comment if !config.comments => return false,
590        // If punctuation is disabled, make the macro bang part of the macro call again.
591        tag @ HlTag::Punctuation(HlPunct::MacroBang) => {
592            if !config.macro_bang {
593                *tag = HlTag::Symbol(SymbolKind::Macro);
594            } else if !config.specialize_punctuation {
595                *tag = HlTag::Punctuation(HlPunct::Other);
596            }
597        }
598        HlTag::Punctuation(_) if !config.punctuation && highlight.mods.is_empty() => return false,
599        tag @ HlTag::Punctuation(_) if !config.specialize_punctuation => {
600            *tag = HlTag::Punctuation(HlPunct::Other);
601        }
602        HlTag::Operator(_) if !config.operator && highlight.mods.is_empty() => return false,
603        tag @ HlTag::Operator(_) if !config.specialize_operator => {
604            *tag = HlTag::Operator(HlOperator::Other);
605        }
606        _ => (),
607    }
608    true
609}