ide/
syntax_highlighting.rs

1pub(crate) mod tags;
2
3mod highlights;
4
5mod escape;
6mod format;
7mod highlight;
8mod inject;
9
10mod html;
11#[cfg(test)]
12mod tests;
13
14use std::ops::ControlFlow;
15
16use either::Either;
17use hir::{
18    DefWithBody, EditionedFileId, ExpressionStoreOwner, InFile, InRealFile, MacroKind, Semantics,
19};
20use ide_db::{FxHashMap, FxHashSet, Ranker, RootDatabase, SymbolKind, ra_fixture::RaFixtureConfig};
21use syntax::{
22    AstNode, AstToken, NodeOrToken,
23    SyntaxKind::*,
24    SyntaxNode, SyntaxToken, T, TextRange, WalkEvent,
25    ast::{self, IsString},
26};
27
28use crate::{
29    FileId, HlMod, HlOperator, HlPunct, HlTag,
30    syntax_highlighting::{
31        escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
32        format::highlight_format_string,
33        highlights::Highlights,
34        tags::Highlight,
35    },
36};
37
38pub(crate) use html::highlight_as_html;
39pub(crate) use html::highlight_as_html_with_config;
40
41#[derive(Debug, Clone, Copy)]
42pub struct HlRange {
43    pub range: TextRange,
44    pub highlight: Highlight,
45    pub binding_hash: Option<u64>,
46}
47
48#[derive(Copy, Clone, Debug)]
49pub struct HighlightConfig<'a> {
50    /// Whether to highlight strings
51    pub strings: bool,
52    /// Whether to highlight comments
53    pub comments: bool,
54    /// Whether to highlight punctuation
55    pub punctuation: bool,
56    /// Whether to specialize punctuation highlights
57    pub specialize_punctuation: bool,
58    /// Whether to highlight operator
59    pub operator: bool,
60    /// Whether to specialize operator highlights
61    pub specialize_operator: bool,
62    /// Whether to inject highlights into doc comments
63    pub inject_doc_comment: bool,
64    /// Whether to highlight the macro call bang
65    pub macro_bang: bool,
66    /// Whether to highlight unresolved things be their syntax
67    pub syntactic_name_ref_highlighting: bool,
68    pub ra_fixture: RaFixtureConfig<'a>,
69}
70
71// Feature: Semantic Syntax Highlighting
72//
73// rust-analyzer highlights the code semantically.
74// For example, `Bar` in `foo::Bar` might be colored differently depending on whether `Bar` is an enum or a trait.
75// rust-analyzer does not specify colors directly, instead it assigns a tag (like `struct`) and a set of modifiers (like `declaration`) to each token.
76// It's up to the client to map those to specific colors.
77//
78// The general rule is that a reference to an entity gets colored the same way as the entity itself.
// We also give a special modifier to `mut` and `&mut` local variables.
80//
81//
82// #### Token Tags
83//
84// Rust-analyzer currently emits the following token tags:
85//
86// - For items:
87//
88// |           |                                |
89// |-----------|--------------------------------|
90// | attribute |  Emitted for attribute macros. |
91// |enum| Emitted for enums. |
92// |function| Emitted for free-standing functions. |
93// |derive| Emitted for derive macros. |
94// |macro| Emitted for function-like macros. |
// |method| Emitted for associated functions, also known as methods. |
96// |namespace| Emitted for modules. |
97// |struct| Emitted for structs.|
98// |trait| Emitted for traits.|
99// |typeAlias| Emitted for type aliases and `Self` in `impl`s.|
100// |union| Emitted for unions.|
101//
102// - For literals:
103//
104// |           |                                |
105// |-----------|--------------------------------|
106// | boolean|  Emitted for the boolean literals `true` and `false`.|
107// | character| Emitted for character literals.|
108// | number| Emitted for numeric literals.|
109// | string| Emitted for string literals.|
110// | escapeSequence| Emitted for escaped sequences inside strings like `\n`.|
111// | formatSpecifier| Emitted for format specifiers `{:?}` in `format!`-like macros.|
112//
113// - For operators:
114//
115// |           |                                |
116// |-----------|--------------------------------|
117// |operator| Emitted for general operators.|
118// |arithmetic| Emitted for the arithmetic operators `+`, `-`, `*`, `/`, `+=`, `-=`, `*=`, `/=`.|
119// |bitwise| Emitted for the bitwise operators `\|`, `&`, `!`, `^`, `\|=`, `&=`, `^=`.|
// |comparison| Emitted for the comparison operators `>`, `<`, `==`, `>=`, `<=`, `!=`.|
121// |logical| Emitted for the logical operators `\|\|`, `&&`, `!`.|
122//
123// - For punctuation:
124//
125// |           |                                |
126// |-----------|--------------------------------|
127// |punctuation| Emitted for general punctuation.|
128// |attributeBracket| Emitted for attribute invocation brackets, that is the `#[` and `]` tokens.|
129// |angle| Emitted for `<>` angle brackets.|
130// |brace| Emitted for `{}` braces.|
131// |bracket| Emitted for `[]` brackets.|
132// |parenthesis| Emitted for `()` parentheses.|
133// |colon| Emitted for the `:` token.|
134// |comma| Emitted for the `,` token.|
135// |dot| Emitted for the `.` token.|
136// |semi| Emitted for the `;` token.|
137// |macroBang| Emitted for the `!` token in macro calls.|
138//
// - For everything else:
140//
141// |           |                                |
142// |-----------|--------------------------------|
143// |builtinAttribute| Emitted for names to builtin attributes in attribute path, the `repr` in `#[repr(u8)]` for example.|
144// |builtinType| Emitted for builtin types like `u32`, `str` and `f32`.|
145// |comment| Emitted for comments.|
146// |constParameter| Emitted for const parameters.|
147// |deriveHelper| Emitted for derive helper attributes.|
148// |enumMember| Emitted for enum variants.|
149// |generic| Emitted for generic tokens that have no mapping.|
150// |keyword| Emitted for keywords.|
151// |label| Emitted for labels.|
152// |lifetime| Emitted for lifetimes.|
153// |parameter| Emitted for non-self function parameters.|
154// |property| Emitted for struct and union fields.|
155// |selfKeyword| Emitted for the self function parameter and self path-specifier.|
156// |selfTypeKeyword| Emitted for the Self type parameter.|
157// |toolModule| Emitted for tool modules.|
158// |typeParameter| Emitted for type parameters.|
159// |unresolvedReference| Emitted for unresolved references, names that rust-analyzer can't find the definition of.|
160// |variable| Emitted for locals, constants and statics.|
161//
162//
163// #### Token Modifiers
164//
// Token modifiers allow styling some elements in the source code more precisely.
166//
167// Rust-analyzer currently emits the following token modifiers:
168//
169// |           |                                |
170// |-----------|--------------------------------|
171// |async| Emitted for async functions and the `async` and `await` keywords.|
172// |attribute| Emitted for tokens inside attributes.|
// |callable| Emitted for locals whose types implement one of the `Fn*` traits.|
174// |constant| Emitted for const.|
// |consuming| Emitted for locals that are being consumed when used in a function call.|
// |controlFlow| Emitted for control-flow related tokens, this includes the `?` operator.|
177// |crateRoot| Emitted for crate names, like `serde` and `crate`.|
178// |declaration| Emitted for names of definitions, like `foo` in `fn foo(){}`.|
179// |defaultLibrary| Emitted for items from built-in crates (std, core, alloc, test and proc_macro).|
180// |documentation| Emitted for documentation comment.|
181// |injected| Emitted for doc-string injected highlighting like rust source blocks in documentation.|
182// |intraDocLink| Emitted for intra doc links in doc-string.|
183// |library| Emitted for items that are defined outside of the current crate.|
184// |macro|  Emitted for tokens inside macro call.|
185// |mutable| Emitted for mutable locals and statics as well as functions taking `&mut self`.|
186// |public| Emitted for items that are from the current crate and are `pub`.|
187// |reference| Emitted for locals behind a reference and functions taking `self` by reference.|
188// |static| Emitted for "static" functions, also known as functions that do not take a `self` param, as well as statics and consts.|
189// |trait| Emitted for associated trait item.|
190// |unsafe| Emitted for unsafe operations, like unsafe function calls, as well as the `unsafe` token.|
191//
192// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113164457-06cfb980-9239-11eb-819b-0f93e646acf8.png)
193// ![Semantic Syntax Highlighting](https://user-images.githubusercontent.com/48062697/113187625-f7f50100-9250-11eb-825e-91c58f236071.png)
194pub(crate) fn highlight(
195    db: &RootDatabase,
196    config: &HighlightConfig<'_>,
197    file_id: FileId,
198    range_to_highlight: Option<TextRange>,
199) -> Vec<HlRange> {
200    let _p = tracing::info_span!("highlight").entered();
201    let sema = Semantics::new(db);
202    let file_id = sema.attach_first_edition(file_id);
203
204    // Determine the root based on the given range.
205    let (root, range_to_highlight) = {
206        let file = sema.parse(file_id);
207        let source_file = file.syntax();
208        match range_to_highlight {
209            Some(range) => {
210                let node = match source_file.covering_element(range) {
211                    NodeOrToken::Node(it) => it,
212                    NodeOrToken::Token(it) => it.parent().unwrap_or_else(|| source_file.clone()),
213                };
214                (node, range)
215            }
216            None => (source_file.clone(), source_file.text_range()),
217        }
218    };
219
220    let mut hl = highlights::Highlights::new(root.text_range());
221    let krate = sema.scope(&root).map(|it| it.krate());
222    traverse(&mut hl, &sema, config, InRealFile::new(file_id, &root), krate, range_to_highlight);
223    hl.to_vec()
224}
225
226fn traverse(
227    hl: &mut Highlights,
228    sema: &Semantics<'_, RootDatabase>,
229    config: &HighlightConfig<'_>,
230    InRealFile { file_id, value: root }: InRealFile<&SyntaxNode>,
231    krate: Option<hir::Crate>,
232    range_to_highlight: TextRange,
233) {
234    let is_unlinked = sema.file_to_module_def(file_id.file_id(sema.db)).is_none();
235
236    enum AttrOrDerive {
237        Attr(ast::Item),
238        Derive(ast::Item),
239    }
240
241    impl AttrOrDerive {
242        fn item(&self) -> &ast::Item {
243            match self {
244                AttrOrDerive::Attr(item) | AttrOrDerive::Derive(item) => item,
245            }
246        }
247    }
248
249    let empty = FxHashSet::default();
250
251    // FIXME: accommodate range highlighting
252    let mut tt_level = 0;
253    // FIXME: accommodate range highlighting
254    let mut attr_or_derive_item = None;
255
256    // FIXME: these are not perfectly accurate, we determine them by the real file's syntax tree
257    // an attribute nested in a macro call will not emit `inside_attribute`
258    let mut inside_attribute = false;
259
260    // FIXME: accommodate range highlighting
261    let mut body_stack: Vec<Option<ExpressionStoreOwner>> = vec![];
262    let mut per_body_cache: FxHashMap<ExpressionStoreOwner, FxHashSet<_>> = FxHashMap::default();
263
264    // Walk all nodes, keeping track of whether we are inside a macro or not.
265    // If in macro, expand it first and highlight the expanded code.
266    let mut preorder = root.preorder_with_tokens();
267    while let Some(event) = preorder.next() {
268        use WalkEvent::{Enter, Leave};
269
270        let range = match &event {
271            Enter(it) | Leave(it) => it.text_range(),
272        };
273
274        // Element outside of the viewport, no need to highlight
275        if range_to_highlight.intersect(range).is_none() {
276            continue;
277        }
278
279        match event.clone() {
280            Enter(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
281                tt_level += 1;
282            }
283            Leave(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
284                tt_level -= 1;
285            }
286            Enter(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
287                inside_attribute = true
288            }
289            Leave(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
290                inside_attribute = false
291            }
292            Enter(NodeOrToken::Node(node)) => {
293                // FIXME: ExpressionStore signatures and variant fields
294                // Maybe we can re-use child container stuff here
295                if let Some(item) = <Either<ast::Item, ast::Variant>>::cast(node.clone()) {
296                    match item {
297                        Either::Left(item) => {
298                            match &item {
299                                ast::Item::Fn(it) => body_stack
300                                    .push(sema.to_def(it).map(DefWithBody::from).map(Into::into)),
301                                ast::Item::Const(it) => body_stack
302                                    .push(sema.to_def(it).map(DefWithBody::from).map(Into::into)),
303                                ast::Item::Static(it) => body_stack
304                                    .push(sema.to_def(it).map(DefWithBody::from).map(Into::into)),
305                                _ => (),
306                            }
307
308                            if attr_or_derive_item.is_none() {
309                                if sema.is_attr_macro_call(InFile::new(file_id.into(), &item)) {
310                                    attr_or_derive_item = Some(AttrOrDerive::Attr(item));
311                                } else {
312                                    let adt = match item {
313                                        ast::Item::Enum(it) => Some(ast::Adt::Enum(it)),
314                                        ast::Item::Struct(it) => Some(ast::Adt::Struct(it)),
315                                        ast::Item::Union(it) => Some(ast::Adt::Union(it)),
316                                        _ => None,
317                                    };
318                                    match adt {
319                                        Some(adt)
320                                            if sema.is_derive_annotated(InFile::new(
321                                                file_id.into(),
322                                                &adt,
323                                            )) =>
324                                        {
325                                            attr_or_derive_item =
326                                                Some(AttrOrDerive::Derive(ast::Item::from(adt)));
327                                        }
328                                        _ => (),
329                                    }
330                                }
331                            }
332                        }
333                        Either::Right(it) => {
334                            body_stack.push(sema.to_def(&it).map(DefWithBody::from).map(Into::into))
335                        }
336                    }
337                }
338            }
339            Leave(NodeOrToken::Node(node))
340                if <Either<ast::Item, ast::Variant>>::can_cast(node.kind()) =>
341            {
342                match ast::Item::cast(node.clone()) {
343                    Some(item) => {
344                        if attr_or_derive_item.as_ref().is_some_and(|it| *it.item() == item) {
345                            attr_or_derive_item = None;
346                        }
347                        if matches!(
348                            item,
349                            ast::Item::Fn(_) | ast::Item::Const(_) | ast::Item::Static(_)
350                        ) {
351                            body_stack.pop();
352                        }
353                    }
354                    None => _ = body_stack.pop(),
355                }
356            }
357            _ => (),
358        }
359
360        let element = match event {
361            Enter(NodeOrToken::Token(tok)) if tok.kind() == WHITESPACE => continue,
362            Enter(it) => it,
363            Leave(NodeOrToken::Token(_)) => continue,
364            Leave(NodeOrToken::Node(node)) => {
365                if config.inject_doc_comment {
366                    // Doc comment highlighting injection, we do this when leaving the node
367                    // so that we overwrite the highlighting of the doc comment itself.
368                    inject::doc_comment(hl, sema, config, file_id, &node);
369                }
370                continue;
371            }
372        };
373
374        let element = match element.clone() {
375            NodeOrToken::Node(n) => match ast::NameLike::cast(n) {
376                Some(n) => NodeOrToken::Node(n),
377                None => continue,
378            },
379            NodeOrToken::Token(t) => NodeOrToken::Token(t),
380        };
381        let original_token = element.as_token().cloned();
382
383        // Descending tokens into macros is expensive even if no descending occurs, so make sure
384        // that we actually are in a position where descending is possible.
385        let in_macro = tt_level > 0
386            || match attr_or_derive_item {
387                Some(AttrOrDerive::Attr(_)) => true,
388                Some(AttrOrDerive::Derive(_)) => inside_attribute,
389                None => false,
390            };
391
392        let (descended_element, current_body) = match element {
393            // Attempt to descend tokens into macro-calls.
394            NodeOrToken::Token(token) if in_macro => {
395                let descended = descend_token(sema, InRealFile::new(file_id, token));
396                let body = match &descended.value {
397                    NodeOrToken::Node(n) => {
398                        sema.store_owner_for(InFile::new(descended.file_id, n.syntax()))
399                    }
400                    NodeOrToken::Token(t) => t
401                        .parent()
402                        .and_then(|it| sema.store_owner_for(InFile::new(descended.file_id, &it))),
403                };
404                (descended, body)
405            }
406            n => (InFile::new(file_id.into(), n), body_stack.last().copied().flatten()),
407        };
408        // string highlight injections
409        if let (Some(original_token), Some(descended_token)) =
410            (original_token, descended_element.value.as_token())
411        {
412            let control_flow = string_injections(
413                hl,
414                sema,
415                config,
416                file_id,
417                krate,
418                original_token,
419                descended_token,
420            );
421            if control_flow.is_break() {
422                continue;
423            }
424        }
425
426        let edition = descended_element.file_id.edition(sema.db);
427        let unsafe_ops = match current_body {
428            Some(current_body) => per_body_cache
429                .entry(current_body)
430                .or_insert_with(|| sema.get_unsafe_ops(current_body)),
431            None => &empty,
432        };
433        let is_unsafe_node =
434            |node| unsafe_ops.contains(&InFile::new(descended_element.file_id, node));
435        let element = match descended_element.value {
436            NodeOrToken::Node(name_like) => {
437                let hl = highlight::name_like(
438                    sema,
439                    krate,
440                    &is_unsafe_node,
441                    config.syntactic_name_ref_highlighting,
442                    name_like,
443                    edition,
444                );
445                if hl.is_some() && !in_macro {
446                    // skip highlighting the contained token of our name-like node
447                    // as that would potentially overwrite our result
448                    preorder.skip_subtree();
449                }
450                hl
451            }
452            NodeOrToken::Token(token) => {
453                highlight::token(sema, token, edition, &is_unsafe_node, tt_level > 0)
454                    .zip(Some(None))
455            }
456        };
457        if let Some((mut highlight, binding_hash)) = element {
458            if is_unlinked && highlight.tag == HlTag::UnresolvedReference {
459                // do not emit unresolved references if the file is unlinked
460                // let the editor do its highlighting for these tokens instead
461                continue;
462            }
463
464            // apply config filtering
465            if !filter_by_config(&mut highlight, config) {
466                continue;
467            }
468
469            if inside_attribute {
470                highlight |= HlMod::Attribute
471            }
472            if let Some(m) = descended_element.file_id.macro_file() {
473                if let MacroKind::ProcMacro | MacroKind::Attr | MacroKind::Derive = m.kind(sema.db)
474                {
475                    highlight |= HlMod::ProcMacro
476                }
477                highlight |= HlMod::Macro
478            }
479
480            hl.add(HlRange { range, highlight, binding_hash });
481        }
482    }
483}
484
485fn string_injections(
486    hl: &mut Highlights,
487    sema: &Semantics<'_, RootDatabase>,
488    config: &HighlightConfig<'_>,
489    file_id: EditionedFileId,
490    krate: Option<hir::Crate>,
491    token: SyntaxToken,
492    descended_token: &SyntaxToken,
493) -> ControlFlow<()> {
494    if !matches!(token.kind(), STRING | BYTE_STRING | BYTE | CHAR | C_STRING) {
495        return ControlFlow::Continue(());
496    }
497    if let Some(string) = ast::String::cast(token.clone()) {
498        if let Some(descended_string) = ast::String::cast(descended_token.clone()) {
499            if string.is_raw()
500                && inject::ra_fixture(hl, sema, config, &string, &descended_string).is_some()
501            {
502                return ControlFlow::Break(());
503            }
504            highlight_format_string(
505                hl,
506                sema,
507                krate,
508                &string,
509                &descended_string,
510                file_id.edition(sema.db),
511            );
512
513            if !string.is_raw() {
514                highlight_escape_string(hl, config, &string);
515            }
516        }
517    } else if let Some(byte_string) = ast::ByteString::cast(token.clone()) {
518        if !byte_string.is_raw() {
519            highlight_escape_string(hl, config, &byte_string);
520        }
521    } else if let Some(c_string) = ast::CString::cast(token.clone()) {
522        if !c_string.is_raw() {
523            highlight_escape_string(hl, config, &c_string);
524        }
525    } else if let Some(char) = ast::Char::cast(token.clone()) {
526        highlight_escape_char(hl, config, &char)
527    } else if let Some(byte) = ast::Byte::cast(token) {
528        highlight_escape_byte(hl, config, &byte)
529    }
530    ControlFlow::Continue(())
531}
532
533fn descend_token(
534    sema: &Semantics<'_, RootDatabase>,
535    token: InRealFile<SyntaxToken>,
536) -> InFile<NodeOrToken<ast::NameLike, SyntaxToken>> {
537    if token.value.kind() == COMMENT {
538        return token.map(NodeOrToken::Token).into();
539    }
540    let ranker = Ranker::from_token(&token.value);
541
542    let mut t = None;
543    let mut r = 0;
544    sema.descend_into_macros_breakable(token.clone().into(), |tok, _ctx| {
545        // FIXME: Consider checking ctx transparency for being opaque?
546        let my_rank = ranker.rank_token(&tok.value);
547
548        if my_rank >= Ranker::MAX_RANK {
549            // a rank of 0b1110 means that we have found a maximally interesting
550            // token so stop early.
551            t = Some(tok);
552            return ControlFlow::Break(());
553        }
554
555        // r = r.max(my_rank);
556        // t = Some(t.take_if(|_| r < my_rank).unwrap_or(tok));
557        match &mut t {
558            Some(prev) if r < my_rank => {
559                *prev = tok;
560                r = my_rank;
561            }
562            Some(_) => (),
563            None => {
564                r = my_rank;
565                t = Some(tok)
566            }
567        }
568        ControlFlow::Continue(())
569    });
570
571    let token = t.unwrap_or_else(|| token.into());
572    token.map(|token| match token.parent().and_then(ast::NameLike::cast) {
573        // Remap the token into the wrapping single token nodes
574        Some(parent) => match (token.kind(), parent.syntax().kind()) {
575            (T![ident] | T![self], NAME)
576            | (T![ident] | T![self] | T![super] | T![crate] | T![Self], NAME_REF)
577            | (INT_NUMBER, NAME_REF)
578            | (LIFETIME_IDENT, LIFETIME) => NodeOrToken::Node(parent),
579            _ => NodeOrToken::Token(token),
580        },
581        None => NodeOrToken::Token(token),
582    })
583}
584
585fn filter_by_config(highlight: &mut Highlight, config: &HighlightConfig<'_>) -> bool {
586    match &mut highlight.tag {
587        HlTag::StringLiteral | HlTag::EscapeSequence | HlTag::InvalidEscapeSequence
588            if !config.strings =>
589        {
590            return false;
591        }
592        HlTag::Comment if !config.comments => return false,
593        // If punctuation is disabled, make the macro bang part of the macro call again.
594        tag @ HlTag::Punctuation(HlPunct::MacroBang) => {
595            if !config.macro_bang {
596                *tag = HlTag::Symbol(SymbolKind::Macro);
597            } else if !config.specialize_punctuation {
598                *tag = HlTag::Punctuation(HlPunct::Other);
599            }
600        }
601        HlTag::Punctuation(_) if !config.punctuation && highlight.mods.is_empty() => return false,
602        tag @ HlTag::Punctuation(_) if !config.specialize_punctuation => {
603            *tag = HlTag::Punctuation(HlPunct::Other);
604        }
605        HlTag::Operator(_) if !config.operator && highlight.mods.is_empty() => return false,
606        tag @ HlTag::Operator(_) if !config.specialize_operator => {
607            *tag = HlTag::Operator(HlOperator::Other);
608        }
609        _ => (),
610    }
611    true
612}