ide/syntax_highlighting.rs
1pub(crate) mod tags;
2
3mod highlights;
4mod injector;
5
6mod escape;
7mod format;
8mod highlight;
9mod inject;
10
11mod html;
12#[cfg(test)]
13mod tests;
14
15use std::ops::ControlFlow;
16
17use either::Either;
18use hir::{DefWithBody, EditionedFileId, InFile, InRealFile, MacroKind, Name, Semantics};
19use ide_db::{FxHashMap, FxHashSet, Ranker, RootDatabase, SymbolKind};
20use syntax::{
21 AstNode, AstToken, NodeOrToken,
22 SyntaxKind::*,
23 SyntaxNode, SyntaxToken, T, TextRange, WalkEvent,
24 ast::{self, IsString},
25};
26
27use crate::{
28 FileId, HlMod, HlOperator, HlPunct, HlTag,
29 syntax_highlighting::{
30 escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
31 format::highlight_format_string,
32 highlights::Highlights,
33 tags::Highlight,
34 },
35};
36
37pub(crate) use html::highlight_as_html;
38pub(crate) use html::highlight_as_html_with_config;
39
/// A single highlighted span: a text range together with the highlight assigned to it.
#[derive(Debug, Clone, Copy)]
pub struct HlRange {
    /// The text range this highlight applies to.
    pub range: TextRange,
    /// The tag and modifiers assigned to `range`.
    pub highlight: Highlight,
    /// Optional hash attached to the range. `traverse` sets it to `None` for plain tokens and
    /// forwards whatever `highlight::name_like` returns for name-like nodes.
    // NOTE(review): presumably identifies a local binding so clients can color it
    // consistently — confirm against `highlight::name_like`.
    pub binding_hash: Option<u64>,
}
46
/// Switches controlling which highlights are emitted and how specific they are.
///
/// Most flags are applied by `filter_by_config`; `inject_doc_comment` and
/// `syntactic_name_ref_highlighting` are consulted directly while traversing the syntax tree.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct HighlightConfig {
    /// Whether to highlight strings
    pub strings: bool,
    /// Whether to highlight comments
    pub comments: bool,
    /// Whether to highlight punctuation
    pub punctuation: bool,
    /// Whether to specialize punctuation highlights
    pub specialize_punctuation: bool,
    /// Whether to highlight operators
    pub operator: bool,
    /// Whether to specialize operator highlights
    pub specialize_operator: bool,
    /// Whether to inject highlights into doc comments
    pub inject_doc_comment: bool,
    /// Whether to highlight the macro call bang
    pub macro_bang: bool,
    /// Whether to highlight unresolved things by their syntax
    pub syntactic_name_ref_highlighting: bool,
}
68
69// Feature: Semantic Syntax Highlighting
70//
71// rust-analyzer highlights the code semantically.
72// For example, `Bar` in `foo::Bar` might be colored differently depending on whether `Bar` is an enum or a trait.
73// rust-analyzer does not specify colors directly, instead it assigns a tag (like `struct`) and a set of modifiers (like `declaration`) to each token.
74// It's up to the client to map those to specific colors.
75//
76// The general rule is that a reference to an entity gets colored the same way as the entity itself.
// We also give a special modifier to `mut` and `&mut` local variables.
78//
79//
80// #### Token Tags
81//
82// Rust-analyzer currently emits the following token tags:
83//
84// - For items:
85//
86// | | |
87// |-----------|--------------------------------|
88// | attribute | Emitted for attribute macros. |
89// |enum| Emitted for enums. |
90// |function| Emitted for free-standing functions. |
91// |derive| Emitted for derive macros. |
92// |macro| Emitted for function-like macros. |
// |method| Emitted for associated functions, also known as methods. |
94// |namespace| Emitted for modules. |
95// |struct| Emitted for structs.|
96// |trait| Emitted for traits.|
97// |typeAlias| Emitted for type aliases and `Self` in `impl`s.|
98// |union| Emitted for unions.|
99//
100// - For literals:
101//
102// | | |
103// |-----------|--------------------------------|
104// | boolean| Emitted for the boolean literals `true` and `false`.|
105// | character| Emitted for character literals.|
106// | number| Emitted for numeric literals.|
107// | string| Emitted for string literals.|
108// | escapeSequence| Emitted for escaped sequences inside strings like `\n`.|
109// | formatSpecifier| Emitted for format specifiers `{:?}` in `format!`-like macros.|
110//
111// - For operators:
112//
113// | | |
114// |-----------|--------------------------------|
115// |operator| Emitted for general operators.|
116// |arithmetic| Emitted for the arithmetic operators `+`, `-`, `*`, `/`, `+=`, `-=`, `*=`, `/=`.|
117// |bitwise| Emitted for the bitwise operators `|`, `&`, `!`, `^`, `|=`, `&=`, `^=`.|
// |comparison| Emitted for the comparison operators `>`, `<`, `==`, `>=`, `<=`, `!=`.|
119// |logical| Emitted for the logical operators `||`, `&&`, `!`.|
120//
121// - For punctuation:
122//
123// | | |
124// |-----------|--------------------------------|
125// |punctuation| Emitted for general punctuation.|
126// |attributeBracket| Emitted for attribute invocation brackets, that is the `#[` and `]` tokens.|
127// |angle| Emitted for `<>` angle brackets.|
128// |brace| Emitted for `{}` braces.|
129// |bracket| Emitted for `[]` brackets.|
130// |parenthesis| Emitted for `()` parentheses.|
131// |colon| Emitted for the `:` token.|
132// |comma| Emitted for the `,` token.|
133// |dot| Emitted for the `.` token.|
134// |semi| Emitted for the `;` token.|
135// |macroBang| Emitted for the `!` token in macro calls.|
136//
// - Other tags:
138//
139// | | |
140// |-----------|--------------------------------|
141// |builtinAttribute| Emitted for names to builtin attributes in attribute path, the `repr` in `#[repr(u8)]` for example.|
142// |builtinType| Emitted for builtin types like `u32`, `str` and `f32`.|
143// |comment| Emitted for comments.|
144// |constParameter| Emitted for const parameters.|
145// |deriveHelper| Emitted for derive helper attributes.|
146// |enumMember| Emitted for enum variants.|
147// |generic| Emitted for generic tokens that have no mapping.|
148// |keyword| Emitted for keywords.|
149// |label| Emitted for labels.|
150// |lifetime| Emitted for lifetimes.|
151// |parameter| Emitted for non-self function parameters.|
152// |property| Emitted for struct and union fields.|
153// |selfKeyword| Emitted for the self function parameter and self path-specifier.|
154// |selfTypeKeyword| Emitted for the Self type parameter.|
155// |toolModule| Emitted for tool modules.|
156// |typeParameter| Emitted for type parameters.|
157// |unresolvedReference| Emitted for unresolved references, names that rust-analyzer can't find the definition of.|
158// |variable| Emitted for locals, constants and statics.|
159//
160//
161// #### Token Modifiers
162//
// Token modifiers allow styling some elements in the source code more precisely.
164//
165// Rust-analyzer currently emits the following token modifiers:
166//
167// | | |
168// |-----------|--------------------------------|
169// |async| Emitted for async functions and the `async` and `await` keywords.|
170// |attribute| Emitted for tokens inside attributes.|
// |callable| Emitted for locals whose types implement one of the `Fn*` traits.|
172// |constant| Emitted for const.|
// |consuming| Emitted for locals that are being consumed when used in a function call.|
// |controlFlow| Emitted for control-flow related tokens, this includes the `?` operator.|
// |crateRoot| Emitted for crate names, like `serde` and `crate`.|
176// |declaration| Emitted for names of definitions, like `foo` in `fn foo(){}`.|
// |defaultLibrary| Emitted for items from built-in crates (std, core, alloc, test and proc_macro).|
// |documentation| Emitted for documentation comments.|
179// |injected| Emitted for doc-string injected highlighting like rust source blocks in documentation.|
180// |intraDocLink| Emitted for intra doc links in doc-string.|
// |library| Emitted for items that are defined outside of the current crate.|
182// |macro| Emitted for tokens inside macro call.|
183// |mutable| Emitted for mutable locals and statics as well as functions taking `&mut self`.|
// |public| Emitted for items that are from the current crate and are `pub`.|
// |reference| Emitted for locals behind a reference and functions taking `self` by reference.|
// |static| Emitted for "static" functions, also known as functions that do not take a `self` param, as well as statics and consts.|
// |trait| Emitted for associated trait items.|
// |unsafe| Emitted for unsafe operations, like unsafe function calls, as well as the `unsafe` token.|
189//
190// 
191// 
192pub(crate) fn highlight(
193 db: &RootDatabase,
194 config: HighlightConfig,
195 file_id: FileId,
196 range_to_highlight: Option<TextRange>,
197) -> Vec<HlRange> {
198 let _p = tracing::info_span!("highlight").entered();
199 let sema = Semantics::new(db);
200 let file_id = sema
201 .attach_first_edition(file_id)
202 .unwrap_or_else(|| EditionedFileId::current_edition(db, file_id));
203
204 // Determine the root based on the given range.
205 let (root, range_to_highlight) = {
206 let file = sema.parse(file_id);
207 let source_file = file.syntax();
208 match range_to_highlight {
209 Some(range) => {
210 let node = match source_file.covering_element(range) {
211 NodeOrToken::Node(it) => it,
212 NodeOrToken::Token(it) => it.parent().unwrap_or_else(|| source_file.clone()),
213 };
214 (node, range)
215 }
216 None => (source_file.clone(), source_file.text_range()),
217 }
218 };
219
220 let mut hl = highlights::Highlights::new(root.text_range());
221 let krate = sema.scope(&root).map(|it| it.krate());
222 traverse(&mut hl, &sema, config, InRealFile::new(file_id, &root), krate, range_to_highlight);
223 hl.to_vec()
224}
225
/// Walks `root` in preorder (nodes and tokens) and records a highlight in `hl` for every
/// name-like node or token that intersects `range_to_highlight`.
///
/// While walking, the function tracks:
/// - how deeply it is nested in token trees (`tt_level`),
/// - whether it is inside an attribute (`inside_attribute`),
/// - the outermost enclosing item that is an attribute macro call or a derive-annotated ADT
///   (`attr_or_derive_item`),
/// - a stack of the enclosing `fn`/`const`/`static`/enum-variant definitions (`body_stack`).
///
/// Tokens are only descended into macro expansions when that state says descending is
/// possible. Doc comment injection runs when leaving a node, if enabled in `config`.
fn traverse(
    hl: &mut Highlights,
    sema: &Semantics<'_, RootDatabase>,
    config: HighlightConfig,
    InRealFile { file_id, value: root }: InRealFile<&SyntaxNode>,
    krate: Option<hir::Crate>,
    range_to_highlight: TextRange,
) {
    // A file without a module definition; unresolved references are not emitted for it (below).
    let is_unlinked = sema.file_to_module_def(file_id.file_id(sema.db)).is_none();

    // The item whose macro context we are currently inside of: either an attribute macro
    // call or a derive-annotated ADT.
    enum AttrOrDerive {
        Attr(ast::Item),
        Derive(ast::Item),
    }

    impl AttrOrDerive {
        /// Returns the wrapped item regardless of the variant.
        fn item(&self) -> &ast::Item {
            match self {
                AttrOrDerive::Attr(item) | AttrOrDerive::Derive(item) => item,
            }
        }
    }

    // Stand-in set of unsafe operations used when there is no current body.
    let empty = FxHashSet::default();

    // Nesting depth of `TokenTree` nodes around the current element.
    // FIXME: accommodate range highlighting
    let mut tt_level = 0;
    // Only the outermost matching item is tracked: it is set when `None` and cleared on
    // leaving that same item.
    // FIXME: accommodate range highlighting
    let mut attr_or_derive_item = None;

    // FIXME: these are not perfectly accurate, we determine them by the real file's syntax tree
    // an attribute nested in a macro call will not emit `inside_attribute`
    let mut inside_attribute = false;

    // One entry per enclosing `fn`/`const`/`static`/variant; `None` when `to_def` failed.
    // FIXME: accommodate range highlighting
    let mut body_stack: Vec<Option<DefWithBody>> = vec![];
    // Per body: the set of unsafe operations and the binding shadow counters handed to
    // `highlight::name_like`, computed on first use.
    let mut per_body_cache: FxHashMap<DefWithBody, (FxHashSet<_>, FxHashMap<Name, u32>)> =
        FxHashMap::default();

    // Walk all nodes, keeping track of whether we are inside a macro or not.
    // If in macro, expand it first and highlight the expanded code.
    let mut preorder = root.preorder_with_tokens();
    while let Some(event) = preorder.next() {
        use WalkEvent::{Enter, Leave};

        let range = match &event {
            Enter(it) | Leave(it) => it.text_range(),
        };

        // Element outside of the viewport, no need to highlight
        if range_to_highlight.intersect(range).is_none() {
            continue;
        }

        // Update the traversal state; this `match` does not produce highlights itself.
        match event.clone() {
            Enter(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
                tt_level += 1;
            }
            Leave(NodeOrToken::Node(node)) if ast::TokenTree::can_cast(node.kind()) => {
                tt_level -= 1;
            }
            Enter(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
                inside_attribute = true
            }
            Leave(NodeOrToken::Node(node)) if ast::Attr::can_cast(node.kind()) => {
                inside_attribute = false
            }
            Enter(NodeOrToken::Node(node)) => {
                if let Some(item) = <Either<ast::Item, ast::Variant>>::cast(node.clone()) {
                    match item {
                        Either::Left(item) => {
                            // Items that can own a body push onto the body stack.
                            match &item {
                                ast::Item::Fn(it) => {
                                    body_stack.push(sema.to_def(it).map(Into::into))
                                }
                                ast::Item::Const(it) => {
                                    body_stack.push(sema.to_def(it).map(Into::into))
                                }
                                ast::Item::Static(it) => {
                                    body_stack.push(sema.to_def(it).map(Into::into))
                                }
                                _ => (),
                            }

                            if attr_or_derive_item.is_none() {
                                if sema.is_attr_macro_call(InFile::new(file_id.into(), &item)) {
                                    attr_or_derive_item = Some(AttrOrDerive::Attr(item));
                                } else {
                                    // Only ADTs are checked for derive annotations.
                                    let adt = match item {
                                        ast::Item::Enum(it) => Some(ast::Adt::Enum(it)),
                                        ast::Item::Struct(it) => Some(ast::Adt::Struct(it)),
                                        ast::Item::Union(it) => Some(ast::Adt::Union(it)),
                                        _ => None,
                                    };
                                    match adt {
                                        Some(adt)
                                            if sema.is_derive_annotated(InFile::new(
                                                file_id.into(),
                                                &adt,
                                            )) =>
                                        {
                                            attr_or_derive_item =
                                                Some(AttrOrDerive::Derive(ast::Item::from(adt)));
                                        }
                                        _ => (),
                                    }
                                }
                            }
                        }
                        // Enum variants push onto the body stack as well.
                        Either::Right(it) => body_stack.push(sema.to_def(&it).map(Into::into)),
                    }
                }
            }
            Leave(NodeOrToken::Node(node))
                if <Either<ast::Item, ast::Variant>>::can_cast(node.kind()) =>
            {
                // Undo whatever the matching `Enter` arm above did for this node.
                match ast::Item::cast(node.clone()) {
                    Some(item) => {
                        if attr_or_derive_item.as_ref().is_some_and(|it| *it.item() == item) {
                            attr_or_derive_item = None;
                        }
                        if matches!(
                            item,
                            ast::Item::Fn(_) | ast::Item::Const(_) | ast::Item::Static(_)
                        ) {
                            body_stack.pop();
                        }
                    }
                    // Not an item, so by the guard above this is a variant.
                    None => _ = body_stack.pop(),
                }
            }
            _ => (),
        }

        // Only `Enter` events of non-whitespace elements are highlighted.
        let element = match event {
            Enter(NodeOrToken::Token(tok)) if tok.kind() == WHITESPACE => continue,
            Enter(it) => it,
            Leave(NodeOrToken::Token(_)) => continue,
            Leave(NodeOrToken::Node(node)) => {
                if config.inject_doc_comment {
                    // Doc comment highlighting injection, we do this when leaving the node
                    // so that we overwrite the highlighting of the doc comment itself.
                    inject::doc_comment(hl, sema, config, file_id, &node);
                }
                continue;
            }
        };

        // Of the nodes, only name-like ones are highlighted; tokens pass through unchanged.
        let element = match element.clone() {
            NodeOrToken::Node(n) => match ast::NameLike::cast(n) {
                Some(n) => NodeOrToken::Node(n),
                None => continue,
            },
            NodeOrToken::Token(t) => NodeOrToken::Token(t),
        };
        let original_token = element.as_token().cloned();

        // Descending tokens into macros is expensive even if no descending occurs, so make sure
        // that we actually are in a position where descending is possible.
        let in_macro = tt_level > 0
            || match attr_or_derive_item {
                Some(AttrOrDerive::Attr(_)) => true,
                Some(AttrOrDerive::Derive(_)) => inside_attribute,
                None => false,
            };

        let (descended_element, current_body) = match element {
            // Attempt to descend tokens into macro-calls.
            NodeOrToken::Token(token) if in_macro => {
                let descended = descend_token(sema, InRealFile::new(file_id, token));
                // The body is looked up from the descended element, not from `body_stack`.
                let body = match &descended.value {
                    NodeOrToken::Node(n) => {
                        sema.body_for(InFile::new(descended.file_id, n.syntax()))
                    }
                    NodeOrToken::Token(t) => {
                        t.parent().and_then(|it| sema.body_for(InFile::new(descended.file_id, &it)))
                    }
                };
                (descended, body)
            }
            // Not descending: keep the element in the real file and use the innermost body.
            n => (InFile::new(file_id.into(), n), body_stack.last().copied().flatten()),
        };
        // string highlight injections
        if let (Some(original_token), Some(descended_token)) =
            (original_token, descended_element.value.as_token())
        {
            let control_flow = string_injections(
                hl,
                sema,
                config,
                file_id,
                krate,
                original_token,
                descended_token,
            );
            // `Break` means the injection handled this token; skip regular highlighting.
            if control_flow.is_break() {
                continue;
            }
        }

        let edition = descended_element.file_id.edition(sema.db);
        let (unsafe_ops, bindings_shadow_count) = match current_body {
            Some(current_body) => {
                let (ops, bindings) = per_body_cache.entry(current_body).or_insert_with(|| {
                    (
                        hir::attach_db(sema.db, || sema.get_unsafe_ops(current_body)),
                        Default::default(),
                    )
                });
                (&*ops, Some(bindings))
            }
            None => (&empty, None),
        };
        let is_unsafe_node =
            |node| unsafe_ops.contains(&InFile::new(descended_element.file_id, node));
        let element = match descended_element.value {
            NodeOrToken::Node(name_like) => {
                let hl = hir::attach_db(sema.db, || {
                    highlight::name_like(
                        sema,
                        krate,
                        bindings_shadow_count,
                        &is_unsafe_node,
                        config.syntactic_name_ref_highlighting,
                        name_like,
                        edition,
                    )
                });
                if hl.is_some() && !in_macro {
                    // skip highlighting the contained token of our name-like node
                    // as that would potentially overwrite our result
                    preorder.skip_subtree();
                }
                hl
            }
            // Plain tokens never carry a binding hash, hence the `Some(None)`.
            NodeOrToken::Token(token) => hir::attach_db(sema.db, || {
                highlight::token(sema, token, edition, &is_unsafe_node, tt_level > 0)
                    .zip(Some(None))
            }),
        };
        if let Some((mut highlight, binding_hash)) = element {
            if is_unlinked && highlight.tag == HlTag::UnresolvedReference {
                // do not emit unresolved references if the file is unlinked
                // let the editor do its highlighting for these tokens instead
                continue;
            }

            // apply config filtering
            if !filter_by_config(&mut highlight, config) {
                continue;
            }

            if inside_attribute {
                highlight |= HlMod::Attribute
            }
            // Elements that were descended into a macro file get the macro modifier(s).
            if let Some(m) = descended_element.file_id.macro_file() {
                if let MacroKind::ProcMacro | MacroKind::Attr | MacroKind::Derive = m.kind(sema.db)
                {
                    highlight |= HlMod::ProcMacro
                }
                highlight |= HlMod::Macro
            }

            // `range` is the range of the element in the walked tree, not of the descended one.
            hl.add(HlRange { range, highlight, binding_hash });
        }
    }
}
493
494fn string_injections(
495 hl: &mut Highlights,
496 sema: &Semantics<'_, RootDatabase>,
497 config: HighlightConfig,
498 file_id: EditionedFileId,
499 krate: Option<hir::Crate>,
500 token: SyntaxToken,
501 descended_token: &SyntaxToken,
502) -> ControlFlow<()> {
503 if !matches!(token.kind(), STRING | BYTE_STRING | BYTE | CHAR | C_STRING) {
504 return ControlFlow::Continue(());
505 }
506 if let Some(string) = ast::String::cast(token.clone()) {
507 if let Some(descended_string) = ast::String::cast(descended_token.clone()) {
508 if string.is_raw()
509 && inject::ra_fixture(hl, sema, config, &string, &descended_string).is_some()
510 {
511 return ControlFlow::Break(());
512 }
513 highlight_format_string(
514 hl,
515 sema,
516 krate,
517 &string,
518 &descended_string,
519 file_id.edition(sema.db),
520 );
521
522 if !string.is_raw() {
523 highlight_escape_string(hl, &string);
524 }
525 }
526 } else if let Some(byte_string) = ast::ByteString::cast(token.clone()) {
527 if !byte_string.is_raw() {
528 highlight_escape_string(hl, &byte_string);
529 }
530 } else if let Some(c_string) = ast::CString::cast(token.clone()) {
531 if !c_string.is_raw() {
532 highlight_escape_string(hl, &c_string);
533 }
534 } else if let Some(char) = ast::Char::cast(token.clone()) {
535 highlight_escape_char(hl, &char)
536 } else if let Some(byte) = ast::Byte::cast(token) {
537 highlight_escape_byte(hl, &byte)
538 }
539 ControlFlow::Continue(())
540}
541
542fn descend_token(
543 sema: &Semantics<'_, RootDatabase>,
544 token: InRealFile<SyntaxToken>,
545) -> InFile<NodeOrToken<ast::NameLike, SyntaxToken>> {
546 if token.value.kind() == COMMENT {
547 return token.map(NodeOrToken::Token).into();
548 }
549 let ranker = Ranker::from_token(&token.value);
550
551 let mut t = None;
552 let mut r = 0;
553 sema.descend_into_macros_breakable(token.clone().into(), |tok, _ctx| {
554 // FIXME: Consider checking ctx transparency for being opaque?
555 let my_rank = ranker.rank_token(&tok.value);
556
557 if my_rank >= Ranker::MAX_RANK {
558 // a rank of 0b1110 means that we have found a maximally interesting
559 // token so stop early.
560 t = Some(tok);
561 return ControlFlow::Break(());
562 }
563
564 // r = r.max(my_rank);
565 // t = Some(t.take_if(|_| r < my_rank).unwrap_or(tok));
566 match &mut t {
567 Some(prev) if r < my_rank => {
568 *prev = tok;
569 r = my_rank;
570 }
571 Some(_) => (),
572 None => {
573 r = my_rank;
574 t = Some(tok)
575 }
576 }
577 ControlFlow::Continue(())
578 });
579
580 let token = t.unwrap_or_else(|| token.into());
581 token.map(|token| match token.parent().and_then(ast::NameLike::cast) {
582 // Remap the token into the wrapping single token nodes
583 Some(parent) => match (token.kind(), parent.syntax().kind()) {
584 (T![ident] | T![self], NAME)
585 | (T![ident] | T![self] | T![super] | T![crate] | T![Self], NAME_REF)
586 | (INT_NUMBER, NAME_REF)
587 | (LIFETIME_IDENT, LIFETIME) => NodeOrToken::Node(parent),
588 _ => NodeOrToken::Token(token),
589 },
590 None => NodeOrToken::Token(token),
591 })
592}
593
594fn filter_by_config(highlight: &mut Highlight, config: HighlightConfig) -> bool {
595 match &mut highlight.tag {
596 HlTag::StringLiteral if !config.strings => return false,
597 HlTag::Comment if !config.comments => return false,
598 // If punctuation is disabled, make the macro bang part of the macro call again.
599 tag @ HlTag::Punctuation(HlPunct::MacroBang) => {
600 if !config.macro_bang {
601 *tag = HlTag::Symbol(SymbolKind::Macro);
602 } else if !config.specialize_punctuation {
603 *tag = HlTag::Punctuation(HlPunct::Other);
604 }
605 }
606 HlTag::Punctuation(_) if !config.punctuation && highlight.mods.is_empty() => return false,
607 tag @ HlTag::Punctuation(_) if !config.specialize_punctuation => {
608 *tag = HlTag::Punctuation(HlPunct::Other);
609 }
610 HlTag::Operator(_) if !config.operator && highlight.mods.is_empty() => return false,
611 tag @ HlTag::Operator(_) if !config.specialize_operator => {
612 *tag = HlTag::Operator(HlOperator::Other);
613 }
614 _ => (),
615 }
616 true
617}