syntax_bridge/
lib.rs

1//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
2
3#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
4
5#[cfg(feature = "in-rust-tree")]
6extern crate rustc_driver as _;
7
8use std::{collections::VecDeque, fmt, hash::Hash};
9
10use intern::Symbol;
11use rustc_hash::{FxHashMap, FxHashSet};
12use span::{Edition, SpanAnchor, SpanData, SpanMap};
13use stdx::{format_to, never};
14use syntax::{
15    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
16    SyntaxKind::{self, *},
17    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, T, TextRange, TextSize, WalkEvent,
18    ast::{self, make::tokens::doc_comment},
19    format_smolstr,
20};
21use tt::{Punct, buffer::Cursor, token_to_literal};
22
23pub mod prettify_macro_expansion;
24mod to_parser_input;
25pub use to_parser_input::to_parser_input;
// FIXME: we probably should re-think `token_tree_to_syntax_node` interfaces
27pub use ::parser::TopEntryPoint;
28
29#[cfg(test)]
30mod tests;
31
/// Maps a text range of the source being converted to the span that should be
/// attached to tokens produced from that range.
pub trait SpanMapper<S> {
    fn span_for(&self, range: TextRange) -> S;
}
35
impl<S> SpanMapper<SpanData<S>> for SpanMap<S>
where
    SpanData<S>: Copy,
{
    fn span_for(&self, range: TextRange) -> SpanData<S> {
        // The span found at the range's start position is used for the whole range.
        self.span_at(range.start())
    }
}
44
/// Forwarding impl so both owned and borrowed span mappers can be passed around.
impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM {
    fn span_for(&self, range: TextRange) -> S {
        SM::span_for(self, range)
    }
}
50
/// Dummy things for testing where spans don't matter.
pub mod dummy_test_span_utils {

    use span::{Span, SyntaxContext};

    use super::*;

    /// A fixed dummy span: empty range at offset 0, a fake file id (`0xe4e4e`),
    /// the root AST id and the root syntax context of the current edition.
    pub const DUMMY: Span = Span {
        range: TextRange::empty(TextSize::new(0)),
        anchor: span::SpanAnchor {
            file_id: span::EditionedFileId::new(
                span::FileId::from_raw(0xe4e4e),
                span::Edition::CURRENT,
            ),
            ast_id: span::ROOT_ERASED_FILE_AST_ID,
        },
        ctx: SyntaxContext::root(Edition::CURRENT),
    };

    /// Span map that echoes back the queried range but otherwise produces dummy
    /// anchor/context data, matching [`DUMMY`].
    pub struct DummyTestSpanMap;

    impl SpanMapper<Span> for DummyTestSpanMap {
        fn span_for(&self, range: syntax::TextRange) -> Span {
            Span {
                range,
                anchor: span::SpanAnchor {
                    file_id: span::EditionedFileId::new(
                        span::FileId::from_raw(0xe4e4e),
                        span::Edition::CURRENT,
                    ),
                    ast_id: span::ROOT_ERASED_FILE_AST_ID,
                },
                ctx: SyntaxContext::root(Edition::CURRENT),
            }
        }
    }
}
88
/// Doc comment desugaring differs between mbe and proc-macros.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum DocCommentDesugarMode {
    /// Desugars doc comments as quoted raw strings (`r#"..."#`), as `macro_rules!` expansion does
    Mbe,
    /// Desugars doc comments as quoted (escaped) strings, as proc-macro expansion does
    ProcMacro,
}
97
98/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
99/// subtree's spans.
100pub fn syntax_node_to_token_tree<Ctx, SpanMap>(
101    node: &SyntaxNode,
102    map: SpanMap,
103    span: SpanData<Ctx>,
104    mode: DocCommentDesugarMode,
105) -> tt::TopSubtree<SpanData<Ctx>>
106where
107    SpanData<Ctx>: Copy + fmt::Debug,
108    SpanMap: SpanMapper<SpanData<Ctx>>,
109{
110    let mut c =
111        Converter::new(node, map, Default::default(), Default::default(), span, mode, |_, _| {
112            (true, Vec::new())
113        });
114    convert_tokens(&mut c)
115}
116
/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans. Additionally using the append and remove parameters, the additional tokens can
/// be injected or hidden from the output.
///
/// `on_enter` is called for every preorder walk event; it returns whether to keep the event and
/// any extra leaves to splice into the output at that point.
pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap, OnEvent>(
    node: &SyntaxNode,
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
    on_enter: OnEvent,
) -> tt::TopSubtree<SpanData<Ctx>>
where
    SpanMap: SpanMapper<SpanData<Ctx>>,
    SpanData<Ctx>: Copy + fmt::Debug,
    OnEvent: FnMut(
        &mut PreorderWithTokens,
        &WalkEvent<SyntaxElement>,
    ) -> (bool, Vec<tt::Leaf<SpanData<Ctx>>>),
{
    let mut c = Converter::new(node, map, append, remove, call_site, mode, on_enter);
    convert_tokens(&mut c)
}
140
// The following items are what `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(Box<ast::Expr>)                     -> token_tree_to_expr
// * Pat(Box<ast::Pat>)                       -> token_tree_to_pat
// * Ty(Box<ast::Ty>)                         -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>)          -> token_tree_to_stmts
// * Items(SmallVec<[Box<ast::Item>; 1]>)     -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)
153/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
154/// The produced `SpanMap` contains a mapping from the syntax nodes offsets to the subtree's spans.
155pub fn token_tree_to_syntax_node<Ctx>(
156    tt: &tt::TopSubtree<SpanData<Ctx>>,
157    entry_point: parser::TopEntryPoint,
158    span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
159) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
160where
161    Ctx: Copy + fmt::Debug + PartialEq + PartialEq + Eq + Hash,
162{
163    let buffer = tt.view().strip_invisible();
164    let parser_input = to_parser_input(buffer, span_to_edition);
165    // It matters what edition we parse with even when we escape all identifiers correctly.
166    let parser_output = entry_point.parse(&parser_input);
167    let mut tree_sink = TtTreeSink::new(buffer.cursor());
168    for event in parser_output.iter() {
169        match event {
170            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
171                tree_sink.token(kind, n_raw_tokens)
172            }
173            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
174                tree_sink.float_split(has_pseudo_dot)
175            }
176            parser::Step::Enter { kind } => tree_sink.start_node(kind),
177            parser::Step::Exit => tree_sink.finish_node(),
178            parser::Step::Error { msg } => tree_sink.error(msg.to_owned()),
179        }
180    }
181    tree_sink.finish()
182}
183
184/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided
185/// anchor with the given context.
186pub fn parse_to_token_tree<Ctx>(
187    edition: Edition,
188    anchor: SpanAnchor,
189    ctx: Ctx,
190    text: &str,
191) -> Option<tt::TopSubtree<SpanData<Ctx>>>
192where
193    SpanData<Ctx>: Copy + fmt::Debug,
194    Ctx: Copy,
195{
196    let lexed = parser::LexedStr::new(edition, text);
197    if lexed.errors().next().is_some() {
198        return None;
199    }
200    let mut conv =
201        RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro };
202    Some(convert_tokens(&mut conv))
203}
204
205/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree.
206pub fn parse_to_token_tree_static_span<S>(
207    edition: Edition,
208    span: S,
209    text: &str,
210) -> Option<tt::TopSubtree<S>>
211where
212    S: Copy + fmt::Debug,
213{
214    let lexed = parser::LexedStr::new(edition, text);
215    if lexed.errors().next().is_some() {
216        return None;
217    }
218    let mut conv =
219        StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro };
220    Some(convert_tokens(&mut conv))
221}
222
/// Core conversion driver shared by all [`TokenConverter`]s: consumes tokens from `conv` and
/// assembles a [`tt::TopSubtree`], opening/closing subtrees at delimiter tokens, desugaring
/// doc comments, and splitting lifetimes into `'` + ident.
fn convert_tokens<S, C>(conv: &mut C) -> tt::TopSubtree<S>
where
    C: TokenConverter<S>,
    S: Copy + fmt::Debug,
    C::Token: fmt::Debug,
{
    // The top-level subtree uses an invisible delimiter spanned with the call site.
    let mut builder =
        tt::TopSubtreeBuilder::new(tt::Delimiter::invisible_spanned(conv.call_site()));

    while let Some((token, abs_range)) = conv.bump() {
        let tt = match token.as_leaf() {
            // These delimiters are not actually valid punctuation, but we produce them in syntax fixup.
            // So we need to handle them specially here.
            Some(&tt::Leaf::Punct(Punct {
                char: char @ ('(' | ')' | '{' | '}' | '[' | ']'),
                span,
                spacing: _,
            })) => {
                // Is this the closing delimiter of some currently-open subtree?
                let found_expected_delimiter =
                    builder.expected_delimiters().enumerate().find(|(_, delim)| match delim.kind {
                        tt::DelimiterKind::Parenthesis => char == ')',
                        tt::DelimiterKind::Brace => char == '}',
                        tt::DelimiterKind::Bracket => char == ']',
                        tt::DelimiterKind::Invisible => false,
                    });
                if let Some((idx, _)) = found_expected_delimiter {
                    // Close the matched subtree, plus any unclosed subtrees nested inside it.
                    for _ in 0..=idx {
                        builder.close(span);
                    }
                    continue;
                }

                let delim = match char {
                    '(' => tt::DelimiterKind::Parenthesis,
                    '{' => tt::DelimiterKind::Brace,
                    '[' => tt::DelimiterKind::Bracket,
                    _ => panic!("unmatched closing delimiter from syntax fixup"),
                };

                // Start a new subtree
                builder.open(delim, span);
                continue;
            }
            Some(leaf) => leaf.clone(),
            None => match token.kind(conv) {
                // Desugar doc comments into doc attributes
                COMMENT => {
                    let span = conv.span_for(abs_range);
                    conv.convert_doc_comment(&token, span, &mut builder);
                    continue;
                }
                kind if kind.is_punct() && kind != UNDERSCORE => {
                    let found_expected_delimiter =
                        builder.expected_delimiters().enumerate().find(|(_, delim)| {
                            match delim.kind {
                                tt::DelimiterKind::Parenthesis => kind == T![')'],
                                tt::DelimiterKind::Brace => kind == T!['}'],
                                tt::DelimiterKind::Bracket => kind == T![']'],
                                tt::DelimiterKind::Invisible => false,
                            }
                        });

                    // Current token is a closing delimiter that we expect, fix up the closing span
                    // and end the subtree here.
                    // We also close any open inner subtrees that might be missing their delimiter.
                    if let Some((idx, _)) = found_expected_delimiter {
                        for _ in 0..=idx {
                            // FIXME: record an error somewhere if we're closing more than one tree here?
                            builder.close(conv.span_for(abs_range));
                        }
                        continue;
                    }

                    let delim = match kind {
                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
                        T!['{'] => Some(tt::DelimiterKind::Brace),
                        T!['['] => Some(tt::DelimiterKind::Bracket),
                        _ => None,
                    };

                    // Start a new subtree
                    if let Some(kind) = delim {
                        builder.open(kind, conv.span_for(abs_range));
                        continue;
                    }

                    // Mark the punct as joint when the next token is another single-token
                    // operator char, so multi-char operators (e.g. `==`) round-trip.
                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                        _ => tt::Spacing::Alone,
                    };
                    let Some(char) = token.to_char(conv) else {
                        panic!("Token from lexer must be single char: token = {token:#?}")
                    };
                    // FIXME: this might still be an unmatched closing delimiter? Maybe we should assert here
                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
                }
                kind => {
                    macro_rules! make_ident {
                        () => {
                            tt::Ident {
                                span: conv.span_for(abs_range),
                                sym: Symbol::intern(&token.to_text(conv)),
                                is_raw: tt::IdentIsRaw::No,
                            }
                            .into()
                        };
                    }
                    let leaf: tt::Leaf<_> = match kind {
                        k if k.is_any_identifier() => {
                            let text = token.to_text(conv);
                            tt::Ident::new(&text, conv.span_for(abs_range)).into()
                        }
                        UNDERSCORE => make_ident!(),
                        k if k.is_literal() => {
                            let text = token.to_text(conv);
                            let span = conv.span_for(abs_range);
                            token_to_literal(&text, span).into()
                        }
                        LIFETIME_IDENT => {
                            // Split a lifetime into a joint `'` punct followed by an identifier.
                            let apostrophe = tt::Leaf::from(tt::Punct {
                                char: '\'',
                                spacing: tt::Spacing::Joint,
                                span: conv
                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
                            });
                            builder.push(apostrophe);

                            let ident = tt::Leaf::from(tt::Ident {
                                sym: Symbol::intern(&token.to_text(conv)[1..]),
                                span: conv.span_for(TextRange::new(
                                    abs_range.start() + TextSize::of('\''),
                                    abs_range.end(),
                                )),
                                is_raw: tt::IdentIsRaw::No,
                            });
                            builder.push(ident);
                            continue;
                        }
                        // Whitespace and anything else without token-tree representation is dropped.
                        _ => continue,
                    };

                    leaf
                }
            },
        };

        builder.push(tt);
    }

    // Close any subtrees left open by unbalanced input, using the call-site span.
    while builder.expected_delimiters().next().is_some() {
        // FIXME: record an error somewhere?
        builder.close(conv.call_site());
    }
    builder.build_skip_top_subtree()
}
378
379fn is_single_token_op(kind: SyntaxKind) -> bool {
380    matches!(
381        kind,
382        EQ | L_ANGLE
383            | R_ANGLE
384            | BANG
385            | AMP
386            | PIPE
387            | TILDE
388            | AT
389            | DOT
390            | COMMA
391            | SEMICOLON
392            | COLON
393            | POUND
394            | DOLLAR
395            | QUESTION
396            | PLUS
397            | MINUS
398            | STAR
399            | SLASH
400            | PERCENT
401            | CARET
402            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
403            // identifier.
404            | LIFETIME_IDENT
405    )
406}
407
408/// Returns the textual content of a doc comment block as a quoted string
409/// That is, strips leading `///` (or `/**`, etc)
410/// and strips the ending `*/`
411/// And then quote the string, which is needed to convert to `tt::Literal`
412///
413/// Note that proc-macros desugar with string literals where as macro_rules macros desugar with raw string literals.
414pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
415    match mode {
416        DocCommentDesugarMode::Mbe => {
417            let mut num_of_hashes = 0;
418            let mut count = 0;
419            for ch in text.chars() {
420                count = match ch {
421                    '"' => 1,
422                    '#' if count > 0 => count + 1,
423                    _ => 0,
424                };
425                num_of_hashes = num_of_hashes.max(count);
426            }
427
428            // Quote raw string with delimiters
429            (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
430        }
431        // Quote string with delimiters
432        DocCommentDesugarMode::ProcMacro => {
433            (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
434        }
435    }
436}
437
/// Desugars a doc comment token into the equivalent `#[doc = "..."]` (or `#![doc = "..."]`
/// for inner doc comments) attribute token stream and pushes it onto `builder`.
///
/// Does nothing if `token` is not a doc comment.
fn convert_doc_comment<S: Copy>(
    token: &syntax::SyntaxToken,
    span: S,
    mode: DocCommentDesugarMode,
    builder: &mut tt::TopSubtreeBuilder<S>,
) {
    let Some(comment) = ast::Comment::cast(token.clone()) else { return };
    let Some(doc) = comment.kind().doc else { return };

    // All produced tokens share the single `span` of the original comment.
    let mk_ident = |s: &str| {
        tt::Leaf::from(tt::Ident { sym: Symbol::intern(s), span, is_raw: tt::IdentIsRaw::No })
    };

    let mk_punct =
        |c: char| tt::Leaf::from(tt::Punct { char: c, spacing: tt::Spacing::Alone, span });

    let mk_doc_literal = |comment: &ast::Comment| {
        // Strip the leading `///`, `//!`, `/**`, etc.
        let prefix_len = comment.prefix().len();
        let mut text = &comment.text()[prefix_len..];

        // Remove ending "*/"
        if comment.kind().shape == ast::CommentShape::Block {
            text = &text[0..text.len() - 2];
        }
        let (text, kind) = desugar_doc_comment_text(text, mode);
        let lit = tt::Literal { symbol: text, span, kind, suffix: None };

        tt::Leaf::from(lit)
    };

    // Make `doc = "..."`
    let meta_tkns = [mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];

    // Make `#[...]` (or `#![...]` for inner doc comments)
    builder.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        builder.push(mk_punct('!'));
    }
    builder.open(tt::DelimiterKind::Bracket, span);
    builder.extend(meta_tkns);
    builder.close(span);
}
480
/// A raw token (straight from lexer) converter
struct RawConverter<'a, Ctx> {
    lexed: parser::LexedStr<'a>,
    /// Index of the next token to be consumed from `lexed`.
    pos: usize,
    /// Anchor stored in every produced span.
    anchor: SpanAnchor,
    /// Syntax context stored in every produced span.
    ctx: Ctx,
    mode: DocCommentDesugarMode,
}
/// A raw token (straight from lexer) converter that gives every token the same span.
struct StaticRawConverter<'a, S> {
    lexed: parser::LexedStr<'a>,
    /// Index of the next token to be consumed from `lexed`.
    pos: usize,
    /// The single span used for every produced token.
    span: S,
    mode: DocCommentDesugarMode,
}
496
/// View of a single token produced by a [`TokenConverter`]; `ctx` is the converter itself,
/// which may hold the data the token refers to (e.g. the lexed text).
trait SrcToken<Ctx, S> {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    /// The token's character, for punctuation tokens; implementations may return the
    /// first character of the text or `None` for non-punct tokens.
    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    /// The already-converted leaf, for synthetic tokens that carry one (see `SynToken::Leaf`).
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        None
    }
}
508
/// Source of tokens for [`convert_tokens`]; implemented by [`RawConverter`],
/// [`StaticRawConverter`] and [`Converter`].
trait TokenConverter<S>: Sized {
    type Token: SrcToken<Self, S>;

    /// Desugars `token` (a doc comment) into attribute tokens pushed onto `builder`.
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    );

    /// Consumes and returns the next token together with its absolute text range.
    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    /// Returns the next token without consuming it.
    fn peek(&self) -> Option<Self::Token>;

    /// Returns the span to attach to tokens produced from `range`.
    fn span_for(&self, range: TextRange) -> S;

    /// Returns the span used for the invisible top-level delimiter and recovery closes.
    fn call_site(&self) -> S;
}
527
528impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize {
529    fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind {
530        ctx.lexed.kind(*self)
531    }
532
533    fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> {
534        ctx.lexed.text(*self).chars().next()
535    }
536
537    fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr {
538        ctx.lexed.text(*self).into()
539    }
540}
541
/// Raw tokens are addressed by their index into the lexed stream.
impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize {
    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
        // First character of the token text; callers only use this for one-char puncts.
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}
555
impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx>
where
    SpanData<Ctx>: Copy,
{
    /// Tokens are referenced by their index into the lexed stream.
    type Token = usize;

    fn convert_doc_comment(
        &self,
        &token: &usize,
        span: SpanData<Ctx>,
        builder: &mut tt::TopSubtreeBuilder<SpanData<Ctx>>,
    ) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        // Convert the lexer's usize offsets to `TextSize`; bail if they don't fit in u32.
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, range: TextRange) -> SpanData<Ctx> {
        SpanData { range, anchor: self.anchor, ctx: self.ctx }
    }

    fn call_site(&self) -> SpanData<Ctx> {
        // Empty range at offset 0, with the converter's anchor and context.
        SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx }
    }
}
599
impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
where
    S: Copy,
{
    /// Tokens are referenced by their index into the lexed stream.
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: S, builder: &mut tt::TopSubtreeBuilder<S>) {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        // Convert the lexer's usize offsets to `TextSize`; bail if they don't fit in u32.
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, _: TextRange) -> S {
        // Every token gets the same static span, regardless of its range.
        self.span
    }

    fn call_site(&self) -> S {
        self.span
    }
}
638
/// Converter that walks a syntax tree, supporting token removal/injection and
/// splitting multi-character punctuation tokens into single-character puncts.
struct Converter<SpanMap, S, OnEvent> {
    /// The next real syntax token to hand out, if any.
    current: Option<SyntaxToken>,
    /// Synthetic leaves queued for emission before the next real token.
    current_leaves: VecDeque<tt::Leaf<S>>,
    preorder: PreorderWithTokens,
    /// Text range of the root node; tokens outside it are not emitted.
    range: TextRange,
    /// Punct token currently being split, and the offset of the last character handed out.
    punct_offset: Option<(SyntaxToken, TextSize)>,
    /// Used to make the emitted text ranges in the spans relative to the span anchor.
    map: SpanMap,
    /// Extra leaves to emit after the given element.
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
    /// Elements to hide from the output.
    remove: FxHashSet<SyntaxElement>,
    call_site: S,
    mode: DocCommentDesugarMode,
    /// Callback invoked on every walk event; may filter events and inject leaves.
    on_event: OnEvent,
}
653
impl<SpanMap, S, OnEvent> Converter<SpanMap, S, OnEvent>
where
    OnEvent: FnMut(&mut PreorderWithTokens, &WalkEvent<SyntaxElement>) -> (bool, Vec<tt::Leaf<S>>),
{
    /// Creates a converter over `node`, priming `current` with the first token of the walk.
    fn new(
        node: &SyntaxNode,
        map: SpanMap,
        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
        remove: FxHashSet<SyntaxElement>,
        call_site: S,
        mode: DocCommentDesugarMode,
        on_enter: OnEvent,
    ) -> Self {
        let mut converter = Converter {
            current: None,
            preorder: node.preorder_with_tokens(),
            range: node.text_range(),
            punct_offset: None,
            map,
            append,
            remove,
            call_site,
            current_leaves: VecDeque::new(),
            mode,
            on_event: on_enter,
        };
        converter.current = converter.next_token();
        converter
    }

    /// Advances the preorder walk to the next syntax token, honoring the `on_event`
    /// callback, the `remove` set (hidden elements) and the `append` map (extra leaves
    /// queued into `current_leaves`).
    fn next_token(&mut self) -> Option<SyntaxToken> {
        while let Some(ev) = self.preorder.next() {
            let (keep_event, insert_leaves) = (self.on_event)(&mut self.preorder, &ev);
            self.current_leaves.extend(insert_leaves);
            if !keep_event {
                continue;
            }
            match ev {
                WalkEvent::Enter(token) => {
                    if self.remove.contains(&token) {
                        match token {
                            syntax::NodeOrToken::Token(_) => {
                                // Removed tokens are simply dropped.
                                continue;
                            }
                            node => {
                                // Removed nodes are skipped wholesale, but leaves appended
                                // to them are still emitted in their place.
                                self.preorder.skip_subtree();
                                if let Some(v) = self.append.remove(&node) {
                                    self.current_leaves.extend(v);
                                    continue;
                                }
                            }
                        }
                    } else if let syntax::NodeOrToken::Token(token) = token {
                        return Some(token);
                    }
                }
                WalkEvent::Leave(ele) => {
                    // Leaves appended to an element are emitted right after it.
                    if let Some(v) = self.append.remove(&ele) {
                        self.current_leaves.extend(v);
                        continue;
                    }
                }
            }
        }
        None
    }
}
721
/// A token produced by [`Converter`]: either a real syntax token, a single character of a
/// (possibly multi-character) punctuation token, or a synthetic injected leaf.
#[derive(Debug)]
enum SynToken<S> {
    Ordinary(SyntaxToken),
    /// The character at `offset` within a punct token that is being split up.
    Punct { token: SyntaxToken, offset: usize },
    /// A synthetic leaf (from `append`/`on_event`) with no backing source text.
    Leaf(tt::Leaf<S>),
}
728
impl<S> SynToken<S> {
    /// Returns the underlying syntax token.
    ///
    /// Panics for [`SynToken::Leaf`], which has no backing token; callers must check
    /// `as_leaf` first.
    fn token(&self) -> &SyntaxToken {
        match self {
            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
            SynToken::Leaf(_) => unreachable!(),
        }
    }
}
737
impl<SpanMap, S, OnEvent> SrcToken<Converter<SpanMap, S, OnEvent>, S> for SynToken<S> {
    fn kind(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            // A split punct reports the kind of the single character at `offset`.
            SynToken::Punct { token, offset: i } => {
                SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap()
            }
            // Leaves are handled via `as_leaf` before `kind` is ever consulted.
            SynToken::Leaf(_) => {
                never!();
                SyntaxKind::ERROR
            }
        }
    }
    fn to_char(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
            SynToken::Leaf(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter<SpanMap, S, OnEvent>) -> SmolStr {
        match self {
            // Note: a split punct returns the *whole* token text, not just one character.
            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
            SynToken::Leaf(_) => {
                never!();
                "".into()
            }
        }
    }
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        match self {
            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
            SynToken::Leaf(it) => Some(it),
        }
    }
}
774
impl<S, SpanMap, OnEvent> TokenConverter<S> for Converter<SpanMap, S, OnEvent>
where
    S: Copy,
    SpanMap: SpanMapper<S>,
    OnEvent: FnMut(&mut PreorderWithTokens, &WalkEvent<SyntaxElement>) -> (bool, Vec<tt::Leaf<S>>),
{
    type Token = SynToken<S>;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: S,
        builder: &mut tt::TopSubtreeBuilder<S>,
    ) {
        convert_doc_comment(token.token(), span, self.mode, builder);
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        // Continue splitting a multi-character punct token, one character at a time.
        if let Some((punct, offset)) = self.punct_offset.clone()
            && usize::from(offset) + 1 < punct.text().len()
        {
            let offset = offset + TextSize::of('.');
            let range = punct.text_range();
            self.punct_offset = Some((punct.clone(), offset));
            let range = TextRange::at(range.start() + offset, TextSize::of('.'));
            return Some((
                SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
                range,
            ));
        }

        // Emit queued synthetic leaves before real tokens; they carry an empty range
        // since their spans are already baked into the leaf itself.
        if let Some(leaf) = self.current_leaves.pop_front() {
            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        self.current = self.next_token();
        let token = if curr.kind().is_punct() {
            // Hand out punct tokens one character at a time, starting at offset 0.
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punct { token: curr, offset: 0_usize }, range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        // Peek at the next character of a punct token currently being split.
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
            }
        }

        // NOTE(review): unlike `bump`, queued `current_leaves` are not surfaced here —
        // confirm this asymmetry is intentional.
        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punct { token: curr, offset: 0_usize }
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn span_for(&self, range: TextRange) -> S {
        self.map.span_for(range)
    }
    fn call_site(&self) -> S {
        self.call_site
    }
}
857
/// Sink fed by [`parser::Step`] events that rebuilds a [`SyntaxNode`] from a token-tree
/// cursor, recording a [`SpanMap`] from output text offsets back to the tokens' spans.
struct TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    /// Scratch buffer for token text currently being assembled.
    buf: String,
    /// Cursor over the token tree being re-rendered.
    cursor: Cursor<'a, SpanData<Ctx>>,
    /// Length of the output text emitted so far.
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: SpanMap<Ctx>,
}
868
869impl<'a, Ctx> TtTreeSink<'a, Ctx>
870where
871    SpanData<Ctx>: Copy,
872{
873    fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self {
874        TtTreeSink {
875            buf: String::new(),
876            cursor,
877            text_pos: 0.into(),
878            inner: SyntaxTreeBuilder::default(),
879            token_map: SpanMap::empty(),
880        }
881    }
882
883    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) {
884        self.token_map.finish();
885        (self.inner.finish(), self.token_map)
886    }
887}
888
889fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
890    let texts = match d {
891        tt::DelimiterKind::Parenthesis => "()",
892        tt::DelimiterKind::Brace => "{}",
893        tt::DelimiterKind::Bracket => "[]",
894        tt::DelimiterKind::Invisible => return None,
895    };
896
897    let idx = closing as usize;
898    Some(&texts[idx..texts.len() - (1 - idx)])
899}
900
impl<Ctx> TtTreeSink<'_, Ctx>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    Ctx: PartialEq,
{
    /// Parses a float literal as if it were one to two `NAME_REF` nodes with a
    /// `DOT` in between. This occurs when a float literal is used as a field access.
    ///
    /// When `has_pseudo_dot` is set, the literal's fractional part must be empty
    /// (the text ends in `.`); otherwise a second `NAME_REF` is emitted for the
    /// part after the dot.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        // The cursor must be positioned on a float literal leaf here.
        let (text, span) = match self.cursor.token_tree() {
            Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal {
                symbol: text,
                span,
                kind: tt::LitKind::Float,
                suffix: _,
            }))) => (text.as_str(), *span),
            tt => unreachable!("{tt:?}"),
        };
        // FIXME: Span splitting
        // All pieces below are mapped to the *whole* literal's span, since the
        // span is not actually split.
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());

                // Integer part before the dot.
                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();
                self.token_map.push(self.text_pos + TextSize::of(left), span);

                // here we move the exit up, the original exit has been deleted in process
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");
                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);

                if has_pseudo_dot {
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    assert!(!right.is_empty(), "{left}.{right}");
                    // Part after the dot, as a second NAME_REF.
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                    self.inner.finish_node();

                    // the parser creates an unbalanced start node, we are required to close it here
                    self.inner.finish_node();
                }
                self.text_pos += TextSize::of(text);
            }
            None => unreachable!(),
        }
        self.cursor.bump();
    }

    /// Emits one syntax token of `kind`, assembling its text from the next
    /// `n_tokens` leaves of the cursor, and records a span (merged over all
    /// consumed leaves) at the resulting text position.
    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == LIFETIME_IDENT {
            // A lifetime is represented as two token-tree leaves (a `'` punct
            // followed by an ident), so always consume both.
            n_tokens = 2;
        }

        let mut last_two = self.cursor.peek_two_leaves();
        let mut combined_span = None;
        'tokens: for _ in 0..n_tokens {
            // Backing storage for the single-byte punct text borrowed below.
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last_two = self.cursor.peek_two_leaves();
            let (text, span) = loop {
                break match self.cursor.token_tree() {
                    Some(tt::TokenTree::Leaf(leaf)) => match leaf {
                        tt::Leaf::Ident(ident) => {
                            if ident.is_raw.yes() {
                                // Re-emit the `r#` prefix that is stored as a
                                // flag, not as part of the symbol text.
                                self.buf.push_str("r#");
                                self.text_pos += TextSize::of("r#");
                            }
                            let r = (ident.sym.as_str(), ident.span);
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Punct(punct) => {
                            assert!(punct.char.is_ascii());
                            tmp = punct.char as u8;
                            let r = (
                                std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                punct.span,
                            );
                            self.cursor.bump();
                            r
                        }
                        tt::Leaf::Literal(lit) => {
                            // Literals are formatted straight into the buffer,
                            // so buffer/position/span bookkeeping happens here
                            // and the shared tail below is skipped.
                            let buf_l = self.buf.len();
                            format_to!(self.buf, "{lit}");
                            debug_assert_ne!(self.buf.len() - buf_l, 0);
                            self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32);
                            combined_span = match combined_span {
                                None => Some(lit.span),
                                Some(prev_span) => Some(Self::merge_spans(prev_span, lit.span)),
                            };
                            self.cursor.bump();
                            continue 'tokens;
                        }
                    },
                    Some(tt::TokenTree::Subtree(subtree)) => {
                        // Entering a subtree: emit its opening delimiter.
                        // Invisible delimiters have no text, so skip them.
                        self.cursor.bump();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => (it, subtree.delimiter.open),
                            None => continue,
                        }
                    }
                    None => {
                        // End of a subtree: emit its closing delimiter.
                        let parent = self.cursor.end();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => (it, parent.delimiter.close),
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
            combined_span = match combined_span {
                None => Some(span),
                Some(prev_span) => Some(Self::merge_spans(prev_span, span)),
            }
        }

        self.token_map.push(self.text_pos, combined_span.expect("expected at least one token"));
        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
        // Add whitespace between adjacent puncts
        if let Some([tt::Leaf::Punct(curr), tt::Leaf::Punct(next)]) = last_two {
            // Note: We always assume the semi-colon would be the last token in
            // other parts of RA such that we don't add whitespace here.
            //
            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
            // need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
                self.token_map.push(self.text_pos, curr.span);
            }
        }
    }

    /// Opens a new node of `kind` in the tree builder.
    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }

    /// Closes the most recently opened node.
    fn finish_node(&mut self) {
        self.inner.finish_node();
    }

    /// Records a parse error at the current text position.
    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }

    /// Merges two spans into one covering both, keeping `a`'s anchor and
    /// context. Ranges are only combined when both spans share the same
    /// anchor and syntax context; otherwise `a`'s range is kept as-is.
    fn merge_spans(a: SpanData<Ctx>, b: SpanData<Ctx>) -> SpanData<Ctx> {
        // We don't do what rustc does exactly, rustc does something clever when the spans have different syntax contexts
        // but this runs afoul of our separation between `span` and `hir-expand`.
        SpanData {
            range: if a.ctx == b.ctx && a.anchor == b.anchor {
                TextRange::new(
                    std::cmp::min(a.range.start(), b.range.start()),
                    std::cmp::max(a.range.end(), b.range.end()),
                )
            } else {
                // Combining ranges make no sense when they come from different syntax contexts.
                a.range
            },
            anchor: a.anchor,
            ctx: a.ctx,
        }
    }
}