xtask/codegen/
grammar.rs

//! This module generates the AST data types used by rust-analyzer.
2//!
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
5
6#![allow(clippy::disallowed_types)]
7
8use std::{
9    collections::{BTreeSet, HashSet},
10    fmt::Write,
11    fs,
12};
13
14use either::Either;
15use itertools::Itertools;
16use proc_macro2::{Punct, Spacing};
17use quote::{format_ident, quote};
18use stdx::panic_context;
19use ungrammar::{Grammar, Rule};
20
21use crate::{
22    codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat},
23    project_root,
24};
25
26mod ast_src;
27use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
28
29pub(crate) fn generate(check: bool) {
30    let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
31        .unwrap()
32        .parse()
33        .unwrap();
34    let ast = lower(&grammar);
35    let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar);
36
37    let syntax_kinds = generate_syntax_kinds(kinds_src);
38    let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
39    ensure_file_contents(
40        crate::flags::CodegenType::Grammar,
41        syntax_kinds_file.as_path(),
42        &syntax_kinds,
43        check,
44    );
45
46    let ast_tokens = generate_tokens(&ast);
47    let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
48    ensure_file_contents(
49        crate::flags::CodegenType::Grammar,
50        ast_tokens_file.as_path(),
51        &ast_tokens,
52        check,
53    );
54
55    let ast_nodes = generate_nodes(kinds_src, &ast);
56    let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
57    ensure_file_contents(
58        crate::flags::CodegenType::Grammar,
59        ast_nodes_file.as_path(),
60        &ast_nodes,
61        check,
62    );
63}
64
65fn generate_tokens(grammar: &AstSrc) -> String {
66    let tokens = grammar.tokens.iter().map(|token| {
67        let name = format_ident!("{}", token);
68        let kind = format_ident!("{}", to_upper_snake_case(token));
69        quote! {
70            pub struct #name {
71                pub(crate) syntax: SyntaxToken,
72            }
73            impl std::fmt::Display for #name {
74                fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75                    std::fmt::Display::fmt(&self.syntax, f)
76                }
77            }
78            impl AstToken for #name {
79                fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
80                fn cast(syntax: SyntaxToken) -> Option<Self> {
81                    if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
82                }
83                fn syntax(&self) -> &SyntaxToken { &self.syntax }
84            }
85
86            impl fmt::Debug for #name {
87                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88                    f.debug_struct(#token).field("syntax", &self.syntax).finish()
89                }
90            }
91            impl Clone for #name {
92                fn clone(&self) -> Self {
93                    Self { syntax: self.syntax.clone() }
94                }
95            }
96            impl hash::Hash for #name {
97                fn hash<H: hash::Hasher>(&self, state: &mut H) {
98                    self.syntax.hash(state);
99                }
100            }
101
102            impl Eq for #name {}
103            impl PartialEq for #name {
104                fn eq(&self, other: &Self) -> bool {
105                    self.syntax == other.syntax
106                }
107            }
108        }
109    });
110
111    add_preamble(
112        crate::flags::CodegenType::Grammar,
113        reformat(
114            quote! {
115                use std::{fmt, hash};
116
117                use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
118
119                #(#tokens)*
120            }
121            .to_string(),
122        ),
123    )
124    .replace("#[derive", "\n#[derive")
125}
126
127fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String {
128    let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
129        .nodes
130        .iter()
131        .map(|node| {
132            let node_str_name = &node.name;
133            let name = format_ident!("{}", node.name);
134            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
135            let traits = node
136                .traits
137                .iter()
138                .filter(|trait_name| {
139                    // Loops have two expressions so this might collide, therefore manual impl it
140                    node.name != "ForExpr" && node.name != "WhileExpr"
141                        || trait_name.as_str() != "HasLoopBody"
142                })
143                .map(|trait_name| {
144                    let trait_name = format_ident!("{}", trait_name);
145                    quote!(impl ast::#trait_name for #name {})
146                });
147
148            let methods = node.fields.iter().map(|field| {
149                let method_name = format_ident!("{}", field.method_name());
150                let ty = field.ty();
151
152                if field.is_many() {
153                    quote! {
154                        #[inline]
155                        pub fn #method_name(&self) -> AstChildren<#ty> {
156                            support::children(&self.syntax)
157                        }
158                    }
159                } else if let Some(token_kind) = field.token_kind() {
160                    quote! {
161                        #[inline]
162                        pub fn #method_name(&self) -> Option<#ty> {
163                            support::token(&self.syntax, #token_kind)
164                        }
165                    }
166                } else {
167                    quote! {
168                        #[inline]
169                        pub fn #method_name(&self) -> Option<#ty> {
170                            support::child(&self.syntax)
171                        }
172                    }
173                }
174            });
175            (
176                quote! {
177                    #[pretty_doc_comment_placeholder_workaround]
178                    pub struct #name {
179                        pub(crate) syntax: SyntaxNode,
180                    }
181
182                    #(#traits)*
183
184                    impl #name {
185                        #(#methods)*
186                    }
187                },
188                quote! {
189                    impl AstNode for #name {
190                        #[inline]
191                        fn kind() -> SyntaxKind
192                        where
193                            Self: Sized
194                        {
195                            #kind
196                        }
197                        #[inline]
198                        fn can_cast(kind: SyntaxKind) -> bool {
199                            kind == #kind
200                        }
201                        #[inline]
202                        fn cast(syntax: SyntaxNode) -> Option<Self> {
203                            if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
204                        }
205                        #[inline]
206                        fn syntax(&self) -> &SyntaxNode { &self.syntax }
207                    }
208
209                    impl hash::Hash for #name {
210                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
211                            self.syntax.hash(state);
212                        }
213                    }
214
215                    impl Eq for #name {}
216                    impl PartialEq for #name {
217                        fn eq(&self, other: &Self) -> bool {
218                            self.syntax == other.syntax
219                        }
220                    }
221
222                    impl Clone for #name {
223                        fn clone(&self) -> Self {
224                            Self { syntax: self.syntax.clone() }
225                        }
226                    }
227
228                    impl fmt::Debug for #name {
229                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
231                        }
232                    }
233                },
234            )
235        })
236        .unzip();
237
238    let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
239        .enums
240        .iter()
241        .map(|en| {
242            let variants: Vec<_> =
243                en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect();
244            let name = format_ident!("{}", en.name);
245            let kinds: Vec<_> = variants
246                .iter()
247                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
248                .collect();
249            let traits = en.traits.iter().sorted().map(|trait_name| {
250                let trait_name = format_ident!("{}", trait_name);
251                quote!(impl ast::#trait_name for #name {})
252            });
253
254            let ast_node = if en.name == "Stmt" {
255                quote! {}
256            } else {
257                quote! {
258                    impl AstNode for #name {
259                        #[inline]
260                        fn can_cast(kind: SyntaxKind) -> bool {
261                            matches!(kind, #(#kinds)|*)
262                        }
263                        #[inline]
264                        fn cast(syntax: SyntaxNode) -> Option<Self> {
265                            let res = match syntax.kind() {
266                                #(
267                                #kinds => #name::#variants(#variants { syntax }),
268                                )*
269                                _ => return None,
270                            };
271                            Some(res)
272                        }
273                        #[inline]
274                        fn syntax(&self) -> &SyntaxNode {
275                            match self {
276                                #(
277                                #name::#variants(it) => &it.syntax,
278                                )*
279                            }
280                        }
281                    }
282                }
283            };
284
285            (
286                quote! {
287                    #[pretty_doc_comment_placeholder_workaround]
288                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
289                    pub enum #name {
290                        #(#variants(#variants),)*
291                    }
292
293                    #(#traits)*
294                },
295                quote! {
296                    #(
297                        impl From<#variants> for #name {
298                            #[inline]
299                            fn from(node: #variants) -> #name {
300                                #name::#variants(node)
301                            }
302                        }
303                    )*
304                    #ast_node
305                },
306            )
307        })
308        .unzip();
309    let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
310        .nodes
311        .iter()
312        .flat_map(|node| node.traits.iter().map(move |t| (t, node)))
313        .into_group_map()
314        .into_iter()
315        .sorted_by_key(|(name, _)| *name)
316        .map(|(trait_name, nodes)| {
317            let name = format_ident!("Any{}", trait_name);
318            let node_str_name = name.to_string();
319            let trait_name = format_ident!("{}", trait_name);
320            let kinds: Vec<_> = nodes
321                .iter()
322                .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))
323                .collect();
324            let nodes = nodes.iter().map(|node| format_ident!("{}", node.name));
325            (
326                quote! {
327                    #[pretty_doc_comment_placeholder_workaround]
328                    pub struct #name {
329                        pub(crate) syntax: SyntaxNode,
330                    }
331                    impl #name {
332                        #[inline]
333                        pub fn new<T: ast::#trait_name>(node: T) -> #name {
334                            #name {
335                                syntax: node.syntax().clone()
336                            }
337                        }
338                    }
339                },
340                quote! {
341                    impl ast::#trait_name for #name {}
342                    impl AstNode for #name {
343                        #[inline]
344                        fn can_cast(kind: SyntaxKind) -> bool {
345                            matches!(kind, #(#kinds)|*)
346                        }
347                        #[inline]
348                        fn cast(syntax: SyntaxNode) -> Option<Self> {
349                            Self::can_cast(syntax.kind()).then_some(#name { syntax })
350                        }
351                        #[inline]
352                        fn syntax(&self) -> &SyntaxNode {
353                            &self.syntax
354                        }
355                    }
356
357                    impl hash::Hash for #name {
358                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
359                            self.syntax.hash(state);
360                        }
361                    }
362
363                    impl Eq for #name {}
364                    impl PartialEq for #name {
365                        fn eq(&self, other: &Self) -> bool {
366                            self.syntax == other.syntax
367                        }
368                    }
369
370                    impl Clone for #name {
371                        fn clone(&self) -> Self {
372                            Self { syntax: self.syntax.clone() }
373                        }
374                    }
375
376                    impl fmt::Debug for #name {
377                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
378                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
379                        }
380                    }
381
382                    #(
383                        impl From<#nodes> for #name {
384                            #[inline]
385                            fn from(node: #nodes) -> #name {
386                                #name { syntax: node.syntax }
387                            }
388                        }
389                    )*
390                },
391            )
392        })
393        .unzip();
394
395    let enum_names = grammar.enums.iter().map(|it| &it.name);
396    let node_names = grammar.nodes.iter().map(|it| &it.name);
397
398    let display_impls =
399        enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
400            quote! {
401                impl std::fmt::Display for #name {
402                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
403                        std::fmt::Display::fmt(self.syntax(), f)
404                    }
405                }
406            }
407        });
408
409    let defined_nodes: HashSet<_> = node_names.collect();
410
411    for node in kinds
412        .nodes
413        .iter()
414        .map(|kind| to_pascal_case(kind))
415        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
416    {
417        eprintln!("Warning: node {node} not defined in AST source");
418        drop(node);
419    }
420
421    let ast = quote! {
422        #![allow(non_snake_case)]
423        use std::{fmt, hash};
424
425        use crate::{
426            SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
427            ast::{self, AstNode, AstChildren, support},
428            T,
429        };
430
431        #(#node_defs)*
432        #(#enum_defs)*
433        #(#any_node_defs)*
434        #(#node_boilerplate_impls)*
435        #(#enum_boilerplate_impls)*
436        #(#any_node_boilerplate_impls)*
437        #(#display_impls)*
438    };
439
440    let ast = ast.to_string().replace("T ! [", "T![");
441
442    let mut res = String::with_capacity(ast.len() * 2);
443
444    let mut docs =
445        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
446
447    for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
448        res.push_str(chunk);
449        if let Some(doc) = docs.next() {
450            write_doc_comment(doc, &mut res);
451        }
452    }
453
454    let res = add_preamble(crate::flags::CodegenType::Grammar, reformat(res));
455    res.replace("#[derive", "\n#[derive")
456}
457
/// Appends every entry of `contents` to `dest` as its own `///` line.
///
/// Each entry is written verbatim right after the three slashes, so any
/// leading space must already be part of the entry.
fn write_doc_comment(contents: &[String], dest: &mut String) {
    contents.iter().try_for_each(|text| writeln!(dest, "///{text}")).unwrap();
}
463
/// Renders the source text that defines `SyntaxKind` and the `T_` macro.
///
/// The generated code contains the `SyntaxKind` enum (punctuation, keywords,
/// literals, tokens and nodes from `grammar`), its helper methods (`text`,
/// the keyword predicates, `from_keyword`, `from_contextual_keyword`,
/// `from_char`, ...), the `T_` macro mapping token spellings to kinds, and
/// hand-written `Copy`/`Clone`/`PartialEq`/`Eq`/`PartialOrd`/`Ord`/`Hash`
/// impls. After `reformat` and `add_preamble`, the macro body is re-laid-out
/// textually so that each arm sits on its own line.
fn generate_syntax_kinds(grammar: KindsSrc) -> String {
    // Punctuation whose spelling is exactly one byte long; feeds `from_char`.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // Matcher side of the `T_` macro arms for punctuation. Brackets are
    // emitted as char literals, everything else as raw punctuation tokens.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        if "{}[]()".contains(token) {
            let c = token.chars().next().unwrap();
            quote! { #c }
            // underscore is an identifier in the proc-macro api
        } else if *token == "_" {
            quote! { _ }
        } else {
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
    let punctuation_texts = grammar.punct.iter().map(|&(text, _name)| text);

    // Keyword spelling -> enum variant name; `Self` is special-cased.
    let fmt_kw_as_variant = |&name| match name {
        "Self" => format_ident!("SELF_TYPE_KW"),
        name => format_ident!("{}_KW", to_upper_snake_case(name)),
    };
    let strict_keywords = grammar.keywords;
    let strict_keywords_variants =
        strict_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let strict_keywords_tokens = strict_keywords.iter().map(|it| format_ident!("{it}"));

    // Match arms (by variant and by spelling) that only fire once the
    // requested edition has reached the edition attached to the keyword.
    let edition_dependent_keywords_variants_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            let kw = fmt_kw_as_variant(kw);
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords_str_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords = grammar.edition_dependent_keywords.iter().map(|&(it, _)| it);
    let edition_dependent_keywords_variants = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, _)| fmt_kw_as_variant(kw))
        .collect::<Vec<_>>();
    let edition_dependent_keywords_tokens =
        grammar.edition_dependent_keywords.iter().map(|(it, _)| format_ident!("{it}"));

    let contextual_keywords = grammar.contextual_keywords;
    let contextual_keywords_variants =
        contextual_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let contextual_keywords_tokens = contextual_keywords.iter().map(|it| format_ident!("{it}"));
    // A contextual keyword that is also edition-dependent only counts as
    // contextual before the edition attached to it.
    let contextual_keywords_str_match_arm = grammar.contextual_keywords.iter().map(|kw| {
        match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw) {
            Some((_, ed)) => quote! { #kw if edition < #ed },
            None => quote! { #kw },
        }
    });
    let contextual_keywords_variants_match_arm = grammar
        .contextual_keywords
        .iter()
        .map(|kw_s| {
            let kw = fmt_kw_as_variant(kw_s);
            match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw_s) {
                Some((_, ed)) => quote! { #kw if edition < #ed },
                None => quote! { #kw },
            }
        })
        .collect::<Vec<_>>();

    // Contextual and edition-dependent keywords can overlap, so the variant
    // list used for the enum body is sorted and deduplicated.
    let non_strict_keyword_variants = contextual_keywords_variants
        .iter()
        .chain(edition_dependent_keywords_variants.iter())
        .sorted()
        .dedup()
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        use crate::Edition;

        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
        #[derive(Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            // Technical SyntaxKinds: they appear temporarily during parsing,
            // but never end up in the final tree
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#strict_keywords_variants,)*
            #(#non_strict_keyword_variants,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            // Technical kind so that we can cast from u16 safely
            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            #[allow(unreachable_patterns)]
            pub const fn text(self) -> &'static str {
                match self {
                    TOMBSTONE | EOF | __LAST
                    #( | #literals )*
                    #( | #nodes )*
                    #( | #tokens )* => panic!("no text for these `SyntaxKind`s"),
                    #( #punctuation => #punctuation_texts ,)*
                    #( #strict_keywords_variants => #strict_keywords ,)*
                    #( #contextual_keywords_variants => #contextual_keywords ,)*
                    #( #edition_dependent_keywords_variants => #edition_dependent_keywords ,)*
                }
            }

            /// Checks whether this syntax kind is a strict keyword for the given edition.
            /// Strict keywords are identifiers that are always considered keywords.
            pub fn is_strict_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            /// Checks whether this syntax kind is a weak keyword for the given edition.
            /// Weak keywords are identifiers that are considered keywords only in certain contexts.
            pub fn is_contextual_keyword(self, edition: Edition) -> bool {
                match self {
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            /// Checks whether this syntax kind is a strict or weak keyword for the given edition.
            pub fn is_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                matches!(self, #(#punctuation)|*)
            }

            pub fn is_literal(self) -> bool {
                matches!(self, #(#literals)|*)
            }

            pub fn from_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#strict_keywords => #strict_keywords_variants,)*
                    #(#edition_dependent_keywords_str_match_arm => #edition_dependent_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_contextual_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#contextual_keywords_str_match_arm => #contextual_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        #[macro_export]
        macro_rules! T_ {
            #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
            #([#strict_keywords_tokens] => { $crate::SyntaxKind::#strict_keywords_variants };)*
            #([#contextual_keywords_tokens] => { $crate::SyntaxKind::#contextual_keywords_variants };)*
            #([#edition_dependent_keywords_tokens] => { $crate::SyntaxKind::#edition_dependent_keywords_variants };)*
            [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
            [int_number] => { $crate::SyntaxKind::INT_NUMBER };
            [ident] => { $crate::SyntaxKind::IDENT };
            [string] => { $crate::SyntaxKind::STRING };
            [shebang] => { $crate::SyntaxKind::SHEBANG };
            [frontmatter] => { $crate::SyntaxKind::FRONTMATTER };
        }

        impl ::core::marker::Copy for SyntaxKind {}
        impl ::core::clone::Clone for SyntaxKind {
            #[inline]
            fn clone(&self) -> Self {
                *self
            }
        }
        impl ::core::cmp::PartialEq for SyntaxKind {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                (*self as u16) == (*other as u16)
            }
        }
        impl ::core::cmp::Eq for SyntaxKind {}
        impl ::core::cmp::PartialOrd for SyntaxKind {
            #[inline]
            fn partial_cmp(&self, other: &Self) -> core::option::Option<core::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }
        impl ::core::cmp::Ord for SyntaxKind {
            #[inline]
            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                (*self as u16).cmp(&(*other as u16))
            }
        }
        impl ::core::hash::Hash for SyntaxKind {
            fn hash<H: ::core::hash::Hasher>(&self, state: &mut H) {
                ::core::mem::discriminant(self).hash(state);
            }
        }
    };

    let result = add_preamble(crate::flags::CodegenType::Grammar, reformat(ast.to_string()));

    // Textual post-processing: locate the `T_` macro (from its header up to
    // the `Copy` impl that follows it) and put every arm on its own line.
    // If either marker is missing, the text is returned unchanged.
    if let Some(start) = result.find("macro_rules ! T_")
        && let Some(macro_end) = result[start..].find("\nimpl ::core::marker::Copy")
    {
        let macro_section = &result[start..start + macro_end];
        let formatted_macro = macro_section
            .replace("T_ { [", "T_ {\n    [")
            .replace(" ; [", ";\n    [")
            .replace(" ; }", ";\n}")
            .trim_end()
            .to_owned()
            + "\n";
        return result.replace(macro_section, &formatted_macro);
    }

    result
}
727
/// Converts a name such as `ForExpr` to `FOR_EXPR`.
///
/// An underscore is inserted before every ASCII uppercase character except
/// the very first character, and every character is ASCII-uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (position, ch) in s.chars().enumerate() {
        if position > 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
    }
    out
}
741
/// Converts a name such as `ForExpr` to `for_expr`.
///
/// An underscore is inserted before every ASCII uppercase character except
/// the very first character, and every character is ASCII-lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (offset, ch) in s.char_indices() {
        let starts_new_word = offset != 0 && ch.is_ascii_uppercase();
        if starts_new_word {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
    }
    out
}
755
/// Converts a name such as `FOR_EXPR` to `ForExpr`.
///
/// Underscores are dropped. The first character and every character that
/// follows an underscore is ASCII-uppercased; all others are ASCII-lowercased.
fn to_pascal_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut capitalize_next = true;
    for ch in s.chars() {
        match ch {
            '_' => capitalize_next = true,
            _ if capitalize_next => {
                out.push(ch.to_ascii_uppercase());
                capitalize_next = false;
            }
            _ => out.push(ch.to_ascii_lowercase()),
        }
    }
    out
}
771
/// Returns `s` with a single `s` appended; no other pluralization rule is applied.
fn pluralize(s: &str) -> String {
    [s, "s"].concat()
}
775
776impl Field {
777    fn is_many(&self) -> bool {
778        matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
779    }
780    fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
781        match self {
782            Field::Token(token) => {
783                let token: proc_macro2::TokenStream = token.parse().unwrap();
784                Some(quote! { T![#token] })
785            }
786            _ => None,
787        }
788    }
789    fn method_name(&self) -> String {
790        match self {
791            Field::Token(name) => {
792                let name = match name.as_str() {
793                    ";" => "semicolon",
794                    "->" => "thin_arrow",
795                    "'{'" => "l_curly",
796                    "'}'" => "r_curly",
797                    "'('" => "l_paren",
798                    "')'" => "r_paren",
799                    "'['" => "l_brack",
800                    "']'" => "r_brack",
801                    "<" => "l_angle",
802                    ">" => "r_angle",
803                    "=" => "eq",
804                    "!" => "excl",
805                    "*" => "star",
806                    "&" => "amp",
807                    "-" => "minus",
808                    "_" => "underscore",
809                    "." => "dot",
810                    ".." => "dotdot",
811                    "..." => "dotdotdot",
812                    "..=" => "dotdoteq",
813                    "=>" => "fat_arrow",
814                    "@" => "at",
815                    ":" => "colon",
816                    "::" => "coloncolon",
817                    "#" => "pound",
818                    "?" => "question_mark",
819                    "," => "comma",
820                    "|" => "pipe",
821                    "~" => "tilde",
822                    _ => name,
823                };
824                format!("{name}_token",)
825            }
826            Field::Node { name, .. } => {
827                if name == "type" {
828                    String::from("ty")
829                } else {
830                    name.to_owned()
831                }
832            }
833        }
834    }
835    fn ty(&self) -> proc_macro2::Ident {
836        match self {
837            Field::Token(_) => format_ident!("SyntaxToken"),
838            Field::Node { ty, .. } => format_ident!("{}", ty),
839        }
840    }
841}
842
/// Strips any leading run of `@`, `#` and `?` characters from `name`.
///
/// If stripping would leave nothing (the name consists solely of those
/// characters, or is empty), the original `name` is returned unchanged.
fn clean_token_name(name: &str) -> String {
    let is_prefix_marker = |c: char| matches!(c, '@' | '#' | '?');
    match name.trim_start_matches(is_prefix_marker) {
        "" => name.to_owned(),
        stripped => stripped.to_owned(),
    }
}
847
848fn lower(grammar: &Grammar) -> AstSrc {
849    let mut res = AstSrc {
850        tokens:
851            "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"
852                .split_ascii_whitespace()
853                .map(|it| it.to_owned())
854                .collect::<Vec<_>>(),
855        ..Default::default()
856    };
857
858    let nodes = grammar.iter().collect::<Vec<_>>();
859
860    for &node in &nodes {
861        let name = grammar[node].name.clone();
862        let rule = &grammar[node].rule;
863        let _g = panic_context::enter(name.clone());
864        match lower_enum(grammar, rule) {
865            Some(variants) => {
866                let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
867                res.enums.push(enum_src);
868            }
869            None => {
870                let mut fields = Vec::new();
871                lower_rule(&mut fields, grammar, None, rule);
872                res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
873            }
874        }
875    }
876
877    deduplicate_fields(&mut res);
878    extract_enums(&mut res);
879    extract_struct_traits(&mut res);
880    extract_enum_traits(&mut res);
881    res.nodes.sort_by_key(|it| it.name.clone());
882    res.enums.sort_by_key(|it| it.name.clone());
883    res.tokens.sort();
884    res.nodes.iter_mut().for_each(|it| {
885        it.traits.sort();
886        it.fields.sort_by_key(|it| match it {
887            Field::Token(name) => (true, name.clone()),
888            Field::Node { name, .. } => (false, name.clone()),
889        });
890    });
891    res.enums.iter_mut().for_each(|it| {
892        it.traits.sort();
893        it.variants.sort();
894    });
895    res
896}
897
898fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
899    let alternatives = match rule {
900        Rule::Alt(it) => it,
901        _ => return None,
902    };
903    let mut variants = Vec::new();
904    for alternative in alternatives {
905        match alternative {
906            Rule::Node(it) => variants.push(grammar[*it].name.clone()),
907            Rule::Token(it) if grammar[*it].name == ";" => (),
908            _ => return None,
909        }
910    }
911    Some(variants)
912}
913
914fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
915    if lower_separated_list(acc, grammar, label, rule) {
916        return;
917    }
918
919    match rule {
920        Rule::Node(node) => {
921            let ty = grammar[*node].name.clone();
922            let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
923            let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
924            acc.push(field);
925        }
926        Rule::Token(token) => {
927            assert!(label.is_none());
928            let mut name = clean_token_name(&grammar[*token].name);
929            if "[]{}()".contains(&name) {
930                name = format!("'{name}'");
931            }
932            let field = Field::Token(name);
933            acc.push(field);
934        }
935        Rule::Rep(inner) => {
936            if let Rule::Node(node) = &**inner {
937                let ty = grammar[*node].name.clone();
938                let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
939                let field = Field::Node { name, ty, cardinality: Cardinality::Many };
940                acc.push(field);
941                return;
942            }
943            panic!("unhandled rule: {rule:?}")
944        }
945        Rule::Labeled { label: l, rule } => {
946            assert!(label.is_none());
947            let manually_implemented = matches!(
948                l.as_str(),
949                "lhs"
950                    | "rhs"
951                    | "then_branch"
952                    | "else_branch"
953                    | "start"
954                    | "end"
955                    | "op"
956                    | "index"
957                    | "base"
958                    | "value"
959                    | "trait"
960                    | "self_ty"
961                    | "iterable"
962                    | "condition"
963                    | "args"
964                    | "body"
965            );
966            if manually_implemented {
967                return;
968            }
969            lower_rule(acc, grammar, Some(l), rule);
970        }
971        Rule::Seq(rules) | Rule::Alt(rules) => {
972            for rule in rules {
973                lower_rule(acc, grammar, label, rule)
974            }
975        }
976        Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
977    }
978}
979
980// (T (',' T)* ','?)
981fn lower_separated_list(
982    acc: &mut Vec<Field>,
983    grammar: &Grammar,
984    label: Option<&String>,
985    rule: &Rule,
986) -> bool {
987    let rule = match rule {
988        Rule::Seq(it) => it,
989        _ => return false,
990    };
991
992    let (nt, repeat, trailing_sep) = match rule.as_slice() {
993        [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
994            (Either::Left(node), repeat, Some(trailing_sep))
995        }
996        [Rule::Node(node), Rule::Rep(repeat)] => (Either::Left(node), repeat, None),
997        [Rule::Token(token), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
998            (Either::Right(token), repeat, Some(trailing_sep))
999        }
1000        [Rule::Token(token), Rule::Rep(repeat)] => (Either::Right(token), repeat, None),
1001        _ => return false,
1002    };
1003    let repeat = match &**repeat {
1004        Rule::Seq(it) => it,
1005        _ => return false,
1006    };
1007    if !matches!(
1008        repeat.as_slice(),
1009        [comma, nt_]
1010            if trailing_sep.is_none_or(|it| comma == &**it) && match (nt, nt_) {
1011                (Either::Left(node), Rule::Node(nt_)) => node == nt_,
1012                (Either::Right(token), Rule::Token(nt_)) => token == nt_,
1013                _ => false,
1014            }
1015    ) {
1016        return false;
1017    }
1018    match nt {
1019        Either::Right(token) => {
1020            let name = clean_token_name(&grammar[*token].name);
1021            let field = Field::Token(name);
1022            acc.push(field);
1023        }
1024        Either::Left(node) => {
1025            let ty = grammar[*node].name.clone();
1026            let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
1027            let field = Field::Node { name, ty, cardinality: Cardinality::Many };
1028            acc.push(field);
1029        }
1030    }
1031    true
1032}
1033
1034fn deduplicate_fields(ast: &mut AstSrc) {
1035    for node in &mut ast.nodes {
1036        let mut i = 0;
1037        'outer: while i < node.fields.len() {
1038            for j in 0..i {
1039                let f1 = &node.fields[i];
1040                let f2 = &node.fields[j];
1041                if f1 == f2 {
1042                    node.fields.remove(i);
1043                    continue 'outer;
1044                }
1045            }
1046            i += 1;
1047        }
1048    }
1049}
1050
1051fn extract_enums(ast: &mut AstSrc) {
1052    for node in &mut ast.nodes {
1053        for enm in &ast.enums {
1054            let mut to_remove = Vec::new();
1055            for (i, field) in node.fields.iter().enumerate() {
1056                let ty = field.ty().to_string();
1057                if enm.variants.iter().any(|it| it == &ty) {
1058                    to_remove.push(i);
1059                }
1060            }
1061            if to_remove.len() == enm.variants.len() {
1062                node.remove_field(to_remove);
1063                let ty = enm.name.clone();
1064                let name = to_lower_snake_case(&ty);
1065                node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
1066            }
1067        }
1068    }
1069}
1070
/// Table of `(trait name, accessor method names)` pairs.
///
/// `extract_struct_traits` tries each entry against every node via
/// `extract_struct_trait`: when the number of a node's fields whose
/// `method_name()` appears in the entry's list equals the length of that
/// list, those fields are removed from the node and the trait name is added
/// to the node's traits.
const TRAITS: &[(&str, &[&str])] = &[
    ("HasAttrs", &["attrs"]),
    ("HasName", &["name"]),
    ("HasVisibility", &["visibility"]),
    ("HasGenericParams", &["generic_param_list", "where_clause"]),
    ("HasGenericArgs", &["generic_arg_list"]),
    ("HasTypeBounds", &["type_bound_list", "colon_token"]),
    ("HasModuleItem", &["items"]),
    ("HasLoopBody", &["label", "loop_body"]),
    ("HasArgList", &["arg_list"]),
];
1082
1083fn extract_struct_traits(ast: &mut AstSrc) {
1084    for node in &mut ast.nodes {
1085        for (name, methods) in TRAITS {
1086            extract_struct_trait(node, name, methods);
1087        }
1088    }
1089
1090    let nodes_with_doc_comments = [
1091        "SourceFile",
1092        "Fn",
1093        "Struct",
1094        "Union",
1095        "RecordField",
1096        "TupleField",
1097        "Enum",
1098        "Variant",
1099        "Trait",
1100        "Module",
1101        "Static",
1102        "Const",
1103        "TypeAlias",
1104        "Impl",
1105        "ExternBlock",
1106        "ExternCrate",
1107        "MacroCall",
1108        "MacroRules",
1109        "MacroDef",
1110        "Use",
1111    ];
1112
1113    for node in &mut ast.nodes {
1114        if nodes_with_doc_comments.contains(&&*node.name) {
1115            node.traits.push("HasDocComments".into());
1116        }
1117    }
1118}
1119
1120fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
1121    let mut to_remove = Vec::new();
1122    for (i, field) in node.fields.iter().enumerate() {
1123        let method_name = field.method_name();
1124        if methods.iter().any(|&it| it == method_name) {
1125            to_remove.push(i);
1126        }
1127    }
1128    if to_remove.len() == methods.len() {
1129        node.traits.push(trait_name.to_owned());
1130        node.remove_field(to_remove);
1131    }
1132}
1133
1134fn extract_enum_traits(ast: &mut AstSrc) {
1135    for enm in &mut ast.enums {
1136        if enm.name == "Stmt" {
1137            continue;
1138        }
1139        let nodes = &ast.nodes;
1140        let mut variant_traits = enm
1141            .variants
1142            .iter()
1143            .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
1144            .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
1145
1146        let mut enum_traits = match variant_traits.next() {
1147            Some(it) => it,
1148            None => continue,
1149        };
1150        for traits in variant_traits {
1151            enum_traits = enum_traits.intersection(&traits).cloned().collect();
1152        }
1153        enm.traits = enum_traits.into_iter().collect();
1154    }
1155}
1156
1157impl AstNodeSrc {
1158    fn remove_field(&mut self, to_remove: Vec<usize>) {
1159        to_remove.into_iter().rev().for_each(|idx| {
1160            self.fields.remove(idx);
1161        });
1162    }
1163}
1164
/// Runs the grammar codegen entry point, passing `true` for its `check`
/// parameter.
#[test]
fn test() {
    let check = true;
    generate(check);
}