Skip to main content

xtask/codegen/
grammar.rs

1//! This module generates AST datatype used by rust-analyzer.
2//!
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
5
6#![allow(clippy::disallowed_types)]
7
8use std::{
9    collections::{BTreeSet, HashSet},
10    fmt::Write,
11    fs,
12};
13
14use either::Either;
15use itertools::Itertools;
16use proc_macro2::{Punct, Spacing};
17use quote::{format_ident, quote};
18use stdx::panic_context;
19use ungrammar::{Grammar, Rule};
20
21use crate::{
22    codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat},
23    project_root,
24};
25
26mod ast_src;
27use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
28
29pub(crate) fn generate(check: bool) {
30    let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
31        .unwrap()
32        .parse()
33        .unwrap();
34    let ast = lower(&grammar);
35    let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar);
36
37    let syntax_kinds = generate_syntax_kinds(kinds_src);
38    let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
39    ensure_file_contents(
40        crate::flags::CodegenType::Grammar,
41        syntax_kinds_file.as_path(),
42        &syntax_kinds,
43        check,
44    );
45
46    let ast_tokens = generate_tokens(&ast);
47    let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
48    ensure_file_contents(
49        crate::flags::CodegenType::Grammar,
50        ast_tokens_file.as_path(),
51        &ast_tokens,
52        check,
53    );
54
55    let ast_nodes = generate_nodes(kinds_src, &ast);
56    let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
57    ensure_file_contents(
58        crate::flags::CodegenType::Grammar,
59        ast_nodes_file.as_path(),
60        &ast_nodes,
61        check,
62    );
63}
64
65fn generate_tokens(grammar: &AstSrc) -> String {
66    let tokens = grammar.tokens.iter().map(|token| {
67        let name = format_ident!("{}", token);
68        let kind = format_ident!("{}", to_upper_snake_case(token));
69        quote! {
70            pub struct #name {
71                pub(crate) syntax: SyntaxToken,
72            }
73            impl std::fmt::Display for #name {
74                fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75                    std::fmt::Display::fmt(&self.syntax, f)
76                }
77            }
78            impl AstToken for #name {
79                fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
80                fn cast(syntax: SyntaxToken) -> Option<Self> {
81                    if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
82                }
83                fn syntax(&self) -> &SyntaxToken { &self.syntax }
84            }
85
86            impl fmt::Debug for #name {
87                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88                    f.debug_struct(#token).field("syntax", &self.syntax).finish()
89                }
90            }
91            impl Clone for #name {
92                fn clone(&self) -> Self {
93                    Self { syntax: self.syntax.clone() }
94                }
95            }
96            impl hash::Hash for #name {
97                fn hash<H: hash::Hasher>(&self, state: &mut H) {
98                    self.syntax.hash(state);
99                }
100            }
101
102            impl Eq for #name {}
103            impl PartialEq for #name {
104                fn eq(&self, other: &Self) -> bool {
105                    self.syntax == other.syntax
106                }
107            }
108        }
109    });
110
111    add_preamble(
112        crate::flags::CodegenType::Grammar,
113        reformat(
114            quote! {
115                use std::{fmt, hash};
116
117                use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
118
119                #(#tokens)*
120            }
121            .to_string(),
122        ),
123    )
124    .replace("#[derive", "\n#[derive")
125}
126
/// Renders the source text of the generated AST node wrappers.
///
/// The output is assembled from four groups of items:
///
/// * one struct per entry of `grammar.nodes`, with accessor methods for its
///   fields and the boilerplate trait impls;
/// * one enum per entry of `grammar.enums`, with `From` impls for each variant
///   and (except for `Stmt`) an `AstNode` impl;
/// * one `Any{Trait}` struct per trait that at least one node lists in its
///   `traits`;
/// * a `Display` impl for every enum and node.
///
/// `kinds` is only used to warn (on stderr) about node kinds that have no
/// matching definition in `grammar.nodes`.
///
/// The token stream is stringified, doc comments are spliced in where the
/// placeholder attribute was emitted, and the result is passed through
/// `reformat` and `add_preamble`.
fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String {
    // Per node: (struct definition + trait impls + accessors, boilerplate impls).
    let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .map(|node| {
            let node_str_name = &node.name;
            let name = format_ident!("{}", node.name);
            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
            let traits = node
                .traits
                .iter()
                .filter(|trait_name| {
                    // Loops have two expressions so this might collide, therefore manual impl it
                    node.name != "ForExpr" && node.name != "WhileExpr"
                        || trait_name.as_str() != "HasLoopBody"
                })
                .map(|trait_name| {
                    let trait_name = format_ident!("{}", trait_name);
                    quote!(impl ast::#trait_name for #name {})
                });

            // One accessor per field; the shape depends on the field kind:
            // repeated node -> `AstChildren`, token -> `support::token`,
            // anything else -> `support::child`.
            let methods = node.fields.iter().map(|field| {
                let method_name = format_ident!("{}", field.method_name());
                let ty = field.ty();

                if field.is_many() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> AstChildren<#ty> {
                            support::children(&self.syntax)
                        }
                    }
                } else if let Some(token_kind) = field.token_kind() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::token(&self.syntax, #token_kind)
                        }
                    }
                } else {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::child(&self.syntax)
                        }
                    }
                }
            });
            (
                // The placeholder attribute below is not meant to survive: it is
                // swapped for the node's doc comment once the stream is a string.
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }

                    #(#traits)*

                    impl #name {
                        #(#methods)*
                    }
                },
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn kind() -> SyntaxKind
                        where
                            Self: Sized
                        {
                            #kind
                        }
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            kind == #kind
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode { &self.syntax }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }
                },
            )
        })
        .unzip();

    // Per enum: (enum definition + trait impls, `From` impls + optional `AstNode` impl).
    let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .enums
        .iter()
        .map(|en| {
            // Variants are emitted in sorted order; `kinds` is derived from the
            // sorted list so both stay index-aligned in the repetitions below.
            let variants: Vec<_> =
                en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect();
            let name = format_ident!("{}", en.name);
            let kinds: Vec<_> = variants
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
                .collect();
            let traits = en.traits.iter().sorted().map(|trait_name| {
                let trait_name = format_ident!("{}", trait_name);
                quote!(impl ast::#trait_name for #name {})
            });

            // No `AstNode` impl is generated for `Stmt`.
            // NOTE(review): presumably it is implemented by hand elsewhere — confirm.
            let ast_node = if en.name == "Stmt" {
                quote! {}
            } else {
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            let res = match syntax.kind() {
                                #(
                                #kinds => #name::#variants(#variants { syntax }),
                                )*
                                _ => return None,
                            };
                            Some(res)
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            match self {
                                #(
                                #name::#variants(it) => &it.syntax,
                                )*
                            }
                        }
                    }
                }
            };

            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                    pub enum #name {
                        #(#variants(#variants),)*
                    }

                    #(#traits)*
                },
                quote! {
                    #(
                        impl From<#variants> for #name {
                            #[inline]
                            fn from(node: #variants) -> #name {
                                #name::#variants(node)
                            }
                        }
                    )*
                    #ast_node
                },
            )
        })
        .unzip();
    // Group nodes by the traits they list, then emit one `Any{Trait}` wrapper
    // per trait (sorted by trait name) that can be cast from any of those nodes.
    let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .flat_map(|node| node.traits.iter().map(move |t| (t, node)))
        .into_group_map()
        .into_iter()
        .sorted_by_key(|(name, _)| *name)
        .map(|(trait_name, nodes)| {
            let name = format_ident!("Any{}", trait_name);
            let node_str_name = name.to_string();
            let trait_name = format_ident!("{}", trait_name);
            let kinds: Vec<_> = nodes
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))
                .collect();
            let nodes = nodes.iter().map(|node| format_ident!("{}", node.name));
            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }
                    impl #name {
                        #[inline]
                        pub fn new<T: ast::#trait_name>(node: T) -> #name {
                            #name {
                                syntax: node.syntax().clone()
                            }
                        }
                    }
                },
                quote! {
                    impl ast::#trait_name for #name {}
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            Self::can_cast(syntax.kind()).then_some(#name { syntax })
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            &self.syntax
                        }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }

                    #(
                        impl From<#nodes> for #name {
                            #[inline]
                            fn from(node: #nodes) -> #name {
                                #name { syntax: node.syntax }
                            }
                        }
                    )*
                },
            )
        })
        .unzip();

    let enum_names = grammar.enums.iter().map(|it| &it.name);
    let node_names = grammar.nodes.iter().map(|it| &it.name);

    // `Display` is generated for enums and nodes only (not for the `Any*` wrappers);
    // it forwards to the `Display` impl of the underlying syntax node.
    let display_impls =
        enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
            quote! {
                impl std::fmt::Display for #name {
                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                        std::fmt::Display::fmt(self.syntax(), f)
                    }
                }
            }
        });

    let defined_nodes: HashSet<_> = node_names.collect();

    // Diagnostic only: report every kind in `kinds.nodes` whose PascalCase name
    // has no counterpart among the grammar nodes. Generation continues regardless.
    for node in kinds
        .nodes
        .iter()
        .map(|kind| to_pascal_case(kind))
        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
    {
        eprintln!("Warning: node {node} not defined in AST source");
        drop(node);
    }

    let ast = quote! {
        #![allow(non_snake_case)]
        use std::{fmt, hash};

        use crate::{
            SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
            ast::{self, AstNode, AstChildren, support},
            T,
        };

        #(#node_defs)*
        #(#enum_defs)*
        #(#any_node_defs)*
        #(#node_boilerplate_impls)*
        #(#enum_boilerplate_impls)*
        #(#any_node_boilerplate_impls)*
        #(#display_impls)*
    };

    // Collapse the spaced-out `T ! [` spelling found in the stringified stream
    // back into `T![`.
    let ast = ast.to_string().replace("T ! [", "T![");

    let mut res = String::with_capacity(ast.len() * 2);

    // Doc comments in the same order the placeholders were emitted above:
    // all nodes first, then all enums. There are no docs for the `Any*` structs.
    let mut docs =
        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));

    // Splitting on the placeholder drops it from the text; after each chunk the
    // next doc comment (if any is left) is written where the placeholder was.
    for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
        res.push_str(chunk);
        if let Some(doc) = docs.next() {
            write_doc_comment(doc, &mut res);
        }
    }

    let res = add_preamble(crate::flags::CodegenType::Grammar, reformat(res));
    // Put every `#[derive` on a line of its own, preceded by a newline.
    res.replace("#[derive", "\n#[derive")
}
457
/// Appends `contents` to `dest` as `///` doc-comment lines.
///
/// Each entry is written verbatim right after the three slashes (no space is
/// inserted) and terminated with a newline.
fn write_doc_comment(contents: &[String], dest: &mut String) {
    contents
        .iter()
        .try_for_each(|doc_line| writeln!(dest, "///{doc_line}"))
        .unwrap();
}
463
/// Renders the source text of the generated `SyntaxKind` definitions.
///
/// From the punctuation, keyword, literal, token and node lists in `grammar`
/// this builds:
///
/// * the `SyntaxKind` enum itself;
/// * its inherent helpers (`text`, `is_strict_keyword`, `is_contextual_keyword`,
///   `is_keyword`, `is_punct`, `is_literal`, `from_keyword`,
///   `from_contextual_keyword`, `from_char`);
/// * the exported `T_` macro mapping token spellings to `SyntaxKind` variants;
/// * hand-written `Copy`/`Clone`/`PartialEq`/`Eq`/`PartialOrd`/`Ord`/`Hash` impls.
///
/// The stream is stringified, passed through `reformat` and `add_preamble`, and
/// the `T_` macro section is then re-laid-out so each arm sits on its own line.
fn generate_syntax_kinds(grammar: KindsSrc) -> String {
    // Punctuation whose text is exactly one byte long: (char value, variant name),
    // used for `from_char`.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // How each punctuation token is spelled inside the `T_` macro matcher.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        // Tokens found in "{}[]()" are interpolated as a `char` value instead of
        // being emitted as punctuation tokens.
        if "{}[]()".contains(token) {
            let c = token.chars().next().unwrap();
            quote! { #c }
            // underscore is an identifier in the proc-macro api
        } else if *token == "_" {
            quote! { _ }
        } else {
            // Everything else becomes a run of `Joint`-spaced `Punct`s, one per char.
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
    let punctuation_texts = grammar.punct.iter().map(|&(text, _name)| text);

    // Keyword text -> variant identifier: `Self` maps to `SELF_TYPE_KW`, every
    // other keyword to its upper-snake-case form with a `_KW` suffix.
    let fmt_kw_as_variant = |&name| match name {
        "Self" => format_ident!("SELF_TYPE_KW"),
        name => format_ident!("{}_KW", to_upper_snake_case(name)),
    };
    let strict_keywords = grammar.keywords;
    let strict_keywords_variants =
        strict_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let strict_keywords_tokens = strict_keywords.iter().map(|it| format_ident!("{it}"));

    // Match arms guarded by `<edition> <= edition`: the keyword counts as strict
    // from its listed edition onwards. One list matches on variants, the other
    // on the keyword's string form.
    let edition_dependent_keywords_variants_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            let kw = fmt_kw_as_variant(kw);
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords_str_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords = grammar.edition_dependent_keywords.iter().map(|&(it, _)| it);
    let edition_dependent_keywords_variants = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, _)| fmt_kw_as_variant(kw))
        .collect::<Vec<_>>();
    let edition_dependent_keywords_tokens =
        grammar.edition_dependent_keywords.iter().map(|(it, _)| format_ident!("{it}"));

    let contextual_keywords = grammar.contextual_keywords;
    let contextual_keywords_variants =
        contextual_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let contextual_keywords_tokens = contextual_keywords.iter().map(|it| format_ident!("{it}"));
    // A contextual keyword that is also listed as edition-dependent only matches
    // as contextual while `edition` is below the listed edition; all others
    // match unconditionally.
    let contextual_keywords_str_match_arm = grammar.contextual_keywords.iter().map(|kw| {
        match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw) {
            Some((_, ed)) => quote! { #kw if edition < #ed },
            None => quote! { #kw },
        }
    });
    let contextual_keywords_variants_match_arm = grammar
        .contextual_keywords
        .iter()
        .map(|kw_s| {
            let kw = fmt_kw_as_variant(kw_s);
            match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw_s) {
                Some((_, ed)) => quote! { #kw if edition < #ed },
                None => quote! { #kw },
            }
        })
        .collect::<Vec<_>>();

    // Enum variants for all non-strict keywords. Sorted and deduplicated so a
    // keyword present in both lists yields a single variant.
    let non_strict_keyword_variants = contextual_keywords_variants
        .iter()
        .chain(edition_dependent_keywords_variants.iter())
        .sorted()
        .dedup()
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        use crate::Edition;

        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
        #[derive(Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            // Technical SyntaxKinds: they appear temporarily during parsing,
            // but never end up in the final tree
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#strict_keywords_variants,)*
            #(#non_strict_keyword_variants,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            // Technical kind so that we can cast from u16 safely
            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            #[allow(unreachable_patterns)]
            pub const fn text(self) -> &'static str {
                match self {
                    TOMBSTONE | EOF | __LAST
                    #( | #literals )*
                    #( | #nodes )*
                    #( | #tokens )* => panic!("no text for these `SyntaxKind`s"),
                    #( #punctuation => #punctuation_texts ,)*
                    #( #strict_keywords_variants => #strict_keywords ,)*
                    #( #contextual_keywords_variants => #contextual_keywords ,)*
                    #( #edition_dependent_keywords_variants => #edition_dependent_keywords ,)*
                }
            }

            /// Checks whether this syntax kind is a strict keyword for the given edition.
            /// Strict keywords are identifiers that are always considered keywords.
            pub fn is_strict_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            /// Checks whether this syntax kind is a weak keyword for the given edition.
            /// Weak keywords are identifiers that are considered keywords only in certain contexts.
            pub fn is_contextual_keyword(self, edition: Edition) -> bool {
                match self {
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            /// Checks whether this syntax kind is a strict or weak keyword for the given edition.
            pub fn is_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                matches!(self, #(#punctuation)|*)
            }

            pub fn is_literal(self) -> bool {
                matches!(self, #(#literals)|*)
            }

            pub fn from_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#strict_keywords => #strict_keywords_variants,)*
                    #(#edition_dependent_keywords_str_match_arm => #edition_dependent_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_contextual_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#contextual_keywords_str_match_arm => #contextual_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        /// `T![]`
        #[macro_export]
        macro_rules! T_ {
            #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
            #([#strict_keywords_tokens] => { $crate::SyntaxKind::#strict_keywords_variants };)*
            #([#contextual_keywords_tokens] => { $crate::SyntaxKind::#contextual_keywords_variants };)*
            #([#edition_dependent_keywords_tokens] => { $crate::SyntaxKind::#edition_dependent_keywords_variants };)*
            [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
            [int_number] => { $crate::SyntaxKind::INT_NUMBER };
            [ident] => { $crate::SyntaxKind::IDENT };
            [string] => { $crate::SyntaxKind::STRING };
            [shebang] => { $crate::SyntaxKind::SHEBANG };
            [frontmatter] => { $crate::SyntaxKind::FRONTMATTER };
        }

        impl ::core::marker::Copy for SyntaxKind {}
        impl ::core::clone::Clone for SyntaxKind {
            #[inline]
            fn clone(&self) -> Self {
                *self
            }
        }
        impl ::core::cmp::PartialEq for SyntaxKind {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                (*self as u16) == (*other as u16)
            }
        }
        impl ::core::cmp::Eq for SyntaxKind {}
        impl ::core::cmp::PartialOrd for SyntaxKind {
            #[inline]
            fn partial_cmp(&self, other: &Self) -> core::option::Option<core::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }
        impl ::core::cmp::Ord for SyntaxKind {
            #[inline]
            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                (*self as u16).cmp(&(*other as u16))
            }
        }
        impl ::core::hash::Hash for SyntaxKind {
            fn hash<H: ::core::hash::Hasher>(&self, state: &mut H) {
                ::core::mem::discriminant(self).hash(state);
            }
        }
    };

    let result = add_preamble(crate::flags::CodegenType::Grammar, reformat(ast.to_string()));

    // Post-process the `T_` macro: locate the text between the macro header and
    // the `Copy` impl that follows it, and break it up so that every arm starts
    // on its own indented line. If either marker is missing the text is
    // returned as-is.
    if let Some(start) = result.find("macro_rules ! T_")
        && let Some(macro_end) = result[start..].find("\nimpl ::core::marker::Copy")
    {
        let macro_section = &result[start..start + macro_end];
        let formatted_macro = macro_section
            .replace("T_ { [", "T_ {\n    [")
            .replace(" ; [", ";\n    [")
            .replace(" ; }", ";\n}")
            .trim_end()
            .to_owned()
            + "\n";
        return result.replace(macro_section, &formatted_macro);
    }

    result
}
728
/// Converts a name such as `IntNumber` into `INT_NUMBER`.
///
/// An underscore is inserted in front of every ASCII uppercase character that
/// is not the very first character of `s` (so consecutive capitals are each
/// separated), and all characters are ASCII-uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (idx, ch) in s.chars().enumerate() {
        if idx > 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
    }
    out
}
742
/// Converts a name such as `GenericParamList` into `generic_param_list`.
///
/// An underscore is inserted in front of every ASCII uppercase character that
/// is not the very first character of `s`, and all characters are
/// ASCII-lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (idx, ch) in s.chars().enumerate() {
        if idx > 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
    }
    out
}
756
/// Converts an underscore-separated name such as `INT_NUMBER` into `IntNumber`.
///
/// Underscores are dropped; the first character of each underscore-delimited
/// word is ASCII-uppercased and the remaining characters are ASCII-lowercased.
fn to_pascal_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split('_') {
        let mut letters = word.chars();
        // Empty words (leading, trailing or doubled underscores) contribute nothing.
        if let Some(head) = letters.next() {
            out.push(head.to_ascii_uppercase());
            out.extend(letters.map(|c| c.to_ascii_lowercase()));
        }
    }
    out
}
772
/// Returns `s` with a single `s` appended; no irregular plurals are handled.
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
776
777impl Field {
778    fn is_many(&self) -> bool {
779        matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
780    }
781    fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
782        match self {
783            Field::Token { token, .. } => {
784                let token: proc_macro2::TokenStream = token.parse().unwrap();
785                Some(quote! { T![#token] })
786            }
787            _ => None,
788        }
789    }
790    fn method_name(&self) -> String {
791        match self {
792            Field::Token { name, token, .. } => {
793                if let Some(name) = name {
794                    return name.clone();
795                }
796                let name = match token.as_str() {
797                    ";" => "semicolon",
798                    "->" => "thin_arrow",
799                    "'{'" => "l_curly",
800                    "'}'" => "r_curly",
801                    "'('" => "l_paren",
802                    "')'" => "r_paren",
803                    "'['" => "l_brack",
804                    "']'" => "r_brack",
805                    "<" => "l_angle",
806                    ">" => "r_angle",
807                    "=" => "eq",
808                    "!" => "excl",
809                    "*" => "star",
810                    "&" => "amp",
811                    "-" => "minus",
812                    "_" => "underscore",
813                    "." => "dot",
814                    ".." => "dotdot",
815                    "..." => "dotdotdot",
816                    "..=" => "dotdoteq",
817                    "=>" => "fat_arrow",
818                    "@" => "at",
819                    ":" => "colon",
820                    "::" => "coloncolon",
821                    "#" => "pound",
822                    "?" => "question_mark",
823                    "," => "comma",
824                    "|" => "pipe",
825                    "~" => "tilde",
826                    _ => token,
827                };
828                format!("{name}_token",)
829            }
830            Field::Node { name, .. } => {
831                if name == "type" {
832                    String::from("ty")
833                } else {
834                    name.to_owned()
835                }
836            }
837        }
838    }
839    fn ty(&self) -> proc_macro2::Ident {
840        match self {
841            Field::Token { .. } => format_ident!("SyntaxToken"),
842            Field::Node { ty, .. } => format_ident!("{}", ty),
843        }
844    }
845}
846
/// Strips any leading run of `@`, `#` and `?` characters from a token name.
///
/// When stripping would leave nothing behind (the name consists solely of
/// those characters, or is empty), the name is returned unchanged.
fn clean_token_name(name: &str) -> String {
    let stripped = name.trim_start_matches(|c: char| matches!(c, '@' | '#' | '?'));
    match stripped {
        "" => name.to_owned(),
        rest => rest.to_owned(),
    }
}
851
852fn lower(grammar: &Grammar) -> AstSrc {
853    let mut res = AstSrc {
854        tokens:
855            "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"
856                .split_ascii_whitespace()
857                .map(|it| it.to_owned())
858                .collect::<Vec<_>>(),
859        ..Default::default()
860    };
861
862    let nodes = grammar.iter().collect::<Vec<_>>();
863
864    for &node in &nodes {
865        let name = grammar[node].name.clone();
866        let rule = &grammar[node].rule;
867        let _g = panic_context::enter(name.clone());
868        match lower_enum(grammar, rule) {
869            Some(variants) => {
870                let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
871                res.enums.push(enum_src);
872            }
873            None => {
874                let mut fields = Vec::new();
875                lower_rule(&mut fields, grammar, None, rule);
876                res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
877            }
878        }
879    }
880
881    deduplicate_fields(&mut res);
882    extract_enums(&mut res);
883    extract_struct_traits(&mut res);
884    extract_enum_traits(&mut res);
885    res.nodes.sort_by_key(|it| it.name.clone());
886    res.enums.sort_by_key(|it| it.name.clone());
887    res.tokens.sort();
888    res.nodes.iter_mut().for_each(|it| {
889        it.traits.sort();
890        it.fields.sort_by_key(|it| match it {
891            Field::Token { token, .. } => (true, token.clone()),
892            Field::Node { name, .. } => (false, name.clone()),
893        });
894    });
895    res.enums.iter_mut().for_each(|it| {
896        it.traits.sort();
897        it.variants.sort();
898    });
899    res
900}
901
902fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
903    let alternatives = match rule {
904        Rule::Alt(it) => it,
905        _ => return None,
906    };
907    let mut variants = Vec::new();
908    for alternative in alternatives {
909        match alternative {
910            Rule::Node(it) => variants.push(grammar[*it].name.clone()),
911            Rule::Token(it) if grammar[*it].name == ";" => (),
912            _ => return None,
913        }
914    }
915    Some(variants)
916}
917
918fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
919    if lower_separated_list(acc, grammar, label, rule) {
920        return;
921    }
922
923    match rule {
924        Rule::Node(node) => {
925            let ty = grammar[*node].name.clone();
926            let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
927            let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
928            acc.push(field);
929        }
930        Rule::Token(token) => {
931            let mut token = clean_token_name(&grammar[*token].name);
932            if "[]{}()".contains(&token) {
933                token = format!("'{token}'");
934            }
935            let field = Field::Token { name: label.cloned(), token };
936            acc.push(field);
937        }
938        Rule::Rep(inner) => {
939            if let Rule::Node(node) = &**inner {
940                let ty = grammar[*node].name.clone();
941                let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
942                let field = Field::Node { name, ty, cardinality: Cardinality::Many };
943                acc.push(field);
944                return;
945            }
946            panic!("unhandled rule: {rule:?}")
947        }
948        Rule::Labeled { label: l, rule } => {
949            assert!(label.is_none());
950            let manually_implemented = matches!(
951                l.as_str(),
952                "lhs"
953                    | "rhs"
954                    | "then_branch"
955                    | "else_branch"
956                    | "start"
957                    | "end"
958                    | "op"
959                    | "index"
960                    | "base"
961                    | "value"
962                    | "trait"
963                    | "self_ty"
964                    | "iterable"
965                    | "condition"
966                    | "args"
967                    | "body"
968            );
969            if manually_implemented {
970                return;
971            }
972            lower_rule(acc, grammar, Some(l), rule);
973        }
974        Rule::Seq(rules) | Rule::Alt(rules) => {
975            for rule in rules {
976                lower_rule(acc, grammar, label, rule)
977            }
978        }
979        Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
980    }
981}
982
983// (T (',' T)* ','?)
984fn lower_separated_list(
985    acc: &mut Vec<Field>,
986    grammar: &Grammar,
987    label: Option<&String>,
988    rule: &Rule,
989) -> bool {
990    let rule = match rule {
991        Rule::Seq(it) => it,
992        _ => return false,
993    };
994
995    let (nt, repeat, trailing_sep) = match rule.as_slice() {
996        [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
997            (Either::Left(node), repeat, Some(trailing_sep))
998        }
999        [Rule::Node(node), Rule::Rep(repeat)] => (Either::Left(node), repeat, None),
1000        [Rule::Token(token), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
1001            (Either::Right(token), repeat, Some(trailing_sep))
1002        }
1003        [Rule::Token(token), Rule::Rep(repeat)] => (Either::Right(token), repeat, None),
1004        _ => return false,
1005    };
1006    let repeat = match &**repeat {
1007        Rule::Seq(it) => it,
1008        _ => return false,
1009    };
1010    if !matches!(
1011        repeat.as_slice(),
1012        [comma, nt_]
1013            if trailing_sep.is_none_or(|it| comma == &**it) && match (nt, nt_) {
1014                (Either::Left(node), Rule::Node(nt_)) => node == nt_,
1015                (Either::Right(token), Rule::Token(nt_)) => token == nt_,
1016                _ => false,
1017            }
1018    ) {
1019        return false;
1020    }
1021    match nt {
1022        Either::Right(token) => {
1023            let token = clean_token_name(&grammar[*token].name);
1024            let field = Field::Token { token, name: None };
1025            acc.push(field);
1026        }
1027        Either::Left(node) => {
1028            let ty = grammar[*node].name.clone();
1029            let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
1030            let field = Field::Node { name, ty, cardinality: Cardinality::Many };
1031            acc.push(field);
1032        }
1033    }
1034    true
1035}
1036
1037fn deduplicate_fields(ast: &mut AstSrc) {
1038    for node in &mut ast.nodes {
1039        let mut i = 0;
1040        'outer: while i < node.fields.len() {
1041            for j in 0..i {
1042                let f1 = &node.fields[i];
1043                let f2 = &node.fields[j];
1044                if f1 == f2 {
1045                    node.fields.remove(i);
1046                    continue 'outer;
1047                }
1048            }
1049            i += 1;
1050        }
1051    }
1052}
1053
1054fn extract_enums(ast: &mut AstSrc) {
1055    for node in &mut ast.nodes {
1056        for enm in &ast.enums {
1057            let mut to_remove = Vec::new();
1058            for (i, field) in node.fields.iter().enumerate() {
1059                let ty = field.ty().to_string();
1060                if enm.variants.iter().any(|it| it == &ty) {
1061                    to_remove.push(i);
1062                }
1063            }
1064            if to_remove.len() == enm.variants.len() {
1065                node.remove_field(to_remove);
1066                let ty = enm.name.clone();
1067                let name = to_lower_snake_case(&ty);
1068                node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
1069            }
1070        }
1071    }
1072}
1073
/// Trait table: each entry pairs a trait name with the accessor method names
/// belonging to it.
///
/// `extract_struct_traits` walks this table for every node and hands each
/// entry to `extract_struct_trait`.
const TRAITS: &[(&str, &[&str])] = &[
    ("HasAttrs", &["attrs"]),
    ("HasName", &["name"]),
    ("HasVisibility", &["visibility"]),
    ("HasGenericParams", &["generic_param_list", "where_clause"]),
    ("HasGenericArgs", &["generic_arg_list"]),
    ("HasTypeBounds", &["type_bound_list", "colon_token"]),
    ("HasModuleItem", &["items"]),
    ("HasLoopBody", &["label", "loop_body"]),
    ("HasArgList", &["arg_list"]),
];
1085
1086fn extract_struct_traits(ast: &mut AstSrc) {
1087    for node in &mut ast.nodes {
1088        for (name, methods) in TRAITS {
1089            extract_struct_trait(node, name, methods);
1090        }
1091    }
1092
1093    let nodes_with_doc_comments = [
1094        "SourceFile",
1095        "Fn",
1096        "Struct",
1097        "Union",
1098        "RecordField",
1099        "TupleField",
1100        "Enum",
1101        "Variant",
1102        "Trait",
1103        "Module",
1104        "Static",
1105        "Const",
1106        "TypeAlias",
1107        "Impl",
1108        "ExternBlock",
1109        "ExternCrate",
1110        "MacroCall",
1111        "MacroRules",
1112        "MacroDef",
1113        "Use",
1114    ];
1115
1116    for node in &mut ast.nodes {
1117        if nodes_with_doc_comments.contains(&&*node.name) {
1118            node.traits.push("HasDocComments".into());
1119        }
1120    }
1121}
1122
1123fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
1124    let mut to_remove = Vec::new();
1125    for (i, field) in node.fields.iter().enumerate() {
1126        let method_name = field.method_name();
1127        if methods.iter().any(|&it| it == method_name) {
1128            to_remove.push(i);
1129        }
1130    }
1131    if to_remove.len() == methods.len() {
1132        node.traits.push(trait_name.to_owned());
1133        node.remove_field(to_remove);
1134    }
1135}
1136
1137fn extract_enum_traits(ast: &mut AstSrc) {
1138    for enm in &mut ast.enums {
1139        if enm.name == "Stmt" {
1140            continue;
1141        }
1142        let nodes = &ast.nodes;
1143        let mut variant_traits = enm
1144            .variants
1145            .iter()
1146            .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
1147            .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
1148
1149        let mut enum_traits = match variant_traits.next() {
1150            Some(it) => it,
1151            None => continue,
1152        };
1153        for traits in variant_traits {
1154            enum_traits = enum_traits.intersection(&traits).cloned().collect();
1155        }
1156        enm.traits = enum_traits.into_iter().collect();
1157    }
1158}
1159
1160impl AstNodeSrc {
1161    fn remove_field(&mut self, to_remove: Vec<usize>) {
1162        to_remove.into_iter().rev().for_each(|idx| {
1163            self.fields.remove(idx);
1164        });
1165    }
1166}
1167
/// Runs the grammar code generation with the `check` flag set.
#[test]
fn test() {
    // Forwarded as `generate`'s `check` argument.
    let check = true;
    generate(check);
}