1#![allow(clippy::disallowed_types)]
7
8use std::{
9 collections::{BTreeSet, HashSet},
10 fmt::Write,
11 fs,
12};
13
14use either::Either;
15use itertools::Itertools;
16use proc_macro2::{Punct, Spacing};
17use quote::{format_ident, quote};
18use stdx::panic_context;
19use ungrammar::{Grammar, Rule};
20
21use crate::{
22 codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat},
23 project_root,
24};
25
26mod ast_src;
27use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
28
29pub(crate) fn generate(check: bool) {
30 let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
31 .unwrap()
32 .parse()
33 .unwrap();
34 let ast = lower(&grammar);
35 let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar);
36
37 let syntax_kinds = generate_syntax_kinds(kinds_src);
38 let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
39 ensure_file_contents(
40 crate::flags::CodegenType::Grammar,
41 syntax_kinds_file.as_path(),
42 &syntax_kinds,
43 check,
44 );
45
46 let ast_tokens = generate_tokens(&ast);
47 let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
48 ensure_file_contents(
49 crate::flags::CodegenType::Grammar,
50 ast_tokens_file.as_path(),
51 &ast_tokens,
52 check,
53 );
54
55 let ast_nodes = generate_nodes(kinds_src, &ast);
56 let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
57 ensure_file_contents(
58 crate::flags::CodegenType::Grammar,
59 ast_nodes_file.as_path(),
60 &ast_nodes,
61 check,
62 );
63}
64
65fn generate_tokens(grammar: &AstSrc) -> String {
66 let tokens = grammar.tokens.iter().map(|token| {
67 let name = format_ident!("{}", token);
68 let kind = format_ident!("{}", to_upper_snake_case(token));
69 quote! {
70 pub struct #name {
71 pub(crate) syntax: SyntaxToken,
72 }
73 impl std::fmt::Display for #name {
74 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 std::fmt::Display::fmt(&self.syntax, f)
76 }
77 }
78 impl AstToken for #name {
79 fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
80 fn cast(syntax: SyntaxToken) -> Option<Self> {
81 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
82 }
83 fn syntax(&self) -> &SyntaxToken { &self.syntax }
84 }
85
86 impl fmt::Debug for #name {
87 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88 f.debug_struct(#token).field("syntax", &self.syntax).finish()
89 }
90 }
91 impl Clone for #name {
92 fn clone(&self) -> Self {
93 Self { syntax: self.syntax.clone() }
94 }
95 }
96 impl hash::Hash for #name {
97 fn hash<H: hash::Hasher>(&self, state: &mut H) {
98 self.syntax.hash(state);
99 }
100 }
101
102 impl Eq for #name {}
103 impl PartialEq for #name {
104 fn eq(&self, other: &Self) -> bool {
105 self.syntax == other.syntax
106 }
107 }
108 }
109 });
110
111 add_preamble(
112 crate::flags::CodegenType::Grammar,
113 reformat(
114 quote! {
115 use std::{fmt, hash};
116
117 use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};
118
119 #(#tokens)*
120 }
121 .to_string(),
122 ),
123 )
124 .replace("#[derive", "\n#[derive")
125}
126
/// Generates the contents of `crates/syntax/src/ast/generated/nodes.rs`:
/// one typed wrapper struct per AST node, one enum per node alternation,
/// one `Any*` struct per AST trait, plus all the boilerplate impls.
fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String {
    // For every concrete node: its struct definition (with field accessors)
    // in one stream and the `AstNode`/std-trait boilerplate in another, so
    // all definitions can be emitted before all impls.
    let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .map(|node| {
            let node_str_name = &node.name;
            let name = format_ident!("{}", node.name);
            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
            let traits = node
                .traits
                .iter()
                .filter(|trait_name| {
                    // `HasLoopBody` is not generated for `ForExpr`/`WhileExpr`;
                    // presumably those impls are written by hand elsewhere —
                    // TODO(review): confirm.
                    node.name != "ForExpr" && node.name != "WhileExpr"
                        || trait_name.as_str() != "HasLoopBody"
                })
                .map(|trait_name| {
                    let trait_name = format_ident!("{}", trait_name);
                    quote!(impl ast::#trait_name for #name {})
                });

            // One accessor per field: repeated children, a specific token, or
            // a single (optional) child node.
            let methods = node.fields.iter().map(|field| {
                let method_name = format_ident!("{}", field.method_name());
                let ty = field.ty();

                if field.is_many() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> AstChildren<#ty> {
                            support::children(&self.syntax)
                        }
                    }
                } else if let Some(token_kind) = field.token_kind() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::token(&self.syntax, #token_kind)
                        }
                    }
                } else {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::child(&self.syntax)
                        }
                    }
                }
            });
            (
                quote! {
                    // Replaced with the node's real doc comment later on;
                    // `quote!` cannot interpolate doc comments directly.
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }

                    #(#traits)*

                    impl #name {
                        #(#methods)*
                    }
                },
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn kind() -> SyntaxKind
                        where
                            Self: Sized
                        {
                            #kind
                        }
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            kind == #kind
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode { &self.syntax }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }
                },
            )
        })
        .unzip();

    // For every alternation: the enum definition plus `From` conversions and
    // (except for `Stmt`) an `AstNode` impl.
    let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .enums
        .iter()
        .map(|en| {
            let variants: Vec<_> =
                en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect();
            let name = format_ident!("{}", en.name);
            let kinds: Vec<_> = variants
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
                .collect();
            let traits = en.traits.iter().sorted().map(|trait_name| {
                let trait_name = format_ident!("{}", trait_name);
                quote!(impl ast::#trait_name for #name {})
            });

            // `Stmt` gets no generated `AstNode` impl; presumably it is
            // implemented by hand — TODO(review): confirm.
            let ast_node = if en.name == "Stmt" {
                quote! {}
            } else {
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            let res = match syntax.kind() {
                                #(
                                    #kinds => #name::#variants(#variants { syntax }),
                                )*
                                _ => return None,
                            };
                            Some(res)
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            match self {
                                #(
                                    #name::#variants(it) => &it.syntax,
                                )*
                            }
                        }
                    }
                }
            };

            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                    pub enum #name {
                        #(#variants(#variants),)*
                    }

                    #(#traits)*
                },
                quote! {
                    #(
                        impl From<#variants> for #name {
                            #[inline]
                            fn from(node: #variants) -> #name {
                                #name::#variants(node)
                            }
                        }
                    )*
                    #ast_node
                },
            )
        })
        .unzip();

    // For every AST trait: an `Any<Trait>` node that can wrap any node kind
    // implementing that trait, grouped via `into_group_map` and sorted for
    // deterministic output.
    let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .flat_map(|node| node.traits.iter().map(move |t| (t, node)))
        .into_group_map()
        .into_iter()
        .sorted_by_key(|(name, _)| *name)
        .map(|(trait_name, nodes)| {
            let name = format_ident!("Any{}", trait_name);
            let node_str_name = name.to_string();
            let trait_name = format_ident!("{}", trait_name);
            let kinds: Vec<_> = nodes
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))
                .collect();
            let nodes = nodes.iter().map(|node| format_ident!("{}", node.name));
            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }
                    impl #name {
                        #[inline]
                        pub fn new<T: ast::#trait_name>(node: T) -> #name {
                            #name {
                                syntax: node.syntax().clone()
                            }
                        }
                    }
                },
                quote! {
                    impl ast::#trait_name for #name {}
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            Self::can_cast(syntax.kind()).then_some(#name { syntax })
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            &self.syntax
                        }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }

                    #(
                        impl From<#nodes> for #name {
                            #[inline]
                            fn from(node: #nodes) -> #name {
                                #name { syntax: node.syntax }
                            }
                        }
                    )*
                },
            )
        })
        .unzip();

    let enum_names = grammar.enums.iter().map(|it| &it.name);
    let node_names = grammar.nodes.iter().map(|it| &it.name);

    // `Display` delegates to the underlying syntax node for both structs and
    // enums.
    let display_impls =
        enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
            quote! {
                impl std::fmt::Display for #name {
                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                        std::fmt::Display::fmt(self.syntax(), f)
                    }
                }
            }
        });

    let defined_nodes: HashSet<_> = node_names.collect();

    // Warn about kinds declared in the grammar's node list that have no AST
    // node definition.
    for node in kinds
        .nodes
        .iter()
        .map(|kind| to_pascal_case(kind))
        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
    {
        eprintln!("Warning: node {node} not defined in AST source");
        drop(node);
    }

    let ast = quote! {
        #![allow(non_snake_case)]
        use std::{fmt, hash};

        use crate::{
            SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
            ast::{self, AstNode, AstChildren, support},
            T,
        };

        #(#node_defs)*
        #(#enum_defs)*
        #(#any_node_defs)*
        #(#node_boilerplate_impls)*
        #(#enum_boilerplate_impls)*
        #(#any_node_boilerplate_impls)*
        #(#display_impls)*
    };

    // `quote!` stringifies `T![x]` with spaces; normalize back to the macro
    // spelling.
    let ast = ast.to_string().replace("T ! [", "T![");

    let mut res = String::with_capacity(ast.len() * 2);

    // Docs in the same order the placeholders were emitted: all nodes first,
    // then all enums.
    let mut docs =
        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));

    // Splice the real doc comments in place of the placeholder attribute
    // (the token stream is one long line, hence the odd spacing in the
    // pattern).
    for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
        res.push_str(chunk);
        if let Some(doc) = docs.next() {
            write_doc_comment(doc, &mut res);
        }
    }

    let res = add_preamble(crate::flags::CodegenType::Grammar, reformat(res));
    // Force a blank line before each `#[derive` attribute for readability.
    res.replace("#[derive", "\n#[derive")
}
457
/// Appends each line of `contents` to `dest` as a `///` doc-comment line.
fn write_doc_comment(contents: &[String], dest: &mut String) {
    contents.iter().for_each(|line| {
        // Writing into a `String` is infallible, so the unwrap cannot fire.
        writeln!(dest, "///{line}").unwrap();
    });
}
463
/// Generates `crates/parser/src/syntax_kind/generated.rs`: the `SyntaxKind`
/// enum, its classification/conversion helpers, and the `T_!` macro.
fn generate_syntax_kinds(grammar: KindsSrc) -> String {
    // Single-character punctuation, paired as (char value, variant ident) for
    // `from_char` below.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // How each punctuation token is spelled inside `T_!` macro arms:
    // delimiters become char literals (they cannot appear bare in a macro
    // matcher), `_` stays a literal underscore, everything else becomes a run
    // of joint `Punct`s.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        if "{}[]()".contains(token) {
            let c = token.chars().next().unwrap();
            quote! { #c }
        } else if *token == "_" {
            quote! { _ }
        } else {
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
    let punctuation_texts = grammar.punct.iter().map(|&(text, _name)| text);

    // "Self" maps to SELF_TYPE_KW; upper-snake-casing it would otherwise
    // produce the same variant name as "self".
    let fmt_kw_as_variant = |&name| match name {
        "Self" => format_ident!("SELF_TYPE_KW"),
        name => format_ident!("{}_KW", to_upper_snake_case(name)),
    };
    let strict_keywords = grammar.keywords;
    let strict_keywords_variants =
        strict_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let strict_keywords_tokens = strict_keywords.iter().map(|it| format_ident!("{it}"));

    // Edition-dependent keywords are strict from some edition on (`#ed <=
    // edition`) and contextual before it (`edition < #ed`).
    let edition_dependent_keywords_variants_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            let kw = fmt_kw_as_variant(kw);
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords_str_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords = grammar.edition_dependent_keywords.iter().map(|&(it, _)| it);
    let edition_dependent_keywords_variants = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, _)| fmt_kw_as_variant(kw))
        .collect::<Vec<_>>();
    let edition_dependent_keywords_tokens =
        grammar.edition_dependent_keywords.iter().map(|(it, _)| format_ident!("{it}"));

    let contextual_keywords = grammar.contextual_keywords;
    let contextual_keywords_variants =
        contextual_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let contextual_keywords_tokens = contextual_keywords.iter().map(|it| format_ident!("{it}"));
    // A contextual keyword that is also edition-dependent only counts as
    // contextual before the edition that makes it strict.
    let contextual_keywords_str_match_arm = grammar.contextual_keywords.iter().map(|kw| {
        match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw) {
            Some((_, ed)) => quote! { #kw if edition < #ed },
            None => quote! { #kw },
        }
    });
    let contextual_keywords_variants_match_arm = grammar
        .contextual_keywords
        .iter()
        .map(|kw_s| {
            let kw = fmt_kw_as_variant(kw_s);
            match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw_s) {
                Some((_, ed)) => quote! { #kw if edition < #ed },
                None => quote! { #kw },
            }
        })
        .collect::<Vec<_>>();

    // Contextual and edition-dependent keywords may overlap; dedup so each
    // variant is declared exactly once in the enum.
    let non_strict_keyword_variants = contextual_keywords_variants
        .iter()
        .chain(edition_dependent_keywords_variants.iter())
        .sorted()
        .dedup()
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        use crate::Edition;

        #[derive(Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            // Technical kinds first, then punctuation, keywords, literals,
            // tokens and nodes.
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#strict_keywords_variants,)*
            #(#non_strict_keyword_variants,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            #[allow(unreachable_patterns)]
            pub const fn text(self) -> &'static str {
                match self {
                    // Kinds without a fixed textual form panic.
                    TOMBSTONE | EOF | __LAST
                    #( | #literals )*
                    #( | #nodes )*
                    #( | #tokens )* => panic!("no text for these `SyntaxKind`s"),
                    #( #punctuation => #punctuation_texts ,)*
                    #( #strict_keywords_variants => #strict_keywords ,)*
                    #( #contextual_keywords_variants => #contextual_keywords ,)*
                    #( #edition_dependent_keywords_variants => #edition_dependent_keywords ,)*
                }
            }

            pub fn is_strict_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_contextual_keyword(self, edition: Edition) -> bool {
                match self {
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                matches!(self, #(#punctuation)|*)
            }

            pub fn is_literal(self) -> bool {
                matches!(self, #(#literals)|*)
            }

            pub fn from_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#strict_keywords => #strict_keywords_variants,)*
                    #(#edition_dependent_keywords_str_match_arm => #edition_dependent_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_contextual_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#contextual_keywords_str_match_arm => #contextual_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        #[macro_export]
        macro_rules! T_ {
            #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
            #([#strict_keywords_tokens] => { $crate::SyntaxKind::#strict_keywords_variants };)*
            #([#contextual_keywords_tokens] => { $crate::SyntaxKind::#contextual_keywords_variants };)*
            #([#edition_dependent_keywords_tokens] => { $crate::SyntaxKind::#edition_dependent_keywords_variants };)*
            [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
            [int_number] => { $crate::SyntaxKind::INT_NUMBER };
            [ident] => { $crate::SyntaxKind::IDENT };
            [string] => { $crate::SyntaxKind::STRING };
            [shebang] => { $crate::SyntaxKind::SHEBANG };
            [frontmatter] => { $crate::SyntaxKind::FRONTMATTER };
        }

        // Standard traits are written out by hand instead of derived;
        // `Hash` hashes the discriminant, the comparisons cast to `u16`.
        impl ::core::marker::Copy for SyntaxKind {}
        impl ::core::clone::Clone for SyntaxKind {
            #[inline]
            fn clone(&self) -> Self {
                *self
            }
        }
        impl ::core::cmp::PartialEq for SyntaxKind {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                (*self as u16) == (*other as u16)
            }
        }
        impl ::core::cmp::Eq for SyntaxKind {}
        impl ::core::cmp::PartialOrd for SyntaxKind {
            #[inline]
            fn partial_cmp(&self, other: &Self) -> core::option::Option<core::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }
        impl ::core::cmp::Ord for SyntaxKind {
            #[inline]
            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                (*self as u16).cmp(&(*other as u16))
            }
        }
        impl ::core::hash::Hash for SyntaxKind {
            fn hash<H: ::core::hash::Hasher>(&self, state: &mut H) {
                ::core::mem::discriminant(self).hash(state);
            }
        }
    };

    let result = add_preamble(crate::flags::CodegenType::Grammar, reformat(ast.to_string()));

    // rustfmt leaves `macro_rules!` bodies alone, so reflow the generated
    // `T_!` macro manually: one arm per line.
    if let Some(start) = result.find("macro_rules ! T_")
        && let Some(macro_end) = result[start..].find("\nimpl ::core::marker::Copy")
    {
        let macro_section = &result[start..start + macro_end];
        let formatted_macro = macro_section
            .replace("T_ { [", "T_ {\n [")
            .replace(" ; [", ";\n [")
            .replace(" ; }", ";\n}")
            .trim_end()
            .to_owned()
            + "\n";
        return result.replace(macro_section, &formatted_macro);
    }

    result
}
727
/// Converts `s` to UPPER_SNAKE_CASE, inserting `_` before every uppercase
/// ASCII letter except the first character.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (i, c) in s.chars().enumerate() {
        if i > 0 && c.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(c.to_ascii_uppercase());
    }
    out
}
741
/// Converts `s` to lower_snake_case, inserting `_` before every uppercase
/// ASCII letter except the first character.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    if let Some(first) = chars.next() {
        out.push(first.to_ascii_lowercase());
        for c in chars {
            if c.is_ascii_uppercase() {
                out.push('_');
            }
            out.push(c.to_ascii_lowercase());
        }
    }
    out
}
755
/// Converts snake_case `s` to PascalCase: `_` starts a new word, the first
/// letter of each word is uppercased, all other letters are lowercased.
fn to_pascal_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut at_word_start = true;
    for c in s.chars() {
        match c {
            '_' => at_word_start = true,
            c if at_word_start => {
                out.push(c.to_ascii_uppercase());
                at_word_start = false;
            }
            c => out.push(c.to_ascii_lowercase()),
        }
    }
    out
}
771
/// Naive pluralization used for `Many`-field names: just appends `s`.
fn pluralize(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 1);
    out.push_str(s);
    out.push('s');
    out
}
775
776impl Field {
777 fn is_many(&self) -> bool {
778 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
779 }
780 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
781 match self {
782 Field::Token(token) => {
783 let token: proc_macro2::TokenStream = token.parse().unwrap();
784 Some(quote! { T![#token] })
785 }
786 _ => None,
787 }
788 }
789 fn method_name(&self) -> String {
790 match self {
791 Field::Token(name) => {
792 let name = match name.as_str() {
793 ";" => "semicolon",
794 "->" => "thin_arrow",
795 "'{'" => "l_curly",
796 "'}'" => "r_curly",
797 "'('" => "l_paren",
798 "')'" => "r_paren",
799 "'['" => "l_brack",
800 "']'" => "r_brack",
801 "<" => "l_angle",
802 ">" => "r_angle",
803 "=" => "eq",
804 "!" => "excl",
805 "*" => "star",
806 "&" => "amp",
807 "-" => "minus",
808 "_" => "underscore",
809 "." => "dot",
810 ".." => "dotdot",
811 "..." => "dotdotdot",
812 "..=" => "dotdoteq",
813 "=>" => "fat_arrow",
814 "@" => "at",
815 ":" => "colon",
816 "::" => "coloncolon",
817 "#" => "pound",
818 "?" => "question_mark",
819 "," => "comma",
820 "|" => "pipe",
821 "~" => "tilde",
822 _ => name,
823 };
824 format!("{name}_token",)
825 }
826 Field::Node { name, .. } => {
827 if name == "type" {
828 String::from("ty")
829 } else {
830 name.to_owned()
831 }
832 }
833 }
834 }
835 fn ty(&self) -> proc_macro2::Ident {
836 match self {
837 Field::Token(_) => format_ident!("SyntaxToken"),
838 Field::Node { ty, .. } => format_ident!("{}", ty),
839 }
840 }
841}
842
/// Strips leading `@`/`#`/`?` sigils from a grammar token name. A name made
/// entirely of those characters (e.g. `#`) is returned unchanged.
fn clean_token_name(name: &str) -> String {
    match name.trim_start_matches(['@', '#', '?']) {
        "" => name.to_owned(),
        cleaned => cleaned.to_owned(),
    }
}
847
848fn lower(grammar: &Grammar) -> AstSrc {
849 let mut res = AstSrc {
850 tokens:
851 "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"
852 .split_ascii_whitespace()
853 .map(|it| it.to_owned())
854 .collect::<Vec<_>>(),
855 ..Default::default()
856 };
857
858 let nodes = grammar.iter().collect::<Vec<_>>();
859
860 for &node in &nodes {
861 let name = grammar[node].name.clone();
862 let rule = &grammar[node].rule;
863 let _g = panic_context::enter(name.clone());
864 match lower_enum(grammar, rule) {
865 Some(variants) => {
866 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
867 res.enums.push(enum_src);
868 }
869 None => {
870 let mut fields = Vec::new();
871 lower_rule(&mut fields, grammar, None, rule);
872 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
873 }
874 }
875 }
876
877 deduplicate_fields(&mut res);
878 extract_enums(&mut res);
879 extract_struct_traits(&mut res);
880 extract_enum_traits(&mut res);
881 res.nodes.sort_by_key(|it| it.name.clone());
882 res.enums.sort_by_key(|it| it.name.clone());
883 res.tokens.sort();
884 res.nodes.iter_mut().for_each(|it| {
885 it.traits.sort();
886 it.fields.sort_by_key(|it| match it {
887 Field::Token(name) => (true, name.clone()),
888 Field::Node { name, .. } => (false, name.clone()),
889 });
890 });
891 res.enums.iter_mut().for_each(|it| {
892 it.traits.sort();
893 it.variants.sort();
894 });
895 res
896}
897
898fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
899 let alternatives = match rule {
900 Rule::Alt(it) => it,
901 _ => return None,
902 };
903 let mut variants = Vec::new();
904 for alternative in alternatives {
905 match alternative {
906 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
907 Rule::Token(it) if grammar[*it].name == ";" => (),
908 _ => return None,
909 }
910 }
911 Some(variants)
912}
913
/// Lowers `rule` into accessor `Field`s appended to `acc`. `label` is the
/// grammar label (`label:rule`) currently in scope, if any.
fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
    // Comma-separated lists collapse into a single `Many` field.
    if lower_separated_list(acc, grammar, label, rule) {
        return;
    }

    match rule {
        // A single child node becomes an optional accessor.
        Rule::Node(node) => {
            let ty = grammar[*node].name.clone();
            let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
            let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
            acc.push(field);
        }
        Rule::Token(token) => {
            assert!(label.is_none());
            let mut name = clean_token_name(&grammar[*token].name);
            // Bracket-like tokens are quoted so they survive the later
            // `T![...]` parse in `Field::token_kind`.
            if "[]{}()".contains(&name) {
                name = format!("'{name}'");
            }
            let field = Field::Token(name);
            acc.push(field);
        }
        Rule::Rep(inner) => {
            // Only plain `Node*` repetitions are supported here.
            if let Rule::Node(node) = &**inner {
                let ty = grammar[*node].name.clone();
                let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
                let field = Field::Node { name, ty, cardinality: Cardinality::Many };
                acc.push(field);
                return;
            }
            panic!("unhandled rule: {rule:?}")
        }
        Rule::Labeled { label: l, rule } => {
            assert!(label.is_none());
            // Accessors for these labels are written by hand, so nothing is
            // generated for them.
            let manually_implemented = matches!(
                l.as_str(),
                "lhs"
                    | "rhs"
                    | "then_branch"
                    | "else_branch"
                    | "start"
                    | "end"
                    | "op"
                    | "index"
                    | "base"
                    | "value"
                    | "trait"
                    | "self_ty"
                    | "iterable"
                    | "condition"
                    | "args"
                    | "body"
            );
            if manually_implemented {
                return;
            }
            lower_rule(acc, grammar, Some(l), rule);
        }
        // Sequences and alternations contribute each sub-rule's fields.
        Rule::Seq(rules) | Rule::Alt(rules) => {
            for rule in rules {
                lower_rule(acc, grammar, label, rule)
            }
        }
        // `rule?` lowers like its inner rule; node fields are optional anyway.
        Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
    }
}
979
/// Detects rules of the shape `T (sep T)* sep?` — with `T` either a node or a
/// token — and lowers them to a single field. Returns `false` when `rule` is
/// not such a separated list.
fn lower_separated_list(
    acc: &mut Vec<Field>,
    grammar: &Grammar,
    label: Option<&String>,
    rule: &Rule,
) -> bool {
    let rule = match rule {
        Rule::Seq(it) => it,
        _ => return false,
    };

    // Head element, repeated tail, and an optional trailing separator.
    let (nt, repeat, trailing_sep) = match rule.as_slice() {
        [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
            (Either::Left(node), repeat, Some(trailing_sep))
        }
        [Rule::Node(node), Rule::Rep(repeat)] => (Either::Left(node), repeat, None),
        [Rule::Token(token), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
            (Either::Right(token), repeat, Some(trailing_sep))
        }
        [Rule::Token(token), Rule::Rep(repeat)] => (Either::Right(token), repeat, None),
        _ => return false,
    };
    let repeat = match &**repeat {
        Rule::Seq(it) => it,
        _ => return false,
    };
    // The repeated part must be `separator element`, the separator must agree
    // with the trailing one (when present), and the element must be the same
    // node/token as the head.
    if !matches!(
        repeat.as_slice(),
        [comma, nt_]
            if trailing_sep.is_none_or(|it| comma == &**it) && match (nt, nt_) {
                (Either::Left(node), Rule::Node(nt_)) => node == nt_,
                (Either::Right(token), Rule::Token(nt_)) => token == nt_,
                _ => false,
            }
    ) {
        return false;
    }
    match nt {
        Either::Right(token) => {
            let name = clean_token_name(&grammar[*token].name);
            let field = Field::Token(name);
            acc.push(field);
        }
        Either::Left(node) => {
            let ty = grammar[*node].name.clone();
            let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
            let field = Field::Node { name, ty, cardinality: Cardinality::Many };
            acc.push(field);
        }
    }
    true
}
1033
1034fn deduplicate_fields(ast: &mut AstSrc) {
1035 for node in &mut ast.nodes {
1036 let mut i = 0;
1037 'outer: while i < node.fields.len() {
1038 for j in 0..i {
1039 let f1 = &node.fields[i];
1040 let f2 = &node.fields[j];
1041 if f1 == f2 {
1042 node.fields.remove(i);
1043 continue 'outer;
1044 }
1045 }
1046 i += 1;
1047 }
1048 }
1049}
1050
1051fn extract_enums(ast: &mut AstSrc) {
1052 for node in &mut ast.nodes {
1053 for enm in &ast.enums {
1054 let mut to_remove = Vec::new();
1055 for (i, field) in node.fields.iter().enumerate() {
1056 let ty = field.ty().to_string();
1057 if enm.variants.iter().any(|it| it == &ty) {
1058 to_remove.push(i);
1059 }
1060 }
1061 if to_remove.len() == enm.variants.len() {
1062 node.remove_field(to_remove);
1063 let ty = enm.name.clone();
1064 let name = to_lower_snake_case(&ty);
1065 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
1066 }
1067 }
1068 }
1069}
1070
/// (trait name, required accessor methods) pairs: when a node's generated
/// accessors include all of a trait's methods, the accessors are replaced by
/// an impl of that trait (see `extract_struct_trait`).
const TRAITS: &[(&str, &[&str])] = &[
    ("HasAttrs", &["attrs"]),
    ("HasName", &["name"]),
    ("HasVisibility", &["visibility"]),
    ("HasGenericParams", &["generic_param_list", "where_clause"]),
    ("HasGenericArgs", &["generic_arg_list"]),
    ("HasTypeBounds", &["type_bound_list", "colon_token"]),
    ("HasModuleItem", &["items"]),
    ("HasLoopBody", &["label", "loop_body"]),
    ("HasArgList", &["arg_list"]),
];
1082
1083fn extract_struct_traits(ast: &mut AstSrc) {
1084 for node in &mut ast.nodes {
1085 for (name, methods) in TRAITS {
1086 extract_struct_trait(node, name, methods);
1087 }
1088 }
1089
1090 let nodes_with_doc_comments = [
1091 "SourceFile",
1092 "Fn",
1093 "Struct",
1094 "Union",
1095 "RecordField",
1096 "TupleField",
1097 "Enum",
1098 "Variant",
1099 "Trait",
1100 "Module",
1101 "Static",
1102 "Const",
1103 "TypeAlias",
1104 "Impl",
1105 "ExternBlock",
1106 "ExternCrate",
1107 "MacroCall",
1108 "MacroRules",
1109 "MacroDef",
1110 "Use",
1111 ];
1112
1113 for node in &mut ast.nodes {
1114 if nodes_with_doc_comments.contains(&&*node.name) {
1115 node.traits.push("HasDocComments".into());
1116 }
1117 }
1118}
1119
1120fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
1121 let mut to_remove = Vec::new();
1122 for (i, field) in node.fields.iter().enumerate() {
1123 let method_name = field.method_name();
1124 if methods.iter().any(|&it| it == method_name) {
1125 to_remove.push(i);
1126 }
1127 }
1128 if to_remove.len() == methods.len() {
1129 node.traits.push(trait_name.to_owned());
1130 node.remove_field(to_remove);
1131 }
1132}
1133
1134fn extract_enum_traits(ast: &mut AstSrc) {
1135 for enm in &mut ast.enums {
1136 if enm.name == "Stmt" {
1137 continue;
1138 }
1139 let nodes = &ast.nodes;
1140 let mut variant_traits = enm
1141 .variants
1142 .iter()
1143 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
1144 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
1145
1146 let mut enum_traits = match variant_traits.next() {
1147 Some(it) => it,
1148 None => continue,
1149 };
1150 for traits in variant_traits {
1151 enum_traits = enum_traits.intersection(&traits).cloned().collect();
1152 }
1153 enm.traits = enum_traits.into_iter().collect();
1154 }
1155}
1156
1157impl AstNodeSrc {
1158 fn remove_field(&mut self, to_remove: Vec<usize>) {
1159 to_remove.into_iter().rev().for_each(|idx| {
1160 self.fields.remove(idx);
1161 });
1162 }
1163}
1164
#[test]
fn test() {
    // Running codegen in check mode asserts that the generated files on disk
    // are up to date with `rust.ungram`.
    generate(true);
}