1#![allow(clippy::disallowed_types)]
7
8use std::{
9 collections::{BTreeSet, HashSet},
10 fmt::Write,
11 fs,
12};
13
14use either::Either;
15use itertools::Itertools;
16use proc_macro2::{Punct, Spacing};
17use quote::{format_ident, quote};
18use stdx::panic_context;
19use ungrammar::{Grammar, Rule};
20
21use crate::{
22 codegen::{add_preamble, ensure_file_contents, grammar::ast_src::generate_kind_src, reformat},
23 project_root,
24};
25
26mod ast_src;
27use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
28
29pub(crate) fn generate(check: bool) {
30 let grammar = fs::read_to_string(project_root().join("crates/syntax/rust.ungram"))
31 .unwrap()
32 .parse()
33 .unwrap();
34 let ast = lower(&grammar);
35 let kinds_src = generate_kind_src(&ast.nodes, &ast.enums, &grammar);
36
37 let syntax_kinds = generate_syntax_kinds(kinds_src);
38 let syntax_kinds_file = project_root().join("crates/parser/src/syntax_kind/generated.rs");
39 ensure_file_contents(
40 crate::flags::CodegenType::Grammar,
41 syntax_kinds_file.as_path(),
42 &syntax_kinds,
43 check,
44 );
45
46 let ast_tokens = generate_tokens(&ast);
47 let ast_tokens_file = project_root().join("crates/syntax/src/ast/generated/tokens.rs");
48 ensure_file_contents(
49 crate::flags::CodegenType::Grammar,
50 ast_tokens_file.as_path(),
51 &ast_tokens,
52 check,
53 );
54
55 let ast_nodes = generate_nodes(kinds_src, &ast);
56 let ast_nodes_file = project_root().join("crates/syntax/src/ast/generated/nodes.rs");
57 ensure_file_contents(
58 crate::flags::CodegenType::Grammar,
59 ast_nodes_file.as_path(),
60 &ast_nodes,
61 check,
62 );
63}
64
/// Renders the source text of the generated AST token wrappers.
///
/// For every name in `grammar.tokens` this emits a `pub struct` holding a single
/// `SyntaxToken`, together with `Display`, `AstToken`, `Debug`, `Clone`, `Hash`,
/// `Eq` and `PartialEq` impls that all delegate to that inner token. The combined
/// token stream is stringified, passed through `reformat` and `add_preamble`, and
/// finally post-processed so that every `#[derive` starts on a fresh line.
fn generate_tokens(grammar: &AstSrc) -> String {
    let tokens = grammar.tokens.iter().map(|token| {
        // `name` is the wrapper struct identifier; `kind` is the `SyntaxKind`
        // variant obtained by upper-snake-casing the same token name.
        let name = format_ident!("{}", token);
        let kind = format_ident!("{}", to_upper_snake_case(token));
        quote! {
            pub struct #name {
                pub(crate) syntax: SyntaxToken,
            }
            impl std::fmt::Display for #name {
                fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                    std::fmt::Display::fmt(&self.syntax, f)
                }
            }
            impl AstToken for #name {
                fn can_cast(kind: SyntaxKind) -> bool { kind == #kind }
                fn cast(syntax: SyntaxToken) -> Option<Self> {
                    if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                }
                fn syntax(&self) -> &SyntaxToken { &self.syntax }
            }

            impl fmt::Debug for #name {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    // `#token` interpolates the token name as a string literal.
                    f.debug_struct(#token).field("syntax", &self.syntax).finish()
                }
            }
            impl Clone for #name {
                fn clone(&self) -> Self {
                    Self { syntax: self.syntax.clone() }
                }
            }
            impl hash::Hash for #name {
                fn hash<H: hash::Hasher>(&self, state: &mut H) {
                    self.syntax.hash(state);
                }
            }

            impl Eq for #name {}
            impl PartialEq for #name {
                fn eq(&self, other: &Self) -> bool {
                    self.syntax == other.syntax
                }
            }
        }
    });

    add_preamble(
        crate::flags::CodegenType::Grammar,
        reformat(
            quote! {
                use std::{fmt, hash};

                use crate::{SyntaxKind::{self, *}, SyntaxToken, ast::AstToken};

                #(#tokens)*
            }
            .to_string(),
        ),
    )
    // Put a line break in front of every `#[derive` in the rendered text.
    .replace("#[derive", "\n#[derive")
}
126
/// Renders the source text of the generated AST node wrappers.
///
/// Three families of items are produced from `grammar`:
///
/// * one struct per entry of `grammar.nodes`, with accessor methods for its
///   fields and marker-trait impls for its traits;
/// * one enum per entry of `grammar.enums`, with `From` impls for each variant;
/// * one `Any<Trait>` struct per trait name that occurs on at least one node.
///
/// Each item also gets its boilerplate impls (`AstNode`, `Hash`, `Eq`,
/// `PartialEq`, `Clone`, `Debug`, `Display`). `kinds.nodes` is only used to warn
/// on stderr about node kinds that have no definition in `grammar.nodes`.
///
/// Doc comments cannot be interpolated directly, so definitions are tagged with a
/// placeholder attribute that is replaced by `///` lines after stringification.
fn generate_nodes(kinds: KindsSrc, grammar: &AstSrc) -> String {
    // Per node: (struct definition + trait impls + accessors, boilerplate impls).
    let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .map(|node| {
            let node_str_name = &node.name;
            let name = format_ident!("{}", node.name);
            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
            let traits = node
                .traits
                .iter()
                // Skip the `HasLoopBody` impl for `ForExpr` and `WhileExpr`; every
                // other (node, trait) pair gets an empty marker impl.
                // NOTE(review): presumably those two impls are written by hand elsewhere — confirm.
                .filter(|trait_name| {
                    node.name != "ForExpr" && node.name != "WhileExpr"
                        || trait_name.as_str() != "HasLoopBody"
                })
                .map(|trait_name| {
                    let trait_name = format_ident!("{}", trait_name);
                    quote!(impl ast::#trait_name for #name {})
                });

            // One accessor per field: repeated nodes return `AstChildren`, token
            // fields look the token up by kind, anything else is an optional child.
            let methods = node.fields.iter().map(|field| {
                let method_name = format_ident!("{}", field.method_name());
                let ty = field.ty();

                if field.is_many() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> AstChildren<#ty> {
                            support::children(&self.syntax)
                        }
                    }
                } else if let Some(token_kind) = field.token_kind() {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::token(&self.syntax, #token_kind)
                        }
                    }
                } else {
                    quote! {
                        #[inline]
                        pub fn #method_name(&self) -> Option<#ty> {
                            support::child(&self.syntax)
                        }
                    }
                }
            });
            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }

                    #(#traits)*

                    impl #name {
                        #(#methods)*
                    }
                },
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn kind() -> SyntaxKind
                        where
                            Self: Sized
                        {
                            #kind
                        }
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            kind == #kind
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode { &self.syntax }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }
                },
            )
        })
        .unzip();

    // Per enum: (enum definition + trait impls, `From` impls + optional `AstNode` impl).
    let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .enums
        .iter()
        .map(|en| {
            // Variants are sorted so the emitted code has a stable order; `kinds`
            // is index-aligned with `variants`.
            let variants: Vec<_> =
                en.variants.iter().map(|var| format_ident!("{}", var)).sorted().collect();
            let name = format_ident!("{}", en.name);
            let kinds: Vec<_> = variants
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
                .collect();
            let traits = en.traits.iter().sorted().map(|trait_name| {
                let trait_name = format_ident!("{}", trait_name);
                quote!(impl ast::#trait_name for #name {})
            });

            // No `AstNode` impl is generated for the `Stmt` enum.
            // NOTE(review): presumably `Stmt` has a hand-written impl elsewhere — confirm.
            let ast_node = if en.name == "Stmt" {
                quote! {}
            } else {
                quote! {
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            let res = match syntax.kind() {
                                #(
                                #kinds => #name::#variants(#variants { syntax }),
                                )*
                                _ => return None,
                            };
                            Some(res)
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            match self {
                                #(
                                #name::#variants(it) => &it.syntax,
                                )*
                            }
                        }
                    }
                }
            };

            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                    pub enum #name {
                        #(#variants(#variants),)*
                    }

                    #(#traits)*
                },
                quote! {
                    #(
                        impl From<#variants> for #name {
                            #[inline]
                            fn from(node: #variants) -> #name {
                                #name::#variants(node)
                            }
                        }
                    )*
                    #ast_node
                },
            )
        })
        .unzip();
    // Group nodes by trait name and emit one `Any<Trait>` wrapper per trait,
    // sorted by trait name so the output order is deterministic.
    let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
        .nodes
        .iter()
        .flat_map(|node| node.traits.iter().map(move |t| (t, node)))
        .into_group_map()
        .into_iter()
        .sorted_by_key(|(name, _)| *name)
        .map(|(trait_name, nodes)| {
            let name = format_ident!("Any{}", trait_name);
            let node_str_name = name.to_string();
            let trait_name = format_ident!("{}", trait_name);
            let kinds: Vec<_> = nodes
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))
                .collect();
            let nodes = nodes.iter().map(|node| format_ident!("{}", node.name));
            (
                quote! {
                    #[pretty_doc_comment_placeholder_workaround]
                    pub struct #name {
                        pub(crate) syntax: SyntaxNode,
                    }
                    impl #name {
                        #[inline]
                        pub fn new<T: ast::#trait_name>(node: T) -> #name {
                            #name {
                                syntax: node.syntax().clone()
                            }
                        }
                    }
                },
                quote! {
                    impl ast::#trait_name for #name {}
                    impl AstNode for #name {
                        #[inline]
                        fn can_cast(kind: SyntaxKind) -> bool {
                            matches!(kind, #(#kinds)|*)
                        }
                        #[inline]
                        fn cast(syntax: SyntaxNode) -> Option<Self> {
                            Self::can_cast(syntax.kind()).then_some(#name { syntax })
                        }
                        #[inline]
                        fn syntax(&self) -> &SyntaxNode {
                            &self.syntax
                        }
                    }

                    impl hash::Hash for #name {
                        fn hash<H: hash::Hasher>(&self, state: &mut H) {
                            self.syntax.hash(state);
                        }
                    }

                    impl Eq for #name {}
                    impl PartialEq for #name {
                        fn eq(&self, other: &Self) -> bool {
                            self.syntax == other.syntax
                        }
                    }

                    impl Clone for #name {
                        fn clone(&self) -> Self {
                            Self { syntax: self.syntax.clone() }
                        }
                    }

                    impl fmt::Debug for #name {
                        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                            f.debug_struct(#node_str_name).field("syntax", &self.syntax).finish()
                        }
                    }

                    #(
                        impl From<#nodes> for #name {
                            #[inline]
                            fn from(node: #nodes) -> #name {
                                #name { syntax: node.syntax }
                            }
                        }
                    )*
                },
            )
        })
        .unzip();

    let enum_names = grammar.enums.iter().map(|it| &it.name);
    let node_names = grammar.nodes.iter().map(|it| &it.name);

    // `Display` is emitted for every enum and every node (not for the `Any*` wrappers).
    let display_impls =
        enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
            quote! {
                impl std::fmt::Display for #name {
                    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                        std::fmt::Display::fmt(self.syntax(), f)
                    }
                }
            }
        });

    let defined_nodes: HashSet<_> = node_names.collect();

    // Warn about every node kind (converted to PascalCase) that has no matching
    // node definition in `grammar.nodes`.
    for node in kinds
        .nodes
        .iter()
        .map(|kind| to_pascal_case(kind))
        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
    {
        eprintln!("Warning: node {node} not defined in AST source");
        drop(node);
    }

    let ast = quote! {
        #![allow(non_snake_case)]
        use std::{fmt, hash};

        use crate::{
            SyntaxNode, SyntaxToken, SyntaxKind::{self, *},
            ast::{self, AstNode, AstChildren, support},
            T,
        };

        #(#node_defs)*
        #(#enum_defs)*
        #(#any_node_defs)*
        #(#node_boilerplate_impls)*
        #(#enum_boilerplate_impls)*
        #(#any_node_boilerplate_impls)*
        #(#display_impls)*
    };

    // Stringifying the token stream spaces out `T![`; collapse it back.
    let ast = ast.to_string().replace("T ! [", "T![");

    let mut res = String::with_capacity(ast.len() * 2);

    // Docs are consumed in the same order the definitions were emitted above:
    // all nodes first, then all enums.
    let mut docs =
        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));

    // Splitting on the placeholder removes it; after each chunk the next doc (if
    // any is left) is written as `///` lines. Once `docs` is exhausted the
    // remaining chunks are appended with the placeholder simply dropped.
    for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {
        res.push_str(chunk);
        if let Some(doc) = docs.next() {
            write_doc_comment(doc, &mut res);
        }
    }

    let res = add_preamble(crate::flags::CodegenType::Grammar, reformat(res));
    // Put a line break in front of every `#[derive` in the rendered text.
    res.replace("#[derive", "\n#[derive")
}
457
/// Appends every entry of `contents` to `dest` as its own `///`-prefixed line.
///
/// The entry text is placed directly after the three slashes, so any leading
/// space has to be part of the entry itself.
fn write_doc_comment(contents: &[String], dest: &mut String) {
    contents
        .iter()
        .try_for_each(|line| writeln!(dest, "///{line}"))
        .unwrap();
}
463
/// Renders the source text that defines `SyntaxKind` and the `T_!` macro.
///
/// The output contains the `SyntaxKind` enum (punctuation, keywords, literals,
/// tokens and nodes taken from `grammar`), keyword/punctuation classification
/// helpers, the keyword and single-character lookup functions, the `T_!` macro
/// mapping token text to `SyntaxKind` variants, and hand-spelled impls of
/// `Copy`, `Clone`, `PartialEq`, `Eq`, `PartialOrd`, `Ord` and `Hash`.
///
/// After `reformat`/`add_preamble`, the macro section of the text is re-laid-out
/// with one rule per line.
fn generate_syntax_kinds(grammar: KindsSrc) -> String {
    // Punctuation whose text is exactly one byte long, as (char, variant) pairs;
    // these feed `SyntaxKind::from_char`.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // How each punctuation token is spelled inside the `T_!` macro patterns.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        if "{}[]()".contains(token) {
            // Delimiters are emitted as char literals.
            let c = token.chars().next().unwrap();
            quote! { #c }
        } else if *token == "_" {
            // Underscore is an identifier in the proc-macro API, not a punct.
            quote! { _ }
        } else {
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
    let punctuation_texts = grammar.punct.iter().map(|&(text, _name)| text);

    // Keyword text -> variant identifier: `Self` is special-cased to
    // `SELF_TYPE_KW`, everything else becomes `<UPPER_SNAKE>_KW`.
    let fmt_kw_as_variant = |&name| match name {
        "Self" => format_ident!("SELF_TYPE_KW"),
        name => format_ident!("{}_KW", to_upper_snake_case(name)),
    };
    let strict_keywords = grammar.keywords;
    let strict_keywords_variants =
        strict_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let strict_keywords_tokens = strict_keywords.iter().map(|it| format_ident!("{it}"));

    // Match arms (by variant and by text) that apply once `edition` has reached
    // the edition attached to the keyword.
    let edition_dependent_keywords_variants_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            let kw = fmt_kw_as_variant(kw);
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords_str_match_arm = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, ed)| {
            quote! { #kw if #ed <= edition }
        })
        .collect::<Vec<_>>();
    let edition_dependent_keywords = grammar.edition_dependent_keywords.iter().map(|&(it, _)| it);
    let edition_dependent_keywords_variants = grammar
        .edition_dependent_keywords
        .iter()
        .map(|(kw, _)| fmt_kw_as_variant(kw))
        .collect::<Vec<_>>();
    let edition_dependent_keywords_tokens =
        grammar.edition_dependent_keywords.iter().map(|(it, _)| format_ident!("{it}"));

    let contextual_keywords = grammar.contextual_keywords;
    let contextual_keywords_variants =
        contextual_keywords.iter().map(fmt_kw_as_variant).collect::<Vec<_>>();
    let contextual_keywords_tokens = contextual_keywords.iter().map(|it| format_ident!("{it}"));
    // A contextual keyword that is also edition-dependent only counts as
    // contextual while `edition` is below the edition attached to it.
    let contextual_keywords_str_match_arm = grammar.contextual_keywords.iter().map(|kw| {
        match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw) {
            Some((_, ed)) => quote! { #kw if edition < #ed },
            None => quote! { #kw },
        }
    });
    let contextual_keywords_variants_match_arm = grammar
        .contextual_keywords
        .iter()
        .map(|kw_s| {
            let kw = fmt_kw_as_variant(kw_s);
            match grammar.edition_dependent_keywords.iter().find(|(ed_kw, _)| ed_kw == kw_s) {
                Some((_, ed)) => quote! { #kw if edition < #ed },
                None => quote! { #kw },
            }
        })
        .collect::<Vec<_>>();

    // Contextual and edition-dependent variants may overlap; sort and dedup so
    // each one is declared once in the enum.
    let non_strict_keyword_variants = contextual_keywords_variants
        .iter()
        .chain(edition_dependent_keywords_variants.iter())
        .sorted()
        .dedup()
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        use crate::Edition;

        #[derive(Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#strict_keywords_variants,)*
            #(#non_strict_keyword_variants,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            // A variant can appear in both the contextual and the edition-dependent
            // arm lists, hence the `unreachable_patterns` allowance.
            #[allow(unreachable_patterns)]
            pub const fn text(self) -> &'static str {
                match self {
                    TOMBSTONE | EOF | __LAST
                    #( | #literals )*
                    #( | #nodes )*
                    #( | #tokens )* => panic!("no text for these `SyntaxKind`s"),
                    #( #punctuation => #punctuation_texts ,)*
                    #( #strict_keywords_variants => #strict_keywords ,)*
                    #( #contextual_keywords_variants => #contextual_keywords ,)*
                    #( #edition_dependent_keywords_variants => #edition_dependent_keywords ,)*
                }
            }

            pub fn is_strict_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_contextual_keyword(self, edition: Edition) -> bool {
                match self {
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_keyword(self, edition: Edition) -> bool {
                matches!(self, #(#strict_keywords_variants)|*)
                || match self {
                    #(#edition_dependent_keywords_variants_match_arm => true,)*
                    #(#contextual_keywords_variants_match_arm => true,)*
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                matches!(self, #(#punctuation)|*)
            }

            pub fn is_literal(self) -> bool {
                matches!(self, #(#literals)|*)
            }

            pub fn from_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#strict_keywords => #strict_keywords_variants,)*
                    #(#edition_dependent_keywords_str_match_arm => #edition_dependent_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_contextual_keyword(ident: &str, edition: Edition) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#contextual_keywords_str_match_arm => #contextual_keywords_variants,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        #[macro_export]
        macro_rules! T_ {
            #([#punctuation_values] => { $crate::SyntaxKind::#punctuation };)*
            #([#strict_keywords_tokens] => { $crate::SyntaxKind::#strict_keywords_variants };)*
            #([#contextual_keywords_tokens] => { $crate::SyntaxKind::#contextual_keywords_variants };)*
            #([#edition_dependent_keywords_tokens] => { $crate::SyntaxKind::#edition_dependent_keywords_variants };)*
            [lifetime_ident] => { $crate::SyntaxKind::LIFETIME_IDENT };
            [int_number] => { $crate::SyntaxKind::INT_NUMBER };
            [ident] => { $crate::SyntaxKind::IDENT };
            [string] => { $crate::SyntaxKind::STRING };
            [shebang] => { $crate::SyntaxKind::SHEBANG };
            [frontmatter] => { $crate::SyntaxKind::FRONTMATTER };
        }

        impl ::core::marker::Copy for SyntaxKind {}
        impl ::core::clone::Clone for SyntaxKind {
            #[inline]
            fn clone(&self) -> Self {
                *self
            }
        }
        impl ::core::cmp::PartialEq for SyntaxKind {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                (*self as u16) == (*other as u16)
            }
        }
        impl ::core::cmp::Eq for SyntaxKind {}
        impl ::core::cmp::PartialOrd for SyntaxKind {
            #[inline]
            fn partial_cmp(&self, other: &Self) -> core::option::Option<core::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }
        impl ::core::cmp::Ord for SyntaxKind {
            #[inline]
            fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                (*self as u16).cmp(&(*other as u16))
            }
        }
        impl ::core::hash::Hash for SyntaxKind {
            fn hash<H: ::core::hash::Hasher>(&self, state: &mut H) {
                ::core::mem::discriminant(self).hash(state);
            }
        }
    };

    let result = add_preamble(crate::flags::CodegenType::Grammar, reformat(ast.to_string()));

    // Locate the macro body in the rendered text: it runs from the
    // `macro_rules ! T_` marker up to the `Copy` impl that follows it. If either
    // marker is missing, the text is returned unchanged.
    if let Some(start) = result.find("macro_rules ! T_")
        && let Some(macro_end) = result[start..].find("\nimpl ::core::marker::Copy")
    {
        let macro_section = &result[start..start + macro_end];
        // Break the macro so that each rule sits on its own line.
        let formatted_macro = macro_section
            .replace("T_ { [", "T_ {\n [")
            .replace(" ; [", ";\n [")
            .replace(" ; }", ";\n}")
            .trim_end()
            .to_owned()
            + "\n";
        return result.replace(macro_section, &formatted_macro);
    }

    result
}
728
/// Converts a camel-case name such as `ForExpr` into `FOR_EXPR`.
///
/// An underscore is inserted in front of every ASCII uppercase character except
/// the very first character of the input, and every character is ASCII
/// uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (position, ch) in s.chars().enumerate() {
        if position > 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
    }
    out
}
742
/// Converts a camel-case name such as `GenericParamList` into
/// `generic_param_list`.
///
/// An underscore is inserted in front of every ASCII uppercase character except
/// the very first character of the input, and every character is ASCII
/// lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for (position, ch) in s.chars().enumerate() {
        if position > 0 && ch.is_ascii_uppercase() {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
    }
    out
}
756
/// Converts an underscore-separated name such as `FOR_EXPR` into `ForExpr`.
///
/// Underscores are dropped; the first character of each underscore-delimited
/// word is ASCII uppercased and the rest of the word is ASCII lowercased.
fn to_pascal_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Empty pieces (leading, trailing or doubled underscores) contribute nothing.
    for word in s.split('_') {
        let mut letters = word.chars();
        if let Some(initial) = letters.next() {
            out.push(initial.to_ascii_uppercase());
            out.extend(letters.map(|ch| ch.to_ascii_lowercase()));
        }
    }
    out
}
772
/// Returns `s` with a single `s` appended; no other pluralisation rule is applied.
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
776
777impl Field {
778 fn is_many(&self) -> bool {
779 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
780 }
781 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
782 match self {
783 Field::Token { token, .. } => {
784 let token: proc_macro2::TokenStream = token.parse().unwrap();
785 Some(quote! { T![#token] })
786 }
787 _ => None,
788 }
789 }
790 fn method_name(&self) -> String {
791 match self {
792 Field::Token { name, token, .. } => {
793 if let Some(name) = name {
794 return name.clone();
795 }
796 let name = match token.as_str() {
797 ";" => "semicolon",
798 "->" => "thin_arrow",
799 "'{'" => "l_curly",
800 "'}'" => "r_curly",
801 "'('" => "l_paren",
802 "')'" => "r_paren",
803 "'['" => "l_brack",
804 "']'" => "r_brack",
805 "<" => "l_angle",
806 ">" => "r_angle",
807 "=" => "eq",
808 "!" => "excl",
809 "*" => "star",
810 "&" => "amp",
811 "-" => "minus",
812 "_" => "underscore",
813 "." => "dot",
814 ".." => "dotdot",
815 "..." => "dotdotdot",
816 "..=" => "dotdoteq",
817 "=>" => "fat_arrow",
818 "@" => "at",
819 ":" => "colon",
820 "::" => "coloncolon",
821 "#" => "pound",
822 "?" => "question_mark",
823 "," => "comma",
824 "|" => "pipe",
825 "~" => "tilde",
826 _ => token,
827 };
828 format!("{name}_token",)
829 }
830 Field::Node { name, .. } => {
831 if name == "type" {
832 String::from("ty")
833 } else {
834 name.to_owned()
835 }
836 }
837 }
838 }
839 fn ty(&self) -> proc_macro2::Ident {
840 match self {
841 Field::Token { .. } => format_ident!("SyntaxToken"),
842 Field::Node { ty, .. } => format_ident!("{}", ty),
843 }
844 }
845}
846
/// Strips any leading run of `@`, `#` and `?` characters from `name`.
///
/// If stripping would leave nothing (the name consists solely of those
/// characters, or is empty), the name is returned unchanged instead.
fn clean_token_name(name: &str) -> String {
    match name.trim_start_matches(['@', '#', '?']) {
        "" => name.to_owned(),
        stripped => stripped.to_owned(),
    }
}
851
852fn lower(grammar: &Grammar) -> AstSrc {
853 let mut res = AstSrc {
854 tokens:
855 "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"
856 .split_ascii_whitespace()
857 .map(|it| it.to_owned())
858 .collect::<Vec<_>>(),
859 ..Default::default()
860 };
861
862 let nodes = grammar.iter().collect::<Vec<_>>();
863
864 for &node in &nodes {
865 let name = grammar[node].name.clone();
866 let rule = &grammar[node].rule;
867 let _g = panic_context::enter(name.clone());
868 match lower_enum(grammar, rule) {
869 Some(variants) => {
870 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
871 res.enums.push(enum_src);
872 }
873 None => {
874 let mut fields = Vec::new();
875 lower_rule(&mut fields, grammar, None, rule);
876 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
877 }
878 }
879 }
880
881 deduplicate_fields(&mut res);
882 extract_enums(&mut res);
883 extract_struct_traits(&mut res);
884 extract_enum_traits(&mut res);
885 res.nodes.sort_by_key(|it| it.name.clone());
886 res.enums.sort_by_key(|it| it.name.clone());
887 res.tokens.sort();
888 res.nodes.iter_mut().for_each(|it| {
889 it.traits.sort();
890 it.fields.sort_by_key(|it| match it {
891 Field::Token { token, .. } => (true, token.clone()),
892 Field::Node { name, .. } => (false, name.clone()),
893 });
894 });
895 res.enums.iter_mut().for_each(|it| {
896 it.traits.sort();
897 it.variants.sort();
898 });
899 res
900}
901
902fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
903 let alternatives = match rule {
904 Rule::Alt(it) => it,
905 _ => return None,
906 };
907 let mut variants = Vec::new();
908 for alternative in alternatives {
909 match alternative {
910 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
911 Rule::Token(it) if grammar[*it].name == ";" => (),
912 _ => return None,
913 }
914 }
915 Some(variants)
916}
917
918fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
919 if lower_separated_list(acc, grammar, label, rule) {
920 return;
921 }
922
923 match rule {
924 Rule::Node(node) => {
925 let ty = grammar[*node].name.clone();
926 let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
927 let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
928 acc.push(field);
929 }
930 Rule::Token(token) => {
931 let mut token = clean_token_name(&grammar[*token].name);
932 if "[]{}()".contains(&token) {
933 token = format!("'{token}'");
934 }
935 let field = Field::Token { name: label.cloned(), token };
936 acc.push(field);
937 }
938 Rule::Rep(inner) => {
939 if let Rule::Node(node) = &**inner {
940 let ty = grammar[*node].name.clone();
941 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
942 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
943 acc.push(field);
944 return;
945 }
946 panic!("unhandled rule: {rule:?}")
947 }
948 Rule::Labeled { label: l, rule } => {
949 assert!(label.is_none());
950 let manually_implemented = matches!(
951 l.as_str(),
952 "lhs"
953 | "rhs"
954 | "then_branch"
955 | "else_branch"
956 | "start"
957 | "end"
958 | "op"
959 | "index"
960 | "base"
961 | "value"
962 | "trait"
963 | "self_ty"
964 | "iterable"
965 | "condition"
966 | "args"
967 | "body"
968 );
969 if manually_implemented {
970 return;
971 }
972 lower_rule(acc, grammar, Some(l), rule);
973 }
974 Rule::Seq(rules) | Rule::Alt(rules) => {
975 for rule in rules {
976 lower_rule(acc, grammar, label, rule)
977 }
978 }
979 Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
980 }
981}
982
983fn lower_separated_list(
985 acc: &mut Vec<Field>,
986 grammar: &Grammar,
987 label: Option<&String>,
988 rule: &Rule,
989) -> bool {
990 let rule = match rule {
991 Rule::Seq(it) => it,
992 _ => return false,
993 };
994
995 let (nt, repeat, trailing_sep) = match rule.as_slice() {
996 [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
997 (Either::Left(node), repeat, Some(trailing_sep))
998 }
999 [Rule::Node(node), Rule::Rep(repeat)] => (Either::Left(node), repeat, None),
1000 [Rule::Token(token), Rule::Rep(repeat), Rule::Opt(trailing_sep)] => {
1001 (Either::Right(token), repeat, Some(trailing_sep))
1002 }
1003 [Rule::Token(token), Rule::Rep(repeat)] => (Either::Right(token), repeat, None),
1004 _ => return false,
1005 };
1006 let repeat = match &**repeat {
1007 Rule::Seq(it) => it,
1008 _ => return false,
1009 };
1010 if !matches!(
1011 repeat.as_slice(),
1012 [comma, nt_]
1013 if trailing_sep.is_none_or(|it| comma == &**it) && match (nt, nt_) {
1014 (Either::Left(node), Rule::Node(nt_)) => node == nt_,
1015 (Either::Right(token), Rule::Token(nt_)) => token == nt_,
1016 _ => false,
1017 }
1018 ) {
1019 return false;
1020 }
1021 match nt {
1022 Either::Right(token) => {
1023 let token = clean_token_name(&grammar[*token].name);
1024 let field = Field::Token { token, name: None };
1025 acc.push(field);
1026 }
1027 Either::Left(node) => {
1028 let ty = grammar[*node].name.clone();
1029 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
1030 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
1031 acc.push(field);
1032 }
1033 }
1034 true
1035}
1036
1037fn deduplicate_fields(ast: &mut AstSrc) {
1038 for node in &mut ast.nodes {
1039 let mut i = 0;
1040 'outer: while i < node.fields.len() {
1041 for j in 0..i {
1042 let f1 = &node.fields[i];
1043 let f2 = &node.fields[j];
1044 if f1 == f2 {
1045 node.fields.remove(i);
1046 continue 'outer;
1047 }
1048 }
1049 i += 1;
1050 }
1051 }
1052}
1053
1054fn extract_enums(ast: &mut AstSrc) {
1055 for node in &mut ast.nodes {
1056 for enm in &ast.enums {
1057 let mut to_remove = Vec::new();
1058 for (i, field) in node.fields.iter().enumerate() {
1059 let ty = field.ty().to_string();
1060 if enm.variants.iter().any(|it| it == &ty) {
1061 to_remove.push(i);
1062 }
1063 }
1064 if to_remove.len() == enm.variants.len() {
1065 node.remove_field(to_remove);
1066 let ty = enm.name.clone();
1067 let name = to_lower_snake_case(&ty);
1068 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
1069 }
1070 }
1071 }
1072}
1073
/// Table of `(trait name, accessor method names)` pairs consumed by
/// `extract_struct_traits`.
///
/// When the number of a node's fields whose method name appears in an entry's
/// list equals the length of that list, `extract_struct_trait` records the trait
/// on the node and removes those fields.
const TRAITS: &[(&str, &[&str])] = &[
    ("HasAttrs", &["attrs"]),
    ("HasName", &["name"]),
    ("HasVisibility", &["visibility"]),
    ("HasGenericParams", &["generic_param_list", "where_clause"]),
    ("HasGenericArgs", &["generic_arg_list"]),
    ("HasTypeBounds", &["type_bound_list", "colon_token"]),
    ("HasModuleItem", &["items"]),
    ("HasLoopBody", &["label", "loop_body"]),
    ("HasArgList", &["arg_list"]),
];
1085
1086fn extract_struct_traits(ast: &mut AstSrc) {
1087 for node in &mut ast.nodes {
1088 for (name, methods) in TRAITS {
1089 extract_struct_trait(node, name, methods);
1090 }
1091 }
1092
1093 let nodes_with_doc_comments = [
1094 "SourceFile",
1095 "Fn",
1096 "Struct",
1097 "Union",
1098 "RecordField",
1099 "TupleField",
1100 "Enum",
1101 "Variant",
1102 "Trait",
1103 "Module",
1104 "Static",
1105 "Const",
1106 "TypeAlias",
1107 "Impl",
1108 "ExternBlock",
1109 "ExternCrate",
1110 "MacroCall",
1111 "MacroRules",
1112 "MacroDef",
1113 "Use",
1114 ];
1115
1116 for node in &mut ast.nodes {
1117 if nodes_with_doc_comments.contains(&&*node.name) {
1118 node.traits.push("HasDocComments".into());
1119 }
1120 }
1121}
1122
1123fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
1124 let mut to_remove = Vec::new();
1125 for (i, field) in node.fields.iter().enumerate() {
1126 let method_name = field.method_name();
1127 if methods.iter().any(|&it| it == method_name) {
1128 to_remove.push(i);
1129 }
1130 }
1131 if to_remove.len() == methods.len() {
1132 node.traits.push(trait_name.to_owned());
1133 node.remove_field(to_remove);
1134 }
1135}
1136
1137fn extract_enum_traits(ast: &mut AstSrc) {
1138 for enm in &mut ast.enums {
1139 if enm.name == "Stmt" {
1140 continue;
1141 }
1142 let nodes = &ast.nodes;
1143 let mut variant_traits = enm
1144 .variants
1145 .iter()
1146 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
1147 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
1148
1149 let mut enum_traits = match variant_traits.next() {
1150 Some(it) => it,
1151 None => continue,
1152 };
1153 for traits in variant_traits {
1154 enum_traits = enum_traits.intersection(&traits).cloned().collect();
1155 }
1156 enm.traits = enum_traits.into_iter().collect();
1157 }
1158}
1159
1160impl AstNodeSrc {
1161 fn remove_field(&mut self, to_remove: Vec<usize>) {
1162 to_remove.into_iter().rev().for_each(|idx| {
1163 self.fields.remove(idx);
1164 });
1165 }
1166}
1167
/// Runs the whole grammar codegen with the `check` flag set.
#[test]
fn test() {
    let check = true;
    generate(check);
}