xtask/codegen/grammar/
ast_src.rs

1//! Defines input for code generation process.
2
3use quote::ToTokens;
4
5use crate::codegen::grammar::to_upper_snake_case;
6
/// Input for `SyntaxKind` code generation: every token and node kind,
/// assembled from the static tables below plus the grammar by
/// `generate_kind_src`.
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
    /// Punctuation tokens as `(token text, SCREAMING_SNAKE_CASE name)` pairs.
    pub(crate) punct: &'static [(&'static str, &'static str)],
    /// Full keywords (including reserved ones), with contextual and
    /// edition-dependent keywords filtered out.
    pub(crate) keywords: &'static [&'static str],
    /// Keywords that are only keywords in specific parse contexts.
    pub(crate) contextual_keywords: &'static [&'static str],
    /// Literal token kinds (collected from `@`-prefixed grammar tokens).
    pub(crate) literals: &'static [&'static str],
    /// Remaining token kinds: trivia plus `#`-prefixed grammar tokens.
    pub(crate) tokens: &'static [&'static str],
    /// Node kinds, one per AST node, in SCREAMING_SNAKE_CASE.
    pub(crate) nodes: &'static [&'static str],
    /// Enum (node-group) kinds; unused by current consumers, hence the
    /// leading underscore.
    pub(crate) _enums: &'static [&'static str],
    /// Keywords that only become keywords from a given edition onwards.
    pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)],
}
18
/// A Rust edition, used to tag edition-dependent keywords.
///
/// Derives `Ord`/`PartialOrd` so editions compare and sort chronologically.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(super) enum Edition {
    Edition2015,
    Edition2018,
    Edition2021,
    Edition2024,
}
27
28impl ToTokens for Edition {
29    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
30        match self {
31            Edition::Edition2015 => {
32                tokens.extend(quote::quote! { Edition::Edition2015 });
33            }
34            Edition::Edition2018 => {
35                tokens.extend(quote::quote! { Edition::Edition2018 });
36            }
37            Edition::Edition2021 => {
38                tokens.extend(quote::quote! { Edition::Edition2021 });
39            }
40            Edition::Edition2024 => {
41                tokens.extend(quote::quote! { Edition::Edition2024 });
42            }
43        }
44    }
45}
46
/// The punctuations of the language, as `(token text, kind name)` pairs.
const PUNCT: &[(&str, &str)] = &[
    // `$` must stay the first entry: `generate_kind_src` marks index 0 as
    // used unconditionally instead of discovering `$` from the grammar.
    ("$", "DOLLAR"),
    (";", "SEMICOLON"),
    (",", "COMMA"),
    ("(", "L_PAREN"),
    (")", "R_PAREN"),
    ("{", "L_CURLY"),
    ("}", "R_CURLY"),
    ("[", "L_BRACK"),
    ("]", "R_BRACK"),
    ("<", "L_ANGLE"),
    (">", "R_ANGLE"),
    ("@", "AT"),
    ("#", "POUND"),
    ("~", "TILDE"),
    ("?", "QUESTION"),
    ("&", "AMP"),
    ("|", "PIPE"),
    ("+", "PLUS"),
    ("*", "STAR"),
    ("/", "SLASH"),
    ("^", "CARET"),
    ("%", "PERCENT"),
    ("_", "UNDERSCORE"),
    (".", "DOT"),
    ("..", "DOT2"),
    ("...", "DOT3"),
    ("..=", "DOT2EQ"),
    (":", "COLON"),
    ("::", "COLON2"),
    ("=", "EQ"),
    ("==", "EQ2"),
    ("=>", "FAT_ARROW"),
    ("!", "BANG"),
    ("!=", "NEQ"),
    ("-", "MINUS"),
    ("->", "THIN_ARROW"),
    ("<=", "LTEQ"),
    (">=", "GTEQ"),
    ("+=", "PLUSEQ"),
    ("-=", "MINUSEQ"),
    ("|=", "PIPEEQ"),
    ("&=", "AMPEQ"),
    ("^=", "CARETEQ"),
    ("/=", "SLASHEQ"),
    ("*=", "STAREQ"),
    ("%=", "PERCENTEQ"),
    ("&&", "AMP2"),
    ("||", "PIPE2"),
    ("<<", "SHL"),
    (">>", "SHR"),
    ("<<=", "SHLEQ"),
    (">>=", "SHREQ"),
];
/// Token kinds that exist independently of the grammar (trivia and error).
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],;

/// Sentinel token name that the grammar traversal skips entirely.
const EOF: &str = "EOF";
107
/// Keywords reserved by Rust for future use; merged into the regular
/// keyword list by `generate_kind_src` even though the grammar never
/// references them.
const RESERVED: &[&str] = &[
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
    "virtual", "yield",
];
// Keywords that are keywords only in specific parse contexts
// (a.k.a. "weak" keywords; identifiers everywhere else).
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] =
    &["macro_rules", "union", "default", "raw", "dyn", "auto", "yeet", "safe"];
// Keywords we use for special macro expansions (builtin macros such as
// `asm!`/`format_args!`/`offset_of!` and their operand keywords).
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
    "asm",
    "naked_asm",
    "global_asm",
    "att_syntax",
    "builtin",
    "clobber_abi",
    "format_args",
    // "in",
    "inlateout",
    "inout",
    "label",
    "lateout",
    "may_unwind",
    "nomem",
    "noreturn",
    "nostack",
    "offset_of",
    "options",
    "out",
    "preserves_flags",
    "pure",
    // "raw",
    "readonly",
    "sym",
];
143
// Keywords that only become keywords starting from the paired edition.
const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[
    ("try", Edition::Edition2018),
    ("dyn", Edition::Edition2018),
    ("async", Edition::Edition2018),
    ("await", Edition::Edition2018),
    ("gen", Edition::Edition2024),
];
152
/// Builds the [`KindsSrc`] consumed by `SyntaxKind` generation from the AST
/// node/enum sources and the ungrammar grammar.
///
/// Grammar token names are classified by prefix: `@name` becomes a literal
/// kind, `#name` becomes a plain token kind, fully-alphabetic names become
/// keywords (unless contextual), and anything else must match an entry of
/// [`PUNCT`].
///
/// # Panics
/// Panics if the grammar references punctuation missing from [`PUNCT`], or
/// if a [`PUNCT`] entry is never used by the grammar.
pub(crate) fn generate_kind_src(
    nodes: &[AstNodeSrc],
    enums: &[AstEnumSrc],
    grammar: &ungrammar::Grammar,
) -> KindsSrc {
    // Regular and builtin contextual keywords are treated uniformly below.
    let mut contextual_keywords: Vec<&_> =
        CONTEXTUAL_KEYWORDS.iter().chain(CONTEXTUAL_BUILTIN_KEYWORDS).copied().collect();

    let mut keywords: Vec<&_> = Vec::new();
    let mut tokens: Vec<&_> = TOKENS.to_vec();
    let mut literals: Vec<&_> = Vec::new();
    let mut used_puncts = vec![false; PUNCT.len()];
    // Mark $ as used: it is special-cased at index 0 of `PUNCT` rather than
    // discovered by the grammar traversal below.
    used_puncts[0] = true;
    grammar.tokens().for_each(|token| {
        let name = &*grammar[token].name;
        if name == EOF {
            return;
        }
        // Classify the token by its first character.
        match name.split_at(1) {
            // `@foo` marks a literal token kind.
            ("@", lit) if !lit.is_empty() => {
                literals.push(String::leak(to_upper_snake_case(lit)));
            }
            // `#foo` marks a plain (non-keyword) token kind.
            ("#", token) if !token.is_empty() => {
                tokens.push(String::leak(to_upper_snake_case(token)));
            }
            // Contextual keywords were already collected above.
            _ if contextual_keywords.contains(&name) => {}
            // Fully-alphabetic names are ordinary keywords.
            _ if name.chars().all(char::is_alphabetic) => {
                keywords.push(String::leak(name.to_owned()));
            }
            // Everything else must be a known punctuation token.
            _ => {
                let idx = PUNCT
                    .iter()
                    .position(|(punct, _)| punct == &name)
                    .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
                used_puncts[idx] = true;
            }
        }
    });
    // Catch stale `PUNCT` entries that the grammar no longer mentions.
    PUNCT.iter().zip(used_puncts).filter(|(_, used)| !used).for_each(|((punct, _), _)| {
        panic!("Punctuation {punct:?} is not used in grammar");
    });
    keywords.extend(RESERVED.iter().copied());
    keywords.sort();
    keywords.dedup();
    contextual_keywords.sort();
    contextual_keywords.dedup();
    let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec();
    edition_dependent_keywords.sort();
    edition_dependent_keywords.dedup();

    // A keyword must end up in exactly one of the three keyword lists;
    // contextual and edition-dependent classification wins over plain.
    keywords.retain(|&it| !contextual_keywords.contains(&it));
    keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it));

    // we leak things here for simplicity, that way we don't have to deal with lifetimes
    // The execution is a one shot job so thats fine
    let nodes = nodes
        .iter()
        .map(|it| &it.name)
        .map(|it| to_upper_snake_case(it))
        .map(String::leak)
        .map(|it| &*it)
        .collect();
    let nodes = Vec::leak(nodes);
    nodes.sort();
    let enums = enums
        .iter()
        .map(|it| &it.name)
        .map(|it| to_upper_snake_case(it))
        .map(String::leak)
        .map(|it| &*it)
        .collect();
    let enums = Vec::leak(enums);
    enums.sort();
    let keywords = Vec::leak(keywords);
    let contextual_keywords = Vec::leak(contextual_keywords);
    let edition_dependent_keywords = Vec::leak(edition_dependent_keywords);
    let literals = Vec::leak(literals);
    literals.sort();
    let tokens = Vec::leak(tokens);
    tokens.sort();

    KindsSrc {
        punct: PUNCT,
        nodes,
        _enums: enums,
        keywords,
        contextual_keywords,
        edition_dependent_keywords,
        literals,
        tokens,
    }
}
246
/// AST definitions extracted from the grammar: the input to AST code
/// generation.
#[derive(Default, Debug)]
pub(crate) struct AstSrc {
    // Token names.
    pub(crate) tokens: Vec<String>,
    // Struct-like AST nodes.
    pub(crate) nodes: Vec<AstNodeSrc>,
    // Enum-like AST nodes (groups of alternative node types).
    pub(crate) enums: Vec<AstEnumSrc>,
}
253
/// Source definition of a single struct-like AST node.
#[derive(Debug)]
pub(crate) struct AstNodeSrc {
    // Doc-comment lines attached to the node.
    pub(crate) doc: Vec<String>,
    // Node name; converted to SCREAMING_SNAKE_CASE for its kind by
    // `generate_kind_src`.
    pub(crate) name: String,
    // Names of traits associated with the node.
    pub(crate) traits: Vec<String>,
    // Accessor fields of the node.
    pub(crate) fields: Vec<Field>,
}
261
/// A single field of an AST node.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Field {
    /// A field holding a specific token, identified by its text.
    Token(String),
    /// A field holding child node(s) of type `ty`, exposed under accessor
    /// `name`, with the given cardinality.
    Node { name: String, ty: String, cardinality: Cardinality },
}
267
/// How many children a [`Field::Node`] may hold.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
    /// Zero or one child.
    Optional,
    /// Any number of children.
    Many,
}
273
/// Source definition of an enum-like AST node: a named group of
/// alternative node types.
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
    // Doc-comment lines attached to the enum.
    pub(crate) doc: Vec<String>,
    // Enum name.
    pub(crate) name: String,
    // Names of traits associated with the enum.
    pub(crate) traits: Vec<String>,
    // Names of the node types this enum can be.
    pub(crate) variants: Vec<String>,
}