Skip to main content

xtask/codegen/grammar/
ast_src.rs

1//! Defines input for code generation process.
2
3use quote::ToTokens;
4
5use crate::codegen::grammar::to_upper_snake_case;
6
/// Names of every syntax kind, grouped by category, as produced by `generate_kind_src`.
///
/// All slices are `'static`: `punct` points at the `PUNCT` table and the remaining slices
/// are intentionally leaked vectors.
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
    /// Punctuation as `(text, NAME)` pairs, e.g. `(";", "SEMICOLON")`.
    pub(crate) punct: &'static [(&'static str, &'static str)],
    /// Purely alphabetic grammar tokens plus the `RESERVED` words, sorted and deduplicated,
    /// excluding anything listed as contextual or edition-dependent.
    pub(crate) keywords: &'static [&'static str],
    /// `CONTEXTUAL_KEYWORDS` and `CONTEXTUAL_BUILTIN_KEYWORDS` combined, sorted and
    /// deduplicated.
    pub(crate) contextual_keywords: &'static [&'static str],
    /// Upper-snake-case names of the grammar tokens written with a leading `@`, sorted.
    pub(crate) literals: &'static [&'static str],
    /// The fixed `TOKENS` entries plus the upper-snake-case names of the grammar tokens
    /// written with a leading `#`, sorted.
    pub(crate) tokens: &'static [&'static str],
    /// Upper-snake-case names of all AST nodes, sorted.
    pub(crate) nodes: &'static [&'static str],
    /// Upper-snake-case names of all AST enums, sorted.
    // NOTE(review): the leading underscore suggests nothing reads this field yet — confirm.
    pub(crate) _enums: &'static [&'static str],
    /// Sorted, deduplicated copy of `EDITION_DEPENDENT_KEYWORDS`.
    pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)],
}
18
/// Language edition paired with each entry of `EDITION_DEPENDENT_KEYWORDS`.
///
/// Variants are declared oldest first, so the derived `Ord` orders editions chronologically.
// NOTE(review): `dead_code` is presumably allowed because this file only constructs
// `Edition2018` and `Edition2024` — confirm before removing the attribute.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(super) enum Edition {
    Edition2015,
    Edition2018,
    Edition2021,
    Edition2024,
}
27
28impl ToTokens for Edition {
29    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
30        match self {
31            Edition::Edition2015 => {
32                tokens.extend(quote::quote! { Edition::Edition2015 });
33            }
34            Edition::Edition2018 => {
35                tokens.extend(quote::quote! { Edition::Edition2018 });
36            }
37            Edition::Edition2021 => {
38                tokens.extend(quote::quote! { Edition::Edition2021 });
39            }
40            Edition::Edition2024 => {
41                tokens.extend(quote::quote! { Edition::Edition2024 });
42            }
43        }
44    }
45}
46
/// The punctuation of the language, as `(text, NAME)` pairs.
///
/// `generate_kind_src` panics if the grammar uses punctuation missing from this table, and
/// also if any entry other than the first is never used by the grammar.
const PUNCT: &[(&str, &str)] = &[
    // Keep `$` first: `generate_kind_src` unconditionally marks index 0 as used, which
    // exempts `$` from the "must be used in the grammar" check.
    ("$", "DOLLAR"),
    (";", "SEMICOLON"),
    (",", "COMMA"),
    ("(", "L_PAREN"),
    (")", "R_PAREN"),
    ("{", "L_CURLY"),
    ("}", "R_CURLY"),
    ("[", "L_BRACK"),
    ("]", "R_BRACK"),
    ("<", "L_ANGLE"),
    (">", "R_ANGLE"),
    ("@", "AT"),
    ("#", "POUND"),
    ("~", "TILDE"),
    ("?", "QUESTION"),
    ("&", "AMP"),
    ("|", "PIPE"),
    ("+", "PLUS"),
    ("*", "STAR"),
    ("/", "SLASH"),
    ("^", "CARET"),
    ("%", "PERCENT"),
    ("_", "UNDERSCORE"),
    (".", "DOT"),
    ("..", "DOT2"),
    ("...", "DOT3"),
    ("..=", "DOT2EQ"),
    (":", "COLON"),
    ("::", "COLON2"),
    ("=", "EQ"),
    ("==", "EQ2"),
    ("=>", "FAT_ARROW"),
    ("!", "BANG"),
    ("!=", "NEQ"),
    ("-", "MINUS"),
    ("->", "THIN_ARROW"),
    ("<=", "LTEQ"),
    (">=", "GTEQ"),
    ("+=", "PLUSEQ"),
    ("-=", "MINUSEQ"),
    ("|=", "PIPEEQ"),
    ("&=", "AMPEQ"),
    ("^=", "CARETEQ"),
    ("/=", "SLASHEQ"),
    ("*=", "STAREQ"),
    ("%=", "PERCENTEQ"),
    ("&&", "AMP2"),
    ("||", "PIPE2"),
    ("<<", "SHL"),
    (">>", "SHR"),
    ("<<=", "SHLEQ"),
    (">>=", "SHREQ"),
];
/// Token kinds that always exist, independent of the grammar.
///
/// `generate_kind_src` starts from this list and appends one entry for every grammar token
/// written with a leading `#`.
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// NOTE(review): the commented-out list below looks like an alternative token set — confirm
// whether it is still needed or can be deleted.
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],;
105
/// Name of the grammar token that `generate_kind_src` skips instead of classifying.
const EOF: &str = "EOF";
107
/// Words that `generate_kind_src` always adds to the keyword list, whether or not the
/// grammar mentions them.
const RESERVED: &[&str] = &[
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
    "virtual", "yield",
];
/// Words that act as keywords only in specific parse contexts.
///
/// Grammar tokens found in this list are not recorded as regular keywords by
/// `generate_kind_src`.
// NOTE(review): `dyn` is listed both here and in `EDITION_DEPENDENT_KEYWORDS` — confirm the
// overlap is intentional.
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] = &[
    "macro_rules",
    "union",
    "default",
    "raw",
    "dyn",
    "auto",
    "yeet",
    "safe",
    "bikeshed",
    "cfg_attr",
    "cfg",
    "null",
];
/// Contextual keywords that are only used for special (builtin) macro expansions.
///
/// `generate_kind_src` merges this list with `CONTEXTUAL_KEYWORDS`.
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
    "asm",
    "naked_asm",
    "global_asm",
    "att_syntax",
    "builtin",
    "clobber_abi",
    "format_args",
    // NOTE(review): `in` is left out, presumably because the grammar already provides it as
    // a regular keyword — confirm.
    // "in",
    "inlateout",
    "inout",
    "label",
    "lateout",
    "may_unwind",
    "nomem",
    "noreturn",
    "nostack",
    "offset_of",
    "options",
    "out",
    "preserves_flags",
    "pure",
    // `raw` is left out here; it is already listed in `CONTEXTUAL_KEYWORDS`.
    // "raw",
    "readonly",
    "sym",
    "deref",
    "pattern_type",
    "is",
    "include_bytes",
];
159
/// Words whose keyword status depends on the edition, as `(word, edition)` pairs.
///
/// `generate_kind_src` removes these words from the regular keyword list.
// NOTE(review): the edition is presumably the first one in which the word is a keyword —
// confirm against the consumer of `KindsSrc::edition_dependent_keywords`.
const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[
    ("try", Edition::Edition2018),
    ("dyn", Edition::Edition2018),
    ("async", Edition::Edition2018),
    ("await", Edition::Edition2018),
    ("gen", Edition::Edition2024),
];
168
169pub(crate) fn generate_kind_src(
170    nodes: &[AstNodeSrc],
171    enums: &[AstEnumSrc],
172    grammar: &ungrammar::Grammar,
173) -> KindsSrc {
174    let mut contextual_keywords: Vec<&_> =
175        CONTEXTUAL_KEYWORDS.iter().chain(CONTEXTUAL_BUILTIN_KEYWORDS).copied().collect();
176
177    let mut keywords: Vec<&_> = Vec::new();
178    let mut tokens: Vec<&_> = TOKENS.to_vec();
179    let mut literals: Vec<&_> = Vec::new();
180    let mut used_puncts = vec![false; PUNCT.len()];
181    // Mark $ as used
182    used_puncts[0] = true;
183    grammar.tokens().for_each(|token| {
184        let name = &*grammar[token].name;
185        if name == EOF {
186            return;
187        }
188        match name.split_at(1) {
189            ("@", lit) if !lit.is_empty() => {
190                literals.push(String::leak(to_upper_snake_case(lit)));
191            }
192            ("#", token) if !token.is_empty() => {
193                tokens.push(String::leak(to_upper_snake_case(token)));
194            }
195            _ if contextual_keywords.contains(&name) => {}
196            _ if name.chars().all(char::is_alphabetic) => {
197                keywords.push(String::leak(name.to_owned()));
198            }
199            _ => {
200                let idx = PUNCT
201                    .iter()
202                    .position(|(punct, _)| punct == &name)
203                    .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
204                used_puncts[idx] = true;
205            }
206        }
207    });
208    if let Some(punct) = PUNCT.iter().zip(used_puncts).find(|(_, used)| !used) {
209        panic!("Punctuation {punct:?} is not used in grammar");
210    }
211    keywords.extend(RESERVED.iter().copied());
212    keywords.sort();
213    keywords.dedup();
214    contextual_keywords.sort();
215    contextual_keywords.dedup();
216    let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec();
217    edition_dependent_keywords.sort();
218    edition_dependent_keywords.dedup();
219
220    keywords.retain(|&it| !contextual_keywords.contains(&it));
221    keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it));
222
223    // we leak things here for simplicity, that way we don't have to deal with lifetimes
224    // The execution is a one shot job so thats fine
225    let nodes = nodes
226        .iter()
227        .map(|it| &it.name)
228        .map(|it| to_upper_snake_case(it))
229        .map(String::leak)
230        .map(|it| &*it)
231        .collect();
232    let nodes = Vec::leak(nodes);
233    nodes.sort();
234    let enums = enums
235        .iter()
236        .map(|it| &it.name)
237        .map(|it| to_upper_snake_case(it))
238        .map(String::leak)
239        .map(|it| &*it)
240        .collect();
241    let enums = Vec::leak(enums);
242    enums.sort();
243    let keywords = Vec::leak(keywords);
244    let contextual_keywords = Vec::leak(contextual_keywords);
245    let edition_dependent_keywords = Vec::leak(edition_dependent_keywords);
246    let literals = Vec::leak(literals);
247    literals.sort();
248    let tokens = Vec::leak(tokens);
249    tokens.sort();
250
251    KindsSrc {
252        punct: PUNCT,
253        nodes,
254        _enums: enums,
255        keywords,
256        contextual_keywords,
257        edition_dependent_keywords,
258        literals,
259        tokens,
260    }
261}
262
/// Description of the AST used as input for code generation: token names, node definitions
/// and enum definitions.
#[derive(Default, Debug)]
pub(crate) struct AstSrc {
    /// Token names.
    // NOTE(review): presumably the tokens that get their own AST wrapper — confirm.
    pub(crate) tokens: Vec<String>,
    /// Node (struct-like) definitions.
    pub(crate) nodes: Vec<AstNodeSrc>,
    /// Enum definitions.
    pub(crate) enums: Vec<AstEnumSrc>,
}
269
/// Description of a single AST node.
#[derive(Debug)]
pub(crate) struct AstNodeSrc {
    /// Documentation attached to the node.
    // NOTE(review): presumably one entry per doc-comment line — confirm.
    pub(crate) doc: Vec<String>,
    /// Node name; `generate_kind_src` converts it to upper snake case to form the kind name.
    pub(crate) name: String,
    /// Names of traits associated with the node.
    // NOTE(review): presumably the traits the generated type implements — confirm.
    pub(crate) traits: Vec<String>,
    /// The node's fields (tokens and child nodes).
    pub(crate) fields: Vec<Field>,
}
277
/// A field of an [`AstNodeSrc`]: either a token or a child node.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Field {
    /// A token field, identified by `token`, with an optional explicit `name`.
    Token { name: Option<String>, token: String },
    /// A child-node field called `name`, of node type `ty`, occurring with `cardinality`.
    Node { name: String, ty: String, cardinality: Cardinality },
}
283
/// How often the child of a [`Field::Node`] may occur.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
    // NOTE(review): presumably "zero or one occurrence" — confirm.
    Optional,
    // NOTE(review): presumably "any number of occurrences" — confirm.
    Many,
}
289
/// Description of a single AST enum.
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
    /// Documentation attached to the enum.
    // NOTE(review): presumably one entry per doc-comment line — confirm.
    pub(crate) doc: Vec<String>,
    /// Enum name; `generate_kind_src` converts it to upper snake case to form the kind name.
    pub(crate) name: String,
    /// Names of traits associated with the enum.
    // NOTE(review): presumably the traits the generated type implements — confirm.
    pub(crate) traits: Vec<String>,
    /// Names of the enum's variants.
    pub(crate) variants: Vec<String>,
}
296}