1use std::ops::Range;
4use std::{borrow::Cow, num::ParseIntError};
5
6use rustc_literal_escaper::{
7 EscapeError, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
8 unescape_str,
9};
10use stdx::always;
11
12use crate::{
13 TextRange, TextSize,
14 ast::{self, AstToken},
15};
16
17impl ast::Comment {
18 pub fn kind(&self) -> CommentKind {
19 CommentKind::from_text(self.text())
20 }
21
22 pub fn is_doc(&self) -> bool {
23 self.kind().doc.is_some()
24 }
25
26 pub fn is_inner(&self) -> bool {
27 self.kind().doc == Some(CommentPlacement::Inner)
28 }
29
30 pub fn is_outer(&self) -> bool {
31 self.kind().doc == Some(CommentPlacement::Outer)
32 }
33
34 pub fn prefix(&self) -> &'static str {
35 let &(prefix, _kind) = CommentKind::BY_PREFIX
36 .iter()
37 .find(|&(prefix, kind)| self.kind() == *kind && self.text().starts_with(prefix))
38 .unwrap();
39 prefix
40 }
41
42 pub fn doc_comment(&self) -> Option<(&str, TextSize)> {
45 let kind = self.kind();
46 match kind {
47 CommentKind { shape, doc: Some(_) } => {
48 let prefix = kind.prefix();
49 let text = &self.text()[prefix.len()..];
50 let text = if shape == CommentShape::Block {
51 text.strip_suffix("*/").unwrap_or(text)
52 } else {
53 text
54 };
55 Some((text, TextSize::of(prefix)))
56 }
57 _ => None,
58 }
59 }
60}
61
/// Classification of a comment: its delimiter shape plus, for doc comments, the placement.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Whether the comment is a line or a block comment.
    pub shape: CommentShape,
    /// `None` for comments that are not doc comments.
    pub doc: Option<CommentPlacement>,
}

/// Delimiter style of a comment: `//…` (line) or `/*…*/` (block).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    Line,
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        matches!(self, CommentShape::Line)
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        matches!(self, CommentShape::Block)
    }
}

/// Placement of a doc comment: `//!` and `/*!` are inner, `///` and `/**` are outer.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    Inner,
    Outer,
}

impl CommentKind {
    // Order matters: `from_text` takes the first entry whose marker starts the text, so the
    // four-character markers that are *not* doc comments must precede the three-character
    // doc markers, which in turn precede the plain two-character markers.
    const BY_PREFIX: [(&'static str, CommentKind); 9] = [
        ("/**/", CommentKind { shape: CommentShape::Block, doc: None }),
        ("/***", CommentKind { shape: CommentShape::Block, doc: None }),
        ("////", CommentKind { shape: CommentShape::Line, doc: None }),
        ("///", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Outer) }),
        ("//!", CommentKind { shape: CommentShape::Line, doc: Some(CommentPlacement::Inner) }),
        ("/**", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Outer) }),
        ("/*!", CommentKind { shape: CommentShape::Block, doc: Some(CommentPlacement::Inner) }),
        ("//", CommentKind { shape: CommentShape::Line, doc: None }),
        ("/*", CommentKind { shape: CommentShape::Block, doc: None }),
    ];

    /// Classifies comment text by the first table marker it starts with.
    ///
    /// # Panics
    ///
    /// Panics if `text` starts with none of the markers in the table.
    pub(crate) fn from_text(text: &str) -> CommentKind {
        Self::BY_PREFIX
            .iter()
            .find_map(|&(prefix, kind)| text.starts_with(prefix).then_some(kind))
            .unwrap()
    }

    /// Returns the marker of the last table entry that has this kind.
    pub fn prefix(&self) -> &'static str {
        // Scanning from the back picks the plain `//` and `/*` markers for non-doc kinds
        // instead of the longer special-case markers listed at the front.
        Self::BY_PREFIX
            .iter()
            .rev()
            .find_map(|&(prefix, kind)| (kind == *self).then_some(prefix))
            .unwrap()
    }
}
117
118impl ast::Whitespace {
119 pub fn spans_multiple_lines(&self) -> bool {
120 let text = self.text();
121 text.find('\n').is_some_and(|idx| text[idx + 1..].contains('\n'))
122 }
123}
124
125#[derive(Debug)]
126pub struct QuoteOffsets {
127 pub quotes: (TextRange, TextRange),
128 pub contents: TextRange,
129}
130
131impl QuoteOffsets {
132 fn new(literal: &str) -> Option<QuoteOffsets> {
133 let left_quote = literal.find('"')?;
134 let right_quote = literal.rfind('"')?;
135 if left_quote == right_quote {
136 return None;
138 }
139
140 let start = TextSize::from(0);
141 let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
142 let right_quote = TextSize::try_from(right_quote).unwrap();
143 let end = TextSize::of(literal);
144
145 let res = QuoteOffsets {
146 quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
147 contents: TextRange::new(left_quote, right_quote),
148 };
149 Some(res)
150 }
151}
152
153pub trait IsString: AstToken {
154 fn raw_prefix(&self) -> &'static str;
155 fn unescape(&self, s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>));
156 fn is_raw(&self) -> bool {
157 self.text().starts_with(self.raw_prefix())
158 }
159 fn quote_offsets(&self) -> Option<QuoteOffsets> {
160 let text = self.text();
161 let offsets = QuoteOffsets::new(text)?;
162 let o = self.syntax().text_range().start();
163 let offsets = QuoteOffsets {
164 quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
165 contents: offsets.contents + o,
166 };
167 Some(offsets)
168 }
169 fn text_range_between_quotes(&self) -> Option<TextRange> {
170 self.quote_offsets().map(|it| it.contents)
171 }
172 fn text_without_quotes(&self) -> &str {
173 let text = self.text();
174 let Some(offsets) = self.text_range_between_quotes() else { return text };
175 &text[offsets - self.syntax().text_range().start()]
176 }
177 fn open_quote_text_range(&self) -> Option<TextRange> {
178 self.quote_offsets().map(|it| it.quotes.0)
179 }
180 fn close_quote_text_range(&self) -> Option<TextRange> {
181 self.quote_offsets().map(|it| it.quotes.1)
182 }
183 fn escaped_char_ranges(&self, cb: &mut dyn FnMut(TextRange, Result<char, EscapeError>)) {
184 let Some(text_range_no_quotes) = self.text_range_between_quotes() else { return };
185
186 let start = self.syntax().text_range().start();
187 let text = &self.text()[text_range_no_quotes - start];
188 let offset = text_range_no_quotes.start() - start;
189
190 self.unescape(text, &mut |range: Range<usize>, unescaped_char| {
191 if let Some((s, e)) = range.start.try_into().ok().zip(range.end.try_into().ok()) {
192 cb(TextRange::new(s, e) + offset, unescaped_char);
193 }
194 });
195 }
196 fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
197 let contents_range = self.text_range_between_quotes()?;
198 if always!(TextRange::up_to(contents_range.len()).contains_range(range)) {
199 Some(range + contents_range.start())
200 } else {
201 None
202 }
203 }
204 fn map_offset_down(&self, offset: TextSize) -> Option<TextSize> {
205 let contents_range = self.text_range_between_quotes()?;
206 offset.checked_sub(contents_range.start())
207 }
208}
209
210impl IsString for ast::String {
211 fn raw_prefix(&self) -> &'static str {
212 "r"
213 }
214 fn unescape(&self, s: &str, cb: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
215 unescape_str(s, cb)
216 }
217}
218
219impl ast::String {
220 pub fn value(&self) -> Result<Cow<'_, str>, EscapeError> {
221 let text = self.text();
222 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
223 let text = &text[text_range - self.syntax().text_range().start()];
224 if self.is_raw() {
225 return Ok(Cow::Borrowed(text));
226 }
227
228 let mut buf = String::new();
229 let mut prev_end = 0;
230 let mut has_error = None;
231 unescape_str(text, |char_range, unescaped_char| {
232 match (unescaped_char, buf.capacity() == 0) {
233 (Ok(c), false) => buf.push(c),
234 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
235 prev_end = char_range.end
236 }
237 (Ok(c), true) => {
238 buf.reserve_exact(text.len());
239 buf.push_str(&text[..prev_end]);
240 buf.push(c);
241 }
242 (Err(e), _) => has_error = Some(e),
243 }
244 });
245
246 match (has_error, buf.capacity() == 0) {
247 (Some(e), _) => Err(e),
248 (None, true) => Ok(Cow::Borrowed(text)),
249 (None, false) => Ok(Cow::Owned(buf)),
250 }
251 }
252}
253
254impl IsString for ast::ByteString {
255 fn raw_prefix(&self) -> &'static str {
256 "br"
257 }
258 fn unescape(&self, s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
259 unescape_byte_str(s, |range, res| callback(range, res.map(char::from)))
260 }
261}
262
263impl ast::ByteString {
264 pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
265 let text = self.text();
266 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
267 let text = &text[text_range - self.syntax().text_range().start()];
268 if self.is_raw() {
269 return Ok(Cow::Borrowed(text.as_bytes()));
270 }
271
272 let mut buf: Vec<u8> = Vec::new();
273 let mut prev_end = 0;
274 let mut has_error = None;
275 unescape_byte_str(text, |char_range, unescaped_byte| {
276 match (unescaped_byte, buf.capacity() == 0) {
277 (Ok(b), false) => buf.push(b),
278 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
279 prev_end = char_range.end
280 }
281 (Ok(b), true) => {
282 buf.reserve_exact(text.len());
283 buf.extend_from_slice(&text.as_bytes()[..prev_end]);
284 buf.push(b);
285 }
286 (Err(e), _) => has_error = Some(e),
287 }
288 });
289
290 match (has_error, buf.capacity() == 0) {
291 (Some(e), _) => Err(e),
292 (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
293 (None, false) => Ok(Cow::Owned(buf)),
294 }
295 }
296}
297
298impl IsString for ast::CString {
299 fn raw_prefix(&self) -> &'static str {
300 "cr"
301 }
302 fn unescape(&self, s: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
305 unescape_c_str(s, |range, _res| callback(range, Ok('_')))
306 }
307}
308
309impl ast::CString {
310 pub fn value(&self) -> Result<Cow<'_, [u8]>, EscapeError> {
311 let text = self.text();
312 let text_range = self.text_range_between_quotes().ok_or(EscapeError::LoneSlash)?;
313 let text = &text[text_range - self.syntax().text_range().start()];
314 if self.is_raw() {
315 return Ok(Cow::Borrowed(text.as_bytes()));
316 }
317
318 let mut buf = Vec::new();
319 let mut prev_end = 0;
320 let mut has_error = None;
321 let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
322 MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
323 MixedUnit::HighByte(b) => buf.push(b),
324 };
325 unescape_c_str(text, |char_range, unescaped| match (unescaped, buf.capacity() == 0) {
326 (Ok(u), false) => extend_unit(&mut buf, u),
327 (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
328 prev_end = char_range.end
329 }
330 (Ok(u), true) => {
331 buf.reserve_exact(text.len());
332 buf.extend(&text.as_bytes()[..prev_end]);
333 extend_unit(&mut buf, u);
334 }
335 (Err(e), _) => has_error = Some(e),
336 });
337
338 match (has_error, buf.capacity() == 0) {
339 (Some(e), _) => Err(e),
340 (None, true) => Ok(Cow::Borrowed(text.as_bytes())),
341 (None, false) => Ok(Cow::Owned(buf)),
342 }
343 }
344}
345
346impl ast::IntNumber {
347 pub fn radix(&self) -> Radix {
348 match self.text().get(..2).unwrap_or_default() {
349 "0b" => Radix::Binary,
350 "0o" => Radix::Octal,
351 "0x" => Radix::Hexadecimal,
352 _ => Radix::Decimal,
353 }
354 }
355
356 pub fn split_into_parts(&self) -> (&str, &str, &str) {
357 let radix = self.radix();
358 let (prefix, mut text) = self.text().split_at(radix.prefix_len());
359
360 let is_suffix_start: fn(&(usize, char)) -> bool = match radix {
361 Radix::Hexadecimal => |(_, c)| matches!(c, 'g'..='z' | 'G'..='Z'),
362 _ => |(_, c)| c.is_ascii_alphabetic(),
363 };
364
365 let mut suffix = "";
366 if let Some((suffix_start, _)) = text.char_indices().find(is_suffix_start) {
367 let (text2, suffix2) = text.split_at(suffix_start);
368 text = text2;
369 suffix = suffix2;
370 };
371
372 (prefix, text, suffix)
373 }
374
375 pub fn value(&self) -> Result<u128, ParseIntError> {
376 let (_, text, _) = self.split_into_parts();
377 u128::from_str_radix(&text.replace('_', ""), self.radix() as u32)
378 }
379
380 pub fn suffix(&self) -> Option<&str> {
381 let (_, _, suffix) = self.split_into_parts();
382 if suffix.is_empty() { None } else { Some(suffix) }
383 }
384
385 pub fn value_string(&self) -> String {
386 let (_, text, _) = self.split_into_parts();
387 text.replace('_', "")
388 }
389}
390
391impl ast::FloatNumber {
392 pub fn split_into_parts(&self) -> (&str, &str) {
393 let text = self.text();
394 let mut float_text = self.text();
395 let mut suffix = "";
396 let mut indices = text.char_indices();
397 if let Some((mut suffix_start, c)) = indices.by_ref().find(|(_, c)| c.is_ascii_alphabetic())
398 {
399 if c == 'e' || c == 'E' {
400 if let Some(suffix_start_tuple) = indices.find(|(_, c)| c.is_ascii_alphabetic()) {
401 suffix_start = suffix_start_tuple.0;
402
403 float_text = &text[..suffix_start];
404 suffix = &text[suffix_start..];
405 }
406 } else {
407 float_text = &text[..suffix_start];
408 suffix = &text[suffix_start..];
409 }
410 }
411
412 (float_text, suffix)
413 }
414
415 pub fn suffix(&self) -> Option<&str> {
416 let (_, suffix) = self.split_into_parts();
417 if suffix.is_empty() { None } else { Some(suffix) }
418 }
419
420 pub fn value_string(&self) -> String {
421 let (text, _) = self.split_into_parts();
422 text.replace('_', "")
423 }
424}
425
/// Numeric base of an integer literal; each discriminant is the base itself.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Radix {
    Binary = 2,
    Octal = 8,
    Decimal = 10,
    Hexadecimal = 16,
}

impl Radix {
    /// Every radix, in ascending order of base.
    pub const ALL: &'static [Radix] =
        &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];

    /// Length in bytes of the literal prefix for this radix: none for decimal, two otherwise.
    const fn prefix_len(self) -> usize {
        match self {
            Self::Binary | Self::Octal | Self::Hexadecimal => 2,
            Self::Decimal => 0,
        }
    }
}
445
446impl ast::Char {
447 pub fn value(&self) -> Result<char, EscapeError> {
448 let mut text = self.text();
449 if text.starts_with('\'') {
450 text = &text[1..];
451 } else {
452 return Err(EscapeError::ZeroChars);
453 }
454 if text.ends_with('\'') {
455 text = &text[0..text.len() - 1];
456 }
457
458 unescape_char(text)
459 }
460}
461
462impl ast::Byte {
463 pub fn value(&self) -> Result<u8, EscapeError> {
464 let mut text = self.text();
465 if text.starts_with("b\'") {
466 text = &text[2..];
467 } else {
468 return Err(EscapeError::ZeroChars);
469 }
470 if text.ends_with('\'') {
471 text = &text[0..text.len() - 1];
472 }
473
474 unescape_byte(text)
475 }
476}
477
478pub enum AnyString {
479 ByteString(ast::ByteString),
480 CString(ast::CString),
481 String(ast::String),
482}
483
484impl AnyString {
485 pub fn value(&self) -> Result<Cow<'_, str>, EscapeError> {
486 fn from_utf8(s: Cow<'_, [u8]>) -> Result<Cow<'_, str>, EscapeError> {
487 match s {
488 Cow::Borrowed(s) => str::from_utf8(s)
489 .map_err(|_| EscapeError::NonAsciiCharInByte)
490 .map(Cow::Borrowed),
491 Cow::Owned(s) => String::from_utf8(s)
492 .map_err(|_| EscapeError::NonAsciiCharInByte)
493 .map(Cow::Owned),
494 }
495 }
496
497 match self {
498 AnyString::String(s) => s.value(),
499 AnyString::ByteString(s) => s.value().and_then(from_utf8),
500 AnyString::CString(s) => s.value().and_then(from_utf8),
501 }
502 }
503}
504
505impl ast::AstToken for AnyString {
506 fn can_cast(kind: crate::SyntaxKind) -> bool {
507 ast::String::can_cast(kind)
508 || ast::ByteString::can_cast(kind)
509 || ast::CString::can_cast(kind)
510 }
511
512 fn cast(syntax: crate::SyntaxToken) -> Option<Self> {
513 ast::String::cast(syntax.clone())
514 .map(Self::String)
515 .or_else(|| ast::ByteString::cast(syntax.clone()).map(Self::ByteString))
516 .or_else(|| ast::CString::cast(syntax).map(Self::CString))
517 }
518
519 fn syntax(&self) -> &crate::SyntaxToken {
520 match self {
521 Self::ByteString(it) => it.syntax(),
522 Self::CString(it) => it.syntax(),
523 Self::String(it) => it.syntax(),
524 }
525 }
526}
527
528impl IsString for AnyString {
529 fn raw_prefix(&self) -> &'static str {
530 match self {
531 AnyString::ByteString(s) => s.raw_prefix(),
532 AnyString::CString(s) => s.raw_prefix(),
533 AnyString::String(s) => s.raw_prefix(),
534 }
535 }
536
537 fn unescape(&self, s: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
538 match self {
539 AnyString::ByteString(it) => it.unescape(s, callback),
540 AnyString::CString(it) => it.unescape(s, callback),
541 AnyString::String(it) => it.unescape(s, callback),
542 }
543 }
544}
545
// Unit tests for suffix splitting, numeric value extraction and string unescaping.
#[cfg(test)]
mod tests {
    use rustc_apfloat::ieee::Quad as f128;

    use crate::ast::{self, FloatNumber, IntNumber, make};

    // Asserts the suffix reported when `lit` is wrapped as a `FloatNumber`.
    fn check_float_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        assert_eq!(FloatNumber { syntax: make::tokens::literal(lit) }.suffix(), expected.into());
    }

    // Asserts the suffix reported when `lit` is wrapped as an `IntNumber`.
    fn check_int_suffix<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        assert_eq!(IntNumber { syntax: make::tokens::literal(lit) }.suffix(), expected.into());
    }

    // Asserts that `value_string()` of `lit`, wrapped both as a `FloatNumber` and as an
    // `IntNumber`, parses to the same `f128` as `expected`.
    fn check_float_value(lit: &str, expected: &str) {
        let expected = Some(expected.parse::<f128>().unwrap());
        assert_eq!(
            FloatNumber { syntax: make::tokens::literal(lit) }.value_string().parse::<f128>().ok(),
            expected
        );
        assert_eq!(
            IntNumber { syntax: make::tokens::literal(lit) }.value_string().parse::<f128>().ok(),
            expected
        );
    }

    // Asserts the parsed integer value of `lit`; `None` means parsing is expected to fail.
    fn check_int_value(lit: &str, expected: impl Into<Option<u128>>) {
        assert_eq!(IntNumber { syntax: make::tokens::literal(lit) }.value().ok(), expected.into());
    }

    #[test]
    fn test_float_number_suffix() {
        check_float_suffix("123.0", None);
        check_float_suffix("123f32", "f32");
        // An `e`/`E` exponent marker must not be mistaken for the start of a suffix.
        check_float_suffix("123.0e", None);
        check_float_suffix("123.0e4", None);
        check_float_suffix("123.0ef16", "f16");
        check_float_suffix("123.0E4f32", "f32");
        check_float_suffix("1_2_3.0_f128", "f128");
    }

    #[test]
    fn test_int_number_suffix() {
        check_int_suffix("123", None);
        check_int_suffix("123i32", "i32");
        check_int_suffix("1_0_1_l_o_l", "l_o_l");
        check_int_suffix("0b11", None);
        check_int_suffix("0o11", None);
        // In hexadecimal literals `a`-`f` are digits, not the start of a suffix.
        check_int_suffix("0xff", None);
        check_int_suffix("0b11u32", "u32");
        check_int_suffix("0o11u32", "u32");
        check_int_suffix("0xffu32", "u32");
    }

    // Wraps `lit` in double quotes and asserts the unescaped value; `None` means an error
    // is expected.
    fn check_string_value<'a>(lit: &str, expected: impl Into<Option<&'a str>>) {
        assert_eq!(
            ast::String { syntax: make::tokens::literal(&format!("\"{lit}\"")) }
                .value()
                .as_deref()
                .ok(),
            expected.into()
        );
    }

    #[test]
    fn test_string_escape() {
        check_string_value(r"foobar", "foobar");
        check_string_value(r"\foobar", None);
        check_string_value(r"\nfoobar", "\nfoobar");
        check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\");
        check_string_value(r"\x61bcde", "abcde");
        // A backslash at the end of a line continues the string on the next line.
        check_string_value(
            r"a\
bcde", "abcde",
        );
    }

    // Wraps `lit` as `b"…"` and asserts the unescaped bytes; `None` means an error is
    // expected.
    fn check_byte_string_value<'a, const N: usize>(
        lit: &str,
        expected: impl Into<Option<&'a [u8; N]>>,
    ) {
        assert_eq!(
            ast::ByteString { syntax: make::tokens::literal(&format!("b\"{lit}\"")) }
                .value()
                .as_deref()
                .ok(),
            expected.into().map(|value| &value[..])
        );
    }

    #[test]
    fn test_byte_string_escape() {
        check_byte_string_value(r"foobar", b"foobar");
        // The array length cannot be inferred from `None`, so it is spelled out.
        check_byte_string_value(r"\foobar", None::<&[u8; 0]>);
        check_byte_string_value(r"\nfoobar", b"\nfoobar");
        check_byte_string_value(r"C:\\Windows\\System32\\", b"C:\\Windows\\System32\\");
        check_byte_string_value(r"\x61bcde", b"abcde");
        // A backslash at the end of a line continues the string on the next line.
        check_byte_string_value(
            r"a\
bcde", b"abcde",
        );
    }

    #[test]
    fn test_value_underscores() {
        check_float_value("1.3_4665449586950493453___6_f128", "1.346654495869504934536");
        check_float_value("1.234567891011121_f64", "1.234567891011121");
        check_float_value("1__0.__0__f32", "10.0");
        check_float_value("3._0_f16", "3.0");
        check_int_value("0b__1_0_", 2);
        check_int_value("1_1_1_1_1_1", 111111);
    }
}