use anyhow::{anyhow, bail};
use std::{
borrow::Cow,
io::{BufRead, Lines},
iter::Peekable,
};
const LISTING_DELIMITER: &str = "----";
const IMAGE_BLOCK_PREFIX: &str = "image::";
const VIDEO_BLOCK_PREFIX: &str = "video::";
struct Converter<'a, 'b, R: BufRead> {
iter: &'a mut Peekable<Lines<R>>,
output: &'b mut String,
}
impl<'a, 'b, R: BufRead> Converter<'a, 'b, R> {
fn new(iter: &'a mut Peekable<Lines<R>>, output: &'b mut String) -> Self {
Self { iter, output }
}
fn process(&mut self) -> anyhow::Result<()> {
self.process_document_header()?;
self.skip_blank_lines()?;
self.output.push('\n');
loop {
let line = self.iter.peek().unwrap().as_deref().map_err(|e| anyhow!("{e}"))?;
if get_title(line).is_some() {
let line = self.iter.next().unwrap().unwrap();
let (level, title) = get_title(&line).unwrap();
self.write_title(level, title);
} else if get_list_item(line).is_some() {
self.process_list()?;
} else if line.starts_with('[') {
self.process_source_code_block(0)?;
} else if line.starts_with(LISTING_DELIMITER) {
self.process_listing_block(None, 0)?;
} else if line.starts_with('.') {
self.process_block_with_title(0)?;
} else if line.starts_with(IMAGE_BLOCK_PREFIX) {
self.process_image_block(None, 0)?;
} else if line.starts_with(VIDEO_BLOCK_PREFIX) {
self.process_video_block(None, 0)?;
} else {
self.process_paragraph(0, |line| line.is_empty())?;
}
self.skip_blank_lines()?;
if self.iter.peek().is_none() {
break;
}
self.output.push('\n');
}
Ok(())
}
fn process_document_header(&mut self) -> anyhow::Result<()> {
self.process_document_title()?;
while let Some(line) = self.iter.next() {
let line = line?;
if line.is_empty() {
break;
}
if !line.starts_with(':') {
self.write_line(&line, 0)
}
}
Ok(())
}
fn process_document_title(&mut self) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if let Some((level, title)) = get_title(&line) {
let title = process_inline_macros(title)?;
if level == 1 {
self.write_title(level, &title);
return Ok(());
}
}
}
bail!("document title not found")
}
fn process_list(&mut self) -> anyhow::Result<()> {
let mut nesting = ListNesting::new();
while let Some(line) = self.iter.peek() {
let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
if get_list_item(line).is_some() {
let line = self.iter.next().unwrap()?;
let line = process_inline_macros(&line)?;
let (marker, item) = get_list_item(&line).unwrap();
nesting.set_current(marker);
self.write_list_item(item, &nesting);
self.process_paragraph(nesting.indent(), |line| {
line.is_empty() || get_list_item(line).is_some() || line == "+"
})?;
} else if line == "+" {
let _ = self.iter.next().unwrap()?;
let line = self
.iter
.peek()
.ok_or_else(|| anyhow!("list continuation unexpectedly terminated"))?;
let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
let indent = nesting.indent();
if line.starts_with('[') {
self.write_line("", 0);
self.process_source_code_block(indent)?;
} else if line.starts_with(LISTING_DELIMITER) {
self.write_line("", 0);
self.process_listing_block(None, indent)?;
} else if line.starts_with('.') {
self.write_line("", 0);
self.process_block_with_title(indent)?;
} else if line.starts_with(IMAGE_BLOCK_PREFIX) {
self.write_line("", 0);
self.process_image_block(None, indent)?;
} else if line.starts_with(VIDEO_BLOCK_PREFIX) {
self.write_line("", 0);
self.process_video_block(None, indent)?;
} else {
self.write_line("", 0);
let current = nesting.current().unwrap();
self.process_paragraph(indent, |line| {
line.is_empty()
|| get_list_item(line).filter(|(m, _)| m == current).is_some()
|| line == "+"
})?;
}
} else {
break;
}
self.skip_blank_lines()?;
}
Ok(())
}
fn process_source_code_block(&mut self, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if let Some(styles) = line.strip_prefix("[source").and_then(|s| s.strip_suffix(']')) {
let mut styles = styles.split(',');
if !styles.next().unwrap().is_empty() {
bail!("not a source code block");
}
let language = styles.next();
return self.process_listing_block(language, level);
}
}
bail!("not a source code block")
}
fn process_listing_block(&mut self, style: Option<&str>, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if line == LISTING_DELIMITER {
self.write_indent(level);
self.output.push_str("```");
if let Some(style) = style {
self.output.push_str(style);
}
self.output.push('\n');
while let Some(line) = self.iter.next() {
let line = line?;
if line == LISTING_DELIMITER {
self.write_line("```", level);
return Ok(());
} else {
self.write_line(&line, level);
}
}
bail!("listing block is not terminated")
}
}
bail!("not a listing block")
}
fn process_block_with_title(&mut self, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
let title =
line.strip_prefix('.').ok_or_else(|| anyhow!("extraction of the title failed"))?;
let line = self
.iter
.peek()
.ok_or_else(|| anyhow!("target block for the title is not found"))?;
let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
if line.starts_with(IMAGE_BLOCK_PREFIX) {
return self.process_image_block(Some(title), level);
} else if line.starts_with(VIDEO_BLOCK_PREFIX) {
return self.process_video_block(Some(title), level);
} else {
bail!("title for that block type is not supported");
}
}
bail!("not a title")
}
fn process_image_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if let Some((url, attrs)) = parse_media_block(&line, IMAGE_BLOCK_PREFIX) {
let alt = if let Some(stripped) =
attrs.strip_prefix('"').and_then(|s| s.strip_suffix('"'))
{
stripped
} else {
attrs
};
if let Some(caption) = caption {
self.write_caption_line(caption, level);
}
self.write_indent(level);
self.output.push_str("![");
self.output.push_str(alt);
self.output.push_str("](");
self.output.push_str(url);
self.output.push_str(")\n");
return Ok(());
}
}
bail!("not a image block")
}
fn process_video_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if let Some((url, attrs)) = parse_media_block(&line, VIDEO_BLOCK_PREFIX) {
let html_attrs = match attrs {
"options=loop" => "controls loop",
r#"options="autoplay,loop""# => "autoplay controls loop",
_ => bail!("unsupported video syntax"),
};
if let Some(caption) = caption {
self.write_caption_line(caption, level);
}
self.write_indent(level);
self.output.push_str(r#"<video src=""#);
self.output.push_str(url);
self.output.push_str(r#"" "#);
self.output.push_str(html_attrs);
self.output.push_str(">Your browser does not support the video tag.</video>\n");
return Ok(());
}
}
bail!("not a video block")
}
fn process_paragraph<P>(&mut self, level: usize, predicate: P) -> anyhow::Result<()>
where
P: Fn(&str) -> bool,
{
while let Some(line) = self.iter.peek() {
let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
if predicate(line) {
break;
}
self.write_indent(level);
let line = self.iter.next().unwrap()?;
let line = line.trim_start();
let line = process_inline_macros(line)?;
if let Some(stripped) = line.strip_suffix('+') {
self.output.push_str(stripped);
self.output.push('\\');
} else {
self.output.push_str(&line);
}
self.output.push('\n');
}
Ok(())
}
fn skip_blank_lines(&mut self) -> anyhow::Result<()> {
while let Some(line) = self.iter.peek() {
if !line.as_deref().unwrap().is_empty() {
break;
}
self.iter.next().unwrap()?;
}
Ok(())
}
fn write_title(&mut self, indent: usize, title: &str) {
for _ in 0..indent {
self.output.push('#');
}
self.output.push(' ');
self.output.push_str(title);
self.output.push('\n');
}
fn write_list_item(&mut self, item: &str, nesting: &ListNesting) {
let (marker, indent) = nesting.marker();
self.write_indent(indent);
self.output.push_str(marker);
self.output.push_str(item);
self.output.push('\n');
}
fn write_caption_line(&mut self, caption: &str, indent: usize) {
self.write_indent(indent);
self.output.push('_');
self.output.push_str(caption);
self.output.push_str("_\\\n");
}
fn write_indent(&mut self, indent: usize) {
for _ in 0..indent {
self.output.push(' ');
}
}
fn write_line(&mut self, line: &str, indent: usize) {
self.write_indent(indent);
self.output.push_str(line);
self.output.push('\n');
}
}
pub(crate) fn convert_asciidoc_to_markdown<R>(input: R) -> anyhow::Result<String>
where
R: BufRead,
{
let mut output = String::new();
let mut iter = input.lines().peekable();
let mut converter = Converter::new(&mut iter, &mut output);
converter.process()?;
Ok(output)
}
fn get_title(line: &str) -> Option<(usize, &str)> {
strip_prefix_symbol(line, '=')
}
fn get_list_item(line: &str) -> Option<(ListMarker, &str)> {
const HYPHEN_MARKER: &str = "- ";
if let Some(text) = line.strip_prefix(HYPHEN_MARKER) {
Some((ListMarker::Hyphen, text))
} else if let Some((count, text)) = strip_prefix_symbol(line, '*') {
Some((ListMarker::Asterisk(count), text))
} else if let Some((count, text)) = strip_prefix_symbol(line, '.') {
Some((ListMarker::Dot(count), text))
} else {
None
}
}
fn strip_prefix_symbol(line: &str, symbol: char) -> Option<(usize, &str)> {
let mut iter = line.chars();
if iter.next()? != symbol {
return None;
}
let mut count = 1;
loop {
match iter.next() {
Some(ch) if ch == symbol => {
count += 1;
}
Some(' ') => {
break;
}
_ => return None,
}
}
Some((count, iter.as_str()))
}
fn parse_media_block<'a>(line: &'a str, prefix: &str) -> Option<(&'a str, &'a str)> {
if let Some(line) = line.strip_prefix(prefix) {
if let Some((url, rest)) = line.split_once('[') {
if let Some(attrs) = rest.strip_suffix(']') {
return Some((url, attrs));
}
}
}
None
}
#[derive(Debug)]
struct ListNesting(Vec<ListMarker>);
impl ListNesting {
fn new() -> Self {
Self(Vec::<ListMarker>::with_capacity(6))
}
fn current(&mut self) -> Option<&ListMarker> {
self.0.last()
}
fn set_current(&mut self, marker: ListMarker) {
let Self(markers) = self;
if let Some(index) = markers.iter().position(|m| *m == marker) {
markers.truncate(index + 1);
} else {
markers.push(marker);
}
}
fn indent(&self) -> usize {
self.0.iter().map(|m| m.in_markdown().len()).sum()
}
fn marker(&self) -> (&str, usize) {
let Self(markers) = self;
let indent = markers.iter().take(markers.len() - 1).map(|m| m.in_markdown().len()).sum();
let marker = match markers.last() {
None => "",
Some(marker) => marker.in_markdown(),
};
(marker, indent)
}
}
#[derive(Debug, PartialEq, Eq)]
enum ListMarker {
Asterisk(usize),
Hyphen,
Dot(usize),
}
impl ListMarker {
fn in_markdown(&self) -> &str {
match self {
ListMarker::Asterisk(_) => "- ",
ListMarker::Hyphen => "- ",
ListMarker::Dot(_) => "1. ",
}
}
}
fn process_inline_macros(line: &str) -> anyhow::Result<Cow<'_, str>> {
let mut chars = line.char_indices();
loop {
let (start, end, a_macro) = match get_next_line_component(&mut chars) {
Component::None => break,
Component::Text => continue,
Component::Macro(s, e, m) => (s, e, m),
};
let mut src = line.chars();
let mut processed = String::new();
for _ in 0..start {
processed.push(src.next().unwrap());
}
processed.push_str(a_macro.process()?.as_str());
for _ in start..end {
let _ = src.next().unwrap();
}
let mut pos = end;
loop {
let (start, end, a_macro) = match get_next_line_component(&mut chars) {
Component::None => break,
Component::Text => continue,
Component::Macro(s, e, m) => (s, e, m),
};
for _ in pos..start {
processed.push(src.next().unwrap());
}
processed.push_str(a_macro.process()?.as_str());
for _ in start..end {
let _ = src.next().unwrap();
}
pos = end;
}
for ch in src {
processed.push(ch);
}
return Ok(Cow::Owned(processed));
}
Ok(Cow::Borrowed(line))
}
fn get_next_line_component(chars: &mut std::str::CharIndices<'_>) -> Component {
let (start, mut macro_name) = match chars.next() {
None => return Component::None,
Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text,
Some((pos, ch)) => (pos, String::from(ch)),
};
loop {
match chars.next() {
None => return Component::None,
Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text,
Some((_, ':')) => break,
Some((_, ch)) => macro_name.push(ch),
}
}
let mut macro_target = String::new();
loop {
match chars.next() {
None => return Component::None,
Some((_, ' ')) => return Component::Text,
Some((_, '[')) => break,
Some((_, ch)) => macro_target.push(ch),
}
}
let mut attr_value = String::new();
let end = loop {
match chars.next() {
None => return Component::None,
Some((pos, ']')) => break pos + 1,
Some((_, ch)) => attr_value.push(ch),
}
};
Component::Macro(start, end, Macro::new(macro_name, macro_target, attr_value))
}
enum Component {
None,
Text,
Macro(usize, usize, Macro),
}
struct Macro {
name: String,
target: String,
attrs: String,
}
impl Macro {
fn new(name: String, target: String, attrs: String) -> Self {
Self { name, target, attrs }
}
fn process(&self) -> anyhow::Result<String> {
let name = &self.name;
let text = match name.as_str() {
"https" => {
let url = &self.target;
let anchor_text = &self.attrs;
format!("[{anchor_text}](https:{url})")
}
"image" => {
let url = &self.target;
let alt = &self.attrs;
format!("![{alt}]({url})")
}
"kbd" => {
let keys = self.attrs.split('+').map(|k| Cow::Owned(format!("<kbd>{k}</kbd>")));
keys.collect::<Vec<_>>().join("+")
}
"pr" => {
let pr = &self.target;
let url = format!("https://github.com/rust-analyzer/rust-analyzer/pull/{pr}");
format!("[`#{pr}`]({url})")
}
"commit" => {
let hash = &self.target;
let short = &hash[0..7];
let url = format!("https://github.com/rust-analyzer/rust-analyzer/commit/{hash}");
format!("[`{short}`]({url})")
}
"release" => {
let date = &self.target;
let url = format!("https://github.com/rust-analyzer/rust-analyzer/releases/{date}");
format!("[`{date}`]({url})")
}
_ => bail!("macro not supported: {name}"),
};
Ok(text)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs::read_to_string;
#[test]
fn test_asciidoc_to_markdown_conversion() {
let input = read_to_string("test_data/input.adoc").unwrap();
let expected = read_to_string("test_data/expected.md").unwrap();
let actual = convert_asciidoc_to_markdown(std::io::Cursor::new(&input)).unwrap();
assert_eq!(actual, expected);
}
macro_rules! test_inline_macro_processing {
($((
$name:ident,
$input:expr,
$expected:expr
),)*) => ($(
#[test]
fn $name() {
let input = $input;
let actual = process_inline_macros(&input).unwrap();
let expected = $expected;
assert_eq!(actual, expected)
}
)*);
}
test_inline_macro_processing! {
(inline_macro_processing_for_empty_line, "", ""),
(inline_macro_processing_for_line_with_no_macro, "foo bar", "foo bar"),
(
inline_macro_processing_for_macro_in_line_start,
"kbd::[Ctrl+T] foo",
"<kbd>Ctrl</kbd>+<kbd>T</kbd> foo"
),
(
inline_macro_processing_for_macro_in_line_end,
"foo kbd::[Ctrl+T]",
"foo <kbd>Ctrl</kbd>+<kbd>T</kbd>"
),
(
inline_macro_processing_for_macro_in_the_middle_of_line,
"foo kbd::[Ctrl+T] foo",
"foo <kbd>Ctrl</kbd>+<kbd>T</kbd> foo"
),
(
inline_macro_processing_for_several_macros,
"foo kbd::[Ctrl+T] foo kbd::[Enter] foo",
"foo <kbd>Ctrl</kbd>+<kbd>T</kbd> foo <kbd>Enter</kbd> foo"
),
(
inline_macro_processing_for_several_macros_without_text_in_between,
"foo kbd::[Ctrl+T]kbd::[Enter] foo",
"foo <kbd>Ctrl</kbd>+<kbd>T</kbd><kbd>Enter</kbd> foo"
),
}
}