{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.Readers.AsciiDoc Copyright : Copyright (C) 2024 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Reads and evaluates a AsciiDoc document as a Pandoc AST. -} module Text.Pandoc.Readers.AsciiDoc ( readAsciiDoc ) where import Text.Pandoc.Class import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared (addPandocAttributes, blocksToInlines, safeRead, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import qualified AsciiDoc as A import Text.Pandoc.Error import qualified Text.Pandoc.Builder as B import Text.Pandoc.Readers.HTML (readHtml) import Control.Monad.Except (throwError) import Control.Monad (mplus) import Text.Pandoc.Parsing (newPos, sourceName) import Text.Pandoc.Logging import Text.Pandoc.Sources import Control.Monad.State import Data.List (intersperse, foldl') import Data.Char (chr, ord) import qualified Data.Text as T import qualified Data.Map as M import Data.Maybe (fromMaybe) -- import Debug.Trace -- | Read AsciiDoc from an input string and return a Pandoc document. readAsciiDoc :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc readAsciiDoc _opts inp = do let Sources sources = toSources inp (mconcat <$> mapM (\(sourcepos, t) -> A.parseDocument getIncludeFile raiseError (sourceName sourcepos) t) sources) >>= resolveFootnotes >>= resolveStem >>= resolveIcons >>= toPandoc where getIncludeFile fp = UTF8.toText <$> readFileStrict fp raiseError fp pos msg = throwError $ PandocParseError $ T.pack $ msg <> " at " <> show fp <> " char " <> show pos toPandoc :: PandocMonad m => A.Document -> m Pandoc toPandoc doc = Pandoc <$> doMeta (A.docMeta doc) <*> (B.toList <$> doBlocks (A.docBlocks doc)) resolveFootnotes :: Monad m => A.Document -> m A.Document resolveFootnotes doc = do evalStateT (A.mapInlines go doc) (mempty :: M.Map T.Text [A.Inline]) where go (A.Inline attr (A.Footnote (Just (A.FootnoteId fnid)) ils)) = do fnmap <- get case M.lookup fnid fnmap of Just ils' -> pure $ A.Inline attr (A.Footnote (Just (A.FootnoteId fnid)) ils') Nothing -> do put $ M.insert fnid ils fnmap pure $ A.Inline attr (A.Footnote (Just (A.FootnoteId fnid)) ils) go x = pure x resolveStem :: Monad m => A.Document -> m A.Document resolveStem doc = do let defaultType = case M.lookup "stem" (A.docAttributes (A.docMeta doc)) of Just "asciimath" -> A.AsciiMath _ -> A.LaTeXMath let doInlineStem (A.Inline attr (A.Math Nothing t)) = pure $ A.Inline attr (A.Math (Just defaultType) t) doInlineStem x = pure x let doBlockStem (A.Block attr mbtit (A.MathBlock Nothing t)) = pure $ A.Block attr mbtit (A.MathBlock (Just defaultType) t) doBlockStem x = A.mapInlines doInlineStem x A.mapBlocks doBlockStem doc -- resolve icons as either characters in an icon font or images resolveIcons :: Monad m => A.Document -> m A.Document resolveIcons doc = A.mapInlines fromIcon doc where docattrs = A.docAttributes (A.docMeta doc) iconFont = case M.lookup "icons" docattrs of Just "font" -> True _ -> False iconsdir = fromMaybe "./images/icons" $ M.lookup "iconsdir" docattrs icontype = fromMaybe "png" $ M.lookup "icontype" docattrs addClasses cls (A.Attr ps kvs) = A.Attr ps $ case M.lookup "role" kvs of Just r -> M.insert "role" (T.unwords (r : cls)) kvs Nothing -> M.insert "role" (T.unwords cls) kvs fromIcon (A.Inline attr (A.Icon name)) = if iconFont then pure $ A.Inline (addClasses ["fa", "fa-" <> name] attr) (A.Span []) else pure $ -- default is to use an image A.Inline (addClasses ["icon"] attr) (A.InlineImage (A.Target (iconsdir <> "/" <> name <> "." <> icontype)) Nothing Nothing Nothing) fromIcon x = pure x addAttribution :: Maybe A.Attribution -> B.Blocks -> B.Blocks addAttribution Nothing bs = bs addAttribution (Just (A.Attribution t)) bs = B.fromList $ case B.toList bs of [B.Div attr bls] -> [B.Div attr (bls ++ [attrBlock])] [B.BlockQuote bls] -> [B.BlockQuote (bls ++ [attrBlock])] xs -> xs ++ [attrBlock] where attrBlock = Para (B.toList $ B.text $ "\x2014 " <> t) doMeta :: PandocMonad m => A.Meta -> m B.Meta doMeta meta = do tit' <- doInlines (A.docTitle meta) pure $ (if tit' == mempty then id else B.setMeta "title" tit') . (case A.docAuthors meta of [] -> id as -> B.setMeta "author" (map fromAuthor as)) . (case A.docRevision meta of Nothing -> id Just (A.Revision vers mbdate mbremark) -> B.setMeta "version" vers . maybe id (B.setMeta "date") mbdate . maybe id (B.setMeta "remark") mbremark) . flip (foldl' (\m (k,v) -> -- leave out flags that are set just for processing if k == "sectids" || k == "stem" then m else if T.null v then B.setMeta k True m else B.setMeta k v m)) (M.toList (A.docAttributes meta)) $ mempty fromAuthor :: A.Author -> B.Inlines fromAuthor au = B.text (A.authorName au) <> maybe mempty (\email -> " (" <> B.link ("mailto:" <> email) "" (B.str email) <> ")") (A.authorEmail au) doBlocks :: PandocMonad m => [A.Block] -> m B.Blocks doBlocks = fmap mconcat . mapM doBlock addBlockAttr :: A.Attr -> B.Blocks -> B.Blocks addBlockAttr (A.Attr _ kvs') bs = case B.toList bs of x@(B.OrderedList{}) : xs -> -- "start" is handled in list attribs addPandocAttributes (M.toList $ M.delete "start" kvs) (B.singleton x) <> B.fromList xs x:xs -> addPandocAttributes (M.toList kvs) (B.singleton x) <> B.fromList xs [] -> mempty where kvs = M.mapKeys (\k -> if k == "role" then "class" else k) kvs' addBlockTitle :: B.Inlines -> B.Blocks -> B.Blocks addBlockTitle tit' bs = let tit = B.toList tit' in case B.toList bs of [B.Table attr _ colspecs thead tbody tfoot] -> B.singleton $ B.Table attr (B.Caption Nothing [B.Plain tit]) colspecs thead tbody tfoot [B.Figure attr _ bs'] -> B.singleton $ B.Figure attr (B.Caption Nothing [B.Plain tit]) bs' [B.Div attr (B.Div ("",["title"],[]) [Para _] : bs')] -> -- replace existing title, which might be e.g. "Note" B.singleton $ B.Div attr (B.Div ("",["title"],[]) [B.Para tit] : bs') [B.Div attr bs'] -> -- put title Div inside B.singleton $ B.Div attr (B.Div ("",["title"],[]) [B.Para tit] : bs') _ -> B.divWith B.nullAttr (B.divWith ("",["title"],[]) (B.para tit') <> bs) doBlock :: PandocMonad m => A.Block -> m B.Blocks doBlock (A.Block attr@(A.Attr ps kvs) mbtitle bt) = do mbtitle' <- case mbtitle of Nothing -> pure Nothing Just (A.BlockTitle ils) -> Just <$> doInlines ils addBlockAttr attr . maybe id addBlockTitle mbtitle' <$> case bt of A.Section (A.Level lev) ils bs -> do ils' <- doInlines ils bs' <- doBlocks bs pure $ (B.header lev ils') <> bs' A.DiscreteHeading (A.Level lev) ils -> B.header lev <$> doInlines ils A.Paragraph ils -> B.para <$> doInlines ils A.LiteralBlock t -> pure $ B.codeBlock t A.Listing mblang lns -> do let fromCallout (A.Callout i) | i <= 20 = T.pack [' ', chr (0x2460 + i - 1)] | otherwise = "<" <> tshow i <> ">" let fromSourceLine (A.SourceLine t callouts) = t <> mconcat (map fromCallout callouts) let code = T.intercalate "\n" $ map fromSourceLine lns let classes = case mblang of Nothing -> [] Just (A.Language l) -> [l] pure $ B.codeBlockWith ("", classes, []) code A.IncludeListing _ _ Nothing -> pure mempty A.IncludeListing mblang _fp (Just lns) -> doBlock (A.Block mempty mbtitle (A.Listing mblang lns)) A.ExampleBlock bs -> B.divWith ("",["example"],[]) <$> doBlocks bs A.Sidebar bs -> B.divWith ("",["sidebar"],[]) <$> doBlocks bs A.OpenBlock bs -> B.divWith ("",[],[]) <$> doBlocks bs A.QuoteBlock mbattrib bs -> addAttribution mbattrib . B.blockQuote <$> doBlocks bs A.Verse mbattrib bs -> addAttribution mbattrib . B.blockQuote <$> doBlocks bs -- TODO when texmath's asciimath parser works, convert: A.MathBlock (Just A.AsciiMath) t -> pure $ B.para $ B.displayMath t A.MathBlock (Just A.LaTeXMath) t -> pure $ B.para $ B.displayMath t A.MathBlock Nothing _ -> throwError $ PandocParseError "Encountered math type Nothing" A.List (A.BulletList _) items -> B.bulletList <$> mapM doItem items A.List A.CheckList items -> B.bulletList <$> mapM doItem items A.List (A.OrderedList _ mbstart) items -> do let start = fromMaybe (1 :: Int) (mbstart `mplus` (M.lookup "start" kvs >>= safeRead)) let getStyle xs = case xs of "arabic":_ -> Decimal "decimal":_ -> Decimal "loweralpha":_ -> LowerAlpha "upperalpha":_ -> UpperAlpha "lowerroman":_ -> LowerRoman "upperroman":_ -> UpperRoman _:rest -> getStyle rest [] -> DefaultStyle let sty = getStyle ps let delim = DefaultDelim B.orderedListWith (start, sty, delim) <$> mapM doItem items A.List A.CalloutList items -> B.divWith ("",["callout-list"],[]) . B.orderedList <$> mapM doItem items A.DefinitionList items | "ordered" `elem` ps -> B.orderedList <$> mapM (fmap (B.definitionList . (:[])) . doDefListItem) items | otherwise -> B.definitionList <$> mapM doDefListItem items A.Table specs mbHeader rows mbFooter -> do let toAlign A.AlignLeft = B.AlignLeft toAlign A.AlignCenter = B.AlignCenter toAlign A.AlignRight = B.AlignRight let fromCell (A.TableCell bs mbHorizAlign _mbVertAlign colspan rowspan) = B.Cell B.nullAttr (maybe B.AlignDefault toAlign mbHorizAlign) (B.RowSpan rowspan) (B.ColSpan colspan) . B.toList <$> doBlocks bs let fromRow (A.TableRow cs) = B.Row B.nullAttr <$> mapM fromCell cs tbody <- B.TableBody B.nullAttr (B.RowHeadColumns 0) [] <$> mapM fromRow rows thead <- B.TableHead B.nullAttr <$> maybe (pure []) (mapM fromRow) mbHeader tfoot <- B.TableFoot B.nullAttr <$> maybe (pure []) (mapM fromRow) mbFooter let totalWidth = sum $ map (fromMaybe 1 . A.colWidth) specs let toColSpec spec = (maybe B.AlignDefault toAlign (A.colHorizAlign spec), maybe B.ColWidthDefault (B.ColWidth . (\x -> fromIntegral x / fromIntegral totalWidth)) (A.colWidth spec)) let colspecs = map toColSpec specs pure $ B.table (B.Caption Nothing mempty) -- added by addBlockTitle colspecs thead [tbody] tfoot A.BlockImage target mbalt mbw mbh -> do img' <- doInline (A.Inline mempty (A.InlineImage target mbalt mbw mbh)) -- TODO have a global function that adds the title to caption here: pure $ B.figure (Caption Nothing mempty) -- added by addBlockTitle (B.plain img') -- TODO alt text? A.BlockAudio (A.Target t) -> pure $ B.plain $ B.image t "" (B.str t) -- TODO alt text? A.BlockVideo (A.Target t) -> pure $ B.plain $ B.image t "" (B.str t) A.TOC -> pure $ B.divWith ("toc",[],[]) mempty A.Admonition admonitionType bs -> do let admon = T.pack $ show admonitionType bs' <- doBlocks bs pure $ B.divWith ("",[T.toLower admon],[]) $ B.divWith ("",["title"],[]) (B.para (B.str admon)) <> bs' A.PageBreak -> pure $ B.divWith ("", ["page-break"], [("wrapper", "1")]) B.horizontalRule A.ThematicBreak -> pure $ B.horizontalRule A.Include fp (Just bs) -> B.divWith ("",["included"],[("path",T.pack fp)]) <$> doBlocks bs A.Include fp Nothing -> do report $ CouldNotLoadIncludeFile (T.pack fp) (newPos "" 0 0) pure mempty A.PassthroughBlock t -> case runPure (readHtml def{ readerExtensions = extensionsFromList [Ext_raw_html] } t) of Left _ -> pure $ B.rawBlock "html" t Right (Pandoc _ bs) -> pure $ B.fromList bs doItem :: PandocMonad m => A.ListItem -> m B.Blocks doItem (A.ListItem Nothing bs) = doBlocks bs doItem (A.ListItem (Just checkstate) bs) = do bs' <- doBlocks bs let check = case checkstate of A.Checked -> Str "\9746" A.Unchecked -> Str "\9744" pure $ B.fromList $ case B.toList bs' of (B.Para ils : rest) -> B.Para (check : B.Space : ils) : rest (B.Plain ils : rest) -> B.Plain (check : B.Space : ils) : rest rest -> B.Para [check] : rest doDefListItem :: PandocMonad m => ([A.Inline], [A.Block]) -> m (B.Inlines , [B.Blocks]) doDefListItem (lab, bs) = do lab' <- doInlines lab bs' <- doBlocks bs pure (lab', [bs']) doInlines :: PandocMonad m => [A.Inline] -> m B.Inlines doInlines = fmap mconcat . mapM doInline doInline :: PandocMonad m => A.Inline -> m B.Inlines doInline (A.Inline (A.Attr _ps kvs') it) = do let kvs = M.mapKeys (\k -> if k == "role" then "class" else k) kvs' addPandocAttributes (M.toList kvs) <$> case it of A.Str t -> pure $ B.text t A.HardBreak -> pure B.linebreak A.Bold ils -> B.strong <$> doInlines ils A.Italic ils -> B.emph <$> doInlines ils A.Monospace ils -> walk monospaceStr <$> doInlines ils A.Superscript ils -> B.superscript <$> doInlines ils A.Subscript ils -> B.subscript <$> doInlines ils A.Highlight ils -> B.spanWith ("",["mark"],[]) <$> doInlines ils A.Strikethrough ils -> B.strikeout <$> doInlines ils A.DoubleQuoted ils -> B.doubleQuoted <$> doInlines ils A.SingleQuoted ils -> B.singleQuoted <$> doInlines ils -- TODO when texmath's asciimath parser works, convert: A.Math (Just A.AsciiMath) t -> pure $ B.math t A.Math (Just A.LaTeXMath) t -> pure $ B.math t A.Math Nothing _ -> throwError $ PandocParseError "Encountered math type Nothing" A.Icon t -> pure $ B.spanWith ("",["icon"],[("name",t)]) (B.str ("[" <> t <> "]")) A.Button t -> pure $ B.spanWith ("",["button"],[]) (B.strong $ B.str ("[" <> t <> "]")) A.Kbd ts -> pure $ mconcat $ intersperse (B.str "+") $ map (B.spanWith ("",["kbd"],[]) . B.strong . B.str) ts A.Menu ts -> pure $ B.spanWith ("",["menu"],[]) $ B.strong $ B.text $ T.intercalate " › " ts -- TODO do we need linktype? A.Link _linkType (A.Target t) ils -> B.link t "" <$> doInlines ils A.InlineImage (A.Target url) mbalt mbwidth mbheight -> do let alt = case mbalt of Just (A.AltText t) -> B.text t Nothing -> mempty width = case mbwidth of Just (A.Width n) -> [("width", T.pack $ show n <> "px")] Nothing -> [] height = case mbheight of Just (A.Height n) -> [("height", T.pack $ show n <> "px")] Nothing -> [] pure $ B.imageWith ("",[], width ++ height) url "" alt A.Footnote _ ils -> B.note . B.para <$> doInlines ils A.InlineAnchor t _ -> pure $ B.spanWith (t, [], []) mempty A.BibliographyAnchor t _ -> pure $ B.spanWith (t, [], []) mempty A.CrossReference t Nothing -> pure $ B.linkWith ("",["cross-reference"],[]) ("#" <> t) "" (B.str t) A.CrossReference t (Just ils) -> do B.linkWith ("",["cross-reference"],[]) ("#" <> t) "" <$> doInlines ils A.AttributeReference (A.AttributeName t) -> -- if this is here, it's unresolved pure $ B.str ("{" <> t <> "}") A.Span ils -> B.spanWith B.nullAttr <$> doInlines ils A.IndexEntry (A.TermInText t) -> pure $ B.spanWith ("",["index"],[("term",t)]) (B.text t) A.IndexEntry (A.TermConcealed ts) -> pure $ B.spanWith ("",["index"],[("term",T.intercalate "," ts)]) mempty A.Counter name ctype val -> pure $ B.spanWith ("",["counter"],[("name",name)]) $ B.str $ case ctype of A.DecimalCounter -> tshow val A.UpperAlphaCounter -> T.singleton $ chr (ord 'A' + val - 1) A.LowerAlphaCounter -> T.singleton $ chr (ord 'a' + val - 1) -- Passthrough is hard to get right, because pandoc's RawInline needs -- a format specifier. Often in asciidoc passthrough is used as a form -- of escaping, so the best approach seems to be treating it as HTML -- and parsing it: A.Passthrough t -> do case runPure (readHtml def{ readerExtensions = extensionsFromList [Ext_raw_html] } t) of Left _ -> pure $ B.rawInline "html" t Right (Pandoc _ bs) -> pure $ B.fromList . blocksToInlines $ bs monospaceStr :: Inline -> Inline monospaceStr (Str t) = Code B.nullAttr t monospaceStr x = x