aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers.hs3
-rw-r--r--src/Text/Pandoc/Readers/XML.hs540
-rw-r--r--src/Text/Pandoc/Writers.hs3
-rw-r--r--src/Text/Pandoc/Writers/XML.hs365
-rw-r--r--src/Text/Pandoc/XMLFormat.hs188
5 files changed, 1099 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 8f4800efb..12d1c6c95 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -65,6 +65,7 @@ module Text.Pandoc.Readers
, readTypst
, readDjot
, readPod
+ , readXML
-- * Miscellaneous
, getReader
, getDefaultExtensions
@@ -118,6 +119,7 @@ import Text.Pandoc.Readers.RIS
import Text.Pandoc.Readers.RTF
import Text.Pandoc.Readers.Typst
import Text.Pandoc.Readers.Djot
+import Text.Pandoc.Readers.XML
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
@@ -174,6 +176,7 @@ readers = [("native" , TextReader readNative)
,("djot" , TextReader readDjot)
,("mdoc" , TextReader readMdoc)
,("pod" , TextReader readPod)
+ ,("xml" , TextReader readXML)
]
-- | Retrieve reader, extensions based on format spec (format+extensions).
diff --git a/src/Text/Pandoc/Readers/XML.hs b/src/Text/Pandoc/Readers/XML.hs
new file mode 100644
index 000000000..e3173f4c9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/XML.hs
@@ -0,0 +1,540 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- |
+-- Module : Text.Pandoc.Readers.XML
+-- Copyright : Copyright (C) 2025- Massimiliano Farinella and John MacFarlane
+-- License : GNU GPL, version 2 or above
+--
+-- Maintainer : Massimiliano Farinella <[email protected]>
+-- Stability : WIP
+-- Portability : portable
+--
+-- Conversion of (Pandoc specific) xml to 'Pandoc' document.
+module Text.Pandoc.Readers.XML (readXML) where
+
+import Control.Monad (msum)
+import Control.Monad.Except (throwError)
+import Control.Monad.State.Strict (StateT (runStateT), modify)
+import Data.Char (isSpace)
+import Data.Default (Default (..))
+import qualified Data.List as L
+import qualified Data.Map as M
+import Data.Maybe (catMaybes, fromMaybe, mapMaybe)
+import qualified Data.Set as S (Set, fromList, member)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Text.Lazy (fromStrict)
+import Data.Version (Version, makeVersion)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Error (PandocError (..))
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing (ToSources, toSources)
+import Text.Pandoc.Sources (sourcesToText)
+import Text.Pandoc.Version (pandocVersion)
+import Text.Pandoc.XML (lookupEntity)
+import Text.Pandoc.XML.Light
+import Text.Pandoc.XMLFormat
+import Text.Read (readMaybe)
+
+-- TODO: use xmlPath state to give better context when an error occurs
+
+type XMLReader m = StateT XMLReaderState m
+
+data XMLReaderState = XMLReaderState
+ { xmlApiVersion :: Version,
+ xmlMeta :: Meta,
+ xmlContent :: [Content],
+ xmlPath :: [Text]
+ }
+ deriving (Show)
+
+instance Default XMLReaderState where
+ def =
+ XMLReaderState
+ { xmlApiVersion = pandocVersion,
+ xmlMeta = mempty,
+ xmlContent = [],
+ xmlPath = ["root"]
+ }
+
+readXML :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc
+readXML _ inp = do
+ let sources = toSources inp
+ tree <-
+ either (throwError . PandocXMLError "") return $
+ parseXMLContents (fromStrict . sourcesToText $ sources)
+ (bs, st') <- flip runStateT (def {xmlContent = tree}) $ mapM parseBlock tree
+ let blockList = toList $ concatMany bs
+ return $ Pandoc (xmlMeta st') blockList
+
+concatMany :: [Many a] -> Many a
+concatMany = Many . mconcat . map unMany
+
+parseBlocks :: (PandocMonad m) => [Content] -> XMLReader m Blocks
+parseBlocks contents = concatMany <$> mapM parseBlock contents
+
+getBlocks :: (PandocMonad m) => Element -> XMLReader m Blocks
+getBlocks e = parseBlocks (elContent e)
+
+elementName :: Element -> Text
+elementName e = qName $ elName e
+
+attrValue :: Text -> Element -> Text
+attrValue attr =
+ fromMaybe "" . maybeAttrValue attr
+
+maybeAttrValue :: Text -> Element -> Maybe Text
+maybeAttrValue attr elt =
+ lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+
+parseBlock :: (PandocMonad m) => Content -> XMLReader m Blocks
+parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
+parseBlock (Text (CData _ s _)) =
+ if T.all isSpace s
+ then return mempty
+ else do
+ throwError $ PandocXMLError "" "non-space characters out of inline context"
+parseBlock (CRef x) = do
+ throwError $ PandocXMLError "" ("reference \"" <> x <> "\" out of inline context")
+parseBlock (Elem e) = do
+ let name = elementName e
+ in case (name) of
+ "Pandoc" -> parsePandoc
+ "?xml" -> return mempty
+ "blocks" -> getBlocks e
+ "meta" ->
+ let entry_els = childrenNamed tgNameMetaMapEntry e
+ in do
+ entries <- catMaybes <$> mapM parseMetaMapEntry entry_els
+ mapM_ (uncurry addMeta) entries
+ return mempty
+ "Para" -> para <$> getInlines (elContent e)
+ "Plain" -> do
+ ils <- getInlines (elContent e)
+ return $ singleton . Plain . toList $ ils
+ "Header" -> (headerWith attr level) <$> getInlines (elContent e)
+ where
+ level = textToInt (attrValue atNameLevel e) 1
+ attr = filterAttrAttributes [atNameLevel] $ attrFromElement e
+ "HorizontalRule" -> return horizontalRule
+ "BlockQuote" -> do
+ contents <- getBlocks e
+ return $ blockQuote contents
+ "Div" -> do
+ contents <- getBlocks e
+ return $ divWith (attrFromElement e) contents
+ "BulletList" -> do
+ items <- getListItems e
+ return $ bulletList items
+ "OrderedList" -> do
+ items <- getListItems e
+ return $ orderedListWith (getListAttributes e) items
+ "DefinitionList" -> do
+ let items_contents = getContentsOfElements (isElementNamed tgNameDefListItem) (elContent e)
+ items <- mapM parseDefinitionListItem items_contents
+ return $ definitionList items
+ "Figure" -> do
+ let attr = attrFromElement e
+ (maybe_caption_el, contents) = partitionFirstChildNamed "Caption" $ elContent e
+ figure_caption <- case (maybe_caption_el) of
+ Just (caption_el) -> parseCaption $ elContent caption_el
+ Nothing -> pure emptyCaption
+ blocks <- parseBlocks contents
+ return $ figureWith attr figure_caption blocks
+ "CodeBlock" -> do
+ let attr = attrFromElement e
+ return $ codeBlockWith attr $ strContentRecursive e
+ "RawBlock" -> do
+ let format = (attrValue atNameFormat e)
+ return $ rawBlock format $ strContentRecursive e
+ "LineBlock" -> do
+ lins <- mapM getInlines (contentsOfChildren tgNameLineItem (elContent e))
+ return $ lineBlock lins
+ "Table" -> do
+ -- TODO: check unexpected items
+ let attr = attrFromElement e
+ (maybe_caption_el, after_caption) = partitionFirstChildNamed "Caption" $ elContent e
+ children = elementsWithNames (S.fromList [tgNameColspecs, "TableHead", "TableBody", "TableFoot"]) after_caption
+ is_element tag el = tag == elementName el
+ colspecs <- getColspecs $ L.find (is_element tgNameColspecs) children
+ tbs <- getTableBodies $ filter (is_element "TableBody") children
+ th <- getTableHead $ L.find (is_element "TableHead") children
+ tf <- getTableFoot $ L.find (is_element "TableFoot") children
+ capt <- parseMaybeCaptionElement maybe_caption_el
+ case colspecs of
+ Nothing -> return mempty
+ Just cs -> return $ fromList [Table attr capt cs th tbs tf]
+ _ -> do
+ throwError $ PandocXMLError "" ("unexpected element \"" <> name <> "\" in blocks context")
+ where
+ parsePandoc = do
+ let version = maybeAttrValue atNameApiVersion e
+ apiversion = case (version) of
+ Just (v) -> makeVersion $ map (read . T.unpack) $ T.splitOn "," v
+ Nothing -> pandocVersion
+ in modify $ \st -> st {xmlApiVersion = apiversion}
+ getBlocks e
+
+getListItems :: (PandocMonad m) => Element -> XMLReader m [Blocks]
+getListItems e =
+ let items_els = childrenNamed tgNameListItem e
+ in do
+ mapM getBlocks items_els
+
+getContentsOfElements :: (Content -> Bool) -> [Content] -> [[Content]]
+getContentsOfElements filter_element contents = mapMaybe element_contents $ filter filter_element contents
+ where
+ element_contents :: Content -> Maybe [Content]
+ element_contents c = case (c) of
+ Elem e -> Just (elContent e)
+ _ -> Nothing
+
+strContentRecursive :: Element -> Text
+strContentRecursive =
+ strContent
+ . (\e' -> e' {elContent = map elementToStr $ elContent e'})
+
+elementToStr :: Content -> Content
+elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+elementToStr x = x
+
+textToInt :: Text -> Int -> Int
+textToInt t deflt =
+ let safe_to_int :: Text -> Maybe Int
+ safe_to_int s = readMaybe $ T.unpack s
+ in case (safe_to_int t) of
+ Nothing -> deflt
+ Just (n) -> n
+
+parseInline :: (PandocMonad m) => Content -> XMLReader m Inlines
+parseInline (Text (CData _ s _)) =
+ return $ text s
+parseInline (CRef ref) =
+ return $
+ maybe (text $ T.toUpper ref) text $
+ lookupEntity ref
+parseInline (Elem e) =
+ let name = elementName e
+ in case (name) of
+ "Space" ->
+ let count = textToInt (attrValue atNameSpaceCount e) 1
+ in return $ fromList $ replicate count Space
+ "Str" -> return $ fromList [Str $ attrValue atNameStrContent e]
+ "Emph" -> innerInlines emph
+ "Strong" -> innerInlines strong
+ "Strikeout" -> innerInlines strikeout
+ "Subscript" -> innerInlines subscript
+ "Superscript" -> innerInlines superscript
+ "Underline" -> innerInlines underline
+ "SoftBreak" -> return softbreak
+ "LineBreak" -> return linebreak
+ "SmallCaps" -> innerInlines smallcaps
+ "Quoted" -> case (attrValue atNameQuoteType e) of
+ "SingleQuote" -> innerInlines singleQuoted
+ _ -> innerInlines doubleQuoted
+ "Math" -> case (attrValue atNameMathType e) of
+ "DisplayMath" -> pure $ displayMath $ strContentRecursive e
+ _ -> pure $ math $ strContentRecursive e
+ "Span" -> innerInlines $ spanWith (attrFromElement e)
+ "Code" -> do
+ let attr = attrFromElement e
+ return $ codeWith attr $ strContentRecursive e
+ "Link" -> innerInlines $ linkWith attr url title
+ where
+ url = attrValue atNameLinkUrl e
+ title = attrValue atNameTitle e
+ attr = filterAttrAttributes [atNameLinkUrl, atNameTitle] $ attrFromElement e
+ "Image" -> innerInlines $ imageWith attr url title
+ where
+ url = attrValue atNameImageUrl e
+ title = attrValue atNameTitle e
+ attr = filterAttrAttributes [atNameImageUrl, atNameTitle] $ attrFromElement e
+ "RawInline" -> do
+ let format = (attrValue atNameFormat e)
+ return $ rawInline format $ strContentRecursive e
+ "Note" -> do
+ contents <- getBlocks e
+ return $ note contents
+ "Cite" ->
+ let (maybe_citations_el, contents) = partitionFirstChildNamed tgNameCitations $ elContent e
+ in case (maybe_citations_el) of
+ Just citations_el -> do
+ citations <- parseCitations $ elContent citations_el
+ (innerInlines' contents) $ cite citations
+ Nothing -> getInlines contents
+ _ -> do
+ throwError $ PandocXMLError "" ("unexpected element \"" <> name <> "\" in inline context")
+ where
+ innerInlines' contents f =
+ f . concatMany
+ <$> mapM parseInline contents
+ innerInlines f = innerInlines' (elContent e) f
+
+getInlines :: (PandocMonad m) => [Content] -> XMLReader m Inlines
+getInlines contents = concatMany <$> mapM parseInline contents
+
+getListAttributes :: Element -> ListAttributes
+getListAttributes e = (start, style, delim)
+ where
+ start = textToInt (attrValue atNameStart e) 1
+ style = case (attrValue atNameNumberStyle e) of
+ "Example" -> Example
+ "Decimal" -> Decimal
+ "LowerRoman" -> LowerRoman
+ "UpperRoman" -> UpperRoman
+ "LowerAlpha" -> LowerAlpha
+ "UpperAlpha" -> UpperAlpha
+ _ -> DefaultStyle
+ delim = case (attrValue atNameNumberDelim e) of
+ "Period" -> Period
+ "OneParen" -> OneParen
+ "TwoParens" -> TwoParens
+ _ -> DefaultDelim
+
+contentsOfChildren :: Text -> [Content] -> [[Content]]
+contentsOfChildren tag contents = mapMaybe childrenElementWithTag contents
+ where
+ childrenElementWithTag :: Content -> Maybe [Content]
+ childrenElementWithTag c = case (c) of
+ (Elem e) -> if tag == elementName e then Just (elContent e) else Nothing
+ _ -> Nothing
+
+alignmentFromText :: Text -> Alignment
+alignmentFromText t = case t of
+ "AlignLeft" -> AlignLeft
+ "AlignRight" -> AlignRight
+ "AlignCenter" -> AlignCenter
+ _ -> AlignDefault
+
+getColWidth :: Text -> ColWidth
+getColWidth txt = case reads (T.unpack txt) of
+ [(value, "")] -> if value == 0.0 then ColWidthDefault else ColWidth value
+ _ -> ColWidthDefault
+
+getColspecs :: (PandocMonad m) => Maybe Element -> XMLReader m (Maybe [ColSpec])
+getColspecs Nothing = pure Nothing
+getColspecs (Just cs) = do
+ return $ Just $ map elementToColSpec (childrenNamed "ColSpec" cs)
+ where
+ elementToColSpec e = (alignmentFromText $ attrValue atNameAlignment e, getColWidth $ attrValue atNameColWidth e)
+
+getTableBody :: (PandocMonad m) => Element -> XMLReader m (Maybe TableBody)
+getTableBody body_el = do
+ let attr = filterAttrAttributes [atNameRowHeadColumns] $ attrFromElement body_el
+ bh = childrenNamed tgNameBodyHeader body_el
+ bb = childrenNamed tgNameBodyBody body_el
+ headcols = textToInt (attrValue atNameRowHeadColumns body_el) 0
+ hrows <- mconcat <$> mapM getRows bh
+ brows <- mconcat <$> mapM getRows bb
+ return $ Just $ TableBody attr (RowHeadColumns headcols) hrows brows
+
+getTableBodies :: (PandocMonad m) => [Element] -> XMLReader m [TableBody]
+getTableBodies body_elements = do
+ catMaybes <$> mapM getTableBody body_elements
+
+getTableHead :: (PandocMonad m) => Maybe Element -> XMLReader m TableHead
+getTableHead maybe_e = case maybe_e of
+ Just e -> do
+ let attr = attrFromElement e
+ rows <- getRows e
+ return $ TableHead attr rows
+ Nothing -> return $ TableHead nullAttr []
+
+getTableFoot :: (PandocMonad m) => Maybe Element -> XMLReader m TableFoot
+getTableFoot maybe_e = case maybe_e of
+ Just e -> do
+ let attr = attrFromElement e
+ rows <- getRows e
+ return $ TableFoot attr rows
+ Nothing -> return $ TableFoot nullAttr []
+
+getCell :: (PandocMonad m) => Element -> XMLReader m Cell
+getCell c = do
+ let alignment = alignmentFromText $ attrValue atNameAlignment c
+ rowspan = RowSpan $ textToInt (attrValue atNameRowspan c) 1
+ colspan = ColSpan $ textToInt (attrValue atNameColspan c) 1
+ attr = filterAttrAttributes [atNameAlignment, atNameRowspan, atNameColspan] $ attrFromElement c
+ blocks <- getBlocks c
+ return $ Cell attr alignment rowspan colspan (toList blocks)
+
+getRows :: (PandocMonad m) => Element -> XMLReader m [Row]
+getRows e = mapM getRow $ childrenNamed "Row" e
+ where
+ getRow r = do
+ cells <- mapM getCell (childrenNamed "Cell" r)
+ return $ Row (attrFromElement r) cells
+
+parseCitations :: (PandocMonad m) => [Content] -> XMLReader m [Citation]
+parseCitations contents = do
+ maybecitations <- mapM getCitation contents
+ return $ catMaybes maybecitations
+ where
+ getCitation :: (PandocMonad m) => Content -> XMLReader m (Maybe Citation)
+ getCitation content = case (content) of
+ (Elem e) ->
+ if qName (elName e) == "Citation"
+ then do
+ p <- inlinesOfChildrenNamed tgNameCitationPrefix e
+ s <- inlinesOfChildrenNamed tgNameCitationSuffix e
+ return $
+ Just
+ ( Citation
+ { citationId = attrValue "id" e,
+ citationPrefix = toList p,
+ citationSuffix = toList s,
+ citationMode = case (attrValue atNameCitationMode e) of
+ "AuthorInText" -> AuthorInText
+ "SuppressAuthor" -> SuppressAuthor
+ _ -> NormalCitation,
+ citationNoteNum = textToInt (attrValue atNameCitationNoteNum e) 0,
+ citationHash = textToInt (attrValue atNameCitationHash e) 0
+ }
+ )
+ else do
+ return Nothing
+ _ -> do
+ return Nothing
+ where
+ inlinesOfChildrenNamed tag e = getInlines $ concatMap (\e' -> elContent e') (childrenNamed tag e)
+
+parseMaybeCaptionElement :: (PandocMonad m) => Maybe Element -> XMLReader m Caption
+parseMaybeCaptionElement Nothing = pure emptyCaption
+parseMaybeCaptionElement (Just e) = parseCaption $ elContent e
+
+parseCaption :: (PandocMonad m) => [Content] -> XMLReader m Caption
+parseCaption contents =
+ let (maybe_shortcaption_el, caption_contents) = partitionFirstChildNamed tgNameShortCaption contents
+ in do
+ blocks <- parseBlocks caption_contents
+ case (maybe_shortcaption_el) of
+ Just shortcaption_el -> do
+ short_caption <- getInlines (elContent shortcaption_el)
+ return $ caption (Just $ toList short_caption) blocks
+ Nothing -> return $ caption Nothing blocks
+
+parseDefinitionListItem :: (PandocMonad m) => [Content] -> XMLReader m (Inlines, [Blocks])
+parseDefinitionListItem contents = do
+ let term_contents = getContentsOfElements (isElementNamed tgNameDefListTerm) contents
+ defs_elements = elementContents $ filter (isElementNamed tgNameDefListDef) contents
+ term_inlines <- getInlines (concat term_contents)
+ defs <- mapM getBlocks defs_elements
+ return (term_inlines, defs)
+
+elementContents :: [Content] -> [Element]
+elementContents contents = mapMaybe toElement contents
+ where
+ toElement :: Content -> Maybe Element
+ toElement (Elem e) = Just e
+ toElement _ = Nothing
+
+isElementNamed :: Text -> Content -> Bool
+isElementNamed t c = case (c) of
+ Elem e -> t == elementName e
+ _ -> False
+
+childrenNamed :: Text -> Element -> [Element]
+childrenNamed tag e = elementContents $ filter (isElementNamed tag) (elContent e)
+
+elementsWithNames :: S.Set Text -> [Content] -> [Element]
+elementsWithNames tags contents = mapMaybe isElementWithNameInSet contents
+ where
+ isElementWithNameInSet c = case (c) of
+ Elem el ->
+ if (elementName el) `S.member` tags
+ then Just el
+ else Nothing
+ _ -> Nothing
+
+partitionFirstChildNamed :: Text -> [Content] -> (Maybe Element, [Content])
+partitionFirstChildNamed tag contents = case (contents) of
+ (Text (CData _ s _) : rest) ->
+ if T.all isSpace s
+ then partitionFirstChildNamed tag rest
+ else (Nothing, contents)
+ (Elem e : rest) ->
+ if tag == elementName e
+ then (Just e, rest)
+ else (Nothing, contents)
+ _ -> (Nothing, contents)
+
+type PandocAttr = (Text, [Text], [(Text, Text)])
+
+filterAttributes :: S.Set Text -> [(Text, Text)] -> [(Text, Text)]
+filterAttributes to_be_removed a = filter keep_attr a
+ where
+ keep_attr (k, _) = not (k `S.member` to_be_removed)
+
+filterAttrAttributes :: [Text] -> PandocAttr -> PandocAttr
+filterAttrAttributes to_be_removed (idn, classes, a) = (idn, classes, filtered)
+ where
+ filtered = filterAttributes (S.fromList to_be_removed) a
+
+attrFromElement :: Element -> PandocAttr
+attrFromElement e = filterAttrAttributes ["id", "class"] (idn, classes, attributes)
+ where
+ idn = attrValue "id" e
+ classes = T.words $ attrValue "class" e
+ attributes = map (\a -> (qName $ attrKey a, attrVal a)) $ elAttribs e
+
+addMeta :: (PandocMonad m) => (ToMetaValue a) => Text -> a -> XMLReader m ()
+addMeta field val = modify (setMeta field val)
+
+instance HasMeta XMLReaderState where
+ setMeta field v s = s {xmlMeta = setMeta field v (xmlMeta s)}
+
+ deleteMeta field s = s {xmlMeta = deleteMeta field (xmlMeta s)}
+
+parseMetaMapEntry :: (PandocMonad m) => Element -> XMLReader m (Maybe (Text, MetaValue))
+parseMetaMapEntry e =
+ let key = attrValue atNameMetaMapEntryKey e
+ in case (key) of
+ "" -> pure Nothing
+ k -> do
+ maybe_value <- parseMetaMapEntryContents $ elContent e
+ case (maybe_value) of
+ Nothing -> return Nothing
+ Just v -> return $ Just (k, v)
+
+parseMetaMapEntryContents :: (PandocMonad m) => [Content] -> XMLReader m (Maybe MetaValue)
+parseMetaMapEntryContents cs = msum <$> mapM parseMeta cs
+
+parseMeta :: (PandocMonad m) => Content -> XMLReader m (Maybe MetaValue)
+parseMeta (Text (CData CDataRaw _ _)) = return Nothing
+parseMeta (Text (CData _ s _)) =
+ if T.all isSpace s
+ then return Nothing
+ else do
+ throwError $ PandocXMLError "" "non-space characters out of inline context in metadata"
+parseMeta (CRef x) =
+ throwError $ PandocXMLError "" ("reference \"" <> x <> "\" out of inline context")
+parseMeta (Elem e) = do
+ let name = elementName e
+ in case (name) of
+ "MetaBool" -> case (attrValue atNameMetaBoolValue e) of
+ "true" -> return $ Just $ MetaBool True
+ _ -> return $ Just $ MetaBool False
+ "MetaString" -> pure Nothing
+ "MetaInlines" -> do
+ inlines <- getInlines (elContent e)
+ return $ Just $ MetaInlines $ toList inlines
+ "MetaBlocks" -> do
+ blocks <- getBlocks e
+ return $ Just $ MetaBlocks $ toList blocks
+ "MetaList" -> do
+ maybe_items <- mapM parseMeta $ elContent e
+ let items = catMaybes maybe_items
+ in -- TODO: report empty MetaList?
+ return $ Just $ MetaList items
+ "MetaMap" ->
+ let entry_els = childrenNamed tgNameMetaMapEntry e
+ in do
+ entries <- catMaybes <$> mapM parseMetaMapEntry entry_els
+ if null entries
+ then
+ -- TODO: report empty MetaMap
+ return Nothing
+ else return $ Just $ MetaMap $ M.fromList entries
+ _ -> do
+ throwError $ PandocXMLError "" ("unexpected element \"" <> name <> "\" in metadata")
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 3e1974ad6..164bf6d0f 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -76,6 +76,7 @@ module Text.Pandoc.Writers
, writeTexinfo
, writeTextile
, writeTypst
+ , writeXML
, writeXWiki
, writeZimWiki
, getWriter
@@ -128,6 +129,7 @@ import Text.Pandoc.Writers.TEI
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Typst
+import Text.Pandoc.Writers.XML
import Text.Pandoc.Writers.XWiki
import Text.Pandoc.Writers.ZimWiki
@@ -203,6 +205,7 @@ writers = [
,("chunkedhtml" , ByteStringWriter writeChunkedHTML)
,("djot" , TextWriter writeDjot)
,("ansi" , TextWriter writeANSI)
+ ,("xml" , TextWriter writeXML)
]
-- | Retrieve writer, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Writers/XML.hs b/src/Text/Pandoc/Writers/XML.hs
new file mode 100644
index 000000000..35bcb07f8
--- /dev/null
+++ b/src/Text/Pandoc/Writers/XML.hs
@@ -0,0 +1,365 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- |
+-- Module : Text.Pandoc.Writers.XML
+-- Copyright : Copyright (C) 2025- Massimiliano Farinella and John MacFarlane
+-- License : GNU GPL, version 2 or above
+--
+-- Maintainer : Massimiliano Farinella <[email protected]>
+-- Stability : WIP
+-- Portability : portable
+--
+-- Conversion of 'Pandoc' documents to (pandoc specific) xml markup.
+module Text.Pandoc.Writers.XML (writeXML) where
+
+import Data.Map (Map, toList)
+import Data.Maybe (mapMaybe)
+import qualified Data.Text as T
+import Data.Version (versionBranch)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options (WriterOptions (..))
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.XML.Light as XML
+import Text.Pandoc.XMLFormat
+import Text.XML.Light (xml_header)
+
+type PandocAttr = Text.Pandoc.Definition.Attr
+
+writeXML :: (PandocMonad m) => WriterOptions -> Pandoc -> m T.Text
+writeXML _ doc = do
+ return $ pandocToXmlText doc
+
+text_node :: T.Text -> Content
+text_node text = Text (CData CDataText text Nothing)
+
+emptyElement :: T.Text -> Element
+emptyElement tag =
+ Element
+ { elName = unqual tag,
+ elAttribs = [],
+ elContent = [],
+ elLine = Nothing
+ }
+
+elementWithContents :: T.Text -> [Content] -> Element
+elementWithContents tag contents =
+ Element
+ { elName = unqual tag,
+ elAttribs = [],
+ elContent = contents,
+ elLine = Nothing
+ }
+
+elementWithAttributes :: T.Text -> [XML.Attr] -> Element
+elementWithAttributes tag attributes =
+ Element
+ { elName = unqual tag,
+ elAttribs = attributes,
+ elContent = [],
+ elLine = Nothing
+ }
+
+elementWithAttrAndContents :: T.Text -> PandocAttr -> [Content] -> Element
+elementWithAttrAndContents tag attr contents = addAttrAttributes attr $ elementWithContents tag contents
+
+asBlockOfInlines :: Element -> [Content]
+asBlockOfInlines el = [Elem el, text_node "\n"]
+
+asBlockOfBlocks :: Element -> [Content]
+asBlockOfBlocks el = [Elem newline_before_first, newline]
+ where
+ newline = text_node "\n"
+ newline_before_first = if null (elContent el) then el else prependContents [newline] el
+
+itemName :: (Show a) => a -> T.Text
+itemName a = T.pack $ takeWhile (/= ' ') (show a)
+
+intAsText :: Int -> T.Text
+intAsText i = T.pack $ show i
+
+itemAsEmptyElement :: (Show a) => a -> Element
+itemAsEmptyElement item = emptyElement $ itemName item
+
+pandocToXmlText :: Pandoc -> T.Text
+pandocToXmlText (Pandoc (Meta meta) blocks) = with_header . with_blocks . with_meta . with_version $ el
+ where
+ el = prependContents [text_node "\n"] $ emptyElement "Pandoc"
+ with_version = addAttribute atNameApiVersion (T.intercalate "," $ map (T.pack . show) $ versionBranch pandocTypesVersion)
+ with_meta = appendContents (metaMapToXML meta "meta")
+ with_blocks = appendContents (asBlockOfBlocks $ elementWithContents "blocks" $ blocksToXML blocks)
+ with_header :: Element -> T.Text
+ with_header e = T.concat [T.pack xml_header, "\n", showElement e]
+
+metaMapToXML :: Map T.Text MetaValue -> T.Text -> [Content]
+metaMapToXML mmap tag = asBlockOfBlocks $ elementWithContents tag entries
+ where
+ entries = concatMap to_entry $ toList mmap
+ to_entry :: (T.Text, MetaValue) -> [Content]
+ to_entry (text, metavalue) = asBlockOfBlocks with_key
+ where
+ entry = elementWithContents tgNameMetaMapEntry $ metaValueToXML metavalue
+ with_key = addAttribute atNameMetaMapEntryKey text entry
+
+metaValueToXML :: MetaValue -> [Content]
+metaValueToXML value =
+ let name = itemName value
+ el = itemAsEmptyElement value
+ in case (value) of
+ MetaBool b -> asBlockOfInlines $ addAttribute atNameMetaBoolValue bool_value el
+ where
+ bool_value = if b then "true" else "false"
+ MetaString s -> asBlockOfInlines $ appendContents [text_node s] el
+ MetaInlines inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
+ MetaBlocks blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
+ MetaList items -> asBlockOfBlocks $ appendContents (concatMap metaValueToXML items) el
+ MetaMap mm -> metaMapToXML mm name
+
+blocksToXML :: [Block] -> [Content]
+blocksToXML blocks = concatMap blockToXML blocks
+
+inlinesToXML :: [Inline] -> [Content]
+inlinesToXML inlines = concatMap inlineContentToContents (ilsToIlsContent inlines [])
+
+data InlineContent
+ = NormalInline Inline
+ | ElSpace Int
+ | ElStr T.Text
+
+ilsToIlsContent :: [Inline] -> [InlineContent] -> [InlineContent]
+ilsToIlsContent (Space : xs) [] = ilsToIlsContent xs [ElSpace 1]
+ilsToIlsContent (Space : xs) (NormalInline Space : cs) = ilsToIlsContent xs (ElSpace 2 : cs)
+ilsToIlsContent (Space : xs) (ElSpace n : cs) = ilsToIlsContent xs (ElSpace (n + 1) : cs)
+-- empty Str are always encoded as <Str />
+ilsToIlsContent (Str "" : xs) ilct = ilsToIlsContent xs (ElStr "" : ilct)
+-- Str s1, Str s2 -> s1<Str content="s2">
+ilsToIlsContent (Str s2 : xs) (NormalInline str1@(Str _) : ilct) = ilsToIlsContent xs (ElStr s2 : NormalInline str1 : ilct)
+--
+ilsToIlsContent (Str s : xs) ilct =
+ if T.any (== ' ') s
+ then ilsToIlsContent xs (ElStr s : ilct)
+ else ilsToIlsContent xs (NormalInline (Str s) : ilct)
+ilsToIlsContent (x : xs) ilct = ilsToIlsContent xs (NormalInline x : ilct)
+ilsToIlsContent [] ilct = reverse $ lastSpaceAsElem ilct
+ where
+ lastSpaceAsElem :: [InlineContent] -> [InlineContent]
+ lastSpaceAsElem (NormalInline Space : xs) = ElSpace 1 : xs
+ lastSpaceAsElem ilcts = ilcts
+
+inlineContentToContents :: InlineContent -> [Content]
+inlineContentToContents (NormalInline il) = inlineToXML il
+inlineContentToContents (ElSpace 1) = [Elem $ emptyElement "Space"]
+inlineContentToContents (ElSpace n) = [Elem $ addAttribute atNameSpaceCount (intAsText n) (emptyElement "Space")]
+inlineContentToContents (ElStr "") = [Elem $ emptyElement "Str"]
+inlineContentToContents (ElStr s) = [Elem $ addAttribute atNameStrContent s (emptyElement "Str")]
+
+asContents :: Element -> [Content]
+asContents el = [Elem el]
+
+wrapBlocks :: T.Text -> [Block] -> [Content]
+wrapBlocks tag blocks = asBlockOfBlocks $ elementWithContents tag $ blocksToXML blocks
+
+wrapArrayOfBlocks :: T.Text -> [[Block]] -> [Content]
+wrapArrayOfBlocks tag array = concatMap (wrapBlocks tag) array
+
+-- wrapInlines :: T.Text -> [Inline] -> [Content]
+-- wrapInlines tag inlines = asBlockOfInlines $ element_with_contents tag $ inlinesToXML inlines
+
+blockToXML :: Block -> [Content]
+blockToXML block =
+ let el = itemAsEmptyElement block
+ in case (block) of
+ Para inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
+ Header level (idn, cls, attrs) inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) with_attr
+ where
+ with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLevel, intAsText level)]) el
+ Plain inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
+ Div attr blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) with_attr
+ where
+ with_attr = addAttrAttributes attr el
+ BulletList items -> asBlockOfBlocks $ appendContents (wrapArrayOfBlocks tgNameListItem items) el
+ OrderedList (start, style, delim) items -> asBlockOfBlocks $ with_contents . with_attrs $ el
+ where
+ with_attrs =
+ addAttributes
+ ( validAttributes
+ [ (atNameStart, intAsText start),
+ (atNameNumberStyle, itemName style),
+ (atNameNumberDelim, itemName delim)
+ ]
+ )
+ with_contents = appendContents (wrapArrayOfBlocks tgNameListItem items)
+ BlockQuote blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
+ HorizontalRule -> asBlockOfInlines el
+ CodeBlock attr text -> asBlockOfInlines $ with_contents . with_attr $ el
+ where
+ with_contents = appendContents [text_node text]
+ with_attr = addAttrAttributes attr
+ LineBlock lins -> asBlockOfBlocks $ appendContents (concatMap wrapInlines lins) el
+ where
+ wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) $ emptyElement tgNameLineItem
+ Table attr caption colspecs thead tbodies tfoot -> asBlockOfBlocks $ with_foot . with_bodies . with_head . with_colspecs . with_caption . with_attr $ el
+ where
+ with_attr = addAttrAttributes attr
+ with_caption = appendContents (captionToXML caption)
+ with_colspecs = appendContents (colSpecsToXML colspecs)
+ with_head = appendContents (tableHeadToXML thead)
+ with_bodies = appendContents (concatMap tableBodyToXML tbodies)
+ with_foot = appendContents (tableFootToXML tfoot)
+ Figure attr caption blocks -> asBlockOfBlocks $ with_contents . with_caption . with_attr $ el
+ where
+ with_attr = addAttrAttributes attr
+ with_caption = appendContents (captionToXML caption)
+ with_contents = appendContents (blocksToXML blocks)
+ RawBlock (Format format) text -> asContents $ appendContents [text_node text] raw
+ where
+ raw = addAttribute atNameFormat format el
+ DefinitionList items -> asBlockOfBlocks $ appendContents (map definitionListItemToXML items) el
+
+inlineToXML :: Inline -> [Content]
+inlineToXML inline =
+ let el = itemAsEmptyElement inline
+ wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) el
+ in case (inline) of
+ Space -> [text_node " "]
+ Str s -> [text_node s]
+ Emph inlines -> wrapInlines inlines
+ Strong inlines -> wrapInlines inlines
+ Quoted quote_type inlines -> asContents $ appendContents (inlinesToXML inlines) quoted
+ where
+ quoted = addAttribute atNameQuoteType (itemName quote_type) el
+ Underline inlines -> wrapInlines inlines
+ Strikeout inlines -> wrapInlines inlines
+ SmallCaps inlines -> wrapInlines inlines
+ Superscript inlines -> wrapInlines inlines
+ Subscript inlines -> wrapInlines inlines
+ SoftBreak -> asContents el
+ LineBreak -> asContents el
+ Span attr inlines -> asContents $ appendContents (inlinesToXML inlines) with_attr
+ where
+ with_attr = addAttrAttributes attr el
+ Link (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
+ where
+ with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLinkUrl, url), (atNameTitle, title)]) el
+ Image (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
+ where
+ with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameImageUrl, url), (atNameTitle, title)]) el
+ RawInline (Format format) text -> asContents $ appendContents [text_node text] raw
+ where
+ raw = addAttribute atNameFormat format el
+ Math math_type text -> asContents $ appendContents [text_node text] math
+ where
+ math = addAttribute atNameMathType (itemName math_type) el
+ Code attr text -> asContents $ appendContents [text_node text] with_attr
+ where
+ with_attr = addAttrAttributes attr el
+ Note blocks -> asContents $ appendContents (blocksToXML blocks) el
+ Cite citations inlines -> asContents $ appendContents (inlinesToXML inlines) with_citations
+ where
+ with_citations = addCitations citations el
+
+-- TODO: don't let an attribute overwrite id or class
+maybeAttribute :: (T.Text, T.Text) -> Maybe XML.Attr
+maybeAttribute (_, "") = Nothing
+maybeAttribute ("", _) = Nothing
+maybeAttribute (name, value) = Just $ XML.Attr (unqual name) value
+
+validAttributes :: [(T.Text, T.Text)] -> [XML.Attr]
+validAttributes pairs = mapMaybe maybeAttribute pairs
+
+appendContents :: [Content] -> Element -> Element
+appendContents newContents el = el {elContent = (elContent el) ++ newContents}
+
+prependContents :: [Content] -> Element -> Element
+prependContents newContents el = el {elContent = newContents ++ (elContent el)}
+
+addAttributes :: [XML.Attr] -> Element -> Element
+addAttributes newAttrs el = el {elAttribs = newAttrs ++ elAttribs el}
+
+addAttribute :: T.Text -> T.Text -> Element -> Element
+addAttribute attr_name attr_value el = el {elAttribs = new_attr : elAttribs el}
+ where
+ new_attr = XML.Attr (unqual attr_name) attr_value
+
+addAttrAttributes :: PandocAttr -> Element -> Element
+addAttrAttributes (identifier, classes, attributes) el = addAttributes attrs' el
+ where
+ attrs' = mapMaybe maybeAttribute (("id", identifier) : ("class", T.intercalate " " classes) : attributes)
+
+addCitations :: [Citation] -> Element -> Element
+addCitations citations el = appendContents [Elem $ elementWithContents tgNameCitations $ (text_node "\n") : concatMap citation_to_elem citations] el
+ where
+ citation_to_elem :: Citation -> [Content]
+ citation_to_elem citation = asBlockOfInlines with_suffix
+ where
+ cit_elem = elementWithAttributes (itemName citation) attrs
+ prefix = citationPrefix citation
+ suffix = citationSuffix citation
+ with_prefix =
+ if null prefix
+ then cit_elem
+ else appendContents [Elem $ elementWithContents tgNameCitationPrefix $ inlinesToXML prefix] cit_elem
+ with_suffix =
+ if null suffix
+ then with_prefix
+ else appendContents [Elem $ elementWithContents tgNameCitationSuffix $ inlinesToXML suffix] with_prefix
+ attrs =
+ map
+ (\(n, v) -> XML.Attr (unqual n) v)
+ [ ("id", citationId citation),
+ (atNameCitationMode, T.pack $ show $ citationMode citation),
+ (atNameCitationNoteNum, intAsText $ citationNoteNum citation),
+ (atNameCitationHash, intAsText $ citationHash citation)
+ ]
+
+definitionListItemToXML :: ([Inline], [[Block]]) -> Content
+definitionListItemToXML (inlines, defs) = Elem $ elementWithContents tgNameDefListItem $ term ++ wrapArrayOfBlocks tgNameDefListDef defs
+ where
+ term = asBlockOfInlines $ appendContents (inlinesToXML inlines) $ emptyElement tgNameDefListTerm
+
+captionToXML :: Caption -> [Content]
+captionToXML (Caption short blocks) = asBlockOfBlocks with_short_caption
+ where
+ el = elementWithContents "Caption" $ blocksToXML blocks
+ with_short_caption = case (short) of
+ Just inlines -> prependContents (asBlockOfInlines $ elementWithContents tgNameShortCaption $ inlinesToXML inlines) el
+ _ -> el
+
+colSpecToXML :: (Alignment, ColWidth) -> [Content]
+colSpecToXML (align, cw) = asBlockOfInlines colspec
+ where
+ colspec = elementWithAttributes "ColSpec" $ validAttributes [(atNameAlignment, itemName align), (atNameColWidth, colwidth)]
+ colwidth = case (cw) of
+ ColWidth d -> T.pack $ show d
+ ColWidthDefault -> "0"
+
+colSpecsToXML :: [(Alignment, ColWidth)] -> [Content]
+colSpecsToXML colspecs = asBlockOfBlocks $ elementWithContents tgNameColspecs $ concatMap colSpecToXML colspecs
+
+tableHeadToXML :: TableHead -> [Content]
+tableHeadToXML (TableHead attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableHead" attr $ concatMap rowToXML rows
+
+tableBodyToXML :: TableBody -> [Content]
+tableBodyToXML (TableBody (idn, cls, attrs) (RowHeadColumns headcols) hrows brows) = asBlockOfBlocks $ elementWithAttrAndContents "TableBody" attr children
+ where
+ attr = (idn, cls, (atNameRowHeadColumns, intAsText headcols) : attrs)
+ header_rows = asBlockOfBlocks $ elementWithContents tgNameBodyHeader $ concatMap rowToXML hrows
+ body_rows = asBlockOfBlocks $ elementWithContents tgNameBodyBody $ concatMap rowToXML brows
+ children = header_rows ++ body_rows
+
+tableFootToXML :: TableFoot -> [Content]
+tableFootToXML (TableFoot attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableFoot" attr $ concatMap rowToXML rows
+
+rowToXML :: Row -> [Content]
+rowToXML (Row attr cells) = asBlockOfBlocks $ elementWithAttrAndContents "Row" attr $ concatMap cellToXML cells
+
+cellToXML :: Cell -> [Content]
+cellToXML (Cell (idn, cls, attrs) alignment (RowSpan rowspan) (ColSpan colspan) blocks) = asBlockOfBlocks $ elementWithAttrAndContents "Cell" attr $ blocksToXML blocks
+ where
+ with_alignment a = (atNameAlignment, itemName alignment) : a
+ with_rowspan a = if rowspan > 1 then (atNameRowspan, intAsText rowspan) : a else a
+ with_colspan a = if colspan > 1 then (atNameColspan, intAsText colspan) : a else a
+ attrs' = (with_colspan . with_rowspan . with_alignment) attrs
+ attr = (idn, cls, attrs')
diff --git a/src/Text/Pandoc/XMLFormat.hs b/src/Text/Pandoc/XMLFormat.hs
new file mode 100644
index 000000000..6801de90c
--- /dev/null
+++ b/src/Text/Pandoc/XMLFormat.hs
@@ -0,0 +1,188 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Text.Pandoc.XMLFormat
+ ( atNameAlignment,
+ atNameApiVersion,
+ atNameCitationHash,
+ atNameCitationMode,
+ atNameCitationNoteNum,
+ atNameColspan,
+ atNameColWidth,
+ atNameFormat,
+ atNameImageUrl,
+ atNameLevel,
+ atNameLinkUrl,
+ atNameMathType,
+ atNameMetaBoolValue,
+ atNameMetaMapEntryKey,
+ atNameNumberDelim,
+ atNameNumberStyle,
+ atNameQuoteType,
+ atNameRowHeadColumns,
+ atNameRowspan,
+ atNameSpaceCount,
+ atNameStart,
+ atNameStrContent,
+ atNameTitle,
+ tgNameBodyBody,
+ tgNameBodyHeader,
+ tgNameCitations,
+ tgNameCitationPrefix,
+ tgNameCitationSuffix,
+ tgNameColspecs,
+ tgNameDefListDef,
+ tgNameDefListItem,
+ tgNameDefListTerm,
+ tgNameLineItem,
+ tgNameListItem,
+ tgNameMetaMapEntry,
+ tgNameShortCaption,
+ )
+where
+
+import Data.Text (Text)
+
+-- the attribute carrying the API version of pandoc types in the main Pandoc element
+atNameApiVersion :: Text
+atNameApiVersion = "api-version"
+
+-- the element of a <meta> or <MetaMap> entry
+tgNameMetaMapEntry :: Text
+tgNameMetaMapEntry = "entry"
+
+-- the attribute carrying the key name of a <meta> or <MetaMap> entry
+atNameMetaMapEntryKey :: Text
+atNameMetaMapEntryKey = "key"
+
+-- the attribute carrying the boolean value ("true" or "false") of a MetaBool
+atNameMetaBoolValue :: Text
+atNameMetaBoolValue = "value"
+
+-- level of a Header
+atNameLevel :: Text
+atNameLevel = "level"
+
+-- start number of an OrderedList
+atNameStart :: Text
+atNameStart = "start"
+
+-- number delimiter of an OrderedList
+atNameNumberDelim :: Text
+atNameNumberDelim = "number-delim"
+
+-- number style of an OrderedList
+atNameNumberStyle :: Text
+atNameNumberStyle = "number-style"
+
+-- target title in Image and Link
+atNameTitle :: Text
+atNameTitle = "title"
+
+-- target url in Image
+atNameImageUrl :: Text
+atNameImageUrl = "src"
+
+-- target url in Link
+atNameLinkUrl :: Text
+atNameLinkUrl = "href"
+
+-- QuoteType of a Quoted
+atNameQuoteType :: Text
+atNameQuoteType = "quote-type"
+
+-- MathType of a Math
+atNameMathType :: Text
+atNameMathType = "math-type"
+
+-- format of a RawInline or a RawBlock
+atNameFormat :: Text
+atNameFormat = "format"
+
+-- alignment attribute in a ColSpec or in a Cell
+atNameAlignment :: Text
+atNameAlignment = "alignment"
+
+-- ColWidth attribute in a ColSpec
+atNameColWidth :: Text
+atNameColWidth = "col-width"
+
+-- RowHeadColumns attribute in a TableBody
+atNameRowHeadColumns :: Text
+atNameRowHeadColumns = "row-head-columns"
+
+-- RowSpan attribute in a Cell
+atNameRowspan :: Text
+atNameRowspan = "row-span"
+
+-- ColSpan attribute in a Cell
+atNameColspan :: Text
+atNameColspan = "col-span"
+
+-- the citationMode of a Citation
+atNameCitationMode :: Text
+atNameCitationMode = "mode"
+
+-- the citationHash of a Citation
+atNameCitationHash :: Text
+atNameCitationHash = "hash"
+
+-- the citationNoteNum of a Citation
+atNameCitationNoteNum :: Text
+atNameCitationNoteNum = "note-num"
+
+-- the number of consecutive spaces of the <Space> element
+atNameSpaceCount :: Text
+atNameSpaceCount = "count"
+
+-- the content of the <Str> element
+atNameStrContent :: Text
+atNameStrContent = "content"
+
+-- container of Citation elements in Cite inlines
+tgNameCitations :: Text
+tgNameCitations = "citations"
+
+-- element around the prefix inlines of a Citation
+tgNameCitationPrefix :: Text
+tgNameCitationPrefix = "prefix"
+
+-- element around the suffix inlines of a Citation
+tgNameCitationSuffix :: Text
+tgNameCitationSuffix = "suffix"
+
+-- list item for BulletList and OrderedList
+tgNameListItem :: Text
+tgNameListItem = "item"
+
+-- list item for DefinitionList
+tgNameDefListItem :: Text
+tgNameDefListItem = "item"
+
+-- element around the inlines of the term of a DefinitionList item
+tgNameDefListTerm :: Text
+tgNameDefListTerm = "term"
+
+-- element around the blocks of a definition in a DefinitionList item
+tgNameDefListDef :: Text
+tgNameDefListDef = "def"
+
+-- optional element of the ShortCaption
+tgNameShortCaption :: Text
+tgNameShortCaption = "ShortCaption"
+
+-- element around the ColSpec of a Table
+tgNameColspecs :: Text
+tgNameColspecs = "colspecs"
+
+-- element around the header rows of a TableBody
+tgNameBodyHeader :: Text
+tgNameBodyHeader = "header"
+
+-- element around the body rows of a TableBody
+tgNameBodyBody :: Text
+tgNameBodyBody = "body"
+
+-- element around the inlines of a line in a LineBlock
+tgNameLineItem :: Text
+tgNameLineItem = "line"