1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
|
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- |
-- Module : Text.Pandoc.Writers.XML
-- Copyright : Copyright (C) 2025- Massimiliano Farinella and John MacFarlane
-- License : GNU GPL, version 2 or above
--
-- Maintainer : Massimiliano Farinella <[email protected]>
-- Stability : WIP
-- Portability : portable
--
-- Conversion of 'Pandoc' documents to (pandoc specific) xml markup.
module Text.Pandoc.Writers.XML (writeXML) where
import Data.Map (Map, toList)
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import Data.Version (versionBranch)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions (..))
import Text.Pandoc.XML.Light
import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.XMLFormat
import Text.XML.Light (xml_header)
type PandocAttr = Text.Pandoc.Definition.Attr
writeXML :: (PandocMonad m) => WriterOptions -> Pandoc -> m T.Text
writeXML _ doc = do
return $ pandocToXmlText doc
text_node :: T.Text -> Content
text_node text = Text (CData CDataText text Nothing)
emptyElement :: T.Text -> Element
emptyElement tag =
Element
{ elName = unqual tag,
elAttribs = [],
elContent = [],
elLine = Nothing
}
elementWithContents :: T.Text -> [Content] -> Element
elementWithContents tag contents =
Element
{ elName = unqual tag,
elAttribs = [],
elContent = contents,
elLine = Nothing
}
elementWithAttributes :: T.Text -> [XML.Attr] -> Element
elementWithAttributes tag attributes =
Element
{ elName = unqual tag,
elAttribs = attributes,
elContent = [],
elLine = Nothing
}
elementWithAttrAndContents :: T.Text -> PandocAttr -> [Content] -> Element
elementWithAttrAndContents tag attr contents = addAttrAttributes attr $ elementWithContents tag contents
asBlockOfInlines :: Element -> [Content]
asBlockOfInlines el = [Elem el, text_node "\n"]
asBlockOfBlocks :: Element -> [Content]
asBlockOfBlocks el = [Elem newline_before_first, newline]
where
newline = text_node "\n"
newline_before_first = if null (elContent el) then el else prependContents [newline] el
itemName :: (Show a) => a -> T.Text
itemName a = T.pack $ takeWhile (/= ' ') (show a)
intAsText :: Int -> T.Text
intAsText i = T.pack $ show i
itemAsEmptyElement :: (Show a) => a -> Element
itemAsEmptyElement item = emptyElement $ itemName item
pandocToXmlText :: Pandoc -> T.Text
pandocToXmlText (Pandoc (Meta meta) blocks) = with_header . with_blocks . with_meta . with_version $ el
where
el = prependContents [text_node "\n"] $ emptyElement "Pandoc"
with_version = addAttribute atNameApiVersion (T.intercalate "," $ map (T.pack . show) $ versionBranch pandocTypesVersion)
with_meta = appendContents (metaMapToXML meta "meta")
with_blocks = appendContents (asBlockOfBlocks $ elementWithContents "blocks" $ blocksToXML blocks)
with_header :: Element -> T.Text
with_header e = T.concat [T.pack xml_header, "\n", showElement e]
metaMapToXML :: Map T.Text MetaValue -> T.Text -> [Content]
metaMapToXML mmap tag = asBlockOfBlocks $ elementWithContents tag entries
where
entries = concatMap to_entry $ toList mmap
to_entry :: (T.Text, MetaValue) -> [Content]
to_entry (text, metavalue) = asBlockOfBlocks with_key
where
entry = elementWithContents tgNameMetaMapEntry $ metaValueToXML metavalue
with_key = addAttribute atNameMetaMapEntryKey text entry
metaValueToXML :: MetaValue -> [Content]
metaValueToXML value =
let name = itemName value
el = itemAsEmptyElement value
in case (value) of
MetaBool b -> asBlockOfInlines $ addAttribute atNameMetaBoolValue bool_value el
where
bool_value = if b then "true" else "false"
MetaString s -> asBlockOfInlines $ appendContents [text_node s] el
MetaInlines inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
MetaBlocks blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
MetaList items -> asBlockOfBlocks $ appendContents (concatMap metaValueToXML items) el
MetaMap mm -> metaMapToXML mm name
blocksToXML :: [Block] -> [Content]
blocksToXML blocks = concatMap blockToXML blocks
inlinesToXML :: [Inline] -> [Content]
inlinesToXML inlines = concatMap inlineContentToContents (ilsToIlsContent inlines [])
data InlineContent
= NormalInline Inline
| ElSpace Int
| ElStr T.Text
ilsToIlsContent :: [Inline] -> [InlineContent] -> [InlineContent]
ilsToIlsContent (Space : xs) [] = ilsToIlsContent xs [ElSpace 1]
ilsToIlsContent (Space : xs) (NormalInline Space : cs) = ilsToIlsContent xs (ElSpace 2 : cs)
ilsToIlsContent (Space : xs) (ElSpace n : cs) = ilsToIlsContent xs (ElSpace (n + 1) : cs)
-- empty Str are always encoded as <Str />
ilsToIlsContent (Str "" : xs) ilct = ilsToIlsContent xs (ElStr "" : ilct)
-- Str s1, Str s2 -> s1<Str content="s2">
ilsToIlsContent (Str s2 : xs) (NormalInline str1@(Str _) : ilct) = ilsToIlsContent xs (ElStr s2 : NormalInline str1 : ilct)
--
ilsToIlsContent (Str s : xs) ilct =
if T.any (== ' ') s
then ilsToIlsContent xs (ElStr s : ilct)
else ilsToIlsContent xs (NormalInline (Str s) : ilct)
ilsToIlsContent (x : xs) ilct = ilsToIlsContent xs (NormalInline x : ilct)
ilsToIlsContent [] ilct = reverse $ lastSpaceAsElem ilct
where
lastSpaceAsElem :: [InlineContent] -> [InlineContent]
lastSpaceAsElem (NormalInline Space : xs) = ElSpace 1 : xs
lastSpaceAsElem ilcts = ilcts
inlineContentToContents :: InlineContent -> [Content]
inlineContentToContents (NormalInline il) = inlineToXML il
inlineContentToContents (ElSpace 1) = [Elem $ emptyElement "Space"]
inlineContentToContents (ElSpace n) = [Elem $ addAttribute atNameSpaceCount (intAsText n) (emptyElement "Space")]
inlineContentToContents (ElStr "") = [Elem $ emptyElement "Str"]
inlineContentToContents (ElStr s) = [Elem $ addAttribute atNameStrContent s (emptyElement "Str")]
asContents :: Element -> [Content]
asContents el = [Elem el]
wrapBlocks :: T.Text -> [Block] -> [Content]
wrapBlocks tag blocks = asBlockOfBlocks $ elementWithContents tag $ blocksToXML blocks
wrapArrayOfBlocks :: T.Text -> [[Block]] -> [Content]
wrapArrayOfBlocks tag array = concatMap (wrapBlocks tag) array
-- wrapInlines :: T.Text -> [Inline] -> [Content]
-- wrapInlines tag inlines = asBlockOfInlines $ element_with_contents tag $ inlinesToXML inlines
blockToXML :: Block -> [Content]
blockToXML block =
let el = itemAsEmptyElement block
in case (block) of
Para inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
Header level (idn, cls, attrs) inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) with_attr
where
with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLevel, intAsText level)]) el
Plain inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
Div attr blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) with_attr
where
with_attr = addAttrAttributes attr el
BulletList items -> asBlockOfBlocks $ appendContents (wrapArrayOfBlocks tgNameListItem items) el
OrderedList (start, style, delim) items -> asBlockOfBlocks $ with_contents . with_attrs $ el
where
with_attrs =
addAttributes
( validAttributes
[ (atNameStart, intAsText start),
(atNameNumberStyle, itemName style),
(atNameNumberDelim, itemName delim)
]
)
with_contents = appendContents (wrapArrayOfBlocks tgNameListItem items)
BlockQuote blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
HorizontalRule -> asBlockOfInlines el
CodeBlock attr text -> asBlockOfInlines $ with_contents . with_attr $ el
where
with_contents = appendContents [text_node text]
with_attr = addAttrAttributes attr
LineBlock lins -> asBlockOfBlocks $ appendContents (concatMap wrapInlines lins) el
where
wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) $ emptyElement tgNameLineItem
Table attr caption colspecs thead tbodies tfoot -> asBlockOfBlocks $ with_foot . with_bodies . with_head . with_colspecs . with_caption . with_attr $ el
where
with_attr = addAttrAttributes attr
with_caption = appendContents (captionToXML caption)
with_colspecs = appendContents (colSpecsToXML colspecs)
with_head = appendContents (tableHeadToXML thead)
with_bodies = appendContents (concatMap tableBodyToXML tbodies)
with_foot = appendContents (tableFootToXML tfoot)
Figure attr caption blocks -> asBlockOfBlocks $ with_contents . with_caption . with_attr $ el
where
with_attr = addAttrAttributes attr
with_caption = appendContents (captionToXML caption)
with_contents = appendContents (blocksToXML blocks)
RawBlock (Format format) text -> asContents $ appendContents [text_node text] raw
where
raw = addAttribute atNameFormat format el
DefinitionList items -> asBlockOfBlocks $ appendContents (map definitionListItemToXML items) el
inlineToXML :: Inline -> [Content]
inlineToXML inline =
let el = itemAsEmptyElement inline
wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) el
in case (inline) of
Space -> [text_node " "]
Str s -> [text_node s]
Emph inlines -> wrapInlines inlines
Strong inlines -> wrapInlines inlines
Quoted quote_type inlines -> asContents $ appendContents (inlinesToXML inlines) quoted
where
quoted = addAttribute atNameQuoteType (itemName quote_type) el
Underline inlines -> wrapInlines inlines
Strikeout inlines -> wrapInlines inlines
SmallCaps inlines -> wrapInlines inlines
Superscript inlines -> wrapInlines inlines
Subscript inlines -> wrapInlines inlines
SoftBreak -> asContents el
LineBreak -> asContents el
Span attr inlines -> asContents $ appendContents (inlinesToXML inlines) with_attr
where
with_attr = addAttrAttributes attr el
Link (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
where
with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLinkUrl, url), (atNameTitle, title)]) el
Image (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
where
with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameImageUrl, url), (atNameTitle, title)]) el
RawInline (Format format) text -> asContents $ appendContents [text_node text] raw
where
raw = addAttribute atNameFormat format el
Math math_type text -> asContents $ appendContents [text_node text] math
where
math = addAttribute atNameMathType (itemName math_type) el
Code attr text -> asContents $ appendContents [text_node text] with_attr
where
with_attr = addAttrAttributes attr el
Note blocks -> asContents $ appendContents (blocksToXML blocks) el
Cite citations inlines -> asContents $ appendContents (inlinesToXML inlines) with_citations
where
with_citations = addCitations citations el
-- TODO: don't let an attribute overwrite id or class
maybeAttribute :: (T.Text, T.Text) -> Maybe XML.Attr
maybeAttribute (_, "") = Nothing
maybeAttribute ("", _) = Nothing
maybeAttribute (name, value) = Just $ XML.Attr (unqual name) value
validAttributes :: [(T.Text, T.Text)] -> [XML.Attr]
validAttributes pairs = mapMaybe maybeAttribute pairs
appendContents :: [Content] -> Element -> Element
appendContents newContents el = el {elContent = (elContent el) ++ newContents}
prependContents :: [Content] -> Element -> Element
prependContents newContents el = el {elContent = newContents ++ (elContent el)}
addAttributes :: [XML.Attr] -> Element -> Element
addAttributes newAttrs el = el {elAttribs = newAttrs ++ elAttribs el}
addAttribute :: T.Text -> T.Text -> Element -> Element
addAttribute attr_name attr_value el = el {elAttribs = new_attr : elAttribs el}
where
new_attr = XML.Attr (unqual attr_name) attr_value
addAttrAttributes :: PandocAttr -> Element -> Element
addAttrAttributes (identifier, classes, attributes) el = addAttributes attrs' el
where
attrs' = mapMaybe maybeAttribute (("id", identifier) : ("class", T.intercalate " " classes) : attributes)
addCitations :: [Citation] -> Element -> Element
addCitations citations el = appendContents [Elem $ elementWithContents tgNameCitations $ (text_node "\n") : concatMap citation_to_elem citations] el
where
citation_to_elem :: Citation -> [Content]
citation_to_elem citation = asBlockOfInlines with_suffix
where
cit_elem = elementWithAttributes (itemName citation) attrs
prefix = citationPrefix citation
suffix = citationSuffix citation
with_prefix =
if null prefix
then cit_elem
else appendContents [Elem $ elementWithContents tgNameCitationPrefix $ inlinesToXML prefix] cit_elem
with_suffix =
if null suffix
then with_prefix
else appendContents [Elem $ elementWithContents tgNameCitationSuffix $ inlinesToXML suffix] with_prefix
attrs =
map
(\(n, v) -> XML.Attr (unqual n) v)
[ ("id", citationId citation),
(atNameCitationMode, T.pack $ show $ citationMode citation),
(atNameCitationNoteNum, intAsText $ citationNoteNum citation),
(atNameCitationHash, intAsText $ citationHash citation)
]
definitionListItemToXML :: ([Inline], [[Block]]) -> Content
definitionListItemToXML (inlines, defs) = Elem $ elementWithContents tgNameDefListItem $ term ++ wrapArrayOfBlocks tgNameDefListDef defs
where
term = asBlockOfInlines $ appendContents (inlinesToXML inlines) $ emptyElement tgNameDefListTerm
captionToXML :: Caption -> [Content]
captionToXML (Caption short blocks) = asBlockOfBlocks with_short_caption
where
el = elementWithContents "Caption" $ blocksToXML blocks
with_short_caption = case (short) of
Just inlines -> prependContents (asBlockOfInlines $ elementWithContents tgNameShortCaption $ inlinesToXML inlines) el
_ -> el
colSpecToXML :: (Alignment, ColWidth) -> [Content]
colSpecToXML (align, cw) = asBlockOfInlines colspec
where
colspec = elementWithAttributes "ColSpec" $ validAttributes [(atNameAlignment, itemName align), (atNameColWidth, colwidth)]
colwidth = case (cw) of
ColWidth d -> T.pack $ show d
ColWidthDefault -> "0"
colSpecsToXML :: [(Alignment, ColWidth)] -> [Content]
colSpecsToXML colspecs = asBlockOfBlocks $ elementWithContents tgNameColspecs $ concatMap colSpecToXML colspecs
tableHeadToXML :: TableHead -> [Content]
tableHeadToXML (TableHead attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableHead" attr $ concatMap rowToXML rows
tableBodyToXML :: TableBody -> [Content]
tableBodyToXML (TableBody (idn, cls, attrs) (RowHeadColumns headcols) hrows brows) = asBlockOfBlocks $ elementWithAttrAndContents "TableBody" attr children
where
attr = (idn, cls, (atNameRowHeadColumns, intAsText headcols) : attrs)
header_rows = asBlockOfBlocks $ elementWithContents tgNameBodyHeader $ concatMap rowToXML hrows
body_rows = asBlockOfBlocks $ elementWithContents tgNameBodyBody $ concatMap rowToXML brows
children = header_rows ++ body_rows
tableFootToXML :: TableFoot -> [Content]
tableFootToXML (TableFoot attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableFoot" attr $ concatMap rowToXML rows
rowToXML :: Row -> [Content]
rowToXML (Row attr cells) = asBlockOfBlocks $ elementWithAttrAndContents "Row" attr $ concatMap cellToXML cells
cellToXML :: Cell -> [Content]
cellToXML (Cell (idn, cls, attrs) alignment (RowSpan rowspan) (ColSpan colspan) blocks) = asBlockOfBlocks $ elementWithAttrAndContents "Cell" attr $ blocksToXML blocks
where
with_alignment a = (atNameAlignment, itemName alignment) : a
with_rowspan a = if rowspan > 1 then (atNameRowspan, intAsText rowspan) : a else a
with_colspan a = if colspan > 1 then (atNameColspan, intAsText colspan) : a else a
attrs' = (with_colspan . with_rowspan . with_alignment) attrs
attr = (idn, cls, attrs')
|