aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/XML.hs
blob: 35bcb07f89010ddb4d0a27cadec55566b2a4db1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- |
--   Module      : Text.Pandoc.Writers.XML
--   Copyright   : Copyright (C) 2025- Massimiliano Farinella and John MacFarlane
--   License     : GNU GPL, version 2 or above
--
--   Maintainer  : Massimiliano Farinella <[email protected]>
--   Stability   : WIP
--   Portability : portable
--
-- Conversion of 'Pandoc' documents to (pandoc specific) xml markup.
module Text.Pandoc.Writers.XML (writeXML) where

import Data.Map (Map, toList)
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import Data.Version (versionBranch)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions (..))
import Text.Pandoc.XML.Light
import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.XMLFormat
import Text.XML.Light (xml_header)

type PandocAttr = Text.Pandoc.Definition.Attr

writeXML :: (PandocMonad m) => WriterOptions -> Pandoc -> m T.Text
writeXML _ doc = do
  return $ pandocToXmlText doc

text_node :: T.Text -> Content
text_node text = Text (CData CDataText text Nothing)

emptyElement :: T.Text -> Element
emptyElement tag =
  Element
    { elName = unqual tag,
      elAttribs = [],
      elContent = [],
      elLine = Nothing
    }

elementWithContents :: T.Text -> [Content] -> Element
elementWithContents tag contents =
  Element
    { elName = unqual tag,
      elAttribs = [],
      elContent = contents,
      elLine = Nothing
    }

elementWithAttributes :: T.Text -> [XML.Attr] -> Element
elementWithAttributes tag attributes =
  Element
    { elName = unqual tag,
      elAttribs = attributes,
      elContent = [],
      elLine = Nothing
    }

elementWithAttrAndContents :: T.Text -> PandocAttr -> [Content] -> Element
elementWithAttrAndContents tag attr contents = addAttrAttributes attr $ elementWithContents tag contents

asBlockOfInlines :: Element -> [Content]
asBlockOfInlines el = [Elem el, text_node "\n"]

asBlockOfBlocks :: Element -> [Content]
asBlockOfBlocks el = [Elem newline_before_first, newline]
  where
    newline = text_node "\n"
    newline_before_first = if null (elContent el) then el else prependContents [newline] el

itemName :: (Show a) => a -> T.Text
itemName a = T.pack $ takeWhile (/= ' ') (show a)

intAsText :: Int -> T.Text
intAsText i = T.pack $ show i

itemAsEmptyElement :: (Show a) => a -> Element
itemAsEmptyElement item = emptyElement $ itemName item

pandocToXmlText :: Pandoc -> T.Text
pandocToXmlText (Pandoc (Meta meta) blocks) = with_header . with_blocks . with_meta . with_version $ el
  where
    el = prependContents [text_node "\n"] $ emptyElement "Pandoc"
    with_version = addAttribute atNameApiVersion (T.intercalate "," $ map (T.pack . show) $ versionBranch pandocTypesVersion)
    with_meta = appendContents (metaMapToXML meta "meta")
    with_blocks = appendContents (asBlockOfBlocks $ elementWithContents "blocks" $ blocksToXML blocks)
    with_header :: Element -> T.Text
    with_header e = T.concat [T.pack xml_header, "\n", showElement e]

metaMapToXML :: Map T.Text MetaValue -> T.Text -> [Content]
metaMapToXML mmap tag = asBlockOfBlocks $ elementWithContents tag entries
  where
    entries = concatMap to_entry $ toList mmap
    to_entry :: (T.Text, MetaValue) -> [Content]
    to_entry (text, metavalue) = asBlockOfBlocks with_key
      where
        entry = elementWithContents tgNameMetaMapEntry $ metaValueToXML metavalue
        with_key = addAttribute atNameMetaMapEntryKey text entry

metaValueToXML :: MetaValue -> [Content]
metaValueToXML value =
  let name = itemName value
      el = itemAsEmptyElement value
   in case (value) of
        MetaBool b -> asBlockOfInlines $ addAttribute atNameMetaBoolValue bool_value el
          where
            bool_value = if b then "true" else "false"
        MetaString s -> asBlockOfInlines $ appendContents [text_node s] el
        MetaInlines inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
        MetaBlocks blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
        MetaList items -> asBlockOfBlocks $ appendContents (concatMap metaValueToXML items) el
        MetaMap mm -> metaMapToXML mm name

blocksToXML :: [Block] -> [Content]
blocksToXML blocks = concatMap blockToXML blocks

inlinesToXML :: [Inline] -> [Content]
inlinesToXML inlines = concatMap inlineContentToContents (ilsToIlsContent inlines [])

data InlineContent
  = NormalInline Inline
  | ElSpace Int
  | ElStr T.Text

ilsToIlsContent :: [Inline] -> [InlineContent] -> [InlineContent]
ilsToIlsContent (Space : xs) [] = ilsToIlsContent xs [ElSpace 1]
ilsToIlsContent (Space : xs) (NormalInline Space : cs) = ilsToIlsContent xs (ElSpace 2 : cs)
ilsToIlsContent (Space : xs) (ElSpace n : cs) = ilsToIlsContent xs (ElSpace (n + 1) : cs)
-- empty Str are always encoded as <Str />
ilsToIlsContent (Str "" : xs) ilct = ilsToIlsContent xs (ElStr "" : ilct)
-- Str s1, Str s2 -> s1<Str content="s2">
ilsToIlsContent (Str s2 : xs) (NormalInline str1@(Str _) : ilct) = ilsToIlsContent xs (ElStr s2 : NormalInline str1 : ilct)
--
ilsToIlsContent (Str s : xs) ilct =
  if T.any (== ' ') s
    then ilsToIlsContent xs (ElStr s : ilct)
    else ilsToIlsContent xs (NormalInline (Str s) : ilct)
ilsToIlsContent (x : xs) ilct = ilsToIlsContent xs (NormalInline x : ilct)
ilsToIlsContent [] ilct = reverse $ lastSpaceAsElem ilct
  where
    lastSpaceAsElem :: [InlineContent] -> [InlineContent]
    lastSpaceAsElem (NormalInline Space : xs) = ElSpace 1 : xs
    lastSpaceAsElem ilcts = ilcts

inlineContentToContents :: InlineContent -> [Content]
inlineContentToContents (NormalInline il) = inlineToXML il
inlineContentToContents (ElSpace 1) = [Elem $ emptyElement "Space"]
inlineContentToContents (ElSpace n) = [Elem $ addAttribute atNameSpaceCount (intAsText n) (emptyElement "Space")]
inlineContentToContents (ElStr "") = [Elem $ emptyElement "Str"]
inlineContentToContents (ElStr s) = [Elem $ addAttribute atNameStrContent s (emptyElement "Str")]

asContents :: Element -> [Content]
asContents el = [Elem el]

wrapBlocks :: T.Text -> [Block] -> [Content]
wrapBlocks tag blocks = asBlockOfBlocks $ elementWithContents tag $ blocksToXML blocks

wrapArrayOfBlocks :: T.Text -> [[Block]] -> [Content]
wrapArrayOfBlocks tag array = concatMap (wrapBlocks tag) array

-- wrapInlines :: T.Text -> [Inline] -> [Content]
-- wrapInlines tag inlines = asBlockOfInlines $ element_with_contents tag $ inlinesToXML inlines

blockToXML :: Block -> [Content]
blockToXML block =
  let el = itemAsEmptyElement block
   in case (block) of
        Para inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
        Header level (idn, cls, attrs) inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) with_attr
          where
            with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLevel, intAsText level)]) el
        Plain inlines -> asBlockOfInlines $ appendContents (inlinesToXML inlines) el
        Div attr blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) with_attr
          where
            with_attr = addAttrAttributes attr el
        BulletList items -> asBlockOfBlocks $ appendContents (wrapArrayOfBlocks tgNameListItem items) el
        OrderedList (start, style, delim) items -> asBlockOfBlocks $ with_contents . with_attrs $ el
          where
            with_attrs =
              addAttributes
                ( validAttributes
                    [ (atNameStart, intAsText start),
                      (atNameNumberStyle, itemName style),
                      (atNameNumberDelim, itemName delim)
                    ]
                )
            with_contents = appendContents (wrapArrayOfBlocks tgNameListItem items)
        BlockQuote blocks -> asBlockOfBlocks $ appendContents (blocksToXML blocks) el
        HorizontalRule -> asBlockOfInlines el
        CodeBlock attr text -> asBlockOfInlines $ with_contents . with_attr $ el
          where
            with_contents = appendContents [text_node text]
            with_attr = addAttrAttributes attr
        LineBlock lins -> asBlockOfBlocks $ appendContents (concatMap wrapInlines lins) el
          where
            wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) $ emptyElement tgNameLineItem
        Table attr caption colspecs thead tbodies tfoot -> asBlockOfBlocks $ with_foot . with_bodies . with_head . with_colspecs . with_caption . with_attr $ el
          where
            with_attr = addAttrAttributes attr
            with_caption = appendContents (captionToXML caption)
            with_colspecs = appendContents (colSpecsToXML colspecs)
            with_head = appendContents (tableHeadToXML thead)
            with_bodies = appendContents (concatMap tableBodyToXML tbodies)
            with_foot = appendContents (tableFootToXML tfoot)
        Figure attr caption blocks -> asBlockOfBlocks $ with_contents . with_caption . with_attr $ el
          where
            with_attr = addAttrAttributes attr
            with_caption = appendContents (captionToXML caption)
            with_contents = appendContents (blocksToXML blocks)
        RawBlock (Format format) text -> asContents $ appendContents [text_node text] raw
          where
            raw = addAttribute atNameFormat format el
        DefinitionList items -> asBlockOfBlocks $ appendContents (map definitionListItemToXML items) el

inlineToXML :: Inline -> [Content]
inlineToXML inline =
  let el = itemAsEmptyElement inline
      wrapInlines inlines = asContents $ appendContents (inlinesToXML inlines) el
   in case (inline) of
        Space -> [text_node " "]
        Str s -> [text_node s]
        Emph inlines -> wrapInlines inlines
        Strong inlines -> wrapInlines inlines
        Quoted quote_type inlines -> asContents $ appendContents (inlinesToXML inlines) quoted
          where
            quoted = addAttribute atNameQuoteType (itemName quote_type) el
        Underline inlines -> wrapInlines inlines
        Strikeout inlines -> wrapInlines inlines
        SmallCaps inlines -> wrapInlines inlines
        Superscript inlines -> wrapInlines inlines
        Subscript inlines -> wrapInlines inlines
        SoftBreak -> asContents el
        LineBreak -> asContents el
        Span attr inlines -> asContents $ appendContents (inlinesToXML inlines) with_attr
          where
            with_attr = addAttrAttributes attr el
        Link (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
          where
            with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameLinkUrl, url), (atNameTitle, title)]) el
        Image (idn, cls, attrs) inlines (url, title) -> asContents $ appendContents (inlinesToXML inlines) with_attr
          where
            with_attr = addAttrAttributes (idn, cls, attrs ++ [(atNameImageUrl, url), (atNameTitle, title)]) el
        RawInline (Format format) text -> asContents $ appendContents [text_node text] raw
          where
            raw = addAttribute atNameFormat format el
        Math math_type text -> asContents $ appendContents [text_node text] math
          where
            math = addAttribute atNameMathType (itemName math_type) el
        Code attr text -> asContents $ appendContents [text_node text] with_attr
          where
            with_attr = addAttrAttributes attr el
        Note blocks -> asContents $ appendContents (blocksToXML blocks) el
        Cite citations inlines -> asContents $ appendContents (inlinesToXML inlines) with_citations
          where
            with_citations = addCitations citations el

-- TODO: don't let an attribute overwrite id or class
maybeAttribute :: (T.Text, T.Text) -> Maybe XML.Attr
maybeAttribute (_, "") = Nothing
maybeAttribute ("", _) = Nothing
maybeAttribute (name, value) = Just $ XML.Attr (unqual name) value

validAttributes :: [(T.Text, T.Text)] -> [XML.Attr]
validAttributes pairs = mapMaybe maybeAttribute pairs

appendContents :: [Content] -> Element -> Element
appendContents newContents el = el {elContent = (elContent el) ++ newContents}

prependContents :: [Content] -> Element -> Element
prependContents newContents el = el {elContent = newContents ++ (elContent el)}

addAttributes :: [XML.Attr] -> Element -> Element
addAttributes newAttrs el = el {elAttribs = newAttrs ++ elAttribs el}

addAttribute :: T.Text -> T.Text -> Element -> Element
addAttribute attr_name attr_value el = el {elAttribs = new_attr : elAttribs el}
  where
    new_attr = XML.Attr (unqual attr_name) attr_value

addAttrAttributes :: PandocAttr -> Element -> Element
addAttrAttributes (identifier, classes, attributes) el = addAttributes attrs' el
  where
    attrs' = mapMaybe maybeAttribute (("id", identifier) : ("class", T.intercalate " " classes) : attributes)

addCitations :: [Citation] -> Element -> Element
addCitations citations el = appendContents [Elem $ elementWithContents tgNameCitations $ (text_node "\n") : concatMap citation_to_elem citations] el
  where
    citation_to_elem :: Citation -> [Content]
    citation_to_elem citation = asBlockOfInlines with_suffix
      where
        cit_elem = elementWithAttributes (itemName citation) attrs
        prefix = citationPrefix citation
        suffix = citationSuffix citation
        with_prefix =
          if null prefix
            then cit_elem
            else appendContents [Elem $ elementWithContents tgNameCitationPrefix $ inlinesToXML prefix] cit_elem
        with_suffix =
          if null suffix
            then with_prefix
            else appendContents [Elem $ elementWithContents tgNameCitationSuffix $ inlinesToXML suffix] with_prefix
        attrs =
          map
            (\(n, v) -> XML.Attr (unqual n) v)
            [ ("id", citationId citation),
              (atNameCitationMode, T.pack $ show $ citationMode citation),
              (atNameCitationNoteNum, intAsText $ citationNoteNum citation),
              (atNameCitationHash, intAsText $ citationHash citation)
            ]

definitionListItemToXML :: ([Inline], [[Block]]) -> Content
definitionListItemToXML (inlines, defs) = Elem $ elementWithContents tgNameDefListItem $ term ++ wrapArrayOfBlocks tgNameDefListDef defs
  where
    term = asBlockOfInlines $ appendContents (inlinesToXML inlines) $ emptyElement tgNameDefListTerm

captionToXML :: Caption -> [Content]
captionToXML (Caption short blocks) = asBlockOfBlocks with_short_caption
  where
    el = elementWithContents "Caption" $ blocksToXML blocks
    with_short_caption = case (short) of
      Just inlines -> prependContents (asBlockOfInlines $ elementWithContents tgNameShortCaption $ inlinesToXML inlines) el
      _ -> el

colSpecToXML :: (Alignment, ColWidth) -> [Content]
colSpecToXML (align, cw) = asBlockOfInlines colspec
  where
    colspec = elementWithAttributes "ColSpec" $ validAttributes [(atNameAlignment, itemName align), (atNameColWidth, colwidth)]
    colwidth = case (cw) of
      ColWidth d -> T.pack $ show d
      ColWidthDefault -> "0"

colSpecsToXML :: [(Alignment, ColWidth)] -> [Content]
colSpecsToXML colspecs = asBlockOfBlocks $ elementWithContents tgNameColspecs $ concatMap colSpecToXML colspecs

tableHeadToXML :: TableHead -> [Content]
tableHeadToXML (TableHead attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableHead" attr $ concatMap rowToXML rows

tableBodyToXML :: TableBody -> [Content]
tableBodyToXML (TableBody (idn, cls, attrs) (RowHeadColumns headcols) hrows brows) = asBlockOfBlocks $ elementWithAttrAndContents "TableBody" attr children
  where
    attr = (idn, cls, (atNameRowHeadColumns, intAsText headcols) : attrs)
    header_rows = asBlockOfBlocks $ elementWithContents tgNameBodyHeader $ concatMap rowToXML hrows
    body_rows = asBlockOfBlocks $ elementWithContents tgNameBodyBody $ concatMap rowToXML brows
    children = header_rows ++ body_rows

tableFootToXML :: TableFoot -> [Content]
tableFootToXML (TableFoot attr rows) = asBlockOfBlocks $ elementWithAttrAndContents "TableFoot" attr $ concatMap rowToXML rows

rowToXML :: Row -> [Content]
rowToXML (Row attr cells) = asBlockOfBlocks $ elementWithAttrAndContents "Row" attr $ concatMap cellToXML cells

cellToXML :: Cell -> [Content]
cellToXML (Cell (idn, cls, attrs) alignment (RowSpan rowspan) (ColSpan colspan) blocks) = asBlockOfBlocks $ elementWithAttrAndContents "Cell" attr $ blocksToXML blocks
  where
    with_alignment a = (atNameAlignment, itemName alignment) : a
    with_rowspan a = if rowspan > 1 then (atNameRowspan, intAsText rowspan) : a else a
    with_colspan a = if colspan > 1 then (atNameColspan, intAsText colspan) : a else a
    attrs' = (with_colspan . with_rowspan . with_alignment) attrs
    attr = (idn, cls, attrs')