aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Format.hs
blob: 3dfc0ce37e6fb15a33741ca895cef5256546e4e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Format
   Copyright   : © 2022-2024 Albert Krewinkel
   License     : GPL-2.0-or-later
   Maintainer  : Albert Krewinkel <[email protected]>

Handling of format specifiers for input and output.
-}
module Text.Pandoc.Format
  ( FlavoredFormat (..)
  , ExtensionsConfig (..)
  , ExtensionsDiff (..)
  , diffExtensions
  , parseFlavoredFormat
  , applyExtensionsDiff
  , getExtensionsConfig
  , formatFromFilePaths
  ) where

import Control.Monad.Except (throwError)
import Data.Char (toLower)
import Data.Foldable (asum)
import Data.List (foldl')
import System.FilePath (splitExtension, takeExtension)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Extensions
  ( Extension (Ext_literate_haskell)
  , Extensions
  , disableExtensions
  , enableExtension
  , extensionsFromList
  , extensionsToList
  , getAllExtensions
  , getDefaultExtensions
  , showExtension
  , readExtension
  )
import Network.URI (URI (..), parseURI)
import Text.Pandoc.Parsing
import qualified Data.Text as T

-- | Format specifier with the format's name and the lists of extensions
-- to be enabled or disabled.
data FlavoredFormat = FlavoredFormat
  { formatName     :: T.Text
  , formatExtsDiff :: ExtensionsDiff
  } deriving (Show)

-- | Changes to a set of extensions, i.e., list of extensions to be
-- enabled or disabled.
data ExtensionsDiff = ExtensionsDiff
  { extsToEnable  :: Extensions
  , extsToDisable :: Extensions
  } deriving (Show)

instance Semigroup ExtensionsDiff where
  ExtensionsDiff enA disA <> ExtensionsDiff enB disB =
    ExtensionsDiff
    ((enA `disableExtensions` disB) <> enB)
    ((disA `disableExtensions` enB) <> disB)

instance Monoid ExtensionsDiff where
  mempty = ExtensionsDiff mempty mempty
  mappend = (<>)

-- | Calculate the change set to get from one set of extensions to
-- another.
diffExtensions :: Extensions -> Extensions -> ExtensionsDiff
diffExtensions def actual = ExtensionsDiff
  { extsToEnable = actual `disableExtensions` def
  , extsToDisable = def `disableExtensions` actual
  }

-- | Describes the properties of a format.
data ExtensionsConfig = ExtensionsConfig
  { extsDefault   :: Extensions -- ^ Extensions enabled by default
  , extsSupported :: Extensions -- ^ Extensions that can be enabled or disabled.
  } deriving (Show)

-- | Returns the extensions configuration of a format.
getExtensionsConfig :: T.Text -> ExtensionsConfig
getExtensionsConfig fmt = ExtensionsConfig
  { extsDefault = getDefaultExtensions fmt
  , extsSupported = getAllExtensions fmt
  }

instance Semigroup ExtensionsConfig where
  ExtensionsConfig x1 y1 <> ExtensionsConfig x2 y2 =
    ExtensionsConfig (x1 <> x2) (y1 <> y2)

instance Monoid ExtensionsConfig where
  mappend = (<>)
  mempty = ExtensionsConfig mempty mempty

-- | Apply the extension changes in the format spec to the extensions
-- given in the format's extensions configuration. Throws an error in
-- case of an unknown or unsupported extension.
applyExtensionsDiff :: PandocMonad m
                    => ExtensionsConfig
                    -> FlavoredFormat
                    -> m Extensions
applyExtensionsDiff extConf (FlavoredFormat fname extsDiff) = do
  let extsInDiff  = extsToEnable extsDiff <> extsToDisable extsDiff
  let unsupported = extsInDiff `disableExtensions` (extsSupported extConf)
  case extensionsToList unsupported of
    ext:_ -> throwError $ PandocUnsupportedExtensionError
             (showExtension ext) fname
    []    -> pure ((extsDefault extConf `disableExtensions`
                    extsToDisable extsDiff) <> extsToEnable extsDiff)

-- | Parse a format-specifying string into a markup format and the
-- change set to the format's extensions. Throws an error if the spec
-- cannot be parsed or contains an unknown extension.
parseFlavoredFormat :: PandocMonad m
                    => T.Text
                    -> m FlavoredFormat
parseFlavoredFormat spec =
  -- Paths like `latex-foo-bar.lua` or `latex-smart-citations.lua`
  -- should be parsed as the format name. The `-` (or `+`) in the
  -- filename would confuse the extensions parser, so, if `spec` looks
  -- like a filename, the file's basename is split off into the prefix.
  -- Only the remaining part is parsed, and the prefix is appended back
  -- to the format after parsing.
  case parse (fixSourcePos *> formatSpec) "" spec' of
    Right (fname, extsDiff) -> pure (FlavoredFormat (prefix <> fname) extsDiff)
    Left err -> throwError $ PandocFormatError spec (T.pack $ show err)
  where
    fixSourcePos = do
      pos <- getPosition
      setPosition (incSourceColumn pos (T.length prefix))
    formatSpec = do
      name <- parseFormatName
      extsDiff <- pExtensionsDiff
      return ( T.pack name, extsDiff )
    parseFormatName = many1 $ noneOf "-+"
    (prefix, spec') = case splitExtension (T.unpack spec) of
                        (_, "") -> ("", T.toLower spec) -- no extension
                        (p,s)   -> (T.pack p, T.pack s)

pExtensionsDiff :: (UpdateSourcePos s Char, Stream s m Char)
                => ParsecT s u m ExtensionsDiff
pExtensionsDiff = foldl' (flip ($)) mempty <$> many extMod
  where
    extMod = do
      polarity <- oneOf "-+"
      name <- many $ noneOf "-+"
      let ext = readExtension name
      return $ \extsDiff ->
        case polarity of
          '+' -> extsDiff{extsToEnable  = enableExtension ext $
                                          extsToEnable extsDiff}
          _   -> extsDiff{extsToDisable = enableExtension ext $
                                          extsToDisable extsDiff}

-- | Determines default format based on file extensions; uses the format
-- of the first extension that's associated with a format.
--
-- Examples:
--
-- > formatFromFilePaths ["text.unknown", "no-extension"]
-- Nothing
--
-- > formatFromFilePaths ["my.md", "other.rst"]
-- Just "markdown"
formatFromFilePaths :: [FilePath] -> (Maybe FlavoredFormat)
formatFromFilePaths = asum . map formatFromFilePath

-- | Determines format based on file extension.
formatFromFilePath :: FilePath -> Maybe FlavoredFormat
formatFromFilePath x =
  case takeExtension (map toLower fpath) of
    ".Rmd"      -> defFlavor "markdown"
    ".adoc"     -> defFlavor "asciidoc"
    ".asciidoc" -> defFlavor "asciidoc"
    ".bib"      -> defFlavor "biblatex"
    ".context"  -> defFlavor "context"
    ".csv"      -> defFlavor "csv"
    ".ctx"      -> defFlavor "context"
    ".db"       -> defFlavor "docbook"
    ".dj"       -> defFlavor "djot"
    ".doc"      -> defFlavor "doc"  -- so we get an "unknown reader" error
    ".docx"     -> defFlavor "docx"
    ".dokuwiki" -> defFlavor "dokuwiki"
    ".epub"     -> defFlavor "epub"
    ".fb2"      -> defFlavor "fb2"
    ".htm"      -> defFlavor "html"
    ".html"     -> defFlavor "html"
    ".icml"     -> defFlavor "icml"
    ".ipynb"    -> defFlavor "ipynb"
    ".json"     -> defFlavor "json"
    ".latex"    -> defFlavor "latex"
    ".lhs"      -> defFlavor "markdown" `withExtension` Ext_literate_haskell
    ".ltx"      -> defFlavor "latex"
    ".markdown" -> defFlavor "markdown"
    ".markua"   -> defFlavor "markua"
    ".md"       -> defFlavor "markdown"
    ".mdown"    -> defFlavor "markdown"
    ".mdwn"     -> defFlavor "markdown"
    ".mkd"      -> defFlavor "markdown"
    ".mkdn"     -> defFlavor "markdown"
    ".ms"       -> defFlavor "ms"
    ".muse"     -> defFlavor "muse"
    ".native"   -> defFlavor "native"
    ".odt"      -> defFlavor "odt"
    ".opml"     -> defFlavor "opml"
    ".org"      -> defFlavor "org"
    ".pdf"      -> defFlavor "pdf"  -- so we get an "unknown reader" error
    ".pl"       -> defFlavor "pod"
    ".pm"       -> defFlavor "pod"
    ".pod"      -> defFlavor "pod"
    ".pptx"     -> defFlavor "pptx"
    ".ris"      -> defFlavor "ris"
    ".roff"     -> defFlavor "ms"
    ".rst"      -> defFlavor "rst"
    ".rtf"      -> defFlavor "rtf"
    ".s5"       -> defFlavor "s5"
    ".t2t"      -> defFlavor "t2t"
    ".tei"      -> defFlavor "tei"
    ".tex"      -> defFlavor "latex"
    ".texi"     -> defFlavor "texinfo"
    ".texinfo"  -> defFlavor "texinfo"
    ".text"     -> defFlavor "markdown"
    ".textile"  -> defFlavor "textile"
    ".tsv"      -> defFlavor "tsv"
    ".txt"      -> defFlavor "markdown"
    ".typ"      -> defFlavor "typst"
    ".wiki"     -> defFlavor "mediawiki"
    ".xhtml"    -> defFlavor "html"
    ['.',y]     | y `elem` ['1'..'9'] -> defFlavor "man"
    _           -> Nothing
 where
  defFlavor f = Just (FlavoredFormat f mempty)
  withExtension Nothing _ = Nothing
  withExtension (Just (FlavoredFormat f ed)) ext = Just $
    FlavoredFormat f (ed <> ExtensionsDiff (extensionsFromList [ext]) mempty)
  fpath = case parseURI x of
            Nothing -> x
            Just URI{ uriPath = "" } -> "index.html"
            Just URI{ uriPath = "/" } -> "index.html"
            Just URI{ uriPath = up } -> up