aboutsummaryrefslogtreecommitdiff
path: root/pandoc-server/src/Text/Pandoc/Server.hs
blob: bfa2efb32431e04851e5b9c222bd56e6e9d33cad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
{-# LANGUAGE DataKinds       #-}
{-# LANGUAGE DeriveGeneric   #-}
{-# LANGUAGE TypeOperators   #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Server
    ( app
    , API
    , ServerOpts(..)
    , Params(..)
    , Blob(..)
    , parseServerOptsFromArgs
    ) where

import Data.Aeson
import qualified Data.Aeson.KeyMap as KeyMap
import Network.Wai
import Servant
import Text.DocTemplates as DocTemplates
import Text.Pandoc
import Text.Pandoc.Writers.Shared (lookupMetaString)
import Text.Pandoc.Citeproc (processCitations)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import Text.Pandoc.Chunks (PathTemplate(..))
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Maybe (fromMaybe)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Base64 as Base64 (decodeLenient, encode)
import Data.Default
import Control.Monad (when, unless, foldM)
import qualified Data.Set as Set
import Skylighting (defaultSyntaxMap)
import qualified Data.Map as M
import Text.Collate.Lang (Lang (..), parseLang)
import System.Console.GetOpt
import System.Environment (getProgName)
import qualified Control.Exception as E
import Text.Pandoc.Shared (safeStrRead)
import Text.Pandoc.App ( IpynbOutput (..), Opt(..), defaultOpts )
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Format (parseFlavoredFormat, formatName)
import Text.Pandoc.SelfContained (makeSelfContained)
import Text.Pandoc.Transforms (headerShift, filterIpynbOutput,
                               eastAsianLineBreakFilter)
import System.Exit
import GHC.Generics (Generic)
import Network.Wai.Middleware.Cors ( cors,
           simpleCorsResourcePolicy, CorsResourcePolicy(corsRequestHeaders) )

-- | Settings for the server that can be set from the command line.
-- 'serverPort' is the port number (@-p@/@--port@) and 'serverTimeout'
-- is the timeout in seconds (@-t@/@--timeout@).
data ServerOpts =
  ServerOpts
    { serverPort    :: Int
    , serverTimeout :: Int }
  deriving (Show)

-- | Settings used when no command-line option overrides them:
-- port 3030 and a timeout of 2 seconds.
defaultServerOpts :: ServerOpts
defaultServerOpts =
  ServerOpts
    { serverTimeout = 2
    , serverPort    = 3030
    }

-- | Descriptions of the supported command-line options.  Each option
-- yields an action that updates the 'ServerOpts' accumulated so far;
-- @--help@ and @--version@ print their message and exit successfully.
cliOptions :: [OptDescr (ServerOpts -> IO ServerOpts)]
cliOptions =
  [ Option ['p'] ["port"]
      (numberArg $ \n opts -> opts{ serverPort = n })
      "port number"
  , Option ['t'] ["timeout"]
      (numberArg $ \n opts -> opts{ serverTimeout = n })
      "timeout (seconds)"
  , Option ['h'] ["help"]
      (NoArg $ \_ -> do
        progName <- getProgName
        putStrLn $
          usageInfo ("Usage: " <> progName <> " [OPTION...]") cliOptions
        exitSuccess)
      "help message"
  , Option ['v'] ["version"]
      (NoArg $ \_ -> do
        progName <- getProgName
        putStrLn (unwords [progName, T.unpack pandocVersionText])
        exitSuccess)
      "version info"
  ]
 where
  -- A required numeric argument: parse it and apply the given record
  -- update, or throw a 'PandocOptionError' if it is not a number.
  numberArg :: (Int -> ServerOpts -> ServerOpts)
            -> ArgDescr (ServerOpts -> IO ServerOpts)
  numberArg update = ReqArg parseNumber "NUMBER"
   where
    parseNumber raw opts =
      case safeStrRead raw of
        Just n  -> return (update n opts)
        Nothing -> E.throwIO $ PandocOptionError $
                     T.pack raw <> " is not a number"

-- | Build the 'ServerOpts' from a list of command-line arguments,
-- starting from 'defaultServerOpts'.  Throws a 'PandocOptionError'
-- when there are parse errors, unrecognized options, or stray
-- non-option arguments.
parseServerOptsFromArgs :: [String] -> IO ServerOpts
parseServerOptsFromArgs args = do
  let (setters, strayArgs, unknownOpts, errs) =
        getOpt' Permute cliOptions args
      describeUnknown o = "Unknown option: " <> o
  when (any (not . null) [errs, unknownOpts]) $
    E.throwIO $ PandocOptionError $ T.pack $
      concat errs ++ unlines (map describeUnknown unknownOpts) ++
      "Try --help for more information."
  unless (null strayArgs) $
    E.throwIO $ PandocOptionError $ T.pack $
      "Unknown arguments: " <> unwords strayArgs
  -- Apply the option actions left to right, threading the settings.
  foldM (\acc setter -> setter acc) defaultServerOpts setters

-- | Binary data.  The JSON instances in this module represent it as a
-- base64-encoded string.
newtype Blob = Blob BL.ByteString
  deriving (Show, Eq)

-- A Blob is serialized as a JSON string holding its base64 encoding.
instance ToJSON Blob where
  toJSON (Blob payload) = toJSON base64Text
    where
      base64Text = UTF8.toText (Base64.encode (BL.toStrict payload))

-- A Blob is read from a JSON string containing base64 data, decoded
-- with the lenient decoder.
instance FromJSON Blob where
  parseJSON = withText "Blob" $ \encoded ->
    let raw = Base64.decodeLenient (UTF8.fromText encoded)
    in  pure (Blob (BL.fromStrict raw))

-- | This is the data to be supplied by the JSON payload
-- of requests.  Maybe values may be omitted and will be
-- given default values.
data Params = Params
  { options               :: Opt
    -- ^ pandoc options for the conversion
  , text                  :: Text
    -- ^ the input to convert (base64-decoded first when the selected
    -- reader is a ByteString reader)
  , files                 :: Maybe (M.Map FilePath Blob)
    -- ^ files to make available in the in-memory file system used
    -- during conversion, keyed by path
  , citeproc              :: Maybe Bool
    -- ^ citations are processed only when this is @Just True@
  } deriving (Show)

-- The default request: default pandoc options, empty input text,
-- no extra files, and no citeproc setting.
instance Default Params where
  def =
    Params
      { text     = T.empty
      , options  = defaultOpts
      , citeproc = Nothing
      , files    = Nothing
      }

-- Hand-written JSON decoder.  The pandoc options are parsed from the
-- same top-level object that carries the "text", "files" and
-- "citeproc" keys; "text" is required, the other two are optional.
instance FromJSON Params where
  parseJSON = withObject "Params" $ \obj -> do
    opts    <- parseJSON (Object obj)
    body    <- obj .: "text"
    mbFiles <- obj .:? "files"
    mbCite  <- obj .:? "citeproc"
    pure Params
      { options  = opts
      , text     = body
      , files    = mbFiles
      , citeproc = mbCite
      }

-- Encodes the options as a JSON object and adds the "text", "files"
-- and "citeproc" keys to that same object.  If the options do not
-- encode to an object, their encoding is returned unchanged.
instance ToJSON Params where
  toJSON params =
    case toJSON (options params) of
      Object optFields ->
        Object (foldr (uncurry KeyMap.insert) optFields extraFields)
      nonObject -> nonObject
    where
      extraFields =
        [ ("text", toJSON (text params))
        , ("files", toJSON (files params))
        , ("citeproc", toJSON (citeproc params))
        ]

-- | A log message reported alongside a successful JSON conversion:
-- the message's verbosity level and its rendered text.
data Message =
  Message
  { verbosity :: Verbosity
  , message   :: Text }
  deriving (Generic, Show)

-- Encoded generically, using aeson's default options.
instance ToJSON Message where
 toEncoding = genericToEncoding defaultOptions

-- | Whether the accompanying output text is base64-encoded.
type Base64 = Bool

-- | Result of a conversion as returned by the JSON endpoints: either
-- the output text, a flag saying whether that text is base64-encoded,
-- and the log messages; or an error message.
data Output = Succeeded Text Base64 [Message]
            | Failed Text
  deriving (Generic, Show)

-- A success is encoded as an object with "output", "base64" and
-- "messages" keys; a failure as an object with a single "error" key.
--
-- 'toJSON' is defined explicitly so that it agrees with 'toEncoding'.
-- Without it the class default would be used, which (because 'Output'
-- derives 'Generic') produces the generic constructor-tagged encoding
-- rather than the shape below.
instance ToJSON Output where
  toJSON (Succeeded o b m) = object
    [ "output" .= o
    , "base64" .= b
    , "messages" .= m ]
  toJSON (Failed errmsg) = object
    [ "error" .= errmsg ]

  toEncoding (Succeeded o b m) = pairs
    ( "output" .= o  <>
      "base64" .= b  <>
      "messages" .= m )
  toEncoding (Failed errmsg) = pairs
    ( "error" .= errmsg )

-- This is the API.  The root endpoint takes a POST request whose body
-- is a JSON-encoded Params structure and responds with raw bytes,
-- plain text, or JSON, depending on the Accept header.  "batch" takes
-- a POST request with a JSON list of Params and responds with a JSON
-- list of results.  "babelmark" responds to GET requests, taking its
-- input from the "text", "from", "to" and "standalone" query
-- parameters.  "version" responds to GET requests with the version
-- as plain text or JSON.
type API =
  ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString
  :<|>
  ReqBody '[JSON] Params :> Post '[PlainText] Text
  :<|>
  ReqBody '[JSON] Params :> Post '[JSON] Output
  :<|>
  "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Output]
  :<|>
  "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value
  :<|>
  "version" :> Get '[PlainText, JSON] Text

-- | The WAI application: the servant server for 'API', wrapped in the
-- CORS middleware defined in this module.
app :: Application
app = corsWithContentType (serve api server)

-- | Allow the Content-Type header with values other than those
-- allowed by simpleCors.
corsWithContentType :: Middleware
corsWithContentType = cors (\_ -> Just contentTypePolicy)
  where
    contentTypePolicy =
      simpleCorsResourcePolicy { corsRequestHeaders = ["Content-Type"] }

-- | Proxy naming the 'API' type; passed to 'serve' in 'app'.
api :: Proxy API
api = Proxy

-- | Handlers for 'API', listed in the same order as the alternatives
-- of the API type: raw bytes, plain text, JSON, batch (the JSON
-- handler mapped over the list), babelmark, and version.
server :: Server API
server = convertBytes
    :<|> convertText
    :<|> convertJSON
    :<|> mapM convertJSON
    :<|> babelmark  -- for babelmark which expects {"html": "", "version": ""}
    :<|> pure pandocVersionText
 where
  -- Handler for the babelmark endpoint: convert the given text with
  -- otherwise default options and wrap the result, together with the
  -- pandoc version, in a JSON object.
  babelmark text' from' to' standalone' = do
    let babelOpts = defaultOpts{ optFrom = from'
                               , optTo = to'
                               , optStandalone = standalone' }
    rendered <- convertText def{ text = text', options = babelOpts }
    return $ object [ "html" .= rendered, "version" .= pandocVersion ]

  -- The conversions are run with runPure, which guarantees that they
  -- perform no IO.  This makes the server safe to use, at the cost
  -- that features requiring IO, like RST includes, will not work.
  -- Changing this to
  --    handleErr =<< liftIO (runIO (convert' params))
  -- will allow the IO operations.
  --
  -- Plain-text handler: textual output is returned as is, binary
  -- output is returned base64-encoded.
  convertText params =
    handleErr (runPure (convert' return encodeBinary params))
    where
      encodeBinary bytes =
        return (UTF8.toText (Base64.encode (BL.toStrict bytes)))

  -- Octet-stream handler: textual output is UTF-8 encoded, binary
  -- output is returned as raw bytes.
  convertBytes params =
    handleErr . runPure $
      convert' (pure . UTF8.fromText) (pure . BL.toStrict) params

  -- JSON handler: the output (base64-encoded and flagged as such when
  -- binary) is returned together with the log messages collected
  -- during the conversion.
  convertJSON params =
    handleErrJSON (runPure (convert' onText onBytes params))
    where
      -- Finish a result by attaching the current log messages.
      withLog mkOutput = mkOutput . map toMessage <$> getLog
      onText t = withLog (Succeeded t False)
      onBytes bs =
        withLog (Succeeded (UTF8.toText (Base64.encode (BL.toStrict bs)))
                           True)

  -- Convert a pandoc log message to the form used in JSON responses.
  toMessage logMsg =
    Message (messageVerbosity logMsg) (showLogMessage logMsg)

  -- Run one conversion described by a Params value inside PandocPure.
  --
  -- Arguments:
  --   textHandler  applied to the result when the writer produces text
  --   bsHandler    applied to the result when the writer produces bytes
  --   params       the request: options, input text, extra files and
  --                the citeproc flag
  --
  -- Steps: install the request's files in the in-memory file tree,
  -- pick reader and writer from optFrom / optTo (defaulting to
  -- "markdown" and "html"), build reader and writer options from the
  -- Opt record, read the input, apply the transforms and metadata,
  -- optionally process citations, and finally write the document.
  convert' :: (Text -> PandocPure a)
           -> (BL.ByteString -> PandocPure a)
           -> Params -> PandocPure a
  convert' textHandler bsHandler params = do
    curtime <- getCurrentTime
    -- put files params in ersatz file system
    let addFile :: FilePath -> Blob -> FileTree -> FileTree
        addFile fp (Blob lbs) =
          insertInFileTree fp FileInfo{ infoFileMTime = curtime
                                      , infoFileContents = BL.toStrict lbs }
    case files params of
      Nothing -> return ()
      Just fs -> do
        let filetree = M.foldrWithKey addFile mempty fs
        modifyPureState $ \st -> st{ stFiles = filetree }

    let opts = options params
    readerFormat <- parseFlavoredFormat (fromMaybe "markdown" (optFrom opts))
    writerFormat <- parseFlavoredFormat (fromMaybe "html" (optTo opts))
    (readerSpec, readerExts) <- getReader readerFormat
    (writerSpec, writerExts) <- getWriter writerFormat

    let isStandalone = optStandalone opts
    -- Name of the output format as given by formatName on the parsed
    -- writer format.
    let toformat = formatName writerFormat
    hlStyle <- case optSyntaxHighlighting opts of
      "none"      -> pure NoHighlighting
      "idiomatic" -> pure IdiomaticHighlighting
      "default"   -> pure DefaultHighlighting
      s           -> Skylighting <$> lookupHighlightingStyle (T.unpack s)

    -- A template is only needed (and compiled) for standalone output.
    mbTemplate <- if isStandalone
                     then case optTemplate opts of
                            Nothing -> Just <$>
                              compileDefaultTemplate toformat
                            Just t  -> Just <$>
                              compileCustomTemplate toformat t
                     else return Nothing

    -- One abbreviation per non-empty line, from the given file or from
    -- the "abbreviations" data file.
    abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
                 case optAbbreviations opts of
                      Nothing -> readDataFile "abbreviations"
                      Just f  -> readFileStrict f

    let readeropts = def{ readerExtensions = readerExts
                        , readerStandalone = isStandalone
                        , readerTabStop = optTabStop opts
                        , readerIndentedCodeClasses =
                            optIndentedCodeClasses opts
                        , readerAbbreviations = abbrevs
                        , readerDefaultImageExtension =
                            optDefaultImageExtension opts
                        , readerTrackChanges = optTrackChanges opts
                        , readerStripComments = optStripComments opts
                        }

    let writeropts = WriterOptions
             { writerExtensions = writerExts
             , writerTabStop = optTabStop opts
             , writerWrapText = optWrap opts
             , writerColumns = optColumns opts
             , writerTemplate = mbTemplate
             , writerSyntaxMap = defaultSyntaxMap
             , writerVariables = optVariables opts
             , writerTableOfContents = optTableOfContents opts
             , writerListOfFigures = optListOfFigures opts
             , writerListOfTables = optListOfTables opts
             , writerIncremental = optIncremental opts
             , writerHTMLMathMethod = optHTMLMathMethod opts
             , writerNumberSections = optNumberSections opts
             , writerNumberOffset = optNumberOffset opts
             , writerSectionDivs = optSectionDivs opts
             , writerReferenceLinks = optReferenceLinks opts
             , writerDpi = optDpi opts
             , writerEmailObfuscation = optEmailObfuscation opts
             , writerIdentifierPrefix = optIdentifierPrefix opts
             , writerCiteMethod = optCiteMethod opts
             , writerHtmlQTags = optHtmlQTags opts
             , writerSlideLevel = optSlideLevel opts
             , writerTopLevelDivision = optTopLevelDivision opts
             , writerHighlightMethod = hlStyle
             , writerSetextHeaders = optSetextHeaders opts
             , writerListTables = optListTables opts
             , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts
             , writerEpubMetadata = T.pack <$> optEpubMetadata opts
             , writerEpubFonts = optEpubFonts opts
             , writerEpubTitlePage    = optEpubTitlePage opts
             , writerSplitLevel = optSplitLevel opts
             , writerChunkTemplate = maybe (PathTemplate "%s-%i.html")
                                         PathTemplate
                                         (optChunkTemplate opts)
             , writerTOCDepth = optTOCDepth opts
             , writerReferenceDoc = optReferenceDoc opts
             , writerReferenceLocation = optReferenceLocation opts
             , writerFigureCaptionPosition = optFigureCaptionPosition opts
             , writerTableCaptionPosition = optTableCaptionPosition opts
             , writerPreferAscii = optAscii opts
             , writerLinkImages = optLinkImages opts
             }

    -- For ByteString readers the request text is base64-decoded
    -- (leniently) before being handed to the reader.
    let reader = case readerSpec of
                TextReader r -> r readeropts
                ByteStringReader r ->
                  r readeropts . BL.fromStrict . Base64.decodeLenient
                    . UTF8.fromText

    let writer d@(Pandoc meta _) = do
          -- Set translations from the document's "lang" metadata,
          -- using en-US when it is absent; an unparseable value is
          -- only reported.
          case lookupMetaString "lang" meta of
              ""      -> setTranslations $
                            Lang "en" Nothing (Just "US") [] [] []
              l       -> case parseLang l of
                              Left _   -> report $ InvalidLang l
                              Right l' -> setTranslations l'
          case writerSpec of
                TextWriter w ->
                  w writeropts d >>=
                    (if optEmbedResources opts && htmlFormat (optTo opts)
                        then makeSelfContained
                        else return) >>=
                    textHandler
                ByteStringWriter w ->
                  w writeropts d >>= bsHandler

    let transforms :: Pandoc -> Pandoc
        transforms = (case optShiftHeadingLevelBy opts of
                        0             -> id
                        x             -> headerShift x) .
                   (if extensionEnabled Ext_east_asian_line_breaks
                          readerExts &&
                       not (extensionEnabled Ext_east_asian_line_breaks
                              writerExts &&
                            optWrap opts == WrapPreserve)
                       then eastAsianLineBreakFilter
                       else id) .
                   (case optIpynbOutput opts of
                     IpynbOutputAll  -> id
                     IpynbOutputNone -> filterIpynbOutput Nothing
                     IpynbOutputBest -> filterIpynbOutput (Just $
                       -- Match on the parsed format name rather than the
                       -- raw "to" string, so that a value carrying
                       -- extension modifiers (e.g. "latex+smart") is
                       -- still recognized.
                       case toformat of
                            "latex"  -> Format "latex"
                            "beamer" -> Format "latex"
                            f
                              | htmlFormat (Just f) -> Format "html"
                              | otherwise -> Format f))

    -- Metadata from the options, with the bibliography, csl and
    -- citation-abbreviations settings added when they are given.
    let meta =   (case optBibliography opts of
                   [] -> id
                   fs -> setMeta "bibliography" (MetaList
                            (map (MetaString . T.pack) fs))) .
                 maybe id (setMeta "csl" . MetaString . T.pack)
                   (optCSL opts) .
                 maybe id (setMeta "citation-abbreviations" . MetaString .
                              T.pack)
                   (optCitationAbbreviations opts) $
                 optMetadata opts

    let addMetadata m' (Pandoc m bs) = Pandoc (m <> m') bs

    doc <- transforms . addMetadata meta <$> reader (text params)
    -- Citations are processed only on explicit request.
    cited <- case citeproc params of
               Just True -> processCitations doc
               _         -> return doc
    writer cited

  -- True when no format is given, or when the format name starts with
  -- one of the listed HTML-based format names.
  htmlFormat :: Maybe Text -> Bool
  htmlFormat mbFormat =
    case mbFormat of
      Nothing  -> True
      Just fmt -> any (`T.isPrefixOf` fmt) htmlNames
    where
      htmlNames =
        ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"]

  -- Return a successful result; turn a pandoc error into an HTTP 500
  -- whose body is the rendered error message.
  handleErr result =
    case result of
      Right t  -> return t
      Left err ->
        let body = TLE.encodeUtf8 (TL.fromStrict (renderError err))
        in  throwError err500{ errBody = body }

  -- Return a successful result unchanged; report a pandoc error as a
  -- Failed output carrying the rendered error message.
  handleErrJSON result =
    return (either (Failed . renderError) id result)

  -- Compile a user-supplied template, using "custom.<format>" as the
  -- template path; a compilation failure is rethrown as a
  -- PandocTemplateError.
  compileCustomTemplate toformat t =
    runWithPartials (compileTemplate templatePath (T.pack t)) >>=
      either (throwError . PandocTemplateError . T.pack) return
    where
      templatePath = "custom." <> T.unpack toformat