1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
|
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Server
( app
, API
, ServerOpts(..)
, Params(..)
, Blob(..)
, parseServerOptsFromArgs
) where
import Data.Aeson
import qualified Data.Aeson.KeyMap as KeyMap
import Network.Wai
import Servant
import Text.DocTemplates as DocTemplates
import Text.Pandoc
import Text.Pandoc.Writers.Shared (lookupMetaString)
import Text.Pandoc.Citeproc (processCitations)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import Text.Pandoc.Chunks (PathTemplate(..))
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.Maybe (fromMaybe)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Base64 as Base64 (decodeLenient, encode)
import Data.Default
import Control.Monad (when, unless, foldM)
import qualified Data.Set as Set
import Skylighting (defaultSyntaxMap)
import qualified Data.Map as M
import Text.Collate.Lang (Lang (..), parseLang)
import System.Console.GetOpt
import System.Environment (getProgName)
import qualified Control.Exception as E
import Text.Pandoc.Shared (safeStrRead)
import Text.Pandoc.App ( IpynbOutput (..), Opt(..), defaultOpts )
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Format (parseFlavoredFormat, formatName)
import Text.Pandoc.SelfContained (makeSelfContained)
import Text.Pandoc.Transforms (headerShift, filterIpynbOutput,
eastAsianLineBreakFilter)
import System.Exit
import GHC.Generics (Generic)
import Network.Wai.Middleware.Cors ( cors,
simpleCorsResourcePolicy, CorsResourcePolicy(corsRequestHeaders) )
-- | Settings that control how the server is run.
data ServerOpts = ServerOpts
  { serverPort    :: Int  -- ^ port number (the @--port@ option)
  , serverTimeout :: Int  -- ^ timeout in seconds (the @--timeout@ option)
  } deriving Show
-- | Options in effect when no command-line flag overrides them:
-- port 3030 and a timeout of 2 seconds.
defaultServerOpts :: ServerOpts
defaultServerOpts =
  ServerOpts
    { serverPort = 3030
    , serverTimeout = 2
    }
-- | Command-line options understood by the server.  Each option is
-- described as an action that updates the 'ServerOpts' built so far;
-- @--help@ and @--version@ print their message and exit successfully
-- instead of returning.
cliOptions :: [OptDescr (ServerOpts -> IO ServerOpts)]
cliOptions =
  [ Option ['p'] ["port"]
      (numericArg (\n opts -> opts{ serverPort = n }))
      "port number"
  , Option ['t'] ["timeout"]
      (numericArg (\n opts -> opts{ serverTimeout = n }))
      "timeout (seconds)"
  , Option ['h'] ["help"]
      (NoArg showHelp)
      "help message"
  , Option ['v'] ["version"]
      (NoArg showVersion)
      "version info"
  ]
 where
  -- An option taking a required numeric argument; a value that cannot be
  -- read as a number is reported as a PandocOptionError.
  numericArg :: (Int -> ServerOpts -> ServerOpts)
             -> ArgDescr (ServerOpts -> IO ServerOpts)
  numericArg update = ReqArg parseNumber "NUMBER"
   where
    parseNumber raw opts =
      maybe (E.throwIO $ PandocOptionError $
               T.pack raw <> " is not a number")
            (\n -> return (update n opts))
            (safeStrRead raw)

  -- Print the usage summary and exit.
  showHelp _ = do
    progName <- getProgName
    putStrLn $ usageInfo ("Usage: " <> progName <> " [OPTION...]") cliOptions
    exitSuccess

  -- Print "<program> <pandoc version>" and exit.
  showVersion _ = do
    progName <- getProgName
    putStrLn $ progName <> " " <> T.unpack pandocVersionText
    exitSuccess
-- | Build 'ServerOpts' from a list of command-line arguments.
--
-- Starts from 'defaultServerOpts' and applies every recognized option in
-- turn.  Throws a 'PandocOptionError' (with 'E.throwIO') when an option is
-- malformed or unrecognized, or when non-option arguments are present.
parseServerOptsFromArgs :: [String] -> IO ServerOpts
parseServerOptsFromArgs args = do
  let (setters, extraArgs, unknownOpts, errs) =
        getOpt' Permute cliOptions args
      describeUnknown opt = "Unknown option: " <> opt
  -- Option errors and unknown options are reported together.
  unless (null errs && null unknownOpts) $
    E.throwIO $ PandocOptionError $ T.pack $
      concat errs
        ++ unlines (map describeUnknown unknownOpts)
        ++ "Try --help for more information."
  -- The server takes no positional arguments.
  unless (null extraArgs) $
    E.throwIO $ PandocOptionError $ T.pack $
      "Unknown arguments: " <> unwords extraArgs
  foldM (\opts applyOpt -> applyOpt opts) defaultServerOpts setters
-- | Binary data that is carried in JSON as a base64-encoded string.
newtype Blob = Blob BL.ByteString
  deriving (Eq, Show)
-- | Serialize the bytes as a JSON string holding their base64 encoding.
instance ToJSON Blob where
  toJSON (Blob lbs) = toJSON encoded
   where
    encoded = UTF8.toText (Base64.encode (BL.toStrict lbs))
-- | Read a JSON string as base64 and decode it into bytes.  Decoding goes
-- through 'Base64.decodeLenient', so this parser fails only when the JSON
-- value is not a string.
instance FromJSON Blob where
  parseJSON = withText "Blob" $ \encoded ->
    pure $ Blob $ BL.fromStrict $ Base64.decodeLenient $ UTF8.fromText encoded
-- | The data supplied by the JSON payload of a request.  Fields wrapped
-- in 'Maybe' may be omitted from the payload.
data Params = Params
  { options  :: Opt                          -- ^ conversion options
  , text     :: Text                         -- ^ the input to convert
  , files    :: Maybe (M.Map FilePath Blob)  -- ^ extra files, keyed by path
  , citeproc :: Maybe Bool                   -- ^ whether to process citations
  } deriving Show
-- | Default request parameters: 'defaultOpts', empty input text, no extra
-- files and no citeproc setting.
instance Default Params where
  def =
    Params
      { options = defaultOpts
      , text = T.empty
      , files = Nothing
      , citeproc = Nothing
      }
-- | Hand-written JSON parser for 'Params'.  The 'Opt' fields are read from
-- the same object that carries @text@, @files@ and @citeproc@; @text@ is
-- required, the other two keys are optional.
instance FromJSON Params where
  parseJSON = withObject "Params" $ \obj -> do
    opts        <- parseJSON (Object obj)
    input       <- obj .: "text"
    extraFiles  <- obj .:? "files"
    useCiteproc <- obj .:? "citeproc"
    pure Params{ options = opts
               , text = input
               , files = extraFiles
               , citeproc = useCiteproc
               }
-- | Serialize 'Params' as the JSON object of its 'Opt' with the @text@,
-- @files@ and @citeproc@ keys added.  If the options do not serialize to
-- an object, their encoding is returned unchanged.
instance ToJSON Params where
  toJSON params =
    case toJSON (options params) of
      Object optFields ->
        Object (foldr (uncurry KeyMap.insert) optFields extraFields)
      other -> other
   where
    extraFields =
      [ ("text", toJSON (text params))
      , ("files", toJSON (files params))
      , ("citeproc", toJSON (citeproc params))
      ]
-- | A log message returned alongside the output of a conversion.
data Message = Message
  { verbosity :: Verbosity  -- ^ verbosity level of the message
  , message   :: Text       -- ^ rendered message text
  } deriving (Show, Generic)
-- | JSON encoding obtained generically from the record structure, using
-- aeson's default options.
instance ToJSON Message where
  toEncoding msg = genericToEncoding defaultOptions msg
-- | Flag recording whether an output payload is base64-encoded.
type Base64 = Bool

-- | Result of a conversion as reported by the JSON endpoints.
data Output
  = Succeeded Text Base64 [Message]  -- ^ output, base64 flag, log messages
  | Failed Text                      -- ^ error message
  deriving (Show, Generic)
-- | JSON encoding of a conversion result.  A success is an object with the
-- keys @output@, @base64@ and @messages@; a failure is an object with the
-- single key @error@.
--
-- Both 'toJSON' and 'toEncoding' are defined so that they agree.  With only
-- 'toEncoding' given, 'toJSON' would fall back to the class default derived
-- from the 'Generic' instance, which produces a differently shaped value.
instance ToJSON Output where
  toJSON (Succeeded o b m) = object
    [ "output" .= o
    , "base64" .= b
    , "messages" .= m ]
  toJSON (Failed errmsg) = object
    [ "error" .= errmsg ]
  toEncoding (Succeeded o b m) = pairs
    ( "output" .= o <>
      "base64" .= b <>
      "messages" .= m )
  toEncoding (Failed errmsg) = pairs
    ( "error" .= errmsg )
-- | The API.  The conversion routes carry no path segment of their own:
-- each takes a JSON-encoded 'Params' structure as the request body and
-- responds to POST requests with raw bytes, plain text, or JSON, depending
-- on the Accept header.  @batch@ converts a list of 'Params' in a single
-- request, @babelmark@ answers GET requests driven by query parameters,
-- and @version@ returns the pandoc version.
type API =
       -- conversion, binary response
       ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString
       -- conversion, plain-text response
  :<|> ReqBody '[JSON] Params :> Post '[PlainText] Text
       -- conversion, JSON response
  :<|> ReqBody '[JSON] Params :> Post '[JSON] Output
       -- several conversions at once
  :<|> "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Output]
       -- babelmark: @text@ is required, the other parameters are optional
  :<|> "babelmark"
         :> QueryParam' '[Required] "text" Text
         :> QueryParam "from" Text
         :> QueryParam "to" Text
         :> QueryFlag "standalone"
         :> Get '[JSON] Value
       -- pandoc version
  :<|> "version" :> Get '[PlainText, JSON] Text
-- | The WAI application: the servant server for 'API', wrapped in the
-- CORS middleware.
app :: Application
app = corsWithContentType (serve api server)
-- | Allow the Content-Type header with values other than those allowed
-- by simpleCors.
corsWithContentType :: Middleware
corsWithContentType = cors (\_ -> Just contentTypePolicy)
 where
  contentTypePolicy =
    simpleCorsResourcePolicy{ corsRequestHeaders = ["Content-Type"] }
-- | Proxy naming the 'API' type, as passed to 'serve'.
api :: Proxy API
api = Proxy :: Proxy API
-- | Handlers for 'API', listed in the same order as the routes in the
-- type: binary, plain-text and JSON conversion, batch conversion,
-- babelmark, and version.
server :: Server API
server =
       convertBytes
  :<|> convertText
  :<|> convertJSON
  :<|> traverse convertJSON
  :<|> babelmark  -- for babelmark which expects {"html": "", "version": ""}
  :<|> pure pandocVersionText
 where
-- Handler for the babelmark endpoint: convert the given text with default
-- options apart from the input format, output format and standalone flag,
-- and return the result together with the pandoc version.
babelmark text' from' to' standalone' = do
  let babelOpts = defaultOpts{ optFrom = from'
                             , optTo = to'
                             , optStandalone = standalone' }
      babelParams = def{ text = text', options = babelOpts }
  rendered <- convertText babelParams
  return $ toJSON $ object [ "html" .= rendered, "version" .= pandocVersion ]
-- The pandoc conversions are run with runPure, which guarantees that they
-- perform no IO.  That makes the server safe to use, but it also means
-- that features requiring IO, like RST includes, will not work.
-- Running the conversion with runIO instead (lifted into the handler with
-- liftIO and passed to handleErr) would allow the IO operations; note that
-- convert' is currently typed for PandocPure.
--
-- Plain-text handler: text output is returned as is, binary output is
-- returned base64-encoded.
convertText params = handleErr (runPure conversion)
 where
  conversion = convert' return encodeBytes params
  encodeBytes bytes =
    return (UTF8.toText (Base64.encode (BL.toStrict bytes)))
-- Binary handler: text output is returned as UTF-8 bytes, binary output
-- as a strict ByteString.
convertBytes params =
  handleErr . runPure $
    convert' (pure . UTF8.fromText) (pure . BL.toStrict) params
-- JSON handler: wrap the output in Succeeded together with the log
-- messages collected during the conversion.  Binary output is
-- base64-encoded and flagged as such; errors become Failed values
-- (see handleErrJSON).
convertJSON params =
  handleErrJSON (runPure (convert' onText onBytes params))
 where
  -- Attach the log messages to a partially applied Succeeded.
  withLog mkOutput = mkOutput . map toMessage <$> getLog
  onText t = withLog (Succeeded t False)
  onBytes bs =
    withLog (Succeeded (UTF8.toText (Base64.encode (BL.toStrict bs))) True)
-- Turn a pandoc log message into the Message record returned to clients.
toMessage logMsg =
  Message{ message = showLogMessage logMsg
         , verbosity = messageVerbosity logMsg }
-- Run the conversion described by the Params inside PandocPure.
--
-- The first argument post-processes the output of a text writer, the
-- second the output of a bytestring writer; whichever matches the selected
-- writer produces the final result.
--
-- Steps, in order: install the supplied files in the pure file system,
-- resolve reader and writer (defaulting to markdown and html), build
-- reader and writer options from the Opt, read the input, apply the
-- document transforms and extra metadata, optionally process citations,
-- and write the output.
convert' :: (Text -> PandocPure a)
         -> (BL.ByteString -> PandocPure a)
         -> Params -> PandocPure a
convert' textHandler bsHandler params = do
  curtime <- getCurrentTime
  -- put files params in ersatz file system
  let addFile :: FilePath -> Blob -> FileTree -> FileTree
      addFile fp (Blob lbs) =
        insertInFileTree fp FileInfo{ infoFileMTime = curtime
                                    , infoFileContents = BL.toStrict lbs }
  case files params of
    Nothing -> return ()
    Just fs -> do
      let filetree = M.foldrWithKey addFile mempty fs
      modifyPureState $ \st -> st{ stFiles = filetree }

  let opts = options params
  -- Input defaults to markdown, output to html.
  readerFormat <- parseFlavoredFormat (fromMaybe "markdown" (optFrom opts))
  writerFormat <- parseFlavoredFormat (fromMaybe "html" (optTo opts))
  (readerSpec, readerExts) <- getReader readerFormat
  (writerSpec, writerExts) <- getWriter writerFormat

  let isStandalone = optStandalone opts
  -- Name of the output format as parsed by parseFlavoredFormat.
  let toformat = formatName writerFormat
  hlStyle <- case optSyntaxHighlighting opts of
    "none" -> pure NoHighlighting
    "idiomatic" -> pure IdiomaticHighlighting
    "default" -> pure DefaultHighlighting
    s -> Skylighting <$> lookupHighlightingStyle (T.unpack s)

  -- A template is only needed for standalone output.
  mbTemplate <- if isStandalone
                   then case optTemplate opts of
                          Nothing -> Just <$>
                            compileDefaultTemplate toformat
                          Just t -> Just <$>
                            compileCustomTemplate toformat t
                   else return Nothing

  -- One abbreviation per non-empty line, from the file named in the
  -- options or else from the "abbreviations" data file.
  abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
               case optAbbreviations opts of
                 Nothing -> readDataFile "abbreviations"
                 Just f -> readFileStrict f

  let readeropts = def{ readerExtensions = readerExts
                      , readerStandalone = isStandalone
                      , readerTabStop = optTabStop opts
                      , readerIndentedCodeClasses =
                          optIndentedCodeClasses opts
                      , readerAbbreviations = abbrevs
                      , readerDefaultImageExtension =
                          optDefaultImageExtension opts
                      , readerTrackChanges = optTrackChanges opts
                      , readerStripComments = optStripComments opts
                      }

  let writeropts = WriterOptions
        { writerExtensions = writerExts
        , writerTabStop = optTabStop opts
        , writerWrapText = optWrap opts
        , writerColumns = optColumns opts
        , writerTemplate = mbTemplate
        , writerSyntaxMap = defaultSyntaxMap
        , writerVariables = optVariables opts
        , writerTableOfContents = optTableOfContents opts
        , writerListOfFigures = optListOfFigures opts
        , writerListOfTables = optListOfTables opts
        , writerIncremental = optIncremental opts
        , writerHTMLMathMethod = optHTMLMathMethod opts
        , writerNumberSections = optNumberSections opts
        , writerNumberOffset = optNumberOffset opts
        , writerSectionDivs = optSectionDivs opts
        , writerReferenceLinks = optReferenceLinks opts
        , writerDpi = optDpi opts
        , writerEmailObfuscation = optEmailObfuscation opts
        , writerIdentifierPrefix = optIdentifierPrefix opts
        , writerCiteMethod = optCiteMethod opts
        , writerHtmlQTags = optHtmlQTags opts
        , writerSlideLevel = optSlideLevel opts
        , writerTopLevelDivision = optTopLevelDivision opts
        , writerHighlightMethod = hlStyle
        , writerSetextHeaders = optSetextHeaders opts
        , writerListTables = optListTables opts
        , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts
        , writerEpubMetadata = T.pack <$> optEpubMetadata opts
        , writerEpubFonts = optEpubFonts opts
        , writerEpubTitlePage = optEpubTitlePage opts
        , writerSplitLevel = optSplitLevel opts
        , writerChunkTemplate = maybe (PathTemplate "%s-%i.html")
                                      PathTemplate
                                      (optChunkTemplate opts)
        , writerTOCDepth = optTOCDepth opts
        , writerReferenceDoc = optReferenceDoc opts
        , writerReferenceLocation = optReferenceLocation opts
        , writerFigureCaptionPosition = optFigureCaptionPosition opts
        , writerTableCaptionPosition = optTableCaptionPosition opts
        , writerPreferAscii = optAscii opts
        , writerLinkImages = optLinkImages opts
        }

  -- Bytestring readers receive the input text decoded from base64.
  let reader = case readerSpec of
        TextReader r -> r readeropts
        ByteStringReader r ->
          r readeropts . BL.fromStrict . Base64.decodeLenient
            . UTF8.fromText

  -- Set translations from the "lang" metadata (en-US when it is absent),
  -- then run the writer and hand its output to the matching handler.
  let writer d@(Pandoc meta _) = do
        case lookupMetaString "lang" meta of
          "" -> setTranslations $
                  Lang "en" Nothing (Just "US") [] [] []
          l -> case parseLang l of
                 Left _ -> report $ InvalidLang l
                 Right l' -> setTranslations l'
        case writerSpec of
          TextWriter w ->
            w writeropts d >>=
              (if optEmbedResources opts && htmlFormat (optTo opts)
                  then makeSelfContained
                  else return) >>=
              textHandler
          ByteStringWriter w ->
            w writeropts d >>= bsHandler

  let transforms :: Pandoc -> Pandoc
      transforms =
        (case optShiftHeadingLevelBy opts of
           0 -> id
           x -> headerShift x) .
        (if extensionEnabled Ext_east_asian_line_breaks
              readerExts &&
            not (extensionEnabled Ext_east_asian_line_breaks
                   writerExts &&
                 optWrap opts == WrapPreserve)
            then eastAsianLineBreakFilter
            else id) .
        (case optIpynbOutput opts of
           IpynbOutputAll -> id
           IpynbOutputNone -> filterIpynbOutput Nothing
           IpynbOutputBest -> filterIpynbOutput (Just $
             -- Decide on the parsed format name rather than the raw "to"
             -- option: the raw value may carry extension modifiers (for
             -- example "latex+smart"), which would miss the cases below
             -- and end up inside the Format.
             case toformat of
               "latex" -> Format "latex"
               "beamer" -> Format "latex"
               f
                 | htmlFormat (Just f) -> Format "html"
                 | otherwise -> Format f))

  -- Metadata from the options, extended with the bibliography settings.
  let meta = (case optBibliography opts of
                [] -> id
                fs -> setMeta "bibliography" (MetaList
                        (map (MetaString . T.pack) fs))) .
             maybe id (setMeta "csl" . MetaString . T.pack)
               (optCSL opts) .
             maybe id (setMeta "citation-abbreviations" . MetaString .
                         T.pack)
               (optCitationAbbreviations opts) $
             optMetadata opts

  let addMetadata m' (Pandoc m bs) = Pandoc (m <> m') bs

  -- Citations are processed only when the request asks for it explicitly.
  reader (text params) >>=
    return . transforms . addMetadata meta >>=
      (case citeproc params of
         Just True -> processCitations
         _ -> return) >>=
    writer
-- True when the (optional) output format counts as HTML: either no format
-- is given, or its name starts with one of the names listed below.
htmlFormat :: Maybe Text -> Bool
htmlFormat = maybe True startsWithHtmlName
 where
  startsWithHtmlName fmt = any (`T.isPrefixOf` fmt) htmlNames
  htmlNames :: [Text]
  htmlNames =
    ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"]
-- Return a successful result unchanged; turn a pandoc error into an
-- HTTP 500 response whose body is the rendered error message (UTF-8).
handleErr result =
  case result of
    Left err -> throwError $ err500 { errBody = errorBody err }
    Right value -> return value
 where
  errorBody err = TLE.encodeUtf8 (TL.fromStrict (renderError err))
-- Like handleErr, but never fails the request: a pandoc error is returned
-- as a Failed output carrying the rendered error message.
handleErrJSON result =
  return (either (Failed . renderError) id result)
-- Compile a template supplied as a String, under the name
-- "custom.<format>", using runWithPartials.  A compilation failure is
-- raised as a PandocTemplateError.
compileCustomTemplate toformat t =
  runWithPartials (compileTemplate templateName (T.pack t)) >>=
    either (throwError . PandocTemplateError . T.pack) return
 where
  templateName = "custom." <> T.unpack toformat
|