diff options
| author | John MacFarlane <[email protected]> | 2022-08-16 16:27:31 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-08-17 12:28:14 -0700 |
| commit | 8ddc2fc79a45283e7b90f59e9a7763e877d4c044 (patch) | |
| tree | 3e9e8f4fdc7370137c46344ba1829aac6c43c6cd /src | |
| parent | 90d52b7129440d7d91bcdf3210513f380063be0a (diff) | |
Integrate server into main pandoc.
- Remove server flag.
- Remove pandoc-server executable.
- Add Text.Pandoc.Server as exposed module. [API change]
- Re-use Opt (and our existing FromJSON instance) for Params.
- Document.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Server.hs | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Server.hs b/src/Text/Pandoc/Server.hs new file mode 100644 index 000000000..a7c46f93f --- /dev/null +++ b/src/Text/Pandoc/Server.hs @@ -0,0 +1,357 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Server + ( app + , ServerOpts(..) + , Params(..) + , Blob(..) + , parseServerOpts + ) where + +import Data.Aeson +import Network.Wai +import Servant +import Text.DocTemplates as DocTemplates +import Text.Pandoc +import Text.Pandoc.Citeproc (processCitations) +import Text.Pandoc.Highlighting (lookupHighlightingStyle) +import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TLE +import Data.Maybe (fromMaybe) +import Data.Char (isAlphaNum) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as BL +import Data.ByteString.Base64 (decodeBase64, encodeBase64) +import Data.Default +import Control.Monad (when, foldM) +import qualified Data.Set as Set +import Skylighting (defaultSyntaxMap) +import qualified Data.Map as M +import System.Console.GetOpt +import System.Environment (getArgs, getProgName) +import qualified Control.Exception as E +import Text.Pandoc.Shared (safeStrRead, headerShift, filterIpynbOutput, + eastAsianLineBreakFilter, stripEmptyParagraphs) +import Text.Pandoc.App.Opt ( IpynbOutput (..), Opt(..), defaultOpts ) +import Text.Pandoc.Filter (Filter(..)) +import Text.Pandoc.Builder (setMeta) +import Text.Pandoc.SelfContained (makeSelfContained) +import System.Exit + +data ServerOpts = + ServerOpts + { serverPort :: Int + , serverTimeout :: Int } + deriving (Show) + +defaultServerOpts :: ServerOpts +defaultServerOpts = ServerOpts { serverPort = 3030, serverTimeout = 2 } + +cliOptions :: [OptDescr (ServerOpts -> IO ServerOpts)] +cliOptions = + [ Option ['p'] ["port"] + (ReqArg (\s opts -> case safeStrRead s of + Just i -> return opts{ serverPort = i } + Nothing -> + E.throwIO $ PandocOptionError $ T.pack + s <> " is not a number") "NUMBER") + "port number" + , Option ['t'] ["timeout"] + (ReqArg (\s opts -> case safeStrRead s of + Just i -> return opts{ serverTimeout = i } + Nothing -> + E.throwIO $ PandocOptionError $ T.pack + s <> " is not a number") "NUMBER") + "timeout (seconds)" + + , Option ['h'] ["help"] + (NoArg (\_ -> do + prg <- getProgName + let header = "Usage: " <> prg <> " [OPTION...]" + putStrLn $ usageInfo header cliOptions + exitWith ExitSuccess)) + "help message" + + , Option ['v'] ["version"] + (NoArg (\_ -> do + prg <- getProgName + putStrLn $ prg <> " " <> T.unpack pandocVersion + exitWith ExitSuccess)) + "version info" + + ] + +parseServerOpts :: IO ServerOpts +parseServerOpts = do + args <- getArgs + let handleUnknownOpt x = "Unknown option: " <> x + case getOpt' Permute cliOptions args of + (os, ns, unrecognizedOpts, es) -> do + when (not (null es) || not (null unrecognizedOpts)) $ + E.throwIO $ PandocOptionError $ T.pack $ + concat es ++ unlines (map handleUnknownOpt unrecognizedOpts) ++ + ("Try --help for more information.") + when (not (null ns)) $ + E.throwIO $ PandocOptionError $ T.pack $ + "Unknown arguments: " <> unwords ns + foldM (flip ($)) defaultServerOpts os + +newtype Blob = Blob BL.ByteString + deriving (Show, Eq) + +instance ToJSON Blob where + toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs) + +instance FromJSON Blob where + parseJSON = withText "Blob" $ \t -> do + let inp = UTF8.fromText t + case decodeBase64 inp of + Right bs -> return $ Blob $ BL.fromStrict bs + Left _ -> -- treat as regular text + return $ Blob $ BL.fromStrict inp + +-- This is the data to be supplied by the JSON payload +-- of requests. Maybe values may be omitted and will be +-- given default values. +data Params = Params + { options :: Opt + , text :: Text + , files :: Maybe (M.Map FilePath Blob) + } deriving (Show) + +instance Default Params where + def = Params + { options = defaultOpts + , text = mempty + , files = Nothing + } + +-- Automatically derive code to convert to/from JSON. +instance FromJSON Params where + parseJSON = withObject "Params" $ \o -> + Params + <$> parseJSON (Object o) + <*> o .: "text" + <*> o .:? "files" + + +-- This is the API. The "/convert" endpoint takes a request body +-- consisting of a JSON-encoded Params structure and responds to +-- Get requests with either plain text or JSON, depending on the +-- Accept header. +type API = + ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text + :<|> + ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString + :<|> + "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text] + :<|> + "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value + :<|> + "version" :> Get '[PlainText, JSON] Text + +app :: Application +app = serve api server + +api :: Proxy API +api = Proxy + +server :: Server API +server = convert + :<|> convertBytes + :<|> mapM convert + :<|> babelmark -- for babelmark which expects {"html": "", "version": ""} + :<|> pure pandocVersion + where + babelmark text' from' to' standalone' = do + res <- convert def{ text = text', + options = defaultOpts{ + optFrom = from', + optTo = to', + optStandalone = standalone' } + } + return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ] + + -- We use runPure for the pandoc conversions, which ensures that + -- they will do no IO. This makes the server safe to use. However, + -- it will mean that features requiring IO, like RST includes, will not work. + -- Changing this to + -- handleErr =<< liftIO (runIO (convert' params)) + -- will allow the IO operations. + convert params = handleErr $ + runPure (convert' id (encodeBase64 . BL.toStrict) params) + + convertBytes params = handleErr $ + runPure (convert' UTF8.fromText BL.toStrict params) + + convert' :: (Text -> a) -> (BL.ByteString -> a) -> Params -> PandocPure a + convert' textHandler bsHandler params = do + curtime <- getCurrentTime + -- put files params in ersatz file system + let addFile :: FilePath -> Blob -> FileTree -> FileTree + addFile fp (Blob lbs) = + insertInFileTree fp FileInfo{ infoFileMTime = curtime + , infoFileContents = BL.toStrict lbs } + case files params of + Nothing -> return () + Just fs -> do + let filetree = M.foldrWithKey addFile mempty fs + modifyPureState $ \st -> st{ stFiles = filetree } + + let opts = options params + let readerFormat = fromMaybe "markdown" $ optFrom opts + let writerFormat = fromMaybe "html" $ optTo opts + (readerSpec, readerExts) <- getReader readerFormat + (writerSpec, writerExts) <- getWriter writerFormat + + let isStandalone = optStandalone opts + let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat + hlStyle <- traverse (lookupHighlightingStyle . T.unpack) + $ optHighlightStyle opts + + mbTemplate <- if isStandalone + then case optTemplate opts of + Nothing -> Just <$> + compileDefaultTemplate toformat + Just t -> Just <$> + compileCustomTemplate toformat t + else return Nothing + + abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$> + case optAbbreviations opts of + Nothing -> readDataFile "abbreviations" + Just f -> readFileStrict f + + let readeropts = def{ readerExtensions = readerExts + , readerStandalone = isStandalone + , readerTabStop = optTabStop opts + , readerIndentedCodeClasses = + optIndentedCodeClasses opts + , readerAbbreviations = abbrevs + , readerDefaultImageExtension = + optDefaultImageExtension opts + , readerTrackChanges = optTrackChanges opts + , readerStripComments = optStripComments opts + } + let writeropts = + def{ writerExtensions = writerExts + , writerTabStop = optTabStop opts + , writerWrapText = optWrap opts + , writerColumns = optColumns opts + , writerTemplate = mbTemplate + , writerSyntaxMap = defaultSyntaxMap + , writerVariables = optVariables opts + , writerTableOfContents = optTableOfContents opts + , writerIncremental = optIncremental opts + , writerHTMLMathMethod = optHTMLMathMethod opts + , writerNumberSections = optNumberSections opts + , writerNumberOffset = optNumberOffset opts + , writerSectionDivs = optSectionDivs opts + , writerReferenceLinks = optReferenceLinks opts + , writerDpi = optDpi opts + , writerEmailObfuscation = optEmailObfuscation opts + , writerIdentifierPrefix = optIdentifierPrefix opts + , writerCiteMethod = optCiteMethod opts + , writerHtmlQTags = optHtmlQTags opts + , writerSlideLevel = optSlideLevel opts + , writerTopLevelDivision = optTopLevelDivision opts + , writerListings = optListings opts + , writerHighlightStyle = hlStyle + , writerSetextHeaders = optSetextHeaders opts + , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts + , writerEpubMetadata = T.pack <$> optEpubMetadata opts + , writerEpubFonts = optEpubFonts opts + , writerEpubChapterLevel = optEpubChapterLevel opts + , writerTOCDepth = optTOCDepth opts + , writerReferenceDoc = optReferenceDoc opts + , writerReferenceLocation = optReferenceLocation opts + , writerPreferAscii = optAscii opts + } + let reader = case readerSpec of + TextReader r -> r readeropts + ByteStringReader r -> \t -> do + let eitherbs = decodeBase64 $ UTF8.fromText t + case eitherbs of + Left errt -> throwError $ PandocSomeError errt + Right bs -> r readeropts $ BL.fromStrict bs + let writer = case writerSpec of + TextWriter w -> + fmap textHandler . + (\d -> w writeropts d >>= + if optEmbedResources opts && htmlFormat (optTo opts) + then makeSelfContained + else return) + ByteStringWriter w -> fmap bsHandler . w writeropts + + let transforms :: Pandoc -> Pandoc + transforms = (case optShiftHeadingLevelBy opts of + 0 -> id + x -> headerShift x) . + (case optStripEmptyParagraphs opts of + True -> stripEmptyParagraphs + False -> id) . + (if extensionEnabled Ext_east_asian_line_breaks + readerExts && + not (extensionEnabled Ext_east_asian_line_breaks + writerExts && + optWrap opts == WrapPreserve) + then eastAsianLineBreakFilter + else id) . + (case optIpynbOutput opts of + IpynbOutputAll -> id + IpynbOutputNone -> filterIpynbOutput Nothing + IpynbOutputBest -> filterIpynbOutput (Just $ + case optTo opts of + Just "latex" -> Format "latex" + Just "beamer" -> Format "latex" + Nothing -> Format "html" + Just f + | htmlFormat (optTo opts) -> Format "html" + | otherwise -> Format f)) + + let meta = (case optBibliography opts of + [] -> id + fs -> setMeta "bibliography" (MetaList + (map (MetaString . T.pack) fs))) . + maybe id (setMeta "csl" . MetaString . T.pack) + (optCSL opts) . + maybe id (setMeta "citation-abbreviations" . MetaString . + T.pack) + (optCitationAbbreviations opts) $ + optMetadata opts + + let addMetadata m' (Pandoc m bs) = Pandoc (m <> m') bs + + let hasCiteprocFilter [] = False + hasCiteprocFilter (CiteprocFilter:_) = True + hasCiteprocFilter (_:xs) = hasCiteprocFilter xs + + reader (text params) >>= + return . transforms . addMetadata meta >>= + (if hasCiteprocFilter (optFilters opts) + then processCitations + else return) >>= + writer + + htmlFormat :: Maybe Text -> Bool + htmlFormat Nothing = True + htmlFormat (Just f) = + any (`T.isPrefixOf` f) + ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"] + + handleErr (Right t) = return t + handleErr (Left err) = throwError $ + err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err } + + compileCustomTemplate toformat t = do + res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) + (T.pack t) + case res of + Left e -> throwError $ PandocTemplateError (T.pack e) + Right tpl -> return tpl + |
