aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2022-08-13 17:07:46 -0700
committerJohn MacFarlane <[email protected]>2022-08-13 17:10:31 -0700
commit34e00caee55c82d2ee609a694d73960c111002c2 (patch)
tree221fd9ae559e5d86a8c235555ec42313ca676d58 /src
parent4dfc30ca1020baa15a40d6b890be1a511747861c (diff)
Factor out convertWithOpts' from convertWithOpts.
This runs in any PandocMonad, MonadIO, MonadMask instance.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/App.hs523
1 files changed, 265 insertions, 258 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 5a1f016e0..857ded58b 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -28,6 +28,7 @@ module Text.Pandoc.App (
import qualified Control.Exception as E
import Control.Monad ( (>=>), when, forM_ )
import Control.Monad.Trans ( MonadIO(..) )
+import Control.Monad.Catch ( MonadMask )
import Control.Monad.Except (throwError, catchError)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
@@ -77,6 +78,7 @@ import System.Posix.Terminal (queryTerminal)
convertWithOpts :: Opt -> IO ()
convertWithOpts opts = do
+ let outputFile = fromMaybe "-" (optOutputFile opts)
datadir <- case optDataDir opts of
Nothing -> do
d <- defaultUserDataDir
@@ -86,276 +88,19 @@ convertWithOpts opts = do
else Nothing
Just _ -> return $ optDataDir opts
- let outputFile = fromMaybe "-" (optOutputFile opts)
- let filters = optFilters opts
- let verbosity = optVerbosity opts
-
when (optDumpArgs opts) $
do UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack)
(fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
- let sources = case optInputFiles opts of
- Just xs | not (optIgnoreArgs opts) -> xs
- _ -> ["-"]
#ifdef _WINDOWS
let istty = True
#else
istty <- liftIO $ queryTerminal stdOutput
#endif
- res <- runIO $ do
-
- setTrace (optTrace opts)
- setVerbosity verbosity
- setUserDataDir datadir
- setResourcePath (optResourcePath opts)
-
- setInputFiles (fromMaybe ["-"] (optInputFiles opts))
- setOutputFile (optOutputFile opts)
-
- -- assign reader and writer based on options and filenames
- readerName <- case optFrom opts of
- Just f -> return f
- Nothing -> case formatFromFilePaths sources of
- Just f' -> return f'
- Nothing | sources == ["-"] -> return "markdown"
- | any (isURI . T.pack) sources -> return "html"
- | otherwise -> do
- report $ CouldNotDeduceFormat
- (map (T.pack . takeExtension) sources) "markdown"
- return "markdown"
-
- let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
-
- let makeSandboxed pureReader =
- let files = maybe id (:) (optReferenceDoc opts) .
- maybe id (:) (optEpubMetadata opts) .
- maybe id (:) (optEpubCoverImage opts) .
- maybe id (:) (optCSL opts) .
- maybe id (:) (optCitationAbbreviations opts) $
- optEpubFonts opts ++
- optBibliography opts
- in case pureReader of
- TextReader r -> TextReader $ \o t -> sandbox files (r o t)
- ByteStringReader r
- -> ByteStringReader $ \o t -> sandbox files (r o t)
-
- (reader, readerExts) <-
- if ".lua" `T.isSuffixOf` readerName
- then return (TextReader (readCustom (T.unpack readerName)), mempty)
- else if optSandbox opts
- then case runPure (getReader readerName) of
- Left e -> throwError e
- Right (r, rexts) -> return (makeSandboxed r, rexts)
- else getReader readerName
-
- outputSettings <- optToOutputSettings opts
- let format = outputFormat outputSettings
- let writer = outputWriter outputSettings
- let writerName = outputWriterName outputSettings
- let writerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') writerName
- let writerOptions = outputWriterOptions outputSettings
-
- let pdfOutput = isJust $ outputPdfProgram outputSettings
-
- let bibOutput = writerNameBase == "bibtex" ||
- writerNameBase == "biblatex" ||
- writerNameBase == "csljson"
-
- let standalone = optStandalone opts ||
- not (isTextFormat format) ||
- pdfOutput ||
- bibOutput
-
- when (pdfOutput && readerNameBase == "latex") $
- case optInputFiles opts of
- Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
- "to convert a .tex file to PDF, you get better results by using pdflatex "
- <> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile
- <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
- _ -> return ()
-
- -- We don't want to send output to the terminal if the user
- -- does 'pandoc -t docx input.txt'; though we allow them to
- -- force this with '-o -'. On posix systems, we detect
- -- when stdout is being piped and allow output to stdout
- -- in that case, but on Windows we can't.
- when ((pdfOutput || not (isTextFormat format)) &&
- istty && isNothing ( optOutputFile opts)) $
- throwError $ PandocAppError $
- "Cannot write " <> (if pdfOutput then "pdf" else format) <>
- " output to terminal.\n" <>
- "Specify an output file using the -o option, or " <>
- "use '-o -' to force output to stdout."
-
-
- abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
- case optAbbreviations opts of
- Nothing -> readDataFile "abbreviations"
- Just f -> readFileStrict f
-
- case lookupMetaString "lang" (optMetadata opts) of
- "" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
- l -> case parseLang l of
- Left _ -> report $ InvalidLang l
- Right l' -> setTranslations l'
-
- let readerOpts = def{
- readerStandalone = standalone
- , readerColumns = optColumns opts
- , readerTabStop = optTabStop opts
- , readerIndentedCodeClasses = optIndentedCodeClasses opts
- , readerDefaultImageExtension =
- optDefaultImageExtension opts
- , readerTrackChanges = optTrackChanges opts
- , readerAbbreviations = abbrevs
- , readerExtensions = readerExts
- , readerStripComments = optStripComments opts
- }
-
- metadataFromFile <-
- case optMetadataFiles opts of
- [] -> return mempty
- paths -> do
- -- If format is markdown or commonmark, use the enabled extensions,
- -- otherwise treat metadata as pandoc markdown (see #7926, #6832)
- let readerOptsMeta =
- if readerNameBase == "markdown" || readerNameBase == "commonmark"
- then readerOpts
- else readerOpts{ readerExtensions = pandocExtensions }
- mconcat <$> mapM
- (\path -> do raw <- readMetadataFile path
- yamlToMeta readerOptsMeta (Just path) raw) paths
-
- let transforms = (case optShiftHeadingLevelBy opts of
- 0 -> id
- x -> (headerShift x :)) .
- (if optStripEmptyParagraphs opts
- then (stripEmptyParagraphs :)
- else id) .
- (if extensionEnabled Ext_east_asian_line_breaks
- readerExts &&
- not (extensionEnabled Ext_east_asian_line_breaks
- (writerExtensions writerOptions) &&
- writerWrapText writerOptions == WrapPreserve)
- then (eastAsianLineBreakFilter :)
- else id) .
- (case optIpynbOutput opts of
- _ | readerNameBase /= "ipynb" -> id
- IpynbOutputAll -> id
- IpynbOutputNone -> (filterIpynbOutput Nothing :)
- IpynbOutputBest -> (filterIpynbOutput (Just $
- if htmlFormat format
- then Format "html"
- else
- case format of
- "latex" -> Format "latex"
- "beamer" -> Format "latex"
- _ -> Format format) :))
- $ []
-
- let convertTabs = tabFilter (if optPreserveTabs opts ||
- readerNameBase == "t2t" ||
- readerNameBase == "man" ||
- readerNameBase == "tsv"
- then 0
- else optTabStop opts)
-
-
- when (readerNameBase == "markdown_github" ||
- writerNameBase == "markdown_github") $
- report $ Deprecated "markdown_github" "Use gfm instead."
-
- mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
-
- setNoCheckCertificate (optNoCheckCertificate opts)
-
- let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
- isPandocCiteproc _ = False
-
- when (any isPandocCiteproc filters) $
- report $ Deprecated "pandoc-citeproc filter"
- "Use --citeproc instead."
-
- let cslMetadata =
- maybe id (setMeta "csl") (optCSL opts) .
- (case optBibliography opts of
- [] -> id
- xs -> setMeta "bibliography" xs) .
- maybe id (setMeta "citation-abbreviations")
- (optCitationAbbreviations opts) $ mempty
-
- let filterEnv = Environment readerOpts writerOptions
-
- inputs <- readSources sources
-
- doc <- (case reader of
- TextReader r
- | readerNameBase == "json" ->
- mconcat <$>
- mapM (inputToText convertTabs
- >=> r readerOpts . (:[])) inputs
- | optFileScope opts ->
- mconcat <$> mapM
- (inputToText convertTabs
- >=> r readerOpts . (:[]))
- inputs
- | otherwise -> mapM (inputToText convertTabs) inputs
- >>= r readerOpts
- ByteStringReader r ->
- mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
- >>= ( return . adjustMetadata (metadataFromFile <>)
- >=> return . adjustMetadata (<> optMetadata opts)
- >=> return . adjustMetadata (<> cslMetadata)
- >=> applyTransforms transforms
- >=> applyFilters filterEnv filters [T.unpack format]
- >=> (if not (optSandbox opts) &&
- (isJust (optExtractMedia opts)
- || writerNameBase == "docx") -- for fallback pngs
- then fillMediaBag
- else return)
- >=> maybe return extractMedia (optExtractMedia opts)
- )
-
- when (writerNameBase == "docx" && not (optSandbox opts)) $ do
- -- create fallback pngs for svgs
- items <- mediaItems <$> getMediaBag
- forM_ items $ \(fp, mt, bs) ->
- case T.takeWhile (/=';') mt of
- "image/svg+xml" -> do
- res <- svgToPng (writerDpi writerOptions) bs
- case res of
- Right bs' -> do
- let fp' = fp <> ".png"
- insertMedia fp' (Just "image/png") bs'
- Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e)
- _ -> return ()
-
- output <- case writer of
- ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
- TextWriter f -> case outputPdfProgram outputSettings of
- Just pdfProg -> do
- res <- makePDF pdfProg (optPdfEngineOpts opts) f
- writerOptions doc
- case res of
- Right pdf -> return $ BinaryOutput pdf
- Left err' -> throwError $ PandocPDFError $
- TL.toStrict (TE.decodeUtf8With TE.lenientDecode err')
-
- Nothing -> do
- let ensureNl t
- | standalone = t
- | T.null t || T.last t /= '\n' = t <> T.singleton '\n'
- | otherwise = t
- textOutput <- ensureNl <$> f writerOptions doc
- if (optSelfContained opts || optEmbedResources opts) && htmlFormat format
- then TextOutput <$> makeSelfContained textOutput
- else return $ TextOutput textOutput
- reports <- getLog
- return (output, reports)
-
+ res <- runIO $ convertWithOpts' istty datadir opts
case res of
Left e -> E.throwIO e
Right (output, reports) -> do
@@ -373,6 +118,268 @@ convertWithOpts opts = do
TextOutput t -> writerFn eol outputFile t
BinaryOutput bs -> writeFnBinary outputFile bs
+convertWithOpts' :: (PandocMonad m, MonadIO m, MonadMask m)
+ => Bool
+ -> Maybe FilePath
+ -> Opt
+ -> m (PandocOutput, [LogMessage])
+convertWithOpts' istty datadir opts = do
+ let outputFile = fromMaybe "-" (optOutputFile opts)
+ let filters = optFilters opts
+ let verbosity = optVerbosity opts
+
+ let sources = case optInputFiles opts of
+ Just xs | not (optIgnoreArgs opts) -> xs
+ _ -> ["-"]
+ setTrace (optTrace opts)
+ setVerbosity verbosity
+ setUserDataDir datadir
+ setResourcePath (optResourcePath opts)
+
+ setInputFiles (fromMaybe ["-"] (optInputFiles opts))
+ setOutputFile (optOutputFile opts)
+
+ -- assign reader and writer based on options and filenames
+ readerName <- case optFrom opts of
+ Just f -> return f
+ Nothing -> case formatFromFilePaths sources of
+ Just f' -> return f'
+ Nothing | sources == ["-"] -> return "markdown"
+ | any (isURI . T.pack) sources -> return "html"
+ | otherwise -> do
+ report $ CouldNotDeduceFormat
+ (map (T.pack . takeExtension) sources) "markdown"
+ return "markdown"
+
+ let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
+
+ let makeSandboxed pureReader =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureReader of
+ TextReader r -> TextReader $ \o t -> sandbox files (r o t)
+ ByteStringReader r
+ -> ByteStringReader $ \o t -> sandbox files (r o t)
+
+ (reader, readerExts) <-
+ if ".lua" `T.isSuffixOf` readerName
+ then return (TextReader (readCustom (T.unpack readerName)), mempty)
+ else if optSandbox opts
+ then case runPure (getReader readerName) of
+ Left e -> throwError e
+ Right (r, rexts) -> return (makeSandboxed r, rexts)
+ else getReader readerName
+
+ outputSettings <- optToOutputSettings opts
+ let format = outputFormat outputSettings
+ let writer = outputWriter outputSettings
+ let writerName = outputWriterName outputSettings
+ let writerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') writerName
+ let writerOptions = outputWriterOptions outputSettings
+
+ let pdfOutput = isJust $ outputPdfProgram outputSettings
+
+ let bibOutput = writerNameBase == "bibtex" ||
+ writerNameBase == "biblatex" ||
+ writerNameBase == "csljson"
+
+ let standalone = optStandalone opts ||
+ not (isTextFormat format) ||
+ pdfOutput ||
+ bibOutput
+
+ when (pdfOutput && readerNameBase == "latex") $
+ case optInputFiles opts of
+ Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
+ "to convert a .tex file to PDF, you get better results by using pdflatex "
+ <> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile
+ <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
+ _ -> return ()
+
+ -- We don't want to send output to the terminal if the user
+ -- does 'pandoc -t docx input.txt'; though we allow them to
+ -- force this with '-o -'. On posix systems, we detect
+ -- when stdout is being piped and allow output to stdout
+ -- in that case, but on Windows we can't.
+ when ((pdfOutput || not (isTextFormat format)) &&
+ istty && isNothing ( optOutputFile opts)) $
+ throwError $ PandocAppError $
+ "Cannot write " <> (if pdfOutput then "pdf" else format) <>
+ " output to terminal.\n" <>
+ "Specify an output file using the -o option, or " <>
+ "use '-o -' to force output to stdout."
+
+
+ abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
+ case optAbbreviations opts of
+ Nothing -> readDataFile "abbreviations"
+ Just f -> readFileStrict f
+
+ case lookupMetaString "lang" (optMetadata opts) of
+ "" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+ l -> case parseLang l of
+ Left _ -> report $ InvalidLang l
+ Right l' -> setTranslations l'
+
+ let readerOpts = def{
+ readerStandalone = standalone
+ , readerColumns = optColumns opts
+ , readerTabStop = optTabStop opts
+ , readerIndentedCodeClasses = optIndentedCodeClasses opts
+ , readerDefaultImageExtension =
+ optDefaultImageExtension opts
+ , readerTrackChanges = optTrackChanges opts
+ , readerAbbreviations = abbrevs
+ , readerExtensions = readerExts
+ , readerStripComments = optStripComments opts
+ }
+
+ metadataFromFile <-
+ case optMetadataFiles opts of
+ [] -> return mempty
+ paths -> do
+ -- If format is markdown or commonmark, use the enabled extensions,
+ -- otherwise treat metadata as pandoc markdown (see #7926, #6832)
+ let readerOptsMeta =
+ if readerNameBase == "markdown" || readerNameBase == "commonmark"
+ then readerOpts
+ else readerOpts{ readerExtensions = pandocExtensions }
+ mconcat <$> mapM
+ (\path -> do raw <- readMetadataFile path
+ yamlToMeta readerOptsMeta (Just path) raw) paths
+
+ let transforms = (case optShiftHeadingLevelBy opts of
+ 0 -> id
+ x -> (headerShift x :)) .
+ (if optStripEmptyParagraphs opts
+ then (stripEmptyParagraphs :)
+ else id) .
+ (if extensionEnabled Ext_east_asian_line_breaks
+ readerExts &&
+ not (extensionEnabled Ext_east_asian_line_breaks
+ (writerExtensions writerOptions) &&
+ writerWrapText writerOptions == WrapPreserve)
+ then (eastAsianLineBreakFilter :)
+ else id) .
+ (case optIpynbOutput opts of
+ _ | readerNameBase /= "ipynb" -> id
+ IpynbOutputAll -> id
+ IpynbOutputNone -> (filterIpynbOutput Nothing :)
+ IpynbOutputBest -> (filterIpynbOutput (Just $
+ if htmlFormat format
+ then Format "html"
+ else
+ case format of
+ "latex" -> Format "latex"
+ "beamer" -> Format "latex"
+ _ -> Format format) :))
+ $ []
+
+ let convertTabs = tabFilter (if optPreserveTabs opts ||
+ readerNameBase == "t2t" ||
+ readerNameBase == "man" ||
+ readerNameBase == "tsv"
+ then 0
+ else optTabStop opts)
+
+
+ when (readerNameBase == "markdown_github" ||
+ writerNameBase == "markdown_github") $
+ report $ Deprecated "markdown_github" "Use gfm instead."
+
+ mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
+
+ setNoCheckCertificate (optNoCheckCertificate opts)
+
+ let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
+ isPandocCiteproc _ = False
+
+ when (any isPandocCiteproc filters) $
+ report $ Deprecated "pandoc-citeproc filter"
+ "Use --citeproc instead."
+
+ let cslMetadata =
+ maybe id (setMeta "csl") (optCSL opts) .
+ (case optBibliography opts of
+ [] -> id
+ xs -> setMeta "bibliography" xs) .
+ maybe id (setMeta "citation-abbreviations")
+ (optCitationAbbreviations opts) $ mempty
+
+ let filterEnv = Environment readerOpts writerOptions
+
+ inputs <- readSources sources
+
+ doc <- (case reader of
+ TextReader r
+ | readerNameBase == "json" ->
+ mconcat <$>
+ mapM (inputToText convertTabs
+ >=> r readerOpts . (:[])) inputs
+ | optFileScope opts ->
+ mconcat <$> mapM
+ (inputToText convertTabs
+ >=> r readerOpts . (:[]))
+ inputs
+ | otherwise -> mapM (inputToText convertTabs) inputs
+ >>= r readerOpts
+ ByteStringReader r ->
+ mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
+ >>= ( return . adjustMetadata (metadataFromFile <>)
+ >=> return . adjustMetadata (<> optMetadata opts)
+ >=> return . adjustMetadata (<> cslMetadata)
+ >=> applyTransforms transforms
+ >=> applyFilters filterEnv filters [T.unpack format]
+ >=> (if not (optSandbox opts) &&
+ (isJust (optExtractMedia opts)
+ || writerNameBase == "docx") -- for fallback pngs
+ then fillMediaBag
+ else return)
+ >=> maybe return extractMedia (optExtractMedia opts)
+ )
+
+ when (writerNameBase == "docx" && not (optSandbox opts)) $ do
+ -- create fallback pngs for svgs
+ items <- mediaItems <$> getMediaBag
+ forM_ items $ \(fp, mt, bs) ->
+ case T.takeWhile (/=';') mt of
+ "image/svg+xml" -> do
+ res <- svgToPng (writerDpi writerOptions) bs
+ case res of
+ Right bs' -> do
+ let fp' = fp <> ".png"
+ insertMedia fp' (Just "image/png") bs'
+ Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e)
+ _ -> return ()
+
+ output <- case writer of
+ ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
+ TextWriter f -> case outputPdfProgram outputSettings of
+ Just pdfProg -> do
+ res <- makePDF pdfProg (optPdfEngineOpts opts) f
+ writerOptions doc
+ case res of
+ Right pdf -> return $ BinaryOutput pdf
+ Left err' -> throwError $ PandocPDFError $
+ TL.toStrict (TE.decodeUtf8With TE.lenientDecode err')
+
+ Nothing -> do
+ let ensureNl t
+ | standalone = t
+ | T.null t || T.last t /= '\n' = t <> T.singleton '\n'
+ | otherwise = t
+ textOutput <- ensureNl <$> f writerOptions doc
+ if (optSelfContained opts || optEmbedResources opts) && htmlFormat format
+ then TextOutput <$> makeSelfContained textOutput
+ else return $ TextOutput textOutput
+ reports <- getLog
+ return (output, reports)
+
data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString
deriving (Show)