aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlbert Krewinkel <[email protected]>2023-03-21 05:19:16 +0100
committerGitHub <[email protected]>2023-03-20 21:19:16 -0700
commitf1b55be65f45f3fee0f53dba49d1b338a5588cff (patch)
tree2403450d32c5e39eb04539f21e3dc8ad6080c770 /src
parentb5d54f7f3899ed9d5866abde10008589d7a4ae42 (diff)
T.P.Format: add new function `formatFromFilePaths` [API Change] (#8710)
* T.P.Format: export `formatFromFilePaths` [API change] * Lua: add function `pandoc.format.from_path` * Update lua-filters.md * The old T.P.App.FormatHeuristics module has been removed. This is an alternative to #8693.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/App.hs27
-rw-r--r--src/Text/Pandoc/App/FormatHeuristics.hs92
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs67
-rw-r--r--src/Text/Pandoc/Format.hs86
4 files changed, 130 insertions, 142 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 5df6d7668..76828e9d8 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -51,7 +51,6 @@ import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.MediaBag (mediaItems)
import Text.Pandoc.Image (svgToPng)
-import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
IpynbOutput (..), OptInfo(..))
import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
@@ -142,20 +141,20 @@ convertWithOpts' scriptingEngine istty datadir opts = do
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]
+ let defFlavor fmt = Format.FlavoredFormat fmt mempty
-- assign reader and writer based on options and filenames
- readerName <- case optFrom opts of
- Just f -> return f
- Nothing -> case formatFromFilePaths sources of
- Just f' -> return f'
- Nothing | sources == ["-"] -> return "markdown"
- | any (isURI . T.pack) sources -> return "html"
- | otherwise -> do
- report $ CouldNotDeduceFormat
- (map (T.pack . takeExtension) sources) "markdown"
- return "markdown"
-
flvrd@(Format.FlavoredFormat readerNameBase _extsDiff) <-
- Format.parseFlavoredFormat readerName
+ case optFrom opts of
+ Just f -> Format.parseFlavoredFormat f
+ Nothing -> case Format.formatFromFilePaths sources of
+ Just f' -> return f'
+ Nothing | sources == ["-"] -> return $ defFlavor "markdown"
+ | any (isURI . T.pack) sources -> return $ defFlavor "html"
+ | otherwise -> do
+ report $ CouldNotDeduceFormat
+ (map (T.pack . takeExtension) sources) "markdown"
+ return $ defFlavor "markdown"
+
let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
@@ -176,7 +175,7 @@ convertWithOpts' scriptingEngine istty datadir opts = do
components <- engineLoadCustom scriptingEngine scriptPath
r <- case customReader components of
Nothing -> throwError $ PandocAppError $
- readerName <> " does not contain a custom reader"
+ readerNameBase <> " does not contain a custom reader"
Just r -> return r
let extsConf = fromMaybe mempty (customExtensions components)
rexts <- Format.applyExtensionsDiff extsConf flvrd
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
deleted file mode 100644
index 376726f98..000000000
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ /dev/null
@@ -1,92 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
- Module : Text.Pandoc.App.FormatHeuristics
- Copyright : Copyright (C) 2006-2023 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley@edu>
- Stability : alpha
- Portability : portable
-
-Guess the format of a file from its name.
--}
-module Text.Pandoc.App.FormatHeuristics
- ( formatFromFilePaths
- ) where
-
-import Data.Char (toLower)
-import Data.Foldable (asum)
-import Data.Text (Text)
-import System.FilePath (takeExtension)
-
--- | Determines default format based on file extensions; uses the format
--- of the first extension that's associated with a format.
---
--- Examples:
---
--- > formatFromFilePaths ["text.unknown", "no-extension"]
--- Nothing
---
--- > formatFromFilePaths ["my.md", "other.rst"]
--- Just "markdown"
-formatFromFilePaths :: [FilePath] -> Maybe Text
-formatFromFilePaths = asum . map formatFromFilePath
-
--- | Determines format based on file extension.
-formatFromFilePath :: FilePath -> Maybe Text
-formatFromFilePath x =
- case takeExtension (map toLower x) of
- ".adoc" -> Just "asciidoc"
- ".asciidoc" -> Just "asciidoc"
- ".context" -> Just "context"
- ".ctx" -> Just "context"
- ".db" -> Just "docbook"
- ".doc" -> Just "doc" -- so we get an "unknown reader" error
- ".docx" -> Just "docx"
- ".dokuwiki" -> Just "dokuwiki"
- ".epub" -> Just "epub"
- ".fb2" -> Just "fb2"
- ".htm" -> Just "html"
- ".html" -> Just "html"
- ".icml" -> Just "icml"
- ".json" -> Just "json"
- ".latex" -> Just "latex"
- ".lhs" -> Just "markdown+lhs"
- ".ltx" -> Just "latex"
- ".markdown" -> Just "markdown"
- ".markua" -> Just "markua"
- ".mkdn" -> Just "markdown"
- ".mkd" -> Just "markdown"
- ".mdwn" -> Just "markdown"
- ".mdown" -> Just "markdown"
- ".Rmd" -> Just "markdown"
- ".md" -> Just "markdown"
- ".ms" -> Just "ms"
- ".muse" -> Just "muse"
- ".native" -> Just "native"
- ".odt" -> Just "odt"
- ".opml" -> Just "opml"
- ".org" -> Just "org"
- ".pdf" -> Just "pdf" -- so we get an "unknown reader" error
- ".pptx" -> Just "pptx"
- ".ris" -> Just "ris"
- ".roff" -> Just "ms"
- ".rst" -> Just "rst"
- ".rtf" -> Just "rtf"
- ".s5" -> Just "s5"
- ".t2t" -> Just "t2t"
- ".tei" -> Just "tei"
- ".tex" -> Just "latex"
- ".texi" -> Just "texinfo"
- ".texinfo" -> Just "texinfo"
- ".text" -> Just "markdown"
- ".textile" -> Just "textile"
- ".txt" -> Just "markdown"
- ".wiki" -> Just "mediawiki"
- ".xhtml" -> Just "html"
- ".ipynb" -> Just "ipynb"
- ".csv" -> Just "csv"
- ".tsv" -> Just "tsv"
- ".bib" -> Just "biblatex"
- ['.',y] | y `elem` ['1'..'9'] -> Just "man"
- _ -> Nothing
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 525055d72..af5c0ddbc 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -37,10 +37,10 @@ import System.FilePath
import System.IO (stdout)
import Text.Pandoc.Chunks (PathTemplate(..))
import Text.Pandoc
-import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..))
import Text.Pandoc.App.CommandLineOptions (engines)
-import qualified Text.Pandoc.Format as Format
+import Text.Pandoc.Format (FlavoredFormat (..), applyExtensionsDiff,
+ parseFlavoredFormat, formatFromFilePaths)
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import Text.Pandoc.Scripting (ScriptingEngine (engineLoadCustom),
CustomComponents(..))
@@ -72,24 +72,26 @@ optToOutputSettings scriptingEngine opts = do
let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" ||
optTo opts == Just "pdf"
- (writerName, maybePdfProg) <-
+ let defaultOutput = "html"
+ defaultOutputFlavor <- parseFlavoredFormat defaultOutput
+ (flvrd@(FlavoredFormat format _extsDiff), maybePdfProg) <-
if pdfOutput
- then liftIO $ pdfWriterAndProg
- (case optTo opts of
- Just "pdf" -> Nothing
- x -> x)
- (optPdfEngine opts)
+ then do
+ outflavor <- case optTo opts of
+ Just x | x /= "pdf" -> Just <$> parseFlavoredFormat x
+ _ -> pure Nothing
+ liftIO $ pdfWriterAndProg outflavor (optPdfEngine opts)
else case optTo opts of
- Just f -> return (f, Nothing)
+ Just f -> (, Nothing) <$> parseFlavoredFormat f
Nothing
- | outputFile == "-" -> return ("html", Nothing)
- | otherwise ->
- case formatFromFilePaths [outputFile] of
- Nothing -> do
- report $ CouldNotDeduceFormat
- [T.pack $ takeExtension outputFile] "html"
- return ("html", Nothing)
- Just f -> return (f, Nothing)
+ | outputFile == "-" ->
+ return (defaultOutputFlavor, Nothing)
+ | otherwise -> case formatFromFilePaths [outputFile] of
+ Nothing -> do
+ report $ CouldNotDeduceFormat
+ [T.pack $ takeExtension outputFile] defaultOutput
+ return (defaultOutputFlavor,Nothing)
+ Just f -> return (f, Nothing)
let makeSandboxed pureWriter =
let files = maybe id (:) (optReferenceDoc opts) .
@@ -104,9 +106,6 @@ optToOutputSettings scriptingEngine opts = do
ByteStringWriter w ->
ByteStringWriter $ \o d -> sandbox files (w o d)
- flvrd@(Format.FlavoredFormat format _extsDiff) <-
- Format.parseFlavoredFormat writerName
-
let standalone = optStandalone opts || isBinaryFormat format || pdfOutput
let templateOrThrow = \case
Left e -> throwError $ PandocTemplateError (T.pack e)
@@ -134,7 +133,7 @@ optToOutputSettings scriptingEngine opts = do
format <> " does not contain a custom writer"
Just w -> return w
let extsConf = fromMaybe mempty $ customExtensions components
- wexts <- Format.applyExtensionsDiff extsConf flvrd
+ wexts <- applyExtensionsDiff extsConf flvrd
templ <- processCustomTemplate $
case customTemplate components of
Nothing -> throwError $ PandocNoTemplateError format
@@ -272,18 +271,16 @@ setVariableM key val (Context ctx) = return $ Context $ M.alter go key ctx
where go Nothing = Just $ toVal val
go (Just x) = Just x
-baseWriterName :: T.Text -> T.Text
-baseWriterName = T.takeWhile (\c -> c /= '+' && c /= '-')
-
-pdfWriterAndProg :: Maybe T.Text -- ^ user-specified writer name
+pdfWriterAndProg :: Maybe FlavoredFormat -- ^ user-specified format
-> Maybe String -- ^ user-specified pdf-engine
- -> IO (T.Text, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
+ -> IO (FlavoredFormat, Maybe String) -- ^ format, pdf-engine
pdfWriterAndProg mWriter mEngine =
case go mWriter mEngine of
Right (writ, prog) -> return (writ, Just prog)
Left err -> liftIO $ E.throwIO $ PandocAppError err
where
- go Nothing Nothing = Right ("latex", "pdflatex")
+ go Nothing Nothing = Right
+ (FlavoredFormat "latex" mempty, "pdflatex")
go (Just writer) Nothing = (writer,) <$> engineForWriter writer
go Nothing (Just engine) = (,engine) <$> writerForEngine (takeBaseName engine)
go (Just writer) (Just engine) | isCustomWriter writer =
@@ -291,23 +288,25 @@ pdfWriterAndProg mWriter mEngine =
-- what they are doing.
Right (writer, engine)
go (Just writer) (Just engine) =
- case find (== (baseWriterName writer, takeBaseName engine)) engines of
+ case find (== (formatName writer, takeBaseName engine)) engines of
Just _ -> Right (writer, engine)
Nothing -> Left $ "pdf-engine " <> T.pack engine <>
- " is not compatible with output format " <> writer
+ " is not compatible with output format " <>
+ formatName writer
writerForEngine eng = case [f | (f,e) <- engines, e == eng] of
- fmt : _ -> Right fmt
+ fmt : _ -> Right (FlavoredFormat fmt mempty)
[] -> Left $
"pdf-engine " <> T.pack eng <> " not known"
- engineForWriter "pdf" = Left "pdf writer"
- engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of
+ engineForWriter (FlavoredFormat "pdf" _) = Left "pdf writer"
+ engineForWriter w = case [e | (f,e) <- engines, f == formatName w] of
eng : _ -> Right eng
[] -> Left $
- "cannot produce pdf output from " <> w
+ "cannot produce pdf output from " <>
+ formatName w
- isCustomWriter w = ".lua" `T.isSuffixOf` w
+ isCustomWriter w = ".lua" `T.isSuffixOf` formatName w
isBinaryFormat :: T.Text -> Bool
isBinaryFormat s =
diff --git a/src/Text/Pandoc/Format.hs b/src/Text/Pandoc/Format.hs
index 0cc640362..f79742d3f 100644
--- a/src/Text/Pandoc/Format.hs
+++ b/src/Text/Pandoc/Format.hs
@@ -16,17 +16,22 @@ module Text.Pandoc.Format
, parseFlavoredFormat
, applyExtensionsDiff
, getExtensionsConfig
+ , formatFromFilePaths
) where
import Control.Monad.Except (throwError)
+import Data.Char (toLower)
+import Data.Foldable (asum)
import Data.List (foldl')
-import System.FilePath (splitExtension)
+import System.FilePath (splitExtension, takeExtension)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Extensions
- ( Extensions
+ ( Extension (Ext_literate_haskell)
+ , Extensions
, disableExtensions
, enableExtension
+ , extensionsFromList
, extensionsToList
, getAllExtensions
, getDefaultExtensions
@@ -148,3 +153,80 @@ pExtensionsDiff = foldl' (flip ($)) mempty <$> many extMod
extsToEnable extsDiff}
_ -> extsDiff{extsToDisable = enableExtension ext $
extsToDisable extsDiff}
+
+-- | Determines default format based on file extensions; uses the format
+-- of the first extension that's associated with a format.
+--
+-- Examples:
+--
+-- > formatFromFilePaths ["text.unknown", "no-extension"]
+-- Nothing
+--
+-- > formatFromFilePaths ["my.md", "other.rst"]
+-- Just "markdown"
+formatFromFilePaths :: [FilePath] -> (Maybe FlavoredFormat)
+formatFromFilePaths = asum . map formatFromFilePath
+
+-- | Determines format based on file extension.
+formatFromFilePath :: FilePath -> Maybe FlavoredFormat
+formatFromFilePath x =
+ case takeExtension (map toLower x) of
+ ".Rmd" -> defFlavor "markdown"
+ ".adoc" -> defFlavor "asciidoc"
+ ".asciidoc" -> defFlavor "asciidoc"
+ ".bib" -> defFlavor "biblatex"
+ ".context" -> defFlavor "context"
+ ".csv" -> defFlavor "csv"
+ ".ctx" -> defFlavor "context"
+ ".db" -> defFlavor "docbook"
+ ".doc" -> defFlavor "doc" -- so we get an "unknown reader" error
+ ".docx" -> defFlavor "docx"
+ ".dokuwiki" -> defFlavor "dokuwiki"
+ ".epub" -> defFlavor "epub"
+ ".fb2" -> defFlavor "fb2"
+ ".htm" -> defFlavor "html"
+ ".html" -> defFlavor "html"
+ ".icml" -> defFlavor "icml"
+ ".ipynb" -> defFlavor "ipynb"
+ ".json" -> defFlavor "json"
+ ".latex" -> defFlavor "latex"
+ ".lhs" -> defFlavor "markdown" `withExtension` Ext_literate_haskell
+ ".ltx" -> defFlavor "latex"
+ ".markdown" -> defFlavor "markdown"
+ ".markua" -> defFlavor "markua"
+ ".md" -> defFlavor "markdown"
+ ".mdown" -> defFlavor "markdown"
+ ".mdwn" -> defFlavor "markdown"
+ ".mkd" -> defFlavor "markdown"
+ ".mkdn" -> defFlavor "markdown"
+ ".ms" -> defFlavor "ms"
+ ".muse" -> defFlavor "muse"
+ ".native" -> defFlavor "native"
+ ".odt" -> defFlavor "odt"
+ ".opml" -> defFlavor "opml"
+ ".org" -> defFlavor "org"
+ ".pdf" -> defFlavor "pdf" -- so we get an "unknown reader" error
+ ".pptx" -> defFlavor "pptx"
+ ".ris" -> defFlavor "ris"
+ ".roff" -> defFlavor "ms"
+ ".rst" -> defFlavor "rst"
+ ".rtf" -> defFlavor "rtf"
+ ".s5" -> defFlavor "s5"
+ ".t2t" -> defFlavor "t2t"
+ ".tei" -> defFlavor "tei"
+ ".tex" -> defFlavor "latex"
+ ".texi" -> defFlavor "texinfo"
+ ".texinfo" -> defFlavor "texinfo"
+ ".text" -> defFlavor "markdown"
+ ".textile" -> defFlavor "textile"
+ ".tsv" -> defFlavor "tsv"
+ ".txt" -> defFlavor "markdown"
+ ".wiki" -> defFlavor "mediawiki"
+ ".xhtml" -> defFlavor "html"
+ ['.',y] | y `elem` ['1'..'9'] -> defFlavor "man"
+ _ -> Nothing
+ where
+ defFlavor f = Just (FlavoredFormat f mempty)
+ withExtension Nothing _ = Nothing
+ withExtension (Just (FlavoredFormat f ed)) ext = Just $
+ FlavoredFormat f (ed <> ExtensionsDiff (extensionsFromList [ext]) mempty)