diff options
| author | John MacFarlane <[email protected]> | 2022-11-09 17:36:23 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-12-20 20:58:44 -0800 |
| commit | 063480accd3421bfc77478b8dbddc1985553ed75 (patch) | |
| tree | 4ef6d846f17085cbc2f7410123d7698c4196b069 /pandoc-lua-engine | |
| parent | 8553459f61eb2290008fafedcfbb1df0bb1fe1b4 (diff) | |
T.P.Scripting: Refactor the scripting engine.
The new type CustomComponents is exported from T.P.Scripting, and the
ScriptEngine fields are changed. Instead of separate fields for custom
readers and writers, we now have a single function that loads any number
of "components" from a script: these may be custom readers, custom
writers, templates for writers, or extension configs. (Note: it's
possible to have a custom reader and a custom writer for a format
together in the same file.)
Pandoc now checks the folder `custom` in the user's data directory for a
matching script if it can't find one in the local directory. Previously,
the `readers` and `writers` data directories were search for custom
readers and writers, respectively. Scripts in those directories must be
moved to the `custom` folder.
Custom readers used to implement a fallback behavior that allowed to
consume just a string value as input to the `Reader` function. This has
been removed, the first argument is now always a list of sources. Use
`tostring` on that argument to get a string.
Closes #8417.
Signed-off-by: Albert Krewinkel <[email protected]>
Diffstat (limited to 'pandoc-lua-engine')
| -rw-r--r-- | pandoc-lua-engine/pandoc-lua-engine.cabal | 5 | ||||
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua.hs | 9 | ||||
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua/Custom.hs | 163 | ||||
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs | 115 | ||||
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs | 106 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/Tests/Lua/Reader.hs | 12 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/Tests/Lua/Writer.hs | 16 |
7 files changed, 187 insertions, 239 deletions
diff --git a/pandoc-lua-engine/pandoc-lua-engine.cabal b/pandoc-lua-engine/pandoc-lua-engine.cabal index e17f0ad08..4aae2a531 100644 --- a/pandoc-lua-engine/pandoc-lua-engine.cabal +++ b/pandoc-lua-engine/pandoc-lua-engine.cabal @@ -64,7 +64,8 @@ library import: common-options hs-source-dirs: src exposed-modules: Text.Pandoc.Lua - other-modules: Text.Pandoc.Lua.Filter + other-modules: Text.Pandoc.Lua.Custom + , Text.Pandoc.Lua.Filter , Text.Pandoc.Lua.Global , Text.Pandoc.Lua.Init , Text.Pandoc.Lua.Marshal.CommonState @@ -87,8 +88,6 @@ library , Text.Pandoc.Lua.Module.Utils , Text.Pandoc.Lua.Orphans , Text.Pandoc.Lua.PandocLua - , Text.Pandoc.Lua.Reader - , Text.Pandoc.Lua.Writer , Text.Pandoc.Lua.Writer.Classic , Text.Pandoc.Lua.Writer.Scaffolding diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua.hs index 8ff9a7c64..a42107082 100644 --- a/pandoc-lua-engine/src/Text/Pandoc/Lua.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua.hs @@ -13,8 +13,7 @@ Running pandoc Lua filters. module Text.Pandoc.Lua ( -- * High-level functions applyFilter - , readCustom - , writeCustom + , loadCustom -- * Low-level functions , Global(..) , setGlobals @@ -30,8 +29,7 @@ import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Filter (applyFilter) import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.Init (runLua, runLuaNoEnv) -import Text.Pandoc.Lua.Reader (readCustom) -import Text.Pandoc.Lua.Writer (writeCustom) +import Text.Pandoc.Lua.Custom (loadCustom) import Text.Pandoc.Lua.Orphans () import Text.Pandoc.Scripting (ScriptingEngine (..)) import qualified Text.Pandoc.UTF8 as UTF8 @@ -46,6 +44,5 @@ getEngine = do pure $ ScriptingEngine { engineName = maybe "Lua (unknown version)" UTF8.toText versionName , engineApplyFilter = applyFilter - , engineReadCustom = readCustom - , engineWriteCustom = writeCustom + , engineLoadCustom = loadCustom } diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Custom.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Custom.hs new file mode 100644 index 000000000..14eb3b93b --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Custom.hs @@ -0,0 +1,163 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TupleSections #-} +{- | + Module : Text.Pandoc.Lua.Custom + Copyright : © 2021-2022 Albert Krewinkel, John MacFarlane + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel <[email protected]> + +Supports custom parsers written in Lua which produce a Pandoc AST. +-} +module Text.Pandoc.Lua.Custom ( loadCustom ) where +import Control.Exception +import Control.Monad ((<=<), (<$!>)) +import Control.Monad.IO.Class (MonadIO) +import Data.Maybe (fromMaybe) +import HsLua as Lua hiding (Operation (Div)) +import HsLua.Core.Run (GCManagedState, newGCManagedState, withGCManagedState) +import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Global (Global (..), setGlobals) +import Text.Pandoc.Lua.Init (runLuaWith) +import Text.Pandoc.Lua.Marshal.Format (peekExtensionsConfig) +import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc) +import Text.Pandoc.Lua.Marshal.WriterOptions (pushWriterOptions) +import Text.Pandoc.Readers (Reader (..)) +import Text.Pandoc.Sources (ToSources(..)) +import Text.Pandoc.Scripting (CustomComponents (..)) +import Text.Pandoc.Writers (Writer (..)) +import qualified Text.Pandoc.Lua.Writer.Classic as Classic + +-- | Convert custom markup to Pandoc. +loadCustom :: (PandocMonad m, MonadIO m) + => FilePath -> m (CustomComponents m) +loadCustom luaFile = do + luaState <- liftIO newGCManagedState + luaFile' <- fromMaybe luaFile <$> + findFileWithDataFallback "custom" luaFile + either throw pure <=< runLuaWith luaState $ do + let globals = [ PANDOC_SCRIPT_FILE luaFile ] + setGlobals globals + dofileTrace luaFile' >>= \case + OK -> pure () + _ -> throwErrorAsException + + mextsConf <- rawgetglobal "Extensions" >>= \case + TypeNil -> pure Nothing + TypeFunction -> Just <$!> do + callTrace 0 1 + forcePeek $ peekExtensionsConfig top `lastly` pop 1 + _ -> Just <$!> do + forcePeek $ peekExtensionsConfig top `lastly` pop 1 + + mtemplate <- rawgetglobal "Template" >>= \case + TypeNil -> pure Nothing + TypeFunction -> Just <$!> do + callTrace 0 1 + forcePeek $ peekText top `lastly` pop 1 + _ -> Just <$!> do + forcePeek $ peekText top `lastly` pop 1 + + mreader <- rawgetglobal "Reader" >>= \case + TypeNil -> do + pop 1 + rawgetglobal "ByteStringReader" >>= \case + TypeNil -> pure Nothing + _ -> do + setfield registryindex readerField + pure . Just $ byteStringReader luaState + _ -> do + setfield registryindex readerField + pure . Just $ textReader luaState + + mwriter <- rawgetglobal "Writer" >>= \case + TypeNil -> rawgetglobal "ByteStringWriter" >>= \case + TypeNil -> do + -- Neither `Writer` nor `BinaryWriter` are defined. Check for + -- "Doc"; if present, use the file as a classic writer. + docType <- rawgetglobal "Doc" + pop 3 -- remove nils/value of "Writer", "ByteStringWriter", "Doc" + pure $ + if docType /= TypeFunction + then Nothing + else Just . TextWriter $ \opts doc -> + liftIO $ withGCManagedState luaState $ + Classic.runCustom @PandocError opts doc + _ -> Just <$!> do + -- Binary writer. Writer function is on top of the stack. + setfield registryindex writerField + pure $ ByteStringWriter $ \opts doc -> + -- Call writer with document and writer options as arguments. + liftIO $ withGCManagedState luaState $ do + getfield registryindex writerField + push doc + pushWriterOptions opts + callTrace 2 1 + forcePeek @PandocError $ peekLazyByteString top + _ -> Just <$!> do + -- New-type text writer. Writer function is on top of the stack. + setfield registryindex writerField + pure $ TextWriter $ \opts doc -> + liftIO $ withGCManagedState luaState $ do + getfield registryindex writerField + push doc + pushWriterOptions opts + callTrace 2 1 + forcePeek @PandocError $ peekText top + + pure $ CustomComponents + { customReader = mreader + , customWriter = mwriter + , customTemplate = mtemplate + , customExtensions = mextsConf + } + +-- | "Raw", non-metatable lookup of a key in the global table. +-- +-- Most classic writers contain code that throws an error if a global +-- is not present. This would break our check for the existence of a +-- "Writer" function. We resort to raw access for that reason, but +-- could also catch the error instead. +-- +-- TODO: This function ensures the proper behavior of legacy custom +-- writers. It should be replaced with 'getglobal' in the future. +rawgetglobal :: LuaError e => Name -> LuaE e Lua.Type +rawgetglobal x = do + pushglobaltable + pushName x + rawget (nth 2) <* remove (nth 2) -- remove global table + +-- | Name under which the reader function is stored in the registry. +readerField :: Name +readerField = "Pandoc Reader function" + +-- | Name under which the writer function is stored in the registry. +writerField :: Name +writerField = "Pandoc Writer function" + +-- | Runs a Lua action in a continueable environment. +inLua :: MonadIO m => GCManagedState -> LuaE PandocError a -> m a +inLua st = liftIO . withGCManagedState @PandocError st + +-- | Returns the ByteStringReader function +byteStringReader :: MonadIO m => GCManagedState -> Reader m +byteStringReader st = ByteStringReader $ \ropts input -> inLua st $ do + getfield registryindex readerField + push input + push ropts + pcallTrace 2 1 >>= \case + OK -> forcePeek $ peekPandoc top + _ -> throwErrorAsException + +-- | Returns the TextReader function +textReader :: MonadIO m => GCManagedState -> Reader m +textReader st = TextReader $ \ropts srcs -> inLua st $ do + let input = toSources srcs + getfield registryindex readerField + push input + push ropts + pcallTrace 2 1 >>= \case + OK -> forcePeek $ peekPandoc top + _ -> throwErrorAsException diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs deleted file mode 100644 index 8e411aeb2..000000000 --- a/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs +++ /dev/null @@ -1,115 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE TupleSections #-} -{- | - Module : Text.Pandoc.Lua.Reader - Copyright : Copyright (C) 2021-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <[email protected]> - Stability : alpha - Portability : portable - -Supports custom parsers written in Lua which produce a Pandoc AST. --} -module Text.Pandoc.Lua.Reader ( readCustom ) where -import Control.Exception -import Control.Monad ((<=<), (<$!>), when) -import Control.Monad.IO.Class (MonadIO) -import Data.Maybe (fromMaybe) -import HsLua as Lua hiding (Operation (Div)) -import HsLua.Core.Run (GCManagedState, newGCManagedState, withGCManagedState) -import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback, report) -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Format (ExtensionsConfig (..)) -import Text.Pandoc.Logging -import Text.Pandoc.Lua.Global (Global (..), setGlobals) -import Text.Pandoc.Lua.Init (runLuaWith) -import Text.Pandoc.Lua.PandocLua -import Text.Pandoc.Lua.Marshal.Format (peekExtensionsConfig) -import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc) -import Text.Pandoc.Readers (Reader (..)) -import Text.Pandoc.Sources (ToSources(..), sourcesToText) -import qualified Data.Text as T - --- | Convert custom markup to Pandoc. -readCustom :: (PandocMonad m, MonadIO m) - => FilePath -> m (Reader m, ExtensionsConfig) -readCustom luaFile = do - luaState <- liftIO newGCManagedState - luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "readers" luaFile - either throw pure <=< runLuaWith luaState $ do - let globals = [ PANDOC_SCRIPT_FILE luaFile ] - setGlobals globals - stat <- dofileTrace luaFile' - -- check for error in lua script (later we'll change the return type - -- to handle this more gracefully): - when (stat /= Lua.OK) - Lua.throwErrorAsException - - extsConf <- getglobal "Extensions" >>= \case - TypeNil -> pure $ ExtensionsConfig mempty mempty - _ -> forcePeek $ peekExtensionsConfig top `lastly` pop 1 - - (,extsConf) <$!> getCustomReader luaState - - where - readerField = "PANDOC Reader function" - inLua st = liftIO . withGCManagedState @PandocError st - byteStringReader :: MonadIO m => GCManagedState -> Reader m - byteStringReader st = ByteStringReader $ \ropts input -> inLua st $ do - getfield registryindex readerField - push input - push ropts - callTrace 2 1 - forcePeek $ peekPandoc top - textReader st = TextReader $ \ropts srcs -> inLua st $ do - let input = toSources srcs - getfield registryindex readerField - push input - push ropts - pcallTrace 2 1 >>= \case - OK -> forcePeek $ peekPandoc top - ErrRun -> do - -- Caught a runtime error. Check if parsing might work if we - -- pass a string instead of a Sources list, then retry. - runPeek (peekText top) >>= \case - Failure {} -> - -- not a string error object. Bail! - throwErrorAsException - Success errmsg -> - if "string expected, got pandoc Sources" `T.isInfixOf` errmsg - then do - pop 1 - _ <- unPandocLua $ do - report $ Deprecated "old Reader function signature" $ - T.unlines - [ "Reader functions should accept a sources list; " - , "functions expecting `string` input are deprecated. " - , "Use `tostring` to convert the first argument to a " - , "string." - ] - getglobal "Reader" - push $ sourcesToText input -- push sources as string - push ropts - callTrace 2 1 - forcePeek $ peekPandoc top - else - -- nothing we can do here - throwErrorAsException - _ -> -- not a runtime error, we won't be able to recover from that - throwErrorAsException - getCustomReader st = do - getglobal "Reader" >>= \case - TypeNil -> do - pop 1 - getglobal "ByteStringReader" >>= \case - TypeNil -> failLua $ "No reader function found: either 'Reader' or " - <> "'ByteStringReader' must be defined." - _ -> do - setfield registryindex readerField - pure (byteStringReader st) - _ -> do - setfield registryindex readerField - pure (textReader st) diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs deleted file mode 100644 index 91573c87b..000000000 --- a/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs +++ /dev/null @@ -1,106 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE TypeApplications #-} -{- | - Module : Text.Pandoc.Lua.Writer - Copyright : Copyright (C) 2012-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <[email protected]> - Stability : alpha - Portability : portable - -Conversion of Pandoc documents using a custom Lua writer. --} -module Text.Pandoc.Lua.Writer - ( writeCustom - ) where - -import Control.Exception -import Control.Monad ((<=<)) -import Data.Default (def) -import Data.Maybe (fromMaybe) -import Data.Text (Text) -import HsLua -import HsLua.Core.Run (newGCManagedState, withGCManagedState) -import Control.Monad.IO.Class (MonadIO) -import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback) -import Text.Pandoc.Error (PandocError (..)) -import Text.Pandoc.Format (ExtensionsConfig (..)) -import Text.Pandoc.Lua.Global (Global (..), setGlobals) -import Text.Pandoc.Lua.Init (runLuaWith) -import Text.Pandoc.Lua.Marshal.Format (peekExtensionsConfig) -import Text.Pandoc.Lua.Marshal.WriterOptions (pushWriterOptions) -import Text.Pandoc.Writers (Writer (..)) -import qualified Text.Pandoc.Lua.Writer.Classic as Classic - --- | Convert Pandoc to custom markup. -writeCustom :: (PandocMonad m, MonadIO m) - => FilePath -> m (Writer m, ExtensionsConfig, Maybe Text) -writeCustom luaFile = do - luaState <- liftIO newGCManagedState - luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "writers" luaFile - either throw pure <=< runLuaWith luaState $ do - setGlobals [ PANDOC_DOCUMENT mempty - , PANDOC_SCRIPT_FILE luaFile' - , PANDOC_WRITER_OPTIONS def - ] - dofileTrace luaFile' >>= \case - OK -> pure () - _ -> throwErrorAsException - -- Most classic writers contain code that throws an error if a global - -- is not present. This would break our check for the existence of a - -- "Writer" function. We resort to raw access for that reason, but - -- could also catch the error instead. - let rawgetglobal x = do - pushglobaltable - pushName x - rawget (nth 2) <* remove (nth 2) -- remove global table - - let writerField = "Pandoc Writer function" - - extsConf <- rawgetglobal "Extensions" >>= \case - TypeNil -> ExtensionsConfig mempty mempty <$ pop 1 - _ -> forcePeek $ peekExtensionsConfig top `lastly` pop 1 - - mtemplate <- rawgetglobal "Template" >>= \case - TypeNil -> pure Nothing - TypeFunction -> Just <$> do - callTrace 0 1 - forcePeek $ peekText top `lastly` pop 1 - _ -> Just <$> do - forcePeek $ peekText top `lastly` pop 1 - - let addProperties = (, extsConf, mtemplate) - - rawgetglobal "Writer" >>= \case - TypeNil -> rawgetglobal "ByteStringWriter" >>= \case - TypeNil -> do - -- Neither `Writer` nor `BinaryWriter` are defined. Try to - -- use the file as a classic writer. - pop 1 -- remove nil - pure $ addProperties . TextWriter $ \opts doc -> - liftIO $ withGCManagedState luaState $ do - Classic.runCustom @PandocError opts doc - _ -> do - -- Binary writer. Writer function is on top of the stack. - setfield registryindex writerField - pure $ addProperties . ByteStringWriter $ \opts doc -> - -- Call writer with document and writer options as arguments. - liftIO $ withGCManagedState luaState $ do - getfield registryindex writerField - push doc - pushWriterOptions opts - callTrace 2 1 - forcePeek @PandocError $ peekLazyByteString top - _ -> do - -- New-type text writer. Writer function is on top of the stack. - setfield registryindex writerField - pure $ addProperties . TextWriter $ \opts doc -> - liftIO $ withGCManagedState luaState $ do - getfield registryindex writerField - push doc - pushWriterOptions opts - callTrace 2 1 - forcePeek @PandocError $ peekText top diff --git a/pandoc-lua-engine/test/Tests/Lua/Reader.hs b/pandoc-lua-engine/test/Tests/Lua/Reader.hs index 15ad685b4..dbf125432 100644 --- a/pandoc-lua-engine/test/Tests/Lua/Reader.hs +++ b/pandoc-lua-engine/test/Tests/Lua/Reader.hs @@ -9,11 +9,13 @@ Tests for custom Lua readers. -} module Tests.Lua.Reader (tests) where +import Control.Arrow ((>>>)) import Data.Char (chr) import Data.Default (Default (def)) import Text.Pandoc.Class (runIOorExplode) -import Text.Pandoc.Lua (readCustom) -import Text.Pandoc.Readers (Reader (ByteStringReader, TextReader)) +import Text.Pandoc.Lua (loadCustom) +import Text.Pandoc.Readers (Reader (ByteStringReader)) +import Text.Pandoc.Scripting (customReader) import Test.Tasty (TestTree) import Test.Tasty.HUnit ((@?=), testCase) @@ -26,9 +28,9 @@ tests = [ testCase "read binary to code block" $ do input <- BL.readFile "bytestring.bin" doc <- runIOorExplode $ - readCustom "bytestring-reader.lua" >>= \case - (ByteStringReader f, _) -> f def input - (TextReader {}, _) -> error "Expected a bytestring reader" + loadCustom "bytestring-reader.lua" >>= (customReader >>> \case + Just (ByteStringReader f) -> f def input + _ -> error "Expected a bytestring reader") let bytes = mconcat $ map (B.str . T.singleton . chr) [0..255] doc @?= B.doc (B.plain bytes) ] diff --git a/pandoc-lua-engine/test/Tests/Lua/Writer.hs b/pandoc-lua-engine/test/Tests/Lua/Writer.hs index 19db66da0..692c6b83d 100644 --- a/pandoc-lua-engine/test/Tests/Lua/Writer.hs +++ b/pandoc-lua-engine/test/Tests/Lua/Writer.hs @@ -16,9 +16,10 @@ import Text.Pandoc.Class (runIOorExplode, readFileStrict) import Text.Pandoc.Extensions (Extension (..), extensionsFromList) import Text.Pandoc.Format (ExtensionsDiff (..), FlavoredFormat (..), applyExtensionsDiff) -import Text.Pandoc.Lua (writeCustom) +import Text.Pandoc.Lua (loadCustom) import Text.Pandoc.Options (WriterOptions (..)) import Text.Pandoc.Readers (readNative) +import Text.Pandoc.Scripting (CustomComponents (..)) import Text.Pandoc.Writers (Writer (ByteStringWriter, TextWriter)) import Test.Tasty (TestTree) import Test.Tasty.Golden (goldenVsString) @@ -35,9 +36,9 @@ tests = (runIOorExplode $ do source <- UTF8.toText <$> readFileStrict "testsuite.native" doc <- readNative def source - txt <- writeCustom "sample.lua" >>= \case - (TextWriter f, _, _) -> f def doc - _ -> error "Expected a text writer" + txt <- customWriter <$> loadCustom "sample.lua" >>= \case + Just (TextWriter f) -> f def doc + _ -> error "Expected a text writer" pure $ BL.fromStrict (UTF8.fromText txt)) , goldenVsString "tables testsuite" @@ -84,3 +85,10 @@ tests = _ -> error "Expected a text writer" result @?= "smart extension is enabled;\ncitations extension is enabled\n" ] + where + writeCustom fp = do + components <- loadCustom fp + let exts = fromMaybe mempty (customExtensions components) + case customWriter components of + Nothing -> error "Expected a writer to be defined" + Just w -> return (w, exts, customTemplate components) |
