diff options
| author | Albert Krewinkel <[email protected]> | 2025-05-13 23:21:28 +0200 |
|---|---|---|
| committer | Albert Krewinkel <[email protected]> | 2025-05-13 23:38:36 +0200 |
| commit | 31f74fac065023e1accc686753dc6216edd4c9c4 (patch) | |
| tree | 779f1800bed4d8869190143e6b0d0aea7ffe595d /pandoc-lua-engine | |
| parent | 21966099896ecc612786f4ba4bb6f08fcfff5253 (diff) | |
Lua: support sandboxed parsing with `pandoc.read`.
The function `pandoc.read` is now taking an optional fourth parameter
that specifies the environment in which the parser will be run. Passing
the string `sandbox` as the argument causes the reader to run in a
sandbox, thereby preventing all access to the network and file system.
Closes: #10831
Diffstat (limited to 'pandoc-lua-engine')
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs | 54 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/lua/module/include.tex | 1 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/lua/module/pandoc.lua | 34 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/lua/module/sample.epub | bin | 0 -> 5522 bytes |
4 files changed, 74 insertions, 15 deletions
diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs index 4ad93dcbf..522871a73 100644 --- a/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -19,6 +19,7 @@ import Prelude hiding (read) import Control.Applicative ((<|>)) import Control.Monad (forM_, when) import Control.Monad.Catch (catch, handle, throwM) +import Control.Monad.Except (MonadError (throwError)) import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) @@ -26,9 +27,10 @@ import Data.Proxy (Proxy (Proxy)) import Data.Text.Encoding.Error (UnicodeException) import HsLua import System.Exit (ExitCode (..)) +import Text.Pandoc.Class (PandocMonad, sandbox) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (..)) -import Text.Pandoc.Format (parseFlavoredFormat) +import Text.Pandoc.Format (FlavoredFormat, parseFlavoredFormat) import Text.Pandoc.Lua.Orphans () import Text.Pandoc.Lua.Marshal.AST import Text.Pandoc.Lua.Marshal.Format (peekFlavoredFormat) @@ -234,22 +236,31 @@ functions = =?> "output string, or error triple" , defun "read" - ### (\content mformatspec mreaderOptions -> - handle (failLua . show @UnicodeException) . unPandocLua $ do - flvrd <- maybe (parseFlavoredFormat "markdown") pure mformatspec + ### (\content mformatspec mreaderOptions mreadEnv -> do let readerOpts = fromMaybe def mreaderOptions - getReader flvrd >>= \case - (TextReader r, es) -> - r readerOpts{readerExtensions = es} - (case content of + readEnv = fromMaybe "global" mreadEnv + + readAction :: PandocMonad m => FlavoredFormat -> m Pandoc + readAction flvrd = getReader flvrd >>= \case + (TextReader r, es) -> + r readerOpts{readerExtensions = es} $ + case content of Left bs -> toSources $ UTF8.toText bs - Right sources -> sources) - (ByteStringReader r, es) -> - case content of - Left bs -> r readerOpts{readerExtensions = es} - (BSL.fromStrict bs) - Right _ -> throwM $ PandocLuaError - "Cannot use bytestring reader with Sources") + Right sources -> sources + (ByteStringReader r, es) -> + case content of + Left bs -> r readerOpts{readerExtensions = es} + (BSL.fromStrict bs) + Right _ -> throwError $ PandocLuaError + "Cannot use bytestring reader with Sources" + + handle (failLua . show @UnicodeException) . unPandocLua $ do + flvrd <- maybe (parseFlavoredFormat "markdown") pure mformatspec + case readEnv of + "global" -> readAction flvrd + "sandbox" -> sandbox [] (readAction flvrd) + x -> throwError $ PandocLuaError + ("unknown read environment: " <> x)) <#> parameter (\idx -> (Left <$> peekByteString idx) <|> (Right <$> peekSources idx)) "string|Sources" "content" "text to parse" @@ -257,6 +268,19 @@ functions = "formatspec" "format and extensions") <#> opt (parameter peekReaderOptions "ReaderOptions" "reader_options" "reader options") + <#> opt (parameter peekText "string" "read_env" $ mconcat + [ "which environment the reader operates in: Possible values" + , "are:" + , "" + , "- 'io' is the default and gives the behavior described above." + , "- 'global' uses the same environment that was used to read" + , " the input files; the parser has full access to the" + , " file-system and the mediabag." + , "- 'sandbox' works like 'global' and give the parser access to" + , " the mediabag, but prohibits file-system access." + , "" + , "Defaults to `'io'`. (string)" + ]) =#> functionResult pushPandoc "Pandoc" "result document" , sha1 diff --git a/pandoc-lua-engine/test/lua/module/include.tex b/pandoc-lua-engine/test/lua/module/include.tex new file mode 100644 index 000000000..201a102cd --- /dev/null +++ b/pandoc-lua-engine/test/lua/module/include.tex @@ -0,0 +1 @@ +included diff --git a/pandoc-lua-engine/test/lua/module/pandoc.lua b/pandoc-lua-engine/test/lua/module/pandoc.lua index 5df547d24..3db58c0f8 100644 --- a/pandoc-lua-engine/test/lua/module/pandoc.lua +++ b/pandoc-lua-engine/test/lua/module/pandoc.lua @@ -293,6 +293,40 @@ return { 'Unknown input format nosuchreader' ) end), + group 'read_env' { + test('images are added to the mediabag', function () + local epub = io.open('lua/module/sample.epub', 'rb'):read('a') + local _ = pandoc.read(epub, 'epub') + assert.are_equal( + #pandoc.mediabag.list(), + 1 + ) + end), + test('images from EPUB are added when using the sandbox', function () + local epub = io.open('lua/module/sample.epub', 'rb'):read('a') + local _ = pandoc.read(epub, 'epub', nil, 'sandbox') + assert.are_equal( + #pandoc.mediabag.list(), + 1 + ) + end), + test('includes work in global env', function () + local tex = '\\include{lua/module/include.tex}' + local doc = pandoc.read(tex, 'latex', nil, 'global') + assert.are_equal( + doc.blocks, + pandoc.Blocks{pandoc.Para 'included'} + ) + end), + test('sandbox disallows access to the filesystem', function () + local tex = '\\include{lua/module/include.tex}' + local doc = pandoc.read(tex, 'latex', nil, 'sandbox') + assert.are_equal( + doc.blocks, + pandoc.Blocks{} + ) + end), + }, group 'extensions' { test('string spec', function () local doc = pandoc.read('"vice versa"', 'markdown-smart') diff --git a/pandoc-lua-engine/test/lua/module/sample.epub b/pandoc-lua-engine/test/lua/module/sample.epub Binary files differnew file mode 100644 index 000000000..fca4a1861 --- /dev/null +++ b/pandoc-lua-engine/test/lua/module/sample.epub |
