diff options
| author | Albert Krewinkel <[email protected]> | 2022-10-10 09:33:20 +0200 |
|---|---|---|
| committer | Albert Krewinkel <[email protected]> | 2022-10-10 11:27:42 +0200 |
| commit | a5ffaaa4136f11378c0c7741309edb9fd53e17d8 (patch) | |
| tree | afa29d609290f50cf752754cfca4a23075dc2781 | |
| parent | d6fb8fb20fbb822d58cdfbde34961094c902a708 (diff) | |
Lua: support custom bytestring readers.
| -rw-r--r-- | doc/custom-readers.md | 17 | ||||
| -rw-r--r-- | pandoc-lua-engine/pandoc-lua-engine.cabal | 2 | ||||
| -rw-r--r-- | pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs | 74 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/Tests/Lua/Reader.hs | 34 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/bytestring-reader.lua | 7 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/test-pandoc-lua-engine.hs | 2 |
6 files changed, 111 insertions, 25 deletions
diff --git a/doc/custom-readers.md b/doc/custom-readers.md index add2317a2..601d879dd 100644 --- a/doc/custom-readers.md +++ b/doc/custom-readers.md @@ -76,6 +76,23 @@ ensuring backwards compatibility. [patterns]: http://lua-users.org/wiki/PatternsTutorial [lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ +# Bytestring readers + +Pandoc expects text input to be UTF-8 encoded. However, formats +like docx, odt, epub, etc. are not text but binary formats. To +read them, pandoc supports `ByteStringReader` functions. These +functions work just like the `Reader` function that process text +input, but instead of a list of sources, `ByteStringReader` +functions are passed a bytestring, i.e., a string that contains +the binary input. + +``` lua +-- read input as epub +function ByteStringReader (input) + return pandoc.read(input, 'epub') +end +``` + # Example: plain text reader This is a simple example using [lpeg] to parse the input diff --git a/pandoc-lua-engine/pandoc-lua-engine.cabal b/pandoc-lua-engine/pandoc-lua-engine.cabal index b8f3abbd1..a122b935c 100644 --- a/pandoc-lua-engine/pandoc-lua-engine.cabal +++ b/pandoc-lua-engine/pandoc-lua-engine.cabal @@ -21,6 +21,7 @@ description: This package provides a pandoc scripting engine based on extra-source-files: README.md , test/bytestring.bin , test/bytestring.lua + , test/bytestring-reader.lua , test/lua/*.lua , test/lua/module/*.lua , test/lua/module/partial.test @@ -131,4 +132,5 @@ test-suite test-pandoc-lua-engine , text >= 1.1.1 && < 2.1 other-modules: Tests.Lua , Tests.Lua.Module + , Tests.Lua.Reader , Tests.Lua.Writer diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs index 44781c9fb..6aeda526f 100644 --- a/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Lua.Reader Copyright : Copyright (C) 2021-2022 John MacFarlane @@ -17,8 +18,9 @@ import Control.Monad ((<=<), when) import Control.Monad.IO.Class (MonadIO) import Data.Maybe (fromMaybe) import HsLua as Lua hiding (Operation (Div)) -import HsLua.Core.Run (newGCManagedState, withGCManagedState) +import HsLua.Core.Run (GCManagedState, newGCManagedState, withGCManagedState) import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback, report) +import Text.Pandoc.Error (PandocError) import Text.Pandoc.Logging import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.Init (runLuaWith) @@ -42,14 +44,23 @@ readCustom luaFile = do -- to handle this more gracefully): when (stat /= Lua.OK) Lua.throwErrorAsException - pure (reader luaState) + getCustomReader luaState where - reader st = TextReader $ \opts srcs -> liftIO . withGCManagedState st $ do + readerField = "PANDOC Reader function" + inLua st = liftIO . withGCManagedState @PandocError st + byteStringReader :: MonadIO m => GCManagedState -> Reader m + byteStringReader st = ByteStringReader $ \ropts input -> inLua st $ do + getfield registryindex readerField + push input + push ropts + callTrace 2 1 + forcePeek $ peekPandoc top + textReader st = TextReader $ \ropts srcs -> inLua st $ do let input = toSources srcs - getglobal "Reader" + getfield registryindex readerField push input - push opts + push ropts pcallTrace 2 1 >>= \case OK -> forcePeek $ peekPandoc top ErrRun -> do @@ -59,25 +70,38 @@ readCustom luaFile = do Failure {} -> -- not a string error object. Bail! throwErrorAsException - Success errmsg -> do + Success errmsg -> if "string expected, got pandoc Sources" `T.isInfixOf` errmsg - then do - pop 1 - _ <- unPandocLua $ do - report $ Deprecated "old Reader function signature" $ - T.unlines - [ "Reader functions should accept a sources list; " - , "functions expecting `string` input are deprecated. " - , "Use `tostring` to convert the first argument to a " - , "string." - ] - getglobal "Reader" - push $ sourcesToText input -- push sources as string - push opts - callTrace 2 1 - forcePeek $ peekPandoc top - else - -- nothing we can do here - throwErrorAsException + then do + pop 1 + _ <- unPandocLua $ do + report $ Deprecated "old Reader function signature" $ + T.unlines + [ "Reader functions should accept a sources list; " + , "functions expecting `string` input are deprecated. " + , "Use `tostring` to convert the first argument to a " + , "string." + ] + getglobal "Reader" + push $ sourcesToText input -- push sources as string + push ropts + callTrace 2 1 + forcePeek $ peekPandoc top + else + -- nothing we can do here + throwErrorAsException _ -> -- not a runtime error, we won't be able to recover from that - throwErrorAsException + throwErrorAsException + getCustomReader st = do + getglobal "Reader" >>= \case + TypeNil -> do + pop 1 + getglobal "ByteStringReader" >>= \case + TypeNil -> failLua $ "No reader function found: either 'Reader' or " + <> "'ByteStringReader' must be defined." + _ -> do + setfield registryindex readerField + pure (byteStringReader st) + _ -> do + setfield registryindex readerField + pure (textReader st) diff --git a/pandoc-lua-engine/test/Tests/Lua/Reader.hs b/pandoc-lua-engine/test/Tests/Lua/Reader.hs new file mode 100644 index 000000000..16474bd91 --- /dev/null +++ b/pandoc-lua-engine/test/Tests/Lua/Reader.hs @@ -0,0 +1,34 @@ +{-# LANGUAGE LambdaCase #-} +{- | +Module : Tests.Lua.Reader +Copyright : © 2022 Albert Krewinkel +License : GPL-2.0-or-later +Maintainer : Albert Krewinkel <[email protected]> + +Tests for custom Lua readers. +-} +module Tests.Lua.Reader (tests) where + +import Data.Char (chr) +import Data.Default (Default (def)) +import Text.Pandoc.Class (runIOorExplode) +import Text.Pandoc.Lua (readCustom) +import Text.Pandoc.Readers (Reader (ByteStringReader, TextReader)) +import Test.Tasty (TestTree) +import Test.Tasty.HUnit ((@?=), testCase) + +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B + +tests :: [TestTree] +tests = + [ testCase "read binary to code block" $ do + input <- BL.readFile "bytestring.bin" + doc <- runIOorExplode $ + readCustom "bytestring-reader.lua" >>= \case + ByteStringReader f -> f def input + TextReader {} -> error "Expected a bytestring reader" + let bytes = mconcat $ map (B.str . T.singleton . chr) [0..255] + doc @?= B.doc (B.plain bytes) + ] diff --git a/pandoc-lua-engine/test/bytestring-reader.lua b/pandoc-lua-engine/test/bytestring-reader.lua new file mode 100644 index 000000000..7908479ec --- /dev/null +++ b/pandoc-lua-engine/test/bytestring-reader.lua @@ -0,0 +1,7 @@ +function ByteStringReader (input, opts) + local chars = pandoc.List{} + for i = 1, #input do + chars:insert(utf8.char(input:byte(i,i))) + end + return pandoc.Pandoc(pandoc.Plain(pandoc.Str(table.concat(chars)))) +end diff --git a/pandoc-lua-engine/test/test-pandoc-lua-engine.hs b/pandoc-lua-engine/test/test-pandoc-lua-engine.hs index 035d92812..21febddb5 100644 --- a/pandoc-lua-engine/test/test-pandoc-lua-engine.hs +++ b/pandoc-lua-engine/test/test-pandoc-lua-engine.hs @@ -2,6 +2,7 @@ module Main (main) where import Test.Tasty (TestTree, defaultMain, testGroup) import qualified Tests.Lua import qualified Tests.Lua.Module +import qualified Tests.Lua.Reader import qualified Tests.Lua.Writer import System.Directory (withCurrentDirectory) @@ -13,4 +14,5 @@ tests = testGroup "pandoc Lua engine" [ testGroup "Lua filters" Tests.Lua.tests , testGroup "Lua modules" Tests.Lua.Module.tests , testGroup "Custom writers" Tests.Lua.Writer.tests + , testGroup "Custom readers" Tests.Lua.Reader.tests ] |
