aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorAnton Antich <[email protected]>2025-11-09 13:48:14 +0100
committerJohn MacFarlane <[email protected]>2025-11-24 22:37:39 +0100
commitec75b693e5618c12ddac872d48e084436f1e1b48 (patch)
tree942edb1022128c5f769b928ee5ba3b0a6ee6b4f7 /test
parent5d3b2916d616902d205146c108629053516fd9f4 (diff)
Support pptx (PowerPoint) as an input format.
New module `Text.Pandoc.Readers.Pptx`, exporting `readPptx`. [API change] Factored out some common OOXML functions from Text.Pandoc.Readers.Docx.Util into a non-exported module Text.Pandoc.Readers.OOXML.Shared.
Diffstat (limited to 'test')
-rw-r--r--test/Tests/Readers/Pptx.hs63
-rw-r--r--test/pptx-reader/basic.native149
-rw-r--r--test/pptx-reader/basic.pptxbin0 -> 111674 bytes
-rw-r--r--test/test-pandoc.hs2
4 files changed, 214 insertions, 0 deletions
diff --git a/test/Tests/Readers/Pptx.hs b/test/Tests/Readers/Pptx.hs
new file mode 100644
index 000000000..613d5b50f
--- /dev/null
+++ b/test/Tests/Readers/Pptx.hs
@@ -0,0 +1,63 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Tests.Readers.Pptx
+ Copyright : © 2025 Anton Antich
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Anton Antich <[email protected]>
+ Stability : alpha
+ Portability : portable
+
+Tests for the PPTX reader.
+-}
+module Tests.Readers.Pptx (tests) where
+
+import Data.Algorithm.Diff (getDiff)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Text as T
+import Test.Tasty
+import Test.Tasty.Golden.Advanced
+import Tests.Helpers
+import Text.Pandoc
+import Text.Pandoc.UTF8 as UTF8
+
+-- | Reader options shared by the tests in this module: the default
+-- options with 'readerExtensions' set to the default extension set
+-- registered for the @pptx@ format.
+defopts :: ReaderOptions
+defopts = def { readerExtensions = pptxExtensions }
+  where
+    pptxExtensions = getDefaultExtensions "pptx"
+
+-- | Build a golden test named @testName@ that reads the PPTX file at
+-- @pptxFP@ with 'defopts' and compares the result against the native
+-- file at @nativeFP@.  This is 'testCompareWithOpts' specialised to
+-- 'defopts'.
+testCompare :: String -> FilePath -> FilePath -> TestTree
+testCompare testName pptxFP nativeFP =
+  testCompareWithOpts defopts testName pptxFP nativeFP
+
+-- | Compare an expected and an actual document.
+--
+-- Returns 'Nothing' when the two documents are equal.  Otherwise both are
+-- rendered with 'writeNative', split into lines, and diffed; the result is
+-- a 'Just' report consisting of a rule of 72 dashes, a @--- \<normPath\>@
+-- line, a @+++ test@ line, the output of 'showDiff' over the line diff,
+-- and a closing rule.
+nativeDiff :: FilePath -> Pandoc -> Pandoc -> IO (Maybe String)
+nativeDiff normPath expectedNative actualNative =
+  if expectedNative == actualNative
+    then pure Nothing
+    else do
+      -- Render the expected document first, then the actual one.
+      expectedLines <- renderLines expectedNative
+      actualLines <- renderLines actualNative
+      let rule = replicate 72 '-'
+          changes = getDiff actualLines expectedLines
+      pure . Just $ concat
+        [ "\n", rule
+        , "\n--- ", normPath
+        , "\n+++ ", "test", "\n"
+        , showDiff (1,1) changes
+        , rule
+        ]
+  where
+    -- Native rendering of a document, as a list of lines.
+    renderLines doc =
+      lines . T.unpack <$> runIOorExplode (writeNative def doc)
+
+-- | Build a golden test named @testName@ from four pieces handed to
+-- 'goldenTest':
+--
+-- * the expected document, parsed with 'readNative' from @nativeFP@;
+-- * the actual document, produced by running 'readPptx' with @opts@ on
+--   the contents of @pptxFP@;
+-- * 'nativeDiff' (labelled with @nativeFP@) as the comparison;
+-- * an update action that writes the native rendering of a document
+--   back to @nativeFP@.
+testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> TestTree
+testCompareWithOpts opts testName pptxFP nativeFP =
+  goldenTest testName loadExpected loadActual (nativeDiff nativeFP) writeGolden
+  where
+    -- Parse the stored native file (read strictly, decoded via UTF8).
+    loadExpected =
+      BS.readFile nativeFP >>= runIOorExplode . readNative def . UTF8.toText
+
+    -- Run the PPTX reader on the raw bytes of the presentation.
+    loadActual =
+      B.readFile pptxFP >>= runIOorExplode . readPptx opts
+
+    -- Overwrite the native file with the rendering of the given document.
+    writeGolden doc = do
+      rendered <- runIOorExplode
+                    (writeNative def { writerTemplate = Just mempty } doc)
+      BS.writeFile nativeFP (UTF8.fromText rendered)
+
+-- | All PPTX reader tests exported by this module: currently a single
+-- group, @basic@, holding one golden comparison of
+-- @pptx-reader/basic.pptx@ against @pptx-reader/basic.native@.
+tests :: [TestTree]
+tests = [ testGroup "basic" basicTests ]
+  where
+    basicTests =
+      [ testCompare "text extraction"
+                    "pptx-reader/basic.pptx"
+                    "pptx-reader/basic.native"
+      ]
diff --git a/test/pptx-reader/basic.native b/test/pptx-reader/basic.native
new file mode 100644
index 000000000..954cb9345
--- /dev/null
+++ b/test/pptx-reader/basic.native
@@ -0,0 +1,149 @@
+[ Header 2 ( "slide-1" , [] , [] ) [ Str "LLMs" ]
+, BulletList
+ [ [ Plain
+ [ Str
+ "Provider \61664 Available LLMs \8211 who manages? How?"
+ ]
+ ]
+ , [ Plain
+ [ Str
+ "EW maintained list of \8220approved\8221 LLMs for Universal workers"
+ ]
+ ]
+ , [ Plain
+ [ Str
+ "Rebuilding of UWs to the \8220Newgen\8221 thing completely"
+ ]
+ ]
+ , [ Plain [ Str "Streaming support" ] ]
+ , [ Plain [ Str "Multimodal (voice streaming) models?" ] ]
+ ]
+, Header
+ 2
+ ( "slide-2" , [] , [] )
+ [ Str "Everworker venn diagram" ]
+, Para [ Str "SKILLS" ]
+, Para [ Str "" ]
+, Para [ Str "Specialized Workers / Workflows:" ]
+, Para [ Str "" ]
+, Para [ Str "n8n, UI Path, " ]
+, Para [ Str "other RPA" ]
+, Para [ Str "BRAINS" ]
+, Para [ Str "" ]
+, Para [ Str "Universal Workers / AI Agents:" ]
+, Para [ Str "" ]
+, Para [ Str "openai , anthropic," ]
+, Para [ Str "Crew AI, other " ]
+, Para [ Str "\8220AI natives\8221" ]
+, Para [ Str "KNOWLEDGE " ]
+, Para [ Str "" ]
+, Para [ Str "Data / " ]
+, Para [ Str "RAG Pipelines" ]
+, Para [ Str "" ]
+, Para
+ [ Str "Vector DBs, specialized data prep vendors, \8230" ]
+, Para [ Str "glean" ]
+, Para [ Str "EW" ]
+, Header 2 ( "slide-3" , [] , [] ) [ Str "Table" ]
+, Table
+ ( "" , [] , [] )
+ (Caption Nothing [])
+ [ ( AlignDefault , ColWidthDefault )
+ , ( AlignDefault , ColWidthDefault )
+ , ( AlignDefault , ColWidthDefault )
+ ]
+ (TableHead
+ ( "" , [] , [] )
+ [ Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Col1" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Col2" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Col3" ] ]
+ ]
+ ])
+ [ TableBody
+ ( "" , [] , [] )
+ (RowHeadColumns 0)
+ []
+ [ Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Name" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Anton" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Antich" ] ]
+ ]
+ , Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "Age" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "23" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Plain [ Str "years" ] ]
+ ]
+ ]
+ ]
+ (TableFoot ( "" , [] , [] ) [])
+, Para
+ [ Image
+ ( "" , [] , [] ) [] ( "ppt/media/image1.png" , "Picture 6" )
+ ]
+, Header 2 ( "slide-4" , [] , [] ) [ Str "Smart Art" ]
+, Div
+ ( ""
+ , [ "smartart" , "chevron2" ]
+ , [ ( "layout" , "chevron2" ) ]
+ )
+ [ Para [ Strong [ Str "First" ] ]
+ , BulletList
+ [ [ Plain [ Str "another" ] ]
+ , [ Plain [ Str "subtitle" ] ]
+ ]
+ , Para [ Strong [ Str "Second" ] ]
+ , BulletList
+ [ [ Plain [ Str "and yet again" ] ]
+ , [ Plain [ Str "yet more" ] ]
+ ]
+ ]
+]
diff --git a/test/pptx-reader/basic.pptx b/test/pptx-reader/basic.pptx
new file mode 100644
index 000000000..44caef9c3
--- /dev/null
+++ b/test/pptx-reader/basic.pptx
Binary files differ
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 80d4ada7f..0d04b361f 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -12,6 +12,7 @@ import qualified Tests.Command
import qualified Tests.Old
import qualified Tests.Readers.Creole
import qualified Tests.Readers.Docx
+import qualified Tests.Readers.Pptx
import qualified Tests.Readers.DokuWiki
import qualified Tests.Readers.EPUB
import qualified Tests.Readers.FB2
@@ -95,6 +96,7 @@ tests pandocPath = testGroup "pandoc tests"
, testGroup "RST" Tests.Readers.RST.tests
, testGroup "RTF" Tests.Readers.RTF.tests
, testGroup "Docx" Tests.Readers.Docx.tests
+ , testGroup "Pptx" Tests.Readers.Pptx.tests
, testGroup "ODT" Tests.Readers.ODT.tests
, testGroup "Txt2Tags" Tests.Readers.Txt2Tags.tests
, testGroup "EPUB" Tests.Readers.EPUB.tests