aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorEvan Silberman <[email protected]>2024-12-19 18:43:24 -0800
committerJohn MacFarlane <[email protected]>2024-12-27 11:10:48 -0800
commitc4716d41c574fc514146e546790e6c69da7f7e71 (patch)
tree812b39a15d284ef82e427aab1223eb26e3a6daca /test
parentaf2b276afeaae3faa4c4a9759d1530e2f1da6871 (diff)
Add Pod reader
Pod ("Plain old documentation") is a markup languaged used principally to document Perl modules and programs. Since it was originally meant to be translated pretty directly to man, the semantics are fairly simple. This Pod reader was developed with reference to the canonical user and implementer documentation of Pod: https://perldoc.perl.org/perlpod and https://perldoc.perl.org/perlpodspec. There are 1490 .pod, .pl, and .pm in the Perl 5.34 distribution found in /System/Library/Perl on my mac. Of those, this reader dies with a parse error on 7 of them. All of them seem to be cases where pod commands are found within a non-colon-prefixed =begin/=end. perlpodspec says I may treat this as an error. [API change] adds readPod
Diffstat (limited to 'test')
-rw-r--r--test/Tests/Old.hs4
-rw-r--r--test/Tests/Readers/Pod.hs175
-rw-r--r--test/pod-reader.native394
-rw-r--r--test/pod-reader.pod155
-rw-r--r--test/test-pandoc.hs2
5 files changed, 730 insertions, 0 deletions
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index bc3a61270..f8cc80625 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -246,6 +246,10 @@ tests pandocPath =
[ test' "ansi" ["-f", "markdown", "-t", "ansi"]
"ansi-test.txt" "ansi-test.ansi"
]
+ , testGroup "pod"
+ [ test' "pod" ["-f", "pod", "-t", "native"]
+ "pod-reader.pod" "pod-reader.native"
+ ]
]
where
test' = test pandocPath
diff --git a/test/Tests/Readers/Pod.hs b/test/Tests/Readers/Pod.hs
new file mode 100644
index 000000000..c812e0754
--- /dev/null
+++ b/test/Tests/Readers/Pod.hs
@@ -0,0 +1,175 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Tests.Readers.Pod
+ Copyright : © 2024 Evan Silberman
+ License : GNU GPL, version 2 or above
+
+ Maintainer :
+ Stability : alpha
+ Portability : portable
+
+Tests for the Pod reader.
+-}
+
+module Tests.Readers.Pod (tests) where
+
+import Data.Text (Text, pack)
+import Test.Tasty
+import Test.Tasty.HUnit (HasCallStack)
+import Tests.Helpers
+import Text.Pandoc
+import Text.Pandoc.Arbitrary ()
+import Text.Pandoc.Builder
+
+pod :: Text -> Pandoc
+pod t = (purely $ readPod def) ("=pod\n\n" <> t <> "\n\n=cut\n")
+
+manLink :: Text -> Maybe Text -> Inlines -> Inlines
+manLink nm Nothing = linkWith (mempty, mempty, [("manual", nm)]) "" ""
+manLink nm (Just sc) = linkWith (mempty, mempty, [("manual", nm), ("section", sc)]) "" ""
+
+bogusEntity :: String -> TestTree
+bogusEntity t = t =: "E<" <> pack t <> ">" =?> para ("E<" <> str (pack t) <> ">")
+
+infix 4 =:
+(=:) :: (ToString c, HasCallStack)
+ => String -> (Text, c) -> TestTree
+(=:) = test pod
+
+tests :: [TestTree]
+tests = [
+ testGroup "inlines"
+ [ "code with nested inlines" =:
+ "C</I<A> (*PRUNE) I<B>/>" =?>
+ para (code "/A (*PRUNE) B/")
+ , "compact in compact" =:
+ "I<B<strong> emphasis>" =?>
+ para (emph $ (strong "strong") <> " emphasis")
+ , "expanded in compact" =:
+ "I<B<< strong >> emphasis>" =?>
+ para (emph $ (strong "strong") <> " emphasis")
+ , "compact in expanded" =:
+ "I<<< B<strong> emphasis >>>" =?>
+ para (emph $ (strong "strong") <> " emphasis")
+ , "expanded in expanded" =:
+ "I<<< B<<< strong >>> emphasis >>>" =?>
+ para (emph $ (strong "strong") <> " emphasis")
+ ]
+ , testGroup "links"
+ [ testGroup "compact"
+ [ "URL" =:
+ "L<https://example.org>" =?>
+ para (link "https://example.org" "" "https://example.org")
+ , "URL with link text" =:
+ "L<link|https://example.org/index.html>" =?>
+ para (link "https://example.org/index.html" "" "link")
+ , "perl manual" =:
+ "L<Foo::Bar>" =?>
+ para (manLink "Foo::Bar" Nothing "Foo::Bar")
+ , "manual with quoted section" =:
+ "L<crontab(5)/\"DESCRIPTION\">" =?>
+ para (manLink "crontab(5)" (Just "DESCRIPTION") (doubleQuoted "DESCRIPTION" <> " in crontab(5)"))
+ , "manual with section and formatted link text" =:
+ "L<B<< extravagant >> link|HTTP::Simple/is_info>" =?>
+ para (manLink "HTTP::Simple" (Just "is_info") (strong "extravagant" <> " link"))
+ , "internal link" =:
+ "L</section name>" =?>
+ para (link "#section-name" "" (doubleQuoted "section name"))
+ , "internal link with formatting" =:
+ "L</The C<pod2html> command>" =?>
+ para (link "#the-pod2html-command" "" (doubleQuoted ("The " <> code "pod2html" <> " command")))
+ , "link with angle bracket" =:
+ "L<m<>" =?>
+ para (manLink "m<" Nothing "m<")
+ , "empty name" =:
+ "L<|https://example.org>" =?>
+ para (link "https://example.org" "" mempty)
+ ]
+ , testGroup "expanded"
+ [ "URL" =:
+ "L<< https://example.org >>" =?>
+ para (link "https://example.org" "" "https://example.org")
+ , "URL with link text" =:
+ "L<< link|https://example.org/index.html >>" =?>
+ para (link "https://example.org/index.html" "" "link")
+ , "perl manual" =:
+ "L<<< Foo::Bar >>>" =?>
+ para (manLink "Foo::Bar" Nothing "Foo::Bar")
+ , "manual with quoted section" =:
+ "L<< crontab(5)/\"DESCRIPTION\" >>" =?>
+ para (manLink "crontab(5)" (Just "DESCRIPTION") (doubleQuoted "DESCRIPTION" <> " in crontab(5)"))
+ , "manual with section and formatted link text" =:
+ "L<< B<< extravagant >> link|HTTP::Simple/is_info >>" =?>
+ para (manLink "HTTP::Simple" (Just "is_info") (strong "extravagant" <> " link"))
+ , "internal link" =:
+ "L<< /section name >>" =?>
+ para (link "#section-name" "" (doubleQuoted "section name"))
+ , "internal link with formatting" =:
+ "L<<<<< /The C<pod2html> command >>>>>" =?>
+ para (link "#the-pod2html-command" "" (doubleQuoted ("The " <> code "pod2html" <> " command")))
+ , "link with angle bracket" =:
+ "L<< m< >>" =?>
+ para (manLink "m<" Nothing "m<")
+ , "empty name" =:
+ "L<< |https://example.org >>" =?>
+ para (link "https://example.org" "" mempty)
+ ]
+ ]
+ , testGroup "entities"
+ [ testGroup "required"
+ [ "quot" =:
+ "E<quot>" =?>
+ para "\""
+ , "amp" =:
+ "E<amp>" =?>
+ para "&"
+ , "apos" =:
+ "E<apos>" =?>
+ para "'"
+ , "lt" =:
+ "E<lt>" =?>
+ para "<"
+ , "gt" =:
+ "E<gt>" =?>
+ para ">"
+ , "sol" =:
+ "E<sol>" =?>
+ para "/"
+ , "verbar" =:
+ "E<verbar>" =?>
+ para "|"
+ , "lchevron" =:
+ "E<lchevron>" =?>
+ para "«"
+ , "rchevron" =:
+ "E<rchevron>" =?>
+ para "»"
+ ]
+ , testGroup "numeric"
+ [ "decimal" =:
+ "E<162>" =?>
+ para "¢"
+ , "octal" =:
+ "E<0242>" =?>
+ para "¢"
+ , "hexadecimal" =:
+ "E<0xA2>" =?>
+ para "¢"
+ , "hexadecimal variant" =:
+ "E<0x00A2>" =?>
+ para "¢"
+ , "actually decimal" =:
+ "E<099>" =?>
+ para "c"
+ ]
+ , testGroup "bogus"
+ [ bogusEntity "0XA2"
+ , bogusEntity "not a real entity"
+ , bogusEntity "162 1"
+ , bogusEntity "99 bottles of beer"
+ , bogusEntity "0xhh"
+ , bogusEntity "077x"
+ , bogusEntity "0x63 skidoo"
+ ]
+ ]
+ ]
diff --git a/test/pod-reader.native b/test/pod-reader.native
new file mode 100644
index 000000000..07603a80f
--- /dev/null
+++ b/test/pod-reader.native
@@ -0,0 +1,394 @@
+[ Header
+ 1
+ ( "" , [] , [] )
+ [ Str "POD" , Space , Str "TEST" , Space , Str "SUITE" ]
+, Para
+ [ Str "This"
+ , Space
+ , Str "is"
+ , Space
+ , Str "a"
+ , Space
+ , Str "test"
+ , Space
+ , Link
+ ( "" , [] , [] )
+ [ Str "Pod" ]
+ ( "https://perldoc.pl/perlpod" , "" )
+ , Space
+ , Str "document"
+ , Space
+ , Str "for"
+ , Space
+ , Str "pandoc."
+ ]
+, Para
+ [ Str "=head2" , Space , Str "Head" , Space , Str "2" ]
+, Header
+ 3
+ ( "" , [] , [] )
+ [ Str "Head"
+ , Space
+ , Str "3:"
+ , Space
+ , Emph [ Str "The>" , Space , Str "<Return" ]
+ ]
+, Header
+ 4
+ ( "" , [] , [] )
+ [ Str "How"
+ , Space
+ , Str "to"
+ , Space
+ , Str "use"
+ , Space
+ , Str "the"
+ , Space
+ , Link
+ ( "" , [] , [ ( "manual" , "ls(1)" ) ] )
+ [ Str "ls(1)" ]
+ ( "" , "" )
+ , Space
+ , Str "command,"
+ , Space
+ , Str "an"
+ , Space
+ , Str "introduction"
+ ]
+, Header
+ 5
+ ( "" , [] , [] )
+ [ Code ( "" , [] , [] ) "Ricky Jay"
+ , Space
+ , Str "and"
+ , Space
+ , Str "his"
+ , Space
+ , Str "52"
+ , Space
+ , Str "assistants"
+ ]
+, Header
+ 6
+ ( "" , [] , [] )
+ [ Str "The"
+ , Space
+ , Str "=head5"
+ , Space
+ , Str "and"
+ , Space
+ , Str "=head6"
+ , Space
+ , Str "commands"
+ , Space
+ , Str "are"
+ , Space
+ , Str "newer"
+ , Space
+ , Str "and"
+ , Space
+ , Str "my"
+ , Space
+ , Str "syntax"
+ , Space
+ , Str "highlighting"
+ , Space
+ , Str "doesn't"
+ , Space
+ , Str "recognize"
+ , Space
+ , Str "them."
+ , Space
+ , Str "In"
+ , Space
+ , Str "any"
+ , Space
+ , Str "case,"
+ , Space
+ , Str "it"
+ , Space
+ , Str "should"
+ , Space
+ , Str "be"
+ , Space
+ , Str "possible"
+ , Space
+ , Str "to"
+ , Space
+ , Str "have"
+ , Space
+ , Str "a"
+ , Space
+ , Str "very"
+ , Space
+ , Str "long"
+ , Space
+ , Str "paragraph"
+ , Space
+ , Str "in"
+ , Space
+ , Str "the"
+ , Space
+ , Str "heading."
+ ]
+, Header
+ 6
+ ( "" , [] , [] )
+ [ Str "It"
+ , Space
+ , Str "should"
+ , Space
+ , Str "also"
+ , Space
+ , Str "be"
+ , Space
+ , Str "possible"
+ , Space
+ , Str "to"
+ , Space
+ , Str "start"
+ , Space
+ , Str "the"
+ , Space
+ , Str "heading"
+ , Space
+ , Str "paragraph"
+ , Space
+ , Str "on"
+ , Space
+ , Str "the"
+ , Space
+ , Str "next"
+ , Space
+ , Str "line"
+ ]
+, RawBlock
+ (Format "html")
+ "<strong>This is a raw block destined for the HTML format</strong>\n\n"
+, BulletList
+ [ [ Para [ Str "Bulleted" , Space , Str "list" ] ]
+ , [ Para [ Str "Ordered" , Space , Str "list" ]
+ , OrderedList
+ ( 1 , DefaultStyle , DefaultDelim )
+ [ [ Para
+ [ Str "Here's"
+ , Space
+ , Str "a"
+ , Space
+ , Str "verbatim"
+ , Space
+ , Str "paragraph"
+ , Space
+ , Str "in"
+ , Space
+ , Str "this"
+ , Space
+ , Str "list"
+ , Space
+ , Str "item:"
+ ]
+ , CodeBlock
+ ( "" , [] , [] )
+ " this is a code block\nthis is still part of the code block\n so is this.\nIt seems that the prefixed spaces in verbatim blocks in pod don't get stripped.\n\n This should continue the previous code block despite the intervening blank\n line, because the first line starts with a space\n\n\n\n the above blank lines with varying numbers of spaces should also be in\n the code block\n pod formatters should (but not must) expand tabs by default\nso we're not special casing pandoc's behavior there in any way\n"
+ , Para
+ [ Str "Wow,"
+ , Space
+ , Str "that"
+ , Space
+ , Str "was"
+ , Space
+ , Str "fun."
+ ]
+ ]
+ , [ Para [ Str "Definition" , Space , Str "list" ]
+ , DefinitionList
+ [ ( [ Span
+ ( "" , [] , [] )
+ [ Str "Marvin"
+ , Space
+ , Str "the"
+ , Space
+ , Str "Martian"
+ ]
+ ]
+ , [ [ Para
+ [ Str "A"
+ , Space
+ , Str "cartoon"
+ , Space
+ , Str "alien"
+ ]
+ ]
+ ]
+ )
+ , ( [ Span
+ ( "" , [] , [] )
+ [ Emph
+ [ Str "The"
+ , Space
+ , Str "Sun"
+ , Space
+ , Str "Also"
+ , Space
+ , Str "Rises"
+ ]
+ ]
+ ]
+ , [ [ Para
+ [ Str "A"
+ , Space
+ , Str "novel"
+ , Space
+ , Str "by"
+ , Space
+ , Str "Ernest"
+ , Space
+ , Str "Hemingway"
+ ]
+ ]
+ ]
+ )
+ , ( [ Span
+ ( "" , [] , [] )
+ [ Code ( "" , [] , [] ) "undefined" ]
+ ]
+ , [ [] ]
+ )
+ , ( [ Span
+ ( "" , [] , [] )
+ [ Str "And"
+ , Space
+ , Str "now,"
+ , Space
+ , Str "a"
+ , Space
+ , Str "quotation"
+ ]
+ ]
+ , [ [ BlockQuote
+ [ Para
+ [ Str "Where's"
+ , Space
+ , Str "my"
+ , Space
+ , Str "space"
+ , Space
+ , Str "modulator?"
+ ]
+ ]
+ ]
+ ]
+ )
+ ]
+ ]
+ , [ Para
+ [ Str "And"
+ , Space
+ , Str "the"
+ , Space
+ , Str "list"
+ , Space
+ , Str "continues."
+ ]
+ ]
+ ]
+ ]
+ , [ Para
+ [ Str "And"
+ , Space
+ , Str "so"
+ , Space
+ , Str "does"
+ , Space
+ , Str "the"
+ , Space
+ , Str "other"
+ , Space
+ , Str "one,"
+ , Space
+ , Str "even"
+ , Space
+ , Str "if"
+ , Space
+ , Str "I"
+ , Space
+ , Str "forget"
+ , Space
+ , Str "the"
+ , Space
+ , Str "asterisk."
+ ]
+ ]
+ ]
+, Div
+ ( "" , [ "neat" ] , [] )
+ [ Para
+ [ Str "This"
+ , Space
+ , Str "is"
+ , Space
+ , Str "a"
+ , Space
+ , Str "div"
+ , Space
+ , Str "for"
+ , Space
+ , Str "our"
+ , Space
+ , Str "purposes."
+ ]
+ , Para
+ [ Str "It"
+ , Space
+ , Str "should"
+ , Space
+ , Str "parse"
+ , Space
+ , Strong [ Str "content" ]
+ , Space
+ , Str "inside"
+ , Space
+ , Str "of"
+ , Space
+ , Str "it."
+ ]
+ , BulletList
+ [ [ Para [ Str "Like" , Space , Str "this" ] ] ]
+ ]
+, Div
+ ( "" , [ "excitement" ] , [] )
+ [ Para
+ [ Str "this"
+ , Space
+ , Str "is"
+ , Space
+ , Str "its"
+ , Space
+ , Str "own"
+ , Space
+ , Str "div"
+ ]
+ ]
+, RawBlock
+ (Format "html") " <p>and this is its own raw block</p>\n"
+, RawBlock (Format "html") "\n<p>so is this</p>\n"
+, Header
+ 2
+ ( "" , [] , [] )
+ [ Code ( "" , [] , [] ) "=cut"
+ , Space
+ , Str "before"
+ , Space
+ , Str "any"
+ , Space
+ , Code ( "" , [] , [] ) "=item"
+ , Space
+ , Str "in"
+ , Space
+ , Code ( "" , [] , [] ) "=over"
+ ]
+, BulletList
+ [ [ Para [ Str "a" ] , Para [ Str "b" ] ]
+ , [ Para [ Str "c" ] ]
+ ]
+]
diff --git a/test/pod-reader.pod b/test/pod-reader.pod
new file mode 100644
index 000000000..49ddd50a9
--- /dev/null
+++ b/test/pod-reader.pod
@@ -0,0 +1,155 @@
+text before any Pod commands is not parsed, which is unusual for formats
+supported by pandoc, but
+
+=head1 POD TEST SUITE
+
+This is a test L<Pod|https://perldoc.pl/perlpod> document for pandoc.
+
+=encoding utf8
+
+=head2 Head 2
+
+=head3 Head 3: I<< The> <Return >>
+
+=head4 How to use the L<ls(1)> command,
+an introduction
+
+=head5 C<Ricky Jay> and his 52 assistants
+
+=head6 The =head5 and =head6 commands are newer and my syntax highlighting
+doesn't recognize them. In any case, it should be possible to have a very long
+paragraph in the heading.
+
+=head6
+It should also be possible to start the heading paragraph on the next line
+
+=cut
+
+This doesn't get parsed at all.
+
+=begin html
+
+<strong>This is a raw block destined for the HTML format</strong>
+
+=end html
+
+=over
+
+=item *
+
+Bulleted list
+
+=item *
+
+Ordered list
+
+=over
+
+=item 1.
+
+Here's a verbatim paragraph in this list item:
+
+ this is a code block
+this is still part of the code block
+ so is this.
+It seems that the prefixed spaces in verbatim blocks in pod don't get stripped.
+
+ This should continue the previous code block despite the intervening blank
+ line, because the first line starts with a space
+
+
+
+ the above blank lines with varying numbers of spaces should also be in
+ the code block
+ pod formatters should (but not must) expand tabs by default
+so we're not special casing pandoc's behavior there in any way
+
+Wow, that was fun.
+
+=item 2.
+
+Definition list
+
+=over
+
+=item Marvin the MZ<>artian
+
+A cartoon alien
+
+=item I<The Sun
+Also Rises>
+
+A novel by Ernest Hemingway
+
+=item C<undefined>
+
+=item And now, a quotation
+
+=over
+
+Where's my space modulator?
+
+=back
+
+=back
+
+=item 3.
+
+And the list continues.
+
+=back
+
+=item
+
+And so does the other one, even if I forget the asterisk.
+
+=back
+
+=begin :neat
+
+This is a div for our purposes.
+
+It should parse B<< content >> inside of it.
+
+=over
+
+=item
+
+Like this
+
+=back
+
+=end :neat
+
+=for :excitement this is its own div
+
+=for html <p>and this is its own raw block</p>
+
+=for html
+<p>so is this</p>
+
+=head2 C<=cut> before any C<=item> in C<=over>
+
+=over
+
+=cut
+
+blah
+
+=item *
+
+a
+
+=cut
+
+blah blah
+
+=pod
+
+b
+
+=item *
+
+c
+
+=back
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 6d749b845..d310b932f 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -30,6 +30,7 @@ import qualified Tests.Readers.RTF
import qualified Tests.Readers.Txt2Tags
import qualified Tests.Readers.Man
import qualified Tests.Readers.Mdoc
+import qualified Tests.Readers.Pod
import qualified Tests.Shared
import qualified Tests.Writers.AsciiDoc
import qualified Tests.Writers.ConTeXt
@@ -101,6 +102,7 @@ tests pandocPath = testGroup "pandoc tests"
, testGroup "Mdoc" Tests.Readers.Mdoc.tests
, testGroup "FB2" Tests.Readers.FB2.tests
, testGroup "DokuWiki" Tests.Readers.DokuWiki.tests
+ , testGroup "Pod" Tests.Readers.Pod.tests
]
]