diff options
| author | Evan Silberman <[email protected]> | 2024-12-19 18:43:24 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-12-27 11:10:48 -0800 |
| commit | c4716d41c574fc514146e546790e6c69da7f7e71 (patch) | |
| tree | 812b39a15d284ef82e427aab1223eb26e3a6daca /test | |
| parent | af2b276afeaae3faa4c4a9759d1530e2f1da6871 (diff) | |
Add Pod reader
Pod ("Plain old documentation") is a markup languaged used principally
to document Perl modules and programs. Since it was originally meant to
be translated pretty directly to man, the semantics are fairly simple.
This Pod reader was developed with reference to the canonical user and
implementer documentation of Pod: https://perldoc.perl.org/perlpod and
https://perldoc.perl.org/perlpodspec.
There are 1490 .pod, .pl, and .pm in the Perl 5.34 distribution found in
/System/Library/Perl on my mac. Of those, this reader dies with a parse
error on 7 of them. All of them seem to be cases where pod commands are
found within a non-colon-prefixed =begin/=end. perlpodspec says I may
treat this as an error.
[API change] adds readPod
Diffstat (limited to 'test')
| -rw-r--r-- | test/Tests/Old.hs | 4 | ||||
| -rw-r--r-- | test/Tests/Readers/Pod.hs | 175 | ||||
| -rw-r--r-- | test/pod-reader.native | 394 | ||||
| -rw-r--r-- | test/pod-reader.pod | 155 | ||||
| -rw-r--r-- | test/test-pandoc.hs | 2 |
5 files changed, 730 insertions, 0 deletions
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index bc3a61270..f8cc80625 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -246,6 +246,10 @@ tests pandocPath = [ test' "ansi" ["-f", "markdown", "-t", "ansi"] "ansi-test.txt" "ansi-test.ansi" ] + , testGroup "pod" + [ test' "pod" ["-f", "pod", "-t", "native"] + "pod-reader.pod" "pod-reader.native" + ] ] where test' = test pandocPath diff --git a/test/Tests/Readers/Pod.hs b/test/Tests/Readers/Pod.hs new file mode 100644 index 000000000..c812e0754 --- /dev/null +++ b/test/Tests/Readers/Pod.hs @@ -0,0 +1,175 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Tests.Readers.Pod + Copyright : © 2024 Evan Silberman + License : GNU GPL, version 2 or above + + Maintainer : + Stability : alpha + Portability : portable + +Tests for the Pod reader. +-} + +module Tests.Readers.Pod (tests) where + +import Data.Text (Text, pack) +import Test.Tasty +import Test.Tasty.HUnit (HasCallStack) +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder + +pod :: Text -> Pandoc +pod t = (purely $ readPod def) ("=pod\n\n" <> t <> "\n\n=cut\n") + +manLink :: Text -> Maybe Text -> Inlines -> Inlines +manLink nm Nothing = linkWith (mempty, mempty, [("manual", nm)]) "" "" +manLink nm (Just sc) = linkWith (mempty, mempty, [("manual", nm), ("section", sc)]) "" "" + +bogusEntity :: String -> TestTree +bogusEntity t = t =: "E<" <> pack t <> ">" =?> para ("E<" <> str (pack t) <> ">") + +infix 4 =: +(=:) :: (ToString c, HasCallStack) + => String -> (Text, c) -> TestTree +(=:) = test pod + +tests :: [TestTree] +tests = [ + testGroup "inlines" + [ "code with nested inlines" =: + "C</I<A> (*PRUNE) I<B>/>" =?> + para (code "/A (*PRUNE) B/") + , "compact in compact" =: + "I<B<strong> emphasis>" =?> + para (emph $ (strong "strong") <> " emphasis") + , "expanded in compact" =: + "I<B<< strong >> emphasis>" =?> + para (emph $ (strong "strong") <> " emphasis") + , "compact in expanded" =: + "I<<< B<strong> emphasis >>>" =?> + para (emph $ (strong "strong") <> " emphasis") + , "expanded in expanded" =: + "I<<< B<<< strong >>> emphasis >>>" =?> + para (emph $ (strong "strong") <> " emphasis") + ] + , testGroup "links" + [ testGroup "compact" + [ "URL" =: + "L<https://example.org>" =?> + para (link "https://example.org" "" "https://example.org") + , "URL with link text" =: + "L<link|https://example.org/index.html>" =?> + para (link "https://example.org/index.html" "" "link") + , "perl manual" =: + "L<Foo::Bar>" =?> + para (manLink "Foo::Bar" Nothing "Foo::Bar") + , "manual with quoted section" =: + "L<crontab(5)/\"DESCRIPTION\">" =?> + para (manLink "crontab(5)" (Just "DESCRIPTION") (doubleQuoted "DESCRIPTION" <> " in crontab(5)")) + , "manual with section and formatted link text" =: + "L<B<< extravagant >> link|HTTP::Simple/is_info>" =?> + para (manLink "HTTP::Simple" (Just "is_info") (strong "extravagant" <> " link")) + , "internal link" =: + "L</section name>" =?> + para (link "#section-name" "" (doubleQuoted "section name")) + , "internal link with formatting" =: + "L</The C<pod2html> command>" =?> + para (link "#the-pod2html-command" "" (doubleQuoted ("The " <> code "pod2html" <> " command"))) + , "link with angle bracket" =: + "L<m<>" =?> + para (manLink "m<" Nothing "m<") + , "empty name" =: + "L<|https://example.org>" =?> + para (link "https://example.org" "" mempty) + ] + , testGroup "expanded" + [ "URL" =: + "L<< https://example.org >>" =?> + para (link "https://example.org" "" "https://example.org") + , "URL with link text" =: + "L<< link|https://example.org/index.html >>" =?> + para (link "https://example.org/index.html" "" "link") + , "perl manual" =: + "L<<< Foo::Bar >>>" =?> + para (manLink "Foo::Bar" Nothing "Foo::Bar") + , "manual with quoted section" =: + "L<< crontab(5)/\"DESCRIPTION\" >>" =?> + para (manLink "crontab(5)" (Just "DESCRIPTION") (doubleQuoted "DESCRIPTION" <> " in crontab(5)")) + , "manual with section and formatted link text" =: + "L<< B<< extravagant >> link|HTTP::Simple/is_info >>" =?> + para (manLink "HTTP::Simple" (Just "is_info") (strong "extravagant" <> " link")) + , "internal link" =: + "L<< /section name >>" =?> + para (link "#section-name" "" (doubleQuoted "section name")) + , "internal link with formatting" =: + "L<<<<< /The C<pod2html> command >>>>>" =?> + para (link "#the-pod2html-command" "" (doubleQuoted ("The " <> code "pod2html" <> " command"))) + , "link with angle bracket" =: + "L<< m< >>" =?> + para (manLink "m<" Nothing "m<") + , "empty name" =: + "L<< |https://example.org >>" =?> + para (link "https://example.org" "" mempty) + ] + ] + , testGroup "entities" + [ testGroup "required" + [ "quot" =: + "E<quot>" =?> + para "\"" + , "amp" =: + "E<amp>" =?> + para "&" + , "apos" =: + "E<apos>" =?> + para "'" + , "lt" =: + "E<lt>" =?> + para "<" + , "gt" =: + "E<gt>" =?> + para ">" + , "sol" =: + "E<sol>" =?> + para "/" + , "verbar" =: + "E<verbar>" =?> + para "|" + , "lchevron" =: + "E<lchevron>" =?> + para "«" + , "rchevron" =: + "E<rchevron>" =?> + para "»" + ] + , testGroup "numeric" + [ "decimal" =: + "E<162>" =?> + para "¢" + , "octal" =: + "E<0242>" =?> + para "¢" + , "hexadecimal" =: + "E<0xA2>" =?> + para "¢" + , "hexadecimal variant" =: + "E<0x00A2>" =?> + para "¢" + , "actually decimal" =: + "E<099>" =?> + para "c" + ] + , testGroup "bogus" + [ bogusEntity "0XA2" + , bogusEntity "not a real entity" + , bogusEntity "162 1" + , bogusEntity "99 bottles of beer" + , bogusEntity "0xhh" + , bogusEntity "077x" + , bogusEntity "0x63 skidoo" + ] + ] + ] diff --git a/test/pod-reader.native b/test/pod-reader.native new file mode 100644 index 000000000..07603a80f --- /dev/null +++ b/test/pod-reader.native @@ -0,0 +1,394 @@ +[ Header + 1 + ( "" , [] , [] ) + [ Str "POD" , Space , Str "TEST" , Space , Str "SUITE" ] +, Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "test" + , Space + , Link + ( "" , [] , [] ) + [ Str "Pod" ] + ( "https://perldoc.pl/perlpod" , "" ) + , Space + , Str "document" + , Space + , Str "for" + , Space + , Str "pandoc." + ] +, Para + [ Str "=head2" , Space , Str "Head" , Space , Str "2" ] +, Header + 3 + ( "" , [] , [] ) + [ Str "Head" + , Space + , Str "3:" + , Space + , Emph [ Str "The>" , Space , Str "<Return" ] + ] +, Header + 4 + ( "" , [] , [] ) + [ Str "How" + , Space + , Str "to" + , Space + , Str "use" + , Space + , Str "the" + , Space + , Link + ( "" , [] , [ ( "manual" , "ls(1)" ) ] ) + [ Str "ls(1)" ] + ( "" , "" ) + , Space + , Str "command," + , Space + , Str "an" + , Space + , Str "introduction" + ] +, Header + 5 + ( "" , [] , [] ) + [ Code ( "" , [] , [] ) "Ricky Jay" + , Space + , Str "and" + , Space + , Str "his" + , Space + , Str "52" + , Space + , Str "assistants" + ] +, Header + 6 + ( "" , [] , [] ) + [ Str "The" + , Space + , Str "=head5" + , Space + , Str "and" + , Space + , Str "=head6" + , Space + , Str "commands" + , Space + , Str "are" + , Space + , Str "newer" + , Space + , Str "and" + , Space + , Str "my" + , Space + , Str "syntax" + , Space + , Str "highlighting" + , Space + , Str "doesn't" + , Space + , Str "recognize" + , Space + , Str "them." + , Space + , Str "In" + , Space + , Str "any" + , Space + , Str "case," + , Space + , Str "it" + , Space + , Str "should" + , Space + , Str "be" + , Space + , Str "possible" + , Space + , Str "to" + , Space + , Str "have" + , Space + , Str "a" + , Space + , Str "very" + , Space + , Str "long" + , Space + , Str "paragraph" + , Space + , Str "in" + , Space + , Str "the" + , Space + , Str "heading." + ] +, Header + 6 + ( "" , [] , [] ) + [ Str "It" + , Space + , Str "should" + , Space + , Str "also" + , Space + , Str "be" + , Space + , Str "possible" + , Space + , Str "to" + , Space + , Str "start" + , Space + , Str "the" + , Space + , Str "heading" + , Space + , Str "paragraph" + , Space + , Str "on" + , Space + , Str "the" + , Space + , Str "next" + , Space + , Str "line" + ] +, RawBlock + (Format "html") + "<strong>This is a raw block destined for the HTML format</strong>\n\n" +, BulletList + [ [ Para [ Str "Bulleted" , Space , Str "list" ] ] + , [ Para [ Str "Ordered" , Space , Str "list" ] + , OrderedList + ( 1 , DefaultStyle , DefaultDelim ) + [ [ Para + [ Str "Here's" + , Space + , Str "a" + , Space + , Str "verbatim" + , Space + , Str "paragraph" + , Space + , Str "in" + , Space + , Str "this" + , Space + , Str "list" + , Space + , Str "item:" + ] + , CodeBlock + ( "" , [] , [] ) + " this is a code block\nthis is still part of the code block\n so is this.\nIt seems that the prefixed spaces in verbatim blocks in pod don't get stripped.\n\n This should continue the previous code block despite the intervening blank\n line, because the first line starts with a space\n\n\n\n the above blank lines with varying numbers of spaces should also be in\n the code block\n pod formatters should (but not must) expand tabs by default\nso we're not special casing pandoc's behavior there in any way\n" + , Para + [ Str "Wow," + , Space + , Str "that" + , Space + , Str "was" + , Space + , Str "fun." + ] + ] + , [ Para [ Str "Definition" , Space , Str "list" ] + , DefinitionList + [ ( [ Span + ( "" , [] , [] ) + [ Str "Marvin" + , Space + , Str "the" + , Space + , Str "Martian" + ] + ] + , [ [ Para + [ Str "A" + , Space + , Str "cartoon" + , Space + , Str "alien" + ] + ] + ] + ) + , ( [ Span + ( "" , [] , [] ) + [ Emph + [ Str "The" + , Space + , Str "Sun" + , Space + , Str "Also" + , Space + , Str "Rises" + ] + ] + ] + , [ [ Para + [ Str "A" + , Space + , Str "novel" + , Space + , Str "by" + , Space + , Str "Ernest" + , Space + , Str "Hemingway" + ] + ] + ] + ) + , ( [ Span + ( "" , [] , [] ) + [ Code ( "" , [] , [] ) "undefined" ] + ] + , [ [] ] + ) + , ( [ Span + ( "" , [] , [] ) + [ Str "And" + , Space + , Str "now," + , Space + , Str "a" + , Space + , Str "quotation" + ] + ] + , [ [ BlockQuote + [ Para + [ Str "Where's" + , Space + , Str "my" + , Space + , Str "space" + , Space + , Str "modulator?" + ] + ] + ] + ] + ) + ] + ] + , [ Para + [ Str "And" + , Space + , Str "the" + , Space + , Str "list" + , Space + , Str "continues." + ] + ] + ] + ] + , [ Para + [ Str "And" + , Space + , Str "so" + , Space + , Str "does" + , Space + , Str "the" + , Space + , Str "other" + , Space + , Str "one," + , Space + , Str "even" + , Space + , Str "if" + , Space + , Str "I" + , Space + , Str "forget" + , Space + , Str "the" + , Space + , Str "asterisk." + ] + ] + ] +, Div + ( "" , [ "neat" ] , [] ) + [ Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "div" + , Space + , Str "for" + , Space + , Str "our" + , Space + , Str "purposes." + ] + , Para + [ Str "It" + , Space + , Str "should" + , Space + , Str "parse" + , Space + , Strong [ Str "content" ] + , Space + , Str "inside" + , Space + , Str "of" + , Space + , Str "it." + ] + , BulletList + [ [ Para [ Str "Like" , Space , Str "this" ] ] ] + ] +, Div + ( "" , [ "excitement" ] , [] ) + [ Para + [ Str "this" + , Space + , Str "is" + , Space + , Str "its" + , Space + , Str "own" + , Space + , Str "div" + ] + ] +, RawBlock + (Format "html") " <p>and this is its own raw block</p>\n" +, RawBlock (Format "html") "\n<p>so is this</p>\n" +, Header + 2 + ( "" , [] , [] ) + [ Code ( "" , [] , [] ) "=cut" + , Space + , Str "before" + , Space + , Str "any" + , Space + , Code ( "" , [] , [] ) "=item" + , Space + , Str "in" + , Space + , Code ( "" , [] , [] ) "=over" + ] +, BulletList + [ [ Para [ Str "a" ] , Para [ Str "b" ] ] + , [ Para [ Str "c" ] ] + ] +] diff --git a/test/pod-reader.pod b/test/pod-reader.pod new file mode 100644 index 000000000..49ddd50a9 --- /dev/null +++ b/test/pod-reader.pod @@ -0,0 +1,155 @@ +text before any Pod commands is not parsed, which is unusual for formats +supported by pandoc, but + +=head1 POD TEST SUITE + +This is a test L<Pod|https://perldoc.pl/perlpod> document for pandoc. + +=encoding utf8 + +=head2 Head 2 + +=head3 Head 3: I<< The> <Return >> + +=head4 How to use the L<ls(1)> command, +an introduction + +=head5 C<Ricky Jay> and his 52 assistants + +=head6 The =head5 and =head6 commands are newer and my syntax highlighting +doesn't recognize them. In any case, it should be possible to have a very long +paragraph in the heading. + +=head6 +It should also be possible to start the heading paragraph on the next line + +=cut + +This doesn't get parsed at all. + +=begin html + +<strong>This is a raw block destined for the HTML format</strong> + +=end html + +=over + +=item * + +Bulleted list + +=item * + +Ordered list + +=over + +=item 1. + +Here's a verbatim paragraph in this list item: + + this is a code block +this is still part of the code block + so is this. +It seems that the prefixed spaces in verbatim blocks in pod don't get stripped. + + This should continue the previous code block despite the intervening blank + line, because the first line starts with a space + + + + the above blank lines with varying numbers of spaces should also be in + the code block + pod formatters should (but not must) expand tabs by default +so we're not special casing pandoc's behavior there in any way + +Wow, that was fun. + +=item 2. + +Definition list + +=over + +=item Marvin the MZ<>artian + +A cartoon alien + +=item I<The Sun +Also Rises> + +A novel by Ernest Hemingway + +=item C<undefined> + +=item And now, a quotation + +=over + +Where's my space modulator? + +=back + +=back + +=item 3. + +And the list continues. + +=back + +=item + +And so does the other one, even if I forget the asterisk. + +=back + +=begin :neat + +This is a div for our purposes. + +It should parse B<< content >> inside of it. + +=over + +=item + +Like this + +=back + +=end :neat + +=for :excitement this is its own div + +=for html <p>and this is its own raw block</p> + +=for html +<p>so is this</p> + +=head2 C<=cut> before any C<=item> in C<=over> + +=over + +=cut + +blah + +=item * + +a + +=cut + +blah blah + +=pod + +b + +=item * + +c + +=back diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 6d749b845..d310b932f 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -30,6 +30,7 @@ import qualified Tests.Readers.RTF import qualified Tests.Readers.Txt2Tags import qualified Tests.Readers.Man import qualified Tests.Readers.Mdoc +import qualified Tests.Readers.Pod import qualified Tests.Shared import qualified Tests.Writers.AsciiDoc import qualified Tests.Writers.ConTeXt @@ -101,6 +102,7 @@ tests pandocPath = testGroup "pandoc tests" , testGroup "Mdoc" Tests.Readers.Mdoc.tests , testGroup "FB2" Tests.Readers.FB2.tests , testGroup "DokuWiki" Tests.Readers.DokuWiki.tests + , testGroup "Pod" Tests.Readers.Pod.tests ] ] |
