From b75c537d87d66a51acb129f5704a77e5cd59f847 Mon Sep 17 00:00:00 2001 From: Evan Silberman Date: Sun, 3 Aug 2025 18:43:39 -0700 Subject: Fix named entity lookup in POD reader Translating entities by name ultimately relies on Commonmark.Entity.lookupEntity, which de facto requires the entity name to be followed by a semicolon. Paste a semicolon onto the end of the entity name read from POD to look it up. Fixes #11015 --- src/Text/Pandoc/Readers/Pod.hs | 6 +++++- test/Tests/Readers/Pod.hs | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Pod.hs b/src/Text/Pandoc/Readers/Pod.hs index 4f4529eba..068c70eb5 100644 --- a/src/Text/Pandoc/Readers/Pod.hs +++ b/src/Text/Pandoc/Readers/Pod.hs @@ -258,7 +258,11 @@ format = try $ do entity (T.stripPrefix "0" -> Just suf) | Just (n, "") <- oct (T.unpack suf) = lookupEntity $ "#" <> tshow n entity (TR.decimal @Integer -> Right (x, "")) = lookupEntity $ "#" <> tshow x - entity x = lookupEntity x + -- named entities in Commonmark.Entity de facto have to be looked up with + -- the semicolon at the end. perlpodspec says arguments to E<> must be + -- alphanumeric, so an argument that already has a trailing semicolon + -- is bogus anyway, so just paste the semicolon on unconditionally. 
+ entity x = lookupEntity (x <> ";") -- god knows there must be a higher order way of writing this thing, where we -- have multiple different possible parser states within the link argument diff --git a/test/Tests/Readers/Pod.hs b/test/Tests/Readers/Pod.hs index c812e0754..dbf2800aa 100644 --- a/test/Tests/Readers/Pod.hs +++ b/test/Tests/Readers/Pod.hs @@ -145,6 +145,17 @@ tests = [ "E<rchevron>" =?> para "»" ] + , testGroup "html" + [ "trade" =: + "E<trade>" =?> + para "™" + , "ccaron" =: + "E<ccaron>" =?> + para "č" + , "cent" =: + "E<cent>" =?> + para "¢" + ] , testGroup "numeric" [ "decimal" =: "E<162>" =?> @@ -170,6 +181,7 @@ tests = [ , bogusEntity "0xhh" , bogusEntity "077x" , bogusEntity "0x63 skidoo" + , bogusEntity "trade;" ] ] ] -- cgit v1.2.3