about summary refs log tree commit diff
diff options
context:
space:
mode:
author Evan Silberman <[email protected]> 2025-08-03 18:43:39 -0700
committer John MacFarlane <[email protected]> 2025-08-03 22:03:12 -0700
commit b75c537d87d66a51acb129f5704a77e5cd59f847 (patch)
tree cb631b3f77698c6940d5d08530cd61ff6e715a0c
parent bc3a8a663d722a654fa13d16dfb10c790b4020e1 (diff)
Fix named entity lookup in POD reader
Translating entities by name ultimately relies on Commonmark.Entity.lookupEntity, which de facto requires the entity name to be followed by a semicolon. Paste a semicolon onto the end of the entity name read from POD to look it up. Fixes #11015
-rw-r--r-- src/Text/Pandoc/Readers/Pod.hs 6
-rw-r--r-- test/Tests/Readers/Pod.hs 12
2 files changed, 17 insertions, 1 deletion
diff --git a/src/Text/Pandoc/Readers/Pod.hs b/src/Text/Pandoc/Readers/Pod.hs
index 4f4529eba..068c70eb5 100644
--- a/src/Text/Pandoc/Readers/Pod.hs
+++ b/src/Text/Pandoc/Readers/Pod.hs
@@ -258,7 +258,11 @@ format = try $ do
entity (T.stripPrefix "0" -> Just suf)
| Just (n, "") <- oct (T.unpack suf) = lookupEntity $ "#" <> tshow n
entity (TR.decimal @Integer -> Right (x, "")) = lookupEntity $ "#" <> tshow x
- entity x = lookupEntity x
+ -- named entities in Commonmark.Entity de facto have to be looked up with
+ -- the semicolon at the end. perlpodspec says arguments to E<> must be
+ -- alphanumeric, so an argument that already has a trailing semicolon
+ -- is bogus anyway, so just paste the semicolon on unconditionally.
+ entity x = lookupEntity (x <> ";")
-- god knows there must be a higher order way of writing this thing, where we
-- have multiple different possible parser states within the link argument
diff --git a/test/Tests/Readers/Pod.hs b/test/Tests/Readers/Pod.hs
index c812e0754..dbf2800aa 100644
--- a/test/Tests/Readers/Pod.hs
+++ b/test/Tests/Readers/Pod.hs
@@ -145,6 +145,17 @@ tests = [
"E<rchevron>" =?>
para "»"
]
+ , testGroup "html"
+ [ "trade" =:
+ "E<trade>" =?>
+ para "™"
+ , "ccaron" =:
+ "E<ccaron>" =?>
+ para "č"
+ , "cent" =:
+ "E<cent>" =?>
+ para "¢"
+ ]
, testGroup "numeric"
[ "decimal" =:
"E<162>" =?>
@@ -170,6 +181,7 @@ tests = [
, bogusEntity "0xhh"
, bogusEntity "077x"
, bogusEntity "0x63 skidoo"
+ , bogusEntity "trade;"
]
]
]