diff options
| author | Evan Silberman <[email protected]> | 2025-08-03 18:43:39 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-08-03 22:03:12 -0700 |
| commit | b75c537d87d66a51acb129f5704a77e5cd59f847 (patch) | |
| tree | cb631b3f77698c6940d5d08530cd61ff6e715a0c | |
| parent | bc3a8a663d722a654fa13d16dfb10c790b4020e1 (diff) | |
Fix named entity lookup in POD reader
Translating entities by name ultimately relies on
Commonmark.Entity.lookupEntity, which de facto requires the entity name
to be followed by a semicolon. Paste a semicolon onto the end of the
entity name read from POD to look it up.
Fixes #11015
| -rw-r--r-- | src/Text/Pandoc/Readers/Pod.hs | 6 |
| -rw-r--r-- | test/Tests/Readers/Pod.hs | 12 |
2 files changed, 17 insertions, 1 deletion
```diff
diff --git a/src/Text/Pandoc/Readers/Pod.hs b/src/Text/Pandoc/Readers/Pod.hs
index 4f4529eba..068c70eb5 100644
--- a/src/Text/Pandoc/Readers/Pod.hs
+++ b/src/Text/Pandoc/Readers/Pod.hs
@@ -258,7 +258,11 @@ format = try $ do
     entity (T.stripPrefix "0" -> Just suf)
       | Just (n, "") <- oct (T.unpack suf) = lookupEntity $ "#" <> tshow n
     entity (TR.decimal @Integer -> Right (x, "")) = lookupEntity $ "#" <> tshow x
-    entity x = lookupEntity x
+    -- named entities in Commonmark.Entity de facto have to be looked up with
+    -- the semicolon at the end. perlpodspec says arguments to E<> must be
+    -- alphanumeric, so an argument that already has a trailing semicolon
+    -- is bogus anyway, so just paste the semicolon on unconditionally.
+    entity x = lookupEntity (x <> ";")
 
 -- god knows there must be a higher order way of writing this thing, where we
 -- have multiple different possible parser states within the link argument
diff --git a/test/Tests/Readers/Pod.hs b/test/Tests/Readers/Pod.hs
index c812e0754..dbf2800aa 100644
--- a/test/Tests/Readers/Pod.hs
+++ b/test/Tests/Readers/Pod.hs
@@ -145,6 +145,17 @@ tests = [
               "E<rchevron>" =?>
               para "»"
           ]
+        , testGroup "html"
+          [ "trade" =:
+              "E<trade>" =?>
+              para "™"
+          , "ccaron" =:
+              "E<ccaron>" =?>
+              para "č"
+          , "cent" =:
+              "E<cent>" =?>
+              para "¢"
+          ]
         , testGroup "numeric"
           [ "decimal" =:
               "E<162>" =?>
@@ -170,6 +181,7 @@ tests = [
           , bogusEntity "0xhh"
           , bogusEntity "077x"
           , bogusEntity "0x63 skidoo"
+          , bogusEntity "trade;"
           ]
         ]
   ]
```
