aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2025-10-13 20:23:22 +0200
committerJohn MacFarlane <[email protected]>2025-10-13 20:23:22 +0200
commitd81ee85952400ef62b83146ed695a5ea113d120d (patch)
treef6b2540276dd4ad54f13e426c219e17ba9ef9771
parent3d6439ad156a24a75d04c90c9ab8aa1b2cd7d1b0 (diff)
RTF reader: improve hyperlink parsing.
Closes #11211.
-rw-r--r--src/Text/Pandoc/Readers/RTF.hs60
-rw-r--r--test/command/11211.md16
2 files changed, 50 insertions, 26 deletions
diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
index 52c549c75..b1195ac19 100644
--- a/src/Text/Pandoc/Readers/RTF.hs
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -784,26 +784,40 @@ removeCommonFormatting =
-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}}
handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks
-handleField bs
- (Tok _
- (Grouped
- (Tok _ (ControlSymbol '*')
- :Tok _ (ControlWord "fldinst" Nothing)
- :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest))
- :_))
- :linktoks)
- | Just linkdest <- getHyperlink insttext
- = do let linkdest' = case rest of
- (Tok _ (ControlSymbol '\\')
- : Tok _ (UnformattedText t)
- : _) | Just bkmrk <- T.stripPrefix "l" t
- -> "#" <> unquote bkmrk
- _ -> linkdest
- modifyGroup $ \g -> g{ gHyperlink = Just linkdest' }
- result <- foldM processTok bs linktoks
- modifyGroup $ \g -> g{ gHyperlink = Nothing }
- return result
-handleField bs _ = pure bs
+handleField bs ts = do
+ let isFieldMod (Tok _ (ControlWord w _)) =
+ w `elem` ["flddirty", "fldedit", "fldlock", "fldpriv"]
+ isFieldMod _ = False
+ case dropWhile isFieldMod ts of
+ [Tok _ (Grouped
+ (Tok _ (ControlSymbol '*')
+ :Tok _ (ControlWord "fldinst" Nothing)
+ :Tok _ (Grouped instrtoks)
+ :_)),
+ Tok _ (Grouped
+ (Tok _ (ControlWord "fldrslt" Nothing)
+ :Tok _ (Grouped resulttoks) : _))] -> do
+ case getHyperlink instrtoks of
+ Just linkdest -> do
+ modifyGroup $ \g -> g{ gHyperlink = Just linkdest }
+ result <- foldM processTok bs resulttoks
+ modifyGroup $ \g -> g{ gHyperlink = Nothing }
+ return result
+ Nothing -> foldM processTok bs resulttoks
+ _ -> pure bs
+
+getHyperlink :: [Tok] -> Maybe Text
+getHyperlink [] = Nothing
+getHyperlink (Tok _ (UnformattedText w) : rest)
+ | Just w' <- unquote <$> T.stripPrefix "HYPERLINK" (T.strip w)
+ = if T.null w'
+ then case rest of
+ (Tok _ (ControlSymbol '\\') : Tok _ (UnformattedText b) : _)
+ | Just bkmrk <- unquote <$> T.stripPrefix "l " (T.strip b)
+ -> Just $ "#" <> bkmrk
+ _ -> Just mempty
+ else Just w'
+getHyperlink (_:ts) = getHyperlink ts
unquote :: Text -> Text
unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip
@@ -944,12 +958,6 @@ handlePict toks = do
_ -> pict
-getHyperlink :: Text -> Maybe Text
-getHyperlink t =
- case T.stripPrefix "HYPERLINK" (T.strip t) of
- Nothing -> Nothing
- Just rest -> Just $ unquote rest
-
processFontTable :: [Tok] -> FontTable
processFontTable = snd . foldl' go (0, mempty)
where
diff --git a/test/command/11211.md b/test/command/11211.md
new file mode 100644
index 000000000..0ecb8895a
--- /dev/null
+++ b/test/command/11211.md
@@ -0,0 +1,16 @@
+```
+% pandoc -f rtf -t native
+{\field{\*\fldinst{\rtlch\ab0\ai0\af2\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033
+\loch\af2\dbch\af2\hich\f2\insrsid10976062\strike0\ulnone\cf1 HYPERLINK "https://example.com"}{\*\datafield 08d0c9ea79f9bace118c8200aa004ba90b0200000003000000e0c9ea79f9bace118c8200aa004ba90b28000000680074007400700073003a002f002f006500780061006d0070006c0065002e0063006f006d000000}}
+{\fldrslt{\rtlch\ab0\ai0\af2\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af2\dbch\af2\hich\f2\strike0\ul\cf2 link}}}
+^D
+[ Para
+ [ Underline
+ [ Link
+ ( "" , [] , [] )
+ [ Str "link" ]
+ ( "https://example.com" , "" )
+ ]
+ ]
+]
+```