diff options
| author | John MacFarlane <[email protected]> | 2024-12-07 09:28:53 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-12-07 09:28:53 -0800 |
| commit | e70c2ac2eed47c921055f9be7815d629d2f743a6 (patch) | |
| tree | 33119b798b28e900d0ddf5f73386719c9ecb6ef3 | |
| parent | e9389ab4d58fced7055312bbeb86044f60220cd7 (diff) | |
Docx reader: handle `\b`, `\i`, `\y` modifiers in `XE` index entries.
See #10171.
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 9 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fields.hs | 38 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 1 |
3 files changed, 32 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8bd4aa8b5..a9e53eb39 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -472,10 +472,13 @@ parPartToInlines' (OMathPara exps) = parPartToInlines' (Field info children) = case info of HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children - IndexrefField entry mbsee -> + IndexrefField ie -> pure $ spanWith ("",["indexref"], - (("entry",entry) : - maybe [] (\x -> [("crossref",x)]) mbsee)) mempty + (("entry", entryTitle ie) : + maybe [] (\x -> [("crossref",x)]) (entrySee ie) + ++ maybe [] (\x -> [("yomi",x)]) (entryYomi ie) + ++ [("bold","") | entryBold ie] + ++ [("italic","") | entryItalic ie])) mempty PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children EndNoteCite t -> do formattedCite <- smushInlines <$> mapM parPartToInlines' children diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index f21ad425e..8b1837394 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -12,20 +12,30 @@ For parsing Field definitions in instText tags, as described in ECMA-376-1:2016, §17.16.5 -} module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) + , IndexEntry(..) , parseFieldInfo ) where import Data.Functor (($>), void) import qualified Data.Text as T import Text.Pandoc.Parsing +import Data.Maybe (isJust) type URL = T.Text type Anchor = T.Text +data IndexEntry = IndexEntry + { entryTitle :: T.Text + , entrySee :: Maybe T.Text + , entryYomi :: Maybe T.Text + , entryBold :: Bool + , entryItalic :: Bool } + deriving (Show) + data FieldInfo = HyperlinkField URL -- The boolean indicates whether the field is a hyperlink. | PagerefField Anchor Bool - | IndexrefField T.Text (Maybe T.Text) -- second is optional 'see' + | IndexrefField IndexEntry | CslCitation T.Text | CslBibliography | EndNoteCite T.Text @@ -41,11 +51,11 @@ parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo fieldInfo = do spaces - (HyperlinkField <$> hyperlink) + hyperlink <|> - ((uncurry PagerefField) <$> pageref) + pageref <|> - ((uncurry IndexrefField) <$> indexref) + indexref <|> addIn <|> @@ -101,7 +111,7 @@ fieldArgument = do notFollowedBy (char '\\') -- switch quotedString <|> unquotedString -hyperlink :: Parser URL +hyperlink :: Parser FieldInfo hyperlink = do string "HYPERLINK" spaces @@ -110,7 +120,7 @@ hyperlink = do let url = case [s | ('l',s) <- switches] of [s] -> farg <> "#" <> s _ -> farg - return url + return $ HyperlinkField url -- See §17.16.5.45 fieldSwitch :: Parser (Char, T.Text) @@ -119,25 +129,27 @@ fieldSwitch = try $ do char '\\' c <- anyChar spaces - farg <- fieldArgument + farg <- option mempty fieldArgument return (c, farg) -pageref :: Parser (Anchor, Bool) +pageref :: Parser FieldInfo pageref = do string "PAGEREF" spaces farg <- fieldArgument switches <- many fieldSwitch let isLink = any ((== 'h') . fst) switches - return (farg, isLink) + return $ PagerefField farg isLink -- second element of tuple is optional "see". -indexref :: Parser (T.Text, Maybe T.Text) +indexref :: Parser FieldInfo indexref = do string "XE" spaces farg <- fieldArgument switches <- spaces *> many fieldSwitch - case [see | ('t', see) <- switches] of - [see] -> pure (farg, Just see) - _ -> pure (farg, Nothing) + return $ IndexrefField $ IndexEntry{ entryTitle = farg + , entrySee = lookup 't' switches + , entryYomi = lookup 'y' switches + , entryBold = isJust (lookup 'b' switches) + , entryItalic = isJust (lookup 'i' switches) } diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 3b0c5fdc6..20e510d8d 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -43,6 +43,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ChangeType(..) , ChangeInfo(..) , FieldInfo(..) + , IndexEntry(..) , Level(..) , ParaStyleName , CharStyleName |
