aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2024-12-07 09:28:53 -0800
committer John MacFarlane <[email protected]> 2024-12-07 09:28:53 -0800
commit e70c2ac2eed47c921055f9be7815d629d2f743a6 (patch)
tree 33119b798b28e900d0ddf5f73386719c9ecb6ef3
parent e9389ab4d58fced7055312bbeb86044f60220cd7 (diff)
Docx reader: handle `\b`, `\i`, `\y` modifiers in `XE` index entries.
See #10171.
-rw-r--r-- src/Text/Pandoc/Readers/Docx.hs 9
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Fields.hs 38
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 1
3 files changed, 32 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 8bd4aa8b5..a9e53eb39 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -472,10 +472,13 @@ parPartToInlines' (OMathPara exps) =
parPartToInlines' (Field info children) =
case info of
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
- IndexrefField entry mbsee ->
+ IndexrefField ie ->
pure $ spanWith ("",["indexref"],
- (("entry",entry) :
- maybe [] (\x -> [("crossref",x)]) mbsee)) mempty
+ (("entry", entryTitle ie) :
+ maybe [] (\x -> [("crossref",x)]) (entrySee ie)
+ ++ maybe [] (\x -> [("yomi",x)]) (entryYomi ie)
+ ++ [("bold","") | entryBold ie]
+ ++ [("italic","") | entryItalic ie])) mempty
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
EndNoteCite t -> do
formattedCite <- smushInlines <$> mapM parPartToInlines' children
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index f21ad425e..8b1837394 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -12,20 +12,30 @@ For parsing Field definitions in instText tags, as described in
ECMA-376-1:2016, §17.16.5 -}
module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
+ , IndexEntry(..)
, parseFieldInfo
) where
import Data.Functor (($>), void)
import qualified Data.Text as T
import Text.Pandoc.Parsing
+import Data.Maybe (isJust)
type URL = T.Text
type Anchor = T.Text
+data IndexEntry = IndexEntry
+ { entryTitle :: T.Text
+ , entrySee :: Maybe T.Text
+ , entryYomi :: Maybe T.Text
+ , entryBold :: Bool
+ , entryItalic :: Bool }
+ deriving (Show)
+
data FieldInfo = HyperlinkField URL
-- The boolean indicates whether the field is a hyperlink.
| PagerefField Anchor Bool
- | IndexrefField T.Text (Maybe T.Text) -- second is optional 'see'
+ | IndexrefField IndexEntry
| CslCitation T.Text
| CslBibliography
| EndNoteCite T.Text
@@ -41,11 +51,11 @@ parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
fieldInfo = do
spaces
- (HyperlinkField <$> hyperlink)
+ hyperlink
<|>
- ((uncurry PagerefField) <$> pageref)
+ pageref
<|>
- ((uncurry IndexrefField) <$> indexref)
+ indexref
<|>
addIn
<|>
@@ -101,7 +111,7 @@ fieldArgument = do
notFollowedBy (char '\\') -- switch
quotedString <|> unquotedString
-hyperlink :: Parser URL
+hyperlink :: Parser FieldInfo
hyperlink = do
string "HYPERLINK"
spaces
@@ -110,7 +120,7 @@ hyperlink = do
let url = case [s | ('l',s) <- switches] of
[s] -> farg <> "#" <> s
_ -> farg
- return url
+ return $ HyperlinkField url
-- See §17.16.5.45
fieldSwitch :: Parser (Char, T.Text)
@@ -119,25 +129,27 @@ fieldSwitch = try $ do
char '\\'
c <- anyChar
spaces
- farg <- fieldArgument
+ farg <- option mempty fieldArgument
return (c, farg)
-pageref :: Parser (Anchor, Bool)
+pageref :: Parser FieldInfo
pageref = do
string "PAGEREF"
spaces
farg <- fieldArgument
switches <- many fieldSwitch
let isLink = any ((== 'h') . fst) switches
- return (farg, isLink)
+ return $ PagerefField farg isLink
-- Parses an XE (index entry) field; the \t, \y, \b and \i switches fill the IndexEntry fields.
-indexref :: Parser (T.Text, Maybe T.Text)
+indexref :: Parser FieldInfo
indexref = do
string "XE"
spaces
farg <- fieldArgument
switches <- spaces *> many fieldSwitch
- case [see | ('t', see) <- switches] of
- [see] -> pure (farg, Just see)
- _ -> pure (farg, Nothing)
+ return $ IndexrefField $ IndexEntry{ entryTitle = farg
+ , entrySee = lookup 't' switches
+ , entryYomi = lookup 'y' switches
+ , entryBold = isJust (lookup 'b' switches)
+ , entryItalic = isJust (lookup 'i' switches) }
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 3b0c5fdc6..20e510d8d 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -43,6 +43,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ChangeType(..)
, ChangeInfo(..)
, FieldInfo(..)
+ , IndexEntry(..)
, Level(..)
, ParaStyleName
, CharStyleName