aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2024-12-05 11:37:09 -0800
committerJohn MacFarlane <[email protected]>2024-12-05 11:37:09 -0800
commitf87116a0b415aba7c6b2323ecb5dcac037ceb8d1 (patch)
tree029cf683636c06d807da4cb1df57535cd501a96f /src/Text
parentd07ada43a078fc8b8c56c7885720638cc74dfcf7 (diff)
Docx reader: improve index reference support.
Support crossrefs. Clean up and unify switch parsing for fields.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs5
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs50
2 files changed, 28 insertions, 27 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 16451aaed..8bd4aa8b5 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -472,7 +472,10 @@ parPartToInlines' (OMathPara exps) =
parPartToInlines' (Field info children) =
case info of
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
- IndexrefField entry -> pure $ spanWith ("",["indexref"],[("entry",entry)]) mempty
+ IndexrefField entry mbsee ->
+ pure $ spanWith ("",["indexref"],
+ (("entry",entry) :
+ maybe [] (\x -> [("crossref",x)]) mbsee)) mempty
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
EndNoteCite t -> do
formattedCite <- smushInlines <$> mapM parPartToInlines' children
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index 4a9f88d46..f21ad425e 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -25,7 +25,7 @@ type Anchor = T.Text
data FieldInfo = HyperlinkField URL
-- The boolean indicates whether the field is a hyperlink.
| PagerefField Anchor Bool
- | IndexrefField T.Text
+ | IndexrefField T.Text (Maybe T.Text) -- second is optional 'see'
| CslCitation T.Text
| CslBibliography
| EndNoteCite T.Text
@@ -45,7 +45,7 @@ fieldInfo = do
<|>
((uncurry PagerefField) <$> pageref)
<|>
- (IndexrefField <$> indexref)
+ ((uncurry IndexrefField) <$> indexref)
<|>
addIn
<|>
@@ -97,49 +97,47 @@ unquotedString :: Parser T.Text
unquotedString = T.pack <$> manyTill anyChar (try $ void (lookAhead space) <|> eof)
fieldArgument :: Parser T.Text
-fieldArgument = quotedString <|> unquotedString
-
--- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
-hyperlinkSwitch :: Parser (T.Text, T.Text)
-hyperlinkSwitch = do
- sw <- string "\\l"
- spaces
- farg <- fieldArgument
- return (T.pack sw, farg)
+fieldArgument = do
+ notFollowedBy (char '\\') -- switch
+ quotedString <|> unquotedString
hyperlink :: Parser URL
hyperlink = do
string "HYPERLINK"
spaces
farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument
- switches <- spaces *> many hyperlinkSwitch
- let url = case switches of
- ("\\l", s) : _ -> farg <> "#" <> s
- _ -> farg
+ switches <- many fieldSwitch
+ let url = case [s | ('l',s) <- switches] of
+ [s] -> farg <> "#" <> s
+ _ -> farg
return url
-- See §17.16.5.45
-pagerefSwitch :: Parser (T.Text, T.Text)
-pagerefSwitch = do
- sw <- string "\\h"
+fieldSwitch :: Parser (Char, T.Text)
+fieldSwitch = try $ do
+ spaces
+ char '\\'
+ c <- anyChar
spaces
farg <- fieldArgument
- return (T.pack sw, farg)
+ return (c, farg)
pageref :: Parser (Anchor, Bool)
pageref = do
string "PAGEREF"
spaces
farg <- fieldArgument
- switches <- spaces *> many pagerefSwitch
- let isLink = case switches of
- ("\\h", _) : _ -> True
- _ -> False
+ switches <- many fieldSwitch
+ let isLink = any ((== 'h') . fst) switches
return (farg, isLink)
-indexref :: Parser T.Text
+-- second element of tuple is optional "see".
+indexref :: Parser (T.Text, Maybe T.Text)
indexref = do
string "XE"
spaces
- fieldArgument
-
+ farg <- fieldArgument
+ switches <- spaces *> many fieldSwitch
+ case [see | ('t', see) <- switches] of
+ [see] -> pure (farg, Just see)
+ _ -> pure (farg, Nothing)