diff options
| author | John MacFarlane <[email protected]> | 2024-12-05 11:37:09 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-12-05 11:37:09 -0800 |
| commit | f87116a0b415aba7c6b2323ecb5dcac037ceb8d1 (patch) | |
| tree | 029cf683636c06d807da4cb1df57535cd501a96f | |
| parent | d07ada43a078fc8b8c56c7885720638cc74dfcf7 (diff) | |
Docx reader: improve index reference support.
Support crossrefs.
Clean up and unify switch parsing for fields.
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 5 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fields.hs | 50 |
2 files changed, 28 insertions, 27 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 16451aaed..8bd4aa8b5 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -472,7 +472,10 @@ parPartToInlines' (OMathPara exps) = parPartToInlines' (Field info children) = case info of HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children - IndexrefField entry -> pure $ spanWith ("",["indexref"],[("entry",entry)]) mempty + IndexrefField entry mbsee -> + pure $ spanWith ("",["indexref"], + (("entry",entry) : + maybe [] (\x -> [("crossref",x)]) mbsee)) mempty PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children EndNoteCite t -> do formattedCite <- smushInlines <$> mapM parPartToInlines' children diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 4a9f88d46..f21ad425e 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -25,7 +25,7 @@ type Anchor = T.Text data FieldInfo = HyperlinkField URL -- The boolean indicates whether the field is a hyperlink. | PagerefField Anchor Bool - | IndexrefField T.Text + | IndexrefField T.Text (Maybe T.Text) -- second is optional 'see' | CslCitation T.Text | CslBibliography | EndNoteCite T.Text @@ -45,7 +45,7 @@ fieldInfo = do <|> ((uncurry PagerefField) <$> pageref) <|> - (IndexrefField <$> indexref) + ((uncurry IndexrefField) <$> indexref) <|> addIn <|> @@ -97,49 +97,47 @@ unquotedString :: Parser T.Text unquotedString = T.pack <$> manyTill anyChar (try $ void (lookAhead space) <|> eof) fieldArgument :: Parser T.Text -fieldArgument = quotedString <|> unquotedString - --- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (T.Text, T.Text) -hyperlinkSwitch = do - sw <- string "\\l" - spaces - farg <- fieldArgument - return (T.pack sw, farg) +fieldArgument = do + notFollowedBy (char '\\') -- switch + quotedString <|> unquotedString hyperlink :: Parser URL hyperlink = do string "HYPERLINK" spaces farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument - switches <- spaces *> many hyperlinkSwitch - let url = case switches of - ("\\l", s) : _ -> farg <> "#" <> s - _ -> farg + switches <- many fieldSwitch + let url = case [s | ('l',s) <- switches] of + [s] -> farg <> "#" <> s + _ -> farg return url -- See §17.16.5.45 -pagerefSwitch :: Parser (T.Text, T.Text) -pagerefSwitch = do - sw <- string "\\h" +fieldSwitch :: Parser (Char, T.Text) +fieldSwitch = try $ do + spaces + char '\\' + c <- anyChar spaces farg <- fieldArgument - return (T.pack sw, farg) + return (c, farg) pageref :: Parser (Anchor, Bool) pageref = do string "PAGEREF" spaces farg <- fieldArgument - switches <- spaces *> many pagerefSwitch - let isLink = case switches of - ("\\h", _) : _ -> True - _ -> False + switches <- many fieldSwitch + let isLink = any ((== 'h') . fst) switches return (farg, isLink) -indexref :: Parser T.Text +-- second element of tuple is optional "see". +indexref :: Parser (T.Text, Maybe T.Text) indexref = do string "XE" spaces - fieldArgument - + farg <- fieldArgument + switches <- spaces *> many fieldSwitch + case [see | ('t', see) <- switches] of + [see] -> pure (farg, Just see) + _ -> pure (farg, Nothing) |
