1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2020 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <[email protected]>
Stability : alpha
Portability : portable
For parsing Field definitions in instText tags, as described in
ECMA-376-1:2016, §17.16.5 -}
module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
, IndexEntry(..)
, parseFieldInfo
) where
import Data.Functor (($>), void)
import qualified Data.Text as T
import Text.Pandoc.Parsing
import Data.Maybe (isJust)
type URL = T.Text
type Anchor = T.Text
data IndexEntry = IndexEntry
{ entryTitle :: T.Text
, entrySee :: Maybe T.Text
, entryYomi :: Maybe T.Text
, entryBold :: Bool
, entryItalic :: Bool }
deriving (Show)
data FieldInfo = HyperlinkField URL
-- The boolean indicates whether the field is a hyperlink.
| PagerefField Anchor Bool
| CrossrefField Anchor Bool
| IndexrefField IndexEntry
| CslCitation T.Text
| CslBibliography
| EndNoteCite T.Text
| EndNoteRefList
| UnknownField
deriving (Show)
type Parser = Parsec T.Text ()
parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
fieldInfo = do
spaces
hyperlink
<|>
pageref
<|>
indexref
<|>
crossref
<|>
addIn
<|>
return UnknownField
addIn :: Parser FieldInfo
addIn = do
string "ADDIN"
spaces
try cslCitation <|> cslBibliography <|> endnoteCite <|> endnoteRefList
cslCitation :: Parser FieldInfo
cslCitation = do
optional (string "ZOTERO_ITEM" *> spaces)
string "CSL_CITATION"
spaces
CslCitation <$> getInput
cslBibliography :: Parser FieldInfo
cslBibliography = do
string "ZOTERO_BIBL" <|> string "Mendeley Bibliography CSL_BIBLIOGRAPHY"
return CslBibliography
endnoteCite :: Parser FieldInfo
endnoteCite = try $ do
string "EN.CITE"
spaces
EndNoteCite <$> getInput
endnoteRefList :: Parser FieldInfo
endnoteRefList = try $ do
string "EN.REFLIST"
return EndNoteRefList
escapedQuote :: Parser T.Text
escapedQuote = string "\\\"" $> "\\\""
inQuotes :: Parser T.Text
inQuotes =
try escapedQuote <|> (T.singleton <$> anyChar)
quotedString :: Parser T.Text
quotedString = do
char '"'
T.concat <$> manyTill inQuotes (try (char '"'))
unquotedString :: Parser T.Text
unquotedString = T.pack <$> manyTill anyChar (try $ void (lookAhead space) <|> eof)
fieldArgument :: Parser T.Text
fieldArgument = do
notFollowedBy (char '\\') -- switch
quotedString <|> unquotedString
hyperlink :: Parser FieldInfo
hyperlink = do
string "HYPERLINK"
spaces
farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument
switches <- many fieldSwitch
let url = case [s | ('l',s) <- switches] of
[s] -> farg <> "#" <> s
_ -> farg
return $ HyperlinkField url
-- See §17.16.5.45
fieldSwitch :: Parser (Char, T.Text)
fieldSwitch = try $ do
spaces
char '\\'
c <- anyChar
spaces
farg <- option mempty fieldArgument
return (c, farg)
pageref :: Parser FieldInfo
pageref = do
string "PAGEREF"
spaces
farg <- fieldArgument
switches <- many fieldSwitch
let isLink = any ((== 'h') . fst) switches
return $ PagerefField farg isLink
crossref :: Parser FieldInfo
crossref = do
string "REF"
spaces
farg <- fieldArgument
switches <- many fieldSwitch
let isLink = any ((== 'h') . fst) switches
return $ CrossrefField farg isLink
-- second element of tuple is optional "see".
indexref :: Parser FieldInfo
indexref = do
string "XE"
spaces
farg <- fieldArgument
switches <- spaces *> many fieldSwitch
return $ IndexrefField $ IndexEntry{ entryTitle = farg
, entrySee = lookup 't' switches
, entryYomi = lookup 'y' switches
, entryBold = isJust (lookup 'b' switches)
, entryItalic = isJust (lookup 'i' switches) }
|