aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Fields.hs
blob: 89bba8814553d926894a3329f5ece18b1ad7e965 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Docx.Fields
   Copyright   : Copyright (C) 2014-2020 Jesse Rosenthal
   License     : GNU GPL, version 2 or above

   Maintainer  : Jesse Rosenthal <[email protected]>
   Stability   : alpha
   Portability : portable

For parsing Field definitions in instText tags, as described in
ECMA-376-1:2016, §17.16.5 -}

module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
                                       , IndexEntry(..)
                                       , parseFieldInfo
                                       ) where

import Data.Functor (($>), void)
import qualified Data.Text as T
import Text.Pandoc.Parsing
import Data.Maybe (isJust)

type URL = T.Text
type Anchor = T.Text

data IndexEntry = IndexEntry
  { entryTitle :: T.Text
  , entrySee :: Maybe T.Text
  , entryYomi :: Maybe T.Text
  , entryBold :: Bool
  , entryItalic :: Bool }
  deriving (Show)

data FieldInfo = HyperlinkField URL
                -- The boolean indicates whether the field is a hyperlink.
               | PagerefField Anchor Bool
               | CrossrefField Anchor Bool
               | IndexrefField IndexEntry
               | CslCitation T.Text
               | CslBibliography
               | EndNoteCite T.Text
               | EndNoteRefList
               | UnknownField
               deriving (Show)

type Parser = Parsec T.Text ()

parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""

fieldInfo :: Parser FieldInfo
fieldInfo = do
  spaces
  hyperlink
    <|>
    pageref
    <|>
    indexref
    <|>
    crossref
    <|>
    addIn
    <|>
    return UnknownField

addIn :: Parser FieldInfo
addIn = do
  string "ADDIN"
  spaces
  try cslCitation <|> cslBibliography <|> endnoteCite <|> endnoteRefList

cslCitation :: Parser FieldInfo
cslCitation = do
  optional (string "ZOTERO_ITEM" *> spaces)
  string "CSL_CITATION"
  spaces
  CslCitation <$> getInput

cslBibliography :: Parser FieldInfo
cslBibliography = do
  string "ZOTERO_BIBL" <|> string "Mendeley Bibliography CSL_BIBLIOGRAPHY"
  return CslBibliography

endnoteCite :: Parser FieldInfo
endnoteCite = try $ do
  string "EN.CITE"
  spaces
  EndNoteCite <$> getInput

endnoteRefList :: Parser FieldInfo
endnoteRefList = try $ do
  string "EN.REFLIST"
  return EndNoteRefList


escapedQuote :: Parser T.Text
escapedQuote = string "\\\"" $> "\\\""

inQuotes :: Parser T.Text
inQuotes =
  try escapedQuote <|> (T.singleton <$> anyChar)

quotedString :: Parser T.Text
quotedString = do
  char '"'
  T.concat <$> manyTill inQuotes (try (char '"'))

unquotedString :: Parser T.Text
unquotedString = T.pack <$> manyTill anyChar (try $ void (lookAhead space) <|> eof)

fieldArgument :: Parser T.Text
fieldArgument = do
  notFollowedBy (char '\\') -- switch
  quotedString <|> unquotedString

hyperlink :: Parser FieldInfo
hyperlink = do
  string "HYPERLINK"
  spaces
  farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument
  switches <- many fieldSwitch
  let url = case [s | ('l',s) <- switches] of
              [s] -> farg <> "#" <> s
              _   -> farg
  return $ HyperlinkField url

-- See §17.16.5.45
fieldSwitch :: Parser (Char, T.Text)
fieldSwitch = try $ do
  spaces
  char '\\'
  c <- anyChar
  spaces
  farg <- option mempty fieldArgument
  return (c, farg)

pageref :: Parser FieldInfo
pageref = do
  string "PAGEREF"
  spaces
  farg <- fieldArgument
  switches <- many fieldSwitch
  let isLink = any ((== 'h') . fst) switches
  return $ PagerefField farg isLink

crossref :: Parser FieldInfo
crossref = do
  string "REF"
  spaces
  farg <- fieldArgument
  switches <- many fieldSwitch
  let isLink = any ((== 'h') . fst) switches
  return $ CrossrefField farg isLink

-- second element of tuple is optional "see".
indexref :: Parser FieldInfo
indexref = do
  string "XE"
  spaces
  farg <- fieldArgument
  switches <- spaces *> many fieldSwitch
  return $ IndexrefField $ IndexEntry{ entryTitle = farg
                                     , entrySee = lookup 't' switches
                                     , entryYomi = lookup 'y' switches
                                     , entryBold = isJust (lookup 'b' switches)
                                     , entryItalic = isJust (lookup 'i' switches) }