aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Parsing/State.hs
blob: 547ddaabb891cde5f601ddc3cdb30ce3e7b752fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE OverloadedStrings          #-}
{- |
   Module      : Text.Pandoc.Parsing
   Copyright   : Copyright (C) 2006-2024 John MacFarlane
   License     : GPL-2.0-or-later
   Maintainer  : John MacFarlane <[email protected]>

A default parser state with commonly used properties.
-}

module Text.Pandoc.Parsing.State
  ( ParserState (..)
  , ParserContext (..)
  , HeaderType (..)
  , NoteTable
  , NoteTable'
  , Key (..)
  , KeyTable
  , SubstTable
  , defaultParserState
  , toKey
  )
where

import Data.Default (Default (def))
import Data.Text (Text)
import Text.Parsec (SourcePos, getState, setState)
import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines)
import Text.Pandoc.Definition (Attr, Meta, Target, nullMeta)
import Text.Pandoc.Logging (LogMessage)
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Parsing.Capabilities
import Text.Pandoc.Parsing.Future
import Text.Pandoc.TeX (Macro)

import qualified Data.Map as M
import qualified Data.Set as Set
import qualified Data.Text as T

-- | Parsing options.
data ParserState = ParserState
  { stateOptions         :: ReaderOptions -- ^ User options
  , stateParserContext   :: ParserContext -- ^ Inside list?
  , stateQuoteContext    :: QuoteContext  -- ^ Inside quoted environment?
  , stateAllowLinks      :: Bool          -- ^ Allow parsing of links
  , stateAllowLineBreaks :: Bool          -- ^ Allow parsing of line breaks
  , stateLastStrPos      :: Maybe SourcePos -- ^ Position after last str parsed
  , stateKeys            :: KeyTable      -- ^ List of reference keys
  , stateHeaderKeys      :: KeyTable      -- ^ List of implicit header ref keys
  , stateSubstitutions   :: SubstTable    -- ^ List of substitution references
  , stateNotes           :: NoteTable     -- ^ List of notes (raw bodies)
  , stateNotes'          :: NoteTable'    -- ^ List of notes (parsed bodies)
  , stateNoteRefs        :: Set.Set Text  -- ^ List of note references used
  , stateInNote          :: Bool          -- ^ True if parsing note contents
  , stateNoteNumber      :: Int           -- ^ Last note number for citations
  , stateMeta            :: Meta          -- ^ Document metadata
  , stateMeta'           :: Future ParserState Meta -- ^ Document metadata
  , stateCitations       :: M.Map Text Text -- ^ RST-style citations
  , stateHeaderTable     :: [HeaderType]  -- ^ Ordered list of header types used
  , stateIdentifiers     :: Set.Set Text  -- ^ Header identifiers used
  , stateNextExample     :: Int           -- ^ Number of next example
  , stateExamples        :: M.Map Text Int -- ^ Map from example labels to numbers
  , stateMacros          :: M.Map Text Macro -- ^ Table of macros defined so far
  , stateRstDefaultRole  :: Text          -- ^ Current rST default
                                           -- interpreted text role
  , stateRstHighlight    :: Maybe Text    -- ^ Current rST literal block
                                           -- language
  , stateRstCustomRoles  :: M.Map Text (Text, Maybe Text, Attr)
    -- ^ Current rST cust text roles;
    -- Triple represents:) Base role 2) Optional format (only for :raw:
    -- roles) 3) Addition classes (rest of Attr is unused)).
  , stateCaption         :: Maybe Inlines -- ^ Caption in current environment
  , stateInHtmlBlock     :: Maybe Text    -- ^ Tag type of HTML block being parsed
  , stateFencedDivLevel  :: Int           -- ^ Depth of fenced div
  , stateContainers      :: [Text]        -- ^ parent include files
  , stateLogMessages     :: [LogMessage]  -- ^ log messages
  , stateMarkdownAttribute :: Bool        -- ^ True if in markdown=1 context
  }

instance Default ParserState where
  def = defaultParserState

instance HasMeta ParserState where
  setMeta field val st =
    st{ stateMeta = setMeta field val $ stateMeta st }
  deleteMeta field st =
    st{ stateMeta = deleteMeta field $ stateMeta st }

instance HasReaderOptions ParserState where
  extractReaderOptions = stateOptions

instance Monad m => HasQuoteContext ParserState m where
  getQuoteContext = stateQuoteContext <$> getState
  withQuoteContext context parser = do
    oldState <- getState
    let oldQuoteContext = stateQuoteContext oldState
    setState oldState { stateQuoteContext = context }
    result <- parser
    newState <- getState
    setState newState { stateQuoteContext = oldQuoteContext }
    return result

instance HasIdentifierList ParserState where
  extractIdentifierList     = stateIdentifiers
  updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }

instance HasMacros ParserState where
  extractMacros        = stateMacros
  updateMacros f st    = st{ stateMacros = f $ stateMacros st }

instance HasLastStrPosition ParserState where
  setLastStrPos pos st = st{ stateLastStrPos = pos }
  getLastStrPos st     = stateLastStrPos st

instance HasLogMessages ParserState where
  addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
  getLogMessages st = reverse $ stateLogMessages st

instance HasIncludeFiles ParserState where
  getIncludeFiles = stateContainers
  addIncludeFile f s = s{ stateContainers = f : stateContainers s }
  dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }

data ParserContext
    = ListItemState   -- ^ Used when running parser on list item contents
    | NullState       -- ^ Default state
    deriving (Eq, Show)

data HeaderType
    = SingleHeader Char  -- ^ Single line of characters underneath
    | DoubleHeader Char  -- ^ Lines of characters above and below
    deriving (Eq, Show)

defaultParserState :: ParserState
defaultParserState = ParserState
  { stateOptions         = def
  , stateParserContext   = NullState
  , stateQuoteContext    = NoQuote
  , stateAllowLinks      = True
  , stateAllowLineBreaks = True
  , stateLastStrPos      = Nothing
  , stateKeys            = M.empty
  , stateHeaderKeys      = M.empty
  , stateSubstitutions   = M.empty
  , stateNotes           = []
  , stateNotes'          = M.empty
  , stateNoteRefs        = Set.empty
  , stateInNote          = False
  , stateNoteNumber      = 0
  , stateMeta            = nullMeta
  , stateMeta'           = return nullMeta
  , stateCitations       = M.empty
  , stateHeaderTable     = []
  , stateIdentifiers     = Set.empty
  , stateNextExample     = 1
  , stateExamples        = M.empty
  , stateMacros          = M.empty
  , stateRstDefaultRole  = "title-reference"
  , stateRstHighlight    = Nothing
  , stateRstCustomRoles  = M.empty
  , stateCaption         = Nothing
  , stateInHtmlBlock     = Nothing
  , stateFencedDivLevel  = 0
  , stateContainers      = []
  , stateLogMessages     = []
  , stateMarkdownAttribute = False
  }

type NoteTable = [(Text, Text)]

type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
-- used in markdown reader

newtype Key = Key Text deriving (Show, Read, Eq, Ord)

toKey :: Text -> Key
toKey = Key . T.toLower . T.unwords . T.words . unbracket
  where unbracket t
          | Just ('[', t') <- T.uncons t
          , Just (t'', ']') <- T.unsnoc t'
          = t''
          | otherwise
          = t

type KeyTable = M.Map Key (Target, Attr)

type SubstTable = M.Map Key Blocks