1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}
{- |
Module : Text.Pandoc.Transforms
Copyright : © 2006-2024 John MacFarlane
License : GPL-2.0-or-later
Maintainer : John MacFarlane <jgm@berkeley@edu>
Transformation of a Pandoc document post-parsing
-}
module Text.Pandoc.Transforms
( Transform
, applyTransforms
, adjustLinksAndIds
, eastAsianLineBreakFilter
, filterIpynbOutput
, headerShift
) where
import Data.List (sortOn)
import Data.Text (Text)
import Network.URI (unEscapeString)
import Text.DocLayout (charWidth)
import Text.Pandoc.Definition
( Pandoc (..), Attr, Block (..), Format (..), Inline (..) )
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Options (Extensions)
import Text.Pandoc.Shared (stringify, textToIdentifier)
import Text.Pandoc.Walk (walk)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
-- | Transformation of a Pandoc document post-parsing
type Transform = Pandoc -> Pandoc
-- | Apply a list of transforms to a document, in order.
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms transforms d = return $ foldr ($) d transforms
-- | Prefixes identifiers with a string derived from the filepath of
-- @thisfile@; fixes links to targets in @allfiles@ accordingly.
adjustLinksAndIds :: Extensions -- ^ defines how IDs are generated
-> Text -- ^ thisfile
-> [Text] -- ^ allfiles
-> Transform
adjustLinksAndIds exts thisfile allfiles
| length allfiles > 1 = walk fixInline . walk fixBlock
| otherwise = id
where
-- fix ids in blocks
fixBlock :: Block -> Block
fixBlock (CodeBlock attr t) = CodeBlock (fixAttrs attr) t
fixBlock (Header lev attr ils) = Header lev (fixAttrs attr) ils
fixBlock (Table attr cap cols th tbs tf) =
Table (fixAttrs attr) cap cols th tbs tf
fixBlock (Div attr bs) = Div (fixAttrs attr) bs
fixBlock x = x
-- fix ids and links in inlines
fixInline :: Inline -> Inline
fixInline (Code attr t) = Code (fixAttrs attr) t
fixInline (Link attr ils (url,tit)) =
Link (fixAttrs attr) ils (fixURL url,tit)
fixInline (Image attr ils (url,tit)) =
Image (fixAttrs attr) ils (fixURL url,tit)
fixInline (Span attr ils) = Span (fixAttrs attr) ils
fixInline x = x
-- add thisfile as prefix of identifier
fixAttrs :: Attr -> Attr
fixAttrs (i,cs,kvs)
| T.null i = (i,cs,kvs)
| otherwise =
(T.intercalate "__"
(filter (not . T.null) [toIdent thisfile, i]),
cs, kvs)
-- turns a filepath into an identifier
toIdent :: Text -> Text
toIdent = textToIdentifier exts . T.intercalate "__" .
T.split (\c -> c == '/' || c == '\\')
-- if URL begins with file from allfiles, convert to
-- an internal link with the appropriate identifier
fixURL :: Text -> Text
fixURL u =
let (a,b) = T.break (== '#') $ T.pack . unEscapeString . T.unpack $ u
filepart = if T.null a
then toIdent thisfile
else toIdent a
fragpart = T.dropWhile (== '#') b
in if T.null a || a `elem` allfiles
then "#" <> T.intercalate "__"
(filter (not . T.null) [filepart, fragpart])
else u
-- | Process ipynb output cells. If mode is Nothing,
-- remove all output. If mode is Just format, select
-- best output for the format. If format is not ipynb,
-- strip out ANSI escape sequences from CodeBlocks (see #5633).
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
filterIpynbOutput mode = walk go
where go (Div (ident, "output":os, kvs) bs) =
case mode of
Nothing -> Div (ident, "output":os, kvs) []
-- "best" for ipynb includes all formats:
Just fmt
| fmt == Format "ipynb"
-> Div (ident, "output":os, kvs) bs
| otherwise -> Div (ident, "output":os, kvs) $
walk removeANSI $
take 1 $ sortOn rank bs
where
rank (RawBlock (Format "html") _)
| fmt == Format "html" = 1 :: Int
| fmt == Format "markdown" = 3
| otherwise = 4
rank (RawBlock (Format "latex") _)
| fmt == Format "latex" = 1
| fmt == Format "markdown" = 3
| otherwise = 4
rank (RawBlock f _)
| fmt == f = 1
| otherwise = 4
rank (Para [Image{}]) = 2
rank _ = 3
removeANSI (CodeBlock attr code) =
CodeBlock attr (removeANSIEscapes code)
removeANSI x = x
removeANSIEscapes t
| Just cs <- T.stripPrefix "\x1b[" t =
removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs
| Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs
| otherwise = ""
go x = x
-- | Remove soft breaks between East Asian characters.
eastAsianLineBreakFilter :: Pandoc -> Pandoc
eastAsianLineBreakFilter = bottomUp go
where go (x:SoftBreak:y:zs)
| Just (_, b) <- T.unsnoc $ stringify x
, Just (c, _) <- T.uncons $ stringify y
, charWidth b == 2
, charWidth c == 2
= x:y:zs
| otherwise
= x:SoftBreak:y:zs
go xs
= xs
-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n (Pandoc meta (Header m _ ils : bs))
| n < 0
, m + n == 0 = headerShift n $
B.setTitle (B.fromList ils) $ Pandoc meta bs
headerShift n (Pandoc meta bs) = Pandoc meta (walk shift bs)
where
shift :: Block -> Block
shift (Header level attr inner)
| level + n > 0 = Header (level + n) attr inner
| otherwise = Para inner
shift x = x
|