aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/ChunkedHTML.hs
blob: a5e51f7415c85ac4c404e13092e7d27f3d404829 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
{-# LANGUAGE OverloadedStrings   #-}
{- |
   Module      : Text.Pandoc.Writers.ChunkedHTML
   Copyright   : Copyright (C) 2023 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <[email protected]>
   Stability   : alpha
   Portability : portable

Conversion of 'Pandoc' documents to "chunked" HTML (a folder of
linked HTML documents, split by sections.
-}
module Text.Pandoc.Writers.ChunkedHTML (
  writeChunkedHTML
  ) where
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..))
import Text.Pandoc.Shared (stringify, tshow)
import Text.Pandoc.Class (PandocMonad, getPOSIXTime, runPure,
                          fetchItem, insertMedia, getMediaBag)
import Text.Pandoc.MediaBag (mediaItems)
import qualified Data.ByteString.Lazy as BL
import Text.Pandoc.Chunks (splitIntoChunks, Chunk(..), ChunkedDoc(..),
                           SecInfo(..), tocToList)
import Text.Pandoc.URI (isURI)
import Data.Text (Text)
import Data.Tree
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Text.Pandoc.Writers.HTML (writeHtml5String)
import Codec.Archive.Zip (Entry, addEntryToArchive, emptyArchive, toEntry,
                          fromArchive)
import qualified Data.Map as M
import Text.DocTemplates (Context(..), Val(..))
import Text.DocLayout (literal)
import Text.Pandoc.Writers.Shared (defField)
import Data.Aeson (toJSON, encode)
import System.FilePath (isRelative, normalise)
import Data.List (isInfixOf)
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Templates (compileTemplate, WithDefaultPartials(..))
import Control.Monad.Except (throwError)
import Text.Pandoc.Error

-- | Splits document into HTML chunks, dividing them by section,
-- and returns a zip archive of a folder of files.
writeChunkedHTML :: PandocMonad m
                 => WriterOptions -> Pandoc -> m BL.ByteString
writeChunkedHTML opts (Pandoc meta blocks) = do
  walkM addMedia (Pandoc meta blocks)
  epochtime <- floor <$> getPOSIXTime
  let toMediaEntry (fp, _mt, bs) = toEntry fp epochtime bs
  mediaEntries <- map toMediaEntry . mediaItems <$> getMediaBag
  let chunkedDoc = splitIntoChunks (writerChunkTemplate opts)
                     True
                     (Just 1)
                     (writerSplitLevel opts)
                     (Pandoc meta blocks)
  let topChunk =
        Chunk
          { chunkHeading = docTitle meta
          , chunkId = "top"
          , chunkLevel = 0
          , chunkNumber = 0
          , chunkSectionNumber = Nothing
          , chunkPath = "index.html"
          , chunkUp = Nothing
          , chunkPrev = Nothing
          , chunkNext = case chunkedChunks chunkedDoc of
                          [] -> Nothing
                          (x:_) -> Just x
          , chunkUnlisted = True
          , chunkContents = mempty
          }

  let chunks = map (\x -> case chunkUp x of
                             Nothing -> x{ chunkUp = Just topChunk }
                             _ -> x)
               $ case chunkedChunks chunkedDoc of
                   [] -> []
                   (x:xs) -> x{ chunkPrev = Just topChunk } : xs

  let Node secinfo secs = chunkedTOC chunkedDoc
  let tocTree = Node secinfo{ secTitle = docTitle meta,
                              secPath = "index.html" } secs
  let tree = buildTOC opts tocTree
  renderedTOC <- writeHtml5String opts{ writerTemplate = Nothing }
                    (Pandoc nullMeta [tree])
  -- see #8915 -- we need to set the math variable in the top chunk:
  res <- runWithDefaultPartials $ compileTemplate "mathvar" "$math$"
  mathVar <- case res of
    Left e   -> throwError $ PandocTemplateError (T.pack e)
    Right t  -> return t
  tocMathVariable <- writeHtml5String opts{ writerTemplate = Just mathVar }
                    (Pandoc meta (tree:blocks))
  let opts' = opts{ writerVariables =
                        defField "table-of-contents" renderedTOC
                      . defField "math" tocMathVariable
                      $ writerVariables opts }
  entries <- mapM (chunkToEntry opts' meta topChunk) (topChunk : chunks)
  let sitemap = toEntry "sitemap.json" epochtime
                  (encode $ toJSON $ tocTreeToContext tocTree)
  let archive = foldr addEntryToArchive emptyArchive
                 (sitemap : entries ++ mediaEntries)
  return $ fromArchive archive


-- We include in the zip only local media that is in the working directory
-- or below.
addMedia :: PandocMonad m => Inline -> m Inline
addMedia il@(Image _ _ (src,_))
  | not (isURI src)
  , fp <- normalise (T.unpack src)
  , isRelative fp
  , not (".." `isInfixOf` fp) = do
  (bs, mbMime) <- fetchItem (T.pack fp)
  insertMedia fp mbMime (BL.fromStrict bs)
  return il
addMedia il = return il

buildTOC :: WriterOptions -> Tree SecInfo -> Block
buildTOC opts = tocToList (writerNumberSections opts) (writerTOCDepth opts)

chunkToEntry :: PandocMonad m
             => WriterOptions -> Meta -> Chunk -> Chunk -> m Entry
chunkToEntry opts meta topChunk chunk = do
  html <- writeHtml5String opts' (Pandoc meta' blocks)
  epochtime <- floor <$> getPOSIXTime
  let htmlLBS = BL.fromStrict $ TE.encodeUtf8 html
  return $ toEntry (chunkPath chunk) epochtime htmlLBS
 where
  opts' = opts{ writerVariables =
                  addContextVars opts' topChunk chunk $ writerVariables opts }
  meta' = setMeta "pagetitle" (MetaString (stringify $ chunkHeading chunk)) meta
  blocks = chunkContents chunk

tocTreeToContext :: Tree SecInfo -> Context Text
tocTreeToContext (Node secinfo subs) =
  Context $ M.fromList
  [ ("section", MapVal $ secInfoToContext secinfo)
  , ("subsections", ListVal $ map (MapVal . tocTreeToContext) subs)
  ]

secInfoToContext :: SecInfo -> Context Text
secInfoToContext sec =
  Context $ M.fromList
  [ ("title", SimpleVal $ literal $ stringify $ secTitle sec)
  , ("number", maybe NullVal (SimpleVal . literal) (secNumber sec))
  , ("id", SimpleVal $ literal $ secId sec)
  , ("path", SimpleVal $ literal $ secPath sec)
  , ("level", SimpleVal $ literal $ tshow $ secLevel sec)
  ]

addContextVars
  :: WriterOptions -> Chunk -> Chunk -> Context Text -> Context Text
addContextVars opts topChunk chunk context =
     maybe id (defField "next" . navlinks) (chunkNext chunk)
   . maybe id (defField "previous" . navlinks) (chunkPrev chunk)
   . maybe id (defField "up" . navlinks) (chunkUp chunk)
   . maybe id (defField "top" . navlinks) (if chunk == topChunk
                                              then Nothing
                                              else Just topChunk)
   . defField "toc" (chunk == topChunk && writerTableOfContents opts)
    $ context
 where
  navlinks ch = toMapVal [("url", formatPath ch), ("title", formatHeading ch)]
  toMapVal = MapVal . Context . M.fromList
  formatPath = SimpleVal . literal . T.pack . chunkPath
  formatHeading ch = SimpleVal . literal . either (const "") id . runPure $
    writeHtml5String opts{ writerTemplate = Nothing }
      (Pandoc nullMeta [Plain $ chunkHeading ch])