aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2023-01-05 21:52:44 -0800
committerJohn MacFarlane <[email protected]>2023-01-10 21:48:38 -0800
commit1fdc27a2c0f723074c87ba4674575bc30da974f2 (patch)
tree8365affedb3445b474479de6ff6190be6ba2b54c
parentb55cd83a609bcb01f8472a66ab0ddca985c881e4 (diff)
Add ChunkedHTML writer.
- Add module Text.Pandoc.Writers.ChunkedHTML, exporting writeChunkedHtml [API change]. - Revised API for Text.Pandoc.Chunks. `chunkNext`, `chunkPrev`, `chunkUp` are now just `Maybe Chunk`. - Fix assignment of navigation elements of Chunks. - Strip off anchor portion of next and prev links. - Derive Show, Eq, Ord, Generic for ChunkDoc. - Add `chunkSectionNumber`, `chunkUnlisted`. - Automatically unwrap the zip to a directory if an extensionless output file specified. - Incorporate images with relative paths below working dir.
-rw-r--r--MANUAL.txt29
-rw-r--r--data/templates/default.chunkedhtml99
-rw-r--r--pandoc.cabal2
-rw-r--r--src/Text/Pandoc/App.hs23
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs8
-rw-r--r--src/Text/Pandoc/Chunks.hs183
-rw-r--r--src/Text/Pandoc/Writers.hs3
-rw-r--r--src/Text/Pandoc/Writers/ChunkedHTML.hs181
8 files changed, 433 insertions, 95 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 330faff37..7329073b4 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -287,6 +287,7 @@ header when requesting a document from a URL:
- `beamer` ([LaTeX beamer][`beamer`] slide show)
- `bibtex` ([BibTeX] bibliography)
- `biblatex` ([BibLaTeX] bibliography)
+ - `chunkedhtml` (zip archive of multiple linked HTML files)
- `commonmark` ([CommonMark] Markdown)
- `commonmark_x` ([CommonMark] Markdown with extensions)
- `context` ([ConTeXt])
@@ -358,7 +359,12 @@ header when requesting a document from a URL:
: Write output to *FILE* instead of *stdout*. If *FILE* is
`-`, output will go to *stdout*, even if a non-textual format
- (`docx`, `odt`, `epub2`, `epub3`) is specified.
+ (`docx`, `odt`, `epub2`, `epub3`) is specified. If the
+ output format is `chunkedhtml` and *FILE* has no extension,
+ then instead of producing a `.zip` file pandoc will create
+ a directory *FILE* and unpack the zip archive there
+ (unless *FILE* already exists, in which case an error
+ will be raised).
`--data-dir=`*DIRECTORY*
@@ -6822,6 +6828,27 @@ styling of pandoc's default HTML templates is desired (and
in that case the variables defined in [Variables for HTML] may
be used to fine-tune the style).
+# Chunked HTML
+
+`pandoc -t chunkedhtml` will produce a zip archive of linked
+HTML files, one for each section of the original document.
+Internal links will automatically be adjusted to point to
+the right place, images linked to under the working directory
+will be incorporated, and navigation links will be added.
+In addition, a JSON file `sitemap.json` will be included
+describing the hierarchical structure of the files.
+
+If an output file without an extension is specified, then
+it will be interpreted as a directory and the zip archive
+will be automatically unpacked into it (unless it already
+exists, in which case an error will be raised). Otherwise
+a `.zip` file will be produced.
+
+The navigation links can be customized by adjusting the
+template. By default, a table of contents is included only
+on the top page. To include it on every page, set the
+`toc` variable manually.
+
# Jupyter notebooks
When creating a [Jupyter notebook], pandoc will try to infer the
diff --git a/data/templates/default.chunkedhtml b/data/templates/default.chunkedhtml
new file mode 100644
index 000000000..7101c9587
--- /dev/null
+++ b/data/templates/default.chunkedhtml
@@ -0,0 +1,99 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="$lang$" xml:lang="$lang$"$if(dir)$ dir="$dir$"$endif$>
+<head>
+ <meta charset="utf-8" />
+ <meta name="generator" content="pandoc" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+$for(author-meta)$
+ <meta name="author" content="$author-meta$" />
+$endfor$
+$if(date-meta)$
+ <meta name="dcterms.date" content="$date-meta$" />
+$endif$
+$if(keywords)$
+ <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" />
+$endif$
+$if(description-meta)$
+ <meta name="description" content="$description-meta$" />
+$endif$
+ <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title>
+ <style>
+ div.sitenav { display: flex; flex-direction: row; flex-wrap: wrap; }
+ span.navlink { flex: 1; }
+ span.navlink-label { display: inline-block; min-width: 4em; }
+ $styles.html()$
+ </style>
+$for(css)$
+ <link rel="stylesheet" href="$css$" />
+$endfor$
+$for(header-includes)$
+ $header-includes$
+$endfor$
+$if(math)$
+ $math$
+$endif$
+</head>
+<body>
+$for(include-before)$
+$include-before$
+$endfor$
+<nav id="sitenav">
+<div class="sitenav">
+<span class="navlink">
+$if(up.url)$
+<span class="navlink-label">Up:</span> <a href="$up.url$" accesskey="u" rel="up">$up.title$</a>
+$endif$
+</span>
+<span class="navlink">
+$if(top)$
+<span class="navlink-label">Top:</span> <a href="$top.url$" accesskey="t" rel="top">$top.title$</a>
+$endif$
+</span>
+</div>
+<div class="sitenav">
+<span class="navlink">
+$if(next.url)$
+<span class="navlink-label">Next:</span> <a href="$next.url$" accesskey="n" rel="next">$next.title$</a>
+$endif$
+</span>
+<span class="navlink">
+$if(previous.url)$
+<span class="navlink-label">Previous:</span> <a href="$previous.url$" accesskey="p" rel="previous">$previous.title$</a>
+$endif$
+</span>
+</div>
+</nav>
+$if(title)$
+<header id="title-block-header">
+<h1 class="title">$title$</h1>
+$if(subtitle)$
+<p class="subtitle">$subtitle$</p>
+$endif$
+$for(author)$
+<p class="author">$author$</p>
+$endfor$
+$if(date)$
+<p class="date">$date$</p>
+$endif$
+$if(abstract)$
+<div class="abstract">
+<div class="abstract-title">$abstract-title$</div>
+$abstract$
+</div>
+$endif$
+</header>
+$endif$
+$if(toc)$
+<nav id="$idprefix$TOC" role="doc-toc">
+$if(toc-title)$
+<h2 id="$idprefix$toc-title">$toc-title$</h2>
+$endif$
+$table-of-contents$
+</nav>
+$endif$
+$body$
+$for(include-after)$
+$include-after$
+$endfor$
+</body>
+</html>
diff --git a/pandoc.cabal b/pandoc.cabal
index 3d694bf3c..ea255917f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -51,6 +51,7 @@ data-files:
data/templates/styles.citations.html
data/templates/default.html4
data/templates/default.html5
+ data/templates/default.chunkedhtml
data/templates/default.docbook4
data/templates/default.docbook5
data/templates/default.jats_archiving
@@ -584,6 +585,7 @@ library
Text.Pandoc.Writers.JATS,
Text.Pandoc.Writers.OPML,
Text.Pandoc.Writers.HTML,
+ Text.Pandoc.Writers.ChunkedHTML,
Text.Pandoc.Writers.Ipynb,
Text.Pandoc.Writers.ICML,
Text.Pandoc.Writers.Jira,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index bc996f554..2d4601b68 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -40,7 +40,9 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
-import System.Directory (doesDirectoryExist)
+import System.Directory (doesDirectoryExist, createDirectory)
+import Codec.Archive.Zip (toArchiveOrFail,
+ extractFilesFromArchive, ZipOption(..))
import System.Exit (exitSuccess)
import System.FilePath ( takeBaseName, takeExtension)
import System.IO (nativeNewline, stdout)
@@ -115,6 +117,16 @@ convertWithOpts scriptingEngine opts = do
case output of
TextOutput t -> writerFn eol outputFile t
BinaryOutput bs -> writeFnBinary outputFile bs
+ ZipOutput bs
+ | null (takeExtension outputFile) -> do
+ -- create directory and unzip
+ createDirectory outputFile -- will fail if directory exists
+ let zipopts = [OptRecursive, OptDestination outputFile] ++
+ [OptVerbose | optVerbosity opts == INFO]
+ case toArchiveOrFail bs of
+ Right archive -> extractFilesFromArchive zipopts archive
+ Left e -> E.throwIO $ PandocShouldNeverHappenError $ T.pack e
+ | otherwise -> writeFnBinary outputFile bs
convertWithOpts' :: (PandocMonad m, MonadIO m, MonadMask m)
=> ScriptingEngine
@@ -300,7 +312,9 @@ convertWithOpts' scriptingEngine istty datadir opts = do
createPngFallbacks (writerDpi writerOptions)
output <- case writer of
- ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
+ ByteStringWriter f
+ | format == "chunkedhtml" -> ZipOutput <$> f writerOptions doc
+ | otherwise -> BinaryOutput <$> f writerOptions doc
TextWriter f -> case outputPdfProgram outputSettings of
Just pdfProg -> do
res <- makePDF pdfProg (optPdfEngineOpts opts) f
@@ -322,7 +336,10 @@ convertWithOpts' scriptingEngine istty datadir opts = do
reports <- getLog
return (output, reports)
-data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString
+data PandocOutput =
+ TextOutput Text
+ | BinaryOutput BL.ByteString
+ | ZipOutput BL.ByteString
deriving (Show)
type Transform = Pandoc -> Pandoc
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 8208137cd..8e41e51fb 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -106,7 +106,7 @@ optToOutputSettings scriptingEngine opts = do
flvrd@(Format.FlavoredFormat format _extsDiff) <-
Format.parseFlavoredFormat writerName
- let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
+ let standalone = optStandalone opts || isBinaryFormat format || pdfOutput
let templateOrThrow = \case
Left e -> throwError $ PandocTemplateError (T.pack e)
Right t -> pure t
@@ -300,6 +300,6 @@ pdfWriterAndProg mWriter mEngine =
isCustomWriter w = ".lua" `T.isSuffixOf` w
-isTextFormat :: T.Text -> Bool
-isTextFormat s =
- s `notElem` ["odt","docx","epub2","epub3","epub","pptx","pdf"]
+isBinaryFormat :: T.Text -> Bool
+isBinaryFormat s =
+ s `elem` ["odt","docx","epub2","epub3","epub","pptx","pdf","chunkedhtml"]
diff --git a/src/Text/Pandoc/Chunks.hs b/src/Text/Pandoc/Chunks.hs
index db8ef531e..3b61f3041 100644
--- a/src/Text/Pandoc/Chunks.hs
+++ b/src/Text/Pandoc/Chunks.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
@@ -25,18 +26,18 @@ module Text.Pandoc.Chunks
, SecInfo(..)
) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared (makeSections, stringify)
+import Text.Pandoc.Shared (makeSections, stringify, inlineListToIdentifier)
import Text.Pandoc.Walk (Walkable(..))
import Data.Text (Text)
import Text.Printf (printf)
import Data.Maybe (fromMaybe, isNothing)
import qualified Data.Map as M
import qualified Data.Text as T
-import Data.List (find)
import Data.String (IsString)
import GHC.Generics (Generic)
import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
-import Data.Tree (Tree(..))
+import Data.Tree (Tree(..), unfoldForest, unfoldTree)
+import Data.List (tails)
-- | Split 'Pandoc' into 'Chunk's, e.g. for conversion into
-- a set of HTML pages or EPUB chapters.
@@ -47,64 +48,51 @@ splitIntoChunks :: PathTemplate -- ^ Template for filepath
-> Pandoc
-> ChunkedDoc
splitIntoChunks pathTemplate numberSections mbBaseLevel
- chunkLevel (Pandoc meta blocks) =
+ chunklev (Pandoc meta blocks) =
+ addNav .
fixInternalReferences .
+ walk rmNavAttrs .
(\chunks -> ChunkedDoc{ chunkedMeta = meta
, chunkedChunks = chunks
- , chunkedTOC = toTOCTree
- (concatMap chunkContents chunks) }) .
- makeChunks chunkLevel pathTemplate .
- addNavigation Nothing Nothing .
+ , chunkedTOC = toTOCTree' chunks }) .
+ makeChunks chunklev pathTemplate meta .
makeSections numberSections mbBaseLevel $ blocks
+-- | Add chunkNext, chunkPrev, chunkUp
+addNav :: ChunkedDoc -> ChunkedDoc
+addNav chunkedDoc =
+ chunkedDoc{ chunkedChunks =
+ addNext . addPrev . addUp $ chunkedChunks chunkedDoc }
+
+addUp :: [Chunk] -> [Chunk]
+addUp (c : d : ds)
+ | chunkLevel c < chunkLevel d
+ = c : addUp (d{ chunkUp = Just c } : ds)
+ | chunkLevel c == chunkLevel d
+ = c : addUp (d{ chunkUp = chunkUp c} : ds)
+addUp (c:cs) = c : addUp cs
+addUp [] = []
+
+addNext :: [Chunk] -> [Chunk]
+addNext cs = zipWith go cs (map Just (tail cs) ++ [Nothing])
+ where
+ go c nxt = c{ chunkNext = nxt }
+
+addPrev :: [Chunk] -> [Chunk]
+addPrev cs = zipWith go cs (Nothing : map Just cs)
+ where
+ go c prev = c{ chunkPrev = prev }
+
-- | Fix internal references so they point to the path of the chunk.
fixInternalReferences :: ChunkedDoc -> ChunkedDoc
-fixInternalReferences chunkedDoc =
- walk rmNavAttrs $ walk fixInternalRefs $
- chunkedDoc{ chunkedTOC = newTOC
- , chunkedChunks = newChunks }
+fixInternalReferences chunkedDoc = walk fixInternalRefs chunkedDoc
where
- newTOC = fromMaybe (chunkedTOC chunkedDoc) $
- traverse addSecPath (chunkedTOC chunkedDoc)
-
- newChunks = map fixNav (chunkedChunks chunkedDoc)
-
- fixNav chunk =
- chunk{ chunkNext = chunkNext chunk >>= toNavLink
- , chunkPrev = chunkPrev chunk >>= toNavLink
- , chunkUp = chunkUp chunk >>= toNavLink
- }
-
- toNavLink id' =
- case M.lookup id' refMap of
- Nothing -> Just $ "#" <> id'
- Just fp -> Just $ T.pack fp <> "#" <> id'
-
- addSecPath :: SecInfo -> Maybe SecInfo
- addSecPath secinfo =
- case M.lookup (secId secinfo) refMap of
- Nothing -> Just secinfo
- Just fp -> Just $ secinfo{ secPath = T.pack fp }
-
- -- Remove some attributes we added just to construct chunkNext etc.
- rmNavAttrs :: Block -> Block
- rmNavAttrs (Div (ident,classes,kvs) bs) =
- Div (ident,classes,filter (not . isNavAttr) kvs) bs
- rmNavAttrs b = b
-
- isNavAttr :: (Text,Text) -> Bool
- isNavAttr ("nav-prev",_) = True
- isNavAttr ("nav-next",_) = True
- isNavAttr ("nav-up",_) = True
- isNavAttr ("nav-path",_) = True
- isNavAttr _ = False
-
fixInternalRefs :: Inline -> Inline
fixInternalRefs il@(Link attr ils (src,tit))
= case T.uncons src of
Just ('#', ident) -> Link attr ils (src', tit)
where src' = case M.lookup ident refMap of
- Just fp -> T.pack fp <> src
+ Just chunk -> T.pack (chunkPath chunk) <> src
Nothing -> src
_ -> il
fixInternalRefs il = il
@@ -113,7 +101,7 @@ fixInternalReferences chunkedDoc =
chunkToRefs chunk m =
let idents = chunkId chunk : getIdents (chunkContents chunk)
- in foldr (\ident -> M.insert ident (chunkPath chunk)) m idents
+ in foldr (\ident -> M.insert ident chunk) m idents
getIdents bs = query getBlockIdent bs ++ query getInlineIdent bs
@@ -162,11 +150,11 @@ fixInternalReferences chunkedDoc =
isHtmlFormat _ = False
-makeChunks :: Int -> PathTemplate -> [Block] -> [Chunk]
-makeChunks chunkLevel pathTemplate = secsToChunks 1
+makeChunks :: Int -> PathTemplate -> Meta -> [Block] -> [Chunk]
+makeChunks chunklev pathTemplate meta = secsToChunks 1
where
isChunkHeader :: Block -> Bool
- isChunkHeader (Div (_,"section":_,_) (Header n _ _:_)) = n <= chunkLevel
+ isChunkHeader (Div (_,"section":_,_) (Header n _ _:_)) = n <= chunklev
isChunkHeader _ = False
secsToChunks :: Int -> [Block] -> [Chunk]
@@ -174,11 +162,11 @@ makeChunks chunkLevel pathTemplate = secsToChunks 1
case break isChunkHeader bs of
([], []) -> []
([], (d@(Div attr@(_,"section":_,_) (h@(Header lvl _ _) : bs')) : rest))
- | chunkLevel == lvl ->
+ | chunklev == lvl ->
-- If the header is of the same level as chunks, create a chunk
toChunk chunknum d :
secsToChunks (chunknum + 1) rest
- | chunkLevel > lvl ->
+ | chunklev > lvl ->
case break isChunkHeader bs' of
(xs, ys) -> toChunk chunknum (Div attr (h:xs)) :
secsToChunks (chunknum + 1) (ys ++ rest)
@@ -188,56 +176,55 @@ makeChunks chunkLevel pathTemplate = secsToChunks 1
toChunk :: Int -> Block -> Chunk
toChunk chunknum
- (Div (divid,"section":classes,kvs) (h@(Header _ _ ils) : bs)) =
+ (Div (divid,"section":classes,kvs) (h@(Header lvl _ ils) : bs)) =
Chunk
{ chunkHeading = ils
, chunkId = divid
+ , chunkLevel = lvl
, chunkNumber = chunknum
+ , chunkSectionNumber = secnum
, chunkPath = chunkpath
- , chunkUp = lookup "nav-up" kvs
- , chunkPrev = lookup "nav-prev" kvs
- , chunkNext = lookup "nav-next" kvs
+ , chunkUp = Nothing
+ , chunkNext = Nothing
+ , chunkPrev = Nothing
+ , chunkUnlisted = "unlisted" `elem` classes
, chunkContents =
[Div (divid,"section":classes,kvs') (h : bs)]
}
where kvs' = kvs ++ [("nav-path", T.pack chunkpath)]
+ secnum = lookup "number" kvs
chunkpath = resolvePathTemplate pathTemplate chunknum
(stringify ils)
divid
- (fromMaybe "" (lookup "number" kvs))
+ (fromMaybe "" secnum)
toChunk chunknum (Div ("",["preamble"],[]) bs) =
Chunk
- { chunkHeading = []
- , chunkId = ""
+ { chunkHeading = docTitle meta
+ , chunkId = inlineListToIdentifier mempty $ docTitle meta
+ , chunkLevel = 0
, chunkNumber = chunknum
+ , chunkSectionNumber = Nothing
, chunkPath = resolvePathTemplate pathTemplate chunknum
- "" "" ""
+ (stringify (docTitle meta))
+ (inlineListToIdentifier mempty (docTitle meta))
+ "0"
, chunkUp = Nothing
, chunkPrev = Nothing
, chunkNext = Nothing
+ , chunkUnlisted = False
, chunkContents = bs
}
toChunk _ b = error $ "toChunk called on inappropriate block " <> show b
-- should not happen
--- | Add nav-up, nav-prev, nav-next attributes to each section Div
--- in a document.
-addNavigation :: Maybe Text -> Maybe Text -> [Block] -> [Block]
-addNavigation mbUpId mbPrevId (Div (ident, "section":classes, kvs) bs : xs) =
- Div (ident, "section":classes, kvs ++ navattrs) bs' :
- addNavigation mbUpId (Just ident) xs
+
+-- Remove some attributes we added just to construct chunkNext etc.
+rmNavAttrs :: Block -> Block
+rmNavAttrs (Div (ident,classes,kvs) bs) =
+ Div (ident,classes,filter (not . isNavAttr) kvs) bs
where
- bs' = addNavigation (Just ident) Nothing bs
- navattrs = maybe [] (\x -> [("nav-up", x)]) mbUpId
- ++ maybe [] (\x -> [("nav-prev", x)]) mbPrevId
- ++ maybe [] (\x -> [("nav-next", x)]) mbNextId
- mbNextId = find isSectionDiv bs >>= extractId
- isSectionDiv (Div (_,"section":_,_) _) = True
- isSectionDiv _ = False
- extractId (Div (id',_,_) _) = Just id'
- extractId _ = Nothing
-addNavigation mbUpId mbPrevId (x:xs) = x : addNavigation mbUpId mbPrevId xs
-addNavigation _ _ [] = []
+ isNavAttr (k,_) = "nav-" `T.isPrefixOf` k
+rmNavAttrs b = b
resolvePathTemplate :: PathTemplate
-> Int -- ^ Chunk number
@@ -272,14 +259,17 @@ data Chunk =
Chunk
{ chunkHeading :: [Inline]
, chunkId :: Text
+ , chunkLevel :: Int
, chunkNumber :: Int
+ , chunkSectionNumber :: Maybe Text
, chunkPath :: FilePath
- , chunkUp :: Maybe Text
- , chunkPrev :: Maybe Text
- , chunkNext :: Maybe Text
+ , chunkUp :: Maybe Chunk
+ , chunkPrev :: Maybe Chunk
+ , chunkNext :: Maybe Chunk
+ , chunkUnlisted :: Bool
, chunkContents :: [Block]
}
- deriving (Show, Read, Eq, Ord, Generic)
+ deriving (Show, Eq, Ord, Generic)
instance Walkable Inline Chunk where
query f chunk = query f (chunkContents chunk)
@@ -301,7 +291,7 @@ data ChunkedDoc =
{ chunkedMeta :: Meta
, chunkedTOC :: Tree SecInfo
, chunkedChunks :: [Chunk]
- }
+ } deriving (Show, Eq, Ord, Generic)
instance Walkable Inline ChunkedDoc where
query f doc = query f (chunkedChunks doc) <> query f (chunkedMeta doc)
@@ -333,7 +323,7 @@ data SecInfo =
, secId :: Text
, secPath :: Text
, secLevel :: Int
- } deriving (Show, Ord, Eq)
+ } deriving (Show, Ord, Eq, Generic)
instance Walkable Inline SecInfo where
query f sec = query f (secTitle sec)
@@ -346,12 +336,12 @@ instance Walkable Inline SecInfo where
-- in a form that can be turned into a table of contents.
-- Presupposes that the '[Block]' is the output of 'makeSections'.
toTOCTree :: [Block] -> Tree SecInfo
-toTOCTree bs =
+toTOCTree =
Node SecInfo{ secTitle = []
, secNumber = Nothing
, secId = ""
, secPath = ""
- , secLevel = 0 } $ foldr go [] bs
+ , secLevel = 0 } . foldr go []
where
go :: Block -> [Tree SecInfo] -> [Tree SecInfo]
go (Div (ident,_,_) (Header lev (_,classes,kvs) ils : subsecs))
@@ -364,3 +354,22 @@ toTOCTree bs =
go (Div _ [d@Div{}]) = go d -- #8402
go _ = id
+toTOCTree' :: [Chunk] -> Tree SecInfo
+toTOCTree' =
+ Node SecInfo{ secTitle = []
+ , secNumber = Nothing
+ , secId = ""
+ , secPath = ""
+ , secLevel = 0 } . getNodes . filter (not . skippable)
+ where
+ skippable c = isNothing (chunkSectionNumber c) && chunkUnlisted c
+ getNodes :: [Chunk] -> [Tree SecInfo]
+ getNodes (c:cs) =
+ let (as, bs) = span (\d -> chunkLevel d > chunkLevel c) cs
+ secinfo = SecInfo{ secTitle = chunkHeading c,
+ secNumber = chunkSectionNumber c,
+ secId = chunkId c,
+ secPath = T.pack $ chunkPath c,
+ secLevel = chunkLevel c }
+ in Node secinfo (getNodes as) : getNodes bs
+ getNodes [] = []
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index cf43cfad7..7f7a03603 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -24,6 +24,7 @@ module Text.Pandoc.Writers
, writeBeamer
, writeBibTeX
, writeBibLaTeX
+ , writeChunkedHTML
, writeCommonMark
, writeConTeXt
, writeCslJson
@@ -88,6 +89,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Error
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.BibTeX
+import Text.Pandoc.Writers.ChunkedHTML
import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.CslJson
@@ -189,6 +191,7 @@ writers = [
,("bibtex" , TextWriter writeBibTeX)
,("biblatex" , TextWriter writeBibLaTeX)
,("markua" , TextWriter writeMarkua)
+ ,("chunkedhtml" , ByteStringWriter writeChunkedHTML)
]
-- | Retrieve writer, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Writers/ChunkedHTML.hs b/src/Text/Pandoc/Writers/ChunkedHTML.hs
new file mode 100644
index 000000000..f58733588
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ChunkedHTML.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ViewPatterns #-}
+{- |
+ Module : Text.Pandoc.Writers.ChunkedHTML
+ Copyright : Copyright (C) 2023 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <[email protected]>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to "chunked" HTML (a folder of
+linked HTML documents, split by sections.
+-}
+module Text.Pandoc.Writers.ChunkedHTML (
+ writeChunkedHTML
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options (WriterOptions(..))
+import Text.Pandoc.Shared (stringify, tshow)
+import Text.Pandoc.Class (PandocMonad, getPOSIXTime, runPure,
+ readFileLazy, insertMedia, getMediaBag)
+import Text.Pandoc.MediaBag (mediaItems)
+import qualified Data.ByteString.Lazy as BL
+import Text.Pandoc.Chunks (splitIntoChunks, Chunk(..), ChunkedDoc(..),
+ SecInfo(..))
+import Data.Text (Text)
+import Data.Tree
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Codec.Archive.Zip (Entry, addEntryToArchive, emptyArchive, toEntry,
+ fromArchive)
+import qualified Data.Map as M
+import Text.DocTemplates (Context(..), Val(..))
+import Text.DocLayout (literal)
+import Text.Pandoc.Writers.Shared (defField)
+import Data.Aeson (toJSON, encode)
+import System.FilePath (isRelative, normalise)
+import Data.List (isInfixOf)
+import Text.Pandoc.Walk (walkM)
+
+-- | Splits document into HTML chunks, dividing them by section,
+-- and returns a zip archive of a folder of files.
+writeChunkedHTML :: PandocMonad m
+ => WriterOptions -> Pandoc -> m BL.ByteString
+writeChunkedHTML opts (Pandoc meta blocks) = do
+ walkM addMedia (Pandoc meta blocks)
+ epochtime <- floor <$> getPOSIXTime
+ let toMediaEntry (fp, _mt, bs) = toEntry fp epochtime bs
+ mediaEntries <- map toMediaEntry . mediaItems <$> getMediaBag
+ let chunkedDoc = splitIntoChunks "%s-%i.html"
+ True
+ (Just 1)
+ (writerEpubChapterLevel opts)
+ (Pandoc meta blocks)
+ let topChunk =
+ Chunk
+ { chunkHeading = docTitle meta
+ , chunkId = "top"
+ , chunkLevel = 0
+ , chunkNumber = 0
+ , chunkSectionNumber = Nothing
+ , chunkPath = "index.html"
+ , chunkUp = Nothing
+ , chunkPrev = Nothing
+ , chunkNext = case chunkedChunks chunkedDoc of
+ [] -> Nothing
+ (x:_) -> Just x
+ , chunkUnlisted = True
+ , chunkContents = mempty
+ }
+
+ let chunks = map (\x -> case chunkUp x of
+ Nothing -> x{ chunkUp = Just topChunk }
+ _ -> x)
+ $ case chunkedChunks chunkedDoc of
+ [] -> []
+ (x:xs) -> x{ chunkPrev = Just topChunk } : xs
+
+ let Node secinfo secs = chunkedTOC chunkedDoc
+ let tocTree = Node secinfo{ secTitle = docTitle meta,
+ secPath = "index.html" } secs
+ let tocBlocks = buildTOC opts tocTree
+ renderedTOC <- writeHtml5String opts{ writerTemplate = Nothing }
+ (Pandoc nullMeta tocBlocks)
+ let opts' = opts{ writerVariables =
+ defField "table-of-contents" renderedTOC
+ $ writerVariables opts }
+ entries <- mapM (chunkToEntry opts' meta topChunk) (topChunk : chunks)
+ let sitemap = toEntry "sitemap.json" epochtime
+ (encode $ toJSON $ tocTreeToContext tocTree)
+ let archive = foldr addEntryToArchive emptyArchive
+ (sitemap : entries ++ mediaEntries)
+ return $ fromArchive archive
+
+
+addMedia :: PandocMonad m => Inline -> m Inline
+addMedia il@(Image _ _ (src,_))
+ | fp <- normalise (T.unpack src)
+ , isRelative fp
+ , not (".." `isInfixOf` fp) = do
+ bs <- readFileLazy fp
+ insertMedia fp Nothing bs
+ return il
+addMedia il = return il
+
+buildTOC :: WriterOptions -> Tree SecInfo -> [Block]
+buildTOC opts tocTree = buildTOCPart tocTree
+ where
+ buildTOCPart (Node secinfo subsecs) =
+ Plain [Link nullAttr
+ ((maybe [] (\num ->
+ if writerNumberSections opts
+ then [Span ("",["toc-section-number"],[])
+ [Str num, Space]]
+ else []) (secNumber secinfo))
+ ++ secTitle secinfo)
+ (secPath secinfo, "") | secLevel secinfo > 0] :
+ if null subsecs
+ then []
+ else [BulletList (map buildTOCPart $ filter aboveThreshold subsecs)]
+ aboveThreshold (Node sec _) = secLevel sec <= writerTOCDepth opts
+
+chunkToEntry :: PandocMonad m
+ => WriterOptions -> Meta -> Chunk -> Chunk -> m Entry
+chunkToEntry opts meta topChunk chunk = do
+ html <- writeHtml5String opts' (Pandoc meta' blocks)
+ epochtime <- floor <$> getPOSIXTime
+ let htmlLBS = BL.fromStrict $ TE.encodeUtf8 html
+ return $ toEntry (chunkPath chunk) epochtime htmlLBS
+ where
+ opts' = opts{ writerVariables =
+ addContextVars opts' topChunk chunk $ writerVariables opts }
+ meta' = if chunk == topChunk
+ then meta
+ else Meta $ M.fromList [("pagetitle", MetaString
+ (stringify $ chunkHeading chunk))]
+ blocks = chunkContents chunk
+
+tocTreeToContext :: Tree SecInfo -> Context Text
+tocTreeToContext (Node secinfo subs) =
+ Context $ M.fromList
+ [ ("section", MapVal $ secInfoToContext secinfo)
+ , ("subsections", ListVal $ map (MapVal . tocTreeToContext) subs)
+ ]
+
+secInfoToContext :: SecInfo -> Context Text
+secInfoToContext sec =
+ Context $ M.fromList
+ [ ("title", SimpleVal $ literal $ stringify $ secTitle sec)
+ , ("number", maybe NullVal (SimpleVal . literal) (secNumber sec))
+ , ("id", SimpleVal $ literal $ secId sec)
+ , ("path", SimpleVal $ literal $ secPath sec)
+ , ("level", SimpleVal $ literal $ tshow $ secLevel sec)
+ ]
+
+addContextVars
+ :: WriterOptions -> Chunk -> Chunk -> Context Text -> Context Text
+addContextVars opts topChunk chunk context =
+ maybe id (defField "next" . navlinks) (chunkNext chunk)
+ . maybe id (defField "previous" . navlinks) (chunkPrev chunk)
+ . maybe id (defField "up" . navlinks) (chunkUp chunk)
+ . maybe id (defField "top" . navlinks) (if chunk == topChunk
+ then Nothing
+ else Just topChunk)
+ . defField "toc" (chunk == topChunk && writerTableOfContents opts)
+ $ context
+ where
+ navlinks ch = toMapVal [("url", formatPath ch), ("title", formatHeading ch)]
+ toMapVal = MapVal . Context . M.fromList
+ formatPath = SimpleVal . literal . T.pack . chunkPath
+ formatHeading ch = SimpleVal . literal . either (const "") id . runPure $
+ writeHtml5String opts{ writerTemplate = Nothing }
+ (Pandoc nullMeta [Plain $ chunkHeading ch])
+