aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2022-11-06 21:22:51 -0800
committerJohn MacFarlane <[email protected]>2022-11-06 21:31:40 -0800
commit452fe5aa177c36ff21189fadc96adf74f38d4c2f (patch)
tree994658d7f9a3598a8472e3862a9a43345edac924
parent04973586eb36773c3a1d149019edf291b527a719 (diff)
EPUB writer: refactor to use T.P.Chunks.issue6122
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs302
1 files changed, 107 insertions, 195 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 0dffb9f51..5b2797c4a 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -20,14 +20,14 @@ import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive,
import Control.Applicative ( (<|>) )
import Control.Monad (mplus, unless, when, zipWithM)
import Control.Monad.Except (catchError, throwError)
-import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
+import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get,
gets, lift, modify)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import Data.Char (isAlphaNum, isAscii, isDigit)
import Data.List (isInfixOf, isPrefixOf)
import qualified Data.Map as M
-import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
+import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust, catMaybes)
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Text (Text)
@@ -529,23 +529,27 @@ pandocToEPUB version opts doc = do
-- body pages
-- add level 1 header to beginning if none there
- let secs = makeSections True Nothing
- $ addIdentifiers opts
- $ case blocks of
- (Div _
- (Header{}:_) : _) -> blocks
- (Header 1 _ _ : _) -> blocks
- _ -> Header 1 ("",["unnumbered"],[])
- (docTitle' meta) : blocks
-
- -- create the chapters and their reftable from the original options and the sections
- let (chapters, reftable) =
- createChaptersAndReftable (writerEpubChapterLevel opts) secs
+ let blocks' = addIdentifiers opts
+ $ case blocks of
+ (Div _
+ (Header{}:_) : _) -> blocks
+ (Header 1 _ _ : _) -> blocks
+ _ -> Header 1 ("",["unnumbered"],[])
+ (docTitle' meta) : blocks
+
+ -- create the chapters
+ let chunkedDoc = splitIntoChunks "ch%n.xhtml"
+ (writerNumberSections opts)
+ Nothing
+ (writerEpubChapterLevel opts)
+ (Pandoc meta blocks')
+
-- Create the chapter entries from the chapters.
-- Also requires access to the extended writer options and context
-- as well as the css Context and html writer
- chapterEntries <- createChapterEntries opts' vars cssvars writeHtml chapters
+ chapterEntries <- createChapterEntries opts' vars cssvars writeHtml
+ (chunkedChunks chunkedDoc)
@@ -667,43 +671,14 @@ pandocToEPUB version opts doc = do
contentsEntry <- mkEntry "content.opf" contentsData
-- toc.ncx
- let tocLevel = writerTOCDepth opts
-
- -- Helper function for both the toc and anv Entries
- let navPointNode :: PandocMonad m
- => (Int -> [Inline] -> T.Text -> [Element] -> Element)
- -> Block -> StateT Int m [Element]
- navPointNode formatter (Div (ident,_,_)
- (Header lvl (_,_,kvs) ils : children)) =
- if lvl > tocLevel
- then return []
- else do
- n <- get
- modify (+1)
- let num = fromMaybe "" $ lookup "number" kvs
- let tit = if writerNumberSections opts && not (T.null num)
- then Span ("", ["section-header-number"], [])
- [Str num] : Space : ils
- else ils
- src <- case lookup ident reftable of
- Just x -> return x
- Nothing -> throwError $ PandocSomeError $
- ident <> " not found in reftable"
- subs <- concat <$> mapM (navPointNode formatter) children
- return [formatter n tit src subs]
- navPointNode formatter (Div _ bs) =
- concat <$> mapM (navPointNode formatter) bs
- navPointNode _ _ = return []
-
-- Create the tocEntry from the metadata together with the sections and title.
- tocEntry <- createTocEntry meta metadata plainTitle (chunkedTOC chunkedDoc)
- navPointNode
+ tocEntry <- createTocEntry opts' meta metadata plainTitle
+ (chunkedTOC chunkedDoc)
-- Create the navEntry using the metadata, all of the various writer options,
-- the CSS and HTML helpers, the document and toc title as well as the epub version and all of the sections
- navEntry <- createNavEntry meta metadata opts' True vars cssvars
+ navEntry <- createNavEntry opts' meta metadata True vars cssvars
writeHtml tocTitle version (chunkedTOC chunkedDoc)
- navPointNode
-- mimetype
mimetypeEntry <- mkEntry "mimetype" $
@@ -795,13 +770,13 @@ createChapterEntries :: PandocMonad m =>
-> Context Text
-> (Bool -> Context Text)
-> (WriterOptions -> Pandoc -> StateT EPUBState m B8.ByteString)
- -> [Chapter]
+ -> [Chunk]
-> StateT EPUBState m [Entry]
createChapterEntries opts' vars cssvars writeHtml chapters = do
-- Create an entry from the chapter with the provided number.
-- chapToEntry :: Int -> Chapter -> StateT EPUBState m Entry
- let chapToEntry num (Chapter bs) =
- mkEntry ("text/" ++ T.unpack (showChapter num)) =<<
+ let chapToEntry num chunk =
+ mkEntry ("text/" ++ chunkPath chunk) =<<
-- Combine all provided options
writeHtml opts'{ writerVariables =
Context (M.fromList
@@ -809,12 +784,15 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do
("pagetitle", toVal' $
showChapter num)])
<> cssvars True <> vars } pdoc
- where (pdoc, bodyType) =
+ where bs = chunkContents chunk
+ meta' = setMeta "title" (fromList
+ (walk removeNote
+ (chunkHeading chunk))) nullMeta
+ (pdoc, bodyType) =
case bs of
- (Div (_,"section":_,kvs) (Header _ _ xs : _) : _) ->
+ (Div (_,"section":_,kvs) _ : _) ->
-- remove notes or we get doubled footnotes
- (Pandoc (setMeta "title"
- (walk removeNote $ fromList xs) nullMeta) bs,
+ (Pandoc meta' bs,
-- Check if the chapters belongs to the frontmatter,
-- backmatter of bodymatter defaulting to the body
case lookup "epub:type" kvs of
@@ -823,7 +801,7 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do
| x `elem` frontMatterTypes -> "frontmatter"
| x `elem` backMatterTypes -> "backmatter"
| otherwise -> "bodymatter")
- _ -> (Pandoc nullMeta bs, "bodymatter")
+ _ -> (Pandoc meta' bs, "bodymatter")
frontMatterTypes = ["prologue", "abstract", "acknowledgments",
"copyright-page", "dedication",
"credits", "keywords", "imprint",
@@ -837,117 +815,40 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do
zipWithM chapToEntry [1..] chapters
--- | Splits the blocks into chapters and creates a corresponding reftable
-createChaptersAndReftable :: Int -> [Block] -> ([Chapter], [(Text, Text)])
-createChaptersAndReftable chapterHeaderLevel secs = (chapters, reftable)
- where
- isChapterHeader :: Block -> Bool
- isChapterHeader (Div _ (Header n _ _:_)) = n <= chapterHeaderLevel
- isChapterHeader _ = False
-
- secsToChapters :: [Block] -> [Chapter]
- secsToChapters [] = []
- secsToChapters (d@(Div attr (h@(Header lvl _ _) : bs)) : rest)
- -- If the header is of the same level as chapters, create a chapter
- | chapterHeaderLevel == lvl =
- Chapter [d] : secsToChapters rest
- -- If the header is a level higher than chapters,
- -- create a chapter of everything until the next chapter header.
- | chapterHeaderLevel > lvl =
- Chapter [Div attr (h:xs)] :
- secsToChapters ys ++ secsToChapters rest
- where (xs, ys) = break isChapterHeader bs
- secsToChapters bs =
- -- If this is the last block, keep it as is,
- -- otherwise create a chapter for everything until the next chapter header.
- (if null xs then id else (Chapter xs :)) $ secsToChapters ys
- where (xs, ys) = break isChapterHeader bs
-
- -- Convert the sections to initial chapters
- chapters' = secsToChapters secs
-
- -- Extract references for the reftable from Inline elements
- extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)]
- extractLinkURL' num (Span (ident, _, _) _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL' num (Link (ident, _, _) _ _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL' num (Image (ident, _, _) _ _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL' num (RawInline fmt raw)
- | isHtmlFormat fmt
- = foldr (\tag ->
- case tag of
- TagOpen{} ->
- case fromAttrib "id" tag of
- "" -> id
- x -> ((x, showChapter num <> "#" <> x):)
- _ -> id)
- [] (parseTags raw)
- extractLinkURL' _ _ = []
-
- -- Extract references for the reftable from Block elements
- extractLinkURL :: Int -> Block -> [(T.Text, T.Text)]
- extractLinkURL num (Div (ident, _, _) _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL num (Header _ (ident, _, _) _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
- | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
- extractLinkURL num (RawBlock fmt raw)
- | isHtmlFormat fmt
- = foldr (\tag ->
- case tag of
- TagOpen{} ->
- case fromAttrib "id" tag of
- "" -> id
- x -> ((x, showChapter num <> "#" <> x):)
- _ -> id)
- [] (parseTags raw)
- extractLinkURL num b = query (extractLinkURL' num) b
-
- -- Create a reference table for the chapters with appropriate numbering
- reftable = concat $ zipWith (\(Chapter bs) num ->
- query (extractLinkURL num) bs)
- chapters' [1..]
-
- fixInternalReferences :: Inline -> Inline
- fixInternalReferences (Link attr lab (src, tit))
- | Just ('#', xs) <- T.uncons src = case lookup xs reftable of
- Just ys -> Link attr lab (ys, tit)
- Nothing -> Link attr lab (src, tit)
- fixInternalReferences x = x
-
- -- internal reference IDs change when we chunk the file,
- -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
- -- this fixes that:
- chapters = map (\(Chapter bs) ->
- Chapter $ walk fixInternalReferences bs)
- chapters'
-
createTocEntry :: PandocMonad m =>
- Meta
+ WriterOptions
+ -> Meta
-> EPUBMetadata
-> Text
-> Tree SecInfo
- -> ((Int -> [Inline] -> T.Text -> [Element] -> Element)
- -> Block -> StateT Int m [Element])
-> StateT EPUBState m Entry
-createTocEntry meta metadata plainTitle toctree navPointNode = do
- let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
- navMapFormatter n tit src subs = unode "navPoint" !
- [("id", "navPoint-" <> tshow n)] $
- [ unode "navLabel" $ unode "text" $ stringify tit
- , unode "content" ! [("src", "text/" <> src)] $ ()
- ] ++ subs
+createTocEntry opts meta metadata plainTitle (Node _ secs) = do
+ let mkNavPoint :: Tree SecInfo -> State Int (Maybe Element)
+ mkNavPoint (Node secinfo subsecs)
+ | secLevel secinfo > writerTOCDepth opts = return Nothing
+ | otherwise = do
+ n <- get
+ modify (+ 1)
+ subs <- catMaybes <$> mapM mkNavPoint subsecs
+ let secnum' = case secNumber secinfo of
+ Just t -> t <> " "
+ Nothing -> ""
+ let title' = secnum' <> stringify (secTitle secinfo)
+ return $ Just $ unode "navPoint" !
+ [("id", "navPoint-" <> tshow n)] $
+ [ unode "navLabel" $ unode "text" title'
+ , unode "content" !
+ [("src", "text/" <> secPath secinfo <>
+ "#" <> secId secinfo)] $ ()
+ ] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
- [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
+ [ unode "navLabel" $ unode "text"
+ (stringify $ docTitle' meta)
, unode "content" ! [("src", "text/title_page.xhtml")]
$ () ]
- navMap <- lift $ evalStateT
- (concat <$> mapM (navPointNode navMapFormatter) secs) 1
+ let navMap = evalState (catMaybes <$> mapM mkNavPoint secs) 1
uuid <- case epubIdentifier metadata of
(x:_) -> return $ identifierText x -- use first identifier as UUID
@@ -975,47 +876,58 @@ createTocEntry meta metadata plainTitle toctree navPointNode = do
mkEntry "toc.ncx" tocData
-createNavEntry :: PandocMonad m =>
- Meta
- -> EPUBMetadata
- -> WriterOptions
- -> Bool
- -> Context Text
- -> (Bool -> Context Text)
- -> (WriterOptions -> Pandoc -> m B8.ByteString)
- -> Text
- -> EPUBVersion
- -> Tree SecInfo
- -> ((Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element])
- -> StateT EPUBState m Entry
-createNavEntry meta metadata opts includeTitlePage
- vars cssvars writeHtml tocTitle version toctree navPointNode = do
- let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
- navXhtmlFormatter n tit src subs = unode "li" !
- [("id", "toc-li-" <> tshow n)] $
- (unode "a" !
- [("href", "text/" <> src)]
- $ titElements)
- : case subs of
- [] -> []
- (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
- where titElements = either (const []) id $
- parseXMLContents (TL.fromStrict titRendered)
- titRendered = case P.runPure
- (writeHtmlStringForEPUB version
- opts{ writerTemplate = Nothing }
- (Pandoc nullMeta
- [Plain $ walk clean tit])) of
- Left _ -> stringify tit
- Right x -> x
- -- can't have <a> elements inside generated links...
- clean (Link _ ils _) = Span ("", [], []) ils
- clean (Note _) = Str ""
- clean x = x
+createNavEntry :: PandocMonad m
+ => WriterOptions
+ -> Meta
+ -> EPUBMetadata
+ -> Bool
+ -> Context Text
+ -> (Bool -> Context Text)
+ -> (WriterOptions -> Pandoc -> m B8.ByteString)
+ -> Text
+ -> EPUBVersion
+ -> Tree SecInfo
+ -> StateT EPUBState m Entry
+createNavEntry opts meta metadata includeTitlePage
+ vars cssvars writeHtml tocTitle version (Node _ secs) = do
+ let mkItem :: Tree SecInfo -> State Int (Maybe Element)
+ mkItem (Node secinfo subsecs)
+ | secLevel secinfo > writerTOCDepth opts = return Nothing
+ | otherwise = do
+ n <- get
+ modify (+ 1)
+ subs <- catMaybes <$> mapM mkItem subsecs
+ let secnum' = case secNumber secinfo of
+ Just num -> [Span ("", ["section-header-number"], [])
+ [Str num] , Space]
+ Nothing -> []
+ let title' = secnum' <> secTitle secinfo
+ -- can't have <a> elements inside generated links...
+ let clean (Link _ ils _) = Span ("", [], []) ils
+ clean (Note _) = Str ""
+ clean x = x
+ let titRendered = case P.runPure
+ (writeHtmlStringForEPUB version
+ opts{ writerTemplate = Nothing }
+ (Pandoc nullMeta
+ [Plain $ walk clean title'])) of
+ Left _ -> stringify title'
+ Right x -> x
+ let titElements = either (const []) id $
+ parseXMLContents (TL.fromStrict titRendered)
+
+ return $ Just $ unode "li" !
+ [("id", "toc-li-" <> tshow n)] $
+ (unode "a" !
+ [("href", "text/" <> secPath secinfo <>
+ "#" <> secId secinfo)]
+ $ titElements)
+ : case subs of
+ [] -> []
+ (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
let navtag = if version == EPUB3 then "nav" else "div"
- tocBlocks <- lift $ evalStateT
- (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
+ let tocBlocks = evalState (catMaybes <$> mapM mkItem secs) 1
let navBlocks = [RawBlock (Format "html")
$ showElement $ -- prettyprinting introduces bad spaces
unode navtag ! ([("epub:type","toc") | version == EPUB3] ++