diff options
| author | John MacFarlane <[email protected]> | 2022-11-06 21:22:51 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-11-06 21:31:40 -0800 |
| commit | 452fe5aa177c36ff21189fadc96adf74f38d4c2f (patch) | |
| tree | 994658d7f9a3598a8472e3862a9a43345edac924 | |
| parent | 04973586eb36773c3a1d149019edf291b527a719 (diff) | |
EPUB writer: refactor to use T.P.Chunks.issue6122
| -rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 302 |
1 files changed, 107 insertions, 195 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 0dffb9f51..5b2797c4a 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -20,14 +20,14 @@ import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, import Control.Applicative ( (<|>) ) import Control.Monad (mplus, unless, when, zipWithM) import Control.Monad.Except (catchError, throwError) -import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, +import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, gets, lift, modify) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import Data.Char (isAlphaNum, isAscii, isDigit) import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M -import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) +import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust, catMaybes) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -529,23 +529,27 @@ pandocToEPUB version opts doc = do -- body pages -- add level 1 header to beginning if none there - let secs = makeSections True Nothing - $ addIdentifiers opts - $ case blocks of - (Div _ - (Header{}:_) : _) -> blocks - (Header 1 _ _ : _) -> blocks - _ -> Header 1 ("",["unnumbered"],[]) - (docTitle' meta) : blocks - - -- create the chapters and their reftable from the original options and the sections - let (chapters, reftable) = - createChaptersAndReftable (writerEpubChapterLevel opts) secs + let blocks' = addIdentifiers opts + $ case blocks of + (Div _ + (Header{}:_) : _) -> blocks + (Header 1 _ _ : _) -> blocks + _ -> Header 1 ("",["unnumbered"],[]) + (docTitle' meta) : blocks + + -- create the chapters + let chunkedDoc = splitIntoChunks "ch%n.xhtml" + (writerNumberSections opts) + Nothing + (writerEpubChapterLevel opts) + (Pandoc meta blocks') + -- Create the chapter entries from the chapters. -- Also requires access to the extended writer options and context -- as well as the css Context and html writer - chapterEntries <- createChapterEntries opts' vars cssvars writeHtml chapters + chapterEntries <- createChapterEntries opts' vars cssvars writeHtml + (chunkedChunks chunkedDoc) @@ -667,43 +671,14 @@ pandocToEPUB version opts doc = do contentsEntry <- mkEntry "content.opf" contentsData -- toc.ncx - let tocLevel = writerTOCDepth opts - - -- Helper function for both the toc and anv Entries - let navPointNode :: PandocMonad m - => (Int -> [Inline] -> T.Text -> [Element] -> Element) - -> Block -> StateT Int m [Element] - navPointNode formatter (Div (ident,_,_) - (Header lvl (_,_,kvs) ils : children)) = - if lvl > tocLevel - then return [] - else do - n <- get - modify (+1) - let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (T.null num) - then Span ("", ["section-header-number"], []) - [Str num] : Space : ils - else ils - src <- case lookup ident reftable of - Just x -> return x - Nothing -> throwError $ PandocSomeError $ - ident <> " not found in reftable" - subs <- concat <$> mapM (navPointNode formatter) children - return [formatter n tit src subs] - navPointNode formatter (Div _ bs) = - concat <$> mapM (navPointNode formatter) bs - navPointNode _ _ = return [] - -- Create the tocEntry from the metadata together with the sections and title. - tocEntry <- createTocEntry meta metadata plainTitle (chunkedTOC chunkedDoc) - navPointNode + tocEntry <- createTocEntry opts' meta metadata plainTitle + (chunkedTOC chunkedDoc) -- Create the navEntry using the metadata, all of the various writer options, -- the CSS and HTML helpers, the document and toc title as well as the epub version and all of the sections - navEntry <- createNavEntry meta metadata opts' True vars cssvars + navEntry <- createNavEntry opts' meta metadata True vars cssvars writeHtml tocTitle version (chunkedTOC chunkedDoc) - navPointNode -- mimetype mimetypeEntry <- mkEntry "mimetype" $ @@ -795,13 +770,13 @@ createChapterEntries :: PandocMonad m => -> Context Text -> (Bool -> Context Text) -> (WriterOptions -> Pandoc -> StateT EPUBState m B8.ByteString) - -> [Chapter] + -> [Chunk] -> StateT EPUBState m [Entry] createChapterEntries opts' vars cssvars writeHtml chapters = do -- Create an entry from the chapter with the provided number. -- chapToEntry :: Int -> Chapter -> StateT EPUBState m Entry - let chapToEntry num (Chapter bs) = - mkEntry ("text/" ++ T.unpack (showChapter num)) =<< + let chapToEntry num chunk = + mkEntry ("text/" ++ chunkPath chunk) =<< -- Combine all provided options writeHtml opts'{ writerVariables = Context (M.fromList @@ -809,12 +784,15 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do ("pagetitle", toVal' $ showChapter num)]) <> cssvars True <> vars } pdoc - where (pdoc, bodyType) = + where bs = chunkContents chunk + meta' = setMeta "title" (fromList + (walk removeNote + (chunkHeading chunk))) nullMeta + (pdoc, bodyType) = case bs of - (Div (_,"section":_,kvs) (Header _ _ xs : _) : _) -> + (Div (_,"section":_,kvs) _ : _) -> -- remove notes or we get doubled footnotes - (Pandoc (setMeta "title" - (walk removeNote $ fromList xs) nullMeta) bs, + (Pandoc meta' bs, -- Check if the chapters belongs to the frontmatter, -- backmatter of bodymatter defaulting to the body case lookup "epub:type" kvs of @@ -823,7 +801,7 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do | x `elem` frontMatterTypes -> "frontmatter" | x `elem` backMatterTypes -> "backmatter" | otherwise -> "bodymatter") - _ -> (Pandoc nullMeta bs, "bodymatter") + _ -> (Pandoc meta' bs, "bodymatter") frontMatterTypes = ["prologue", "abstract", "acknowledgments", "copyright-page", "dedication", "credits", "keywords", "imprint", @@ -837,117 +815,40 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do zipWithM chapToEntry [1..] chapters --- | Splits the blocks into chapters and creates a corresponding reftable -createChaptersAndReftable :: Int -> [Block] -> ([Chapter], [(Text, Text)]) -createChaptersAndReftable chapterHeaderLevel secs = (chapters, reftable) - where - isChapterHeader :: Block -> Bool - isChapterHeader (Div _ (Header n _ _:_)) = n <= chapterHeaderLevel - isChapterHeader _ = False - - secsToChapters :: [Block] -> [Chapter] - secsToChapters [] = [] - secsToChapters (d@(Div attr (h@(Header lvl _ _) : bs)) : rest) - -- If the header is of the same level as chapters, create a chapter - | chapterHeaderLevel == lvl = - Chapter [d] : secsToChapters rest - -- If the header is a level higher than chapters, - -- create a chapter of everything until the next chapter header. - | chapterHeaderLevel > lvl = - Chapter [Div attr (h:xs)] : - secsToChapters ys ++ secsToChapters rest - where (xs, ys) = break isChapterHeader bs - secsToChapters bs = - -- If this is the last block, keep it as is, - -- otherwise create a chapter for everything until the next chapter header. - (if null xs then id else (Chapter xs :)) $ secsToChapters ys - where (xs, ys) = break isChapterHeader bs - - -- Convert the sections to initial chapters - chapters' = secsToChapters secs - - -- Extract references for the reftable from Inline elements - extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)] - extractLinkURL' num (Span (ident, _, _) _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL' num (Link (ident, _, _) _ _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL' num (Image (ident, _, _) _ _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL' num (RawInline fmt raw) - | isHtmlFormat fmt - = foldr (\tag -> - case tag of - TagOpen{} -> - case fromAttrib "id" tag of - "" -> id - x -> ((x, showChapter num <> "#" <> x):) - _ -> id) - [] (parseTags raw) - extractLinkURL' _ _ = [] - - -- Extract references for the reftable from Block elements - extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] - extractLinkURL num (Div (ident, _, _) _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL num (Header _ (ident, _, _) _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL num (Table (ident,_,_) _ _ _ _ _) - | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] - extractLinkURL num (RawBlock fmt raw) - | isHtmlFormat fmt - = foldr (\tag -> - case tag of - TagOpen{} -> - case fromAttrib "id" tag of - "" -> id - x -> ((x, showChapter num <> "#" <> x):) - _ -> id) - [] (parseTags raw) - extractLinkURL num b = query (extractLinkURL' num) b - - -- Create a reference table for the chapters with appropriate numbering - reftable = concat $ zipWith (\(Chapter bs) num -> - query (extractLinkURL num) bs) - chapters' [1..] - - fixInternalReferences :: Inline -> Inline - fixInternalReferences (Link attr lab (src, tit)) - | Just ('#', xs) <- T.uncons src = case lookup xs reftable of - Just ys -> Link attr lab (ys, tit) - Nothing -> Link attr lab (src, tit) - fixInternalReferences x = x - - -- internal reference IDs change when we chunk the file, - -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. - -- this fixes that: - chapters = map (\(Chapter bs) -> - Chapter $ walk fixInternalReferences bs) - chapters' - createTocEntry :: PandocMonad m => - Meta + WriterOptions + -> Meta -> EPUBMetadata -> Text -> Tree SecInfo - -> ((Int -> [Inline] -> T.Text -> [Element] -> Element) - -> Block -> StateT Int m [Element]) -> StateT EPUBState m Entry -createTocEntry meta metadata plainTitle toctree navPointNode = do - let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element - navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" <> tshow n)] $ - [ unode "navLabel" $ unode "text" $ stringify tit - , unode "content" ! [("src", "text/" <> src)] $ () - ] ++ subs +createTocEntry opts meta metadata plainTitle (Node _ secs) = do + let mkNavPoint :: Tree SecInfo -> State Int (Maybe Element) + mkNavPoint (Node secinfo subsecs) + | secLevel secinfo > writerTOCDepth opts = return Nothing + | otherwise = do + n <- get + modify (+ 1) + subs <- catMaybes <$> mapM mkNavPoint subsecs + let secnum' = case secNumber secinfo of + Just t -> t <> " " + Nothing -> "" + let title' = secnum' <> stringify (secTitle secinfo) + return $ Just $ unode "navPoint" ! + [("id", "navPoint-" <> tshow n)] $ + [ unode "navLabel" $ unode "text" title' + , unode "content" ! + [("src", "text/" <> secPath secinfo <> + "#" <> secId secinfo)] $ () + ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" + (stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] - navMap <- lift $ evalStateT - (concat <$> mapM (navPointNode navMapFormatter) secs) 1 + let navMap = evalState (catMaybes <$> mapM mkNavPoint secs) 1 uuid <- case epubIdentifier metadata of (x:_) -> return $ identifierText x -- use first identifier as UUID @@ -975,47 +876,58 @@ createTocEntry meta metadata plainTitle toctree navPointNode = do mkEntry "toc.ncx" tocData -createNavEntry :: PandocMonad m => - Meta - -> EPUBMetadata - -> WriterOptions - -> Bool - -> Context Text - -> (Bool -> Context Text) - -> (WriterOptions -> Pandoc -> m B8.ByteString) - -> Text - -> EPUBVersion - -> Tree SecInfo - -> ((Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element]) - -> StateT EPUBState m Entry -createNavEntry meta metadata opts includeTitlePage - vars cssvars writeHtml tocTitle version toctree navPointNode = do - let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element - navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" <> tshow n)] $ - (unode "a" ! - [("href", "text/" <> src)] - $ titElements) - : case subs of - [] -> [] - (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - where titElements = either (const []) id $ - parseXMLContents (TL.fromStrict titRendered) - titRendered = case P.runPure - (writeHtmlStringForEPUB version - opts{ writerTemplate = Nothing } - (Pandoc nullMeta - [Plain $ walk clean tit])) of - Left _ -> stringify tit - Right x -> x - -- can't have <a> elements inside generated links... - clean (Link _ ils _) = Span ("", [], []) ils - clean (Note _) = Str "" - clean x = x +createNavEntry :: PandocMonad m + => WriterOptions + -> Meta + -> EPUBMetadata + -> Bool + -> Context Text + -> (Bool -> Context Text) + -> (WriterOptions -> Pandoc -> m B8.ByteString) + -> Text + -> EPUBVersion + -> Tree SecInfo + -> StateT EPUBState m Entry +createNavEntry opts meta metadata includeTitlePage + vars cssvars writeHtml tocTitle version (Node _ secs) = do + let mkItem :: Tree SecInfo -> State Int (Maybe Element) + mkItem (Node secinfo subsecs) + | secLevel secinfo > writerTOCDepth opts = return Nothing + | otherwise = do + n <- get + modify (+ 1) + subs <- catMaybes <$> mapM mkItem subsecs + let secnum' = case secNumber secinfo of + Just num -> [Span ("", ["section-header-number"], []) + [Str num] , Space] + Nothing -> [] + let title' = secnum' <> secTitle secinfo + -- can't have <a> elements inside generated links... + let clean (Link _ ils _) = Span ("", [], []) ils + clean (Note _) = Str "" + clean x = x + let titRendered = case P.runPure + (writeHtmlStringForEPUB version + opts{ writerTemplate = Nothing } + (Pandoc nullMeta + [Plain $ walk clean title'])) of + Left _ -> stringify title' + Right x -> x + let titElements = either (const []) id $ + parseXMLContents (TL.fromStrict titRendered) + + return $ Just $ unode "li" ! + [("id", "toc-li-" <> tshow n)] $ + (unode "a" ! + [("href", "text/" <> secPath secinfo <> + "#" <> secId secinfo)] + $ titElements) + : case subs of + [] -> [] + (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if version == EPUB3 then "nav" else "div" - tocBlocks <- lift $ evalStateT - (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 + let tocBlocks = evalState (catMaybes <$> mapM mkItem secs) 1 let navBlocks = [RawBlock (Format "html") $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | version == EPUB3] ++ |
