diff options
| -rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 50 | ||||
| -rw-r--r-- | test/docbook-reader.docbook | 42 | ||||
| -rw-r--r-- | test/docbook-reader.native | 111 |
3 files changed, 173 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 66128b12e..c5de8b74f 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -46,7 +46,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Shared (safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces, addPandocAttributes) import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Transforms (headerShift) import Text.TeXMath (readMathML, writeTeX) @@ -731,9 +731,7 @@ blockTags = Set.fromList $ , "articleinfo" , "attribution" , "authorinitials" - , "bibliodiv" , "biblioentry" - , "bibliography" , "bibliomisc" , "bibliomixed" , "blockquote" @@ -772,31 +770,19 @@ blockTags = Set.fromList $ , "preface" , "procedure" , "programlisting" - , "qandadiv" , "question" - , "refsect1" , "refsect1info" - , "refsect2" , "refsect2info" - , "refsect3" , "refsect3info" - , "refsection" , "refsectioninfo" , "screen" - , "sect1" , "sect1info" - , "sect2" , "sect2info" - , "sect3" , "sect3info" - , "sect4" , "sect4info" - , "sect5" , "sect5info" - , "section" , "sectioninfo" , "simpara" - , "simplesect" , "substeps" , "subtitle" , "table" @@ -804,7 +790,13 @@ blockTags = Set.fromList $ , "titleabbrev" , "toc" , "variablelist" - ] ++ admonitionTags + ] ++ sectionTags ++ admonitionTags + +sectionTags :: [Text] +sectionTags = ["bibliography", "bibliodiv" + , "sect1", "sect2", "sect3", "sect4", "sect5", "section", "simplesect" + , "refsect1", "refsect2", "refsect3", "refsection", "qandadiv" + ] admonitionTags :: [Text] admonitionTags = ["caution","danger","important","note","tip","warning"] @@ -866,14 +858,19 @@ getBlocks e = do modify (\st -> st{ dbElementStack = drop 1 $ dbElementStack st }) return blocks +getRoleAttr :: Element -> [(Text, Text)] -- extract role attribute and add it to the attribute list +getRoleAttr e = case attrValue "role" e of + "" -> [] + r -> [("role", r)] + parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty else return $ plain $ trimInlines $ text s parseBlock (CRef x) = return $ plain $ str $ T.toUpper x -parseBlock (Elem e) = - case qName (elName e) of +parseBlock (Elem e) = do + parsedBlock <- case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated "index" -> skip -- skip index, since page numbers meaningless "para" -> parseMixed para (elContent e) @@ -985,6 +982,9 @@ parseBlock (Elem e) = "title" -> return mempty -- handled in parent element "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e + if qName (elName e) `elem` sectionTags + then return parsedBlock + else return $ addPandocAttributes (getRoleAttr e) parsedBlock where skip = do let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn @@ -1120,7 +1120,10 @@ parseBlock (Elem e) = modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b + let hdr = addPandocAttributes (getRoleAttr e) + $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr ++ attrs) + n' headerText + return $ hdr <> b titleabbrevElAsAttr = case filterChild (named "titleabbrev") e `mplus` (filterChild (named "info") e >>= @@ -1143,7 +1146,7 @@ parseBlock (Elem e) = b <- p case mbt of Nothing -> return b - Just t -> return $ divWith (attrValue "id" e,[],[]) + Just t -> return $ divWith (attrValue "id" e, [], getRoleAttr e) (divWith ("", ["title"], []) (plain t) <> b) -- Admonitions are parsed into a div. Following other Docbook tools that output HTML, @@ -1243,8 +1246,8 @@ parseInline (Text (CData _ s _)) = do else return $ text s parseInline (CRef ref) = return $ text $ fromMaybe (T.toUpper ref) $ lookupEntity ref -parseInline (Elem e) = - case qName (elName e) of +parseInline (Elem e) = do + parsedInline <- case qName (elName e) of "anchor" -> do return $ spanWith (attrValue "id" e, [], []) mempty "phrase" -> do @@ -1366,6 +1369,9 @@ parseInline (Elem e) = -- <?asciidor-br?> to in handleInstructions, above. "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id + return $ case qName (elName e) of + "emphasis" -> parsedInline + _ -> addPandocAttributes (getRoleAttr e) parsedInline where skip = do let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 6b7e7e1bc..076908583 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -27,9 +27,9 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. </para> -<sect1 id="headers"> +<sect1 id="headers" role="sect1role"> <title>Headers</title> - <sect2 id="level-2-with-an-embedded-link"> + <sect2 id="level-2-with-an-embedded-link" role="sect2role"> <title>Level 2 with an <ulink url="/url">embedded link</ulink></title> <sect3 id="level-3-with-emphasis"> <title>Level 3 with <emphasis>emphasis</emphasis></title> @@ -74,6 +74,9 @@ <para> Here’s a regular paragraph. </para> + <para role="pararole"> + And here’s a regular paragraph with a role. + </para> <para> In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like @@ -93,6 +96,11 @@ This is a block quote. It is pretty short. </para> </blockquote> + <blockquote role="roleblockquote"> + <para> + This is a block quote with a role. + </para> + </blockquote> <blockquote> <para> Code in a block quote: @@ -234,6 +242,26 @@ These should not be escaped: \$ \\ \> \[ \{ </listitem> </orderedlist> <para> + with role: + </para> + <orderedlist role="listrole" numeration="arabic"> + <listitem> + <para> + First + </para> + </listitem> + <listitem> + <para> + Second + </para> + </listitem> + <listitem> + <para> + Third + </para> + </listitem> + </orderedlist> + <para> and tight: </para> <orderedlist spacing="compact" numeration="arabic"> @@ -703,6 +731,12 @@ These should not be escaped: \$ \\ \> \[ \{ So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word. </para> <para> + So is <emphasis role="emphasisrole"><emphasis>this</emphasis></emphasis> word with a role. + </para> + <para> + So is <phrase role="phraserole"><phrase>this</phrase></phrase> phrase with a role. + </para> + <para> This is code: <literal>></literal>, <literal>$</literal>, <literal>\</literal>, <literal>\$</literal>, <literal><html></literal>. @@ -1408,7 +1442,7 @@ or here: <http://example.com/> <para> Table with attributes </para> - <table xml:id="mytableid1" class="mytableclass1 mytableclass2" tabstyle="mytabstyle1"> + <table xml:id="mytableid1" class="mytableclass1 mytableclass2" tabstyle="mytabstyle1" role="tablerole1"> <title> Attribute table caption </title> @@ -1444,7 +1478,7 @@ or here: <http://example.com/> <para> Table with attributes, without caption </para> - <informaltable xml:id="mytableid2" class="mytableclass3 mytableclass4" tabstyle="mytabstyle2"> + <informaltable xml:id="mytableid2" class="mytableclass3 mytableclass4" tabstyle="mytabstyle2" role="tablerole2"> <tgroup> <thead> <th> diff --git a/test/docbook-reader.native b/test/docbook-reader.native index 6d0f72811..94bca827a 100644 --- a/test/docbook-reader.native +++ b/test/docbook-reader.native @@ -62,10 +62,16 @@ Pandoc , Space , Str "suite." ] - , Header 1 ( "headers" , [] , [] ) [ Str "Headers" ] + , Header + 1 + ( "headers" , [] , [ ( "role" , "sect1role" ) ] ) + [ Str "Headers" ] , Header 2 - ( "level-2-with-an-embedded-link" , [] , [] ) + ( "level-2-with-an-embedded-link" + , [] + , [ ( "role" , "sect2role" ) ] + ) [ Str "Level" , Space , Str "2" @@ -151,6 +157,29 @@ Pandoc , Space , Str "paragraph." ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "pararole" ) ] + ) + [ Para + [ Str "And" + , Space + , Str "here\8217s" + , Space + , Str "a" + , Space + , Str "regular" + , Space + , Str "paragraph" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + ] , Para [ Str "In" , Space @@ -251,6 +280,31 @@ Pandoc , Str "short." ] ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "roleblockquote" ) ] + ) + [ BlockQuote + [ Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "block" + , Space + , Str "quote" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + ] + ] , BlockQuote [ Para [ Str "Code" @@ -348,6 +402,19 @@ Pandoc , [ Para [ Str "Second" ] ] , [ Para [ Str "Third" ] ] ] + , Para [ Str "with" , Space , Str "role:" ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "listrole" ) ] + ) + [ OrderedList + ( 1 , Decimal , DefaultDelim ) + [ [ Para [ Str "First" ] ] + , [ Para [ Str "Second" ] ] + , [ Para [ Str "Third" ] ] + ] + ] , Para [ Str "and" , Space , Str "tight:" ] , OrderedList ( 1 , Decimal , DefaultDelim ) @@ -932,6 +999,38 @@ Pandoc , Str "word." ] , Para + [ Str "So" + , Space + , Str "is" + , Space + , Emph [ Emph [ Str "this" ] ] + , Space + , Str "word" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + , Para + [ Str "So" + , Space + , Str "is" + , Space + , Span + ( "" , [ "phraserole" ] , [ ( "role" , "phraserole" ) ] ) + [ Str "this" ] + , Space + , Str "phrase" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + , Para [ Str "This" , Space , Str "is" @@ -2570,7 +2669,9 @@ Pandoc , Table ( "mytableid1" , [ "mytableclass1" , "mytableclass2" ] - , [ ( "custom-style" , "mytabstyle1" ) ] + , [ ( "role" , "tablerole1" ) + , ( "custom-style" , "mytabstyle1" ) + ] ) (Caption Nothing @@ -2636,7 +2737,9 @@ Pandoc , Table ( "mytableid2" , [ "mytableclass3" , "mytableclass4" ] - , [ ( "custom-style" , "mytabstyle2" ) ] + , [ ( "role" , "tablerole2" ) + , ( "custom-style" , "mytabstyle2" ) + ] ) (Caption Nothing []) [ ( AlignDefault , ColWidthDefault ) |
