about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/Text/Pandoc/Readers/DocBook.hs | 50
-rw-r--r-- test/docbook-reader.docbook | 42
-rw-r--r-- test/docbook-reader.native | 111
3 files changed, 173 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 66128b12e..c5de8b74f 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -46,7 +46,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces, addPandocAttributes)
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Transforms (headerShift)
import Text.TeXMath (readMathML, writeTeX)
@@ -731,9 +731,7 @@ blockTags = Set.fromList $
, "articleinfo"
, "attribution"
, "authorinitials"
- , "bibliodiv"
, "biblioentry"
- , "bibliography"
, "bibliomisc"
, "bibliomixed"
, "blockquote"
@@ -772,31 +770,19 @@ blockTags = Set.fromList $
, "preface"
, "procedure"
, "programlisting"
- , "qandadiv"
, "question"
- , "refsect1"
, "refsect1info"
- , "refsect2"
, "refsect2info"
- , "refsect3"
, "refsect3info"
- , "refsection"
, "refsectioninfo"
, "screen"
- , "sect1"
, "sect1info"
- , "sect2"
, "sect2info"
- , "sect3"
, "sect3info"
- , "sect4"
, "sect4info"
- , "sect5"
, "sect5info"
- , "section"
, "sectioninfo"
, "simpara"
- , "simplesect"
, "substeps"
, "subtitle"
, "table"
@@ -804,7 +790,13 @@ blockTags = Set.fromList $
, "titleabbrev"
, "toc"
, "variablelist"
- ] ++ admonitionTags
+ ] ++ sectionTags ++ admonitionTags
+
+sectionTags :: [Text] -- element names appended to blockTags; parseBlock skips its generic role-attribute step for these
+sectionTags = ["bibliography", "bibliodiv"
+ , "sect1", "sect2", "sect3", "sect4", "sect5", "section", "simplesect"
+ , "refsect1", "refsect2", "refsect3", "refsection", "qandadiv"
+ ]
-- | Element names handled as admonitions; appended to the tag list that builds 'blockTags'.
admonitionTags :: [Text]
admonitionTags = ["caution","danger","important","note","tip","warning"]
@@ -866,14 +858,19 @@ getBlocks e = do
modify (\st -> st{ dbElementStack = drop 1 $ dbElementStack st })
return blocks
+getRoleAttr :: Element -> [(Text, Text)] -- key-value pairs for the element's "role" attribute: [] when attrValue yields "", otherwise [("role", value)]
+getRoleAttr e = case attrValue "role" e of
+ "" -> []
+ r -> [("role", r)]
+
parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
else return $ plain $ trimInlines $ text s
parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
-parseBlock (Elem e) =
- case qName (elName e) of
+parseBlock (Elem e) = do
+ parsedBlock <- case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
"index" -> skip -- skip index, since page numbers meaningless
"para" -> parseMixed para (elContent e)
@@ -985,6 +982,9 @@ parseBlock (Elem e) =
"title" -> return mempty -- handled in parent element
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
+ if qName (elName e) `elem` sectionTags
+ then return parsedBlock
+ else return $ addPandocAttributes (getRoleAttr e) parsedBlock
where skip = do
let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
@@ -1120,7 +1120,10 @@ parseBlock (Elem e) =
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
modify $ \st -> st{ dbSectionLevel = n - 1 }
- return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b
+ let hdr = addPandocAttributes (getRoleAttr e)
+ $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr ++ attrs)
+ n' headerText
+ return $ hdr <> b
titleabbrevElAsAttr =
case filterChild (named "titleabbrev") e `mplus`
(filterChild (named "info") e >>=
@@ -1143,7 +1146,7 @@ parseBlock (Elem e) =
b <- p
case mbt of
Nothing -> return b
- Just t -> return $ divWith (attrValue "id" e,[],[])
+ Just t -> return $ divWith (attrValue "id" e, [], getRoleAttr e)
(divWith ("", ["title"], []) (plain t) <> b)
-- Admonitions are parsed into a div. Following other Docbook tools that output HTML,
@@ -1243,8 +1246,8 @@ parseInline (Text (CData _ s _)) = do
else return $ text s
parseInline (CRef ref) =
return $ text $ fromMaybe (T.toUpper ref) $ lookupEntity ref
-parseInline (Elem e) =
- case qName (elName e) of
+parseInline (Elem e) = do
+ parsedInline <- case qName (elName e) of
"anchor" -> do
return $ spanWith (attrValue "id" e, [], []) mempty
"phrase" -> do
@@ -1366,6 +1369,9 @@ parseInline (Elem e) =
-- <?asciidoc-br?> to in handleInstructions, above.
"pi-asciidoc-br" -> return linebreak
_ -> skip >> innerInlines id
+ return $ case qName (elName e) of
+ "emphasis" -> parsedInline
+ _ -> addPandocAttributes (getRoleAttr e) parsedInline
where skip = do
let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 6b7e7e1bc..076908583 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -27,9 +27,9 @@
This is a set of tests for pandoc. Most of them are adapted from John
Gruber’s markdown test suite.
</para>
-<sect1 id="headers">
+<sect1 id="headers" role="sect1role">
<title>Headers</title>
- <sect2 id="level-2-with-an-embedded-link">
+ <sect2 id="level-2-with-an-embedded-link" role="sect2role">
<title>Level 2 with an <ulink url="/url">embedded link</ulink></title>
<sect3 id="level-3-with-emphasis">
<title>Level 3 with <emphasis>emphasis</emphasis></title>
@@ -74,6 +74,9 @@
<para>
Here’s a regular paragraph.
</para>
+ <para role="pararole">
+ And here’s a regular paragraph with a role.
+ </para>
<para>
In Markdown 1.0.0 and earlier. Version 8. This line turns into a list
item. Because a hard-wrapped line in the middle of a paragraph looked like
@@ -93,6 +96,11 @@
This is a block quote. It is pretty short.
</para>
</blockquote>
+ <blockquote role="roleblockquote">
+ <para>
+ This is a block quote with a role.
+ </para>
+ </blockquote>
<blockquote>
<para>
Code in a block quote:
@@ -234,6 +242,26 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</listitem>
</orderedlist>
<para>
+ with role:
+ </para>
+ <orderedlist role="listrole" numeration="arabic">
+ <listitem>
+ <para>
+ First
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Second
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Third
+ </para>
+ </listitem>
+ </orderedlist>
+ <para>
and tight:
</para>
<orderedlist spacing="compact" numeration="arabic">
@@ -703,6 +731,12 @@ These should not be escaped: \$ \\ \&gt; \[ \{
So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.
</para>
<para>
+ So is <emphasis role="emphasisrole"><emphasis>this</emphasis></emphasis> word with a role.
+ </para>
+ <para>
+ So is <phrase role="phraserole"><phrase>this</phrase></phrase> phrase with a role.
+ </para>
+ <para>
This is code: <literal>&gt;</literal>, <literal>$</literal>,
<literal>\</literal>, <literal>\$</literal>,
<literal>&lt;html&gt;</literal>.
@@ -1408,7 +1442,7 @@ or here: &lt;http://example.com/&gt;
<para>
Table with attributes
</para>
- <table xml:id="mytableid1" class="mytableclass1 mytableclass2" tabstyle="mytabstyle1">
+ <table xml:id="mytableid1" class="mytableclass1 mytableclass2" tabstyle="mytabstyle1" role="tablerole1">
<title>
Attribute table caption
</title>
@@ -1444,7 +1478,7 @@ or here: &lt;http://example.com/&gt;
<para>
Table with attributes, without caption
</para>
- <informaltable xml:id="mytableid2" class="mytableclass3 mytableclass4" tabstyle="mytabstyle2">
+ <informaltable xml:id="mytableid2" class="mytableclass3 mytableclass4" tabstyle="mytabstyle2" role="tablerole2">
<tgroup>
<thead>
<th>
diff --git a/test/docbook-reader.native b/test/docbook-reader.native
index 6d0f72811..94bca827a 100644
--- a/test/docbook-reader.native
+++ b/test/docbook-reader.native
@@ -62,10 +62,16 @@ Pandoc
, Space
, Str "suite."
]
- , Header 1 ( "headers" , [] , [] ) [ Str "Headers" ]
+ , Header
+ 1
+ ( "headers" , [] , [ ( "role" , "sect1role" ) ] )
+ [ Str "Headers" ]
, Header
2
- ( "level-2-with-an-embedded-link" , [] , [] )
+ ( "level-2-with-an-embedded-link"
+ , []
+ , [ ( "role" , "sect2role" ) ]
+ )
[ Str "Level"
, Space
, Str "2"
@@ -151,6 +157,29 @@ Pandoc
, Space
, Str "paragraph."
]
+ , Div
+ ( ""
+ , []
+ , [ ( "wrapper" , "1" ) , ( "role" , "pararole" ) ]
+ )
+ [ Para
+ [ Str "And"
+ , Space
+ , Str "here\8217s"
+ , Space
+ , Str "a"
+ , Space
+ , Str "regular"
+ , Space
+ , Str "paragraph"
+ , Space
+ , Str "with"
+ , Space
+ , Str "a"
+ , Space
+ , Str "role."
+ ]
+ ]
, Para
[ Str "In"
, Space
@@ -251,6 +280,31 @@ Pandoc
, Str "short."
]
]
+ , Div
+ ( ""
+ , []
+ , [ ( "wrapper" , "1" ) , ( "role" , "roleblockquote" ) ]
+ )
+ [ BlockQuote
+ [ Para
+ [ Str "This"
+ , Space
+ , Str "is"
+ , Space
+ , Str "a"
+ , Space
+ , Str "block"
+ , Space
+ , Str "quote"
+ , Space
+ , Str "with"
+ , Space
+ , Str "a"
+ , Space
+ , Str "role."
+ ]
+ ]
+ ]
, BlockQuote
[ Para
[ Str "Code"
@@ -348,6 +402,19 @@ Pandoc
, [ Para [ Str "Second" ] ]
, [ Para [ Str "Third" ] ]
]
+ , Para [ Str "with" , Space , Str "role:" ]
+ , Div
+ ( ""
+ , []
+ , [ ( "wrapper" , "1" ) , ( "role" , "listrole" ) ]
+ )
+ [ OrderedList
+ ( 1 , Decimal , DefaultDelim )
+ [ [ Para [ Str "First" ] ]
+ , [ Para [ Str "Second" ] ]
+ , [ Para [ Str "Third" ] ]
+ ]
+ ]
, Para [ Str "and" , Space , Str "tight:" ]
, OrderedList
( 1 , Decimal , DefaultDelim )
@@ -932,6 +999,38 @@ Pandoc
, Str "word."
]
, Para
+ [ Str "So"
+ , Space
+ , Str "is"
+ , Space
+ , Emph [ Emph [ Str "this" ] ]
+ , Space
+ , Str "word"
+ , Space
+ , Str "with"
+ , Space
+ , Str "a"
+ , Space
+ , Str "role."
+ ]
+ , Para
+ [ Str "So"
+ , Space
+ , Str "is"
+ , Space
+ , Span
+ ( "" , [ "phraserole" ] , [ ( "role" , "phraserole" ) ] )
+ [ Str "this" ]
+ , Space
+ , Str "phrase"
+ , Space
+ , Str "with"
+ , Space
+ , Str "a"
+ , Space
+ , Str "role."
+ ]
+ , Para
[ Str "This"
, Space
, Str "is"
@@ -2570,7 +2669,9 @@ Pandoc
, Table
( "mytableid1"
, [ "mytableclass1" , "mytableclass2" ]
- , [ ( "custom-style" , "mytabstyle1" ) ]
+ , [ ( "role" , "tablerole1" )
+ , ( "custom-style" , "mytabstyle1" )
+ ]
)
(Caption
Nothing
@@ -2636,7 +2737,9 @@ Pandoc
, Table
( "mytableid2"
, [ "mytableclass3" , "mytableclass4" ]
- , [ ( "custom-style" , "mytabstyle2" ) ]
+ , [ ( "role" , "tablerole2" )
+ , ( "custom-style" , "mytabstyle2" )
+ ]
)
(Caption Nothing [])
[ ( AlignDefault , ColWidthDefault )