aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2025-03-14 15:07:16 -0700
committer John MacFarlane <[email protected]> 2025-03-14 15:07:16 -0700
commit 28eca1e41e32a4f76645df6ea6de4f746a9e3e62 (patch)
tree 983e132a6d335375054445e5e966fb3624bc6c6e
parent 7301daa799830065e8a69a03cebd756f8317bbea (diff)
Markdown reader: remove some misguided list fanciness.

Previously we tried to handle things like commented out list
items:

    - one
    <!--
    - two
    -->
    - three

and also things like:

    - one `and
    - two` and

But the code we added to handle these cases caused problems
with other, more straightforward things, like:

    - one
    - ```
      code
      ```
    - three

So we are rolling back all the fanciness, so that the markdown
parser now behaves more like the commonmark parser, in which
indicators of block-level structure always take priority over
indicators of inline structure.

Closes #9865.
Closes #7778.
See also #5628.

-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 14
-rw-r--r-- test/Tests/Readers/Markdown.hs 7
-rw-r--r-- test/command/7778.md 59
-rw-r--r-- test/command/9865.md 53
-rw-r--r-- test/markdown-reader-more.native 8
5 files changed, 117 insertions, 24 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d73bed229..a4677ec07 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -49,7 +49,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Parsing hiding (tableCaption)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
- isCommentTag, isInlineTag, isTextTag)
+ isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import Text.Pandoc.URI (escapeURI, isURI, pBase64DataURI)
@@ -855,15 +855,7 @@ listLine continuationIndent = try $ do
notFollowedByHtmlCloser
notFollowedByDivCloser
optional (() <$ gobbleSpaces continuationIndent)
- listLineCommon
-
-listLineCommon :: PandocMonad m => MarkdownParser m Text
-listLineCommon = T.concat <$> manyTill
- ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
- <|> fmap snd (withRaw code)
- <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag)
- <|> countChar 1 anyChar
- ) newline
+ anyLine
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: PandocMonad m
@@ -877,7 +869,7 @@ rawListItem fourSpaceRule start = try $ do
let continuationIndent = if fourSpaceRule
then 4
else sourceColumn pos2 - sourceColumn pos1
- first <- listLineCommon
+ first <- anyLine
rest <- many (do notFollowedBy listStart
notFollowedBy (() <$ codeBlockFenced)
notFollowedBy blankline
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index aded6c663..ed0d59843 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -188,7 +188,6 @@ tests = [ testGroup "inline code"
, ("after literal backticks", ["`x``- x`" ], [code "x``- x" ])
]
lis = ["`text","y","x`"]
- lis' = ["text","y","x"]
bldLsts w lsts txts
= let (res, res', f) =
foldr (\((_, _, lt), lc) (acc, tacc, t) ->
@@ -210,12 +209,6 @@ tests = [ testGroup "inline code"
=?> bldLsts plain lsts lis
| lsts <- [ [i, j, k] | i <- lists, j <- lists, k <- lists]
]
- <> [ "lists with newlines and indent in backticks" =:
- T.intercalate ("\n" <> T.replicate 4 " ") (zipWith (\i (_, lt, _) -> lt <> i) lis (l:ls))
- =?> let (_, _, f) = l
- in f [plain $ code $ T.intercalate (T.replicate 5 " ") $ "text" : zipWith (\i (_, lt, _) -> lt <> i) (drop 1 lis') ls]
- | (l:ls) <- [ [i, j, k] | i <- lists, j <- lists, k <- lists]
- ]
<> [ "lists with blank lines and indent in backticks" =:
T.intercalate ("\n\n" <> T.replicate 4 " ") (zipWith (\i (_, lt, _) -> lt <> i) lis (l:ls))
<> "\n"
diff --git a/test/command/7778.md b/test/command/7778.md
new file mode 100644
index 000000000..7e89141c2
--- /dev/null
+++ b/test/command/7778.md
@@ -0,0 +1,59 @@
+Here the thing to remember is that block level structure indications
+take precedence over inline level structure indications:
+
+```
+% pandoc
+Term
+: Def
+<!--
+: comment def
+-->
+^D
+<dl>
+<dt>Term</dt>
+<dd>
+Def &lt;!–
+</dd>
+<dd>
+comment def –&gt;
+</dd>
+</dl>
+```
+
+```
+% pandoc
+Term
+: Def
+test <!--
+: comment def
+and -->
+^D
+<dl>
+<dt>Term</dt>
+<dd>
+Def test &lt;!–
+</dd>
+<dd>
+comment def and –&gt;
+</dd>
+</dl>
+```
+
+```
+% pandoc
+Term
+: Def
+`code
+: comment def
+more code`
+^D
+<dl>
+<dt>Term</dt>
+<dd>
+Def `code
+</dd>
+<dd>
+comment def more code`
+</dd>
+</dl>
+```
diff --git a/test/command/9865.md b/test/command/9865.md
new file mode 100644
index 000000000..5641629d2
--- /dev/null
+++ b/test/command/9865.md
@@ -0,0 +1,53 @@
+````
+% pandoc
+- example 1
+- ```
+ one
+ two
+ ```
+- list item three
+^D
+<ul>
+<li>example 1</li>
+<li><pre><code>one
+two</code></pre></li>
+<li>list item three</li>
+</ul>
+````
+
+````
+% pandoc
+- example 3
+- ```
+one
+two
+```
+- list item three
+^D
+<ul>
+<li>example 3</li>
+<li><pre><code>one
+two</code></pre></li>
+<li>list item three</li>
+</ul>
+````
+
+Here is a case that we used to handle differently, but
+#9865 aligns pandoc's markdown parser with commonmark in letting
+block level structure take precedence over inline level structure.
+
+````
+% pandoc
+- a <!--
+
+- b
+
+-->
+- c
+^D
+<ul>
+<li><p>a &lt;!–</p></li>
+<li><p>b</p></li>
+</ul>
+<p>–&gt; - c</p>
+````
diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native
index 148cba944..cf5de564d 100644
--- a/test/markdown-reader-more.native
+++ b/test/markdown-reader-more.native
@@ -200,12 +200,8 @@ Pandoc
, Str "item"
]
, BulletList
- [ [ Plain
- [ Str "one"
- , SoftBreak
- , RawInline (Format "html") "<!--\n- two\n-->"
- ]
- ]
+ [ [ Plain [ Str "one" , SoftBreak , Str "<!\8211" ] ]
+ , [ Plain [ Str "two" , SoftBreak , Str "\8211>" ] ]
, [ Plain [ Str "three" ] ]
]
, Header