about summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2024-04-10 17:18:05 -0700
committer John MacFarlane <[email protected]> 2024-04-10 17:18:05 -0700
commit a0c1bdeb8b93e2661132a714668dff5f32ffb167 (patch)
tree 3c61301197f62cded2dee313da2babf933e1b146
parent 820e371d94d9d2c9487f1e32ee6c006132f678d8 (diff)
Markdown reader: auto-close unclosed divs.
This applies to both fenced and HTML-ish varieties. Otherwise we face an exponential performance problem with backtracking. This also accords with the behavior of the `fenced_divs` extension in commonmark. A warning is issued when a div is implicitly closed. Closes #9635.
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 26
-rw-r--r-- test/Tests/Readers/Markdown.hs 2
-rw-r--r-- test/command/9635.md 16
3 files changed, 30 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 206e5bc3c..a78b1062a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2102,30 +2102,29 @@ divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = do
guardEnabled Ext_native_divs
try $ do
- (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+ openpos <- getPosition
+ (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("div" :: Text) [])
-- we set stateInHtmlBlock so that closing tags that can be either block
-- or inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
- bls <- option "" (blankline >> option "" blanklines)
+ optional blanklines
contents <- mconcat <$>
many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text)))
>> block)
- closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
- if closed
- then do
- updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
- let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] T.words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
- else -- avoid backtracing
- return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+ void (htmlTag (~== TagClose ("div" :: Text))) <|>
+ (getPosition >>= report . UnclosedDiv openpos)
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+ return $ B.divWith (ident, classes, keyvals) <$> contents
divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
divFenced = do
guardEnabled Ext_fenced_divs
try $ do
+ openpos <- getPosition
string ":::"
skipMany (char ':')
skipMany spaceChar
@@ -2135,7 +2134,8 @@ divFenced = do
blankline
updateState $ \st ->
st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
- bs <- mconcat <$> manyTill block divFenceEnd
+ bs <- mconcat <$> many (notFollowedBy divFenceEnd >> block)
+ divFenceEnd <|> (getPosition >>= report . UnclosedDiv openpos)
updateState $ \st ->
st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
return $ B.divWith attribs <$> bs
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 001ca0207..5f67e111b 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -449,7 +449,7 @@ tests = [ testGroup "inline code"
<>
codeBlockWith ("",["haskell"],[]) "b"
<>
- rawBlock "html" "<div>\n\n"
+ divWith ("",[],[]) mempty
]
-- the round-trip properties frequently fail
-- , testGroup "round trip"
diff --git a/test/command/9635.md b/test/command/9635.md
new file mode 100644
index 000000000..088a5b6d2
--- /dev/null
+++ b/test/command/9635.md
@@ -0,0 +1,16 @@
+```
+% pandoc
+> ::: {.fence}
+> that is
+> not closed
+
+okay
+^D
+2> [WARNING] Div at _chunk line 1 column 1 unclosed at _chunk line 5 column 1, closing implicitly.
+<blockquote>
+<div class="fence">
+<p>that is not closed</p>
+</div>
+</blockquote>
+<p>okay</p>
+```