diff options
| author | John MacFarlane <[email protected]> | 2024-04-10 17:18:05 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-04-10 17:18:05 -0700 |
| commit | a0c1bdeb8b93e2661132a714668dff5f32ffb167 (patch) | |
| tree | 3c61301197f62cded2dee313da2babf933e1b146 | |
| parent | 820e371d94d9d2c9487f1e32ee6c006132f678d8 (diff) | |
Markdown reader: auto-close unclosed divs.
This applies to both fenced and HTML-ish varieties.
Otherwise we face an exponential performance problem with
backtracking.
This also accords with the behavior of the `fenced_divs`
extension in commonmark.
A warning is issued when a div is implicitly closed.
Closes #9635.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 26 |
| -rw-r--r-- | test/Tests/Readers/Markdown.hs | 2 |
| -rw-r--r-- | test/command/9635.md | 16 |
3 files changed, 30 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 206e5bc3c..a78b1062a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2102,30 +2102,29 @@ divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = do guardEnabled Ext_native_divs try $ do - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) + openpos <- getPosition + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block -- or inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } - bls <- option "" (blankline >> option "" blanklines) + optional blanklines contents <- mconcat <$> many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) - closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) - if closed - then do - updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] T.words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents - else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + void (htmlTag (~== TagClose ("div" :: Text))) <|> + (getPosition >>= report . 
UnclosedDiv openpos) + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] T.words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } + return $ B.divWith (ident, classes, keyvals) <$> contents divFenced :: PandocMonad m => MarkdownParser m (F Blocks) divFenced = do guardEnabled Ext_fenced_divs try $ do + openpos <- getPosition string ":::" skipMany (char ':') skipMany spaceChar @@ -2135,7 +2134,8 @@ divFenced = do blankline updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } - bs <- mconcat <$> manyTill block divFenceEnd + bs <- mconcat <$> many (notFollowedBy divFenceEnd >> block) + divFenceEnd <|> (getPosition >>= report . UnclosedDiv openpos) updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } return $ B.divWith attribs <$> bs diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 001ca0207..5f67e111b 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -449,7 +449,7 @@ tests = [ testGroup "inline code" <> codeBlockWith ("",["haskell"],[]) "b" <> - rawBlock "html" "<div>\n\n" + divWith ("",[],[]) mempty ] -- the round-trip properties frequently fail -- , testGroup "round trip" diff --git a/test/command/9635.md b/test/command/9635.md new file mode 100644 index 000000000..088a5b6d2 --- /dev/null +++ b/test/command/9635.md @@ -0,0 +1,16 @@ +``` +% pandoc +> ::: {.fence} +> that is +> not closed + +okay +^D +2> [WARNING] Div at _chunk line 1 column 1 unclosed at _chunk line 5 column 1, closing implicitly. +<blockquote> +<div class="fence"> +<p>that is not closed</p> +</div> +</blockquote> +<p>okay</p> +``` |
