diff options
| author | John MacFarlane <[email protected]> | 2025-09-16 12:44:52 +0200 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-09-16 12:46:25 +0200 |
| commit | cf760c7f66bfa3eb5530d83b878fcb611453ab5e (patch) | |
| tree | 6fcf22f7cf716b5164920d8eddf5d741ed510605 | |
| parent | c3a5513eedf4fbfb135419b2ffa85c81f6521e2b (diff) | |
Markdown reader: Improve superscript/subscript/inline note parsing.
We do not allow inline notes to be followed by `(` or `[`.
Otherwise, we parse inline notes before superscripts.
This fixes #8652.
Also, the sub/superscript parsers have been adjusted so that they
really exclude unescaped spaces (as they did not before, when the
spaces occurred in nested inlines). See #5878 for comment.
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 59 | ||||
| -rw-r--r-- | test/command/8652.md | 28 |
2 files changed, 66 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4121240d8..0a17b9a1a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -195,6 +195,22 @@ litBetween op cl = try $ do char op mconcat <$> (manyTill litChar (char cl)) +litCharNoSpace :: PandocMonad m => MarkdownParser m Text +litCharNoSpace = T.singleton <$> escapedChar'' + <|> characterReference + <|> T.singleton <$> noneOf "\n \r\t" + where + escapedChar'' = do + c <- escapedChar' + pure $ case c of + ' ' -> '\160' + _ -> c + +litBetweenNoSpace :: PandocMonad m => Char -> Char -> MarkdownParser m Text +litBetweenNoSpace op cl = try $ do + char op + mconcat <$> (manyTill litCharNoSpace (char cl)) + -- | Parse a sequence of elements between square brackets, -- including between balanced pairs of square brackets. -- Skip brackets in standard inline escapes, code, raw HTML or LaTeX. @@ -1499,7 +1515,7 @@ inline = do '`' -> code '_' -> strongOrEmph '*' -> strongOrEmph - '^' -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^ + '^' -> inlineNote <|> superscript '[' -> note <|> cite <|> bracketedSpan <|> wikilink B.linkWith <|> link '!' -> image '$' -> math @@ -1705,29 +1721,29 @@ mark = fmap (B.spanWith ("",["mark"],[])) <$> superscript :: PandocMonad m => MarkdownParser m (F Inlines) superscript = do - fmap B.superscript <$> try (do - char '^' - mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript)) - where regularSuperscript = many1Till (do guardEnabled Ext_superscript - notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^') - mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts - result <- T.pack <$> many1 alphaNum - return $ return $ return $ B.str result + fmap B.superscript <$> (regularSuperscript <|> mmdShortSuperscript) + where + regularSuperscript = do + guardEnabled Ext_superscript + litBetweenNoSpace '^' '^' >>= parseFromString inlines + mmdShortSuperscript = try $ do + guardEnabled Ext_short_subsuperscripts + char '^' + result <- T.pack <$> many1 alphaNum + return $ return $ B.str result subscript :: PandocMonad m => MarkdownParser m (F Inlines) subscript = do - fmap B.subscript <$> try (do - char '~' - mconcat <$> (try regularSubscript <|> mmdShortSubscript)) - where regularSubscript = many1Till (do guardEnabled Ext_subscript - notFollowedBy spaceChar - notFollowedBy newline - inline) (char '~') - mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts - result <- T.pack <$> many1 alphaNum - return $ return $ return $ B.str result + fmap B.subscript <$> (regularSubscript <|> mmdShortSubscript) + where + regularSubscript = do + guardEnabled Ext_subscript + litBetweenNoSpace '~' '~' >>= parseFromString inlines + mmdShortSubscript = try $ do + guardEnabled Ext_short_subsuperscripts + char '~' + result <- T.pack <$> many1 alphaNum + return $ return $ B.str result whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" @@ -2048,6 +2064,7 @@ inlineNote = do updateState $ \st -> st{ stateInNote = True , stateNoteNumber = stateNoteNumber st + 1 } contents <- inBalancedBrackets inlines + notFollowedBy (char '(' <|> char '[') -- ^[link](foo)^ is superscript updateState $ \st -> st{ stateInNote = False } return $ B.note . B.para <$> contents diff --git a/test/command/8652.md b/test/command/8652.md new file mode 100644 index 000000000..e98f1cdec --- /dev/null +++ b/test/command/8652.md @@ -0,0 +1,28 @@ +``` +% pandoc -t native +My text^[My note]isnot^[Text] +^D +[ Para + [ Str "My" + , Space + , Str "text" + , Note [ Para [ Str "My" , Space , Str "note" ] ] + , Str "isnot" + , Note [ Para [ Str "Text" ] ] + ] +] + +``` + +``` +% pandoc -t native +^[note1]test^[note2] +^D +[ Para + [ Note [ Para [ Str "note1" ] ] + , Str "test" + , Note [ Para [ Str "note2" ] ] + ] +] + +``` |
