diff options
| author | Albert Krewinkel <[email protected]> | 2025-08-27 12:47:22 +0200 |
|---|---|---|
| committer | Albert Krewinkel <[email protected]> | 2025-08-27 13:03:46 +0200 |
| commit | 01721c09609bf92c9813ead653e8b4d2f9a8c1d0 (patch) | |
| tree | f90a815395922dbf7c5754b9a04844f65d7df7bd | |
| parent | 6cff8dfc8af7a20afe8b307e29c010db7b6053d8 (diff) | |
Org reader: improve sub- and superscript parsing.
Sub- and superscript must be preceded by a string in Org mode. Some text
preceded by space or at the start of a paragraph was previously parsed
incorrectly as sub- or superscript.
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 26 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 1 | ||||
| -rw-r--r-- | test/Tests/Readers/Org/Inline.hs | 12 |
3 files changed, 33 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index be8813e5c..f9e38124f 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -2,8 +2,8 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Inlines - Copyright : Copyright (C) 2014-2024 Albert Krewinkel - License : GNU GPL, version 2 or above + Copyright : Copyright (C) 2014-2025 Albert Krewinkel + License : GPL-2.0-or-later Maintainer : Albert Krewinkel <[email protected]> @@ -88,13 +88,13 @@ inline = , inlineCodeBlock , str , endline + , subscript -- takes precedence over underlined text + , superscript , emphasizedText , code , math , displayMath , verbatim - , subscript - , superscript , inlineLaTeX , exportSnippet , macro @@ -594,11 +594,25 @@ verbatim = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '=' code :: PandocMonad m => OrgParser m (F Inlines) code = return . B.code <$> verbatimBetween '~' +-- | Returns 'True' if the parser position right after a string, and 'False' +-- otherwise. +isAfterString :: PandocMonad m => OrgParser m Bool +isAfterString = do + pos <- getPosition + st <- getState + pure $ getLastStrPos st == Just pos + +-- | Parses subscript markup. Subscripts must be preceded by a string. subscript :: PandocMonad m => OrgParser m (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) +subscript = do + guard =<< isAfterString + fmap B.subscript <$> try (char '_' *> subOrSuperExpr) +-- | Parses superscript markup. Superscript must be preceded by a string. superscript :: PandocMonad m => OrgParser m (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) +superscript = do + guard =<< isAfterString + fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 4f5b38311..3adf9f227 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -54,6 +54,7 @@ module Text.Pandoc.Readers.Org.Parsing , guardEnabled , updateLastStrPos , notAfterString + , getLastStrPos , ParserState (..) , registerHeader , QuoteContext (..) diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index 00956e1ce..10cd9c214 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -88,6 +88,10 @@ tests = "2^{n-1}" =?> para (str "2" <> superscript "n-1") + , "Superscript-like, but not after string" =: + "a ^caret" =?> + para "a ^caret" + , "Subscript simple expression" =: "a_n" =?> para (str "a" <> subscript "n") @@ -96,6 +100,14 @@ tests = "a_{n+1}" =?> para (str "a" <> subscript "n+1") + , "Subscript-like, but not after string" =: + "_underscore" =?> + para "_underscore" + + , "Subscript takes precedence before underline" =: + "text_subscript_" =?> + para (str "text" <> subscript "subscript" <> str "_") + , "Linebreak" =: "line \\\\ \nbreak" =?> para ("line" <> linebreak <> "break") |
