aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <[email protected]>2025-08-27 12:47:22 +0200
committerAlbert Krewinkel <[email protected]>2025-08-27 13:03:46 +0200
commit01721c09609bf92c9813ead653e8b4d2f9a8c1d0 (patch)
treef90a815395922dbf7c5754b9a04844f65d7df7bd
parent6cff8dfc8af7a20afe8b307e29c010db7b6053d8 (diff)
Org reader: improve sub- and superscript parsing.
Sub- and superscript must be preceded by a string in Org mode. Some text preceded by space or at the start of a paragraph was previously parsed incorrectly as sub- or superscript.
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs26
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs1
-rw-r--r--test/Tests/Readers/Org/Inline.hs12
3 files changed, 33 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index be8813e5c..f9e38124f 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,8 +2,8 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Inlines
- Copyright : Copyright (C) 2014-2024 Albert Krewinkel
- License : GNU GPL, version 2 or above
+ Copyright : Copyright (C) 2014-2025 Albert Krewinkel
+ License : GPL-2.0-or-later
Maintainer : Albert Krewinkel <[email protected]>
@@ -88,13 +88,13 @@ inline =
, inlineCodeBlock
, str
, endline
+ , subscript -- takes precedence over underlined text
+ , superscript
, emphasizedText
, code
, math
, displayMath
, verbatim
- , subscript
- , superscript
, inlineLaTeX
, exportSnippet
, macro
@@ -594,11 +594,25 @@ verbatim = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
code :: PandocMonad m => OrgParser m (F Inlines)
code = return . B.code <$> verbatimBetween '~'
+-- | Returns 'True' if the parser position right after a string, and 'False'
+-- otherwise.
+isAfterString :: PandocMonad m => OrgParser m Bool
+isAfterString = do
+ pos <- getPosition
+ st <- getState
+ pure $ getLastStrPos st == Just pos
+
+-- | Parses subscript markup. Subscripts must be preceded by a string.
subscript :: PandocMonad m => OrgParser m (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+subscript = do
+ guard =<< isAfterString
+ fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+-- | Parses superscript markup. Superscript must be preceded by a string.
superscript :: PandocMonad m => OrgParser m (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+superscript = do
+ guard =<< isAfterString
+ fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 4f5b38311..3adf9f227 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.Org.Parsing
, guardEnabled
, updateLastStrPos
, notAfterString
+ , getLastStrPos
, ParserState (..)
, registerHeader
, QuoteContext (..)
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index 00956e1ce..10cd9c214 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -88,6 +88,10 @@ tests =
"2^{n-1}" =?>
para (str "2" <> superscript "n-1")
+ , "Superscript-like, but not after string" =:
+ "a ^caret" =?>
+ para "a ^caret"
+
, "Subscript simple expression" =:
"a_n" =?>
para (str "a" <> subscript "n")
@@ -96,6 +100,14 @@ tests =
"a_{n+1}" =?>
para (str "a" <> subscript "n+1")
+ , "Subscript-like, but not after string" =:
+ "_underscore" =?>
+ para "_underscore"
+
+ , "Subscript takes precedence before underline" =:
+ "text_subscript_" =?>
+ para (str "text" <> subscript "subscript" <> str "_")
+
, "Linebreak" =:
"line \\\\ \nbreak" =?>
para ("line" <> linebreak <> "break")