diff options
| author | Albert Krewinkel <[email protected]> | 2025-07-24 10:46:41 +0200 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-08-06 12:54:12 -0700 |
| commit | b24bba7a20b1d40b4b9de71015e29fa535a395f3 (patch) | |
| tree | 93b995f51892b4e7a6cf258d0c98bc2dc9db8e44 | |
| parent | 3a185fb5d008f8ffb1cc1cb03b805a29df24a2c9 (diff) | |
Add `smart_quotes` and `special_strings` extensions for Org
Org mode makes a distinction between smart parsing of quotes, and smart
parsing of special strings like `...`. The finer grained control over
these features is necessary to truthfully reproduce Emacs Org mode
behavior. Special strings are enabled by default, while smart quotes are
disabled.
The behavior of `special_string` is brought closer to the reference
implementation in that `\-` is now treated as a soft hyphen.
| -rw-r--r-- | MANUAL.txt | 17 | ||||
| -rw-r--r-- | pandoc-lua-engine/test/lua/module/pandoc-format.lua | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Extensions.hs | 5 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 30 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 14 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Org.hs | 35 | ||||
| -rw-r--r-- | test/Tests/Readers/Org/Inline/Smart.hs | 4 |
8 files changed, 77 insertions, 32 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 4bd8c9e14..df786f159 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3553,7 +3553,7 @@ input formats `html` output formats -: `markdown`, `latex`, `context`, `rst` +: `markdown`, `latex`, `context`, `org`, `rst` enabled by default in : `markdown`, `latex`, `context` (both input and output) @@ -3903,6 +3903,21 @@ In the `context` output format this enables the use of [Natural Tables Natural tables allow more fine-grained global customization but come at a performance penalty compared to extreme tables. +### Extension: `smart_quotes` (org) ### + +Interpret straight quotes as curly quotes during parsing. When +*writing* Org, then the `smart_quotes` extension has the reverse +effect: what would have been curly quotes comes out straight. + +This extension is implied if `smart` is enabled. + +### Extension: `special_strings` (org) ### + +Interpret `---` as em-dashes, `--` as en-dashes, `\-` as shy +hyphen, and `...` as ellipses. + +This extension is implied if `smart` is enabled. + ### Extension: `tagging` ### {#extension--tagging} Enabling this extension with `context` output will produce markup diff --git a/pandoc-lua-engine/test/lua/module/pandoc-format.lua b/pandoc-lua-engine/test/lua/module/pandoc-format.lua index e3511d7ad..6a1915a16 100644 --- a/pandoc-lua-engine/test/lua/module/pandoc-format.lua +++ b/pandoc-lua-engine/test/lua/module/pandoc-format.lua @@ -42,6 +42,8 @@ return { fancy_lists = false, gfm_auto_identifiers = false, smart = false, + smart_quotes = false, + special_strings = true, task_lists = true, } assert.are_same(format.extensions 'org', org_default_exts) diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index f1c9c0f37..6f173da0e 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -120,6 +120,8 @@ data Extension = | Ext_shortcut_reference_links -- ^ Shortcut reference links | Ext_simple_tables -- ^ Pandoc-style simple tables | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes + | Ext_smart_quotes -- ^ "Smart" quotes + | Ext_special_strings -- ^ Treat certain strings like special characters | Ext_sourcepos -- ^ Include source position attributes | Ext_space_in_atx_header -- ^ Require space between # and header text | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link @@ -429,6 +431,7 @@ getDefaultExtensions "commonmark_x" = extensionsFromList ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, + Ext_special_strings, Ext_task_lists, Ext_auto_identifiers] getDefaultExtensions "html" = extensionsFromList @@ -580,6 +583,8 @@ getAllExtensions f = universalExtensions <> getAll f extensionsFromList [ Ext_citations , Ext_smart + , Ext_smart_quotes + , Ext_special_strings , Ext_fancy_lists , Ext_task_lists ] diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 4d901ffc4..be8813e5c 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -98,7 +98,8 @@ inline = , inlineLaTeX , exportSnippet , macro - , smart + , smartQuotes + , specialStrings , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" @@ -892,29 +893,27 @@ macro = try $ do escapedComma = try $ char '\\' *> oneOf ",\\" eoa = string ")}}}" -smart :: PandocMonad m => OrgParser m (F Inlines) -smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] +smartQuotes :: PandocMonad m => OrgParser m (F Inlines) +smartQuotes = do + guard =<< getExportSetting exportSmartQuotes + choice [doubleQuoted, singleQuoted, orgApostrophe] where - orgDash = do - guardOrSmartEnabled =<< getExportSetting exportSpecialStrings - pure <$> dash <* updatePositions '-' - orgEllipses = do - guardOrSmartEnabled =<< getExportSetting exportSpecialStrings - pure <$> ellipses <* updatePositions '.' orgApostrophe = do - guardEnabled Ext_smart (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos returnF (B.str "\x2019") -guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () -guardOrSmartEnabled b = do - smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions - guard (b || smartExtension) +specialStrings :: PandocMonad m => OrgParser m (F Inlines) +specialStrings = do + guard =<< getExportSetting exportSpecialStrings + choice [orgDash, orgEllipses, shyHyphen] + where + shyHyphen = pure <$> (B.str "\173" <$ string "\\-") <* updatePositions '-' + orgDash = pure <$> dash <* updatePositions '-' + orgEllipses = pure <$> ellipses <* updatePositions '.' singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do - guardOrSmartEnabled =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -926,7 +925,6 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do - guardOrSmartEnabled =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 95e3b6d7c..87c2b6948 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState - Copyright : Copyright (C) 2014-2024 Albert Krewinkel + Copyright : Copyright (C) 2014-2025 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <[email protected]> @@ -49,7 +49,7 @@ import Data.Text (Text) import Text.Pandoc.Builder (Blocks) import Text.Pandoc.Definition (Meta (..), nullMeta) import Text.Pandoc.Logging -import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Options (ReaderOptions (..), Extension(..), isEnabled) import Text.Pandoc.Parsing (Future, HasIdentifierList (..), HasIncludeFiles (..), HasLastStrPosition (..), HasLogMessages (..), HasMacros (..), @@ -193,7 +193,15 @@ defaultOrgParserState = OrgParserState optionsToParserState :: ReaderOptions -> OrgParserState optionsToParserState opts = - def { orgStateOptions = opts } + let exportSettings = defaultExportSettings + { exportSmartQuotes = isEnabled Ext_smart opts || + isEnabled Ext_smart_quotes opts + , exportSpecialStrings = isEnabled Ext_smart opts || + isEnabled Ext_special_strings opts + } + in def { orgStateOptions = opts + , orgStateExportSettings = exportSettings + } registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState registerTodoSequence todoSeq st = diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 0c2cd15af..4f5b38311 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Readers.Org.Parsing - Copyright : Copyright (C) 2014-2024 Albert Krewinkel + Copyright : Copyright (C) 2014-2025 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <[email protected]> diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 0a1137744..8def0cf8b 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -3,8 +3,8 @@ {- | Module : Text.Pandoc.Writers.Org Copyright : © 2010-2015 Puneeth Chaganti <[email protected]> - 2010-2024 John MacFarlane <[email protected]> - 2016-2024 Albert Krewinkel <[email protected]> + 2010-2025 John MacFarlane <[email protected]> + 2016-2025 Albert Krewinkel <[email protected]> License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <[email protected]> @@ -89,20 +89,28 @@ noteToOrg num note = do let marker = "[fn:" ++ show num ++ "] " return $ hang (length marker) (text marker) contents +-- | Replace Unicode characters with their ASCII representation +replaceSpecialStrings :: Text -> Text +replaceSpecialStrings = + let expand c = case c of + '\x00ad' -> "\\-" + '\x2013' -> "--" + '\x2014' -> "---" + '\x2019' -> "'" + '\x2026' -> "..." + _ -> T.singleton c + in T.concatMap expand + -- | Escape special characters for Org. escapeString :: Text -> Doc Text escapeString t | T.all isAlphaNum t = literal t | otherwise = mconcat $ map escChar (T.unpack t) where - escChar '\x2013' = "--" - escChar '\x2014' = "---" - escChar '\x2019' = "'" - escChar '\x2026' = "..." - escChar c - -- escape special chars with ZERO WIDTH SPACE as org manual suggests - | c == '*' || c == '#' || c == '|' = afterBreak "\x200B" <> char c - | otherwise = char c + -- escape special chars with ZERO WIDTH SPACE as org manual suggests + escChar c = if c == '*' || c == '#' || c == '|' + then afterBreak "\x200B" <> char c + else char c isRawFormat :: Format -> Bool isRawFormat f = @@ -522,7 +530,12 @@ inlineToOrg (Cite cs lst) = do return $ "[cite" <> sty <> ":" <> citeItems <> "]" else inlineListToOrg lst inlineToOrg (Code _ str) = return $ "=" <> literal str <> "=" -inlineToOrg (Str str) = return $ escapeString str +inlineToOrg (Str str) = do + opts <- gets stOptions + let str' = if isEnabled Ext_smart opts || isEnabled Ext_special_strings opts + then replaceSpecialStrings str + else str + return $ escapeString str' inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs index f4f80af4b..66c0778df 100644 --- a/test/Tests/Readers/Org/Inline/Smart.hs +++ b/test/Tests/Readers/Org/Inline/Smart.hs @@ -47,6 +47,10 @@ tests = ("/foo---/" =?> para (emph "foo—")) + , test orgSmart "Support for shy (soft) hyphen" + ("Ur\\-instinkt" =?> + para "Ur\173instinkt") + , test orgSmart "Single quotes can be followed by emphasized text" ("Singles on the '/meat market/'" =?> para ("Singles on the " <> singleQuoted (emph "meat market"))) |
