diff options
| author | John MacFarlane <[email protected]> | 2025-06-22 16:33:42 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-07-31 22:08:51 -0700 |
| commit | cfcd442b2fcdac4d5f49b0850f73de0153fa287f (patch) | |
| tree | 1ccc5d77748394845d95086f5283c9aea71f44bb /src/Text | |
| parent | c77476b597ef89e9ab7baf3452f84cd4f5ff1a5a (diff) | |
Extract citationSuffix, citationPrefix. (cite-prefix)
In transforming pandoc Cite to citeproc Citation,
extract a `citationSuffix` and `citationPrefix` from the
last item's suffix and first item's prefix, respectively, if
they contain a `|` character which separates the item's suffix
or prefix from the whole Citation's.
For example:
[for example, see |@C1; @A3; @B4|, and others]
Here "for example, see" acts as a prefix for the whole
group and will remain at the beginning even if the citation
items are reordered by citeproc. Similarly, ", and others"
will be a suffix for the whole group.
Closes #10894.
Notes:
1. The org reader now adds global prefixes and suffixes the
same way as the Markdown reader: as affixes to the first item's
prefix or the last item's suffix, separated by a pipe (`|`).
2. The org writer, however, has not been modified to convert the
`|` to a `;`, as required by org-cite syntax.
3. This change doesn't currently do what one would expect, because
of changes that were made to citeproc to prevent citation items
with prefixes and suffixes from being sorted. Hence in
`test/command/10894.md`, we have test output
```
(Doe, 2020; Smith, 2021)
```
without affixes, but
```
(see Smith, 2021; Doe, 2020, and others)
```
with affixes. To make this work well, we'd need to remove the citeproc
code that prevented bad results before we had proper global
prefixes and suffixes. However, removing this code would mean that
existing documents would render differently, unless the new pipe
syntax for citation affixes were used. That may be something we want
to avoid.
4. The use of pipes to separate out global affixes from item-level
affixes is a kludge that could be avoided if we added additional
fields to Cite in the pandoc AST. However, AST changes are disruptive,
so perhaps it's not worth doing that.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Citeproc.hs | 40 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Locator.hs | 5 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 19 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 13 |
4 files changed, 54 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 5ab245e16..954953fdb 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -299,17 +299,49 @@ getCitations locale otherIdsMap = Foldable.toList . query getCitation where getCitation (Cite cs _fallback) = Seq.singleton $ Citeproc.Citation { Citeproc.citationId = Nothing - , Citeproc.citationPrefix = Nothing - , Citeproc.citationSuffix = Nothing + , Citeproc.citationPrefix = pref + , Citeproc.citationSuffix = suff , Citeproc.citationNoteNumber = case cs of [] -> Nothing (Pandoc.Citation{ Pandoc.citationNoteNum = n }: _) | n > 0 -> Just n | otherwise -> Nothing - , Citeproc.citationItems = - fromPandocCitations locale otherIdsMap cs + , Citeproc.citationItems = items } + where + (pref, suff, items) = + case fromPandocCitations locale otherIdsMap cs of + [] -> (Nothing, Nothing, []) + (i:is) -> + let (pref', i') = case citationItemPrefix i of + Nothing -> (Nothing, i) + Just p -> + case splitInlinesOnPipe (B.toList p) of + (_,[]) -> (Nothing, i) + (as,bs) -> (Just (B.fromList as), + i{ citationItemPrefix = Just (B.fromList bs) }) + (suff', is') = case reverse is of + [] -> (Nothing, []) + (i'':is'') -> + case Citeproc.citationItemSuffix i'' of + Nothing -> (Nothing, is) + Just s -> + case splitInlinesOnPipe (B.toList s) of + (_,[]) -> (Nothing, is) + (as,bs) -> (Just (B.fromList bs), reverse + (i''{ citationItemSuffix = Just (B.fromList as) }:is'')) + in (pref', suff', i':is') + splitInlinesOnPipe ils = + case break isStrWithPipe ils of + (xs,Str s : ys) -> + let (as,bs) = T.break (=='|') s + bs' = T.drop 1 bs + in (xs ++ [Str as | not (T.null as)], + [Str bs' | not (T.null bs')] ++ ys) + _ -> (ils,[]) + isStrWithPipe (Str s) = T.any (=='|') s + isStrWithPipe _ = False getCitation _ = mempty fromPandocCitations :: Locale diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index ee5459e3d..9ee58506b 100644 --- 
a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -19,7 +19,6 @@ import Control.Monad (mzero) import qualified Data.Map as M import Data.Char (isSpace, isPunctuation, isDigit) - data LocatorInfo = LocatorInfo{ locatorRaw :: Text , locatorLabel :: Text @@ -57,9 +56,11 @@ pLocatorWords locMap = do maybeAddComma :: [Inline] -> [Inline] maybeAddComma [] = [] maybeAddComma ils@(Space : _) = ils +maybeAddComma ils@(SoftBreak : _) = ils +maybeAddComma ils@(LineBreak : _) = ils maybeAddComma ils@(Str t : _) | Just (c, _) <- T.uncons t - , isPunctuation c = ils + , isPunctuation c || c == '|' = ils maybeAddComma ils = Str "," : Space : ils pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d81c4e3c1..248bf0146 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -55,7 +55,6 @@ import Text.Pandoc.Shared import Text.Pandoc.URI (escapeURI, isURI, pBase64DataURI) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) --- import Debug.Trace (traceShowId) type MarkdownParser m = ParsecT Sources ParserState m @@ -2247,14 +2246,8 @@ normalCite = try $ do return citations suffix :: PandocMonad m => MarkdownParser m (F Inlines) -suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - spnl - ils <- many (notFollowedBy (oneOf ";]") >> inline) - let rest = trimInlinesF (mconcat ils) - return $ if hasSpace && not (null ils) - then (B.space <>) <$> rest - else rest +suffix = try $ + mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . 
mconcat <$> @@ -2274,11 +2267,11 @@ citation = try $ do suff <- suffix noteNum <- stateNoteNumber <$> getState return $ do - x <- pref - y <- suff + pref' <- B.toList <$> pref + suff' <- B.toList <$> suff return Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y + , citationPrefix = pref' + , citationSuffix = suff' , citationMode = if suppress_author then SuppressAuthor else NormalCitation diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 4d901ffc4..99f71f306 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -182,21 +182,26 @@ adjustCiteStyle sty cs = do addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) addPrefixToFirstItem aff cs = do cs' <- cs - aff' <- aff + aff' <- B.toList <$> aff case cs' of [] -> return [] (d:ds) -> return (d{ citationPrefix = - B.toList aff' <> citationPrefix d }:ds) + if null aff' + then citationPrefix d + else aff' ++ (Str "|" : citationPrefix d) }:ds) addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) addSuffixToLastItem aff cs = do cs' <- cs - aff' <- aff + aff' <- B.toList <$> aff case lastMay cs' of Nothing -> return cs' Just d -> return (init cs' ++ [d{ citationSuffix = - citationSuffix d <> B.toList aff' }]) + citationSuffix d <> + if null aff' + then [] + else Str "|" : aff' }]) citeItems :: PandocMonad m => OrgParser m (F [Citation]) citeItems = sequence <$> sepBy1' citeItem (char ';' <* void (many spaceChar)) |
