Add T.P.Citeproc.Name.

This exports `toName`, which previously had been part of T.P.Citeproc.BibTeX. T.P.Readers.RIS,EndNote can now depend on this module without transitively depending on the LaTeX reader, which is used in T.P.Citeproc.BibTeX. Closes #8345.
author: John MacFarlane <[email protected]> 2022-10-02 10:42:58 -0700
committer: John MacFarlane <[email protected]> 2022-10-02 10:59:14 -0700
commit: 8b004e0c267a52cedc2567ad257344f2f2be29ee (patch)
tree: 7d88574556c421c4f143fb6c3e62bcf235804984
parent: 1c01d39a17aee2561ee4da4e70722532e3e4018e (diff)
5 files changed, 191 insertions, 139 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 168ecb859..35a7e8681 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -724,6 +724,7 @@ library
                    Text.Pandoc.Slides,
                    Text.Pandoc.Image,
                    Text.Pandoc.Citeproc.BibTeX,
+                   Text.Pandoc.Citeproc.Name,
                    Text.Pandoc.Citeproc.CslJson,
                    Text.Pandoc.Citeproc.Data,
                    Text.Pandoc.Citeproc.Locator,
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 72f2362e6..679644b7f 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -11,7 +11,8 @@
 -- License     :  BSD-style (see LICENSE)
 --
 -- Maintainer  :  John MacFarlane <[email protected]>
--- Stability   :  unstable-- Portability :  unportable
+-- Stability   :  unstable
+-- Portability :  portable
 --
 -----------------------------------------------------------------------------
 
@@ -19,7 +20,6 @@ module Text.Pandoc.Citeproc.BibTeX
     ( Variant(..)
     , readBibtexString
     , writeBibtexString
-    , toName
     )
     where
 
@@ -35,8 +35,10 @@ import Text.Pandoc.Class (runPure)
 import qualified Text.Pandoc.Walk       as Walk
 import Citeproc.Types
 import Citeproc.Pandoc ()
+import Data.List.Split (splitOn)
 import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen)
 import Text.Pandoc.Citeproc.Data (biblatexStringMap)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..), emptyName)
 import Data.Default
 import           Data.Text              (Text)
 import qualified Data.Text              as T
@@ -44,7 +46,6 @@ import qualified Data.Map               as Map
 import           Data.Maybe
 import           Text.Pandoc.Parsing hiding ((<|>), many)
 import           Control.Applicative
-import           Data.List.Split        (splitOn, splitWhen, wordsBy)
 import           Control.Monad.RWS      hiding ((<>))
 import qualified Data.Sequence          as Seq
 import           Data.Char              (isAlphaNum, isDigit, isLetter,
@@ -406,9 +407,7 @@ itemToReference locale variant item = do
 
     -- names
     let getNameList' f = Just <$>
-         getNameList (("bibtex", case variant of
-                                      Bibtex   -> "true"
-                                      Biblatex -> "false") : opts) f
+         getNameList opts f
 
     author' <- getNameList' "author" <|> return Nothing
     containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
@@ -795,14 +794,6 @@ parseLaTeX lang t =
 latex :: Text -> Bib Inlines
 latex = fmap blocksToInlines . latex' . T.strip
 
-type Options = [(Text, Text)]
-
-parseOptions :: Text -> Options
-parseOptions = map breakOpt . T.splitOn ","
-  where breakOpt x = case T.break (=='=') x of
-                          (w,v) -> (T.toLower $ T.strip w,
-                                    T.toLower $ T.strip $ T.drop 1 v)
-
 bibEntries :: BibParser [Item]
 bibEntries = do
   skipMany nonEntry
@@ -1153,21 +1144,37 @@ concatWith sep = foldl' go mempty
                                                 B.space <> s
 
 
-getNameList :: Options -> Text -> Bib [Name]
+parseOptions :: Text -> [(Text, Text)]
+parseOptions = map breakOpt . T.splitOn ","
+  where breakOpt x = case T.break (=='=') x of
+                          (w,v) -> (T.toLower $ T.strip w,
+                                    T.toLower $ T.strip $ T.drop 1 v)
+
+optionSet :: Text -> [(Text, Text)] -> Bool
+optionSet key opts = case lookup key opts of
+                      Just "true" -> True
+                      Just s      -> s == mempty
+                      _           -> False
+
+getNameList :: [(Text, Text)] -> Text -> Bib [Name]
 getNameList opts  f = do
   fs <- asks fields
   case Map.lookup f fs of
-       Just x  -> latexNames opts x
+       Just x  -> latexNames nameopts x
        Nothing -> notFound f
+ where
+  nameopts = NameOpts{
+                 nameOptsPrefixIsNonDroppingParticle = optionSet "useprefix" opts,
+                 nameOptsUseJuniorComma = optionSet "juniorcomma" opts}
 
-toNameList :: Options -> [Block] -> Bib [Name]
+toNameList :: NameOpts -> [Block] -> Bib [Name]
 toNameList opts [Para xs] =
   filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
                                     (splitByAnd xs)
 toNameList opts [Plain xs] = toNameList opts [Para xs]
 toNameList _ _ = mzero
 
-latexNames :: Options -> Text -> Bib [Name]
+latexNames :: NameOpts -> Text -> Bib [Name]
 latexNames opts t = latex' (T.strip t) >>= toNameList opts
 
 -- see issue 392 for motivation.  We want to treat
@@ -1184,109 +1191,6 @@ addSpaceAfterPeriod = go . splitStrWhen (=='.')
         = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
     go (x:xs) = x:go xs
 
-emptyName :: Name
-emptyName =
-    Name {  nameFamily              = Nothing
-          , nameGiven               = Nothing
-          , nameDroppingParticle    = Nothing
-          , nameNonDroppingParticle = Nothing
-          , nameSuffix              = Nothing
-          , nameLiteral             = Nothing
-          , nameCommaSuffix         = False
-          , nameStaticOrdering      = False
-          }
-
-toName :: MonadPlus m => Options -> [Inline] -> m Name
-toName _ [Str "others"] =
-  return emptyName{ nameLiteral = Just "others" }
-toName _ [Span ("",[],[]) ils] = -- corporate author
-  return emptyName{ nameLiteral = Just $ stringify ils }
- -- extended BibLaTeX name format - see #266
-toName _ ils@(Str ys:_) | T.any (== '=') ys = do
-  let commaParts = splitWhen (== Str ",")
-                   . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
-                   $ ils
-  let addPart ag (Str "given" : Str "=" : xs) =
-        ag{ nameGiven = case nameGiven ag of
-                          Nothing -> Just $ stringify xs
-                          Just t  -> Just $ t <> " " <> stringify xs }
-      addPart ag (Str "family" : Str "=" : xs) =
-        ag{ nameFamily = Just $ stringify xs }
-      addPart ag (Str "prefix" : Str "=" : xs) =
-        ag{ nameDroppingParticle =  Just $ stringify xs }
-      addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
-        ag{ nameNonDroppingParticle = nameDroppingParticle ag
-          , nameDroppingParticle    = Nothing }
-      addPart ag (Str "suffix" : Str "=" : xs) =
-        ag{ nameSuffix = Just $ stringify xs }
-      addPart ag (Space : xs) = addPart ag xs
-      addPart ag _ = ag
-  return $ foldl' addPart emptyName commaParts
--- First von Last
--- von Last, First
--- von Last, Jr ,First
--- NOTE: biblatex and bibtex differ on:
--- Drummond de Andrade, Carlos
--- bibtex takes "Drummond de" as the von;
--- biblatex takes the whole as a last name.
--- See https://github.com/plk/biblatex/issues/236
--- Here we implement the more sensible biblatex behavior.
-toName opts ils = do
-  let useprefix = optionSet "useprefix" opts
-  let usecomma  = optionSet "juniorcomma" opts
-  let bibtex    = optionSet "bibtex" opts
-  let words' = wordsBy (\x -> x == Space || x == Str "\160")
-  let commaParts = map words' $ splitWhen (== Str ",")
-                              $ splitStrWhen
-                                   (\c -> c == ',' || c == '\160') ils
-  let (first, vonlast, jr) =
-          case commaParts of
-               --- First is the longest sequence of white-space separated
-               -- words starting with an uppercase and that is not the
-               -- whole string. von is the longest sequence of whitespace
-               -- separated words whose last word starts with lower case
-               -- and that is not the whole string.
-               [fvl]      -> let (caps', rest') = span isCapitalized fvl
-                             in  if null rest' && not (null caps')
-                                 then (init caps', [last caps'], [])
-                                 else (caps', rest', [])
-               [vl,f]     -> (f, vl, [])
-               (vl:j:f:_) -> (f, vl, j )
-               []         -> ([], [], [])
-
-  let (von, lastname) =
-         if bibtex
-            then case span isCapitalized $ reverse vonlast of
-                        ([],w:ws) -> (reverse ws, [w])
-                        (vs, ws)    -> (reverse ws, reverse vs)
-            else case break isCapitalized vonlast of
-                        (vs@(_:_), []) -> (init vs, [last vs])
-                        (vs, ws)       -> (vs, ws)
-  let prefix = T.unwords $ map stringify von
-  let family = T.unwords $ map stringify lastname
-  let suffix = T.unwords $ map stringify jr
-  let given = T.unwords $ map stringify first
-  return
-    Name {  nameFamily              = if T.null family
-                                         then Nothing
-                                         else Just family
-          , nameGiven               = if T.null given
-                                         then Nothing
-                                         else Just given
-          , nameDroppingParticle    = if useprefix || T.null prefix
-                                         then Nothing
-                                         else Just prefix
-          , nameNonDroppingParticle = if useprefix && not (T.null prefix)
-                                         then Just prefix
-                                         else Nothing
-          , nameSuffix              = if T.null suffix
-                                         then Nothing
-                                         else Just suffix
-          , nameLiteral             = Nothing
-          , nameCommaSuffix         = usecomma
-          , nameStaticOrdering      = False
-          }
-
 ordinalize :: Locale -> Text -> Text
 ordinalize locale n =
   let terms = localeTerms locale
@@ -1300,20 +1204,6 @@ ordinalize locale n =
         Just []    -> n
         Just (t:_) -> n <> snd t
 
-isCapitalized :: [Inline] -> Bool
-isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
-  | isUpper c = True
-  | isDigit c = isCapitalized (Str cs : rest)
-  | otherwise = False
-isCapitalized (_:rest) = isCapitalized rest
-isCapitalized [] = True
-
-optionSet :: Text -> Options -> Bool
-optionSet key opts = case lookup key opts of
-                      Just "true" -> True
-                      Just s      -> s == mempty
-                      _           -> False
-
 getTypeAndGenre :: Bib (Text, Maybe Text)
 getTypeAndGenre = do
   lang <- gets localeLang
diff --git a/src/Text/Pandoc/Citeproc/Name.hs b/src/Text/Pandoc/Citeproc/Name.hs
new file mode 100644
index 000000000..e39cffd28
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/Name.hs
@@ -0,0 +1,157 @@
+{-# LANGUAGE ViewPatterns      #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Text.CSL.Input.Name
+-- Copyright   :  (c) John MacFarlane
+-- License     :  BSD-style (see LICENSE)
+--
+-- Maintainer  :  John MacFarlane <[email protected]>
+-- Stability   :  unstable
+-- Portability :  portable
+--
+-----------------------------------------------------------------------------
+
+module Text.Pandoc.Citeproc.Name
+    ( toName
+    , NameOpts(..)
+    , emptyName
+    )
+    where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (stringify)
+import Citeproc.Types
+import Citeproc.Pandoc ()
+import Text.Pandoc.Citeproc.Util (splitStrWhen)
+import qualified Data.Text              as T
+import           Data.List.Split        (splitWhen, wordsBy)
+import           Control.Monad.RWS      hiding ((<>))
+import Data.Char (isUpper, isDigit)
+import Data.List (foldl')
+
+emptyName :: Name
+emptyName =
+    Name {  nameFamily              = Nothing
+          , nameGiven               = Nothing
+          , nameDroppingParticle    = Nothing
+          , nameNonDroppingParticle = Nothing
+          , nameSuffix              = Nothing
+          , nameLiteral             = Nothing
+          , nameCommaSuffix         = False
+          , nameStaticOrdering      = False
+          }
+
+-- | Options for 'toName'.
+data NameOpts =
+  NameOpts
+    { nameOptsPrefixIsNonDroppingParticle :: Bool
+        -- ^ Treat a prefix on the last name as a non-dropping particle
+        -- (default is to treat it as a dropping particle). This corresponds
+        -- to the biblatex option @useprefix@.
+    , nameOptsUseJuniorComma              :: Bool
+        -- ^ Put a comma before a suffix like "Jr." This corresponds to the
+        -- biblatex option @juniorcomma@.
+    } deriving (Show)
+
+-- | Parse a list of 'Inline's into a citeproc 'Name', identifying
+-- first and last name, particles, suffixes.
+toName :: MonadPlus m => NameOpts -> [Inline] -> m Name
+toName _ [Str "others"] =
+  return emptyName{ nameLiteral = Just "others" }
+toName _ [Span ("",[],[]) ils] = -- corporate author
+  return emptyName{ nameLiteral = Just $ stringify ils }
+-- extended BibLaTeX name format - see #266
+toName _ ils@(Str ys:_) | T.any (== '=') ys = do
+  let commaParts = splitWhen (== Str ",")
+                   . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
+                   $ ils
+  let addPart ag (Str "given" : Str "=" : xs) =
+        ag{ nameGiven = case nameGiven ag of
+                          Nothing -> Just $ stringify xs
+                          Just t  -> Just $ t <> " " <> stringify xs }
+      addPart ag (Str "family" : Str "=" : xs) =
+        ag{ nameFamily = Just $ stringify xs }
+      addPart ag (Str "prefix" : Str "=" : xs) =
+        ag{ nameDroppingParticle =  Just $ stringify xs }
+      addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
+        ag{ nameNonDroppingParticle = nameDroppingParticle ag
+          , nameDroppingParticle    = Nothing }
+      addPart ag (Str "suffix" : Str "=" : xs) =
+        ag{ nameSuffix = Just $ stringify xs }
+      addPart ag (Space : xs) = addPart ag xs
+      addPart ag _ = ag
+  return $ foldl' addPart emptyName commaParts
+-- First von Last
+-- von Last, First
+-- von Last, Jr ,First
+-- NOTE: biblatex and bibtex differ on:
+-- Drummond de Andrade, Carlos
+-- bibtex takes "Drummond de" as the von;
+-- biblatex takes the whole as a last name.
+-- See https://github.com/plk/biblatex/issues/236
+-- Here we implement the more sensible biblatex behavior.
+toName opts ils = do
+  let words' = wordsBy (\x -> x == Space || x == Str "\160")
+  let commaParts = map words' $ splitWhen (== Str ",")
+                              $ splitStrWhen
+                                   (\c -> c == ',' || c == '\160') ils
+  let (first, vonlast, jr) =
+          case commaParts of
+               --- First is the longest sequence of white-space separated
+               -- words starting with an uppercase and that is not the
+               -- whole string. von is the longest sequence of whitespace
+               -- separated words whose last word starts with lower case
+               -- and that is not the whole string.
+               [fvl]      -> let (caps', rest') = span isCapitalized fvl
+                             in  if null rest' && not (null caps')
+                                 then (init caps', [last caps'], [])
+                                 else (caps', rest', [])
+               [vl,f]     -> (f, vl, [])
+               (vl:j:f:_) -> (f, vl, j )
+               []         -> ([], [], [])
+
+  let (von, lastname) =
+                 case break isCapitalized vonlast of
+                        (vs@(_:_), []) -> (init vs, [last vs])
+                        (vs, ws)       -> (vs, ws)
+  let prefix = T.unwords $ map stringify von
+  let family = T.unwords $ map stringify lastname
+  let suffix = T.unwords $ map stringify jr
+  let given = T.unwords $ map stringify first
+  return
+    Name {  nameFamily              = if T.null family
+                                         then Nothing
+                                         else Just family
+          , nameGiven               = if T.null given
+                                         then Nothing
+                                         else Just given
+          , nameDroppingParticle    = if nameOptsPrefixIsNonDroppingParticle opts ||
+                                          T.null prefix
+                                         then Nothing
+                                         else Just prefix
+          , nameNonDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts &&
+                                          not (T.null prefix)
+                                         then Just prefix
+                                         else Nothing
+          , nameSuffix              = if T.null suffix
+                                         then Nothing
+                                         else Just suffix
+          , nameLiteral             = Nothing
+          , nameCommaSuffix         = nameOptsUseJuniorComma opts
+          , nameStaticOrdering      = False
+          }
+
+isCapitalized :: [Inline] -> Bool
+isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
+  | isUpper c = True
+  | isDigit c = isCapitalized (Str cs : rest)
+  | otherwise = False
+isCapitalized (_:rest) = isCapitalized rest
+isCapitalized [] = True
+
+
diff --git a/src/Text/Pandoc/Readers/EndNote.hs b/src/Text/Pandoc/Readers/EndNote.hs
index 586dcf451..cdfbe6a4e 100644
--- a/src/Text/Pandoc/Readers/EndNote.hs
+++ b/src/Text/Pandoc/Readers/EndNote.hs
@@ -30,7 +30,7 @@ import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Text.Pandoc.Sources (Sources(..), ToSources(..), sourcesToText)
-import Text.Pandoc.Citeproc.BibTeX (toName)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..))
 import Control.Applicative ((<|>))
 import Control.Monad.Except (throwError)
 import Control.Monad (mzero, unless)
@@ -140,7 +140,9 @@ recordToReference e =
      filterChildrenName (name "contributors") e >>=
      filterChildrenName (name "authors") >>=
      filterChildrenName (name "author") >>=
-     toName [] . B.toList .  B.text . T.strip . getText
+          toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False
+                         , nameOptsUseJuniorComma = False }
+             . B.toList .  B.text . T.strip . getText
    titles = do
      x <- filterChildrenName (name "titles") e
      (key, name') <- [("title", "title"),
diff --git a/src/Text/Pandoc/Readers/RIS.hs b/src/Text/Pandoc/Readers/RIS.hs
index f73df8135..d4d471050 100644
--- a/src/Text/Pandoc/Readers/RIS.hs
+++ b/src/Text/Pandoc/Readers/RIS.hs
@@ -29,7 +29,7 @@ import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..),
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
-import Text.Pandoc.Citeproc.BibTeX (toName)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..))
 import Control.Monad.Except (throwError)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -140,7 +140,9 @@ risRecordToReference keys = addId $ foldr go defref keys
                        M.insert (toVariable k) (FancyVal v)
                        (referenceVariables r) }
    addName k v r =
-     let new = toName [] . B.toList .  B.text $ v
+     let new = toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False
+                              , nameOptsUseJuniorComma = False }
+                . B.toList .  B.text $ v
          f Nothing   = Just (NamesVal new)
          f (Just (NamesVal ns)) = Just (NamesVal (new ++ ns))
          f (Just x) = Just x
author	John MacFarlane <[email protected]>	2022-10-02 10:42:58 -0700
committer	John MacFarlane <[email protected]>	2022-10-02 10:59:14 -0700
commit	8b004e0c267a52cedc2567ad257344f2f2be29ee (patch)
tree	7d88574556c421c4f143fb6c3e62bcf235804984
parent	1c01d39a17aee2561ee4da4e70722532e3e4018e (diff)