aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2022-10-02 10:42:58 -0700
committerJohn MacFarlane <[email protected]>2022-10-02 10:59:14 -0700
commit8b004e0c267a52cedc2567ad257344f2f2be29ee (patch)
tree7d88574556c421c4f143fb6c3e62bcf235804984
parent1c01d39a17aee2561ee4da4e70722532e3e4018e (diff)
Add T.P.Citeproc.Name.
This exports `toName`, which previously had been part of T.P.Citeproc.BibTeX. T.P.Readers.RIS,EndNote can now depend on this module without transitively depending on the LaTeX reader, which is used in T.P.Citeproc.BibTeX. Closes #8345.
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Citeproc/BibTeX.hs160
-rw-r--r--src/Text/Pandoc/Citeproc/Name.hs157
-rw-r--r--src/Text/Pandoc/Readers/EndNote.hs6
-rw-r--r--src/Text/Pandoc/Readers/RIS.hs6
5 files changed, 191 insertions, 139 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 168ecb859..35a7e8681 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -724,6 +724,7 @@ library
Text.Pandoc.Slides,
Text.Pandoc.Image,
Text.Pandoc.Citeproc.BibTeX,
+ Text.Pandoc.Citeproc.Name,
Text.Pandoc.Citeproc.CslJson,
Text.Pandoc.Citeproc.Data,
Text.Pandoc.Citeproc.Locator,
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 72f2362e6..679644b7f 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -11,7 +11,8 @@
-- License : BSD-style (see LICENSE)
--
-- Maintainer : John MacFarlane <[email protected]>
--- Stability : unstable-- Portability : unportable
+-- Stability : unstable
+-- Portability : portable
--
-----------------------------------------------------------------------------
@@ -19,7 +20,6 @@ module Text.Pandoc.Citeproc.BibTeX
( Variant(..)
, readBibtexString
, writeBibtexString
- , toName
)
where
@@ -35,8 +35,10 @@ import Text.Pandoc.Class (runPure)
import qualified Text.Pandoc.Walk as Walk
import Citeproc.Types
import Citeproc.Pandoc ()
+import Data.List.Split (splitOn)
import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen)
import Text.Pandoc.Citeproc.Data (biblatexStringMap)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..), emptyName)
import Data.Default
import Data.Text (Text)
import qualified Data.Text as T
@@ -44,7 +46,6 @@ import qualified Data.Map as Map
import Data.Maybe
import Text.Pandoc.Parsing hiding ((<|>), many)
import Control.Applicative
-import Data.List.Split (splitOn, splitWhen, wordsBy)
import Control.Monad.RWS hiding ((<>))
import qualified Data.Sequence as Seq
import Data.Char (isAlphaNum, isDigit, isLetter,
@@ -406,9 +407,7 @@ itemToReference locale variant item = do
-- names
let getNameList' f = Just <$>
- getNameList (("bibtex", case variant of
- Bibtex -> "true"
- Biblatex -> "false") : opts) f
+ getNameList opts f
author' <- getNameList' "author" <|> return Nothing
containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
@@ -795,14 +794,6 @@ parseLaTeX lang t =
latex :: Text -> Bib Inlines
latex = fmap blocksToInlines . latex' . T.strip
-type Options = [(Text, Text)]
-
-parseOptions :: Text -> Options
-parseOptions = map breakOpt . T.splitOn ","
- where breakOpt x = case T.break (=='=') x of
- (w,v) -> (T.toLower $ T.strip w,
- T.toLower $ T.strip $ T.drop 1 v)
-
bibEntries :: BibParser [Item]
bibEntries = do
skipMany nonEntry
@@ -1153,21 +1144,37 @@ concatWith sep = foldl' go mempty
B.space <> s
-getNameList :: Options -> Text -> Bib [Name]
+parseOptions :: Text -> [(Text, Text)]
+parseOptions = map breakOpt . T.splitOn ","
+ where breakOpt x = case T.break (=='=') x of
+ (w,v) -> (T.toLower $ T.strip w,
+ T.toLower $ T.strip $ T.drop 1 v)
+
+optionSet :: Text -> [(Text, Text)] -> Bool
+optionSet key opts = case lookup key opts of
+ Just "true" -> True
+ Just s -> s == mempty
+ _ -> False
+
+getNameList :: [(Text, Text)] -> Text -> Bib [Name]
getNameList opts f = do
fs <- asks fields
case Map.lookup f fs of
- Just x -> latexNames opts x
+ Just x -> latexNames nameopts x
Nothing -> notFound f
+ where
+ nameopts = NameOpts{
+ nameOptsPrefixIsNonDroppingParticle = optionSet "useprefix" opts,
+ nameOptsUseJuniorComma = optionSet "juniorcomma" opts}
-toNameList :: Options -> [Block] -> Bib [Name]
+toNameList :: NameOpts -> [Block] -> Bib [Name]
toNameList opts [Para xs] =
filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
(splitByAnd xs)
toNameList opts [Plain xs] = toNameList opts [Para xs]
toNameList _ _ = mzero
-latexNames :: Options -> Text -> Bib [Name]
+latexNames :: NameOpts -> Text -> Bib [Name]
latexNames opts t = latex' (T.strip t) >>= toNameList opts
-- see issue 392 for motivation. We want to treat
@@ -1184,109 +1191,6 @@ addSpaceAfterPeriod = go . splitStrWhen (=='.')
= Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
go (x:xs) = x:go xs
-emptyName :: Name
-emptyName =
- Name { nameFamily = Nothing
- , nameGiven = Nothing
- , nameDroppingParticle = Nothing
- , nameNonDroppingParticle = Nothing
- , nameSuffix = Nothing
- , nameLiteral = Nothing
- , nameCommaSuffix = False
- , nameStaticOrdering = False
- }
-
-toName :: MonadPlus m => Options -> [Inline] -> m Name
-toName _ [Str "others"] =
- return emptyName{ nameLiteral = Just "others" }
-toName _ [Span ("",[],[]) ils] = -- corporate author
- return emptyName{ nameLiteral = Just $ stringify ils }
- -- extended BibLaTeX name format - see #266
-toName _ ils@(Str ys:_) | T.any (== '=') ys = do
- let commaParts = splitWhen (== Str ",")
- . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
- $ ils
- let addPart ag (Str "given" : Str "=" : xs) =
- ag{ nameGiven = case nameGiven ag of
- Nothing -> Just $ stringify xs
- Just t -> Just $ t <> " " <> stringify xs }
- addPart ag (Str "family" : Str "=" : xs) =
- ag{ nameFamily = Just $ stringify xs }
- addPart ag (Str "prefix" : Str "=" : xs) =
- ag{ nameDroppingParticle = Just $ stringify xs }
- addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
- ag{ nameNonDroppingParticle = nameDroppingParticle ag
- , nameDroppingParticle = Nothing }
- addPart ag (Str "suffix" : Str "=" : xs) =
- ag{ nameSuffix = Just $ stringify xs }
- addPart ag (Space : xs) = addPart ag xs
- addPart ag _ = ag
- return $ foldl' addPart emptyName commaParts
--- First von Last
--- von Last, First
--- von Last, Jr ,First
--- NOTE: biblatex and bibtex differ on:
--- Drummond de Andrade, Carlos
--- bibtex takes "Drummond de" as the von;
--- biblatex takes the whole as a last name.
--- See https://github.com/plk/biblatex/issues/236
--- Here we implement the more sensible biblatex behavior.
-toName opts ils = do
- let useprefix = optionSet "useprefix" opts
- let usecomma = optionSet "juniorcomma" opts
- let bibtex = optionSet "bibtex" opts
- let words' = wordsBy (\x -> x == Space || x == Str "\160")
- let commaParts = map words' $ splitWhen (== Str ",")
- $ splitStrWhen
- (\c -> c == ',' || c == '\160') ils
- let (first, vonlast, jr) =
- case commaParts of
- --- First is the longest sequence of white-space separated
- -- words starting with an uppercase and that is not the
- -- whole string. von is the longest sequence of whitespace
- -- separated words whose last word starts with lower case
- -- and that is not the whole string.
- [fvl] -> let (caps', rest') = span isCapitalized fvl
- in if null rest' && not (null caps')
- then (init caps', [last caps'], [])
- else (caps', rest', [])
- [vl,f] -> (f, vl, [])
- (vl:j:f:_) -> (f, vl, j )
- [] -> ([], [], [])
-
- let (von, lastname) =
- if bibtex
- then case span isCapitalized $ reverse vonlast of
- ([],w:ws) -> (reverse ws, [w])
- (vs, ws) -> (reverse ws, reverse vs)
- else case break isCapitalized vonlast of
- (vs@(_:_), []) -> (init vs, [last vs])
- (vs, ws) -> (vs, ws)
- let prefix = T.unwords $ map stringify von
- let family = T.unwords $ map stringify lastname
- let suffix = T.unwords $ map stringify jr
- let given = T.unwords $ map stringify first
- return
- Name { nameFamily = if T.null family
- then Nothing
- else Just family
- , nameGiven = if T.null given
- then Nothing
- else Just given
- , nameDroppingParticle = if useprefix || T.null prefix
- then Nothing
- else Just prefix
- , nameNonDroppingParticle = if useprefix && not (T.null prefix)
- then Just prefix
- else Nothing
- , nameSuffix = if T.null suffix
- then Nothing
- else Just suffix
- , nameLiteral = Nothing
- , nameCommaSuffix = usecomma
- , nameStaticOrdering = False
- }
-
ordinalize :: Locale -> Text -> Text
ordinalize locale n =
let terms = localeTerms locale
@@ -1300,20 +1204,6 @@ ordinalize locale n =
Just [] -> n
Just (t:_) -> n <> snd t
-isCapitalized :: [Inline] -> Bool
-isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
- | isUpper c = True
- | isDigit c = isCapitalized (Str cs : rest)
- | otherwise = False
-isCapitalized (_:rest) = isCapitalized rest
-isCapitalized [] = True
-
-optionSet :: Text -> Options -> Bool
-optionSet key opts = case lookup key opts of
- Just "true" -> True
- Just s -> s == mempty
- _ -> False
-
getTypeAndGenre :: Bib (Text, Maybe Text)
getTypeAndGenre = do
lang <- gets localeLang
diff --git a/src/Text/Pandoc/Citeproc/Name.hs b/src/Text/Pandoc/Citeproc/Name.hs
new file mode 100644
index 000000000..e39cffd28
--- /dev/null
+++ b/src/Text/Pandoc/Citeproc/Name.hs
@@ -0,0 +1,157 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module : Text.CSL.Input.Name
+-- Copyright : (c) John MacFarlane
+-- License : BSD-style (see LICENSE)
+--
+-- Maintainer : John MacFarlane <[email protected]>
+-- Stability : unstable
+-- Portability : portable
+--
+-----------------------------------------------------------------------------
+
+module Text.Pandoc.Citeproc.Name
+ ( toName
+ , NameOpts(..)
+ , emptyName
+ )
+ where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (stringify)
+import Citeproc.Types
+import Citeproc.Pandoc ()
+import Text.Pandoc.Citeproc.Util (splitStrWhen)
+import qualified Data.Text as T
+import Data.List.Split (splitWhen, wordsBy)
+import Control.Monad.RWS hiding ((<>))
+import Data.Char (isUpper, isDigit)
+import Data.List (foldl')
+
+emptyName :: Name
+emptyName =
+ Name { nameFamily = Nothing
+ , nameGiven = Nothing
+ , nameDroppingParticle = Nothing
+ , nameNonDroppingParticle = Nothing
+ , nameSuffix = Nothing
+ , nameLiteral = Nothing
+ , nameCommaSuffix = False
+ , nameStaticOrdering = False
+ }
+
+-- | Options for 'toName'.
+data NameOpts =
+ NameOpts
+ { nameOptsPrefixIsNonDroppingParticle :: Bool
+ -- ^ Treat a prefix on the last name as a non-dropping particle
+ -- (default is to treat it as a dropping particle). This corresponds
+ -- to the biblatex option @useprefix@.
+ , nameOptsUseJuniorComma :: Bool
+ -- ^ Put a comma before a suffix like "Jr." This corresponds to the
+ -- biblatex option @juniorcomma@.
+ } deriving (Show)
+
+-- | Parse a list of 'Inline's into a citeproc 'Name', identifying
+-- first and last name, particles, suffixes.
+toName :: MonadPlus m => NameOpts -> [Inline] -> m Name
+toName _ [Str "others"] =
+ return emptyName{ nameLiteral = Just "others" }
+toName _ [Span ("",[],[]) ils] = -- corporate author
+ return emptyName{ nameLiteral = Just $ stringify ils }
+-- extended BibLaTeX name format - see #266
+toName _ ils@(Str ys:_) | T.any (== '=') ys = do
+ let commaParts = splitWhen (== Str ",")
+ . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
+ $ ils
+ let addPart ag (Str "given" : Str "=" : xs) =
+ ag{ nameGiven = case nameGiven ag of
+ Nothing -> Just $ stringify xs
+ Just t -> Just $ t <> " " <> stringify xs }
+ addPart ag (Str "family" : Str "=" : xs) =
+ ag{ nameFamily = Just $ stringify xs }
+ addPart ag (Str "prefix" : Str "=" : xs) =
+ ag{ nameDroppingParticle = Just $ stringify xs }
+ addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
+ ag{ nameNonDroppingParticle = nameDroppingParticle ag
+ , nameDroppingParticle = Nothing }
+ addPart ag (Str "suffix" : Str "=" : xs) =
+ ag{ nameSuffix = Just $ stringify xs }
+ addPart ag (Space : xs) = addPart ag xs
+ addPart ag _ = ag
+ return $ foldl' addPart emptyName commaParts
+-- First von Last
+-- von Last, First
+-- von Last, Jr ,First
+-- NOTE: biblatex and bibtex differ on:
+-- Drummond de Andrade, Carlos
+-- bibtex takes "Drummond de" as the von;
+-- biblatex takes the whole as a last name.
+-- See https://github.com/plk/biblatex/issues/236
+-- Here we implement the more sensible biblatex behavior.
+toName opts ils = do
+ let words' = wordsBy (\x -> x == Space || x == Str "\160")
+ let commaParts = map words' $ splitWhen (== Str ",")
+ $ splitStrWhen
+ (\c -> c == ',' || c == '\160') ils
+ let (first, vonlast, jr) =
+ case commaParts of
+ --- First is the longest sequence of white-space separated
+ -- words starting with an uppercase and that is not the
+ -- whole string. von is the longest sequence of whitespace
+ -- separated words whose last word starts with lower case
+ -- and that is not the whole string.
+ [fvl] -> let (caps', rest') = span isCapitalized fvl
+ in if null rest' && not (null caps')
+ then (init caps', [last caps'], [])
+ else (caps', rest', [])
+ [vl,f] -> (f, vl, [])
+ (vl:j:f:_) -> (f, vl, j )
+ [] -> ([], [], [])
+
+ let (von, lastname) =
+ case break isCapitalized vonlast of
+ (vs@(_:_), []) -> (init vs, [last vs])
+ (vs, ws) -> (vs, ws)
+ let prefix = T.unwords $ map stringify von
+ let family = T.unwords $ map stringify lastname
+ let suffix = T.unwords $ map stringify jr
+ let given = T.unwords $ map stringify first
+ return
+ Name { nameFamily = if T.null family
+ then Nothing
+ else Just family
+ , nameGiven = if T.null given
+ then Nothing
+ else Just given
+ , nameDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts ||
+ T.null prefix
+ then Nothing
+ else Just prefix
+ , nameNonDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts &&
+ not (T.null prefix)
+ then Just prefix
+ else Nothing
+ , nameSuffix = if T.null suffix
+ then Nothing
+ else Just suffix
+ , nameLiteral = Nothing
+ , nameCommaSuffix = nameOptsUseJuniorComma opts
+ , nameStaticOrdering = False
+ }
+
+isCapitalized :: [Inline] -> Bool
+isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
+ | isUpper c = True
+ | isDigit c = isCapitalized (Str cs : rest)
+ | otherwise = False
+isCapitalized (_:rest) = isCapitalized rest
+isCapitalized [] = True
+
+
diff --git a/src/Text/Pandoc/Readers/EndNote.hs b/src/Text/Pandoc/Readers/EndNote.hs
index 586dcf451..cdfbe6a4e 100644
--- a/src/Text/Pandoc/Readers/EndNote.hs
+++ b/src/Text/Pandoc/Readers/EndNote.hs
@@ -30,7 +30,7 @@ import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Text.Pandoc.Sources (Sources(..), ToSources(..), sourcesToText)
-import Text.Pandoc.Citeproc.BibTeX (toName)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..))
import Control.Applicative ((<|>))
import Control.Monad.Except (throwError)
import Control.Monad (mzero, unless)
@@ -140,7 +140,9 @@ recordToReference e =
filterChildrenName (name "contributors") e >>=
filterChildrenName (name "authors") >>=
filterChildrenName (name "author") >>=
- toName [] . B.toList . B.text . T.strip . getText
+ toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False
+ , nameOptsUseJuniorComma = False }
+ . B.toList . B.text . T.strip . getText
titles = do
x <- filterChildrenName (name "titles") e
(key, name') <- [("title", "title"),
diff --git a/src/Text/Pandoc/Readers/RIS.hs b/src/Text/Pandoc/Readers/RIS.hs
index f73df8135..d4d471050 100644
--- a/src/Text/Pandoc/Readers/RIS.hs
+++ b/src/Text/Pandoc/Readers/RIS.hs
@@ -29,7 +29,7 @@ import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..),
import Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
-import Text.Pandoc.Citeproc.BibTeX (toName)
+import Text.Pandoc.Citeproc.Name (toName, NameOpts(..))
import Control.Monad.Except (throwError)
import qualified Data.Text as T
import Data.Text (Text)
@@ -140,7 +140,9 @@ risRecordToReference keys = addId $ foldr go defref keys
M.insert (toVariable k) (FancyVal v)
(referenceVariables r) }
addName k v r =
- let new = toName [] . B.toList . B.text $ v
+ let new = toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False
+ , nameOptsUseJuniorComma = False }
+ . B.toList . B.text $ v
f Nothing = Just (NamesVal new)
f (Just (NamesVal ns)) = Just (NamesVal (new ++ ns))
f (Just x) = Just x