diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 160 | ||||
| -rw-r--r-- | src/Text/Pandoc/Citeproc/Name.hs | 157 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/EndNote.hs | 6 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RIS.hs | 6 |
4 files changed, 190 insertions, 139 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 72f2362e6..679644b7f 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -11,7 +11,8 @@ -- License : BSD-style (see LICENSE) -- -- Maintainer : John MacFarlane <[email protected]> --- Stability : unstable-- Portability : unportable +-- Stability : unstable +-- Portability : portable -- ----------------------------------------------------------------------------- @@ -19,7 +20,6 @@ module Text.Pandoc.Citeproc.BibTeX ( Variant(..) , readBibtexString , writeBibtexString - , toName ) where @@ -35,8 +35,10 @@ import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () +import Data.List.Split (splitOn) import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen) import Text.Pandoc.Citeproc.Data (biblatexStringMap) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..), emptyName) import Data.Default import Data.Text (Text) import qualified Data.Text as T @@ -44,7 +46,6 @@ import qualified Data.Map as Map import Data.Maybe import Text.Pandoc.Parsing hiding ((<|>), many) import Control.Applicative -import Data.List.Split (splitOn, splitWhen, wordsBy) import Control.Monad.RWS hiding ((<>)) import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, @@ -406,9 +407,7 @@ itemToReference locale variant item = do -- names let getNameList' f = Just <$> - getNameList (("bibtex", case variant of - Bibtex -> "true" - Biblatex -> "false") : opts) f + getNameList opts f author' <- getNameList' "author" <|> return Nothing containerAuthor' <- getNameList' "bookauthor" <|> return Nothing @@ -795,14 +794,6 @@ parseLaTeX lang t = latex :: Text -> Bib Inlines latex = fmap blocksToInlines . latex' . T.strip -type Options = [(Text, Text)] - -parseOptions :: Text -> Options -parseOptions = map breakOpt . T.splitOn "," - where breakOpt x = case T.break (=='=') x of - (w,v) -> (T.toLower $ T.strip w, - T.toLower $ T.strip $ T.drop 1 v) - bibEntries :: BibParser [Item] bibEntries = do skipMany nonEntry @@ -1153,21 +1144,37 @@ concatWith sep = foldl' go mempty B.space <> s -getNameList :: Options -> Text -> Bib [Name] +parseOptions :: Text -> [(Text, Text)] +parseOptions = map breakOpt . T.splitOn "," + where breakOpt x = case T.break (=='=') x of + (w,v) -> (T.toLower $ T.strip w, + T.toLower $ T.strip $ T.drop 1 v) + +optionSet :: Text -> [(Text, Text)] -> Bool +optionSet key opts = case lookup key opts of + Just "true" -> True + Just s -> s == mempty + _ -> False + +getNameList :: [(Text, Text)] -> Text -> Bib [Name] getNameList opts f = do fs <- asks fields case Map.lookup f fs of - Just x -> latexNames opts x + Just x -> latexNames nameopts x Nothing -> notFound f + where + nameopts = NameOpts{ + nameOptsPrefixIsNonDroppingParticle = optionSet "useprefix" opts, + nameOptsUseJuniorComma = optionSet "juniorcomma" opts} -toNameList :: Options -> [Block] -> Bib [Name] +toNameList :: NameOpts -> [Block] -> Bib [Name] toNameList opts [Para xs] = filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod) (splitByAnd xs) toNameList opts [Plain xs] = toNameList opts [Para xs] toNameList _ _ = mzero -latexNames :: Options -> Text -> Bib [Name] +latexNames :: NameOpts -> Text -> Bib [Name] latexNames opts t = latex' (T.strip t) >>= toNameList opts -- see issue 392 for motivation. We want to treat @@ -1184,109 +1191,6 @@ addSpaceAfterPeriod = go . splitStrWhen (=='.') = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs) go (x:xs) = x:go xs -emptyName :: Name -emptyName = - Name { nameFamily = Nothing - , nameGiven = Nothing - , nameDroppingParticle = Nothing - , nameNonDroppingParticle = Nothing - , nameSuffix = Nothing - , nameLiteral = Nothing - , nameCommaSuffix = False - , nameStaticOrdering = False - } - -toName :: MonadPlus m => Options -> [Inline] -> m Name -toName _ [Str "others"] = - return emptyName{ nameLiteral = Just "others" } -toName _ [Span ("",[],[]) ils] = -- corporate author - return emptyName{ nameLiteral = Just $ stringify ils } - -- extended BibLaTeX name format - see #266 -toName _ ils@(Str ys:_) | T.any (== '=') ys = do - let commaParts = splitWhen (== Str ",") - . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160') - $ ils - let addPart ag (Str "given" : Str "=" : xs) = - ag{ nameGiven = case nameGiven ag of - Nothing -> Just $ stringify xs - Just t -> Just $ t <> " " <> stringify xs } - addPart ag (Str "family" : Str "=" : xs) = - ag{ nameFamily = Just $ stringify xs } - addPart ag (Str "prefix" : Str "=" : xs) = - ag{ nameDroppingParticle = Just $ stringify xs } - addPart ag (Str "useprefix" : Str "=" : Str "true" : _) = - ag{ nameNonDroppingParticle = nameDroppingParticle ag - , nameDroppingParticle = Nothing } - addPart ag (Str "suffix" : Str "=" : xs) = - ag{ nameSuffix = Just $ stringify xs } - addPart ag (Space : xs) = addPart ag xs - addPart ag _ = ag - return $ foldl' addPart emptyName commaParts --- First von Last --- von Last, First --- von Last, Jr ,First --- NOTE: biblatex and bibtex differ on: --- Drummond de Andrade, Carlos --- bibtex takes "Drummond de" as the von; --- biblatex takes the whole as a last name. --- See https://github.com/plk/biblatex/issues/236 --- Here we implement the more sensible biblatex behavior. -toName opts ils = do - let useprefix = optionSet "useprefix" opts - let usecomma = optionSet "juniorcomma" opts - let bibtex = optionSet "bibtex" opts - let words' = wordsBy (\x -> x == Space || x == Str "\160") - let commaParts = map words' $ splitWhen (== Str ",") - $ splitStrWhen - (\c -> c == ',' || c == '\160') ils - let (first, vonlast, jr) = - case commaParts of - --- First is the longest sequence of white-space separated - -- words starting with an uppercase and that is not the - -- whole string. von is the longest sequence of whitespace - -- separated words whose last word starts with lower case - -- and that is not the whole string. - [fvl] -> let (caps', rest') = span isCapitalized fvl - in if null rest' && not (null caps') - then (init caps', [last caps'], []) - else (caps', rest', []) - [vl,f] -> (f, vl, []) - (vl:j:f:_) -> (f, vl, j ) - [] -> ([], [], []) - - let (von, lastname) = - if bibtex - then case span isCapitalized $ reverse vonlast of - ([],w:ws) -> (reverse ws, [w]) - (vs, ws) -> (reverse ws, reverse vs) - else case break isCapitalized vonlast of - (vs@(_:_), []) -> (init vs, [last vs]) - (vs, ws) -> (vs, ws) - let prefix = T.unwords $ map stringify von - let family = T.unwords $ map stringify lastname - let suffix = T.unwords $ map stringify jr - let given = T.unwords $ map stringify first - return - Name { nameFamily = if T.null family - then Nothing - else Just family - , nameGiven = if T.null given - then Nothing - else Just given - , nameDroppingParticle = if useprefix || T.null prefix - then Nothing - else Just prefix - , nameNonDroppingParticle = if useprefix && not (T.null prefix) - then Just prefix - else Nothing - , nameSuffix = if T.null suffix - then Nothing - else Just suffix - , nameLiteral = Nothing - , nameCommaSuffix = usecomma - , nameStaticOrdering = False - } - ordinalize :: Locale -> Text -> Text ordinalize locale n = let terms = localeTerms locale @@ -1300,20 +1204,6 @@ ordinalize locale n = Just [] -> n Just (t:_) -> n <> snd t -isCapitalized :: [Inline] -> Bool -isCapitalized (Str (T.uncons -> Just (c,cs)) : rest) - | isUpper c = True - | isDigit c = isCapitalized (Str cs : rest) - | otherwise = False -isCapitalized (_:rest) = isCapitalized rest -isCapitalized [] = True - -optionSet :: Text -> Options -> Bool -optionSet key opts = case lookup key opts of - Just "true" -> True - Just s -> s == mempty - _ -> False - getTypeAndGenre :: Bib (Text, Maybe Text) getTypeAndGenre = do lang <- gets localeLang diff --git a/src/Text/Pandoc/Citeproc/Name.hs b/src/Text/Pandoc/Citeproc/Name.hs new file mode 100644 index 000000000..e39cffd28 --- /dev/null +++ b/src/Text/Pandoc/Citeproc/Name.hs @@ -0,0 +1,157 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} +{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} +----------------------------------------------------------------------------- +-- | +-- Module : Text.CSL.Input.Name +-- Copyright : (c) John MacFarlane +-- License : BSD-style (see LICENSE) +-- +-- Maintainer : John MacFarlane <[email protected]> +-- Stability : unstable +-- Portability : portable +-- +----------------------------------------------------------------------------- + +module Text.Pandoc.Citeproc.Name + ( toName + , NameOpts(..) + , emptyName + ) + where + +import Text.Pandoc.Definition +import Text.Pandoc.Shared (stringify) +import Citeproc.Types +import Citeproc.Pandoc () +import Text.Pandoc.Citeproc.Util (splitStrWhen) +import qualified Data.Text as T +import Data.List.Split (splitWhen, wordsBy) +import Control.Monad.RWS hiding ((<>)) +import Data.Char (isUpper, isDigit) +import Data.List (foldl') + +emptyName :: Name +emptyName = + Name { nameFamily = Nothing + , nameGiven = Nothing + , nameDroppingParticle = Nothing + , nameNonDroppingParticle = Nothing + , nameSuffix = Nothing + , nameLiteral = Nothing + , nameCommaSuffix = False + , nameStaticOrdering = False + } + +-- | Options for 'toName'. +data NameOpts = + NameOpts + { nameOptsPrefixIsNonDroppingParticle :: Bool + -- ^ Treat a prefix on the last name as a non-dropping particle + -- (default is to treat it as a dropping particle). This corresponds + -- to the biblatex option @useprefix@. + , nameOptsUseJuniorComma :: Bool + -- ^ Put a comma before a suffix like "Jr." This corresponds to the + -- biblatex option @juniorcomma@. + } deriving (Show) + +-- | Parse a list of 'Inline's into a citeproc 'Name', identifying +-- first and last name, particles, suffixes. +toName :: MonadPlus m => NameOpts -> [Inline] -> m Name +toName _ [Str "others"] = + return emptyName{ nameLiteral = Just "others" } +toName _ [Span ("",[],[]) ils] = -- corporate author + return emptyName{ nameLiteral = Just $ stringify ils } +-- extended BibLaTeX name format - see #266 +toName _ ils@(Str ys:_) | T.any (== '=') ys = do + let commaParts = splitWhen (== Str ",") + . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160') + $ ils + let addPart ag (Str "given" : Str "=" : xs) = + ag{ nameGiven = case nameGiven ag of + Nothing -> Just $ stringify xs + Just t -> Just $ t <> " " <> stringify xs } + addPart ag (Str "family" : Str "=" : xs) = + ag{ nameFamily = Just $ stringify xs } + addPart ag (Str "prefix" : Str "=" : xs) = + ag{ nameDroppingParticle = Just $ stringify xs } + addPart ag (Str "useprefix" : Str "=" : Str "true" : _) = + ag{ nameNonDroppingParticle = nameDroppingParticle ag + , nameDroppingParticle = Nothing } + addPart ag (Str "suffix" : Str "=" : xs) = + ag{ nameSuffix = Just $ stringify xs } + addPart ag (Space : xs) = addPart ag xs + addPart ag _ = ag + return $ foldl' addPart emptyName commaParts +-- First von Last +-- von Last, First +-- von Last, Jr ,First +-- NOTE: biblatex and bibtex differ on: +-- Drummond de Andrade, Carlos +-- bibtex takes "Drummond de" as the von; +-- biblatex takes the whole as a last name. +-- See https://github.com/plk/biblatex/issues/236 +-- Here we implement the more sensible biblatex behavior. +toName opts ils = do + let words' = wordsBy (\x -> x == Space || x == Str "\160") + let commaParts = map words' $ splitWhen (== Str ",") + $ splitStrWhen + (\c -> c == ',' || c == '\160') ils + let (first, vonlast, jr) = + case commaParts of + --- First is the longest sequence of white-space separated + -- words starting with an uppercase and that is not the + -- whole string. von is the longest sequence of whitespace + -- separated words whose last word starts with lower case + -- and that is not the whole string. + [fvl] -> let (caps', rest') = span isCapitalized fvl + in if null rest' && not (null caps') + then (init caps', [last caps'], []) + else (caps', rest', []) + [vl,f] -> (f, vl, []) + (vl:j:f:_) -> (f, vl, j ) + [] -> ([], [], []) + + let (von, lastname) = + case break isCapitalized vonlast of + (vs@(_:_), []) -> (init vs, [last vs]) + (vs, ws) -> (vs, ws) + let prefix = T.unwords $ map stringify von + let family = T.unwords $ map stringify lastname + let suffix = T.unwords $ map stringify jr + let given = T.unwords $ map stringify first + return + Name { nameFamily = if T.null family + then Nothing + else Just family + , nameGiven = if T.null given + then Nothing + else Just given + , nameDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts || + T.null prefix + then Nothing + else Just prefix + , nameNonDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts && + not (T.null prefix) + then Just prefix + else Nothing + , nameSuffix = if T.null suffix + then Nothing + else Just suffix + , nameLiteral = Nothing + , nameCommaSuffix = nameOptsUseJuniorComma opts + , nameStaticOrdering = False + } + +isCapitalized :: [Inline] -> Bool +isCapitalized (Str (T.uncons -> Just (c,cs)) : rest) + | isUpper c = True + | isDigit c = isCapitalized (Str cs : rest) + | otherwise = False +isCapitalized (_:rest) = isCapitalized rest +isCapitalized [] = True + + diff --git a/src/Text/Pandoc/Readers/EndNote.hs b/src/Text/Pandoc/Readers/EndNote.hs index 586dcf451..cdfbe6a4e 100644 --- a/src/Text/Pandoc/Readers/EndNote.hs +++ b/src/Text/Pandoc/Readers/EndNote.hs @@ -30,7 +30,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) import Text.Pandoc.Sources (Sources(..), ToSources(..), sourcesToText) -import Text.Pandoc.Citeproc.BibTeX (toName) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..)) import Control.Applicative ((<|>)) import Control.Monad.Except (throwError) import Control.Monad (mzero, unless) @@ -140,7 +140,9 @@ recordToReference e = filterChildrenName (name "contributors") e >>= filterChildrenName (name "authors") >>= filterChildrenName (name "author") >>= - toName [] . B.toList . B.text . T.strip . getText + toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False + , nameOptsUseJuniorComma = False } + . B.toList . B.text . T.strip . getText titles = do x <- filterChildrenName (name "titles") e (key, name') <- [("title", "title"), diff --git a/src/Text/Pandoc/Readers/RIS.hs b/src/Text/Pandoc/Readers/RIS.hs index f73df8135..d4d471050 100644 --- a/src/Text/Pandoc/Readers/RIS.hs +++ b/src/Text/Pandoc/Readers/RIS.hs @@ -29,7 +29,7 @@ import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..), import Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) -import Text.Pandoc.Citeproc.BibTeX (toName) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..)) import Control.Monad.Except (throwError) import qualified Data.Text as T import Data.Text (Text) @@ -140,7 +140,9 @@ risRecordToReference keys = addId $ foldr go defref keys M.insert (toVariable k) (FancyVal v) (referenceVariables r) } addName k v r = - let new = toName [] . B.toList . B.text $ v + let new = toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False + , nameOptsUseJuniorComma = False } + . B.toList . B.text $ v f Nothing = Just (NamesVal new) f (Just (NamesVal ns)) = Just (NamesVal (new ++ ns)) f (Just x) = Just x |
