aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Citeproc/Name.hs
blob: 68662cb57d787ebcdad3795f58a7427ffcca7832 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
{-# LANGUAGE ViewPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Input.Name
-- Copyright   :  (c) John MacFarlane
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  John MacFarlane <[email protected]>
-- Stability   :  unstable
-- Portability :  portable
--
-----------------------------------------------------------------------------

module Text.Pandoc.Citeproc.Name
    ( toName
    , NameOpts(..)
    , emptyName
    )
    where

import Text.Pandoc.Definition
import Text.Pandoc.Shared (stringify)
import Citeproc.Types
import Citeproc.Pandoc ()
import Text.Pandoc.Citeproc.Util (splitStrWhen)
import qualified Data.Text              as T
import           Data.List.Split        (splitWhen, wordsBy)
import Data.Char (isUpper, isDigit)
import qualified Data.List as L

emptyName :: Name
emptyName =
    Name {  nameFamily              = Nothing
          , nameGiven               = Nothing
          , nameDroppingParticle    = Nothing
          , nameNonDroppingParticle = Nothing
          , nameSuffix              = Nothing
          , nameLiteral             = Nothing
          , nameCommaSuffix         = False
          , nameStaticOrdering      = False
          }

-- | Options for 'toName'.
data NameOpts =
  NameOpts
    { nameOptsPrefixIsNonDroppingParticle :: Bool
        -- ^ Treat a prefix on the last name as a non-dropping particle
        -- (default is to treat it as a dropping particle). This corresponds
        -- to the biblatex option @useprefix@.
    , nameOptsUseJuniorComma              :: Bool
        -- ^ Put a comma before a suffix like "Jr." This corresponds to the
        -- biblatex option @juniorcomma@.
    } deriving (Show)

-- | Parse a list of 'Inline's into a citeproc 'Name', identifying
-- first and last name, particles, suffixes.
toName :: Monad m => NameOpts -> [Inline] -> m Name
toName _ [Str "others"] =
  return emptyName{ nameLiteral = Just "others" }
toName _ [Span ("",[],[]) ils] = -- corporate author
  return emptyName{ nameLiteral = Just $ stringify ils }
-- extended BibLaTeX name format - see #266
toName _ ils@(Str ys:_) | T.any (== '=') ys = do
  let commaParts = splitWhen (== Str ",")
                   . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
                   $ ils
  let addPart ag (Str "given" : Str "=" : xs) =
        ag{ nameGiven = case nameGiven ag of
                          Nothing -> Just $ stringify xs
                          Just t  -> Just $ t <> " " <> stringify xs }
      addPart ag (Str "family" : Str "=" : xs) =
        ag{ nameFamily = Just $ stringify xs }
      addPart ag (Str "prefix" : Str "=" : xs) =
        ag{ nameDroppingParticle =  Just $ stringify xs }
      addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
        ag{ nameNonDroppingParticle = nameDroppingParticle ag
          , nameDroppingParticle    = Nothing }
      addPart ag (Str "suffix" : Str "=" : xs) =
        ag{ nameSuffix = Just $ stringify xs }
      addPart ag (Space : xs) = addPart ag xs
      addPart ag _ = ag
  return $ L.foldl' addPart emptyName commaParts
-- First von Last
-- von Last, First
-- von Last, Jr ,First
-- NOTE: biblatex and bibtex differ on:
-- Drummond de Andrade, Carlos
-- bibtex takes "Drummond de" as the von;
-- biblatex takes the whole as a last name.
-- See https://github.com/plk/biblatex/issues/236
-- Here we implement the more sensible biblatex behavior.
toName opts ils = do
  let words' = wordsBy (\x -> x == Space || x == Str "\160")
  let commaParts = map words' $ splitWhen (== Str ",")
                              $ splitStrWhen
                                   (\c -> c == ',' || c == '\160') ils
  let (first, vonlast, jr) =
          case commaParts of
               --- First is the longest sequence of white-space separated
               -- words starting with an uppercase and that is not the
               -- whole string. von is the longest sequence of whitespace
               -- separated words whose last word starts with lower case
               -- and that is not the whole string.
               [fvl]      -> let (caps', rest') = span isCapitalized fvl
                             in  if null rest' && not (null caps')
                                 then (init caps', [last caps'], [])
                                 else (caps', rest', [])
               [vl,f]     -> (f, vl, [])
               (vl:j:f:_) -> (f, vl, j )
               []         -> ([], [], [])

  let (von, lastname) =
                 case break isCapitalized vonlast of
                        (vs@(_:_), []) -> (init vs, [last vs])
                        (vs, ws)       -> (vs, ws)
  let prefix = T.unwords $ map stringify von
  let family = T.unwords $ map stringify lastname
  let suffix = T.unwords $ map stringify jr
  let given = T.unwords $ map stringify first
  return
    Name {  nameFamily              = if T.null family
                                         then Nothing
                                         else Just family
          , nameGiven               = if T.null given
                                         then Nothing
                                         else Just given
          , nameDroppingParticle    = if nameOptsPrefixIsNonDroppingParticle opts ||
                                          T.null prefix
                                         then Nothing
                                         else Just prefix
          , nameNonDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts &&
                                          not (T.null prefix)
                                         then Just prefix
                                         else Nothing
          , nameSuffix              = if T.null suffix
                                         then Nothing
                                         else Just suffix
          , nameLiteral             = Nothing
          , nameCommaSuffix         = nameOptsUseJuniorComma opts
          , nameStaticOrdering      = False
          }

isCapitalized :: [Inline] -> Bool
isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
  | isUpper c = True
  | isDigit c = isCapitalized (Str cs : rest)
  | otherwise = False
isCapitalized (_:rest) = isCapitalized rest
isCapitalized [] = True