aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Asciify.hs
blob: 99e18c4f6db4856554104a8c0d4263436e120501 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
{- |
   Module      : Text.Pandoc.Asciify
   Copyright   : Copyright (C) 2013-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Functions to convert accented Latin letters to their unaccented
ASCII equivalents (used in constructing HTML identifiers).
-}
module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
where
import Data.Char (isAscii, isMark)
import qualified Data.Text.Normalize as TN
import Data.Text (Text)
import qualified Data.Text as T

-- | Reduce a 'Text' to ASCII-only characters.
--
-- The input is first normalized to NFD, then the Turkish undotted i
-- (U+0131) is rewritten to a plain @i@, and finally every character
-- that is not ASCII is discarded.
toAsciiText :: Text -> Text
toAsciiText input = T.filter isAscii (T.map dotlessToI decomposed)
 where
  -- NFD-normalized form of the input.
  decomposed = TN.normalize TN.NFD input

  -- U+0131 would otherwise be removed by the ASCII filter, so map it
  -- to 'i' beforehand; all other characters pass through untouched.
  dotlessToI ch
    | ch == '\x131' = 'i' -- Turkish undotted i
    | otherwise     = ch

-- | Try to map a single character to an ASCII character.
--
-- The character is normalized to NFD. The result is:
--
-- * @Just x@ when the normalized form starts with an ASCII character
--   @x@ and everything after it satisfies 'isMark';
-- * @Just \'i\'@ when the normalized form is exactly the Turkish
--   undotted i (U+0131);
-- * @Nothing@ otherwise.
toAsciiChar :: Char -> Maybe Char
toAsciiChar c =
  case T.uncons decomposed of
    Just (base, marks)
      | isAscii base && T.all isMark marks -> Just base
      | base == '\x131' && T.null marks    -> Just 'i'  -- Turkish undotted i
    _                                      -> Nothing
 where
  -- NFD-normalized form of the single input character.
  decomposed = TN.normalize TN.NFD (T.singleton c)