blob: 99e18c4f6db4856554104a8c0d4263436e120501 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
{- |
Module : Text.Pandoc.Asciify
Copyright : Copyright (C) 2013-2024 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions to convert accented Latin letters to their unaccented
ASCII equivalents (used in constructing HTML identifiers).
-}
module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
where
import Data.Char (isAscii, isMark)
import qualified Data.Text.Normalize as TN
import Data.Text (Text)
import qualified Data.Text as T
-- | Reduce a 'Text' to its ASCII approximation.
--
-- The input is put into Unicode normalization form NFD, the Turkish
-- dotless i (U+0131) is replaced by a plain @i@, and every character
-- that is still not ASCII afterwards is dropped.
toAsciiText :: Text -> Text
toAsciiText input = T.filter isAscii (T.map dotlessToPlain decomposed)
  where
    -- NFD-normalized form of the input; the filter above then discards
    -- whatever non-ASCII characters remain in it.
    decomposed = TN.normalize TN.NFD input

    -- U+0131 is rewritten explicitly to an ASCII 'i' so that it
    -- survives the ASCII filter.
    dotlessToPlain ch
      | ch == '\x131' = 'i' -- Turkish undotted i
      | otherwise = ch
-- | Map a single character to its ASCII equivalent, if it has one.
--
-- The character is NFD-normalized on its own. The result is:
--
-- * @Just x@ when the normalized form starts with an ASCII character
--   @x@ and every character after it satisfies 'isMark';
-- * @Just \'i\'@ for the Turkish dotless i (U+0131);
-- * 'Nothing' in every other case.
toAsciiChar :: Char -> Maybe Char
toAsciiChar c
  | base : marks <- decomposed
  , isAscii base
  , all isMark marks = Just base
  | decomposed == ['\x131'] = Just 'i' -- Turkish undotted i
  | otherwise = Nothing
  where
    -- NFD-normalized form of the single input character, as a String.
    decomposed = T.unpack (TN.normalize TN.NFD (T.singleton c))
|