1 files changed, 175 insertions, 118 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 797a0a0b0..926116e23 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,6 +8,7 @@
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE TypeSynonymInstances  #-}
 {-# LANGUAGE LambdaCase            #-}
+{-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
    Copyright   : Copyright (C) 2006-2019 John MacFarlane
@@ -22,14 +23,20 @@ Utility functions and definitions used by the various Pandoc modules.
 module Text.Pandoc.Shared (
                      -- * List processing
                      splitBy,
+                     splitTextBy,
                      splitByIndices,
                      splitStringByIndices,
+                     splitTextByIndices,
                      substitute,
                      ordNub,
                      -- * Text processing
                      ToString (..),
+                     ToText (..),
+                     tshow,
                      backslashEscapes,
                      escapeStringUsing,
+                     elemText,
+                     notElemText,
                      stripTrailingNewlines,
                      trim,
                      triml,
@@ -37,6 +44,7 @@ module Text.Pandoc.Shared (
                      trimMath,
                      stripFirstAndLast,
                      camelCaseToHyphenated,
+                     camelCaseStrToHyphenated,
                      toRomanNumeral,
                      escapeURI,
                      tabFilter,
@@ -90,6 +98,7 @@ module Text.Pandoc.Shared (
                      defaultBlocksSeparator,
                      -- * Safe read
                      safeRead,
+                     safeStrRead,
                      -- * User data directory
                      defaultUserDataDirs,
                      -- * Version
@@ -133,8 +142,8 @@ import Text.DocLayout (charWidth)
 import Text.Pandoc.Walk
 
 -- | Version number of pandoc library.
-pandocVersion :: String
-pandocVersion = showVersion version
+pandocVersion :: T.Text
+pandocVersion = T.pack $ showVersion version
 
 --
 -- List processing
@@ -148,6 +157,13 @@ splitBy isSep lst =
       rest'         = dropWhile isSep rest
   in  first:splitBy isSep rest'
 
+splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text]
+splitTextBy isSep t
+  | T.null t = []
+  | otherwise = let (first, rest) = T.break isSep t
+                    rest'         = T.dropWhile isSep rest
+                in  first : splitTextBy isSep rest'
+
 splitByIndices :: [Int] -> [a] -> [[a]]
 splitByIndices [] lst = [lst]
 splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
@@ -160,6 +176,9 @@ splitStringByIndices (x:xs) lst =
   let (first, rest) = splitAt' x lst in
   first : splitStringByIndices (map (\y -> y - x) xs) rest
 
+splitTextByIndices :: [Int] -> T.Text -> [T.Text]
+splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack
+
 splitAt' :: Int -> [Char] -> ([Char],[Char])
 splitAt' _ []          = ([],[])
 splitAt' n xs | n <= 0 = ([],xs)
@@ -195,89 +214,115 @@ instance ToString String where
 instance ToString T.Text where
   toString = T.unpack
 
+class ToText a where
+  toText :: a -> T.Text
+
+instance ToText String where
+  toText = T.pack
+
+instance ToText T.Text where
+  toText = id
+
+tshow :: Show a => a -> T.Text
+tshow = T.pack . show
+
 -- | Returns an association list of backslash escapes for the
 -- designated characters.
 backslashEscapes :: [Char]    -- ^ list of special characters to escape
-                 -> [(Char, String)]
-backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+                 -> [(Char, T.Text)]
+backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
 
 -- | Escape a string of characters, using an association list of
 -- characters and strings.
-escapeStringUsing :: [(Char, String)] -> String -> String
-escapeStringUsing _ [] = ""
-escapeStringUsing escapeTable (x:xs) =
-  case lookup x escapeTable of
-       Just str -> str ++ rest
-       Nothing  -> x:rest
-  where rest = escapeStringUsing escapeTable xs
+escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
+escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
+
+-- | @True@ exactly when the @Char@ appears in the @Text@.
+elemText :: Char -> T.Text -> Bool
+elemText c = T.any (== c)
+
+-- | @True@ exactly when the @Char@ does not appear in the @Text@.
+notElemText :: Char -> T.Text -> Bool
+notElemText c = T.all (/= c)
 
 -- | Strip trailing newlines from string.
-stripTrailingNewlines :: String -> String
-stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+stripTrailingNewlines :: T.Text -> T.Text
+stripTrailingNewlines = T.dropWhileEnd (== '\n')
 
 -- | Remove leading and trailing space (including newlines) from string.
-trim :: String -> String
-trim = triml . trimr
+trim :: T.Text -> T.Text
+trim = T.dropAround (`elemText` " \r\n\t")
 
 -- | Remove leading space (including newlines) from string.
-triml :: String -> String
-triml = dropWhile (`elem` " \r\n\t")
+triml :: T.Text -> T.Text
+triml = T.dropWhile (`elemText` " \r\n\t")
 
 -- | Remove trailing space (including newlines) from string.
-trimr :: String -> String
-trimr = reverse . triml . reverse
+trimr :: T.Text -> T.Text
+trimr = T.dropWhileEnd (`elemText` " \r\n\t")
 
 -- | Trim leading space and trailing space unless after \.
-trimMath :: String -> String
-trimMath = triml . reverse . stripspace . reverse
+trimMath :: T.Text -> T.Text
+trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd
   where
-  stripspace (c1:c2:cs)
-    | c1  `elem` [' ','\t','\n','\r']
-    , c2 /= '\\' = stripspace (c2:cs)
-  stripspace cs = cs
+    stripBeginSpace t
+      | T.null pref = t
+      | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff
+      | otherwise = suff
+      where
+        (pref, suff) = T.span (`elemText` " \t\n\r") t
 
 -- | Strip leading and trailing characters from string
-stripFirstAndLast :: String -> String
-stripFirstAndLast str =
-  drop 1 $ take (length str - 1) str
+stripFirstAndLast :: T.Text -> T.Text
+stripFirstAndLast t = case T.uncons t of
+  Just (_, t') -> case T.unsnoc t' of
+    Just (t'', _) -> t''
+    _             -> t'
+  _               -> ""
 
 -- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-camelCaseToHyphenated :: String -> String
-camelCaseToHyphenated [] = ""
-camelCaseToHyphenated (a:b:rest)
+camelCaseToHyphenated :: T.Text -> T.Text
+camelCaseToHyphenated = T.pack . camelCaseStrToHyphenated . T.unpack
+
+-- This may not work as expected on general Unicode, if it contains
+-- letters with a longer lower case form than upper case. I don't know
+-- what the camel case practices of affected scripts are, though.
+camelCaseStrToHyphenated :: String -> String
+camelCaseStrToHyphenated [] = ""
+camelCaseStrToHyphenated (a:b:rest)
   | isLower a
-  , isUpper b = a:'-':toLower b:camelCaseToHyphenated rest
+  , isUpper b = a:'-':toLower b:camelCaseStrToHyphenated rest
 -- handle ABCDef = abc-def
-camelCaseToHyphenated (a:b:c:rest)
+camelCaseStrToHyphenated (a:b:c:rest)
   | isUpper a
   , isUpper b
-  , isLower c = toLower a:'-':toLower b:camelCaseToHyphenated (c:rest)
-camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
+  , isLower c = toLower a:'-':toLower b:camelCaseStrToHyphenated (c:rest)
+camelCaseStrToHyphenated (a:rest) = toLower a:camelCaseStrToHyphenated rest
 
 -- | Convert number < 4000 to uppercase roman numeral.
-toRomanNumeral :: Int -> String
+toRomanNumeral :: Int -> T.Text
 toRomanNumeral x
   | x >= 4000 || x < 0 = "?"
-  | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
-  | x >= 900  = "CM" ++ toRomanNumeral (x - 900)
-  | x >= 500  = "D" ++ toRomanNumeral (x - 500)
-  | x >= 400  = "CD" ++ toRomanNumeral (x - 400)
-  | x >= 100  = "C" ++ toRomanNumeral (x - 100)
-  | x >= 90   = "XC" ++ toRomanNumeral (x - 90)
-  | x >= 50   = "L"  ++ toRomanNumeral (x - 50)
-  | x >= 40   = "XL" ++ toRomanNumeral (x - 40)
-  | x >= 10   = "X" ++ toRomanNumeral (x - 10)
+  | x >= 1000 = "M" <> toRomanNumeral (x - 1000)
+  | x >= 900  = "CM" <> toRomanNumeral (x - 900)
+  | x >= 500  = "D" <> toRomanNumeral (x - 500)
+  | x >= 400  = "CD" <> toRomanNumeral (x - 400)
+  | x >= 100  = "C" <> toRomanNumeral (x - 100)
+  | x >= 90   = "XC" <> toRomanNumeral (x - 90)
+  | x >= 50   = "L"  <> toRomanNumeral (x - 50)
+  | x >= 40   = "XL" <> toRomanNumeral (x - 40)
+  | x >= 10   = "X" <> toRomanNumeral (x - 10)
   | x == 9    = "IX"
-  | x >= 5    = "V" ++ toRomanNumeral (x - 5)
+  | x >= 5    = "V" <> toRomanNumeral (x - 5)
   | x == 4    = "IV"
-  | x >= 1    = "I" ++ toRomanNumeral (x - 1)
+  | x >= 1    = "I" <> toRomanNumeral (x - 1)
   | otherwise = ""
 
 -- | Escape whitespace and some punctuation characters in URI.
-escapeURI :: String -> String
-escapeURI = escapeURIString (not . needsEscaping)
-  where needsEscaping c = isSpace c || c `elem`
-                           ['<','>','|','"','{','}','[',']','^', '`']
+escapeURI :: T.Text -> T.Text
+escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack
+  where needsEscaping c = isSpace c || c `elemText` "<>|\"{}[]^`"
+
 
 -- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
 tabFilter :: Int       -- ^ Tab stop
@@ -304,8 +349,11 @@ crFilter = T.filter (/= '\r')
 -- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
 -- limit years to the range 1601-9999 (ISO 8601 accepts greater than
 -- or equal to 1583, but MS Word only accepts dates starting 1601).
-normalizeDate :: String -> Maybe String
-normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
+normalizeDate :: T.Text -> Maybe T.Text
+normalizeDate = fmap T.pack . normalizeDate' . T.unpack
+
+normalizeDate' :: String -> Maybe String
+normalizeDate' s = fmap (formatTime defaultTimeLocale "%F")
   (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
   where rejectBadYear day = case toGregorian day of
           (y, _, _) | y >= 1601 && y <= 9999 -> Just day
@@ -321,26 +369,26 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
 
 -- | Generate infinite lazy list of markers for an ordered list,
 -- depending on list attributes.
-orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [T.Text]
 orderedListMarkers (start, numstyle, numdelim) =
-  let singleton c = [c]
-      nums = case numstyle of
-                     DefaultStyle -> map show [start..]
-                     Example      -> map show [start..]
-                     Decimal      -> map show [start..]
+  let nums = case numstyle of
+                     DefaultStyle -> map tshow [start..]
+                     Example      -> map tshow [start..]
+                     Decimal      -> map tshow [start..]
                      UpperAlpha   -> drop (start - 1) $ cycle $
-                                     map singleton ['A'..'Z']
+                                     map T.singleton ['A'..'Z']
                      LowerAlpha   -> drop (start - 1) $ cycle $
-                                     map singleton ['a'..'z']
+                                     map T.singleton ['a'..'z']
                      UpperRoman   -> map toRomanNumeral [start..]
-                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
+                     LowerRoman   -> map (T.toLower . toRomanNumeral) [start..]
       inDelim str = case numdelim of
-                            DefaultDelim -> str ++ "."
-                            Period       -> str ++ "."
-                            OneParen     -> str ++ ")"
-                            TwoParens    -> "(" ++ str ++ ")"
+                            DefaultDelim -> str <> "."
+                            Period       -> str <> "."
+                            OneParen     -> str <> ")"
+                            TwoParens    -> "(" <> str <> ")"
   in  map inDelim nums
 
+
 -- | Extract the leading and trailing spaces from inside an inline element
 -- and place them outside the element.  SoftBreaks count as Spaces for
 -- these purposes.
@@ -387,15 +435,16 @@ deQuote x = x
 -- | Convert pandoc structure to a string with formatting removed.
 -- Footnotes are skipped (since we don't want their contents in link
 -- labels).
-stringify :: Walkable Inline a => a -> String
+stringify :: Walkable Inline a => a -> T.Text
 stringify = query go . walk (deNote . deQuote)
-  where go :: Inline -> [Char]
+  where go :: Inline -> T.Text
         go Space                                       = " "
         go SoftBreak                                   = " "
         go (Str x)                                     = x
         go (Code _ x)                                  = x
         go (Math _ x)                                  = x
-        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
+        go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_)))
+                                                       = " " -- see #2105
         go LineBreak                                   = " "
         go _                                           = ""
 
@@ -407,7 +456,7 @@ stringify = query go . walk (deNote . deQuote)
 capitalize :: Walkable Inline a => a -> a
 capitalize = walk go
   where go :: Inline -> Inline
-        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+        go (Str s) = Str $ T.toUpper s
         go x       = x
 
 -- | Change final list item from @Para@ to @Plain@ if the list contains
@@ -463,7 +512,7 @@ isPara _        = False
 -- | Convert Pandoc inline list to plain text identifier.  HTML
 -- identifiers must start with a letter, and may contain only
 -- letters, digits, and the characters _-.
-inlineListToIdentifier :: Extensions -> [Inline] -> String
+inlineListToIdentifier :: Extensions -> [Inline] -> T.Text
 inlineListToIdentifier exts =
   dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify
   where
@@ -476,23 +525,23 @@ inlineListToIdentifier exts =
     unEmoji x = x
     dropNonLetter
       | extensionEnabled Ext_gfm_auto_identifiers exts = id
-      | otherwise = dropWhile (not . isAlpha)
+      | otherwise = T.dropWhile (not . isAlpha)
     filterAscii
       | extensionEnabled Ext_ascii_identifiers exts
-        = mapMaybe toAsciiChar
+        = T.pack . mapMaybe toAsciiChar . T.unpack
       | otherwise = id
     toIdent
       | extensionEnabled Ext_gfm_auto_identifiers exts =
-        filterPunct . spaceToDash . map toLower
-      | otherwise = intercalate "-" . words . filterPunct . map toLower
-    filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
+        filterPunct . spaceToDash . T.toLower
+      | otherwise = T.intercalate "-" . T.words . filterPunct . T.toLower
+    filterPunct = T.filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
     isAllowedPunct c
       | extensionEnabled Ext_gfm_auto_identifiers exts
         = c == '-' || c == '_' ||
           generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark,
                                     EnclosingMark, ConnectorPunctuation]
       | otherwise = c == '_' || c == '-' || c == '.'
-    spaceToDash = map (\c -> if isSpace c then '-' else c)
+    spaceToDash = T.map (\c -> if isSpace c then '-' else c)
 
 
 -- | Put a list of Pandoc blocks into a hierarchical structure:
@@ -529,7 +578,7 @@ makeSections numbering mbBaseLevel bs =
                    -- don't touch number if already present
                    case lookup "number" kvs of
                      Nothing | numbering ->
-                        ("number", intercalate "." (map show newnum)) : kvs
+                        ("number", T.intercalate "." (map tshow newnum)) : kvs
                      _ -> kvs)
     return $
       Div divattr (Header level' attr title' : sectionContents') : rest'
@@ -542,7 +591,7 @@ makeSections numbering mbBaseLevel bs =
     let inner' =
           case inner of
             (Div (dident',dclasses',dkvs') zs@(Header{}:zs') : ws)
-              | null dident ->
+              | T.null dident ->
                 Div (dident',dclasses' ++ dclasses,dkvs' ++ dkvs) zs : ws
               | otherwise -> -- keep id on header so we don't lose anchor
                 Div (dident,dclasses ++ dclasses',dkvs ++ dkvs')
@@ -564,7 +613,7 @@ headerLtEq _ _                   = False
 
 -- | Generate a unique identifier from a list of inlines.
 -- Second argument is a list of already used identifiers.
-uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String
+uniqueIdent :: Extensions -> [Inline] -> Set.Set T.Text -> T.Text
 uniqueIdent exts title' usedIdents =
   if baseIdent `Set.member` usedIdents
      then case find (\x -> not $ numIdent x `Set.member` usedIdents)
@@ -577,7 +626,7 @@ uniqueIdent exts title' usedIdents =
     baseIdent = case inlineListToIdentifier exts title' of
                      "" -> "section"
                      x  -> x
-    numIdent n = baseIdent ++ "-" ++ show n
+    numIdent n = baseIdent <> "-" <> tshow n
 
 -- | True if block is a Header block.
 isHeaderBlock :: Block -> Bool
@@ -664,7 +713,7 @@ handleTaskListItem handleInlines exts bls =
 -- | Set a field of a 'Meta' object.  If the field already has a value,
 -- convert it into a list with the new value appended to the old value(s).
 addMetaField :: ToMetaValue a
-             => String
+             => T.Text
              -> a
              -> Meta
              -> Meta
@@ -686,12 +735,16 @@ makeMeta title authors date =
 -- | Remove soft breaks between East Asian characters.
 eastAsianLineBreakFilter :: Pandoc -> Pandoc
 eastAsianLineBreakFilter = bottomUp go
-  where go (x:SoftBreak:y:zs) =
-         case (stringify x, stringify y) of
-               (xs@(_:_), c:_)
-                 | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
-               _ -> x:SoftBreak:y:zs
-        go xs = xs
+  where go (x:SoftBreak:y:zs)
+          | Just (_, b) <- T.unsnoc $ stringify x
+          , Just (c, _) <- T.uncons $ stringify y
+          , charWidth b == 2
+          , charWidth c == 2
+          = x:y:zs
+          | otherwise
+          = x:SoftBreak:y:zs
+        go xs
+          = xs
 
 -- | Builder for underline.
 -- This probably belongs in Builder.hs in pandoc-types.
@@ -702,27 +755,28 @@ underlineSpan = B.spanWith ("", ["underline"], [])
 -- | Set of HTML elements that are represented as Span with a class equal as
 -- the element tag itself.
 htmlSpanLikeElements :: Set.Set T.Text
-htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark", T.pack "dfn"]
+htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"]
 
 -- | Returns the first sentence in a list of inlines, and the rest.
 breakSentence :: [Inline] -> ([Inline], [Inline])
 breakSentence [] = ([],[])
 breakSentence xs =
-  let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
-      isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
-      isSentenceEndInline LineBreak      = True
-      isSentenceEndInline _              = False
+  let isSentenceEndInline (Str ys)
+        | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?'
+      isSentenceEndInline LineBreak  = True
+      isSentenceEndInline _          = False
       (as, bs) = break isSentenceEndInline xs
   in  case bs of
-           []             -> (as, [])
-           [c]            -> (as ++ [c], [])
-           (c:Space:cs)   -> (as ++ [c], cs)
-           (c:SoftBreak:cs) -> (as ++ [c], cs)
-           (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
-           (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
-           (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
-           (c:cs)         -> (as ++ [c] ++ ds, es)
-              where (ds, es) = breakSentence cs
+        []             -> (as, [])
+        [c]            -> (as ++ [c], [])
+        (c:Space:cs)   -> (as ++ [c], cs)
+        (c:SoftBreak:cs) -> (as ++ [c], cs)
+        (Str ".":Str s@(T.uncons -> Just (')',_)):cs)
+          -> (as ++ [Str ".", Str s], cs)
+        (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs)
+        (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs)
+        (c:cs)         -> (as ++ [c] ++ ds, es)
+          where (ds, es) = breakSentence cs
 
 -- | Split a list of inlines into sentences.
 splitSentences :: [Inline] -> [[Inline]]
@@ -763,10 +817,11 @@ filterIpynbOutput mode = walk go
                   removeANSI (CodeBlock attr code) =
                     CodeBlock attr (removeANSIEscapes code)
                   removeANSI x = x
-                  removeANSIEscapes [] = []
-                  removeANSIEscapes ('\x1b':'[':cs) =
-                    removeANSIEscapes (drop 1 $ dropWhile (/='m') cs)
-                  removeANSIEscapes (c:cs) = c : removeANSIEscapes cs
+                  removeANSIEscapes t
+                    | Just cs <- T.stripPrefix "\x1b[" t =
+                        removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs
+                    | Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs
+                    | otherwise = ""
         go x = x
 
 --
@@ -774,12 +829,12 @@ filterIpynbOutput mode = walk go
 --
 
 -- | Render HTML tags.
-renderTags' :: [Tag String] -> String
+renderTags' :: [Tag T.Text] -> T.Text
 renderTags' = renderTagsOptions
                renderOptions{ optMinimize = matchTags ["hr", "br", "img",
                                                        "meta", "link"]
                             , optRawTag   = matchTags ["script", "style"] }
-              where matchTags tags = flip elem tags . map toLower
+              where matchTags tags = flip elem tags . T.toLower
 
 --
 -- File handling
@@ -826,8 +881,8 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
 -- Convert the path part of a file: URI to a regular path.
 -- On windows, @/c:/foo@ should be @c:/foo@.
 -- On linux, @/foo@ should be @/foo@.
-uriPathToPath :: String -> FilePath
-uriPathToPath path =
+uriPathToPath :: T.Text -> FilePath
+uriPathToPath (T.unpack -> path) =
 #ifdef _WINDOWS
   case path of
     '/':ps -> ps
@@ -853,7 +908,7 @@ filteredFilesFromArchive zf f =
 
 -- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
 -- the unofficial schemes doi, javascript, isbn, pmid.
-schemes :: Set.Set String
+schemes :: Set.Set T.Text
 schemes = Set.fromList
   -- Official IANA schemes
   [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
@@ -905,11 +960,11 @@ schemes = Set.fromList
 
 -- | Check if the string is a valid URL with a IANA or frequently used but
 -- unofficial scheme (see @schemes@).
-isURI :: String -> Bool
-isURI = maybe False hasKnownScheme . parseURI
+isURI :: T.Text -> Bool
+isURI = maybe False hasKnownScheme . parseURI . T.unpack
   where
-    hasKnownScheme = (`Set.member` schemes) . map toLower .
-                     filter (/= ':') . uriScheme
+    hasKnownScheme = (`Set.member` schemes) . T.toLower .
+                     T.filter (/= ':') . T.pack . uriScheme
 
 ---
 --- Squash blocks into inlines
@@ -962,12 +1017,14 @@ defaultBlocksSeparator =
 -- Safe read
 --
 
-safeRead :: (MonadPlus m, Read a) => String -> m a
-safeRead s = case reads s of
+safeRead :: (MonadPlus m, Read a) => T.Text -> m a
+safeRead = safeStrRead . T.unpack
+
+safeStrRead :: (MonadPlus m, Read a) => String -> m a
+safeStrRead s = case reads s of
                   (d,x):_
                     | all isSpace x -> return d
                   _                 -> mzero
-
 --
 -- User data directory
 --