1 files changed, 112 insertions, 119 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e03ac6a97..1c2892d6a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,5 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE NoImplicitPrelude     #-}
+{-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT)
 import Data.Char (isAlphaNum, isLetter)
 import Data.Default (Default (..), def)
 import Data.Foldable (for_)
-import Data.List (isPrefixOf)
-import Data.List.Split (wordsBy, splitWhen)
+import Data.List.Split (splitWhen)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Monoid (First (..))
@@ -62,8 +61,8 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, htmlSpanLikeElements,
-                           onlySimpleTableCells, safeRead, underlineSpan)
+                           extractSpaces, htmlSpanLikeElements, elemText, splitTextBy,
+                           onlySimpleTableCells, safeRead, underlineSpan, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
@@ -93,14 +92,14 @@ readHtml opts inp = do
        "source" tags
   case result of
     Right doc -> return doc
-    Left  err -> throwError $ PandocParseError $ getError err
+    Left  err -> throwError $ PandocParseError $ T.pack $ getError err
 
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
 replaceNotes bs = do
   st <- getState
   return $ walk (replaceNotes' (noteTable st)) bs
 
-replaceNotes' :: [(String, Blocks)] -> Inline -> Inline
+replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline
 replaceNotes' noteTbl (RawInline (Format "noteref") ref) =
   maybe (Str "") (Note . B.toList) $ lookup ref noteTbl
 replaceNotes' _ x = x
@@ -108,9 +107,9 @@ replaceNotes' _ x = x
 data HTMLState =
   HTMLState
   {  parserState :: ParserState,
-     noteTable   :: [(String, Blocks)],
+     noteTable   :: [(Text, Blocks)],
      baseHref    :: Maybe URI,
-     identifiers :: Set.Set String,
+     identifiers :: Set.Set Text,
      logMessages :: [LogMessage],
      macros      :: M.Map Text Macro
   }
@@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks
 pHtml = try $ do
   (TagOpen "html" attr) <- lookAhead pAny
   for_ (lookup "lang" attr) $
-    updateState . B.setMeta "lang" . B.text . T.unpack
+    updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
 pBody :: PandocMonad m => TagParser m Blocks
@@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
         setTitle t = mempty <$ updateState (B.setMeta "title" t)
         pMetaTag = do
           mt <- pSatisfy (matchTagOpen "meta" [])
-          let name = T.unpack $ fromAttrib "name" mt
-          if null name
+          let name = fromAttrib "name" mt
+          if T.null name
              then return mempty
              else do
-               let content = T.unpack $ fromAttrib "content" mt
+               let content = fromAttrib "content" mt
                updateState $ \s ->
                  let ps = parserState s in
                  s{ parserState = ps{
@@ -187,13 +186,13 @@ block = do
             , pFigure
             , pRawHtmlBlock
             ]
-  trace (take 60 $ show $ B.toList res)
+  trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
+namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
 namespaces = [(mathMLNamespace, pMath True)]
 
-mathMLNamespace :: String
+mathMLNamespace :: Text
 mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
 
 eSwitch :: (PandocMonad m, Monoid a)
@@ -233,7 +232,7 @@ eFootnote = try $ do
   content <- pInTags tag block
   addNote ident content
 
-addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
+addNote :: PandocMonad m => Text -> Blocks -> TagParser m ()
 addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})
 
 eNoteref :: PandocMonad m => TagParser m Inlines
@@ -245,8 +244,8 @@ eNoteref = try $ do
                     -> (lookup "type" as <|> lookup "epub:type" as)
                         == Just "noteref"
                  _  -> False)
-  ident <- case T.unpack <$> lookup "href" attr of
-             Just ('#':rest) -> return rest
+  ident <- case lookup "href" attr >>= T.uncons of
+             Just ('#', rest) -> return rest
              _ -> mzero
   _ <- manyTill pAny (pSatisfy (\case
                                    TagClose t -> t == tag
@@ -287,7 +286,7 @@ pListItem nonItem = do
   maybe id addId (lookup "id" attr) <$>
     pInTags "li" block <* skipMany nonItem
 
-parseListStyleType :: String -> ListNumberStyle
+parseListStyleType :: Text -> ListNumberStyle
 parseListStyleType "lower-roman" = LowerRoman
 parseListStyleType "upper-roman" = UpperRoman
 parseListStyleType "lower-alpha" = LowerAlpha
@@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha
 parseListStyleType "decimal"     = Decimal
 parseListStyleType _             = DefaultStyle
 
-parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr :: Text -> ListNumberStyle
 parseTypeAttr "i" = LowerRoman
 parseTypeAttr "I" = UpperRoman
 parseTypeAttr "a" = LowerAlpha
@@ -404,20 +403,19 @@ pDiv = try $ do
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
-  raw <- T.unpack <$>
-    (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
-      <|> pRawTag)
+  raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
+          <|> pRawTag)
   exts <- getOption readerExtensions
-  if extensionEnabled Ext_raw_html exts && not (null raw)
+  if extensionEnabled Ext_raw_html exts && not (T.null raw)
      then return $ B.rawBlock "html" raw
      else ignore raw
 
-ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
+ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a
 ignore raw = do
   pos <- getPosition
   -- raw can be null for tags like <!DOCTYPE>; see paRawTag
   -- in this case we don't want a warning:
-  unless (null raw) $
+  unless (T.null raw) $
     logMessage $ SkippedContent raw pos
   return mempty
 
@@ -438,7 +436,7 @@ eSection = try $ do
 
 headerLevel :: Text -> TagParser m Int
 headerLevel tagtype =
-  case safeRead (T.unpack (T.drop 1 tagtype)) of
+  case safeRead (T.drop 1 tagtype) of
         Just level ->
 --          try (do
 --            guardEnabled Ext_epub_html_exts
@@ -468,7 +466,7 @@ pHeader = try $ do
   level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
-  let classes = maybe [] words $ lookup "class" attr
+  let classes = maybe [] T.words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   attr'' <- registerHeader (ident, classes, keyvals) contents
   return $ if bodyTitle
@@ -529,14 +527,14 @@ pCol = try $ do
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
   let width = case lookup "width" attribs of
-           Nothing -> case lookup "style" attribs of
-               Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
-                 fromMaybe 0.0 $ safeRead (filter
-                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-               _ -> 0.0
-           Just x | not (null x) && last x == '%' ->
-             fromMaybe 0.0 $ safeRead (init x)
-           _ -> 0.0
+                Nothing -> case lookup "style" attribs of
+                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
+                    fromMaybe 0.0 $ safeRead (T.filter
+                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> 0.0
+                Just (T.unsnoc -> Just (xs, '%')) ->
+                  fromMaybe 0.0 $ safeRead xs
+                _ -> 0.0
   if width > 0.0
     then return $ width / 100.0
     else return 0.0
@@ -562,7 +560,7 @@ pCell celltype = try $ do
   let extractAlign' []                 = ""
       extractAlign' ("text-align":x:_) = x
       extractAlign' (_:xs)             = extractAlign' xs
-  let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':'])
+  let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:")
   let align = case maybeFromAttrib "align" tag `mplus`
                    (extractAlign <$> maybeFromAttrib "style" tag) of
                    Just "left"   -> AlignLeft
@@ -610,7 +608,7 @@ pFigure = try $ do
   let caption = fromMaybe mempty mbcap
   case B.toList <$> mbimg of
        Just [Image attr _ (url, tit)] ->
-         return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+         return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
        _ -> mzero
 
 pCodeBlock :: PandocMonad m => TagParser m Blocks
@@ -618,21 +616,21 @@ pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
   let attr = toStringAttr attr'
   contents <- manyTill pAny (pCloses "pre" <|> eof)
-  let rawText = concatMap tagToString contents
+  let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
-  let result' = case rawText of
-                     '\n':xs -> xs
-                     _       -> rawText
+  let result' = case T.uncons rawText of
+                     Just ('\n', xs) -> xs
+                     _               -> rawText
   -- drop trailing newline if any
-  let result = case reverse result' of
-                    '\n':_ -> init result'
-                    _      -> result'
+  let result = case T.unsnoc result' of
+                    Just (result'', '\n') -> result''
+                    _                     -> result'
   return $ B.codeBlockWith (mkAttr attr) result
 
-tagToString :: Tag Text -> String
-tagToString (TagText s)      = T.unpack s
-tagToString (TagOpen "br" _) = "\n"
-tagToString _                = ""
+tagToText :: Tag Text -> Text
+tagToText (TagText s)      = s
+tagToText (TagOpen "br" _) = "\n"
+tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
 inline = choice
@@ -667,7 +665,7 @@ pLocation = do
 pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
 pSat f = do
   pos <- getPosition
-  token show (const pos) (\x -> if f x then Just x else Nothing)
+  token tshow (const pos) (\x -> if f x then Just x else Nothing)
 
 pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
 pSatisfy f = try $ optional pLocation >> pSat f
@@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote]
   where citedQuote = do
           tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
 
-          url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag
-          let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+          url <- canonicalizeUrl $ fromAttrib "cite" tag
+          let uid = fromMaybe (fromAttrib "name" tag) $
                        maybeFromAttrib "id" tag
-          let cls = words $ T.unpack $ fromAttrib "class" tag
+          let cls = T.words $ fromAttrib "class" tag
 
           makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
         normalQuote = do
@@ -729,7 +727,7 @@ pSpanLike =
       TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True)
       let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs
       content <- mconcat <$> manyTill inline (pCloses tagName <|> eof)
-      return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content
+      return $ B.spanWith (ids, tagName : cs, kvs) content
 
 pSmall :: PandocMonad m => TagParser m Inlines
 pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[]))
@@ -753,19 +751,18 @@ pLineBreak = do
 
 -- Unlike fromAttrib from tagsoup, this distinguishes
 -- between a missing attribute and an attribute with empty content.
-maybeFromAttrib :: String -> Tag Text -> Maybe String
-maybeFromAttrib name (TagOpen _ attrs) =
-  T.unpack <$> lookup (T.pack name) attrs
+maybeFromAttrib :: Text -> Tag Text -> Maybe Text
+maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
 maybeFromAttrib _ _ = Nothing
 
 pLink :: PandocMonad m => TagParser m Inlines
 pLink = try $ do
   tag <- pSatisfy $ tagOpenLit "a" (const True)
-  let title = T.unpack $ fromAttrib "title" tag
+  let title = fromAttrib "title" tag
   -- take id from id attribute if present, otherwise name
-  let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+  let uid = fromMaybe (fromAttrib "name" tag) $
                maybeFromAttrib "id" tag
-  let cls = words $ T.unpack $ fromAttrib "class" tag
+  let cls = T.words $ fromAttrib "class" tag
   lab <- mconcat <$> manyTill inline (pCloses "a")
   -- check for href; if href, then a link, otherwise a span
   case maybeFromAttrib "href" tag of
@@ -778,34 +775,33 @@ pLink = try $ do
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
   tag <- pSelfClosing (=="img") (isJust . lookup "src")
-  url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag
-  let title = T.unpack $ fromAttrib "title" tag
-  let alt = T.unpack $ fromAttrib "alt" tag
-  let uid = T.unpack $ fromAttrib "id" tag
-  let cls = words $ T.unpack $ fromAttrib "class" tag
+  url <- canonicalizeUrl $ fromAttrib "src" tag
+  let title = fromAttrib "title" tag
+  let alt = fromAttrib "alt" tag
+  let uid = fromAttrib "id" tag
+  let cls = T.words $ fromAttrib "class" tag
   let getAtt k = case fromAttrib k tag of
                    "" -> []
-                   v  -> [(T.unpack k, T.unpack v)]
+                   v  -> [(k, v)]
   let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
   return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
 
-pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines 
-pCodeWithClass elemToClass = try $ do 
+pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
+pCodeWithClass elemToClass = try $ do
   let tagTest = flip elem . fmap fst $ elemToClass
   TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
   result <- manyTill pAny (pCloses open)
   let (ids,cs,kvs) = mkAttr . toStringAttr $ attr'
       cs'          = maybe cs (:cs) . lookup open $ elemToClass
   return . B.codeWith (ids,cs',kvs) .
-    unwords . lines . T.unpack . innerText $ result
+    T.unwords . T.lines . innerText $ result
 
 pCode :: PandocMonad m => TagParser m Inlines
 pCode = try $ do
   (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   let attr = toStringAttr attr'
   result <- manyTill pAny (pCloses open)
-  return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $
-           innerText result
+  return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result
 
 pSpan :: PandocMonad m => TagParser m Inlines
 pSpan = try $ do
@@ -817,7 +813,7 @@ pSpan = try $ do
                     where styleAttr   = fromMaybe "" $ lookup "style" attr
                           fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
                           classes     = fromMaybe [] $
-                                          words <$> lookup "class" attr
+                                          T.words <$> lookup "class" attr
   let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
   return $ tag contents
 
@@ -829,18 +825,17 @@ pRawHtmlInline = do
                    then pSatisfy (not . isBlockTag)
                    else pSatisfy isInlineTag
   exts <- getOption readerExtensions
-  let raw = T.unpack $ renderTags' [result]
+  let raw = renderTags' [result]
   if extensionEnabled Ext_raw_html exts
      then return $ B.rawInline "html" raw
      else ignore raw
 
-mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath :: Text -> Either Text Text
 mathMLToTeXMath s = writeTeX <$> readMathML s
 
-toStringAttr :: [(Text, Text)] -> [(String, String)]
+toStringAttr :: [(Text, Text)] -> [(Text, Text)]
 toStringAttr = map go
-  where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x),
-                    T.unpack y)
+  where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y)
 
 pScriptMath :: PandocMonad m => TagParser m Inlines
 pScriptMath = try $ do
@@ -849,8 +844,7 @@ pScriptMath = try $ do
                     Just x | "math/tex" `T.isPrefixOf` x
                       -> return $ "display" `T.isSuffixOf` x
                     _ -> mzero
-  contents <- T.unpack . innerText <$>
-                manyTill pAny (pSatisfy (matchTagClose "script"))
+  contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script"))
   return $ (if isdisplay then B.displayMath else B.math) contents
 
 pMath :: PandocMonad m => Bool -> TagParser m Inlines
@@ -862,11 +856,11 @@ pMath inCase = try $ do
   unless inCase $
     guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
   contents <- manyTill pAny (pSatisfy (matchTagClose "math"))
-  case mathMLToTeXMath (T.unpack $ renderTags $
+  case mathMLToTeXMath (renderTags $
           [open] <> contents <> [TagClose "math"]) of
        Left _   -> return $ B.spanWith ("",["math"],attr) $ B.text $
-                             T.unpack $ innerText contents
-       Right [] -> return mempty
+                             innerText contents
+       Right "" -> return mempty
        Right x  -> return $ case lookup "display" attr of
                                  Just "block" -> B.displayMath x
                                  _            -> B.math x
@@ -925,7 +919,7 @@ pTagText = try $ do
   parsed <- lift $ lift $
             flip runReaderT qu $ runParserT (many pTagContents) st "text" str
   case parsed of
-       Left _        -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'"
+       Left _        -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
 pBlank :: PandocMonad m => TagParser m ()
@@ -954,11 +948,11 @@ pRawTeX = do
   guardEnabled Ext_raw_tex
   inp <- getInput
   st <- getState
-  res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+  res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp
   case res of
        Left _                -> mzero
        Right (contents, raw) -> do
-         _ <- count (length raw) anyChar
+         _ <- count (T.length raw) anyChar
          return $ B.rawInline "tex" contents
 
 pStr :: PandocMonad m => InlinesParser m Inlines
@@ -966,7 +960,7 @@ pStr = do
   result <- many1 $ satisfy $ \c ->
                      not (isSpace c) && not (isSpecial c) && not (isBad c)
   updateLastStrPos
-  return $ B.str result
+  return $ B.str $ T.pack result
 
 isSpecial :: Char -> Bool
 isSpecial '"'     = True
@@ -982,7 +976,7 @@ isSpecial '\8221' = True
 isSpecial _       = False
 
 pSymbol :: PandocMonad m => InlinesParser m Inlines
-pSymbol = satisfy isSpecial >>= return . B.str . (:[])
+pSymbol = satisfy isSpecial >>= return . B.str . T.singleton
 
 isBad :: Char -> Bool
 isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
@@ -1019,7 +1013,7 @@ pBad = do
                 '\158' -> '\382'
                 '\159' -> '\376'
                 _      -> '?'
-  return $ B.str [c']
+  return $ B.str $ T.singleton c'
 
 pSpace :: PandocMonad m => InlinesParser m Inlines
 pSpace = many1 (satisfy isSpace) >>= \xs ->
@@ -1156,8 +1150,8 @@ _ `closes` _ = False
 
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
-               => (Tag String -> Bool)
-               -> ParserT String st m String
+               => (Tag Text -> Bool)
+               -> ParserT Text st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
   inp <- getInput
@@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do
                         (TagClose _ : TagPosition er ec : _) -> do
                           let ls = er - sr
                           let cs = ec - sc
-                          lscontents <- unlines <$> count ls anyLine
+                          lscontents <- T.unlines <$> count ls anyLine
                           cscontents <- count cs anyChar
                           closetag <- do
                             x <- many (satisfy (/='>'))
                             char '>'
                             return (x <> ">")
-                          return (lscontents <> cscontents <> closetag)
+                          return $ lscontents <> T.pack cscontents <> T.pack closetag
                         _ -> mzero
     _ -> mzero
 
-htmlInBalanced' :: String
-                -> [Tag String]
-                -> [Tag String]
+htmlInBalanced' :: Text
+                -> [Tag Text]
+                -> [Tag Text]
 htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
-  where go :: Int -> [Tag String] -> Maybe [Tag String]
+  where go :: Int -> [Tag Text] -> Maybe [Tag Text]
         go n (t@(TagOpen tn' _):rest) | tn' == tagname =
               (t :) <$> go (n + 1) rest
         go 1 (t@(TagClose tn'):_) | tn' == tagname =
@@ -1204,8 +1198,8 @@ hasTagWarning _                = False
 
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
-        => (Tag String -> Bool)
-        -> ParserT [Char] st m (Tag String, String)
+        => (Tag Text -> Bool)
+        -> ParserT Text st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
@@ -1213,7 +1207,7 @@ htmlTag f = try $ do
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp ++ " ") -- add space to ensure that
+                               (inp <> " ") -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
@@ -1225,13 +1219,12 @@ htmlTag f = try $ do
   -- so we exclude . even though it's a valid character
   -- in XML element names
   let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
-  let isName s = case s of
-                      []      -> False
-                      (c:cs)  -> isLetter c && all isNameChar cs
-  let isPI s = case s of
-                      ('?':_) -> True -- processing instruction
-                      _       -> False
-
+  let isName s = case T.uncons s of
+                   Nothing      -> False
+                   Just (c, cs) -> isLetter c && T.all isNameChar cs
+  let isPI s = case T.uncons s of
+                 Just ('?', _) -> True -- processing instruction
+                 _             -> False
   let endpos = if ln == 1
                   then setSourceColumn startpos
                          (sourceColumn startpos + (col - 1))
@@ -1247,18 +1240,18 @@ htmlTag f = try $ do
        -- basic sanity check, since the parser is very forgiving
        -- and finds tags in stuff like x<y)
        guard $ isName tagname || isPI tagname
-       guard $ not $ null tagname
+       guard $ not $ T.null tagname
        -- <https://example.org> should NOT be a tag either.
        -- tagsoup will parse it as TagOpen "https:" [("example.org","")]
-       guard $ last tagname /= ':'
+       guard $ T.last tagname /= ':'
        char '<'
        rendered <- manyTill anyChar endAngle
-       return (next, "<" ++ rendered ++ ">")
+       return (next, T.pack $ "<" ++ rendered ++ ">")
   case next of
        TagComment s
-         | "<!--" `isPrefixOf` inp -> do
+         | "<!--" `T.isPrefixOf` inp -> do
           string "<!--"
-          count (length s) anyChar
+          count (T.length s) anyChar
           string "-->"
           stripComments <- getOption readerStripComments
           if stripComments
@@ -1272,12 +1265,12 @@ htmlTag f = try $ do
          handleTag tagname
        _ -> mzero
 
-mkAttr :: [(String, String)] -> Attr
+mkAttr :: [(Text, Text)] -> Attr
 mkAttr attr = (attribsId, attribsClasses, attribsKV)
   where attribsId = fromMaybe "" $ lookup "id" attr
-        attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes
+        attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
         attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
-        epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+        epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
 
 -- Strip namespace prefixes
 stripPrefixes :: [Tag Text] -> [Tag Text]
@@ -1304,11 +1297,11 @@ isSpace _    = False
 -- Utilities
 
 -- | Adjusts a url according to the document's base URL.
-canonicalizeUrl :: PandocMonad m => String -> TagParser m String
+canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text
 canonicalizeUrl url = do
   mbBaseHref <- baseHref <$> getState
-  return $ case (parseURIReference url, mbBaseHref) of
-                (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+  return $ case (parseURIReference (T.unpack url), mbBaseHref) of
+                (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
                 _                   -> url