diff options
| author | John MacFarlane <[email protected]> | 2013-08-20 22:21:34 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2013-08-20 22:21:34 -0700 |
| commit | 91a6f998d7a1295be759f918ce12651767fcca66 (patch) | |
| tree | da8d7733a7d8622c0dd9ad7ff781355cbc3bcbeb | |
| parent | b1d08a8aa824dee1ebf8f1db6b780516c22f0c4b (diff) | |
HTML reader: Improved handling of code with --parse-raw.issue221
A code or pre block that contains HTML formatting will be parsed
as raw HTML if `--parse-raw` is used.
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 60 |
1 files changed, 39 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7ca554fa3..6d19ab038 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -284,22 +284,31 @@ pPara = do pCodeBlock :: TagParser [Block] pCodeBlock = try $ do - TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) + opentag@(TagOpen _ attr) <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) - let rawText = concatMap fromTagText $ filter isTagText contents - -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText - -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' - let attribsId = fromMaybe "" $ lookup "id" attr - let attribsClasses = words $ fromMaybe "" $ lookup "class" attr - let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - let attribs = (attribsId, attribsClasses, attribsKV) - return [CodeBlock attribs result] + let needsRaw (TagOpen x _) = x /= "code" + needsRaw (TagClose x) = x /= "code" + needsRaw _ = False + parseRaw <- getOption readerParseRaw + if any needsRaw contents && parseRaw + then return $ -- code block has tags in it, treat as raw HTML + [RawBlock (Format "html") $ + renderTags' (opentag : contents ++ [TagClose "pre"])] + else do + let rawText = concatMap fromTagText $ filter isTagText contents + -- drop leading newline if any + let result' = case rawText of + '\n':xs -> xs + _ -> rawText + -- drop trailing newline if any + let result = case reverse result' of + '\n':_ -> init result' + _ -> result' + let attribsId = fromMaybe "" $ lookup "id" attr + let attribsClasses = words $ fromMaybe "" $ lookup "class" attr + let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + let attribs = (attribsId, attribsClasses, attribsKV) + return [CodeBlock attribs result] inline :: TagParser [Inline] inline = choice @@ -395,13 +404,22 @@ pImage = do pCode :: TagParser [Inline] pCode = try $ do - (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + opentag@(TagOpen open attr) <- pSatisfy $ + tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) - let ident = fromMaybe "" $ lookup "id" attr - let classes = words $ fromMaybe [] $ lookup "class" attr - let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr - return [Code (ident,classes,rest) - $ intercalate " " $ lines $ innerText result] + let needsRaw (TagOpen _ _) = True + needsRaw (TagClose _) = True + needsRaw _ = False + parseRaw <- getOption readerParseRaw + if any needsRaw result && parseRaw + then return [RawInline (Format "html") $ renderTags' + (opentag : result ++ [TagClose open])] + else do + let ident = fromMaybe "" $ lookup "id" attr + let classes = words $ fromMaybe [] $ lookup "class" attr + let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr + return [Code (ident,classes,rest) + $ intercalate " " $ lines $ innerText result] pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do |
