about · summary · refs · log · tree · commit · diff
path: root/src/Text
diff options
context:
space:
mode:
author: John MacFarlane <[email protected]> 2022-09-18 19:17:48 -0700
committer: John MacFarlane <[email protected]> 2022-09-18 19:17:48 -0700
commit: c0ff59c272109d863beefcef0f796f758eaa8c84 (patch)
tree: c98bf3b92753cdf3299b1d7c251ef212d6e12037 /src/Text
parent: 311a3406878d4f02ffe4c087ece82f9d4156ff92 (diff)
BibTeX parser: fix handling of `%` in url field.
`%` does not function as a comment character inside `url` (where URL-encoding is common). Commit 6fb2973a582116cd515c6f7e68794cca22955511 mistakenly took this reassignment of `%` to be a general feature of braced (but not quoted) BibTeX fields. This commit restores the correct behavior of `%` in braced fields other than `url`, and corrects the behavior of `%` in `url` when the value is quoted. Closes #7678 (again).
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Citeproc/BibTeX.hs | 27
1 files changed, 23 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 5db99d66b..548190439 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -846,10 +846,19 @@ inBraces :: BibParser Text
inBraces = do
char '{'
res <- manyTill
+ ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\' && c /= '%')
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ <|> ("" <$ (char '%' >> anyLine))
+ <|> (braced <$> inBraces)
+ ) (char '}')
+ return $ T.concat res
+
+inBracesURL :: BibParser Text
+inBracesURL = do
+ char '{'
+ res <- manyTill
( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
- <|> (char '\\' >> (do c <- oneOf "{}"
- return $ T.pack ['\\',c])
- <|> return "\\")
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
<|> (braced <$> inBraces)
) (char '}')
return $ T.concat res
@@ -867,6 +876,14 @@ inQuotes = do
<|> braced <$> inBraces
) (char '"')
+inQuotesURL :: BibParser Text
+inQuotesURL = do
+ char '"'
+ T.concat <$> manyTill
+ ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ ) (char '"')
+
fieldName :: BibParser Text
fieldName = resolveAlias . T.toLower
<$> take1WhileP (\c ->
@@ -902,7 +919,9 @@ entField = do
spaces'
char '='
spaces'
- vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+ let inQ = if k == "url" then inQuotesURL else inQuotes
+ let inB = if k == "url" then inBracesURL else inBraces
+ vs <- (expandString <|> inQ <|> inB <|> rawWord) `sepBy`
try (spaces' >> char '#' >> spaces')
spaces'
return (k, T.concat vs)