aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2022-09-18 19:17:48 -0700
committer John MacFarlane <[email protected]> 2022-09-18 19:17:48 -0700
commit c0ff59c272109d863beefcef0f796f758eaa8c84 (patch)
tree c98bf3b92753cdf3299b1d7c251ef212d6e12037
parent 311a3406878d4f02ffe4c087ece82f9d4156ff92 (diff)
BibTeX parser: fix handling of `%` in url field.
`%` does not function as a comment character inside `url` (where URL-encoding is common). Commit 6fb2973a582116cd515c6f7e68794cca22955511 mistakenly took this reassignment of `%` to be a general feature of braced (but not quoted) BibTeX fields. This commit restores the correct behavior of `%` in braced fields other than `url`, and corrects the behavior of `%` in `url` when the value is quoted. Closes #7678 (again).
-rw-r--r-- src/Text/Pandoc/Citeproc/BibTeX.hs 27
-rw-r--r-- test/command/7678.md 31
2 files changed, 54 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 5db99d66b..548190439 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -846,10 +846,19 @@ inBraces :: BibParser Text
inBraces = do
char '{'
res <- manyTill
+ ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\' && c /= '%')
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ <|> ("" <$ (char '%' >> anyLine))
+ <|> (braced <$> inBraces)
+ ) (char '}')
+ return $ T.concat res
+
+inBracesURL :: BibParser Text
+inBracesURL = do
+ char '{'
+ res <- manyTill
( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
- <|> (char '\\' >> (do c <- oneOf "{}"
- return $ T.pack ['\\',c])
- <|> return "\\")
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
<|> (braced <$> inBraces)
) (char '}')
return $ T.concat res
@@ -867,6 +876,14 @@ inQuotes = do
<|> braced <$> inBraces
) (char '"')
+inQuotesURL :: BibParser Text
+inQuotesURL = do
+ char '"'
+ T.concat <$> manyTill
+ ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
+ <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+ ) (char '"')
+
fieldName :: BibParser Text
fieldName = resolveAlias . T.toLower
<$> take1WhileP (\c ->
@@ -902,7 +919,9 @@ entField = do
spaces'
char '='
spaces'
- vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+ let inQ = if k == "url" then inQuotesURL else inQuotes
+ let inB = if k == "url" then inBracesURL else inBraces
+ vs <- (expandString <|> inQ <|> inB <|> rawWord) `sepBy`
try (spaces' >> char '#' >> spaces')
spaces'
return (k, T.concat vs)
diff --git a/test/command/7678.md b/test/command/7678.md
new file mode 100644
index 000000000..b6a09ef20
--- /dev/null
+++ b/test/command/7678.md
@@ -0,0 +1,31 @@
+```
+% pandoc -f bibtex -t csljson
+@misc{doe,
+ author = "Jane Doe",
+ title = "Work",
+ year = "2021",
+ url = "%20and%20"
+}
+^D
+[
+ {
+ "URL": "%20and%20",
+ "author": [
+ {
+ "family": "Doe",
+ "given": "Jane"
+ }
+ ],
+ "id": "doe",
+ "issued": {
+ "date-parts": [
+ [
+ 2021
+ ]
+ ]
+ },
+ "title": "Work",
+ "type": ""
+ }
+]
+```