diff options
| author | John MacFarlane <[email protected]> | 2022-09-18 19:17:48 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-09-18 19:17:48 -0700 |
| commit | c0ff59c272109d863beefcef0f796f758eaa8c84 (patch) | |
| tree | c98bf3b92753cdf3299b1d7c251ef212d6e12037 /src/Text | |
| parent | 311a3406878d4f02ffe4c087ece82f9d4156ff92 (diff) | |
BibTeX parser: fix handling of `%` in url field.
`%` does not function as a comment character inside `url`
(where URL-encoding is common).
Commit 6fb2973a582116cd515c6f7e68794cca22955511 mistakenly
took this reassignment of `%` to be a general feature of
braced (but not quoted) BibTeX fields.
This commit restores the correct behavior of `%` in braced fields
other than `url`, and corrects the behavior of `%` in `url`
when the value is quoted.
Closes #7678 (again).
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Citeproc/BibTeX.hs | 27 |
1 file changed, 23 insertions, 4 deletions
```diff
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 5db99d66b..548190439 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -846,10 +846,19 @@ inBraces :: BibParser Text
 inBraces = do
   char '{'
   res <- manyTill
+         ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\' && c /= '%')
+       <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+       <|> ("" <$ (char '%' >> anyLine))
+       <|> (braced <$> inBraces)
+         ) (char '}')
+  return $ T.concat res
+
+inBracesURL :: BibParser Text
+inBracesURL = do
+  char '{'
+  res <- manyTill
          ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
-       <|> (char '\\' >> (do c <- oneOf "{}"
-                             return $ T.pack ['\\',c])
-                         <|> return "\\")
+       <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
        <|> (braced <$> inBraces)
          ) (char '}')
   return $ T.concat res
@@ -867,6 +876,14 @@ inQuotes = do
        <|> braced <$> inBraces
          ) (char '"')
 
+inQuotesURL :: BibParser Text
+inQuotesURL = do
+  char '"'
+  T.concat <$> manyTill
+         ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
+       <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
+         ) (char '"')
+
 fieldName :: BibParser Text
 fieldName = resolveAlias . T.toLower <$>
   take1WhileP (\c ->
@@ -902,7 +919,9 @@ entField = do
   spaces'
   char '='
   spaces'
-  vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+  let inQ = if k == "url" then inQuotesURL else inQuotes
+  let inB = if k == "url" then inBracesURL else inBraces
+  vs <- (expandString <|> inQ <|> inB <|> rawWord) `sepBy`
             try (spaces' >> char '#' >> spaces')
   spaces'
   return (k, T.concat vs)
```
