aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2022-10-18 12:42:12 -0700
committerJohn MacFarlane <[email protected]>2022-10-18 12:42:12 -0700
commiteff82cfe4de44a111250ce9ce3ecee2fd4d99924 (patch)
tree086bec92e1f2acdd6c61296418787adba75cf05b
parent20492d523c8324e36781cfbbc8092c796f94b151 (diff)
Markdown reader: avoid duplicate ids with auto_identifiers.
We previously avoided generating a duplicate with another automatically generated identifier; now we also avoid duplicates with explicit identifiers that occur before the header for which an identifier is being generated. (Collisions are still possible for identifiers that occur after the header.) T.P.Shared: `makeSections` is also modified so it doesn't give bad results when the enclosing Div has a different identifier from the header, as may now happen.
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs33
-rw-r--r--src/Text/Pandoc/Shared.hs26
-rw-r--r--test/command/6384.md8
-rw-r--r--test/command/section-divs.md6
4 files changed, 45 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 773119fa3..b31f4792b 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -368,7 +368,7 @@ referenceKey = try $ do
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
- let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs
+ let attr'@(ident,_,_) = extractIdClass $ foldl' (\x f -> f x) attr addKvs
target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
@@ -380,6 +380,7 @@ referenceKey = try $ do
-- or section. See #3701.
logMessage $ DuplicateLinkReference raw pos
_ -> return ()
+ registerIdentifier ident
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
@@ -702,10 +703,11 @@ codeBlockFenced = try $ do
(try $ do
blockDelimiter (== c) (Just size)
blanklines)
- return $ return $
- case rawattr of
- Left syn -> B.rawBlock syn contents
- Right attr -> B.codeBlockWith attr contents
+ case rawattr of
+ Left syn -> return $ return $ B.rawBlock syn contents
+ Right attr@(ident,_,_) -> do
+ registerIdentifier ident
+ return $ return $ B.codeBlockWith attr contents
-- correctly handle github language identifiers
toLanguageId :: Text -> Text
@@ -1605,10 +1607,11 @@ code = try $ do
<|>
(Right <$> option ("",[],[])
(guardEnabled Ext_inline_code_attributes >> try attributes))
- return $ return $
- case rawattr of
- Left syn -> B.rawInline syn $! result
- Right attr -> B.codeWith attr $! result
+ case rawattr of
+ Left syn -> return $ return $ B.rawInline syn $! result
+ Right attr@(ident,_,_) -> do
+ registerIdentifier ident
+ return $ return $ B.codeWith attr $! result
math :: PandocMonad m => MarkdownParser m (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
@@ -1824,7 +1827,8 @@ bracketedSpan = do
guardEnabled Ext_bracketed_spans
try $ do
(lab,_) <- reference
- attr <- attributes
+ attr@(ident,_,_) <- attributes
+ registerIdentifier ident
return $ wrapSpan attr <$> lab
-- | Given an @Attr@ value, this returns a function to wrap the contents
@@ -1863,8 +1867,9 @@ regLink constructor lab = try $ do
rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
pos <- getPosition
let src' = if rebase then rebasePath pos src else src
- attr <- option nullAttr $
+ attr@(ident,_,_) <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
+ registerIdentifier ident
return $ constructor attr src' tit <$> lab
-- a link like [this][ref] or [this][] or [this]
@@ -2034,6 +2039,7 @@ spanHtml = do
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
let ident = fromMaybe "" $ lookup "id" attrs
+ registerIdentifier ident
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ wrapSpan (ident, classes, keyvals) <$> contents
@@ -2055,6 +2061,7 @@ divHtml = do
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
+ registerIdentifier ident
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
@@ -2068,7 +2075,9 @@ divFenced = do
string ":::"
skipMany (char ':')
skipMany spaceChar
- attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+ attribs@(ident,_,_) <- attributes
+ <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+ registerIdentifier ident
skipMany spaceChar
skipMany (char ':')
blankline
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 2b93b18f4..a7408b151 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -562,7 +562,7 @@ makeSections numbering mbBaseLevel bs =
let attr = ("",classes,kvs')
return $
Div divattr (Header level' attr title' : sectionContents') : rest'
- go (Div divattr@(dident,dclasses,_) (Header level hattr title':ys) : xs)
+ go (Div divattr@(dident,dclasses,dkvs) (Header level hattr title':ys) : xs)
| all (\case
Header level' _ _ -> level' > level
_ -> True) ys
@@ -573,9 +573,15 @@ makeSections numbering mbBaseLevel bs =
rest <- go xs
return $
case inner of
- [Div divattr'@(dident',_,_) zs]
- | T.null dident || T.null dident' || dident == dident'
- -> Div (combineAttr divattr' divattr) zs : rest
+ [Div (dident',dclasses'@("section":_),dkvs')
+ (Header lev (_,hcs,hkvs) ils : zs)]
+ -> Div (if T.null dident
+ then dident'
+ else dident, combineClasses dclasses' dclasses,
+ combineKvs dkvs' dkvs)
+ (Header lev (if T.null dident
+ then "" -- dident' promoted to Div
+ else dident', hcs, hkvs) ils : zs) : rest
_ -> Div divattr inner : rest
go (Div attr xs : rest) = do
xs' <- go xs
@@ -585,13 +591,15 @@ makeSections numbering mbBaseLevel bs =
go (x:xs) = (x :) <$> go xs
go [] = return []
- combineAttr :: Attr -> Attr -> Attr
- combineAttr (id1, classes1, kvs1) (id2, classes2, kvs2) =
- (if T.null id1 then id2 else id1,
- ordNub (classes1 ++ classes2),
+ combineClasses :: [T.Text] -> [T.Text] -> [T.Text]
+ combineClasses classes1 classes2 =
+ classes1 ++ [cl | cl <- classes2, cl `notElem` classes1]
+
+ combineKvs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+ combineKvs kvs1 kvs2 =
foldr (\(k,v) kvs -> case lookup k kvs of
Nothing -> (k,v):kvs
- Just _ -> kvs) mempty (kvs1 ++ kvs2))
+ Just _ -> kvs) mempty (kvs1 ++ kvs2)
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
diff --git a/test/command/6384.md b/test/command/6384.md
index 1be1c3e45..5873f433a 100644
--- a/test/command/6384.md
+++ b/test/command/6384.md
@@ -1,16 +1,16 @@
```
% pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt
^D
-<div id="command__file1.txt">
+<section id="command__file1.txt">
<h1 id="command__file1.txt__zed">Zed</h1>
<p><a href="bar">foo</a>
and <a href="#command__file1.txt__zed">Zed</a>
and <a href="#command__file2.txt__zed">other Zed</a>
and <a href="#command__file2.txt">other file</a>
and <a href="c.md#zed">foreign Zed</a></p>
-</div>
-<div id="command__file2.txt">
+</section>
+<section id="command__file2.txt">
<h2 id="command__file2.txt__zed">Zed</h2>
<p><a href="baz">foo</a></p>
-</div>
+</section>
```
diff --git a/test/command/section-divs.md b/test/command/section-divs.md
index 5ee28bba8..d34ed3bd1 100644
--- a/test/command/section-divs.md
+++ b/test/command/section-divs.md
@@ -14,12 +14,12 @@ Ok
:::
^D
<section id="hi" class="level1">
-<h1>Hi</h1>
+<h1 id="hi-1">Hi</h1>
<section id="there" class="level2">
-<h2>there</h2>
+<h2 id="there-1">there</h2>
</section>
</section>
<section id="ok" class="level1">
-<h1>Ok</h1>
+<h1 id="ok-1">Ok</h1>
</section>
```