diff options
| author | John MacFarlane <[email protected]> | 2022-10-18 12:42:12 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-10-18 12:42:12 -0700 |
| commit | eff82cfe4de44a111250ce9ce3ecee2fd4d99924 (patch) | |
| tree | 086bec92e1f2acdd6c61296418787adba75cf05b | |
| parent | 20492d523c8324e36781cfbbc8092c796f94b151 (diff) | |
Markdown reader: avoid duplicate ids with auto_identifiers.
We previously avoided generating a duplicate with another automatically
generated identifier; now we also avoid duplicates with explicit
identifiers that occur before the header for which an identifier
is being generated. (Collisions are still possible for identifiers
that occur after the header.)
T.P.Shared: `makeSections` is also modified so it doesn't give
bad results when the enclosing Div has a different identifier
from the header, as may now happen.
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 33 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 26 | ||||
| -rw-r--r-- | test/command/6384.md | 8 | ||||
| -rw-r--r-- | test/command/section-divs.md | 6 |
4 files changed, 45 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 773119fa3..b31f4792b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -368,7 +368,7 @@ referenceKey = try $ do addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines - let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs + let attr'@(ident,_,_) = extractIdClass $ foldl' (\x f -> f x) attr addKvs target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st @@ -380,6 +380,7 @@ referenceKey = try $ do -- or section. See #3701. logMessage $ DuplicateLinkReference raw pos _ -> return () + registerIdentifier ident updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty @@ -702,10 +703,11 @@ codeBlockFenced = try $ do (try $ do blockDelimiter (== c) (Just size) blanklines) - return $ return $ - case rawattr of - Left syn -> B.rawBlock syn contents - Right attr -> B.codeBlockWith attr contents + case rawattr of + Left syn -> return $ return $ B.rawBlock syn contents + Right attr@(ident,_,_) -> do + registerIdentifier ident + return $ return $ B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: Text -> Text @@ -1605,10 +1607,11 @@ code = try $ do <|> (Right <$> option ("",[],[]) (guardEnabled Ext_inline_code_attributes >> try attributes)) - return $ return $ - case rawattr of - Left syn -> B.rawInline syn $! result - Right attr -> B.codeWith attr $! result + case rawattr of + Left syn -> return $ return $ B.rawInline syn $! result + Right attr@(ident,_,_) -> do + registerIdentifier ident + return $ return $ B.codeWith attr $! result math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) @@ -1824,7 +1827,8 @@ bracketedSpan = do guardEnabled Ext_bracketed_spans try $ do (lab,_) <- reference - attr <- attributes + attr@(ident,_,_) <- attributes + registerIdentifier ident return $ wrapSpan attr <$> lab -- | Given an @Attr@ value, this returns a function to wrap the contents @@ -1863,8 +1867,9 @@ regLink constructor lab = try $ do rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) pos <- getPosition let src' = if rebase then rebasePath pos src else src - attr <- option nullAttr $ + attr@(ident,_,_) <- option nullAttr $ guardEnabled Ext_link_attributes >> attributes + registerIdentifier ident return $ constructor attr src' tit <$> lab -- a link like [this][ref] or [this][] or [this] @@ -2034,6 +2039,7 @@ spanHtml = do (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs + registerIdentifier ident let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ wrapSpan (ident, classes, keyvals) <$> contents @@ -2055,6 +2061,7 @@ divHtml = do then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs + registerIdentifier ident let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents @@ -2068,7 +2075,9 @@ divFenced = do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) + attribs@(ident,_,_) <- attributes + <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) + registerIdentifier ident skipMany spaceChar skipMany (char ':') blankline diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2b93b18f4..a7408b151 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -562,7 +562,7 @@ makeSections numbering mbBaseLevel bs = let attr = ("",classes,kvs') return $ Div divattr (Header level' attr title' : sectionContents') : rest' - go (Div divattr@(dident,dclasses,_) (Header level hattr title':ys) : xs) + go (Div divattr@(dident,dclasses,dkvs) (Header level hattr title':ys) : xs) | all (\case Header level' _ _ -> level' > level _ -> True) ys @@ -573,9 +573,15 @@ makeSections numbering mbBaseLevel bs = rest <- go xs return $ case inner of - [Div divattr'@(dident',_,_) zs] - | T.null dident || T.null dident' || dident == dident' - -> Div (combineAttr divattr' divattr) zs : rest + [Div (dident',dclasses'@("section":_),dkvs') + (Header lev (_,hcs,hkvs) ils : zs)] + -> Div (if T.null dident + then dident' + else dident, combineClasses dclasses' dclasses, + combineKvs dkvs' dkvs) + (Header lev (if T.null dident + then "" -- dident' promoted to Div + else dident', hcs, hkvs) ils : zs) : rest _ -> Div divattr inner : rest go (Div attr xs : rest) = do xs' <- go xs @@ -585,13 +591,15 @@ makeSections numbering mbBaseLevel bs = go (x:xs) = (x :) <$> go xs go [] = return [] - combineAttr :: Attr -> Attr -> Attr - combineAttr (id1, classes1, kvs1) (id2, classes2, kvs2) = - (if T.null id1 then id2 else id1, - ordNub (classes1 ++ classes2), + combineClasses :: [T.Text] -> [T.Text] -> [T.Text] + combineClasses classes1 classes2 = + classes1 ++ [cl | cl <- classes2, cl `notElem` classes1] + + combineKvs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)] + combineKvs kvs1 kvs2 = foldr (\(k,v) kvs -> case lookup k kvs of Nothing -> (k,v):kvs - Just _ -> kvs) mempty (kvs1 ++ kvs2)) + Just _ -> kvs) mempty (kvs1 ++ kvs2) headerLtEq :: Int -> Block -> Bool headerLtEq level (Header l _ _) = l <= level diff --git a/test/command/6384.md b/test/command/6384.md index 1be1c3e45..5873f433a 100644 --- a/test/command/6384.md +++ b/test/command/6384.md @@ -1,16 +1,16 @@ ``` % pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt ^D -<div id="command__file1.txt"> +<section id="command__file1.txt"> <h1 id="command__file1.txt__zed">Zed</h1> <p><a href="bar">foo</a> and <a href="#command__file1.txt__zed">Zed</a> and <a href="#command__file2.txt__zed">other Zed</a> and <a href="#command__file2.txt">other file</a> and <a href="c.md#zed">foreign Zed</a></p> -</div> -<div id="command__file2.txt"> +</section> +<section id="command__file2.txt"> <h2 id="command__file2.txt__zed">Zed</h2> <p><a href="baz">foo</a></p> -</div> +</section> ``` diff --git a/test/command/section-divs.md b/test/command/section-divs.md index 5ee28bba8..d34ed3bd1 100644 --- a/test/command/section-divs.md +++ b/test/command/section-divs.md @@ -14,12 +14,12 @@ Ok ::: ^D <section id="hi" class="level1"> -<h1>Hi</h1> +<h1 id="hi-1">Hi</h1> <section id="there" class="level2"> -<h2>there</h2> +<h2 id="there-1">there</h2> </section> </section> <section id="ok" class="level1"> -<h1>Ok</h1> +<h1 id="ok-1">Ok</h1> </section> ``` |
