From eff82cfe4de44a111250ce9ce3ecee2fd4d99924 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 18 Oct 2022 12:42:12 -0700 Subject: Markdown reader: avoid duplicate ids with auto_identifiers. We previously avoided generating a duplicate with another automatically generated identifier; now we also avoid duplicates with explicit identifiers that occur before the header for which an identifier is being generated. (Collisions are still possible for identifiers that occur after the header.) T.P.Shared: `makeSections` is also modified so it doesn't give bad results when the enclosing Div has a different identifier from the header, as may now happen. --- src/Text/Pandoc/Readers/Markdown.hs | 33 +++++++++++++++++++++------------ src/Text/Pandoc/Shared.hs | 26 +++++++++++++++++--------- test/command/6384.md | 8 ++++---- test/command/section-divs.md | 6 +++--- 4 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 773119fa3..b31f4792b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -368,7 +368,7 @@ referenceKey = try $ do addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines - let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs + let attr'@(ident,_,_) = extractIdClass $ foldl' (\x f -> f x) attr addKvs target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st @@ -380,6 +380,7 @@ referenceKey = try $ do -- or section. See #3701. logMessage $ DuplicateLinkReference raw pos _ -> return () + registerIdentifier ident updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty @@ -702,10 +703,11 @@ codeBlockFenced = try $ do (try $ do blockDelimiter (== c) (Just size) blanklines) - return $ return $ - case rawattr of - Left syn -> B.rawBlock syn contents - Right attr -> B.codeBlockWith attr contents + case rawattr of + Left syn -> return $ return $ B.rawBlock syn contents + Right attr@(ident,_,_) -> do + registerIdentifier ident + return $ return $ B.codeBlockWith attr contents -- correctly handle github language identifiers toLanguageId :: Text -> Text @@ -1605,10 +1607,11 @@ code = try $ do <|> (Right <$> option ("",[],[]) (guardEnabled Ext_inline_code_attributes >> try attributes)) - return $ return $ - case rawattr of - Left syn -> B.rawInline syn $! result - Right attr -> B.codeWith attr $! result + case rawattr of + Left syn -> return $ return $ B.rawInline syn $! result + Right attr@(ident,_,_) -> do + registerIdentifier ident + return $ return $ B.codeWith attr $! result math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) @@ -1824,7 +1827,8 @@ bracketedSpan = do guardEnabled Ext_bracketed_spans try $ do (lab,_) <- reference - attr <- attributes + attr@(ident,_,_) <- attributes + registerIdentifier ident return $ wrapSpan attr <$> lab -- | Given an @Attr@ value, this returns a function to wrap the contents @@ -1863,8 +1867,9 @@ regLink constructor lab = try $ do rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) pos <- getPosition let src' = if rebase then rebasePath pos src else src - attr <- option nullAttr $ + attr@(ident,_,_) <- option nullAttr $ guardEnabled Ext_link_attributes >> attributes + registerIdentifier ident return $ constructor attr src' tit <$> lab -- a link like [this][ref] or [this][] or [this] @@ -2034,6 +2039,7 @@ spanHtml = do (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs + registerIdentifier ident let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ wrapSpan (ident, classes, keyvals) <$> contents @@ -2055,6 +2061,7 @@ divHtml = do then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs + registerIdentifier ident let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents @@ -2068,7 +2075,9 @@ divFenced = do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) + attribs@(ident,_,_) <- attributes + <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) + registerIdentifier ident skipMany spaceChar skipMany (char ':') blankline diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2b93b18f4..a7408b151 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -562,7 +562,7 @@ makeSections numbering mbBaseLevel bs = let attr = ("",classes,kvs') return $ Div divattr (Header level' attr title' : sectionContents') : rest' - go (Div divattr@(dident,dclasses,_) (Header level hattr title':ys) : xs) + go (Div divattr@(dident,dclasses,dkvs) (Header level hattr title':ys) : xs) | all (\case Header level' _ _ -> level' > level _ -> True) ys @@ -573,9 +573,15 @@ makeSections numbering mbBaseLevel bs = rest <- go xs return $ case inner of - [Div divattr'@(dident',_,_) zs] - | T.null dident || T.null dident' || dident == dident' - -> Div (combineAttr divattr' divattr) zs : rest + [Div (dident',dclasses'@("section":_),dkvs') + (Header lev (_,hcs,hkvs) ils : zs)] + -> Div (if T.null dident + then dident' + else dident, combineClasses dclasses' dclasses, + combineKvs dkvs' dkvs) + (Header lev (if T.null dident + then "" -- dident' promoted to Div + else dident', hcs, hkvs) ils : zs) : rest _ -> Div divattr inner : rest go (Div attr xs : rest) = do xs' <- go xs @@ -585,13 +591,15 @@ makeSections numbering mbBaseLevel bs = go (x:xs) = (x :) <$> go xs go [] = return [] - combineAttr :: Attr -> Attr -> Attr - combineAttr (id1, classes1, kvs1) (id2, classes2, kvs2) = - (if T.null id1 then id2 else id1, - ordNub (classes1 ++ classes2), + combineClasses :: [T.Text] -> [T.Text] -> [T.Text] + combineClasses classes1 classes2 = + classes1 ++ [cl | cl <- classes2, cl `notElem` classes1] + + combineKvs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)] + combineKvs kvs1 kvs2 = foldr (\(k,v) kvs -> case lookup k kvs of Nothing -> (k,v):kvs - Just _ -> kvs) mempty (kvs1 ++ kvs2)) + Just _ -> kvs) mempty (kvs1 ++ kvs2) headerLtEq :: Int -> Block -> Bool headerLtEq level (Header l _ _) = l <= level diff --git a/test/command/6384.md b/test/command/6384.md index 1be1c3e45..5873f433a 100644 --- a/test/command/6384.md +++ b/test/command/6384.md @@ -1,16 +1,16 @@ ``` % pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt ^D -
+

Zed

foo and Zed and other Zed and other file and foreign Zed

-
-
+ +

Zed

foo

-
+ ``` diff --git a/test/command/section-divs.md b/test/command/section-divs.md index 5ee28bba8..d34ed3bd1 100644 --- a/test/command/section-divs.md +++ b/test/command/section-divs.md @@ -14,12 +14,12 @@ Ok ::: ^D
-

Hi

+

Hi

-

there

+

there

-

Ok

+

Ok

``` -- cgit v1.2.3