diff options
| author | John MacFarlane <[email protected]> | 2022-09-06 20:54:06 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-09-06 21:23:59 -0700 |
| commit | 1ff6afdec8e0def4445026262ec941c70d6f3d07 (patch) | |
| tree | ec808bcd485548141e3e21d09cbaa24931497aa6 | |
| parent | 47dcb5720e6f3bb334df5fb58b0fbe32c062a4a4 (diff) | |
Add prefixes to identifiers with `--file-scope`.multifile
This change only affects the case where `--file-scope` is used
and more than one file is specified on the command line.
In this case, identifiers will be prefixed with a string
derived from the file path, to disambiguate them. For example,
an identifier `foo` in `contents/file1.txt` will become
`contents__file1.txt__foo`. Links will be adjusted accordingly:
if `file2.txt` links to `file1.txt#foo`, then the link will
be changed to point to `#file1.txt__foo`. Similarly, a link
to `file1.txt` will point to `#file1.txt`. A Div with an
identifier derived from the file path will be added around
each file's content, so that links to files will still work.
Closes #6384.
[API change]: Text.Pandoc.Shared exports `textToIdentifier`.
| -rw-r--r-- | MANUAL.txt | 11 | ||||
| -rw-r--r-- | pandoc.cabal | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/App.hs | 68 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 13 | ||||
| -rw-r--r-- | test/command/6384.md | 16 | ||||
| -rw-r--r-- | test/command/file1.txt | 9 | ||||
| -rw-r--r-- | test/command/file2.txt | 5 |
7 files changed, 117 insertions, 7 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 4ba409c76..e80c920f2 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -564,6 +564,17 @@ header when requesting a document from a URL: footnotes and links will not work across files. Reading binary files (docx, odt, epub) implies `--file-scope`. + If two or more files are processed using `--file-scope`, + prefixes based on the filenames will be added to identifiers + in order to disambiguate them, and internal links will + be adjusted accordingly. For example, a header with + identifier `foo` in `subdir/file1.txt` will have its + identifier changed to `subdir__file1.txt__foo`. + + In addition, a Div with an identifier based on the filename + will be added around the file's content, so that internal + links to the filename will point to this Div's identifier. + `-F` *PROGRAM*, `--filter=`*PROGRAM* : Specify an executable to be used as a filter transforming the diff --git a/pandoc.cabal b/pandoc.cabal index e0becb723..ac4949137 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -216,6 +216,8 @@ extra-source-files: test/command/B.txt test/command/C.txt test/command/D.txt + test/command/file1.txt + test/command/file2.txt test/command/three.txt test/command/01.csv test/command/chap1/spider.png diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 66c37b0e0..e5e916a14 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -49,6 +49,7 @@ import System.FilePath ( takeBaseName, takeExtension) import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import Text.Pandoc +import Text.Pandoc.Walk (walk) import Text.Pandoc.Builder (setMeta) import Text.Pandoc.MediaBag (mediaItems) import Text.Pandoc.MIME (getCharset, MimeType) @@ -66,7 +67,7 @@ import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, - defaultUserDataDir, tshow) + defaultUserDataDir, tshow, textToIdentifier) import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.Readers.Custom (readCustom) @@ -315,6 +316,7 @@ convertWithOpts' istty datadir opts = do inputs <- readSources sources + doc <- (case reader of TextReader r | readerNameBase == "json" -> @@ -323,8 +325,11 @@ convertWithOpts' istty datadir opts = do >=> r readerOpts . (:[])) inputs | optFileScope opts -> mconcat <$> mapM - (inputToText convertTabs - >=> r readerOpts . (:[])) + (\source -> do + (fp, txt) <- inputToText convertTabs source + adjustLinksAndIds (readerExtensions readerOpts) + (T.pack fp) (map (T.pack . fst) inputs) + <$> r readerOpts [(fp, txt)]) inputs | otherwise -> mapM (inputToText convertTabs) inputs >>= r readerOpts @@ -465,3 +470,60 @@ writeFnBinary f = BL.writeFile (UTF8.encodePath f) writerFn :: IO.Newline -> FilePath -> Text -> IO () writerFn eol "-" = UTF8.putStrWith eol writerFn eol f = UTF8.writeFileWith eol f + +adjustLinksAndIds :: Extensions -> Text -> [Text] -> Pandoc -> Pandoc +adjustLinksAndIds exts thisfile allfiles + | length allfiles > 1 = addDiv . walk fixInline . walk fixBlock + | otherwise = id + where + toIdent :: Text -> Text + toIdent = textToIdentifier exts . T.intercalate "__" . + T.split (\c -> c == '/' || c == '\\') + + addDiv :: Pandoc -> Pandoc + addDiv (Pandoc m bs) + | T.null thisfile = Pandoc m bs + | otherwise = Pandoc m [Div (toIdent thisfile,[],[]) bs] + + fixBlock :: Block -> Block + fixBlock (CodeBlock attr t) = CodeBlock (fixAttrs attr) t + fixBlock (Header lev attr ils) = Header lev (fixAttrs attr) ils + fixBlock (Table attr cap cols th tbs tf) = + Table (fixAttrs attr) cap cols th tbs tf + fixBlock (Div attr bs) = Div (fixAttrs attr) bs + fixBlock x = x + + -- add thisfile as prefix of identifier + fixAttrs :: Attr -> Attr + fixAttrs (i,cs,kvs) + | T.null i = (i,cs,kvs) + | otherwise = + (T.intercalate "__" + (filter (not . T.null) [toIdent thisfile, i]), + cs, kvs) + + -- if URL begins with file from allfiles, convert to + -- an internal link with the appropriate identifier + fixURL :: Text -> Text + fixURL u = + let (a,b) = T.break (== '#') u + filepart = if T.null a + then toIdent thisfile + else toIdent a + fragpart = T.dropWhile (== '#') b + in if T.null a || a `elem` allfiles + then "#" <> T.intercalate "__" + (filter (not . T.null) [filepart, fragpart]) + else u + + fixInline :: Inline -> Inline + fixInline (Code attr t) = Code (fixAttrs attr) t + fixInline (Link attr ils (url,tit)) = + Link (fixAttrs attr) ils (fixURL url,tit) + fixInline (Image attr ils (url,tit)) = + Image (fixAttrs attr) ils (fixURL url,tit) + fixInline (Span attr ils) = Span (fixAttrs attr) ils + fixInline x = x + + + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 35a854bf6..3afa1c0c9 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -57,6 +57,7 @@ module Text.Pandoc.Shared ( makeSections, uniqueIdent, inlineListToIdentifier, + textToIdentifier, isHeaderBlock, headerShift, stripEmptyParagraphs, @@ -497,12 +498,10 @@ isPara :: Block -> Bool isPara (Para _) = True isPara _ = False --- | Convert Pandoc inline list to plain text identifier. HTML --- identifiers must start with a letter, and may contain only --- letters, digits, and the characters _-. +-- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: Extensions -> [Inline] -> T.Text inlineListToIdentifier exts = - dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify + textToIdentifier exts . stringify . walk unEmojify where unEmojify :: [Inline] -> [Inline] unEmojify @@ -511,6 +510,12 @@ inlineListToIdentifier exts = | otherwise = id unEmoji (Span ("",["emoji"],[("data-emoji",ename)]) _) = Str ename unEmoji x = x + +-- | Convert string to plain text identifier. +textToIdentifier :: Extensions -> T.Text -> T.Text +textToIdentifier exts = + dropNonLetter . filterAscii . toIdent + where dropNonLetter | extensionEnabled Ext_gfm_auto_identifiers exts = id | otherwise = T.dropWhile (not . isAlpha) diff --git a/test/command/6384.md b/test/command/6384.md new file mode 100644 index 000000000..1be1c3e45 --- /dev/null +++ b/test/command/6384.md @@ -0,0 +1,16 @@ +``` +% pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt +^D +<div id="command__file1.txt"> +<h1 id="command__file1.txt__zed">Zed</h1> +<p><a href="bar">foo</a> +and <a href="#command__file1.txt__zed">Zed</a> +and <a href="#command__file2.txt__zed">other Zed</a> +and <a href="#command__file2.txt">other file</a> +and <a href="c.md#zed">foreign Zed</a></p> +</div> +<div id="command__file2.txt"> +<h2 id="command__file2.txt__zed">Zed</h2> +<p><a href="baz">foo</a></p> +</div> +``` diff --git a/test/command/file1.txt b/test/command/file1.txt new file mode 100644 index 000000000..5416f3a6c --- /dev/null +++ b/test/command/file1.txt @@ -0,0 +1,9 @@ +# Zed + +[foo]: bar + +[foo] +and [Zed](#zed) +and [other Zed](command/file2.txt#zed) +and [other file](command/file2.txt) +and [foreign Zed](c.md#zed) diff --git a/test/command/file2.txt b/test/command/file2.txt new file mode 100644 index 000000000..20ee06c8c --- /dev/null +++ b/test/command/file2.txt @@ -0,0 +1,5 @@ +## Zed + +[foo]: baz + +[foo] |
