aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2022-09-06 20:54:06 -0700
committerJohn MacFarlane <[email protected]>2022-09-06 21:23:59 -0700
commit1ff6afdec8e0def4445026262ec941c70d6f3d07 (patch)
treeec808bcd485548141e3e21d09cbaa24931497aa6
parent47dcb5720e6f3bb334df5fb58b0fbe32c062a4a4 (diff)
Add prefixes to identifiers with `--file-scope`.multifile
This change only affects the case where `--file-scope` is used and more than one file is specified on the command line. In this case, identifiers will be prefixed with a string derived from the file path, to disambiguate them. For example, an identifier `foo` in `contents/file1.txt` will become `contents__file1.txt__foo`. Links will be adjusted accordingly: if `file2.txt` links to `file1.txt#foo`, then the link will be changed to point to `#file1.txt__foo`. Similarly, a link to `file1.txt` will point to `#file1.txt`. A Div with an identifier derived from the file path will be added around each file's content, so that links to files will still work. Closes #6384. [API change]: Text.Pandoc.Shared exports `textToIdentifier`.
-rw-r--r--MANUAL.txt11
-rw-r--r--pandoc.cabal2
-rw-r--r--src/Text/Pandoc/App.hs68
-rw-r--r--src/Text/Pandoc/Shared.hs13
-rw-r--r--test/command/6384.md16
-rw-r--r--test/command/file1.txt9
-rw-r--r--test/command/file2.txt5
7 files changed, 117 insertions, 7 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 4ba409c76..e80c920f2 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -564,6 +564,17 @@ header when requesting a document from a URL:
footnotes and links will not work across files. Reading binary
files (docx, odt, epub) implies `--file-scope`.
+ If two or more files are processed using `--file-scope`,
+ prefixes based on the filenames will be added to identifiers
+ in order to disambiguate them, and internal links will
+ be adjusted accordingly. For example, a header with
+ identifier `foo` in `subdir/file1.txt` will have its
+ identifier changed to `subdir__file1.txt__foo`.
+
+ In addition, a Div with an identifier based on the filename
+ will be added around the file's content, so that internal
+ links to the filename will point to this Div's identifier.
+
`-F` *PROGRAM*, `--filter=`*PROGRAM*
: Specify an executable to be used as a filter transforming the
diff --git a/pandoc.cabal b/pandoc.cabal
index e0becb723..ac4949137 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -216,6 +216,8 @@ extra-source-files:
test/command/B.txt
test/command/C.txt
test/command/D.txt
+ test/command/file1.txt
+ test/command/file2.txt
test/command/three.txt
test/command/01.csv
test/command/chap1/spider.png
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 66c37b0e0..e5e916a14 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -49,6 +49,7 @@ import System.FilePath ( takeBaseName, takeExtension)
import System.IO (nativeNewline, stdout)
import qualified System.IO as IO (Newline (..))
import Text.Pandoc
+import Text.Pandoc.Walk (walk)
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.MediaBag (mediaItems)
import Text.Pandoc.MIME (getCharset, MimeType)
@@ -66,7 +67,7 @@ import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.SelfContained (makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
- defaultUserDataDir, tshow)
+ defaultUserDataDir, tshow, textToIdentifier)
import Text.Pandoc.Writers.Shared (lookupMetaString)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.Readers.Custom (readCustom)
@@ -315,6 +316,7 @@ convertWithOpts' istty datadir opts = do
inputs <- readSources sources
+
doc <- (case reader of
TextReader r
| readerNameBase == "json" ->
@@ -323,8 +325,11 @@ convertWithOpts' istty datadir opts = do
>=> r readerOpts . (:[])) inputs
| optFileScope opts ->
mconcat <$> mapM
- (inputToText convertTabs
- >=> r readerOpts . (:[]))
+ (\source -> do
+ (fp, txt) <- inputToText convertTabs source
+ adjustLinksAndIds (readerExtensions readerOpts)
+ (T.pack fp) (map (T.pack . fst) inputs)
+ <$> r readerOpts [(fp, txt)])
inputs
| otherwise -> mapM (inputToText convertTabs) inputs
>>= r readerOpts
@@ -465,3 +470,60 @@ writeFnBinary f = BL.writeFile (UTF8.encodePath f)
writerFn :: IO.Newline -> FilePath -> Text -> IO ()
writerFn eol "-" = UTF8.putStrWith eol
writerFn eol f = UTF8.writeFileWith eol f
+
+adjustLinksAndIds :: Extensions -> Text -> [Text] -> Pandoc -> Pandoc
+adjustLinksAndIds exts thisfile allfiles
+ | length allfiles > 1 = addDiv . walk fixInline . walk fixBlock
+ | otherwise = id
+ where
+ toIdent :: Text -> Text
+ toIdent = textToIdentifier exts . T.intercalate "__" .
+ T.split (\c -> c == '/' || c == '\\')
+
+ addDiv :: Pandoc -> Pandoc
+ addDiv (Pandoc m bs)
+ | T.null thisfile = Pandoc m bs
+ | otherwise = Pandoc m [Div (toIdent thisfile,[],[]) bs]
+
+ fixBlock :: Block -> Block
+ fixBlock (CodeBlock attr t) = CodeBlock (fixAttrs attr) t
+ fixBlock (Header lev attr ils) = Header lev (fixAttrs attr) ils
+ fixBlock (Table attr cap cols th tbs tf) =
+ Table (fixAttrs attr) cap cols th tbs tf
+ fixBlock (Div attr bs) = Div (fixAttrs attr) bs
+ fixBlock x = x
+
+ -- add thisfile as prefix of identifier
+ fixAttrs :: Attr -> Attr
+ fixAttrs (i,cs,kvs)
+ | T.null i = (i,cs,kvs)
+ | otherwise =
+ (T.intercalate "__"
+ (filter (not . T.null) [toIdent thisfile, i]),
+ cs, kvs)
+
+ -- if URL begins with file from allfiles, convert to
+ -- an internal link with the appropriate identifier
+ fixURL :: Text -> Text
+ fixURL u =
+ let (a,b) = T.break (== '#') u
+ filepart = if T.null a
+ then toIdent thisfile
+ else toIdent a
+ fragpart = T.dropWhile (== '#') b
+ in if T.null a || a `elem` allfiles
+ then "#" <> T.intercalate "__"
+ (filter (not . T.null) [filepart, fragpart])
+ else u
+
+ fixInline :: Inline -> Inline
+ fixInline (Code attr t) = Code (fixAttrs attr) t
+ fixInline (Link attr ils (url,tit)) =
+ Link (fixAttrs attr) ils (fixURL url,tit)
+ fixInline (Image attr ils (url,tit)) =
+ Image (fixAttrs attr) ils (fixURL url,tit)
+ fixInline (Span attr ils) = Span (fixAttrs attr) ils
+ fixInline x = x
+
+
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 35a854bf6..3afa1c0c9 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -57,6 +57,7 @@ module Text.Pandoc.Shared (
makeSections,
uniqueIdent,
inlineListToIdentifier,
+ textToIdentifier,
isHeaderBlock,
headerShift,
stripEmptyParagraphs,
@@ -497,12 +498,10 @@ isPara :: Block -> Bool
isPara (Para _) = True
isPara _ = False
--- | Convert Pandoc inline list to plain text identifier. HTML
--- identifiers must start with a letter, and may contain only
--- letters, digits, and the characters _-.
+-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: Extensions -> [Inline] -> T.Text
inlineListToIdentifier exts =
- dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify
+ textToIdentifier exts . stringify . walk unEmojify
where
unEmojify :: [Inline] -> [Inline]
unEmojify
@@ -511,6 +510,12 @@ inlineListToIdentifier exts =
| otherwise = id
unEmoji (Span ("",["emoji"],[("data-emoji",ename)]) _) = Str ename
unEmoji x = x
+
+-- | Convert string to plain text identifier.
+textToIdentifier :: Extensions -> T.Text -> T.Text
+textToIdentifier exts =
+ dropNonLetter . filterAscii . toIdent
+ where
dropNonLetter
| extensionEnabled Ext_gfm_auto_identifiers exts = id
| otherwise = T.dropWhile (not . isAlpha)
diff --git a/test/command/6384.md b/test/command/6384.md
new file mode 100644
index 000000000..1be1c3e45
--- /dev/null
+++ b/test/command/6384.md
@@ -0,0 +1,16 @@
+```
+% pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt
+^D
+<div id="command__file1.txt">
+<h1 id="command__file1.txt__zed">Zed</h1>
+<p><a href="bar">foo</a>
+and <a href="#command__file1.txt__zed">Zed</a>
+and <a href="#command__file2.txt__zed">other Zed</a>
+and <a href="#command__file2.txt">other file</a>
+and <a href="c.md#zed">foreign Zed</a></p>
+</div>
+<div id="command__file2.txt">
+<h2 id="command__file2.txt__zed">Zed</h2>
+<p><a href="baz">foo</a></p>
+</div>
+```
diff --git a/test/command/file1.txt b/test/command/file1.txt
new file mode 100644
index 000000000..5416f3a6c
--- /dev/null
+++ b/test/command/file1.txt
@@ -0,0 +1,9 @@
+# Zed
+
+[foo]: bar
+
+[foo]
+and [Zed](#zed)
+and [other Zed](command/file2.txt#zed)
+and [other file](command/file2.txt)
+and [foreign Zed](c.md#zed)
diff --git a/test/command/file2.txt b/test/command/file2.txt
new file mode 100644
index 000000000..20ee06c8c
--- /dev/null
+++ b/test/command/file2.txt
@@ -0,0 +1,5 @@
+## Zed
+
+[foo]: baz
+
+[foo]