diff options
| author | John MacFarlane <[email protected]> | 2024-06-22 21:37:06 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-06-22 21:37:06 -0700 |
| commit | b07c05a1e55721a8a09ac372d79547ade752a882 (patch) | |
| tree | f157fbf89696e7bc289b829c13379381d3ff602d /src/Text | |
| parent | bee5b1fcd1d5900832b0e7e9bdcc81c8a3f5fab4 (diff) | |
OpenXML writer: be craftier in adding East Asian font hints.
In some cases we need to break up a long text run including
both western and East Asian text, so that the punctuation in
the western text doesn't become double-wide.
Closes #9817.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Writers/Docx/OpenXML.hs | 40 |
1 files changed, 27 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Writers/Docx/OpenXML.hs b/src/Text/Pandoc/Writers/Docx/OpenXML.hs index 8d4a53945..f93ffc940 100644 --- a/src/Text/Pandoc/Writers/Docx/OpenXML.hs +++ b/src/Text/Pandoc/Writers/Docx/OpenXML.hs @@ -24,7 +24,7 @@ import Control.Monad (when, unless) import Control.Applicative ((<|>)) import Control.Monad.Except (catchError) import qualified Data.ByteString.Lazy as BL -import Data.Char (isLetter) +import Data.Char (isLetter, isSpace) import Data.Bifunctor (first) import Text.Pandoc.Char (isCJK) import Data.Ord (comparing) @@ -64,7 +64,7 @@ import Text.Pandoc.Writers.Shared import Text.TeXMath import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML -import Data.List (sortBy, intercalate) +import Data.List (sortBy, intercalate, groupBy) -- from wml.xsd EG_RPrBase rPrTagOrder :: M.Map Text Int @@ -612,23 +612,37 @@ formattedString str = [w] -> formattedString' w ws -> do sh <- formattedRun [mknode "w:softHyphen" [] ()] - intercalate sh <$> mapM formattedString' ws + intercalate [sh] <$> mapM formattedString' ws formattedString' :: PandocMonad m => Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel - let addFontProp - | T.any isCJK str - = withTextProp (mknode "w:rFonts" [("w:hint","eastAsia")] ()) - | otherwise = id - addFontProp $ - formattedRun [ mktnode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] (stripInvalidChars str) ] - -formattedRun :: PandocMonad m => [Element] -> WS m [Element] + let mkrun s = + (if T.any isCJK s + then withTextProp (mknode "w:rFonts" [("w:hint","eastAsia")] ()) + else id) $ formattedRun + [ mktnode (if inDel then "w:delText" else "w:t") + [("xml:space","preserve")] $ s ] + mapM mkrun $ breakIntoChunks $ stripInvalidChars str + +-- For motivation see #9817. +breakIntoChunks :: Text -> [Text] +breakIntoChunks t + | T.null t = [] + | T.any isCJK t + = let cs = T.groupBy (\c d -> (isSpace c && isSpace d) || + not (isSpace c || isSpace d)) t + css = groupBy (\x y -> not (T.any isCJK x || T.any isCJK y) + || (T.all isSpace x && not (T.any isCJK y)) + || (T.all isSpace y && not (T.any isCJK x))) + cs + in map mconcat css + | otherwise = [t] + +formattedRun :: PandocMonad m => [Element] -> WS m Element formattedRun els = do props <- getTextProps - return [ mknode "w:r" [] $ props ++ els ] + return $ mknode "w:r" [] $ props ++ els -- | Convert an inline element to OpenXML. inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] |
