aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2024-06-22 21:37:06 -0700
committerJohn MacFarlane <[email protected]>2024-06-22 21:37:06 -0700
commitb07c05a1e55721a8a09ac372d79547ade752a882 (patch)
treef157fbf89696e7bc289b829c13379381d3ff602d /src/Text
parentbee5b1fcd1d5900832b0e7e9bdcc81c8a3f5fab4 (diff)
OpenXML writer: be craftier in adding East Asian font hints.
In some cases we need to break up a long text run including both western and East Asian text, so that the punctuation in the western text doesn't become double-wide. Closes #9817.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Writers/Docx/OpenXML.hs40
1 files changed, 27 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Writers/Docx/OpenXML.hs b/src/Text/Pandoc/Writers/Docx/OpenXML.hs
index 8d4a53945..f93ffc940 100644
--- a/src/Text/Pandoc/Writers/Docx/OpenXML.hs
+++ b/src/Text/Pandoc/Writers/Docx/OpenXML.hs
@@ -24,7 +24,7 @@ import Control.Monad (when, unless)
import Control.Applicative ((<|>))
import Control.Monad.Except (catchError)
import qualified Data.ByteString.Lazy as BL
-import Data.Char (isLetter)
+import Data.Char (isLetter, isSpace)
import Data.Bifunctor (first)
import Text.Pandoc.Char (isCJK)
import Data.Ord (comparing)
@@ -64,7 +64,7 @@ import Text.Pandoc.Writers.Shared
import Text.TeXMath
import Text.Pandoc.Writers.OOXML
import Text.Pandoc.XML.Light as XML
-import Data.List (sortBy, intercalate)
+import Data.List (sortBy, intercalate, groupBy)
-- from wml.xsd EG_RPrBase
rPrTagOrder :: M.Map Text Int
@@ -612,23 +612,37 @@ formattedString str =
[w] -> formattedString' w
ws -> do
sh <- formattedRun [mknode "w:softHyphen" [] ()]
- intercalate sh <$> mapM formattedString' ws
+ intercalate [sh] <$> mapM formattedString' ws
formattedString' :: PandocMonad m => Text -> WS m [Element]
formattedString' str = do
inDel <- asks envInDel
- let addFontProp
- | T.any isCJK str
- = withTextProp (mknode "w:rFonts" [("w:hint","eastAsia")] ())
- | otherwise = id
- addFontProp $
- formattedRun [ mktnode (if inDel then "w:delText" else "w:t")
- [("xml:space","preserve")] (stripInvalidChars str) ]
-
-formattedRun :: PandocMonad m => [Element] -> WS m [Element]
+ let mkrun s =
+ (if T.any isCJK s
+ then withTextProp (mknode "w:rFonts" [("w:hint","eastAsia")] ())
+ else id) $ formattedRun
+ [ mktnode (if inDel then "w:delText" else "w:t")
+ [("xml:space","preserve")] $ s ]
+ mapM mkrun $ breakIntoChunks $ stripInvalidChars str
+
+-- For motivation see #9817.
+breakIntoChunks :: Text -> [Text]
+breakIntoChunks t
+ | T.null t = []
+ | T.any isCJK t
+ = let cs = T.groupBy (\c d -> (isSpace c && isSpace d) ||
+ not (isSpace c || isSpace d)) t
+ css = groupBy (\x y -> not (T.any isCJK x || T.any isCJK y)
+ || (T.all isSpace x && not (T.any isCJK y))
+ || (T.all isSpace y && not (T.any isCJK x)))
+ cs
+ in map mconcat css
+ | otherwise = [t]
+
+formattedRun :: PandocMonad m => [Element] -> WS m Element
formattedRun els = do
props <- getTextProps
- return [ mknode "w:r" [] $ props ++ els ]
+ return $ mknode "w:r" [] $ props ++ els
-- | Convert an inline element to OpenXML.
inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]