diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 101 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 18 |
3 files changed, 54 insertions, 68 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 54736cd0e..427a73dbe 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,10 +1,9 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu> + 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2020 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -59,79 +58,61 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines where import Data.List -import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>)) -import qualified Data.Sequence as Seq (null) +import Data.Bifunctor +import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl + , (><), (|>) ) import Text.Pandoc.Builder data Modifier a = Modifier (a -> a) | AttrModifier (Attr -> a -> a) Attr - | NullModifier spaceOutInlinesL :: Inlines -> (Inlines, Inlines) spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m + where (l, (fs, m'), r) = spaceOutInlines ms spaceOutInlinesR :: Inlines -> (Inlines, Inlines) spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m + where (l, (fs, m'), r) = spaceOutInlines ms -spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines) spaceOutInlines ils = let (fs, ils') = unstackInlines ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, stackInlines fs $ trimInlines . Many $ contents, right) + (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils' + -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element + in (Many left, (fs, Many contents'), Many right) + +isSpace :: Inline -> Bool +isSpace Space = True +isSpace SoftBreak = True +isSpace _ = False stackInlines :: [Modifier Inlines] -> Inlines -> Inlines stackInlines [] ms = ms -stackInlines (NullModifier : fs) ms = stackInlines fs ms stackInlines (Modifier f : fs) ms = - if isEmpty ms + if null ms then stackInlines fs ms else f $ stackInlines fs ms stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) -unstackInlines ms = case ilModifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = ilModifier ms - (fs, ms') = unstackInlines $ ilInnards ms - -ilModifier :: Inlines -> Modifier Inlines -ilModifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - -ilInnards :: Inlines -> Inlines -ilInnards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Link _ lst _) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils +unstackInlines ms = case ilModifierAndInnards ms of + Nothing -> ([], ms) + Just (f, inner) -> first (f :) $ unstackInlines inner + +ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines) +ilModifierAndInnards ils = case viewl $ unMany ils of + x :< xs | null xs -> second fromList <$> case x of + Emph lst -> Just (Modifier emph, lst) + Strong lst -> Just (Modifier strong, lst) + SmallCaps lst -> Just (Modifier smallcaps, lst) + Strikeout lst -> Just (Modifier strikeout, lst) + Underline lst -> Just (Modifier underline, lst) + Superscript lst -> Just (Modifier superscript, lst) + Subscript lst -> Just (Modifier subscript, lst) + Link attr lst tgt -> Just (Modifier $ linkWith attr (fst tgt) (snd tgt), lst) + Span attr lst -> Just (AttrModifier spanWith attr, lst) + _ -> Nothing + _ -> Nothing inlinesL :: Inlines -> (Inlines, Inlines) inlinesL ils = case viewl $ unMany ils of @@ -161,12 +142,12 @@ combineSingletonInlines x y = y_rem_attr = filter isAttrModifier y_remaining in case null shared of - True | isEmpty xs && isEmpty ys -> - stackInlines (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> + True | null xs && null ys -> + stackInlines (x_rem_attr <> y_rem_attr) mempty + | null xs -> let (sp, y') = spaceOutInlinesL y in stackInlines x_rem_attr mempty <> sp <> y' - | isEmpty ys -> + | null ys -> let (x', sp) = spaceOutInlinesR x in x' <> sp <> stackInlines y_rem_attr mempty | otherwise -> @@ -193,12 +174,8 @@ combineBlocks bs cs = bs <> cs instance (Monoid a, Eq a) => Eq (Modifier a) where (Modifier f) == (Modifier g) = f mempty == g mempty (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty - NullModifier == NullModifier = True _ == _ = False -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - isAttrModifier :: Modifier a -> Bool isAttrModifier (AttrModifier _ _) = True isAttrModifier _ = False diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 199ca6d03..eab4f4e0d 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -259,10 +259,13 @@ newtype Cell = Cell [BodyPart] leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle leftBiasedMergeRunStyle a b = RunStyle { isBold = isBold a <|> isBold b + , isBoldCTL = isBoldCTL a <|> isBoldCTL b , isItalic = isItalic a <|> isItalic b + , isItalicCTL = isItalicCTL a <|> isItalicCTL b , isSmallCaps = isSmallCaps a <|> isSmallCaps b , isStrike = isStrike a <|> isStrike b , isRTL = isRTL a <|> isRTL b + , isForceCTL = isForceCTL a <|> isForceCTL b , rVertAlign = rVertAlign a <|> rVertAlign b , rUnderline = rUnderline a <|> rUnderline b , rParentStyle = rParentStyle a diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index bfbc65cb0..236167187 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -44,7 +44,6 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( ) where import Codec.Archive.Zip import Control.Applicative ((<|>)) -import Control.Monad.Except import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M @@ -101,10 +100,13 @@ data CharStyle = CharStyle { cStyleId :: CharStyleId } deriving (Show) data RunStyle = RunStyle { isBold :: Maybe Bool + , isBoldCTL :: Maybe Bool , isItalic :: Maybe Bool + , isItalicCTL :: Maybe Bool , isSmallCaps :: Maybe Bool , isStrike :: Maybe Bool , isRTL :: Maybe Bool + , isForceCTL :: Maybe Bool , rVertAlign :: Maybe VertAlign , rUnderline :: Maybe String , rParentStyle :: Maybe CharStyle @@ -121,10 +123,13 @@ data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) defaultRunStyle :: RunStyle defaultRunStyle = RunStyle { isBold = Nothing + , isBoldCTL = Nothing , isItalic = Nothing + , isItalicCTL = Nothing , isSmallCaps = Nothing , isStrike = Nothing , isRTL = Nothing + , isForceCTL = Nothing , rVertAlign = Nothing , rUnderline = Nothing , rParentStyle = Nothing @@ -240,20 +245,21 @@ elemToCharStyle :: NameSpaces elemToCharStyle ns element parentStyle = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element - <*> (Just $ elemToRunStyle ns element parentStyle) + <*> Just (elemToRunStyle ns element parentStyle) elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle elemToRunStyle ns element parentStyle | Just rPr <- findChildByName ns "w" "rPr" element = RunStyle { - isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` - checkOnOff ns rPr (elemName ns "w" "bCs") - , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` - checkOnOff ns rPr (elemName ns "w" "iCs") + isBold = checkOnOff ns rPr (elemName ns "w" "b") + , isBoldCTL = checkOnOff ns rPr (elemName ns "w" "bCs") + , isItalic = checkOnOff ns rPr (elemName ns "w" "i") + , isItalicCTL = checkOnOff ns rPr (elemName ns "w" "iCs") , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl") + , isForceCTL = checkOnOff ns rPr (elemName ns "w" "cs") , rVertAlign = findChildByName ns "w" "vertAlign" rPr >>= findAttrByName ns "w" "val" >>= |