From 427466f80c3897b8341a1e576bd488be3cea77c6 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Sun, 10 Aug 2014 17:04:13 +0100 Subject: Docx Fonts: Derives Show and Eq --- src/Text/Pandoc/Readers/Docx/Fonts.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs index cd56eb115..b44c71412 100644 --- a/src/Text/Pandoc/Readers/Docx/Fonts.hs +++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where -- | Enumeration of recognised fonts data Font = Symbol -- ^ + deriving (Show, Eq) -- | Given a font and codepoint, returns the corresponding unicode -- character -- cgit v1.2.3 From 973ed469de293a2fb812de6bde7f234896856461 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Sun, 10 Aug 2014 17:05:17 +0100 Subject: Docx Parse: Improved font recognition when specified in rFonts element --- src/Text/Pandoc/Readers/Docx/Parse.hs | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7d1171ee3..1abd4bc6b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, ViewPatterns #-} {- Copyright (C) 2014 Jesse Rosenthal @@ -59,18 +59,19 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader -import Control.Applicative ((<$>)) +import Control.Applicative ((<$>), (<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) -import Data.Char (readLitChar) +import Data.Char (readLitChar, ord, chr) data ReaderEnv = ReaderEnv { envNotes :: Notes , envNumbering :: Numbering , envRelationships :: [Relationship] , envMedia :: Media + , envFont :: Maybe Font } deriving Show @@ -234,7 +235,7 @@ archiveToDocx archive = do numbering = archiveToNumbering archive rels = archiveToRelationships archive media = archiveToMedia archive - rEnv = ReaderEnv notes numbering rels media + rEnv = ReaderEnv notes numbering rels media Nothing doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -672,12 +673,21 @@ elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element - || isElem ns "m" "t" element = - return $ TextRun $ strContent element + || isElem ns "m" "t" element = do + let str = strContent element + font <- asks envFont + case font of + Nothing -> return $ TextRun str + Just f -> return . TextRun $ + map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem + where + lowerFromPrivate (ord -> c) + | c >= ord '\xF000' = chr $ c - ord '\xF000' + | otherwise = chr c -- The char attribute is a hex string getSymChar :: NameSpaces -> Element -> RunElem @@ -700,6 +710,15 @@ stringToFont _ = Nothing elemToRunElems :: NameSpaces -> Element -> D [RunElem] elemToRunElems ns element | isElem ns "w" "r" element - || isElem ns "m" "r" element = - mapD (elemToRunElem ns) (elChildren element) + || isElem ns "m" "r" element = do + let qualName = elemName ns "w" + let font = do + fontElem <- findElement (qualName "rFonts") element + stringToFont =<< + (foldr (<|>) Nothing $ + map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"]) + local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem + +setFont :: Maybe Font -> ReaderEnv -> ReaderEnv +setFont f s = s{envFont = f} -- cgit v1.2.3 From 853830d12be6ba22b0efe1e915eb946ac7b3f9ed Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Sun, 10 Aug 2014 17:05:52 +0100 Subject: Docx Parse: Updated tests --- tests/docx.unicode.docx | Bin 11472 -> 11506 bytes tests/docx.unicode.native | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docx.unicode.docx b/tests/docx.unicode.docx index 4360f6be7..cf902c6c6 100644 Binary files a/tests/docx.unicode.docx and b/tests/docx.unicode.docx differ diff --git a/tests/docx.unicode.native b/tests/docx.unicode.native index f37bbb1a7..aee7ef74b 100644 --- a/tests/docx.unicode.native +++ b/tests/docx.unicode.native @@ -1 +1 @@ -[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10.\8744\8744"]] +[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10.\8744\8744("]] -- cgit v1.2.3