aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-11 08:14:34 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-11 08:14:34 -0700
commit270e9a26cc60a3052e6f0b8675d4c517704bec36 (patch)
tree6ec7d9477f605ee7e85877516c96ce3c9621c6fd
parent4e483dbf9a15e7b9605cc80b400d5a1746735d8e (diff)
parent853830d12be6ba22b0efe1e915eb946ac7b3f9ed (diff)
downloadpandoc-270e9a26cc60a3052e6f0b8675d4c517704bec36.tar.gz
Merge pull request #1518 from jkr/fontsmore
Symbol font improvements from mpickering.
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fonts.hs1
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs35
-rw-r--r--tests/docx.unicode.docxbin11472 -> 11506 bytes
-rw-r--r--tests/docx.unicode.native2
4 files changed, 29 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs
index cd56eb115..b44c71412 100644
--- a/src/Text/Pandoc/Readers/Docx/Fonts.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where
-- | Enumeration of recognised fonts
data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol>
+ deriving (Show, Eq)
-- | Given a font and codepoint, returns the corresponding unicode
-- character
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 7d1171ee3..1abd4bc6b 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE PatternGuards, ViewPatterns #-}
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -59,18 +59,19 @@ import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import qualified Text.Pandoc.UTF8 as UTF8
import Control.Monad.Reader
-import Control.Applicative ((<$>))
+import Control.Applicative ((<$>), (<|>))
import qualified Data.Map as M
import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
-import Data.Char (readLitChar)
+import Data.Char (readLitChar, ord, chr)
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envNumbering :: Numbering
, envRelationships :: [Relationship]
, envMedia :: Media
+ , envFont :: Maybe Font
}
deriving Show
@@ -234,7 +235,7 @@ archiveToDocx archive = do
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
- rEnv = ReaderEnv notes numbering rels media
+ rEnv = ReaderEnv notes numbering rels media Nothing
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -672,12 +673,21 @@ elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
- || isElem ns "m" "t" element =
- return $ TextRun $ strContent element
+ || isElem ns "m" "t" element = do
+ let str = strContent element
+ font <- asks envFont
+ case font of
+ Nothing -> return $ TextRun str
+ Just f -> return . TextRun $
+ map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
| isElem ns "w" "sym" element = return (getSymChar ns element)
| otherwise = throwError WrongElem
+ where
+ lowerFromPrivate (ord -> c)
+ | c >= ord '\xF000' = chr $ c - ord '\xF000'
+ | otherwise = chr c
-- The char attribute is a hex string
getSymChar :: NameSpaces -> Element -> RunElem
@@ -700,6 +710,15 @@ stringToFont _ = Nothing
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
elemToRunElems ns element
| isElem ns "w" "r" element
- || isElem ns "m" "r" element =
- mapD (elemToRunElem ns) (elChildren element)
+ || isElem ns "m" "r" element = do
+ let qualName = elemName ns "w"
+ let font = do
+ fontElem <- findElement (qualName "rFonts") element
+ stringToFont =<<
+ (foldr (<|>) Nothing $
+ map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+ local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
+
+setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
+setFont f s = s{envFont = f}
diff --git a/tests/docx.unicode.docx b/tests/docx.unicode.docx
index 4360f6be7..cf902c6c6 100644
--- a/tests/docx.unicode.docx
+++ b/tests/docx.unicode.docx
Binary files differ
diff --git a/tests/docx.unicode.native b/tests/docx.unicode.native
index f37bbb1a7..aee7ef74b 100644
--- a/tests/docx.unicode.native
+++ b/tests/docx.unicode.native
@@ -1 +1 @@
-[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10.\8744\8744"]]
+[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10.\8744\8744("]]