aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs5
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs89
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs94
3 files changed, 183 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index e0f32b908..21120824f 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -446,6 +446,11 @@ parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
parPartToInlines' (SmartTag runs) = do
smushInlines <$> mapM runToInlines runs
+parPartToInlines' (Field info runs) = do
+ case info of
+ HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
+ UnknownField -> smushInlines <$> mapM runToInlines runs
+parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, classes, kvs) _) =
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
new file mode 100644
index 000000000..69758b431
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -0,0 +1,89 @@
+{-
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Fields
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+For parsing Field definitions in instText tags, as described in
+ECMA-376-1:2016, §17.16.5 -}
+
+module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
+ , parseFieldInfo
+ ) where
+
+import Text.Parsec
+import Text.Parsec.String (Parser)
+
+type URL = String
+
+data FieldInfo = HyperlinkField URL
+ | UnknownField
+ deriving (Show)
+
+parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo = parse fieldInfo ""
+
+fieldInfo :: Parser FieldInfo
+fieldInfo =
+ (try $ HyperlinkField <$> hyperlink)
+ <|>
+ return UnknownField
+
+escapedQuote :: Parser String
+escapedQuote = string "\\\""
+
+inQuotes :: Parser String
+inQuotes = do
+ (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+
+quotedString :: Parser String
+quotedString = do
+ char '"'
+ concat <$> manyTill inQuotes (try (char '"'))
+
+unquotedString :: Parser String
+unquotedString = manyTill anyChar (try (space))
+
+fieldArgument :: Parser String
+fieldArgument = quotedString <|> unquotedString
+
+-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
+hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch = do
+ sw <- string "\\l"
+ spaces
+ farg <- fieldArgument
+ return (sw, farg)
+
+hyperlink :: Parser URL
+hyperlink = do
+ many space
+ string "HYPERLINK"
+ spaces
+ farg <- fieldArgument
+ switches <- (spaces *> many hyperlinkSwitch)
+ let url = case switches of
+ ("\\l", s) : _ -> farg ++ ('#': s)
+ _ -> farg
+ return url
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index b79b39369..5f648666f 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, TrackedChange(..)
, ChangeType(..)
, ChangeInfo(..)
+ , FieldInfo(..)
, archiveToDocx
, archiveToDocxWithWarnings
) where
@@ -70,6 +71,7 @@ import qualified Data.Map as M
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
+import Text.Pandoc.Readers.Docx.Fields
import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
@@ -90,10 +92,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String] }
+data ReaderState = ReaderState { stateWarnings :: [String]
+ , stateFldCharState :: FldCharState
+ }
deriving Show
-data DocxError = DocxError | WrongElem
+data FldCharState = FldCharOpen
+ | FldCharFieldInfo FieldInfo
+ | FldCharContent FieldInfo [Run]
+ | FldCharClosed
+ deriving (Show)
+
+data DocxError = DocxError
+ | WrongElem
deriving Show
type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
@@ -265,6 +276,9 @@ data ParPart = PlainRun Run
| Chart -- placeholder for now
| PlainOMath [Exp]
| SmartTag [Run]
+ | Field FieldInfo [Run]
+ | NullParPart -- when we need to return nothing, but
+ -- not because of an error.
deriving Show
data Run = Run RunStyle [RunElem]
@@ -328,7 +342,9 @@ archiveToDocxWithWarnings archive = do
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
+ rState = ReaderState { stateWarnings = []
+ , stateFldCharState = FldCharClosed
+ }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
Right doc -> Right (Docx doc, stateWarnings st)
@@ -736,9 +752,77 @@ elemToParPart ns element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
= return Chart
+{-
+The next one is a bit complicated. fldChar fields work by first
+having a <w:fldChar fldCharType="begin"> in a run, then a run with
+<w:instrText>, then a <w:fldChar fldCharType="separate"> run, then the
+content runs, and finally a <w:fldChar fldCharType="end"> run. For
+example (omissions and my comments in brackets):
+
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="begin"/>
+ </w:r>
+ <w:r>
+ [...]
+ <w:instrText xml:space="preserve"> HYPERLINK [hyperlink url] </w:instrText>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="separate"/>
+ </w:r>
+ <w:r w:rsidRPr=[...]>
+ [...]
+ <w:t>Foundations of Analysis, 2nd Edition</w:t>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="end"/>
+ </w:r>
+
+So we do this in a number of steps. If we encounter the fldchar begin
+tag, we start open a fldchar state variable (see state above). We add
+the instrtext to it as FieldInfo. Then we close that and start adding
+the runs when we get to separate. Then when we get to end, we produce
+the Field type with approriate FieldInfo and Runs.
+-}
elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
+ | isElem ns "w" "r" element
+ , Just fldChar <- findChildByName ns "w" "fldChar" element
+ , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharClosed | fldCharType == "begin" -> do
+ modify $ \st -> st {stateFldCharState = FldCharOpen}
+ return NullParPart
+ FldCharFieldInfo info | fldCharType == "separate" -> do
+ modify $ \st -> st {stateFldCharState = FldCharContent info []}
+ return NullParPart
+ FldCharContent info runs | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = FldCharClosed}
+ return $ Field info $ reverse runs
+ _ -> throwError WrongElem
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just instrText <- findChildByName ns "w" "instrText" element = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharOpen -> do
+ info <- eitherToD $ parseFieldInfo $ strContent instrText
+ modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
+ return NullParPart
+ _ -> return NullParPart
+elemToParPart ns element
+ | isElem ns "w" "r" element = do
+ run <- elemToRun ns element
+ -- we check to see if we have an open FldChar in state that we're
+ -- recording.
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharContent info runs -> do
+ modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
+ return NullParPart
+ _ -> return $ PlainRun run
elemToParPart ns element
| Just change <- getTrackedChange ns element = do
runs <- mapD (elemToRun ns) (elChildren element)