aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-07-29 11:15:34 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-07-29 11:15:34 -0700
commit8d4eebaff42cfe474042a7dafaa86950640b9e82 (patch)
treee1c58d8ac6442a3eb4cfa6015757226e860a48b9 /src/Text/Pandoc/Readers
parent4ddb8433a8326a872fd430d9aae7e2ba8ad7b051 (diff)
parent54708da371e767cd42598ea8f7fbd7d45c57421b (diff)
downloadpandoc-8d4eebaff42cfe474042a7dafaa86950640b9e82.tar.gz
Merge pull request #1463 from jkr/metadata
Make metadata out of styled pars
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs84
1 files changed, 73 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 196a3cec5..48a23dd3c 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -87,7 +87,7 @@ import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
import Data.Maybe (mapMaybe, fromMaybe)
-import Data.List (delete, isPrefixOf, (\\), intercalate)
+import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Base64 (encode)
@@ -101,7 +101,8 @@ readDocx :: ReaderOptions
-> Pandoc
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> Pandoc nullMeta (docxToBlocks opts docx)
+ Right docx -> Pandoc meta blks where
+ (meta, blks) = (docxToMetaAndBlocks opts docx)
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
@@ -134,6 +135,65 @@ spansToKeep = []
divsToKeep :: [String]
divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+metaStyles :: M.Map String String
+metaStyles = M.fromList [ ("Title", "title")
+ , ("Subtitle", "subtitle")
+ , ("Author", "author")
+ , ("Date", "date")
+ , ("Abstract", "abstract")]
+
+sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
+sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+
+isMetaPar :: BodyPart -> Bool
+isMetaPar (Paragraph pPr _) =
+ not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+isMetaPar _ = False
+
+isEmptyPar :: BodyPart -> Bool
+isEmptyPar (Paragraph _ parParts) =
+ all isEmptyParPart parParts
+ where
+ isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
+ isEmptyParPart _ = False
+ isEmptyElem (TextRun s) = trim s == ""
+ isEmptyElem _ = True
+isEmptyPar _ = False
+
+bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
+bodyPartsToMeta' [] = return M.empty
+bodyPartsToMeta' (bp : bps)
+ | (Paragraph pPr parParts) <- bp
+ , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+ , (Just metaField) <- M.lookup c metaStyles = do
+ inlines <- parPartsToInlines parParts
+ remaining <- bodyPartsToMeta' bps
+ let
+ f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
+ f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+ f m (MetaList mv) = MetaList (m : mv)
+ f m n = MetaList [m, n]
+ return $ M.insertWith f metaField (MetaInlines inlines) remaining
+bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
+
+bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
+bodyPartsToMeta bps = do
+ mp <- bodyPartsToMeta' bps
+ let mp' =
+ case M.lookup "author" mp of
+ Just mv -> M.insert "author" (fixAuthors mv) mp
+ Nothing -> mp
+ return $ Meta mp'
+
+fixAuthors :: MetaValue -> MetaValue
+fixAuthors (MetaBlocks blks) =
+ MetaList $ map g $ filter f blks
+ where f (Para _) = True
+ f _ = False
+ g (Para ils) = MetaInlines ils
+ g _ = MetaInlines []
+fixAuthors mv = mv
+
runStyleToContainers :: RunStyle -> [Container Inline]
runStyleToContainers rPr =
let spanClassToContainers :: String -> [Container Inline]
@@ -615,24 +675,26 @@ rewriteLink l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink il = return il
-
-bodyToBlocks :: Body -> DocxContext [Block]
-bodyToBlocks (Body bps) = do
- blks <- concatMapM bodyPartToBlocks bps >>=
+bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block])
+bodyToMetaAndBlocks (Body bps) = do
+ let (metabps, blkbps) = sepBodyParts bps
+ meta <- bodyPartsToMeta metabps
+ blks <- concatMapM bodyPartToBlocks blkbps >>=
walkM rewriteLink
return $
- blocksToDefinitions $
- blocksToBullets $ blks
+ (meta,
+ blocksToDefinitions $
+ blocksToBullets $ blks)
-docxToBlocks :: ReaderOptions -> Docx -> [Block]
-docxToBlocks opts d@(Docx (Document _ body)) =
+docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block])
+docxToMetaAndBlocks opts d@(Docx (Document _ body)) =
let dState = DState { docxAnchorMap = M.empty
, docxInHeaderBlock = False
, docxInTexSubscript = False}
dEnv = DEnv { docxOptions = opts
, docxDocument = d}
in
- evalDocxContext (bodyToBlocks body) dEnv dState
+ evalDocxContext (bodyToMetaAndBlocks body) dEnv dState
ilToCode :: Inline -> String
ilToCode (Str s) = s