diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 84 |
1 files changed, 73 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 196a3cec5..48a23dd3c 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -87,7 +87,7 @@ import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Readers.Docx.TexChar import Text.Pandoc.Shared import Data.Maybe (mapMaybe, fromMaybe) -import Data.List (delete, isPrefixOf, (\\), intercalate) +import Data.List (delete, isPrefixOf, (\\), intercalate, intersect) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) @@ -101,7 +101,8 @@ readDocx :: ReaderOptions -> Pandoc readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> Pandoc nullMeta (docxToBlocks opts docx) + Right docx -> Pandoc meta blks where + (meta, blks) = (docxToMetaAndBlocks opts docx) Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String @@ -134,6 +135,65 @@ spansToKeep = [] divsToKeep :: [String] divsToKeep = ["list-item", "Definition", "DefinitionTerm"] +metaStyles :: M.Map String String +metaStyles = M.fromList [ ("Title", "title") + , ("Subtitle", "subtitle") + , ("Author", "author") + , ("Date", "date") + , ("Abstract", "abstract")] + +sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) +sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp)) + +isMetaPar :: BodyPart -> Bool +isMetaPar (Paragraph pPr _) = + not $ null $ intersect (pStyle pPr) (M.keys metaStyles) +isMetaPar _ = False + +isEmptyPar :: BodyPart -> Bool +isEmptyPar (Paragraph _ parParts) = + all isEmptyParPart parParts + where + isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems + isEmptyParPart _ = False + isEmptyElem (TextRun s) = trim s == "" + isEmptyElem _ = True +isEmptyPar _ = False + +bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue) +bodyPartsToMeta' [] = return M.empty +bodyPartsToMeta' (bp : bps) + | (Paragraph pPr parParts) <- bp + , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) + , (Just metaField) <- M.lookup c metaStyles = do + inlines <- parPartsToInlines parParts + remaining <- bodyPartsToMeta' bps + let + f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] + f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks) + f m (MetaList mv) = MetaList (m : mv) + f m n = MetaList [m, n] + return $ M.insertWith f metaField (MetaInlines inlines) remaining +bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps + +bodyPartsToMeta :: [BodyPart] -> DocxContext Meta +bodyPartsToMeta bps = do + mp <- bodyPartsToMeta' bps + let mp' = + case M.lookup "author" mp of + Just mv -> M.insert "author" (fixAuthors mv) mp + Nothing -> mp + return $ Meta mp' + +fixAuthors :: MetaValue -> MetaValue +fixAuthors (MetaBlocks blks) = + MetaList $ map g $ filter f blks + where f (Para _) = True + f _ = False + g (Para ils) = MetaInlines ils + g _ = MetaInlines [] +fixAuthors mv = mv + runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = let spanClassToContainers :: String -> [Container Inline] @@ -615,24 +675,26 @@ rewriteLink l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink il = return il - -bodyToBlocks :: Body -> DocxContext [Block] -bodyToBlocks (Body bps) = do - blks <- concatMapM bodyPartToBlocks bps >>= +bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block]) +bodyToMetaAndBlocks (Body bps) = do + let (metabps, blkbps) = sepBodyParts bps + meta <- bodyPartsToMeta metabps + blks <- concatMapM bodyPartToBlocks blkbps >>= walkM rewriteLink return $ - blocksToDefinitions $ - blocksToBullets $ blks + (meta, + blocksToDefinitions $ + blocksToBullets $ blks) -docxToBlocks :: ReaderOptions -> Docx -> [Block] -docxToBlocks opts d@(Docx (Document _ body)) = +docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block]) +docxToMetaAndBlocks opts d@(Docx (Document _ body)) = let dState = DState { docxAnchorMap = M.empty , docxInHeaderBlock = False , docxInTexSubscript = False} dEnv = DEnv { docxOptions = opts , docxDocument = d} in - evalDocxContext (bodyToBlocks body) dEnv dState + evalDocxContext (bodyToMetaAndBlocks body) dEnv dState ilToCode :: Inline -> String ilToCode (Str s) = s |