diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 11 |
4 files changed, 42 insertions, 26 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8ebe59569..4b5fbfdfc 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -85,7 +85,7 @@ import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.Maybe (isJust) -import Data.List (delete, stripPrefix, (\\), intersect) +import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf) import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) @@ -203,6 +203,13 @@ blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"] codeDivs :: [String] codeDivs = ["SourceCode"] + +-- For the moment, we have English, Danish, German, and French. This +-- is fairly ad-hoc, and there might be a more systematic way to do +-- it, but it's better than nothing. +headerPrefixes :: [String] +headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"] + runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak @@ -461,12 +468,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ codeBlock $ concatMap parPartToString parparts | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr - , Just n <- isHeaderClass c = do + , Just (prefix, n) <- isHeaderClass c = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ (concatReduce <$> mapM parPartToInlines parparts) - makeHeaderAnchor $ - headerWith ("", delete ("Heading" ++ show n) cs, []) n ils + headerWith ("", delete (prefix ++ show n) cs, []) n ils | otherwise = do ils <- concatReduce <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) @@ -535,23 +541,18 @@ rewriteLink' l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink' il = return il -rewriteLink :: Blocks -> DocxContext Blocks -rewriteLink ils = case viewl $ unMany ils of - (x :< xs) -> do - x' <- walkM rewriteLink' x - xs' <- rewriteLink $ Many xs - return $ (singleton x') <> xs' - EmptyL -> return ils +rewriteLinks :: [Block] -> DocxContext [Block] +rewriteLinks = mapM (walkM rewriteLink') bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- concatReduce <$> mapM bodyPartToBlocks blkbps - blks' <- rewriteLink blks + blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks mediaBag <- gets docxMediaBag return $ (meta, - blocksToDefinitions $ blocksToBullets $ toList blks', + blks', mediaBag) docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) @@ -559,10 +560,11 @@ docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def -isHeaderClass :: String -> Maybe Int -isHeaderClass s | Just s' <- stripPrefix "Heading" s = - case reads s' :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing +isHeaderClass :: String -> Maybe (String, Int) +isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes + , Just s' <- stripPrefix pref s = + case reads s' :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just (pref, n) + _ -> Nothing isHeaderClass _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index ea195c14a..c265ad074 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -160,8 +160,14 @@ flatToBullets' num xs@(b : elems) flatToBullets :: [Block] -> [Block] flatToBullets elems = flatToBullets' (-1) elems +singleItemHeaderToHeader :: Block -> Block +singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h +singleItemHeaderToHeader blk = blk + + blocksToBullets :: [Block] -> [Block] blocksToBullets blks = + map singleItemHeaderToHeader $ bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) @@ -221,7 +227,3 @@ removeListDivs = concatMap removeListDivs' blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4ea5f41d5..4e0bb375a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -440,7 +440,7 @@ pCodeBlock :: TagParser Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) - let rawText = concatMap fromTagText $ filter isTagText contents + let rawText = concatMap tagToString contents -- drop leading newline if any let result' = case rawText of '\n':xs -> xs @@ -451,6 +451,11 @@ pCodeBlock = try $ do _ -> result' return $ B.codeBlockWith (mkAttr attr) result +tagToString :: Tag String -> String +tagToString (TagText s) = s +tagToString (TagOpen "br" _) = "\n" +tagToString _ = "" + inline :: TagParser Inlines inline = choice [ eNoteref diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62421d2fb..5c00a1b27 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,7 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker - , parseFromString + , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) @@ -242,6 +242,13 @@ newline = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. +blanklines :: OrgParser [Char] +blanklines = + P.blanklines + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + -- -- parsing blocks -- @@ -856,7 +863,7 @@ definitionListItem parseMarkerGetLength = try $ do line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString inline term + term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' |