aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs40
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs10
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org.hs11
4 files changed, 42 insertions, 26 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 8ebe59569..4b5fbfdfc 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -85,7 +85,7 @@ import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.Maybe (isJust)
-import Data.List (delete, stripPrefix, (\\), intersect)
+import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf)
import Data.Monoid
import Text.TeXMath (writeTeX)
import Data.Default (Default)
@@ -203,6 +203,13 @@ blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"]
codeDivs :: [String]
codeDivs = ["SourceCode"]
+
+-- For the moment, we have English, Danish, German, and French. This
+-- is fairly ad-hoc, and there might be a more systematic way to do
+-- it, but it's better than nothing.
+headerPrefixes :: [String]
+headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"]
+
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak
@@ -461,12 +468,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ codeBlock
$ concatMap parPartToString parparts
| (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr
- , Just n <- isHeaderClass c = do
+ , Just (prefix, n) <- isHeaderClass c = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
(concatReduce <$> mapM parPartToInlines parparts)
-
makeHeaderAnchor $
- headerWith ("", delete ("Heading" ++ show n) cs, []) n ils
+ headerWith ("", delete (prefix ++ show n) cs, []) n ils
| otherwise = do
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
@@ -535,23 +541,18 @@ rewriteLink' l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink' il = return il
-rewriteLink :: Blocks -> DocxContext Blocks
-rewriteLink ils = case viewl $ unMany ils of
- (x :< xs) -> do
- x' <- walkM rewriteLink' x
- xs' <- rewriteLink $ Many xs
- return $ (singleton x') <> xs'
- EmptyL -> return ils
+rewriteLinks :: [Block] -> DocxContext [Block]
+rewriteLinks = mapM (walkM rewriteLink')
bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatReduce <$> mapM bodyPartToBlocks blkbps
- blks' <- rewriteLink blks
+ blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
mediaBag <- gets docxMediaBag
return $ (meta,
- blocksToDefinitions $ blocksToBullets $ toList blks',
+ blks',
mediaBag)
docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
@@ -559,10 +560,11 @@ docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
-isHeaderClass :: String -> Maybe Int
-isHeaderClass s | Just s' <- stripPrefix "Heading" s =
- case reads s' :: [(Int, String)] of
- [] -> Nothing
- ((n, "") : []) -> Just n
- _ -> Nothing
+isHeaderClass :: String -> Maybe (String, Int)
+isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes
+ , Just s' <- stripPrefix pref s =
+ case reads s' :: [(Int, String)] of
+ [] -> Nothing
+ ((n, "") : []) -> Just (pref, n)
+ _ -> Nothing
isHeaderClass _ = Nothing
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index ea195c14a..c265ad074 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -160,8 +160,14 @@ flatToBullets' num xs@(b : elems)
flatToBullets :: [Block] -> [Block]
flatToBullets elems = flatToBullets' (-1) elems
+singleItemHeaderToHeader :: Block -> Block
+singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
+singleItemHeaderToHeader blk = blk
+
+
blocksToBullets :: [Block] -> [Block]
blocksToBullets blks =
+ map singleItemHeaderToHeader $
bottomUp removeListDivs $
flatToBullets $ (handleListParagraphs blks)
@@ -221,7 +227,3 @@ removeListDivs = concatMap removeListDivs'
blocksToDefinitions :: [Block] -> [Block]
blocksToDefinitions = blocksToDefinitions' [] []
-
-
-
-
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4ea5f41d5..4e0bb375a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -440,7 +440,7 @@ pCodeBlock :: TagParser Blocks
pCodeBlock = try $ do
TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
- let rawText = concatMap fromTagText $ filter isTagText contents
+ let rawText = concatMap tagToString contents
-- drop leading newline if any
let result' = case rawText of
'\n':xs -> xs
@@ -451,6 +451,11 @@ pCodeBlock = try $ do
_ -> result'
return $ B.codeBlockWith (mkAttr attr) result
+tagToString :: Tag String -> String
+tagToString (TagText s) = s
+tagToString (TagOpen "br" _) = "\n"
+tagToString _ = ""
+
inline :: TagParser Inlines
inline = choice
[ eNoteref
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 62421d2fb..5c00a1b27 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,7 +37,7 @@ import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
, newline, orderedListMarker
- , parseFromString
+ , parseFromString, blanklines
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Shared (compactify', compactify'DL)
@@ -242,6 +242,13 @@ newline =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+ P.blanklines
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
--
-- parsing blocks
--
@@ -856,7 +863,7 @@ definitionListItem parseMarkerGetLength = try $ do
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
cont <- concat <$> many (listContinuation markerLength)
- term' <- parseFromString inline term
+ term' <- parseFromString parseInlines term
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
return $ (,) <$> term' <*> fmap (:[]) contents'