about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 32
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 24
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX.hs 2
-rw-r--r-- src/Text/Pandoc/Readers/Org/Blocks.hs 51
4 files changed, 87 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 99e6f99e6..48a512be2 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -73,6 +73,7 @@ import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
import Text.XML.Light
+import qualified Text.XML.Light.Cursor as XMLC
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
@@ -117,6 +118,32 @@ mapD f xs =
in
concatMapM handler xs
+unwrapSDT :: NameSpaces -> Content -> Content -- Replace a w:sdt element by the first child element of its w:sdtContent.
+unwrapSDT ns (Elem element)
+ | isElem ns "w" "sdt" element
+ , Just sdtContent <- findChildByName ns "w" "sdtContent" element
+ , child : _ <- elChildren sdtContent -- NOTE(review): only the first child element is kept; further children of sdtContent are dropped
+ = Elem child
+unwrapSDT _ content = content -- anything that does not match the guards above is returned unchanged
+
+walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor -- Apply unwrapSDT at every cursor position reached through XMLC.nextDF.
+walkDocument' ns cur =
+ let modifiedCur = XMLC.modifyContent (unwrapSDT ns) cur
+ in
+ case XMLC.nextDF modifiedCur of
+ Just cur' -> walkDocument' ns cur' -- keep going from the next position
+ Nothing -> XMLC.root modifiedCur -- nothing left to visit: return the result of XMLC.root on the modified cursor
+
+walkDocument :: NameSpaces -> Element -> Maybe Element -- Run walkDocument' over the tree of an element and convert the result back to an Element.
+walkDocument ns element =
+ let cur = XMLC.fromContent (Elem element)
+ cur' = walkDocument' ns cur
+ in
+ case XMLC.toTree cur' of
+ Elem element' -> Just element'
+ _ -> Nothing -- the resulting tree is not an Elem
+
+
data Docx = Docx Document
deriving Show
@@ -298,7 +325,10 @@ archiveToDocument zf = do
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
- body <- elemToBody namespaces bodyElem
+ let bodyElem' = case walkDocument namespaces bodyElem of
+ Just e -> e
+ Nothing -> bodyElem
+ body <- elemToBody namespaces bodyElem'
return $ Document namespaces body
elemToBody :: NameSpaces -> Element -> D Body
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f5f296712..65171d37a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -51,7 +51,7 @@ import Data.Char (isAlphaNum, isDigit, isLetter)
import Data.Default (Default (..), def)
import Data.Foldable (for_)
import Data.List (intercalate, isPrefixOf)
-import Data.List.Split (wordsBy)
+import Data.List.Split (wordsBy, splitWhen)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid (First (..), (<>))
@@ -70,12 +70,12 @@ import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
- Ext_native_spans, Ext_raw_html),
+ Ext_native_spans, Ext_raw_html, Ext_line_blocks),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces,
- safeRead, underlineSpan)
+import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
+ extractSpaces, safeRead, underlineSpan)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
@@ -191,6 +191,7 @@ block = do
, pHtml
, pHead
, pBody
+ , pLineBlock
, pDiv
, pPlain
, pFigure
@@ -377,6 +378,16 @@ pRawTag = do
then return mempty
else return $ renderTags' [tag]
+pLineBlock :: PandocMonad m => TagParser m Blocks -- Parse a <div class="line-block"> element into a line block.
+pLineBlock = try $ do
+ guardEnabled Ext_line_blocks -- only active when the line_blocks extension is enabled
+ _ <- pSatisfy $ tagOpen (=="div") (== [("class","line-block")]) -- the attribute list must equal exactly this single pair
+ ils <- trimInlines . mconcat <$> manyTill inline (pSatisfy (tagClose (=="div")))
+ let lns = map B.fromList $
+ splitWhen (== LineBreak) $ filter (/= SoftBreak) $ -- drop SoftBreaks, then start a new line at each LineBreak
+ B.toList ils
+ return $ B.lineBlock lns
+
pDiv :: PandocMonad m => TagParser m Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
@@ -588,8 +599,9 @@ pFigure = try $ do
skipMany pBlank
let pImg = (\x -> (Just x, Nothing)) <$>
(pOptInTag "p" pImage <* skipMany pBlank)
- pCapt = (\x -> (Nothing, Just x)) <$>
- (pInTags "figcaption" inline <* skipMany pBlank)
+ pCapt = (\x -> (Nothing, Just x)) <$> do
+ bs <- pInTags "figcaption" block
+ return $ blocksToInlines' $ B.toList bs
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
res <- many (pImg <|> pCapt <|> pSkip)
let mbimg = msum $ map fst res
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6c5567ffd..e0972bb6c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1726,7 +1726,7 @@ inline = (mempty <$ comment)
<|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
<|> (str . (:[]) <$> primEscape)
<|> regularSymbol
- <|> (do res <- symbolIn "#^'`\"[]"
+ <|> (do res <- symbolIn "#^'`\"[]&"
pos <- getPosition
let s = T.unpack (untoken res)
report $ ParsingUnescaped s pos
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index cc6abbfa5..a930652af 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -41,7 +41,6 @@ import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
originalLang, translateLang)
import Text.Pandoc.Builder (Blocks, Inlines)
-import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
@@ -54,6 +53,9 @@ import Data.List (foldl', isPrefixOf)
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid ((<>))
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.Walk as Walk
+
--
-- parsing blocks
--
@@ -509,19 +511,18 @@ include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
- blockType <- optionMaybe $ skipSpaces *> many1 alphaNum
- blocksParser <- case blockType of
- Just "example" ->
- return $ pure . B.codeBlock <$> parseRaw
- Just "export" -> do
- format <- skipSpaces *> many (noneOf "\n\r\t ")
- return $ pure . B.rawBlock format <$> parseRaw
- Just "src" -> do
- language <- skipSpaces *> many (noneOf "\n\r\t ")
- let attr = (mempty, [language], mempty)
- return $ pure . B.codeBlockWith attr <$> parseRaw
- _ -> return $ pure . B.fromList <$> blockList
- anyLine
+ includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ params <- keyValues
+ blocksParser <- case includeArgs of
+ ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
+ ["export"] -> return . returnF $ B.fromList []
+ ("export" : format : []) -> return $ pure . B.rawBlock format <$> parseRaw
+ ("src" : rest) -> do
+ let attr = case rest of
+ [lang] -> (mempty, [lang], mempty)
+ _ -> nullAttr
+ return $ pure . B.codeBlockWith attr <$> parseRaw
+ _ -> return $ return . B.fromList . blockFilter params <$> blockList
insertIncludedFileF blocksParser ["."] filename
where
includeTarget :: PandocMonad m => OrgParser m FilePath
@@ -532,6 +533,28 @@ include = try $ do
parseRaw :: PandocMonad m => OrgParser m String
parseRaw = many anyChar
+ blockFilter :: [(String, String)] -> [Block] -> [Block] -- Shift header levels when params holds a "minlevel" value readable as an Int.
+ blockFilter params blks =
+ let minlvl = lookup "minlevel" params
+ in case (minlvl >>= safeRead :: Maybe Int) of
+ Nothing -> blks -- no usable "minlevel": the blocks are returned unchanged
+ Just lvl -> let levels = Walk.query headerLevel blks
+ -- Caution: 'minimum' is partial on an empty list; the 'null levels' check guards that case.
+ curMin = if null levels then 0 else minimum levels
+ in Walk.walk (shiftHeader (curMin - lvl)) blks
+
+ headerLevel :: Block -> [Int] -- Level of a Header block as a singleton list; empty list for any other block.
+ headerLevel (Header lvl _attr _content) = [lvl]
+ headerLevel _ = []
+
+ shiftHeader :: Int -> Block -> Block -- Subtract a positive shift from the level of a Header block.
+ shiftHeader shift blk =
+ if shift <= 0
+ then blk -- a zero or negative shift leaves every block untouched
+ else case blk of
+ (Header lvl attr content) -> Header (lvl - shift) attr content
+ _ -> blk -- blocks other than Header are returned as-is
+
rawExportLine :: PandocMonad m => OrgParser m Blocks
rawExportLine = try $ do
metaLineStart