diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 32 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 51 |
4 files changed, 87 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 99e6f99e6..48a512be2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -73,6 +73,7 @@ import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) import Text.XML.Light +import qualified Text.XML.Light.Cursor as XMLC data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -117,6 +118,32 @@ mapD f xs = in concatMapM handler xs +unwrapSDT :: NameSpaces -> Content -> Content +unwrapSDT ns (Elem element) + | isElem ns "w" "sdt" element + , Just sdtContent <- findChildByName ns "w" "sdtContent" element + , child : _ <- elChildren sdtContent + = Elem child +unwrapSDT _ content = content + +walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor +walkDocument' ns cur = + let modifiedCur = XMLC.modifyContent (unwrapSDT ns) cur + in + case XMLC.nextDF modifiedCur of + Just cur' -> walkDocument' ns cur' + Nothing -> XMLC.root modifiedCur + +walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument ns element = + let cur = XMLC.fromContent (Elem element) + cur' = walkDocument' ns cur + in + case XMLC.toTree cur' of + Elem element' -> Just element' + _ -> Nothing + + data Docx = Docx Document deriving Show @@ -298,7 +325,10 @@ archiveToDocument zf = do docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - body <- elemToBody namespaces bodyElem + let bodyElem' = case walkDocument namespaces bodyElem of + Just e -> e + Nothing -> bodyElem + body <- elemToBody namespaces bodyElem' return $ Document namespaces body elemToBody :: NameSpaces -> Element -> D Body diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f5f296712..65171d37a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Data.Char (isAlphaNum, isDigit, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) import Data.List (intercalate, isPrefixOf) -import Data.List.Split (wordsBy) +import Data.List.Split (wordsBy, splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..), (<>)) @@ -70,12 +70,12 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options ( Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, - Ext_native_spans, Ext_raw_html), + Ext_native_spans, Ext_raw_html, Ext_line_blocks), ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) -import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces, - safeRead, underlineSpan) +import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, + extractSpaces, safeRead, underlineSpan) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -191,6 +191,7 @@ block = do , pHtml , pHead , pBody + , pLineBlock , pDiv , pPlain , pFigure @@ -377,6 +378,16 @@ pRawTag = do then return mempty else return $ renderTags' [tag] +pLineBlock :: PandocMonad m => TagParser m Blocks +pLineBlock = try $ do + guardEnabled Ext_line_blocks + _ <- pSatisfy $ tagOpen (=="div") (== [("class","line-block")]) + ils <- trimInlines . mconcat <$> manyTill inline (pSatisfy (tagClose (=="div"))) + let lns = map B.fromList $ + splitWhen (== LineBreak) $ filter (/= SoftBreak) $ + B.toList ils + return $ B.lineBlock lns + pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs @@ -588,8 +599,9 @@ pFigure = try $ do skipMany pBlank let pImg = (\x -> (Just x, Nothing)) <$> (pOptInTag "p" pImage <* skipMany pBlank) - pCapt = (\x -> (Nothing, Just x)) <$> - (pInTags "figcaption" inline <* skipMany pBlank) + pCapt = (\x -> (Nothing, Just x)) <$> do + bs <- pInTags "figcaption" block + return $ blocksToInlines' $ B.toList bs pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure") res <- many (pImg <|> pCapt <|> pSkip) let mbimg = msum $ map fst res diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6c5567ffd..e0972bb6c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1726,7 +1726,7 @@ inline = (mempty <$ comment) <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) <|> (str . (:[]) <$> primEscape) <|> regularSymbol - <|> (do res <- symbolIn "#^'`\"[]" + <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition let s = T.unpack (untoken res) report $ ParsingUnescaped s pos diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cc6abbfa5..a930652af 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -41,7 +41,6 @@ import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, originalLang, translateLang) import Text.Pandoc.Builder (Blocks, Inlines) -import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options @@ -54,6 +53,9 @@ import Data.List (foldl', isPrefixOf) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.Walk as Walk + -- -- parsing blocks -- @@ -509,19 +511,18 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - blockType <- optionMaybe $ skipSpaces *> many1 alphaNum - blocksParser <- case blockType of - Just "example" -> - return $ pure . B.codeBlock <$> parseRaw - Just "export" -> do - format <- skipSpaces *> many (noneOf "\n\r\t ") - return $ pure . B.rawBlock format <$> parseRaw - Just "src" -> do - language <- skipSpaces *> many (noneOf "\n\r\t ") - let attr = (mempty, [language], mempty) - return $ pure . B.codeBlockWith attr <$> parseRaw - _ -> return $ pure . B.fromList <$> blockList - anyLine + includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + params <- keyValues + blocksParser <- case includeArgs of + ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw + ["export"] -> return . returnF $ B.fromList [] + ("export" : format : []) -> return $ pure . B.rawBlock format <$> parseRaw + ("src" : rest) -> do + let attr = case rest of + [lang] -> (mempty, [lang], mempty) + _ -> nullAttr + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ return . B.fromList . blockFilter params <$> blockList insertIncludedFileF blocksParser ["."] filename where includeTarget :: PandocMonad m => OrgParser m FilePath @@ -532,6 +533,28 @@ include = try $ do parseRaw :: PandocMonad m => OrgParser m String parseRaw = many anyChar + blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter params blks = + let minlvl = lookup "minlevel" params + in case (minlvl >>= safeRead :: Maybe Int) of + Nothing -> blks + Just lvl -> let levels = Walk.query headerLevel blks + -- CAVE: partial function in else + curMin = if null levels then 0 else minimum levels + in Walk.walk (shiftHeader (curMin - lvl)) blks + + headerLevel :: Block -> [Int] + headerLevel (Header lvl _attr _content) = [lvl] + headerLevel _ = [] + + shiftHeader :: Int -> Block -> Block + shiftHeader shift blk = + if shift <= 0 + then blk + else case blk of + (Header lvl attr content) -> Header (lvl - shift) attr content + _ -> blk + rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart |