diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
34 files changed, 1252 insertions, 1822 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index e98ee066e..3c62f8db5 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -34,15 +34,15 @@ where import CMark import Data.List (groupBy) -import Data.Text (pack, unpack) +import Data.Text (Text, unpack) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - nodeToPandoc $ commonmarkToNode opts' $ pack s + nodeToPandoc $ commonmarkToNode opts' s where opts' = if extensionEnabled Ext_smart (readerExtensions opts) then [optNormalize, optSmart] else [optNormalize] diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index bef256a93..bd3c7c356 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -16,6 +16,8 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default import Data.Foldable (asum) import Text.Pandoc.Class (PandocMonad) +import Data.Text (Text) +import qualified Data.Text as T {- @@ -522,11 +524,11 @@ instance Default DBState where , dbContent = [] } -readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ inp + let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree - return $ Pandoc (dbMeta st') (toList . mconcat $ bs) + return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 683277993..2757314ab 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -3,7 +3,7 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 94b4d919a..8be2e1894 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Lists - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 0f23555f4..e6736100f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> +Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal + Copyright : Copyright (C) 2014-2017 Jesse Rosenthal License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index db58e9654..c0d8029dc 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -13,6 +13,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) +import qualified Data.Text.Lazy.Encoding as TL +import qualified Data.Text.Lazy as TL import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) @@ -73,7 +75,7 @@ archiveToEPUB os archive = do mimeToReader "application/xhtml+xml" (unEscapeString -> root) (unEscapeString -> path) = do fname <- findEntryByPathE (root </> path) archive - html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname + html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname return $ fixInternalReferences path html mimeToReader s _ (unEscapeString -> path) | s `elem` imageMimes = return $ imageToPandoc path diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 14b051539..94f933c4d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, -ViewPatterns#-} +ViewPatterns, OverloadedStrings #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,6 +34,7 @@ module Text.Pandoc.Readers.HTML ( readHtml , htmlInBalanced , isInlineTag , isBlockTag + , NamedTag(..) , isTextTag , isCommentTag ) where @@ -43,7 +44,7 @@ import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField +import Text.Pandoc.Shared ( extractSpaces, addMetaField , escapeURI, safeRead ) import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, Extension (Ext_epub_html_exts, @@ -53,12 +54,14 @@ import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import Data.Char ( isDigit, isLetter, isAlphaNum ) import Control.Monad ( guard, mzero, void, unless ) import Control.Arrow ((***)) import Control.Applicative ( (<|>) ) import Data.Monoid (First (..)) +import Data.Text (Text) +import qualified Data.Text as T import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift) @@ -74,11 +77,12 @@ import Control.Monad.Except (throwError) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readHtml opts inp = do let tags = stripPrefixes . canonicalizeTags $ - parseTagsOptions parseOptions{ optTagPosition = True } inp + parseTagsOptions parseOptions{ optTagPosition = True } + inp parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -128,7 +132,7 @@ setInPlain = local (\s -> s {inPlain = True}) type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) -type TagParser m = HTMLParser m [Tag String] +type TagParser m = HTMLParser m [Tag Text] pBody :: PandocMonad m => TagParser m Blocks pBody = pInTags "body" block @@ -138,12 +142,12 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do - mt <- pSatisfy (~== TagOpen "meta" []) - let name = fromAttrib "name" mt + mt <- pSatisfy (matchTagOpen "meta" []) + let name = T.unpack $ fromAttrib "name" mt if null name then return mempty else do - let content = fromAttrib "content" mt + let content = T.unpack $ fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -151,9 +155,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag (stateMeta ps) } } return mempty pBaseTag = do - bt <- pSatisfy (~== TagOpen "base" []) + bt <- pSatisfy (matchTagOpen "base" []) updateState $ \st -> st{ baseHref = - parseURIReference $ fromAttrib "href" bt } + parseURIReference $ T.unpack $ fromAttrib "href" bt } return mempty block :: PandocMonad m => TagParser m Blocks @@ -193,29 +197,31 @@ eSwitch :: (PandocMonad m, Monoid a) -> TagParser m a eSwitch constructor parser = try $ do guardEnabled Ext_epub_html_exts - pSatisfy (~== TagOpen "switch" []) + pSatisfy (matchTagOpen "switch" []) cases <- getFirst . mconcat <$> manyTill (First <$> (eCase <* skipMany pBlank) ) - (lookAhead $ try $ pSatisfy (~== TagOpen "default" [])) + (lookAhead $ try $ pSatisfy (matchTagOpen "default" [])) skipMany pBlank fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank) skipMany pBlank - pSatisfy (~== TagClose "switch") + pSatisfy (matchTagClose "switch") return $ maybe fallback constructor cases eCase :: PandocMonad m => TagParser m (Maybe Inlines) eCase = do skipMany pBlank - TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" []) + TagOpen _ attr' <- lookAhead $ pSatisfy $ (matchTagOpen "case" []) + let attr = toStringAttr attr' case (flip lookup namespaces) =<< lookup "required-namespace" attr of Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)) - Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case")) + Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case")) eFootnote :: PandocMonad m => TagParser m () eFootnote = try $ do let notes = ["footnote", "rearnote"] guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag + (TagOpen tag attr') <- lookAhead $ pAnyTag + let attr = toStringAttr attr' guard (maybe False (flip elem notes) (lookup "type" attr)) let ident = fromMaybe "" (lookup "id" attr) content <- pInTags tag block @@ -227,7 +233,8 @@ addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s) eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do guardEnabled Ext_epub_html_exts - TagOpen tag attr <- lookAhead $ pAnyTag + TagOpen tag attr' <- lookAhead $ pAnyTag + let attr = toStringAttr attr' guard (maybe False (== "noteref") (lookup "type" attr)) let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr) guard (not (null ident)) @@ -247,10 +254,10 @@ pList = pBulletList <|> pOrderedList <|> pDefinitionList pBulletList :: PandocMonad m => TagParser m Blocks pBulletList = try $ do - pSatisfy (~== TagOpen "ul" []) + pSatisfy (matchTagOpen "ul" []) let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ul")) + not (matchTagClose "ul" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem @@ -259,7 +266,8 @@ pBulletList = try $ do pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks pListItem nonItem = do - TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" []) + TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) + let attr = toStringAttr attr' let addId ident bs = case B.toList bs of (Plain ils:xs) -> B.fromList (Plain [Span (ident, [], []) ils] : xs) @@ -285,7 +293,8 @@ parseTypeAttr _ = DefaultStyle pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" []) + let attribs = toStringAttr attribs' let (start, style) = (sta', sty') where sta = fromMaybe "1" $ lookup "start" attribs @@ -307,7 +316,7 @@ pOrderedList = try $ do ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ol")) + not (matchTagClose "ol" t)) -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem @@ -316,14 +325,14 @@ pOrderedList = try $ do pDefinitionList :: PandocMonad m => TagParser m Blocks pDefinitionList = try $ do - pSatisfy (~== TagOpen "dl" []) + pSatisfy (matchTagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") return $ B.definitionList items pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks]) pDefListItem = try $ do - let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && - not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) + let nonItem = pSatisfy (\t -> not (matchTagOpen "dt" [] t) && + not (matchTagOpen "dd" [] t) && not (matchTagClose "dl" t)) terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem @@ -346,12 +355,12 @@ fixPlains inList bs = if any isParaish bs' plainToPara x = x bs' = B.toList bs -pRawTag :: PandocMonad m => TagParser m String +pRawTag :: PandocMonad m => TagParser m Text pRawTag = do tag <- pAnyTag let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] if tagOpen ignorable (const True) tag || tagClose ignorable tag - then return [] + then return mempty else return $ renderTags' [tag] pDiv :: PandocMonad m => TagParser m Blocks @@ -360,7 +369,8 @@ pDiv = try $ do let isDivLike "div" = True isDivLike "section" = True isDivLike _ = False - TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) + let attr = toStringAttr attr' contents <- pInTags tag block let (ident, classes, kvs) = mkAttr attr let classes' = if tag == "section" @@ -370,7 +380,7 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag + raw <- T.unpack <$> (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag) exts <- getOption readerExtensions if extensionEnabled Ext_raw_html exts && not (null raw) then return $ B.rawBlock "html" raw @@ -385,33 +395,35 @@ ignore raw = do logMessage $ SkippedContent raw pos return mempty -pHtmlBlock :: PandocMonad m => String -> TagParser m String +pHtmlBlock :: PandocMonad m => Text -> TagParser m Text pHtmlBlock t = try $ do - open <- pSatisfy (~== TagOpen t []) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) - return $ renderTags' $ [open] ++ contents ++ [TagClose t] + open <- pSatisfy (matchTagOpen t []) + contents <- manyTill pAnyTag (pSatisfy (matchTagClose t)) + return $ renderTags' $ [open] <> contents <> [TagClose t] -- Sets chapter context eSection :: PandocMonad m => TagParser m Blocks eSection = try $ do - let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as) + let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as) let sectTag = tagOpen (`elem` sectioningContent) matchChapter TagOpen tag _ <- lookAhead $ pSatisfy sectTag setInChapter (pInTags tag block) -headerLevel :: PandocMonad m => String -> TagParser m Int +headerLevel :: PandocMonad m => Text -> TagParser m Int headerLevel tagtype = do - let level = read (drop 1 tagtype) - (try $ do - guardEnabled Ext_epub_html_exts - asks inChapter >>= guard - return (level - 1)) - <|> - return level + case safeRead (T.unpack (T.drop 1 tagtype)) of + Just level -> + (try $ do + guardEnabled Ext_epub_html_exts + asks inChapter >>= guard + return (level - 1)) + <|> + return level + Nothing -> fail "Could not retrieve header level" eTitlePage :: PandocMonad m => TagParser m () eTitlePage = try $ do - let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) + let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as) let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") isTitlePage TagOpen tag _ <- lookAhead $ pSatisfy groupTag @@ -419,19 +431,21 @@ eTitlePage = try $ do pHeader :: PandocMonad m => TagParser m Blocks pHeader = try $ do - TagOpen tagtype attr <- pSatisfy $ + TagOpen tagtype attr' <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) - let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] + let attr = toStringAttr attr' + let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) + [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] - attr' <- registerHeader (ident, classes, keyvals) contents + attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith attr' level contents + else B.headerWith attr'' level contents pHrule :: PandocMonad m => TagParser m Blocks pHrule = do @@ -440,7 +454,7 @@ pHrule = do pTable :: PandocMonad m => TagParser m Blocks pTable = try $ do - TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) + TagOpen _ _ <- pSatisfy (matchTagOpen "table" []) skipMany pBlank caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank widths' <- (mconcat <$> many1 pColgroup) <|> many pCol @@ -454,8 +468,8 @@ pTable = try $ do else return head'' rowsLs <- many pTBody rows' <- pOptInTag "tfoot" $ many pTr - TagClose _ <- pSatisfy (~== TagClose "table") - let rows'' = (concat rowsLs) ++ rows' + TagClose _ <- pSatisfy (matchTagClose "table") + let rows'' = (concat rowsLs) <> rows' -- fail on empty table guard $ not $ null head' && null rows'' let isSinglePlain x = case B.toList x of @@ -466,7 +480,7 @@ pTable = try $ do let cols = length $ if null head' then head rows'' else head' -- add empty cells to short rows let addEmpties r = case cols - length r of - n | n > 0 -> r ++ replicate n mempty + n | n > 0 -> r <> replicate n mempty | otherwise -> r let rows = map addEmpties rows'' let aligns = replicate cols AlignDefault @@ -479,15 +493,16 @@ pTable = try $ do pCol :: PandocMonad m => TagParser m Double pCol = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) + let attribs = toStringAttr attribs' skipMany pBlank - optional $ pSatisfy (~== TagClose "col") + optional $ pSatisfy (matchTagClose "col") skipMany pBlank return $ case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> fromMaybe 0.0 $ safeRead ('0':'.':filter - (`notElem` " \t\r\n%'\";") xs) + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) _ -> 0.0 Just x | not (null x) && last x == '%' -> fromMaybe 0.0 $ safeRead ('0':'.':init x) @@ -495,18 +510,18 @@ pCol = try $ do pColgroup :: PandocMonad m => TagParser m [Double] pColgroup = try $ do - pSatisfy (~== TagOpen "colgroup" []) + pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -noColOrRowSpans :: Tag String -> Bool +noColOrRowSpans :: Tag Text -> Bool noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan" where isNullOrOne x = case fromAttrib x t of "" -> True "1" -> True _ -> False -pCell :: PandocMonad m => String -> TagParser m [Blocks] +pCell :: PandocMonad m => Text -> TagParser m [Blocks] pCell celltype = try $ do skipMany pBlank res <- pInTags' celltype noColOrRowSpans block @@ -532,7 +547,8 @@ pPara = do pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do - TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) + TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) + let attr = toStringAttr attr' contents <- manyTill pAnyTag (pCloses "pre" <|> eof) let rawText = concatMap tagToString contents -- drop leading newline if any @@ -545,8 +561,8 @@ pCodeBlock = try $ do _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag String -> String -tagToString (TagText s) = s +tagToString :: Tag Text -> String +tagToString (TagText s) = T.unpack s tagToString (TagOpen "br" _) = "\n" tagToString _ = "" @@ -575,20 +591,20 @@ pLocation = do (TagPosition r c) <- pSat isTagPosition setPosition $ newPos "input" r c -pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) +pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition token show (const pos) (\x -> if f x then Just x else Nothing) -pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) +pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f -pAnyTag :: PandocMonad m => TagParser m (Tag String) +pAnyTag :: PandocMonad m => TagParser m (Tag Text) pAnyTag = pSatisfy (const True) pSelfClosing :: PandocMonad m - => (String -> Bool) -> ([Attribute String] -> Bool) - -> TagParser m (Tag String) + => (Text -> Bool) -> ([Attribute Text] -> Bool) + -> TagParser m (Tag Text) pSelfClosing f g = do open <- pSatisfy (tagOpen f g) optional $ pSatisfy (tagClose f) @@ -626,7 +642,7 @@ pStrikeout = do pInlinesInTags "s" B.strikeout <|> pInlinesInTags "strike" B.strikeout <|> pInlinesInTags "del" B.strikeout <|> - try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) + try (do pSatisfy (matchTagOpen "span" [("class","strikeout")]) contents <- mconcat <$> manyTill inline (pCloses "span") return $ B.strikeout contents) @@ -637,17 +653,19 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag String -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs +maybeFromAttrib :: String -> Tag Text -> Maybe String +maybeFromAttrib name (TagOpen _ attrs) = + T.unpack <$> lookup (T.pack name) attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = fromAttrib "title" tag + let title = T.unpack $ fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let uid = maybe (T.unpack $ fromAttrib "name" tag) id $ + maybeFromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -665,30 +683,33 @@ pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "src" tag + let url' = T.unpack $ fromAttrib "src" tag let url = case (parseURIReference url', mbBaseHref) of (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) _ -> url' - let title = fromAttrib "title" tag - let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = words $ fromAttrib "class" tag + let title = T.unpack $ fromAttrib "title" tag + let alt = T.unpack $ fromAttrib "alt" tag + let uid = T.unpack $ fromAttrib "id" tag + let cls = words $ T.unpack $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(k, v)] + v -> [(T.unpack k, T.unpack v)] let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do - (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + let attr = toStringAttr attr' result <- manyTill pAnyTag (pCloses open) - return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result + return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ T.unpack $ + innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do guardEnabled Ext_native_spans - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + TagOpen _ attr' <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + let attr = toStringAttr attr' contents <- pInTags "span" inline let isSmallCaps = fontVariant == "small-caps" || "smallcaps" `elem` classes where styleAttr = fromMaybe "" $ lookup "style" attr @@ -706,7 +727,7 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = renderTags' [result] + let raw = T.unpack $ renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw @@ -714,32 +735,38 @@ pRawHtmlInline = do mathMLToTeXMath :: String -> Either String String mathMLToTeXMath s = writeTeX <$> readMathML s +toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr = map go + where go (x,y) = (T.unpack x, T.unpack y) + pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do - open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True) + open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True) -- we'll assume math tags are MathML unless specially marked -- otherwise... + let attr = toStringAttr attr' unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math")) - case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of + contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math")) + case mathMLToTeXMath (T.unpack $ renderTags $ + [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - innerText contents + T.unpack $ innerText contents Right [] -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x -pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines) +pInlinesInTags :: PandocMonad m => Text -> (Inlines -> Inlines) -> TagParser m Inlines pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline -pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a +pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a pInTags tagtype parser = pInTags' tagtype (const True) parser pInTags' :: (PandocMonad m, Monoid a) - => String - -> (Tag String -> Bool) + => Text + -> (Tag Text -> Bool) -> TagParser m a -> TagParser m a pInTags' tagtype tagtest parser = try $ do @@ -748,18 +775,18 @@ pInTags' tagtype tagtest parser = try $ do -- parses p, preceeded by an optional opening tag -- and followed by an optional closing tags -pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a +pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do skipMany pBlank - optional $ pSatisfy (~== TagOpen tagtype []) + optional $ pSatisfy (matchTagOpen tagtype []) skipMany pBlank x <- p skipMany pBlank - optional $ pSatisfy (~== TagClose tagtype) + optional $ pSatisfy (matchTagClose tagtype) skipMany pBlank return x -pCloses :: PandocMonad m => String -> TagParser m () +pCloses :: PandocMonad m => Text -> TagParser m () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of @@ -780,15 +807,15 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () pBlank = try $ do (TagText str) <- pSatisfy isTagText - guard $ all isSpace str + guard $ T.all isSpace str -type InlinesParser m = HTMLParser m String +type InlinesParser m = HTMLParser m Text pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = @@ -869,80 +896,89 @@ pSpace = many1 (satisfy isSpace) >>= \xs -> -- Constants -- -eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", - "del", "ins", - "progress", "map", "area", "noscript", "script", - "object", "svg", "video", "source"] - -{- -inlineHtmlTags :: [[Char]] -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] --} - -blockHtmlTags :: [String] -blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", - "blockquote", "body", "button", "canvas", - "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "fieldset", "figcaption", "figure", - "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "head", "header", "hgroup", "hr", "html", - "isindex", "menu", "noframes", "ol", "output", "p", "pre", - "section", "table", "tbody", "textarea", - "thead", "tfoot", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style"] +eitherBlockOrInline :: Set.Set Text +eitherBlockOrInline = Set.fromList + ["audio", "applet", "button", "iframe", "embed", + "del", "ins", "progress", "map", "area", "noscript", "script", + "object", "svg", "video", "source"] + +blockHtmlTags :: Set.Set Text +blockHtmlTags = Set.fromList + ["?xml", "!DOCTYPE", "address", "article", "aside", + "blockquote", "body", "canvas", + "caption", "center", "col", "colgroup", "dd", "details", + "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", + "footer", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "head", "header", "hgroup", "hr", "html", + "isindex", "menu", "noframes", "ol", "output", "p", "pre", + "section", "table", "tbody", "textarea", + "thead", "tfoot", "ul", "dd", + "dt", "frameset", "li", "tbody", "td", "tfoot", + "th", "thead", "tr", "script", "style"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. -blockDocBookTags :: [String] -blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", - "orderedlist", "segmentedlist", "simplelist", - "variablelist", "caution", "important", "note", "tip", - "warning", "address", "literallayout", "programlisting", - "programlistingco", "screen", "screenco", "screenshot", - "synopsis", "example", "informalexample", "figure", - "informalfigure", "table", "informaltable", "para", - "simpara", "formalpara", "equation", "informalequation", - "figure", "screenshot", "mediaobject", "qandaset", - "procedure", "task", "cmdsynopsis", "funcsynopsis", - "classsynopsis", "blockquote", "epigraph", "msgset", - "sidebar", "title"] - -epubTags :: [String] -epubTags = ["case", "switch", "default"] - -blockTags :: [String] -blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags - -isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen isInlineTagName (const True) t || - tagClose isInlineTagName t || - tagComment (const True) t - where isInlineTagName x = x `notElem` blockTags - -isBlockTag :: Tag String -> Bool -isBlockTag t = tagOpen isBlockTagName (const True) t || - tagClose isBlockTagName t || - tagComment (const True) t - where isBlockTagName ('?':_) = True - isBlockTagName ('!':_) = True - isBlockTagName x = x `elem` blockTags - || x `elem` eitherBlockOrInline - -isTextTag :: Tag String -> Bool +blockDocBookTags :: Set.Set Text +blockDocBookTags = Set.fromList + ["calloutlist", "bibliolist", "glosslist", "itemizedlist", + "orderedlist", "segmentedlist", "simplelist", + "variablelist", "caution", "important", "note", "tip", + "warning", "address", "literallayout", "programlisting", + "programlistingco", "screen", "screenco", "screenshot", + "synopsis", "example", "informalexample", "figure", + "informalfigure", "table", "informaltable", "para", + "simpara", "formalpara", "equation", "informalequation", + "figure", "screenshot", "mediaobject", "qandaset", + "procedure", "task", "cmdsynopsis", "funcsynopsis", + "classsynopsis", "blockquote", "epigraph", "msgset", + "sidebar", "title"] + +epubTags :: Set.Set Text +epubTags = Set.fromList ["case", "switch", "default"] + +blockTags :: Set.Set Text +blockTags = Set.unions [blockHtmlTags, blockDocBookTags, epubTags] + +class NamedTag a where + getTagName :: a -> Maybe Text + +instance NamedTag (Tag Text) where + getTagName (TagOpen t _) = Just t + getTagName (TagClose t) = Just t + getTagName _ = Nothing + +instance NamedTag (Tag String) where + getTagName (TagOpen t _) = Just (T.pack t) + getTagName (TagClose t) = Just (T.pack t) + getTagName _ = Nothing + +isInlineTag :: NamedTag (Tag a) => Tag a -> Bool +isInlineTag t = isInlineTagName || isCommentTag t + where isInlineTagName = case getTagName t of + Just x -> x + `Set.notMember` blockTags + Nothing -> False + +isBlockTag :: NamedTag (Tag a) => Tag a -> Bool +isBlockTag t = isBlockTagName || isTagComment t + where isBlockTagName = + case getTagName t of + Just x + | "?" `T.isPrefixOf` x -> True + | "!" `T.isPrefixOf` x -> True + | otherwise -> x `Set.member` blockTags + || x `Set.member` eitherBlockOrInline + Nothing -> False + +isTextTag :: Tag a -> Bool isTextTag = tagText (const True) -isCommentTag :: Tag String -> Bool +isCommentTag :: Tag a -> Bool isCommentTag = tagComment (const True) -- taken from HXT and extended -- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags -closes :: String -> String -> Bool +closes :: Text -> Text -> Bool _ `closes` "body" = False _ `closes` "html" = False "body" `closes` "head" = True @@ -975,8 +1011,9 @@ t `closes` t2 | t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" t1 `closes` t2 | - t1 `elem` blockTags && - t2 `notElem` (blockTags ++ eitherBlockOrInline) = True + t1 `Set.member` blockTags && + t2 `Set.notMember` blockTags && + t2 `Set.notMember` eitherBlockOrInline = True _ `closes` _ = False --- parsers for use in markdown, textile readers @@ -1003,8 +1040,11 @@ htmlInBalanced f = try $ do let cs = ec - sc lscontents <- unlines <$> count ls anyLine cscontents <- count cs anyChar - (_,closetag) <- htmlTag (~== TagClose tn) - return (lscontents ++ cscontents ++ closetag) + closetag <- do + x <- many (satisfy (/='>')) + char '>' + return (x <> ">") + return (lscontents <> cscontents <> closetag) _ -> mzero _ -> mzero @@ -1022,7 +1062,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts go n (t:ts') = (t :) <$> go n ts' go _ [] = mzero -hasTagWarning :: [Tag String] -> Bool +hasTagWarning :: [Tag a] -> Bool hasTagWarning (TagWarning _:_) = True hasTagWarning _ = False @@ -1050,47 +1090,48 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname + guard $ not $ null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] guard $ last tagname /= ':' rendered <- manyTill anyChar (char '>') - return (next, rendered ++ ">") + return (next, rendered <> ">") case next of TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" ++ s ++ "-->") + return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" TagOpen tagname attr -> do guard $ all (isName . fst) attr handleTag tagname - TagClose tagname -> handleTag tagname + TagClose tagname -> + handleTag tagname _ -> mzero mkAttr :: [(String, String)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes + attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes -stripPrefixes :: [Tag String] -> [Tag String] +stripPrefixes :: [Tag Text] -> [Tag Text] stripPrefixes = map stripPrefix -stripPrefix :: Tag String -> Tag String +stripPrefix :: Tag Text -> Tag Text stripPrefix (TagOpen s as) = TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as) stripPrefix (TagClose s) = TagClose (stripPrefix' s) stripPrefix x = x -stripPrefix' :: String -> String +stripPrefix' :: Text -> Text stripPrefix' s = - case span (/= ':') s of - (_, "") -> s - (_, (_:ts)) -> ts + if T.null t then s else T.drop 1 t + where (_, t) = T.span (/= ':') s isSpace :: Char -> Bool isSpace ' ' = True @@ -1133,19 +1174,32 @@ instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} getLastStrPos = getLastStrPos . parserState +-- For now we need a special verison here; the one in Shared has String type +renderTags' :: [Tag Text] -> Text +renderTags' = renderTagsOptions + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . T.toLower + -- EPUB Specific -- -- -sectioningContent :: [String] +sectioningContent :: [Text] sectioningContent = ["article", "aside", "nav", "section"] -groupingContent :: [String] +groupingContent :: [Text] groupingContent = ["p", "hr", "pre", "blockquote", "ol" , "ul", "li", "dl", "dt", "dt", "dd" , "figure", "figcaption", "div", "main"] +matchTagClose :: Text -> (Tag Text -> Bool) +matchTagClose t = (~== TagClose t) + +matchTagOpen :: Text -> [(Text, Text)] -> (Tag Text -> Bool) +matchTagOpen t as = (~== TagOpen t as) {- @@ -1153,7 +1207,7 @@ types :: [(String, ([String], Int))] types = -- Document divisions map (\s -> (s, (["section", "body"], 0))) ["volume", "part", "chapter", "division"] - ++ -- Document section and components + <> -- Document section and components [ ("abstract", ([], 0))] -} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 28caa528e..b22b71b96 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Readers.Haddock import Control.Monad.Except (throwError) import Data.List (intersperse, stripPrefix) +import Data.Text (Text, unpack) import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import Documentation.Haddock.Parser @@ -32,9 +33,9 @@ import Text.Pandoc.Shared (splitBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc -readHaddock opts s = case readHaddockEither opts s of +readHaddock opts s = case readHaddockEither opts (unpack s) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1d13f7107..17fb48548 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,6 +39,7 @@ import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) import Data.Char (chr, isAlphaNum, isLetter, ord) +import Data.Text (Text, unpack) import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) @@ -46,7 +47,7 @@ import Safe (minimumDef) import System.FilePath (addExtension, replaceExtension, takeExtension) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileFromDirs, - report, setResourcePath) + report, setResourcePath, getResourcePath) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -59,10 +60,10 @@ import Text.Pandoc.Walk -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx + parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx) case parsed of Right result -> return result Left e -> throwError e @@ -276,8 +277,6 @@ block = (mempty <$ comment) <|> blockCommand <|> paragraph <|> grouped block - <|> (mempty <$ char '&') -- loose & in table environment - blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block @@ -304,8 +303,8 @@ blockCommand = try $ do rawcommand <- getRawCommand name' transformed <- applyMacros' rawcommand guard $ transformed /= rawcommand - notFollowedBy $ parseFromString inlines transformed - parseFromString blocks transformed + notFollowedBy $ parseFromString' inlines transformed + parseFromString' blocks transformed lookupListDefault raw [name',name] blockCommands inBrackets :: Inlines -> Inlines @@ -432,7 +431,7 @@ coloredBlock stylename = do graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do ps <- bgroup *> (manyTill braced egroup) - setResourcePath (".":ps) + getResourcePath >>= setResourcePath . (++ ps) return mempty addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () @@ -490,16 +489,19 @@ inlineCommand = try $ do transformed <- applyMacros' rawcommand exts <- getOption readerExtensions if transformed /= rawcommand - then parseFromString inlines transformed + then parseFromString' inlines transformed else if extensionEnabled Ext_raw_tex exts then return $ rawInline "latex" rawcommand else ignore rawcommand (lookupListDefault raw [name',name] inlineCommands <* optional (try (string "{}"))) -unlessParseRaw :: PandocMonad m => LP m () -unlessParseRaw = getOption readerExtensions >>= - guard . not . extensionEnabled Ext_raw_tex +rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' + else fallback isBlockCommand :: String -> Bool isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) @@ -507,20 +509,20 @@ isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Bl inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) inlineEnvironments = M.fromList - [ ("displaymath", mathEnv id Nothing "displaymath") - , ("math", math <$> verbEnv "math") - , ("equation", mathEnv id Nothing "equation") - , ("equation*", mathEnv id Nothing "equation*") - , ("gather", mathEnv id (Just "gathered") "gather") - , ("gather*", mathEnv id (Just "gathered") "gather*") - , ("multline", mathEnv id (Just "gathered") "multline") - , ("multline*", mathEnv id (Just "gathered") "multline*") - , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") - , ("align", mathEnv id (Just "aligned") "align") - , ("align*", mathEnv id (Just "aligned") "align*") - , ("alignat", mathEnv id (Just "aligned") "alignat") - , ("alignat*", mathEnv id (Just "aligned") "alignat*") + [ ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") ] inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) @@ -547,11 +549,11 @@ inlineCommands = M.fromList $ , ("dots", lit "…") , ("mdots", lit "…") , ("sim", lit "~") - , ("label", unlessParseRaw >> (inBrackets <$> tok)) - , ("ref", unlessParseRaw >> (inBrackets <$> tok)) + , ("label", rawInlineOr "label" (inBrackets <$> tok)) + , ("ref", rawInlineOr "ref" (inBrackets <$> tok)) , ("textgreek", tok) , ("sep", lit ",") - , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty + , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) , ("ensuremath", mathInline braced) @@ -605,7 +607,7 @@ inlineCommands = M.fromList $ , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) - , (",", pure mempty) + , (",", lit "\8198") , ("@", pure mempty) , (" ", lit "\160") , ("ps", pure $ str "PS." <> space) @@ -698,6 +700,9 @@ inlineCommands = M.fromList $ -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: @@ -1045,7 +1050,7 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks raw' <- applyMacros' $ beginCommand ++ raw if raw' /= beginCommand ++ raw - then parseFromString blocks raw' + then parseFromString' blocks raw' else if parseRaw then return $ rawBlock "latex" $ beginCommand ++ raw' else do @@ -1055,6 +1060,19 @@ rawEnv name = do report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 return bs +rawVerbEnv :: PandocMonad m => String -> LP m Blocks +rawVerbEnv name = do + pos <- getPosition + (_, raw) <- withRaw $ verbEnv name + let raw' = "\\begin{tikzpicture}" ++ raw + exts <- getOption readerExtensions + let parseRaw = extensionEnabled Ext_raw_tex exts + if parseRaw + then return $ rawBlock "latex" raw' + else do + report $ SkippedContent raw' pos + return mempty + ---- maybeAddExtension :: String -> FilePath -> FilePath @@ -1119,7 +1137,7 @@ parseListingsOptions options = keyval :: PandocMonad m => LP m (String, String) keyval = try $ do key <- many1 alphaNum - val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') + val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\")) skipMany spaceChar optional (char ',') skipMany spaceChar @@ -1130,7 +1148,7 @@ keyvals :: PandocMonad m => LP m [(String, String)] keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString blocks +alltt t = walk strToCode <$> parseFromString' blocks (substitute " " "\\ " $ substitute "%" "\\%" $ intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s @@ -1176,11 +1194,12 @@ environments = M.fromList , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) , ("center", env "center" blocks) , ("longtable", env "longtable" $ - resetCaption *> simpTable False >>= addTableCaption) + resetCaption *> simpTable "longtable" False >>= addTableCaption) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable True) - , ("tabular", env "tabular" $ simpTable False) + , ("tabular*", env "tabular" $ simpTable "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable "tabularx" True) + , ("tabular", env "tabular" $ simpTable "tabular" False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) @@ -1210,19 +1229,20 @@ environments = M.fromList , ("obeylines", parseFromString (para . trimInlines . mconcat <$> many inline) =<< intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv para Nothing "displaymath") - , ("equation", mathEnv para Nothing "equation") - , ("equation*", mathEnv para Nothing "equation*") - , ("gather", mathEnv para (Just "gathered") "gather") - , ("gather*", mathEnv para (Just "gathered") "gather*") - , ("multline", mathEnv para (Just "gathered") "multline") - , ("multline*", mathEnv para (Just "gathered") "multline*") - , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") - , ("align", mathEnv para (Just "aligned") "align") - , ("align*", mathEnv para (Just "aligned") "align*") - , ("alignat", mathEnv para (Just "aligned") "alignat") - , ("alignat*", mathEnv para (Just "aligned") "alignat*") + , ("displaymath", mathEnvWith para Nothing "displaymath") + , ("equation", mathEnvWith para Nothing "equation") + , ("equation*", mathEnvWith para Nothing "equation*") + , ("gather", mathEnvWith para (Just "gathered") "gather") + , ("gather*", mathEnvWith para (Just "gathered") "gather*") + , ("multline", mathEnvWith para (Just "gathered") "multline") + , ("multline*", mathEnvWith para (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") + , ("align", mathEnvWith para (Just "aligned") "align") + , ("align*", mathEnvWith para (Just "aligned") "align*") + , ("alignat", mathEnvWith para (Just "aligned") "alignat") + , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") + , ("tikzpicture", rawVerbEnv "tikzpicture") ] figure :: PandocMonad m => LP m Blocks @@ -1287,19 +1307,32 @@ listenv name p = try $ do updateState $ \st -> st{ stateParserContext = oldCtx } return res -mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe String -> String -> LP m a +mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name) where inner x = case innerEnv of Nothing -> x Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ "\\end{" ++ y ++ "}" +mathEnv :: PandocMonad m => String -> LP m String +mathEnv name = do + skipopts + optional blankline + let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) + charMuncher = skipMany comment *> + (many1 (noneOf "\\%") <|> try (string "\\%") + <|> try (string "\\\\") <|> count 1 anyChar) + res <- concat <$> manyTill charMuncher endEnv + return $ stripTrailingNewlines res + verbEnv :: PandocMonad m => String -> LP m String verbEnv name = do skipopts optional blankline let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - res <- manyTill anyChar endEnv + charMuncher = anyChar + res <- manyTill charMuncher endEnv return $ stripTrailingNewlines res fancyverbEnv :: PandocMonad m => String -> LP m Blocks @@ -1314,7 +1347,7 @@ fancyverbEnv name = do codeBlockWith attr <$> verbEnv name orderedList' :: PandocMonad m => LP m Blocks -orderedList' = do +orderedList' = try $ do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ char '[' *> anyOrderedListMarker <* char ']' @@ -1429,7 +1462,7 @@ complexNatbibCitation mode = try $ do -- tables -parseAligns :: PandocMonad m => LP m [(String, Alignment, String)] +parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))] parseAligns = try $ do bgroup let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) @@ -1437,18 +1470,36 @@ parseAligns = try $ do let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ (char 'p' >> braced) + let parAlign = AlignLeft <$ char 'p' + -- algins from tabularx + let xAlign = AlignLeft <$ char 'X' + let mAlign = AlignLeft <$ char 'm' + let bAlign = AlignLeft <$ char 'b' let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign let alignPrefix = char '>' >> braced let alignSuffix = char '<' >> braced + let colWidth = try $ do + char '{' + ds <- many1 (oneOf "0123456789.") + spaces + string "\\linewidth" + char '}' + case safeRead ds of + Just w -> return w + Nothing -> return 0.0 let alignSpec = do spaces pref <- option "" alignPrefix spaces - ch <- alignChar + al <- alignChar + width <- colWidth <|> option 0.0 (do s <- braced + pos <- getPosition + report $ SkippedContent s pos + return 0.0) spaces suff <- option "" alignSuffix - return (pref, ch, suff) + return (al, width, (pref, suff)) aligns' <- sepEndBy alignSpec maybeBar spaces egroup @@ -1478,24 +1529,26 @@ amp :: PandocMonad m => LP m () amp = () <$ try (spaces' *> char '&' <* spaces') parseTableRow :: PandocMonad m - => Int -- ^ number of columns - -> [String] -- ^ prefixes - -> [String] -- ^ suffixes + => String -- ^ table environment name + -> [(String, String)] -- ^ pref/suffixes -> LP m [Blocks] -parseTableRow cols prefixes suffixes = try $ do - let tableCellRaw = many (notFollowedBy - (amp <|> lbreak <|> - (() <$ try (string "\\end"))) >> anyChar) - let minipage = try $ controlSeq "begin" *> string "{minipage}" *> - env "minipage" - (skipopts *> spaces' *> optional braced *> spaces' *> blocks) - let tableCell = minipage <|> - ((plain . trimInlines . mconcat) <$> many inline) +parseTableRow envname prefsufs = try $ do + let cols = length prefsufs + let tableCellRaw = concat <$> many + (do notFollowedBy amp + notFollowedBy lbreak + notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}")) + many1 (noneOf "&%\n\r\\") + <|> try (string "\\&") + <|> count 1 anyChar) + let plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs rawcells <- sepBy1 tableCellRaw amp guard $ length rawcells == cols - let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s) - rawcells prefixes suffixes - cells' <- mapM (parseFromString tableCell) rawcells' + let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs + let tableCell = plainify <$> blocks + cells' <- mapM (parseFromString' tableCell) rawcells' let numcells = length cells' guard $ numcells <= cols && numcells >= 1 guard $ cells' /= [mempty] @@ -1507,21 +1560,22 @@ parseTableRow cols prefixes suffixes = try $ do spaces' :: PandocMonad m => LP m () spaces' = spaces *> skipMany (comment *> spaces) -simpTable :: PandocMonad m => Bool -> LP m Blocks -simpTable hasWidthParameter = try $ do +simpTable :: PandocMonad m => String -> Bool -> LP m Blocks +simpTable envname hasWidthParameter = try $ do when hasWidthParameter $ () <$ (spaces' >> tok) skipopts - (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns - let cols = length aligns + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + let cols = length colspecs optional $ controlSeq "caption" *> skipopts *> setCaption optional lbreak spaces' skipMany hline spaces' - header' <- option [] $ try (parseTableRow cols prefixes suffixes <* + header' <- option [] $ try (parseTableRow envname prefsufs <* lbreak <* many1 hline) spaces' - rows <- sepEndBy (parseTableRow cols prefixes suffixes) + rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) spaces' optional $ controlSeq "caption" *> skipopts *> setCaption @@ -1531,7 +1585,7 @@ simpTable hasWidthParameter = try $ do then replicate cols mempty else header' lookAhead $ controlSeq "end" -- make sure we're at end - return $ table mempty (zip aligns (repeat 0)) header'' rows + return $ table mempty (zip aligns widths) header'' rows removeDoubleQuotes :: String -> String removeDoubleQuotes ('"':xs) = diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5515c735b..e1c481311 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -55,11 +55,9 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) -import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) @@ -72,10 +70,11 @@ type MarkdownParser m = ParserT [Char] ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readMarkdown opts s = do - parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -155,9 +154,11 @@ litChar = escapedChar' inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = do char '[' + pos <- getPosition (_, raw) <- withRaw $ charsInBalancedBrackets 1 guard $ not $ null raw - parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) + parseFromString' (setPosition pos >> + trimInlinesF . mconcat <$> many inline) (init raw) charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m () charsInBalancedBrackets 0 = return () @@ -189,7 +190,7 @@ rawTitleBlockLine = do titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw + res <- parseFromString' (many inline) raw return $ trimInlinesF $ mconcat res authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) @@ -200,12 +201,12 @@ authorsLine = try $ do (trimInlinesF . mconcat <$> many (try $ notFollowedBy sep >> inline)) sep - sequence <$> parseFromString pAuthors raw + sequence <$> parseFromString' pAuthors raw dateLine :: PandocMonad m => MarkdownParser m (F Inlines) dateLine = try $ do raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw + res <- parseFromString' (many inline) raw return $ trimInlinesF $ mconcat res titleBlock :: PandocMonad m => MarkdownParser m () @@ -290,7 +291,7 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: PandocMonad m => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = toMeta <$> parseFromString parseBlocks (T.unpack x) +toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x) where toMeta p = do p' <- p @@ -360,20 +361,20 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState + -- check for notes with no corresponding note references + let notesUsed = stateNoteRefs st + let notesDefined = M.keys (stateNotes' st) + mapM_ (\n -> unless (n `Set.member` notesUsed) $ do + -- lookup to get sourcepos + case M.lookup n (stateNotes' st) of + Just (pos, _) -> report (NoteDefinedButNotUsed n pos) + Nothing -> error "The impossible happened.") notesDefined let doc = runF (do Pandoc _ bs <- B.doc <$> blocks meta <- stateMeta' st return $ Pandoc meta bs) st reportLogMessages (do guardEnabled Ext_east_asian_line_breaks - return $ bottomUp softBreakFilter doc) <|> return doc - -softBreakFilter :: [Inline] -> [Inline] -softBreakFilter (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), (c:_)) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs -softBreakFilter xs = xs + return $ eastAsianLineBreakFilter doc) <|> return doc referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) referenceKey = try $ do @@ -392,7 +393,9 @@ referenceKey = try $ do src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> skipSpaces >> attributes + do guardEnabled Ext_link_attributes + skipSpaces >> optional newline >> skipSpaces + attributes addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines @@ -402,8 +405,12 @@ referenceKey = try $ do let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of - Just _ -> logMessage $ DuplicateLinkReference raw pos - Nothing -> return () + Just (t,a) | not (t == target && a == attr') -> + -- We don't warn on two duplicate keys if the targets are also + -- the same. This can happen naturally with --reference-location=block + -- or section. See #3701. + logMessage $ DuplicateLinkReference raw pos + _ -> return () updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty @@ -464,13 +471,12 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw - let newnote = (ref, parsed) + parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState - case lookup ref oldnotes of + case M.lookup ref oldnotes of Just _ -> logMessage $ DuplicateNoteReference ref pos Nothing -> return () - updateState $ \s -> s { stateNotes' = newnote : oldnotes } + updateState $ \s -> s { stateNotes' = M.insert ref (pos, parsed) oldnotes } return mempty -- @@ -614,7 +620,7 @@ hrule = try $ do -- indentedLine :: PandocMonad m => MarkdownParser m String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) @@ -772,7 +778,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + contents <- parseFromString' parseBlocks $ (intercalate "\n" raw) ++ "\n\n" return $ B.blockQuote <$> contents -- @@ -868,8 +874,7 @@ listContinuationLine = try $ do notFollowedBy' listStart notFollowedByHtmlCloser optional indentSpaces - result <- anyLine - return $ result ++ "\n" + anyLineNewline listItem :: PandocMonad m => MarkdownParser m a @@ -885,7 +890,7 @@ listItem start = try $ do setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may contain various block elements: let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) return contents @@ -932,8 +937,8 @@ definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Bl definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks . (++"\n")) raw + term <- parseFromString' (trimInlinesF . mconcat <$> many inline) rawLine' + contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) @@ -941,7 +946,7 @@ defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker - firstline <- anyLine + firstline <- anyLineNewline let dline = try ( do notFollowedBy blankline notFollowedByHtmlCloser @@ -956,7 +961,7 @@ defRawBlock compact = try $ do ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ + return $ trimr (firstline ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) @@ -1088,13 +1093,19 @@ rawTeXBlock = do rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + -- we don't want '<td> text' to be a code block: + skipMany spaceChar + indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 -- try to find closing tag -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) + let block' = do notFollowedBy' closer + atMostSpaces indentlevel + block + contents <- mconcat <$> many block' result <- (closer >>= \(_, rawcloser) -> return ( return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> @@ -1119,7 +1130,7 @@ lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) + mapM (parseFromString' (trimInlinesF . mconcat <$> many inline)) return $ B.lineBlock <$> sequence lines' -- @@ -1162,7 +1173,7 @@ simpleTableHeader headless = try $ do then replicate (length dashes) "" else rawHeads heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) + $ mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads' return (heads, aligns, indices) @@ -1208,7 +1219,7 @@ tableLine :: PandocMonad m => [Int] -> MarkdownParser m (F [Blocks]) tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + fmap sequence . mapM (parseFromString' (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: PandocMonad m @@ -1217,7 +1228,7 @@ multilineRow :: PandocMonad m multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. @@ -1275,7 +1286,7 @@ multilineTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ - mapM (parseFromString (mconcat <$> many plain)) $ + mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1285,89 +1296,7 @@ multilineTableHeader headless = try $ do -- ending with a footer (dashed line followed by blank line). gridTable :: PandocMonad m => Bool -- ^ Headerless table -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: PandocMonad m => [Int] - -> MarkdownParser m (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: PandocMonad m => MarkdownParser m [Char] -gridTableFooter = blanklines +gridTable headless = gridTableWith' parseBlocks headless pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do @@ -1414,7 +1343,7 @@ pipeTableRow = try $ do let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') <|> void (noneOf "|\n\r") let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= - parseFromString pipeTableCell + parseFromString' pipeTableCell cells <- cellContents `sepEndBy1` (char '|') -- surrounding pipes needed for a one-column table: guard $ not (length cells == 1 && not openPipe) @@ -1522,6 +1451,7 @@ inline = choice [ whitespace , autoLink , spanHtml , rawHtmlInline + , escapedNewline , escapedChar , rawLaTeXInline' , exampleRef @@ -1538,16 +1468,20 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> (guardEnabled Ext_angle_brackets_escapable >> oneOf "\\`*_{}[]()>#+-.!~\"<>") - <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') <|> oneOf "\\`*_{}[]()>#+-.!~\"" +escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines) +escapedNewline = try $ do + guardEnabled Ext_escaped_line_breaks + char '\\' + lookAhead (char '\n') -- don't consume the newline (see #3730) + return $ return B.linebreak + escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak _ -> return $ return $ B.str [result] ltSign :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1629,9 +1563,9 @@ ender c n = try $ do three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) + (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) + <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. @@ -1639,7 +1573,8 @@ three c = do two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) + (ender c 2 >> updateLastStrPos >> + return (B.strong <$> (prefix' <> contents))) <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. @@ -1650,7 +1585,7 @@ one c prefix' = do <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) + (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1814,15 +1749,17 @@ referenceLink :: PandocMonad m referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (_,raw') <- option (mempty, "") $ - lookAhead (try (guardEnabled Ext_citations >> - spnl >> normalCite >> return (mempty, ""))) + lookAhead (try (do guardEnabled Ext_citations + guardDisabled Ext_spaced_reference_links <|> spnl + normalCite + return (mempty, ""))) <|> - try (spnl >> reference) + try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' - parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + parsedRaw <- parseFromString' (mconcat <$> many inline) raw' + fallback <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1887,16 +1824,17 @@ note :: PandocMonad m => MarkdownParser m (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker + updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) } return $ do notes <- asksF stateNotes' - case lookup ref notes of + case M.lookup ref notes of Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do + Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve -- notes, to avoid infinite looping with notes inside -- notes: - let contents' = runF contents st{ stateNotes' = [] } + let contents' = runF contents st{ stateNotes' = M.empty } return $ B.note contents' inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) @@ -2028,7 +1966,7 @@ textualCite = try $ do let (spaces',raw') = span isSpace raw spc | null spaces' = mempty | otherwise = B.space - lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' + lab <- parseFromString' (mconcat <$> many inline) $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index b35f39aad..a3ff60c14 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -3,7 +3,7 @@ {-# LANGUAGE TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> + Copyright (C) 2012-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2015 John MacFarlane + Copyright : Copyright (C) 2012-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -41,6 +41,7 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) +import Data.Text (Text, unpack) import qualified Data.Foldable as F import Data.List (intercalate, intersperse, isPrefixOf) import qualified Data.Map as M @@ -64,7 +65,7 @@ import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readMediaWiki opts s = do parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts @@ -74,8 +75,9 @@ readMediaWiki opts s = do , mwHeaderMap = M.empty , mwIdentifierList = Set.empty , mwLogMessages = [] + , mwInTT = False } - (s ++ "\n") + (unpack s ++ "\n") case parsed of Right result -> return result Left e -> throwError e @@ -87,6 +89,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwHeaderMap :: M.Map Inlines String , mwIdentifierList :: Set.Set String , mwLogMessages :: [LogMessage] + , mwInTT :: Bool } type MWParser m = ParserT [Char] MWState m @@ -569,7 +572,12 @@ inlineTag = do TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" TagOpen "code" _ -> encode <$> inlinesInTags "code" - TagOpen "tt" _ -> encode <$> inlinesInTags "tt" + TagOpen "tt" _ -> do + inTT <- mwInTT <$> getState + updateState $ \st -> st{ mwInTT = True } + result <- encode <$> inlinesInTags "tt" + updateState $ \st -> st{ mwInTT = inTT } + return result TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) @@ -688,6 +696,10 @@ strong = B.strong <$> nested (inlinesBetween start end) end = try $ sym "'''" doubleQuotes :: PandocMonad m => MWParser m Inlines -doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) +doubleQuotes = do + guardEnabled Ext_smart + inTT <- mwInTT <$> getState + guard (not inTT) + B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 2e307fa4f..abc2ed38a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2015 John MacFarlane + Copyright : Copyright (C) 2011-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -37,6 +37,7 @@ import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) import Text.Pandoc.Class import Text.Pandoc.Error +import Data.Text (Text, unpack) -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -50,22 +51,22 @@ import Text.Pandoc.Error -- readNative :: PandocMonad m => ReaderOptions - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" -readBlocks :: String -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) +readBlocks :: Text -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) -readBlock :: String -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) +readBlock :: Text -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) -readInlines :: String -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) +readInlines :: Text -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) -readInline :: String -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s) +readInline :: Text -> Either PandocError Inline +readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index cf1c8f479..591d7590e 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -2,6 +2,7 @@ module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State import Data.Char (toUpper) +import Data.Text (Text, unpack, pack) import Data.Default import Data.Generics import Text.HTML.TagSoup.Entity (lookupEntity) @@ -28,9 +29,10 @@ instance Default OPMLState where , opmlDocDate = mempty } -readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc +readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML _ inp = do - (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp) + (bs, st') <- flip runStateT def + (mapM parseBlock $ normalizeTree $ parseXML (unpack inp)) return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -69,10 +71,10 @@ asHtml :: PandocMonad m => String -> OPML m Inlines asHtml s = (\(Pandoc _ bs) -> case bs of [Plain ils] -> fromList ils - _ -> mempty) <$> (lift $ readHtml def s) + _ -> mempty) <$> (lift $ readHtml def (pack s)) asMarkdown :: PandocMonad m => String -> OPML m Blocks -asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s) +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def (pack s)) getBlocks :: PandocMonad m => Element -> OPML m Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index b056f1ecc..3d716ba19 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -59,10 +59,6 @@ withState :: (state -> a -> (state, b)) -> ArrowState state a b withState = ArrowState . uncurry -- | Constructor -withState' :: ((state, a) -> (state, b)) -> ArrowState state a b -withState' = ArrowState - --- | Constructor modifyState :: (state -> state ) -> ArrowState state a a modifyState = ArrowState . first @@ -79,10 +75,6 @@ extractFromState :: (state -> b ) -> ArrowState state x b extractFromState f = ArrowState $ \(state,_) -> (state, f state) -- | Constructor -withUnchangedState :: (state -> a -> b ) -> ArrowState state a b -withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) - --- | Constructor tryModifyState :: (state -> Either f state) -> ArrowState state a (Either f a) tryModifyState f = ArrowState $ \(state,a) @@ -107,43 +99,9 @@ instance ArrowChoice (ArrowState state) where Left l -> (s, Left l) Right r -> second Right $ runArrowState a (s,r) -instance ArrowLoop (ArrowState state) where - loop a = ArrowState $ \(s, x) - -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) - in (s', x') - instance ArrowApply (ArrowState state) where app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) - --- | Embedding of a state arrow in a state arrow with a different state type. -switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y -switchState there back a = ArrowState $ first there - >>> runArrowState a - >>> first back - --- | Lift a state arrow to modify the state of an arrow --- with a different state type. -liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x -liftToState unlift a = modifyState $ unlift &&& id - >>> runArrowState a - >>> snd - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like the identity arrow, --- save for side effects in the state. -withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x -withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' -withSubState' unlift a = ArrowState $ runArrowState unlift - >>> switch - >>> runArrowState a - >>> switch - where switch (x,y) = (y,x) - -- | Switches the type of the state temporarily. -- Drops the intermediate result state, behaving like a fallible -- identity arrow, save for side effects in the state. @@ -175,42 +133,6 @@ foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f where a' x (s',m) = second (m <>) $ runArrowState a (s',x) --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f - where a' (s',m) x = second (m <>) $ runArrowState a (s',x) - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldS' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldSL' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ e _ = e - -- | Fold a state arrow through something 'Foldable'. Collect the results in a -- 'MonadPlus'. iterateS :: (Foldable f, MonadPlus m) @@ -239,15 +161,3 @@ iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f (s'',Right m') -> (s'',Right $ mplus m $ return m') (_ ,Left e ) -> (s ,Left e ) a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateSL' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index 218a85661..ecef8b6e3 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -40,10 +40,7 @@ with an equivalent return value. module Text.Pandoc.Readers.Odt.Arrows.Utils where import Control.Arrow -import Control.Monad ( join, MonadPlus(..) ) - -import qualified Data.Foldable as F -import Data.Monoid +import Control.Monad ( join ) import Text.Pandoc.Readers.Odt.Generic.Fallible import Text.Pandoc.Readers.Odt.Generic.Utils @@ -63,12 +60,6 @@ and5 :: (Arrow a) and6 :: (Arrow a) => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 -> a b (c0,c1,c2,c3,c4,c5 ) -and7 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 - -> a b (c0,c1,c2,c3,c4,c5,c6 ) -and8 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 - -> a b (c0,c1,c2,c3,c4,c5,c6,c7) and3 a b c = (and2 a b ) &&& c >>^ \((z,y ) , x) -> (z,y,x ) @@ -78,10 +69,6 @@ and5 a b c d e = (and4 a b c d ) &&& e >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) and6 a b c d e f = (and5 a b c d e ) &&& f >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) -and7 a b c d e f g = (and6 a b c d e f ) &&& g - >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) -and8 a b c d e f g h = (and7 a b c d e f g) &&& h - >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z liftA2 f a b = a &&& b >>^ uncurry f @@ -98,19 +85,11 @@ liftA5 :: (Arrow a) => (z->y->x->w->v -> r) liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) -> a b z->a b y->a b x->a b w->a b v->a b u -> a b r -liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t - -> a b r -liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s - -> a b r liftA3 fun a b c = and3 a b c >>^ uncurry3 fun liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun -liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun -liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun liftA :: (Arrow a) => (y -> z) -> a b y -> a b z liftA fun a = a >>^ fun @@ -124,28 +103,12 @@ liftA fun a = a >>^ fun duplicate :: (Arrow a) => a b (b,b) duplicate = arr $ join (,) --- | Lifts the combination of two values into an arrow. -joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z -joinOn = arr.uncurry - -- | Applies a function to the uncurried result-pair of an arrow-application. -- (The %-symbol was chosen to evoke an association with pairs.) (>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d a >>% f = a >>^ uncurry f --- | '(>>%)' with its arguments flipped -(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(%<<) = flip (>>%) - --- | Precomposition with an uncurried function -(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f %>> a = uncurry f ^>> a - --- | Precomposition with an uncurried function (right to left variant) -(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<%) = flip (%>>) - -infixr 2 >>%, %<<, %>>, <<% +infixr 2 >>% -- | Duplicate a value and apply an arrow to the second instance. @@ -156,56 +119,6 @@ infixr 2 >>%, %<<, %>>, <<% keepingTheValue :: (Arrow a) => a b c -> a b (b,c) keepingTheValue a = returnA &&& a --- | Duplicate a value and apply an arrow to the first instance. --- Aequivalent to --- > \a -> duplicate >>> first a --- or --- > \a -> a &&& returnA -keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) -keepingTheValue' a = a &&& returnA - --- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. --- Actually, it's the more complex '(>=>)', because 'bind' alone does not --- combine as nicely in arrow form. --- The current implementation is not the most efficient one, because it can --- not return directly if a 'Nothing' is encountered. That in turn follows --- from the type system, as 'Nothing' has an "invisible" type parameter that --- can not be dropped early. --- --- Also, there probably is a way to generalize this to other monads --- or applicatives, but I'm leaving that as an exercise to the reader. --- I have a feeling there is a new Arrow-typeclass to be found that is less --- restrictive than 'ArrowApply'. If it is already out there, --- I have not seen it yet. ('ArrowPlus' for example is not general enough.) -(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) -a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join - -infixr 2 >>>= - --- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. --- (But still different from a true bind) -(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) -(>++<) = liftA2 mplus - --- | Left-compose with a pure function -leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) -leftLift = left.arr - --- | Right-compose with a pure function -rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') -rightLift = right.arr - - -( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') -( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') -( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') - -l ^+++ r = leftLift l >>> right r -l +++^ r = left l >>> rightLift r -l ^+++^ r = leftLift l >>> rightLift r - -infixr 2 ^+++, +++^, ^+++^ - ( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d ( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d ( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d @@ -218,33 +131,12 @@ infixr 2 ^||| , |||^, ^|||^ ( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') ( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') -( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') l ^&&& r = arr l &&& r l &&&^ r = l &&& arr r -l ^&&&^ r = arr l &&& arr r - -infixr 3 ^&&&, &&&^, ^&&&^ -( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') -( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') -( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') +infixr 3 ^&&&, &&&^ -l ^*** r = arr l *** r -l ***^ r = l *** arr r -l ^***^ r = arr l *** arr r - -infixr 3 ^***, ***^, ^***^ - --- | A version of --- --- >>> \p -> arr (\x -> if p x the Right x else Left x) --- --- but with p being an arrow -choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) -choose checkValue = keepingTheValue checkValue >>^ select - where select (x,True ) = Right x - select (x,False ) = Left x -- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) @@ -258,130 +150,15 @@ maybeToChoice = arr maybeToEither returnV :: (Arrow a) => c -> a x c returnV = arr.const --- | 'returnA' dropping everything -returnA_ :: (Arrow a) => a _b () -returnA_ = returnV () - --- | Wrapper for an arrow that can be evaluated im parallel. All --- Arrows can be evaluated in parallel, as long as they return a --- monoid. -newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } - deriving (Eq, Ord, Show) - -instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where - mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend - --- | Evaluates a collection of arrows in a parallel fashion. --- --- This is in essence a fold of '(&&&)' over the collection, --- so the actual execution order and parallelity depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- --- This function can be seen as a generalization of --- 'Control.Applicative.sequenceA' to arrows or as an alternative to --- a fold with 'Control.Applicative.WrappedArrow', which --- substitutes the monoid with function application. --- -coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m -coEval = evalParallelArrow . (F.foldMap CoEval) - -- | Defines Left as failure, Right as success type FallibleArrow a input failure success = a input (Either failure success) -type ReFallibleArrow a failure success success' - = FallibleArrow a (Either failure success) failure success' - --- | Wrapper for fallible arrows. Fallible arrows are all arrows that return --- an Either value where left is a faliure and right is a success value. -newtype AlternativeArrow a input failure success - = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } - - -instance (ArrowChoice a, Monoid failure) - => Monoid (AlternativeArrow a input failure success) where - mempty = TryArrow $ returnV $ Left mempty - (TryArrow a) `mappend` (TryArrow b) - = TryArrow $ a &&& b - >>^ \(a',~b') - -> ( (\a'' -> left (mappend a'') b') ||| Right ) - a' - --- | Evaluates a collection of fallible arrows, trying each one in succession. --- Left values are interpreted as failures, right values as successes. --- --- The evaluation is stopped once an arrow succeeds. --- Up to that point, all failures are collected in the failure-monoid. --- Note that '()' is a monoid, and thus can serve as a failure-collector if --- you are uninterested in the exact failures. --- --- This is in essence a fold of '(&&&)' over the collection, enhanced with a --- little bit of repackaging, so the actual execution order depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- -tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) - => f (FallibleArrow a b failure success) - -> FallibleArrow a b failure success -tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) - --- -liftSuccess :: (ArrowChoice a) - => (success -> success') - -> ReFallibleArrow a failure success success' -liftSuccess = rightLift - -- liftAsSuccess :: (ArrowChoice a) => a x success -> FallibleArrow a x failure success liftAsSuccess a = a >>^ Right --- -asFallibleArrow :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -asFallibleArrow a = a >>^ Right - --- | Raises an error into a 'ReFallibleArrow' if the arrow is already in --- "error mode" -liftError :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -liftError e = leftLift (e <>) - --- | Raises an error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseA :: (ArrowChoice a) - => failure - -> FallibleArrow a x failure success -_raiseA e = returnV (Left e) - --- | Raises an empty error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseAEmpty :: (ArrowChoice a, Monoid failure) - => FallibleArrow a x failure success -_raiseAEmpty = _raiseA mempty - --- | Raises an error into a 'ReFallibleArrow', possibly appending the new error --- to an existing one -raiseA :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -raiseA e = arr $ Left.(either (<> e) (const e)) - --- | Raises an empty error into a 'ReFallibleArrow'. If there already is an --- error, nothing changes. --- (Note that this function is only aequivalent to @raiseA mempty@ iff the --- failure monoid follows the monoid laws.) -raiseAEmpty :: (ArrowChoice a, Monoid failure) - => ReFallibleArrow a failure success success -raiseAEmpty = arr (fromRight (const mempty) >>> Left) - - -- | Execute the second arrow if the first succeeds (>>?) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -410,20 +187,6 @@ a >>?^? b = a >>> Left ^|||^ b -> FallibleArrow a x failure success' a ^>>? b = a ^>> Left ^||| b --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> success') - -> FallibleArrow a x failure success' -a ^>>?^ f = arr $ a >>> right f - --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^?) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a ^>>?^? f = a ^>> Left ^|||^ f - -- | Execute the second, non-fallible arrow if the first arrow succeeds (>>?!) :: (ArrowChoice a) => FallibleArrow a x failure success @@ -453,33 +216,9 @@ a ^>>?% f = arr a >>?^ (uncurry f) a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? -infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! +infixr 1 ^>>?, >>?! infixr 1 >>?%, ^>>?%, >>?%? --- | Keep values that are Right, replace Left values by a constant. -ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v -ifFailedUse v = arr $ either (const v) id - --- | '(&&)' lifted into an arrow -(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<&&>) = liftA2 (&&) - --- | '(||)' lifted into an arrow -(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<||>) = liftA2 (||) - --- | An equivalent of '(&&)' in a fallible arrow -(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s - -> FallibleArrow a x f s' - -> FallibleArrow a x f (s,s') -(>&&<) = liftA2 chooseMin - --- | An equivalent of '(||)' in some forms of fallible arrows -(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s - -> FallibleArrow a x f s - -> FallibleArrow a x f s -(>||<) = liftA2 chooseMax - -- | An arrow version of a short-circuit (<|>) ifFailedDo :: (ArrowChoice a) => FallibleArrow a x f y @@ -489,7 +228,4 @@ ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) where repackage (x , Left _) = Left x repackage (_ , Right y) = Right y -infixr 4 <&&>, <||>, >&&<, >||< infixr 1 `ifFailedDo` - - diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index a1bd8cb59..777c10df5 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -94,8 +94,6 @@ data ReaderState , envMedia :: Media -- | Hold binary resources used in the document , odtMediaBag :: MediaBag --- , sequences --- , trackedChangeIDs } deriving ( Show ) @@ -899,9 +897,6 @@ read_reference_ref = matchingElement NsText "reference-ref" -- Entry point ---------------------- ---read_plain_content :: OdtReaderSafe _x Inlines ---read_plain_content = strContent >>^ text - read_text :: OdtReaderSafe _x Pandoc read_text = matchChildContent' [ read_header , read_paragraph diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs index 877443543..4d6a67b8e 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -39,10 +39,6 @@ compatible instances of "ArrowChoice". -- We export everything module Text.Pandoc.Readers.Odt.Generic.Fallible where -import Control.Applicative -import Control.Monad - -import qualified Data.Foldable as F import Data.Monoid ((<>)) -- | Default for now. Will probably become a class at some point. @@ -51,16 +47,6 @@ type Failure = () type Fallible a = Either Failure a --- | False -> Left (), True -> Right () -boolToEither :: Bool -> Fallible () -boolToEither False = Left () -boolToEither True = Right () - --- | False -> Left (), True -> Right () -boolToChoice :: Bool -> Fallible () -boolToChoice False = Left () -boolToChoice True = Right () - -- maybeToEither :: Maybe a -> Fallible a maybeToEither (Just a) = Right a @@ -71,21 +57,11 @@ eitherToMaybe :: Either _l a -> Maybe a eitherToMaybe (Left _) = Nothing eitherToMaybe (Right a) = Just a --- | > untagEither === either id id -untagEither :: Either a a -> a -untagEither (Left a) = a -untagEither (Right a) = a - -- | > fromLeft f === either f id fromLeft :: (a -> b) -> Either a b -> b fromLeft f (Left a) = f a fromLeft _ (Right b) = b --- | > fromRight f === either id f -fromRight :: (a -> b) -> Either b a -> b -fromRight _ (Left b) = b -fromRight f (Right a) = f a - -- | > recover a === fromLeft (const a) === either (const a) id recover :: a -> Either _f a -> a recover a (Left _) = a @@ -110,24 +86,6 @@ collapseEither (Left f ) = Left f collapseEither (Right (Left f)) = Left f collapseEither (Right (Right x)) = Right x --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- both are returned. -chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') -chooseMin = chooseMinWith (,) - --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- a combination is returned. -chooseMinWith :: (Monoid a) => (b -> b' -> c) - -> Either a b - -> Either a b' - -> Either a c -chooseMinWith (><) (Right a) (Right b) = Right $ a >< b -chooseMinWith _ (Left a) (Left b) = Left $ a <> b -chooseMinWith _ (Left a) _ = Left a -chooseMinWith _ _ (Left b) = Left b - -- | If either of the values represents a non-error, the result is a -- (possibly combined) non-error. If both values represent an error, an error -- is returned. @@ -152,87 +110,11 @@ chooseMaxWith _ _ (Right b) = Right b class ChoiceVector v where spreadChoice :: v (Either f a) -> Either f (v a) --- Let's do a few examples first - -instance ChoiceVector Maybe where - spreadChoice (Just (Left f)) = Left f - spreadChoice (Just (Right x)) = Right (Just x) - spreadChoice Nothing = Right Nothing - -instance ChoiceVector (Either l) where - spreadChoice (Right (Left f)) = Left f - spreadChoice (Right (Right x)) = Right (Right x) - spreadChoice (Left x ) = Right (Left x) - instance ChoiceVector ((,) a) where spreadChoice (_, Left f) = Left f spreadChoice (x, Right y) = Right (x,y) -- Wasn't there a newtype somewhere with the elements flipped? --- --- More instances later, first some discussion. --- --- I'll have to freshen up on type system details to see how (or if) to do --- something like --- --- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where --- > : --- --- But maybe it would be even better to use something like --- --- > class ChoiceVector v v' f | v -> v' f where --- > spreadChoice :: v -> Either f v' --- --- That way, more places in @v@ could spread the cheer, e.g.: --- --- As before: --- -- ( a , Either f b) (a , b) f --- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where --- > spreadChoice (_, Left f) = Left f --- > spreadChoice (a, Right b) = Right (a,b) --- --- But also: --- -- ( Either f a , b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where --- > spreadChoice (Right a,b) = Right (a,b) --- > spreadChoice (Left f,_) = Left f --- --- And maybe even: --- -- ( Either f a , Either f b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where --- > spreadChoice (Right a , Right b) = Right (a,b) --- > spreadChoice (Left f , _ ) = Left f --- > spreadChoice ( _ , Left f) = Left f --- --- Of course that would lead to a lot of overlapping instances... --- But I can't think of a different way. A selector function might help, --- but not even a "Data.Traversable" is powerful enough for that. --- But maybe someone has already solved all this with a lens library. --- --- Well, it's an interesting academic question. But for practical purposes, --- I have more than enough right now. - -instance ChoiceVector ((,,) a b) where - spreadChoice (_,_, Left f) = Left f - spreadChoice (a,b, Right x) = Right (a,b,x) - -instance ChoiceVector ((,,,) a b c) where - spreadChoice (_,_,_, Left f) = Left f - spreadChoice (a,b,c, Right x) = Right (a,b,c,x) - -instance ChoiceVector ((,,,,) a b c d) where - spreadChoice (_,_,_,_, Left f) = Left f - spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) - -instance ChoiceVector (Const a) where - spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types - --- | Fails on the first error -instance ChoiceVector [] where - spreadChoice = sequence -- using the monad instance of Either. - -- Could be generalized to "Data.Traversable" - but why play - -- with UndecidableInstances unless this is really needed. - -- | Wrapper for a list. While the normal list instance of 'ChoiceVector' -- fails whenever it can, this type will never fail. newtype SuccessList a = SuccessList { collectNonFailing :: [a] } @@ -247,14 +129,3 @@ instance ChoiceVector SuccessList where collectRights :: [Either _l r] -> [r] collectRights = collectNonFailing . untag . spreadChoice . SuccessList where untag = fromLeft (error "Unexpected Left") - --- | A version of 'collectRights' generalized to other containers. The --- container must be both "reducible" and "buildable". Most general containers --- should fullfill these requirements, but there is no single typeclass --- (that I know of) for that. --- Therefore, they are split between 'Foldable' and 'MonadPlus'. --- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) -collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r -collectRightsF = F.foldr unTagRight mzero - where unTagRight (Right x) = mplus $ return x - unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6c10ed61d..4af4242b6 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -38,8 +38,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , uncurry4 , uncurry5 , uncurry6 -, uncurry7 -, uncurry8 , swap , reverseComposition , bool @@ -148,15 +146,11 @@ uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z -uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z -uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z uncurry3 fun (a,b,c ) = fun a b c uncurry4 fun (a,b,c,d ) = fun a b c d uncurry5 fun (a,b,c,d,e ) = fun a b c d e uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f -uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g -uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h swap :: (a,b) -> (b,a) swap (a,b) = (b,a) @@ -168,4 +162,3 @@ findBy :: (a -> Maybe b) -> [a] -> Maybe b findBy _ [] = Nothing findBy f ((f -> Just x):_ ) = Just x findBy f ( _:xs) = findBy f xs - diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 8c03d1a09..1c3e08a7f 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -41,50 +41,17 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , XMLConverterState , XMLConverter , FallibleXMLConverter -, swapPosition -, runConverter -, runConverter'' , runConverter' -, runConverterF' -, runConverterF -, getCurrentElement , getExtraState , setExtraState , modifyExtraState -, convertingExtraState , producingExtraState -, lookupNSiri -, lookupNSprefix -, readNSattributes -, elemName -, elemNameIs -, strContent -, elContent -, currentElem -, currentElemIs -, expectElement -, elChildren -, findChildren -, filterChildren -, filterChildrenName , findChild' -, findChild -, filterChild' -, filterChild -, filterChildName' -, filterChildName -, isSet , isSet' , isSetWithDefault -, hasAttrValueOf' -, failIfNotAttrValueOf -, isThatTheAttrValue -, searchAttrIn -, searchAttrWith , searchAttr , lookupAttr , lookupAttr' -, lookupAttrWithDefault , lookupDefaultingAttr , findAttr' , findAttr @@ -93,25 +60,9 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , readAttr' , readAttrWithDefault , getAttr --- , (>/<) --- , (?>/<) , executeIn -, collectEvery , withEveryL -, withEvery , tryAll -, tryAll' -, IdXMLConverter -, MaybeEConverter -, ElementMatchConverter -, MaybeCConverter -, ContentMatchConverter -, makeMatcherE -, makeMatcherC -, prepareMatchersE -, prepareMatchersC -, matchChildren -, matchContent'' , matchContent' , matchContent ) where @@ -121,7 +72,6 @@ import Control.Monad ( MonadPlus ) import Control.Arrow import qualified Data.Map as M -import qualified Data.Foldable as F import Data.Default import Data.Maybe @@ -210,17 +160,6 @@ currentElement state = head (parentElements state) -- | Replace the current position by another, modifying the extra state -- in the process -swapPosition :: (extraState -> extraState') - -> [XML.Element] - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState' -swapPosition f stack state - = state { parentElements = stack - , moreState = f (moreState state) - } - --- | Replace the current position by another, modifying the extra state --- in the process swapStack' :: XMLConverterState nsID extraState -> [XML.Element] -> ( XMLConverterState nsID extraState , [XML.Element] ) @@ -264,14 +203,6 @@ runConverter :: XMLConverter nsID extraState input output -> output runConverter converter state input = snd $ runArrowState converter (state,input) --- -runConverter'' :: (NameSpaceID nsID) - => XMLConverter nsID extraState (Fallible ()) output - -> extraState - -> XML.Element - -> output -runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () - runConverter' :: (NameSpaceID nsID) => FallibleXMLConverter nsID extraState () success -> extraState @@ -280,20 +211,6 @@ runConverter' :: (NameSpaceID nsID) runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () -- -runConverterF' :: FallibleXMLConverter nsID extraState x y - -> XMLConverterState nsID extraState - -> Fallible x -> Fallible y -runConverterF' a s e = runConverter (returnV e >>? a) s e - --- -runConverterF :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState XML.Element x - -> extraState - -> Fallible XML.Element -> Fallible x -runConverterF a s = either failWith - (\e -> runConverter a (createStartState e s) e) - --- getCurrentElement :: XMLConverter nsID extraState x XML.Element getCurrentElement = extractFromState currentElement @@ -430,57 +347,15 @@ elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName -------------------------------------------------------------------------------- -- -strContent :: XMLConverter nsID extraState x String -strContent = getCurrentElement - >>^ XML.strContent - --- elContent :: XMLConverter nsID extraState x [XML.Content] elContent = getCurrentElement >>^ XML.elContent -------------------------------------------------------------------------------- --- Current element --------------------------------------------------------------------------------- - --- -currentElem :: XMLConverter nsID extraState x (XML.QName) -currentElem = getCurrentElement - >>^ XML.elName - -currentElemIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x Bool -currentElemIs nsID name = getCurrentElement - >>> elemNameIs nsID name - - - -{- -currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> - (XML.qName >>^ (&&).(== name) ) - ^&&&^ - (XML.qIRI >>^ (==) ) - ) >>% (.) - ) &&& lookupNSiri nsID >>% ($) --} - --- -expectElement :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x () -expectElement nsID name = currentElemIs nsID name - >>^ boolToChoice - --------------------------------------------------------------------------------- -- Chilren -------------------------------------------------------------------------------- -- -elChildren :: XMLConverter nsID extraState x [XML.Element] -elChildren = getCurrentElement - >>^ XML.elChildren - -- findChildren :: (NameSpaceID nsID) => nsID -> ElementName @@ -490,18 +365,6 @@ findChildren nsID name = elemName nsID name >>% XML.findChildren -- -filterChildren :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildren p = getCurrentElement - >>^ XML.filterChildren p - --- -filterChildrenName :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildrenName p = getCurrentElement - >>^ XML.filterChildrenName p - --- findChild' :: (NameSpaceID nsID) => nsID -> ElementName @@ -517,45 +380,12 @@ findChild :: (NameSpaceID nsID) findChild nsID name = findChild' nsID name >>> maybeToChoice --- -filterChild' :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChild' p = getCurrentElement - >>^ XML.filterChild p - --- -filterChild :: (XML.Element -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChild p = filterChild' p - >>> maybeToChoice - --- -filterChildName' :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChildName' p = getCurrentElement - >>^ XML.filterChildName p - --- -filterChildName :: (XML.QName -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChildName p = filterChildName' p - >>> maybeToChoice - -------------------------------------------------------------------------------- -- Attributes -------------------------------------------------------------------------------- -- -isSet :: (NameSpaceID nsID) - => nsID -> AttributeName - -> (Either Failure Bool) - -> FallibleXMLConverter nsID extraState x Bool -isSet nsID attrName deflt - = findAttr' nsID attrName - >>^ maybe deflt stringToBool - --- isSet' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe Bool) @@ -570,34 +400,6 @@ isSetWithDefault nsID attrName def' = isSet' nsID attrName >>^ fromMaybe def' --- -hasAttrValueOf' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x Bool -hasAttrValueOf' nsID attrName attrValue - = findAttr nsID attrName - >>> ( const False ^|||^ (==attrValue)) - --- -failIfNotAttrValueOf :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> FallibleXMLConverter nsID extraState x () -failIfNotAttrValueOf nsID attrName attrValue - = hasAttrValueOf' nsID attrName attrValue - >>^ boolToChoice - --- | Is the value that is currently transported in the arrow the value of --- the specified attribute? -isThatTheAttrValue :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState AttributeValue Bool -isThatTheAttrValue nsID attrName - = keepingTheValue - (findAttr nsID attrName) - >>% right.(==) - -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary searchAttrIn :: (NameSpaceID nsID) @@ -608,18 +410,6 @@ searchAttrIn nsID attrName dict = findAttr nsID attrName >>?^? maybeToChoice.(`lookup` dict ) - --- | Lookup value in a dictionary. Fail if no attribute found. If value not in --- dictionary, return default value -searchAttrWith :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrWith nsID attrName defV dict - = findAttr nsID attrName - >>?^ (fromMaybe defV).(`lookup` dict ) - -- | Lookup value in a dictionary. If attribute or value not found, -- return default value searchAttr :: (NameSpaceID nsID) @@ -789,16 +579,6 @@ prepareIteration nsID name = keepingTheValue (findChildren nsID name) >>% distributeValue --- | Applies a converter to every child element of a specific type. --- Collects results in a 'Monoid'. --- Fails completely if any conversion fails. -collectEvery :: (NameSpaceID nsID, Monoid m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a m - -> FallibleXMLConverter nsID extraState a m -collectEvery nsID name a = prepareIteration nsID name - >>> foldS' (switchingTheStack a) - -- withEveryL :: (NameSpaceID nsID) => nsID -> ElementName @@ -826,16 +606,6 @@ tryAll nsID name a = prepareIteration nsID name >>> iterateS (switchingTheStack a) >>^ collectRights --- | Applies a converter to every child element of a specific type. --- Collects all successful results. -tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b (c a) -tryAll' nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ collectRightsF - -------------------------------------------------------------------------------- -- Matching children -------------------------------------------------------------------------------- @@ -843,15 +613,6 @@ tryAll' nsID name a = prepareIteration nsID name type IdXMLConverter nsID moreState x = XMLConverter nsID moreState x x -type MaybeEConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) - --- Chainable converter that helps deciding which converter to actually use. -type ElementMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeEConverter nsID extraState x, XML.Element) - type MaybeCConverter nsID moreState x = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) @@ -862,26 +623,6 @@ type ContentMatchConverter nsID extraState x (MaybeCConverter nsID extraState x, XML.Content) -- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML elements to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherE :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ElementMatchConverter nsID extraState a -makeMatcherE nsID name c = ( second ( - elemNameIs nsID name - >>^ bool Nothing (Just tryC) - ) - >>% (<|>) - ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd - --- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. -- The fold used to match elements however is very simple, so to use it, -- this function wraps the converter in another converter that unifies @@ -914,13 +655,6 @@ makeMatcherC nsID name c = ( second ( contentToElem _ -> failEmpty -- Creates and chains a bunch of matchers -prepareMatchersE :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ElementMatchConverter nsID extraState x ---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) -prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) - --- Creates and chains a bunch of matchers prepareMatchersC :: (NameSpaceID nsID) => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] -> ContentMatchConverter nsID extraState x @@ -928,52 +662,6 @@ prepareMatchersC :: (NameSpaceID nsID) prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) -- | Takes a list of element-data - converter groups and --- * Finds all children of the current element --- * Matches each group to each child in order (at most one group per child) --- * Filters non-matched children --- * Chains all found converters in child-order --- * Applies the chain to the input element -matchChildren :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchChildren lookups = let matcher = prepareMatchersE lookups - in keepingTheValue ( - elChildren - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the element and drop the element - -- in the return value - swallowElem element converter = (,element) ^>> converter >>^ fst - --- -matchContent'' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent'' lookups = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowContent content converter = (,content) ^>> converter >>^ fst - - --- | Takes a list of element-data - converter groups and -- * Finds all content of the current element -- * Matches each group to each piece of content in order -- (at most one group per piece of content) @@ -1018,14 +706,6 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool :: (Monoid failure) => String -> Either failure Bool -stringToBool val -- stringToBool' val >>> maybeToChoice - | val `elem` trueValues = succeedWith True - | val `elem` falseValues = succeedWith False - | otherwise = failEmpty - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - stringToBool' :: String -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 26ba6df82..87a6dc91c 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -50,23 +50,11 @@ module Text.Pandoc.Readers.Odt.StyleReader , ListLevelType (..) , LengthOrPercent (..) , lookupStyle -, getTextProperty -, getTextProperty' -, getParaProperty -, getListStyle , getListLevelStyle , getStyleFamily -, lookupDefaultStyle , lookupDefaultStyle' , lookupListStyleByName -, getPropertyChain -, textPropertyChain -, stylePropertyChain -, stylePropertyChain' -, getStylePropertyChain , extendedStylePropertyChain -, extendedStylePropertyChain' -, liftStyles , readStylesAt ) where @@ -83,7 +71,6 @@ import Data.Maybe import qualified Text.XML.Light as XML -import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Utils @@ -624,20 +611,11 @@ lookupStyle :: StyleName -> Styles -> Maybe Style lookupStyle name Styles{..} = M.lookup name stylesByName -- -lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties -lookupDefaultStyle family Styles{..} = fromMaybe def - (M.lookup family defaultStyleMap) - --- lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties lookupDefaultStyle' Styles{..} family = fromMaybe def (M.lookup family defaultStyleMap) -- -getListStyle :: Style -> Styles -> Maybe ListStyle -getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) - --- lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle lookupListStyleByName name Styles{..} = M.lookup name listStylesByName @@ -681,64 +659,3 @@ extendedStylePropertyChain [style] styles = (stylePropertyChain style s ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) ++ (extendedStylePropertyChain trace styles) --- Optimizable with Data.Sequence - --- -extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] -extendedStylePropertyChain' [] _ = Nothing -extendedStylePropertyChain' [style] styles = Just ( - (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) - ) -extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) - (extendedStylePropertyChain' trace styles) - --- -stylePropertyChain' :: Styles -> Style -> [StyleProperties] -stylePropertyChain' = flip stylePropertyChain - --- -getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] -getStylePropertyChain name styles = maybe [] - (`stylePropertyChain` styles) - (lookupStyle name styles) - --- -getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] -getPropertyChain extract style styles = catMaybes - $ map extract - $ stylePropertyChain style styles - --- -textPropertyChain :: Style -> Styles -> [TextProperties] -textPropertyChain = getPropertyChain textProperties - --- -paraPropertyChain :: Style -> Styles -> [ParaProperties] -paraPropertyChain = getPropertyChain paraProperties - --- -getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a -getTextProperty extract style styles = fmap extract - $ listToMaybe - $ textPropertyChain style styles - --- -getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a -getTextProperty' extract style styles = F.asum - $ map extract - $ textPropertyChain style styles - --- -getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a -getParaProperty extract style styles = fmap extract - $ listToMaybe - $ paraPropertyChain style styles - --- | Lifts the reader into another readers' state. -liftStyles :: (OdtConverterState s -> OdtConverterState Styles) - -> (OdtConverterState Styles -> OdtConverterState s ) - -> XMLReader s x x -liftStyles extract inject = switchState extract inject - $ convertingExtraState M.empty readAllStyles - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e509178d..5e0d67d10 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -40,15 +40,18 @@ import Text.Pandoc.Parsing (reportLogMessages) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) +import Data.Text (Text) +import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readOrg opts s = do parsed <- flip runReaderT def $ - readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") + readWithM parseOrg (optionsToParserState opts) + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index cc2e82d5b..9c6614c99 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.BlockStarts + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -61,8 +61,12 @@ headerStart = try $ tableStart :: Monad m => OrgParser m Char tableStart = try $ skipSpaces *> char '|' +gridTableStart :: Monad m => OrgParser m () +gridTableStart = try $ skipSpaces <* char '+' <* char '-' + + latexEnvStart :: Monad m => OrgParser m String -latexEnvStart = try $ do +latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" @@ -93,8 +97,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") drawerStart :: Monad m => OrgParser m String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline +drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline where drawerName = char ':' *> manyTill nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () @@ -116,8 +119,8 @@ noteMarker = try $ do -- | Succeeds if the parser is at the end of a block. endOfBlock :: Monad m => OrgParser m () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart +endOfBlock = lookAhead . try $ + void blankline <|> anyBlockStart where -- Succeeds if there is a new block starting at this position. anyBlockStart :: Monad m => OrgParser m () @@ -126,6 +129,7 @@ endOfBlock = lookAhead . try $ do , hline , metaLineStart , commentLineStart + , gridTableStart , void noteMarker , void tableStart , void drawerStart @@ -134,4 +138,3 @@ endOfBlock = lookAhead . try $ do , void bulletListStart , void orderedListStart ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b0a19b833..3e0ab0127 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -18,9 +15,10 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} - +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} {- | - Module : Text.Pandoc.Readers.Org.Options + Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above @@ -34,6 +32,7 @@ module Text.Pandoc.Readers.Org.Blocks ) where import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState @@ -52,211 +51,21 @@ import Control.Monad (foldM, guard, mzero, void) import Data.Char (isSpace, toLower, toUpper) import Data.Default (Default) import Data.List (foldl', isPrefixOf) -import Data.Maybe (fromMaybe, isNothing) +import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: PandocMonad m => Int -> OrgParser m (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: Monad m => OrgParser m () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: Monad m => OrgParser m [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: Monad m => OrgParser m TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - - --- -- parsing blocks -- -- | Get a list of blocks. blockList :: PandocMonad m => OrgParser m [Block] blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof + headlines <- documentTree blocks inline st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks + headlineBlocks <- headlineToBlocks $ runF headlines st + -- ignore first headline, it's the document's title + return . drop 1 . B.toList $ headlineBlocks -- | Get the meta information saved in the state. meta :: Monad m => OrgParser m Meta @@ -274,6 +83,7 @@ block = choice [ mempty <$ blanklines , figure , example , genericDrawer + , include , specialLine , horizontalRule , list @@ -302,7 +112,7 @@ data BlockAttributes = BlockAttributes -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = +attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of @@ -311,18 +121,18 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) -stringyMetaAttribute attrCheck = try $ do +stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName skipSpaces - attrValue <- anyLine + attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) + kv <- many stringyMetaAttribute + guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv @@ -331,7 +141,7 @@ blockAttributes = try $ do Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes + return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' @@ -339,13 +149,7 @@ blockAttributes = try $ do } where attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False + attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -355,6 +159,7 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value +-- | Parse key-value pairs for HTML attributes keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline @@ -381,7 +186,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case (map toLower blkType) of + case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -402,10 +207,10 @@ orgBlock = try $ do lowercase = map toLower rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) +rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) +parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do @@ -433,8 +238,7 @@ rawBlockContent blockType = try $ do stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) + shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String @@ -442,7 +246,7 @@ rawBlockContent blockType = try $ do tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String @@ -490,16 +294,15 @@ codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock + resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent return $ - (if exportsCode kv then labelledBlck else mempty) <> - (if exportsResults kv then resultBlck else mempty) + (if exportsCode kv then labelledBlck else mempty) <> + (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = @@ -514,12 +317,16 @@ codeBlock blockAttrs blockType = do exportsResults :: [(String, String)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do +-- | Parse the result of an evaluated babel code block. +babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) +babelResultsBlock = try $ do blanklines - stringAnyCase "#+RESULTS:" - blankline + resultsMarker <|> + (lookAhead . void . try $ + manyTill (metaLineStart *> anyLineNewline) resultsMarker) block + where + resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) @@ -527,13 +334,13 @@ codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline - return $ ( translateLang language : switchClasses - , originalLang language <> switchKv <> parameters - ) + return ( translateLang language : switchClasses + , originalLang language <> switchKv <> parameters + ) switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) switchesAsAttributes = try $ do - switches <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where addToAttr :: (Char, Maybe String, SwitchPolarity) @@ -541,10 +348,10 @@ switchesAsAttributes = try $ do -> ([String], [(String, String)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of - Just num -> (("startFrom", num):kv) + Just num -> ("startFrom", num):kv Nothing -> kv cls' = case pol of - SwitchPlus -> "continuedSourceBlock":cls + SwitchPlus -> "continuedSourceBlock":cls SwitchMinus -> cls in ("numberLines":cls', kv') addToAttr _ x = x @@ -573,7 +380,7 @@ genericSwitch :: Monad m genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p - return $ (c, arg, polarity) + return (c, arg, polarity) -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. @@ -593,8 +400,8 @@ orgParamValue = try $ *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces where - endOfValue = lookAhead $ (try $ skipSpaces <* oneOf "\n\r") - <|> (try $ skipSpaces1 <* orgArgKey) + endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") + <|> try (skipSpaces1 <* orgArgKey) -- @@ -612,7 +419,7 @@ genericDrawer = try $ do -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of + case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty @@ -631,25 +438,6 @@ drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: Monad m => OrgParser m Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: Monad m => OrgParser m (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: Monad m => OrgParser m PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: Monad m => OrgParser m PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -- -- Figures @@ -665,7 +453,7 @@ figure = try $ do Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs + let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where selfTarget :: PandocMonad m => OrgParser m String @@ -700,8 +488,7 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ do - returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine @@ -717,6 +504,34 @@ exampleCode = B.codeBlockWith ("", ["example"], []) specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine +-- | Include the content of a file. +include :: PandocMonad m => OrgParser m (F Blocks) +include = try $ do + metaLineStart <* stringAnyCase "include:" <* skipSpaces + filename <- includeTarget + blockType <- optionMaybe $ skipSpaces *> many1 alphaNum + blocksParser <- case blockType of + Just "example" -> + return $ pure . B.codeBlock <$> parseRaw + Just "export" -> do + format <- skipSpaces *> many (noneOf "\n\r\t ") + return $ pure . B.rawBlock format <$> parseRaw + Just "src" -> do + language <- skipSpaces *> many (noneOf "\n\r\t ") + let attr = (mempty, [language], mempty) + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ pure . B.fromList <$> blockList + anyLine + insertIncludedFileF blocksParser ["."] filename + where + includeTarget :: PandocMonad m => OrgParser m FilePath + includeTarget = do + char '"' + manyTill (noneOf "\n\r\t") (char '"') + + parseRaw :: PandocMonad m => OrgParser m String + parseRaw = many anyChar + rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart @@ -755,11 +570,15 @@ data OrgTable = OrgTable } table :: PandocMonad m => OrgParser m (F Blocks) -table = try $ do +table = gridTableWith blocks True <|> orgTable + +-- | A normal org table +orgTable :: PandocMonad m => OrgParser m (F Blocks) +orgTable = try $ do -- don't allow a table on the first line of a list item; org requires that -- tables start at first non-space character on the line - let isFirstInListItem st = (orgStateParserContext st == ListItemState) && - (orgStateLastPreCharPos st == Nothing) + let isFirstInListItem st = orgStateParserContext st == ListItemState && + isNothing (orgStateLastPreCharPos st) guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart @@ -772,7 +591,7 @@ orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) + let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns @@ -782,7 +601,7 @@ orgToPandocTable (OrgTable colProps heads lns) caption = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) + <$> columnRelWidth colProp <*> totalWidth in (align', width') @@ -808,7 +627,7 @@ tableAlignRow = try $ do columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) + emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' @@ -854,28 +673,28 @@ normalizeTable (OrgTable colProps heads rows) = rowToContent :: OrgTable -> OrgTableRow -> F OrgTable -rowToContent orgTable row = +rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable + singleRowPromotedToHeader = case tbl of + OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> + tbl{ orgTableHeader = b , orgTableRows = [] } + _ -> tbl setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } + setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow - let oldRows = orgTableRows orgTable + let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } + return tbl{ orgTableRows = oldRows ++ [newRow] } -- @@ -917,7 +736,7 @@ noteBlock = try $ do paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) + notFollowedBy' (char '*' *> oneOf " *") ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block @@ -926,7 +745,7 @@ paraOrPlain = try $ do try (guard nl *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + <|> return (B.plain <$> ils) -- @@ -938,16 +757,16 @@ list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: PandocMonad m => OrgParser m (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactifyDL . sequence + fmap (B.definitionList . compactifyDL) . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: PandocMonad m => OrgParser m (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify . sequence + fmap (B.bulletList . compactify) . sequence <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: PandocMonad m => OrgParser m (F Blocks) -orderedList = fmap B.orderedList . fmap compactify . sequence +orderedList = fmap (B.orderedList . compactify) . sequence <$> many1 (listItem orderedListStart) bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int @@ -1004,16 +823,3 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Monad m => Int -> OrgParser m String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: Monad m => OrgParser m String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs new file mode 100644 index 000000000..743f6cc0e --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -0,0 +1,304 @@ +{- +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.Org.DocumentTree + Copyright : Copyright (C) 2014-2017 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Parsers for org-mode headlines and document subtrees +-} +module Text.Pandoc.Readers.Org.DocumentTree + ( documentTree + , headlineToBlocks + ) where + +import Control.Arrow ((***)) +import Control.Monad (guard, void) +import Data.Char (toLower, toUpper) +import Data.List (intersperse) +import Data.Monoid ((<>)) +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Data.Map as Map +import qualified Text.Pandoc.Builder as B + +-- +-- Org headers +-- + +-- | Parse input as org document tree. +documentTree :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> OrgParser m (F Headline) +documentTree blocks inline = do + initialBlocks <- blocks + headlines <- sequence <$> manyTill (headline blocks inline 1) eof + title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState + return $ do + headlines' <- headlines + initialBlocks' <- initialBlocks + title' <- title + return Headline + { headlineLevel = 0 + , headlineTodoMarker = Nothing + , headlineText = B.fromList title' + , headlineTags = mempty + , headlineProperties = mempty + , headlineContents = initialBlocks' + , headlineChildren = headlines' + } + where + getTitle :: Map.Map String MetaValue -> [Inline] + getTitle metamap = + case Map.lookup "title" metamap of + Just (MetaInlines inlns) -> inlns + _ -> [] + +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq) + +-- | Create a tag containing the given string. +toTag :: String -> Tag +toTag = Tag + +-- | The key (also called name or type) of a property. +newtype PropertyKey = PropertyKey { fromKey :: String } + deriving (Show, Eq, Ord) + +-- | Create a property key containing the given string. Org mode keys are +-- case insensitive and are hence converted to lower case. +toPropertyKey :: String -> PropertyKey +toPropertyKey = PropertyKey . map toLower + +-- | The value assigned to a property. +newtype PropertyValue = PropertyValue { fromValue :: String } + +-- | Create a property value containing the given string. +toPropertyValue :: String -> PropertyValue +toPropertyValue = PropertyValue + +-- | Check whether the property value is non-nil (i.e. truish). +isNonNil :: PropertyValue -> Bool +isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] + +-- | Key/value pairs from a PROPERTIES drawer +type Properties = [(PropertyKey, PropertyValue)] + +-- | Org mode headline (i.e. a document subtree). +data Headline = Headline + { headlineLevel :: Int + , headlineTodoMarker :: Maybe TodoMarker + , headlineText :: Inlines + , headlineTags :: [Tag] + , headlineProperties :: Properties + , headlineContents :: Blocks + , headlineChildren :: [Headline] + } + +-- | Read an Org mode headline and its contents (i.e. a document subtree). +-- @lvl@ gives the minimum acceptable level of the tree. +headline :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> Int + -> OrgParser m (F Headline) +headline blocks inline lvl = try $ do + level <- headerStart + guard (lvl <= level) + todoKw <- optionMaybe todoKeyword + title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle + tags <- option [] headerTags + newline + properties <- option mempty propertiesDrawer + contents <- blocks + children <- many (headline blocks inline (level + 1)) + return $ do + title' <- title + contents' <- contents + children' <- sequence children + return Headline + { headlineLevel = level + , headlineTodoMarker = todoKw + , headlineText = title' + , headlineTags = tags + , headlineProperties = properties + , headlineContents = contents' + , headlineChildren = children' + } + where + endOfTitle :: Monad m => OrgParser m () + endOfTitle = void . lookAhead $ optional headerTags *> newline + + headerTags :: Monad m => OrgParser m [Tag] + headerTags = try $ + let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) + +-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks +headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +headlineToBlocks hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + case () of + _ | any isNoExportTag headlineTags -> return mempty + _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln + _ | isCommentTitle headlineText -> return mempty + _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln + _ | otherwise -> headlineToHeaderWithContents hdln + +isNoExportTag :: Tag -> Bool +isNoExportTag = (== toTag "noexport") + +isArchiveTag :: Tag -> Bool +isArchiveTag = (== toTag "ARCHIVE") + +-- | Check if the title starts with COMMENT. +-- FIXME: This accesses builder internals not intended for use in situations +-- like these. Replace once keyword parsing is supported. +isCommentTitle :: Inlines -> Bool +isCommentTitle (B.toList -> (Str "COMMENT":_)) = True +isCommentTitle _ = False + +archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +archivedHeadlineToBlocks hdln = do + archivedTreesOption <- getExportSetting exportArchivedTrees + case archivedTreesOption of + ArchivedTreesNoExport -> return mempty + ArchivedTreesExport -> headlineToHeaderWithContents hdln + ArchivedTreesHeadlineOnly -> headlineToHeader hdln + +headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithList hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + header <- headlineToHeader hdln + listElements <- mapM headlineToBlocks headlineChildren + let listBlock = if null listElements + then mempty + else B.orderedList listElements + let headerText = if maxHeadlineLevels == headlineLevel + then header + else flattenHeader header + return $ headerText <> headlineContents <> listBlock + where + flattenHeader :: Blocks -> Blocks + flattenHeader blks = + case B.toList blks of + (Header _ _ inlns:_) -> B.para (B.fromList inlns) + _ -> mempty + +headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithContents hdln@Headline {..} = do + header <- headlineToHeader hdln + childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren + return $ header <> headlineContents <> childrenBlocks + +headlineToHeader :: Monad m => Headline -> OrgParser m Blocks +headlineToHeader Headline {..} = do + exportTodoKeyword <- getExportSetting exportWithTodoKeywords + exportTags <- getExportSetting exportWithTags + let todoText = if exportTodoKeyword + then case headlineTodoMarker of + Just kw -> todoKeywordToInlines kw <> B.space + Nothing -> mempty + else mempty + let text = todoText <> headlineText <> + if exportTags + then tagsToInlines headlineTags + else mempty + let propAttr = propertiesToAttr headlineProperties + attr <- registerHeader propAttr headlineText + return $ B.headerWith attr headlineLevel text + +todoKeyword :: Monad m => OrgParser m TodoMarker +todoKeyword = try $ do + taskStates <- activeTodoMarkers <$> getState + let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) <* spaceChar) + choice (map kwParser taskStates) + +todoKeywordToInlines :: TodoMarker -> Inlines +todoKeywordToInlines tdm = + let todoText = todoMarkerName tdm + todoState = map toLower . show $ todoMarkerState tdm + classes = [todoState, todoText] + in B.spanWith (mempty, classes, mempty) (B.str todoText) + +propertiesToAttr :: Properties -> Attr +propertiesToAttr properties = + let + toStringPair = fromKey *** fromValue + customIdKey = toPropertyKey "custom_id" + classKey = toPropertyKey "class" + unnumberedKey = toPropertyKey "unnumbered" + specialProperties = [customIdKey, classKey, unnumberedKey] + id' = maybe mempty fromValue . lookup customIdKey $ properties + cls = maybe mempty fromValue . lookup classKey $ properties + kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + $ properties + isUnnumbered = + maybe False isNonNil . lookup unnumberedKey $ properties + in + (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + +tagsToInlines :: [Tag] -> Inlines +tagsToInlines [] = mempty +tagsToInlines tags = + (B.space <>) . mconcat . intersperse (B.str "\160") . map tagToInline $ tags + where + tagToInline :: Tag -> Inlines + tagToInline t = tagSpan t . B.smallcaps . B.str $ fromTag t + +-- | Wrap the given inline in a span, marking it as a tag. +tagSpan :: Tag -> Inlines -> Inlines +tagSpan t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) + + + + + +-- | Read a :PROPERTIES: drawer and return the key/value pairs contained +-- within. +propertiesDrawer :: Monad m => OrgParser m Properties +propertiesDrawer = try $ do + drawerType <- drawerStart + guard $ map toUpper drawerType == "PROPERTIES" + manyTill property (try endOfDrawer) + where + property :: Monad m => OrgParser m (PropertyKey, PropertyValue) + property = try $ (,) <$> key <*> value + + key :: Monad m => OrgParser m PropertyKey + key = fmap toPropertyKey . try $ + skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + + value :: Monad m => OrgParser m PropertyValue + value = fmap toPropertyValue . try $ + skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + + endOfDrawer :: Monad m => OrgParser m String + endOfDrawer = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 1d6fdd7e1..11f0972d5 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ExportSettings + Copyright : © 2016–2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -71,7 +71,7 @@ exportSetting = choice , ignoredSetting "pri" , ignoredSetting "prop" , ignoredSetting "stat" - , ignoredSetting "tags" + , booleanSetting "tags" (\val es -> es { exportWithTags = val }) , ignoredSetting "tasks" , ignoredSetting "tex" , ignoredSetting "timestamp" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 64ffb8ef5..66273e05d 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Inlines + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -48,7 +48,7 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad (guard, mplus, mzero, void, when) +import Control.Monad (guard, mplus, mzero, unless, void, when) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) @@ -63,7 +63,7 @@ import Prelude hiding (sequence) -- recordAnchorId :: PandocMonad m => String -> OrgParser m () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + s{ orgStateAnchorIds = i : orgStateAnchorIds s } pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () pushToInlineCharStack c = updateState $ \s -> @@ -120,6 +120,7 @@ inline = , superscript , inlineLaTeX , exportSnippet + , macro , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) @@ -183,7 +184,7 @@ cite = try $ berkeleyCite <|> do , orgRefCite , berkeleyTextualCite ] - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) <$> cs -- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) @@ -208,7 +209,7 @@ normalOrgRefCite = try $ do orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) orgRefCiteList citeMode = try $ do key <- orgRefCiteKey - returnF $ Citation + returnF Citation { citationId = key , citationPrefix = mempty , citationSuffix = mempty @@ -231,11 +232,11 @@ berkeleyCite = try $ do return $ if parens then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix + . maybe id (alterFirst . prependPrefix) prefix + . maybe id (alterLast . appendSuffix) suffix $ citationList else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) + <> toListOfCites (map toInTextMode citationList) <> maybe mempty (", " <>) suffix where toCite :: [Citation] -> Inlines @@ -249,7 +250,7 @@ berkeleyCite = try $ do alterFirst, alterLast :: (a -> a) -> [a] -> [a] alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs + alterFirst f (c:cs) = f c : cs alterLast f = reverse . alterFirst f . reverse prependPrefix, appendSuffix :: Inlines -> Citation -> Citation @@ -270,7 +271,7 @@ berkeleyCitationList = try $ do skipSpaces commonPrefix <- optionMaybe (try $ citationListPart <* char ';') citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) + commonSuffix <- optionMaybe (try citationListPart) char ']' return (BerkeleyCitationList parens <$> sequence commonPrefix @@ -338,8 +339,15 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) +orgRefCiteKey = + let citeKeySpecialChars = "-_:\\./," :: String + isCiteKeySpecialChar c = c `elem` citeKeySpecialChars + isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c + endOfCitation = try $ do + many $ satisfy isCiteKeySpecialChar + satisfy $ not . isCiteKeyChar + in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. @@ -365,15 +373,16 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return Citation + { citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) @@ -395,7 +404,7 @@ inlineNote = try $ do ref <- many alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ + unless (null ref) $ addToNotesTable ("fn:" ++ ref, note) return $ B.note <$> note @@ -405,7 +414,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" ++ ref ++ "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -429,7 +438,7 @@ explicitOrImageLink = try $ do src <- srcF case cleanLinkString title of Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty + pure . B.link src "" $ B.image imgSrc mempty mempty _ -> linkToInlinesF src =<< title' @@ -686,13 +695,13 @@ mathEnd c = try $ do return res -enclosedInlines :: PandocMonad m => OrgParser m a +enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: PandocMonad m => OrgParser m a +enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m String enclosedRaw start end = try $ @@ -771,7 +780,7 @@ notAfterForbiddenBorderChar = do -- | Read a sub- or superscript expression subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) @@ -809,7 +818,7 @@ inlineLaTeX = try $ do enableExtension Ext_raw_tex (readerExtensions def) } } texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline + texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -839,26 +848,49 @@ exportSnippet = try $ do snippet <- manyTill anyChar (try $ string "@@") returnF $ B.rawInline format snippet +macro :: PandocMonad m => OrgParser m (F Inlines) +macro = try $ do + recursionDepth <- orgStateMacroDepth <$> getState + guard $ recursionDepth < 15 + string "{{{" + name <- many alphaNum + args <- ([] <$ string "}}}") + <|> char '(' *> argument `sepBy` char ',' <* eoa + expander <- lookupMacro name <$> getState + case expander of + Nothing -> mzero + Just fn -> do + updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 } + res <- parseFromString (mconcat <$> many inline) $ fn args + updateState $ \s -> s { orgStateMacroDepth = recursionDepth } + return res + where + argument = many $ notFollowedBy eoa *> noneOf "," + eoa = string ")}}}" + smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) +smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] where orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> dash <* updatePositions '-' orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> ellipses <* updatePositions '.' + orgApostrophe = do + guardEnabled Ext_smart + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + returnF (B.str "\x2019") + +guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () +guardOrSmartEnabled b = do + smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions + guard (b || smartExtension) singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -870,10 +902,13 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + let doubleQuotedContent = withQuoteContext InDoubleQuote $ do + doubleQuoteEnd + updateLastForbiddenCharPos + return . fmap B.doubleQuoted . trimInlinesF $ contents + let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents + doubleQuotedContent <|> leftQuoteAndContent diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 7938fc6c6..d22902eae 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -44,7 +44,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Control.Monad (mzero, void) +import Control.Monad (mzero, void, when) import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M @@ -75,14 +75,16 @@ declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do key <- map toLower <$> metaKey (key', value) <- metaValue key - updateState $ \st -> st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + let addMetaValue st = + st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + when (key' /= "results") $ updateState addMetaValue metaKey :: Monad m => OrgParser m String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) +metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -109,7 +111,7 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' newline items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList @@ -151,6 +153,7 @@ optionLine = try $ do "todo" -> todoSequence >>= updateState . registerTodoSequence "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence + "macro" -> macroDefinition >>= updateState . registerMacro _ -> mzero addLinkFormat :: Monad m => String @@ -160,7 +163,7 @@ addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) +parseLinkFormat :: Monad m => OrgParser m (String, String -> String) parseLinkFormat = try $ do linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat @@ -169,8 +172,7 @@ parseLinkFormat = try $ do -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. parseFormat :: Monad m => OrgParser m (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend +parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares replacePlain = try $ (\x -> concat . flip intersperse x) @@ -218,3 +220,27 @@ todoSequence = try $ do let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers + +macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition = try $ do + macroName <- many1 nonspaceChar <* skipSpaces + firstPart <- expansionPart + (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) + let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder + return (macroName, expander) + where + placeholder :: Monad m => OrgParser m Int + placeholder = try . fmap read $ char '$' *> many1 digit + + expansionPart :: Monad m => OrgParser m String + expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + + alternate :: [a] -> [a] -> [a] + alternate [] ys = ys + alternate xs [] = xs + alternate (x:xs) (y:ys) = x : y : alternate xs ys + + reorder :: [Int] -> [String] -> [String] + reorder perm xs = + let element n = take 1 $ drop (n - 1) xs + in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index bdd1dc951..92f868516 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,8 +1,7 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,8 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ParserState + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -39,6 +38,9 @@ module Text.Pandoc.Readers.Org.ParserState , TodoState (..) , activeTodoMarkers , registerTodoSequence + , MacroExpander + , lookupMacro + , registerMacro , F , askF , asksF @@ -58,14 +60,14 @@ import qualified Data.Set as Set import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) -import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Logging -import Text.Pandoc.Parsing (HasHeaderMap (..), HasIdentifierList (..), - HasLogMessages (..), - HasLastStrPosition (..), HasQuoteContext (..), +import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), + HasIncludeFiles (..), HasLastStrPosition (..), + HasLogMessages (..), HasQuoteContext (..), HasReaderOptions (..), ParserContext (..), - QuoteContext (..), SourcePos, Future, - askF, asksF, returnF, runF, trimInlinesF) + QuoteContext (..), SourcePos, askF, asksF, returnF, + runF, trimInlinesF) -- | This is used to delay evaluation until all relevant information has been -- parsed and made available in the parser state. @@ -78,6 +80,8 @@ type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. type OrgLinkFormatters = M.Map String (String -> String) +-- | Macro expander function +type MacroExpander = [String] -> String -- | The states in which a todo item can be data TodoState = Todo | Done @@ -101,10 +105,13 @@ data OrgParserState = OrgParserState , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String + , orgStateIncludeFiles :: [String] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters + , orgStateMacros :: M.Map String MacroExpander + , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateOptions :: ReaderOptions @@ -141,6 +148,12 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasIncludeFiles OrgParserState where + getIncludeFiles = orgStateIncludeFiles + addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } + dropLatestIncludeFile st = + st { orgStateIncludeFiles = drop 1 $ orgStateIncludeFiles st } + instance Default OrgParserState where def = defaultOrgParserState @@ -152,10 +165,13 @@ defaultOrgParserState = OrgParserState , orgStateExportSettings = def , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty + , orgStateIncludeFiles = [] , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty + , orgStateMacros = M.empty + , orgStateMacroDepth = 0 , orgStateMeta = return nullMeta , orgStateNotes' = [] , orgStateOptions = def @@ -185,6 +201,15 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences +lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro macroName = M.lookup macroName . orgStateMacros + +registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro (name, expander) st = + let curMacros = orgStateMacros st + in st{ orgStateMacros = M.insert name expander curMacros } + + -- -- Export Settings @@ -213,6 +238,7 @@ data ExportSettings = ExportSettings , exportWithAuthor :: Bool -- ^ Include author in final meta-data , exportWithCreator :: Bool -- ^ Include creator in final meta-data , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportWithTags :: Bool -- ^ Keep tags as part of headlines , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers } @@ -225,11 +251,12 @@ defaultExportSettings = ExportSettings , exportDrawers = Left ["LOGBOOK"] , exportEmphasizedText = True , exportHeadlineLevels = 3 - , exportSmartQuotes = True + , exportSmartQuotes = False , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True , exportWithCreator = True , exportWithEmail = True + , exportWithTags = True , exportWithTodoKeywords = True } diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 464ef9ca6..3273c92e4 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Parsing + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -31,6 +31,8 @@ functions are adapted to Org-mode specific functionality. module Text.Pandoc.Readers.Org.Parsing ( OrgParser , anyLine + , anyLineNewline + , indentWith , blanklines , newline , parseFromString @@ -70,6 +72,8 @@ module Text.Pandoc.Readers.Org.Parsing , dash , ellipses , citeKey + , gridTableWith + , insertIncludedFileF -- * Re-exports from Text.Pandoc.Parsec , runParser , runParserT diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index f89ce6732..952082ec1 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Shared + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -56,7 +56,7 @@ cleanLinkString s = '.':'/':_ -> Just s -- relative path '.':'.':'/':_ -> Just s -- relative path -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' _ | isUrl s -> Just s -- URL _ -> Nothing where diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7564998ff..fb5f6f2d4 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,10 +32,11 @@ Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where import Control.Monad (guard, liftM, mzero, when) +import Control.Monad.Identity (Identity(..)) import Control.Monad.Except (throwError) import Data.Char (isHexDigit, isSpace, toLower, toUpper) -import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose, union) +import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, + isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) import Data.Monoid ((<>)) @@ -52,20 +53,22 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared import Text.Printf (printf) +import Data.Text (Text) +import qualified Data.Text as T -- TODO: -- [ ] .. parsed-literal -- [ ] :widths: attribute in .. table -- [ ] .. csv-table --- [ ] .. list-table -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do - parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n") + parsed <- (readWithM parseRST) def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -131,7 +134,10 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.mapKeys (\k -> if k == "authors" then "author" else k) + $ M.mapKeys (\k -> + if k == "authors" + then "author" + else k) $ metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x @@ -193,7 +199,7 @@ parseRST = do parseCitation :: PandocMonad m => (String, String) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw return $ (B.spanWith (ref, ["citation-label"], []) (B.str ref), [contents]) @@ -243,7 +249,7 @@ fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent term <- parseInlineFromString name - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -442,7 +448,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n\n" return $ B.blockQuote contents {- @@ -530,7 +536,7 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw ++ "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks @@ -558,26 +564,16 @@ listLine :: Monad m => Int -> RSTParser m [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength - line <- anyLine - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Monad m => Int -> RSTParser m [Char] -indentWith num = do - tabStop <- getOption readerTabStop - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + anyLineNewline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int -> RSTParser m (Int, [Char]) rawListItem start = try $ do markerLength <- start - firstLine <- anyLine + firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + return (markerLength, firstLine ++ concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. @@ -602,13 +598,17 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of - [Para xs] -> B.singleton $ Plain xs - [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys] - [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys] - [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] + [Para xs] -> + B.singleton $ Plain xs + [Para xs, BulletList ys] -> + B.fromList [Plain xs, BulletList ys] + [Para xs, OrderedList s ys] -> + B.fromList [Plain xs, OrderedList s ys] + [Para xs, DefinitionList ys] -> + B.fromList [Plain xs, DefinitionList ys] _ -> parsed orderedList :: PandocMonad m => RSTParser m Blocks @@ -685,22 +685,23 @@ directive' = do (lengthToDim . filter (not . isSpace)) case label of "table" -> tableDirective top fields body' + "list-table" -> listTableDirective top fields body' "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields - "container" -> parseFromString parseBlocks body' + "container" -> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey parseInlineFromString (trim $ unicodeTransform top) - "compound" -> parseFromString parseBlocks body' - "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' - "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' - "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' + "compound" -> parseFromString' parseBlocks body' + "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' + "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' + "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' let lab = case label of "admonition" -> mempty (l:ls) -> B.divWith ("",["admonition-title"],[]) @@ -713,11 +714,11 @@ directive' = do (trim top ++ if null subtit then "" else (": " ++ subtit)) - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["sidebar"],[]) $ tit <> bod "topic" -> do tit <- B.para . B.strong <$> parseInlineFromString top - bod <- parseFromString parseBlocks body' + bod <- parseFromString' parseBlocks body' return $ B.divWith ("",["topic"],[]) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> s { stateRstDefaultRole = @@ -733,9 +734,10 @@ directive' = do "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do - (caption, legend) <- parseFromString extractCaption body' + (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend + return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" + caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -746,38 +748,74 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + let attrs = ("", (splitBy isSpace $ trim top), + map (\(k,v) -> (k, trimr v)) fields) -- directive content or the first immediately following element children <- case body of "" -> block - _ -> parseFromString parseBlocks body' + _ -> parseFromString' parseBlocks body' return $ B.divWith attrs children other -> do pos <- getPosition logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' + bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' return $ B.divWith ("",[other],[]) bod tableDirective :: PandocMonad m => String -> [(String, String)] -> String -> RSTParser m Blocks tableDirective top _fields body = do - bs <- parseFromString parseBlocks body + bs <- parseFromString' parseBlocks body case B.toList bs of [Table _ aligns' widths' header' rows'] -> do - title <- parseFromString (trimInlines . mconcat <$> many inline) top + title <- parseFromString' (trimInlines . mconcat <$> many inline) top -- TODO widths -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) aligns' widths' header' rows' _ -> return mempty + +-- TODO: :stub-columns:. +-- Only the first row becomes the header even if header-rows: > 1, +-- since Pandoc doesn't support a table with multiple header rows. +-- We don't need to parse :align: as it represents the whole table align. +listTableDirective :: PandocMonad m + => String -> [(String, String)] -> String + -> RSTParser m Blocks +listTableDirective top fields body = do + bs <- parseFromString' parseBlocks body + title <- parseFromString' (trimInlines . mconcat <$> many inline) top + let rows = takeRows $ B.toList bs + headerRowsNum = fromMaybe (0 :: Int) $ + lookup "header-rows" fields >>= safeRead + (headerRow,bodyRows,numOfCols) = case rows of + x:xs -> if headerRowsNum > 0 + then (x, xs, length x) + else ([], rows, length x) + _ -> ([],[],0) + widths = case trim <$> lookup "widths" fields of + Just "auto" -> replicate numOfCols 0 + Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ + splitBy (`elem` (" ," :: String)) specs + _ -> replicate numOfCols 0 + return $ B.table title + (zip (replicate numOfCols AlignDefault) widths) + headerRow + bodyRows + where takeRows [BulletList rows] = map takeCells rows + takeRows _ = [] + takeCells [BulletList cells] = map B.fromList cells + takeCells _ = [] + normWidths ws = map (/ max 1 (sum ws)) ws + -- TODO: -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix -addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks +addNewRole :: PandocMonad m + => String -> [(String, String)] -> RSTParser m Blocks addNewRole roleString fields = do pos <- getPosition - (role, parentRole) <- parseFromString inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -804,7 +842,8 @@ addNewRole roleString fields = do SkippedContent ":format: [because parent of role is not :raw:]" pos _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ - logMessage $ SkippedContent ":format: [after first in definition of role]" + logMessage $ SkippedContent + ":format: [after first in definition of role]" pos when (parentRole == "code" && countKeys "language" > 1) $ logMessage $ SkippedContent @@ -819,7 +858,8 @@ addNewRole roleString fields = do where countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') + <|> pure "span") -- Can contain character codes as decimal numbers or @@ -996,7 +1036,8 @@ substKey = try $ do [Para ils] -> return $ B.fromList ils _ -> mzero let key = toKey $ stripFirstAndLast ref - updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } + updateState $ \s -> s{ stateSubstitutions = + M.insert key il $ stateSubstitutions s } anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do @@ -1005,7 +1046,8 @@ anonymousKey = try $ do pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } stripTicks :: String -> String stripTicks = reverse . stripTick . reverse . stripTick @@ -1020,7 +1062,8 @@ regularKey = try $ do src <- targetURI let key = toKey $ stripTicks ref --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ + stateKeys s } headerBlock :: PandocMonad m => RSTParser m [Char] headerBlock = do @@ -1087,7 +1130,7 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : conLines ++ [replicate (length indices) "" | not (null conLines)] - mapM (parseFromString parseBlocks) cols + mapM (parseFromString' parseBlocks) cols simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = @@ -1110,7 +1153,7 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (mconcat <$> many plain)) $ + heads <- mapM (parseFromString' (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1119,8 +1162,12 @@ simpleTable :: PandocMonad m => Bool -- ^ Headerless table -> RSTParser m Blocks simpleTable headless = do - tbl <- tableWith (simpleTableHeader headless) simpleTableRow - sep simpleTableFooter + let wrapIdFst (a, b, c) = (Identity a, b, c) + wrapId = fmap Identity + tbl <- runIdentity <$> tableWith + (wrapIdFst <$> simpleTableHeader headless) + (wrapId <$> simpleTableRow) + sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) case B.toList tbl of [Table c a _w h l] -> return $ B.singleton $ @@ -1134,7 +1181,8 @@ simpleTable headless = do gridTable :: PandocMonad m => Bool -- ^ Headerless table -> RSTParser m Blocks -gridTable headerless = gridTableWith parseBlocks headerless +gridTable headerless = runIdentity <$> + gridTableWith (Identity <$> parseBlocks) headerless table :: PandocMonad m => RSTParser m Blocks table = gridTable False <|> simpleTable False <|> @@ -1161,7 +1209,7 @@ inline = choice [ note -- can start with whitespace, so try before ws , symbol ] <?> "inline" parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) +parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do @@ -1220,7 +1268,8 @@ interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter renderRole contents Nothing role nullAttr -renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines +renderRole :: PandocMonad m + => String -> Maybe String -> String -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ B.str contents "superscript" -> return $ B.superscript $ B.str contents @@ -1353,7 +1402,8 @@ referenceLink = try $ do (k:_) -> return k ((src,tit), attr) <- lookupKey [] key -- if anonymous link, remove key so it won't be used again - when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } + when (isAnonKey key) $ updateState $ \s -> + s{ stateKeys = M.delete key keyTable } return $ B.linkWith attr src tit label' -- We keep a list of oldkeys so we can detect lookup loops. @@ -1423,7 +1473,7 @@ note = try $ do -- Note references inside other notes are allowed in reST, but -- not yet in this implementation. updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw + contents <- parseFromString' parseBlocks raw let newnotes = if (ref == "*" || ref == "#") -- auto-numbered -- delete the note so the next auto-numbered note -- doesn't get the same contents: diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index ecb609ae9..9e544c4ac 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -49,14 +49,17 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) +import Data.Text (Text) +import qualified Data.Text as T -- | Read twiki from an input string and return a Pandoc document. readTWiki :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n") + res <- readWithM parseTWiki def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case res of Left e -> throwError e Right d -> return d @@ -106,7 +109,7 @@ parseHtmlContentWithAttrs tag parser = do parsedContent <- try $ parseContent content return (attr, parsedContent) where - parseContent = parseFromString $ nested $ manyTill parser endOfContent + parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] @@ -233,7 +236,7 @@ listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat filterSpaces = reverse . dropWhile (== ' ') . reverse listContinuation = notFollowedBy (string prefix >> marker) >> string " " >> lineContent - parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= return . B.plain . mconcat nestedList = list prefix @@ -297,7 +300,7 @@ noautolink = do setState $ st{ stateAllowLinks = True } return $ mconcat blocks where - parseContent = parseFromString $ many $ block + parseContent = parseFromString' $ many $ block para :: PandocMonad m => TWParser m B.Blocks para = many1Till inline endOfParaElement >>= return . result . mconcat @@ -349,13 +352,13 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) where lastNewline = eof >> return mempty innerNewline = return B.space -between :: (Monoid c, PandocMonad m) +between :: (Monoid c, PandocMonad m, Show b) => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) -> TWParser m c between start end p = mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) -enclosed :: (Monoid b, PandocMonad m) +enclosed :: (Monoid b, PandocMonad m, Show a) => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where @@ -525,4 +528,4 @@ linkText = do return (url, "", content) where linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent - parseLinkContent = parseFromString $ many1 inline + parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 047aa061c..1669e3e51 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,6 +1,6 @@ {- -Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' - and John MacFarlane +Copyright (C) 2010-2012 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' + 2010-2017 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2012 Paul Rivier + 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> @@ -69,14 +70,17 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared (trim) +import Data.Text (Text) +import qualified Data.Text as T -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n") + parsed <- readWithM parseTextile def{ stateOptions = opts } + (T.unpack s ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -314,7 +318,7 @@ definitionListItem = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) -- this ++ "\n\n" does not look very good - ds <- parseFromString parseBlocks (s ++ "\n\n") + ds <- parseFromString' parseBlocks (s ++ "\n\n") return [ds] -- raw content @@ -366,7 +370,7 @@ tableCell = try $ do notFollowedBy blankline raw <- trim <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) - content <- mconcat <$> parseFromString (many inline) raw + content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells @@ -388,7 +392,7 @@ table = try $ do _ <- attributes char '.' rawcapt <- trim <$> anyLine - parseFromString (mconcat <$> many inline) rawcapt + parseFromString' (mconcat <$> many inline) rawcapt rawrows <- many1 $ (skipMany ignorableRow) >> tableRow skipMany ignorableRow blanklines @@ -506,7 +510,7 @@ note = try $ do notes <- stateNotes <$> getState case lookup ref notes of Nothing -> fail "note not found" - Just raw -> B.note <$> parseFromString parseBlocks raw + Just raw -> B.note <$> parseFromString' parseBlocks raw -- | Special chars markupChars :: [Char] @@ -585,8 +589,9 @@ link = try $ do char ':' let stop = if bracketed then char ']' - else lookAhead $ space <|> - try (oneOf "!.,;:" *> (space <|> newline)) + else lookAhead $ space <|> eof' <|> + try (oneOf "!.,;:" *> + (space <|> newline <|> eof')) url <- many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr @@ -690,7 +695,7 @@ langAttr = do return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. -surrounded :: PandocMonad m +surrounded :: (PandocMonad m, Show t) => ParserT [Char] st m t -- ^ surrounding parser -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) -> ParserT [Char] st m [a] @@ -727,3 +732,5 @@ groupedInlineMarkup = try $ do singleton :: a -> [a] singleton x = [x] +eof' :: Monad m => ParserT [Char] s m Char +eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 33f785109..260bb7fff 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -42,11 +42,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) ---import Network.URI (isURI) -- Not sure whether to use this function import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default - +import Data.Text (Text) +import qualified Data.Text as T import Control.Monad.Except (catchError, throwError) import Data.Time.Format (formatTime) import Text.Pandoc.Class (PandocMonad) @@ -91,11 +91,11 @@ getT2TMeta = do -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m => ReaderOptions - -> String + -> Text -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") + let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n") case parsed of Right result -> return $ result Left e -> throwError e @@ -213,7 +213,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -265,7 +265,7 @@ listItem start end = try $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString end $ firstLine ++ blank ++ rest + parseFromString' end $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -277,12 +277,6 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -anyLineNewline :: T2T String -anyLineNewline = (++ "\n") <$> anyLine - -indentWith :: Int -> T2T String -indentWith n = count n space - -- Table table :: T2T Blocks @@ -446,7 +440,7 @@ inlineMarkup p f c special = try $ do Just middle -> do lastChar <- anyChar end <- many1 (char c) - let parser inp = parseFromString (mconcat <$> many p) inp + let parser inp = parseFromString' (mconcat <$> many p) inp let start' = case drop 2 start of "" -> mempty xs -> special xs |