diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Creole.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 48 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 129 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 18 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/OPML.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/StyleReader.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 93 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Vimwiki.hs | 10 |
15 files changed, 200 insertions, 185 deletions
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index 4da259c0e..b4eb6eaef 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA License : GNU GPL, version 2 or above Maintainer : Sascha Wilde <wilde@sha-bang.de> - Stability : WIP + Stability : alpha Portability : portable Conversion of creole text to 'Pandoc' document. @@ -64,7 +64,7 @@ readCreole opts s = do type CRLParser = ParserT [Char] ParserState -- --- Utility funcitons +-- Utility functions -- (<+>) :: (Monad m, Monoid a) => m a -> m a -> m a @@ -111,7 +111,8 @@ block = do return res nowiki :: PandocMonad m => CRLParser m B.Blocks -nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart >> manyTill content nowikiEnd) +nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart + >> manyTill content nowikiEnd) where content = brackets <|> line brackets = try $ option "" ((:[]) <$> newline) @@ -154,7 +155,8 @@ listItem :: PandocMonad m => Char -> Int -> CRLParser m B.Blocks listItem c n = fmap (B.plain . B.trimInlines .mconcat) (listStart >> many1Till inline itemEnd) where - listStart = try $ optional newline >> skipSpaces >> count n (char c) + listStart = try $ skipSpaces >> optional newline >> skipSpaces + >> count n (char c) >> lookAhead (noneOf [c]) >> skipSpaces itemEnd = endOfParaElement <|> nextItem n <|> if n < 3 then nextItem (n+1) @@ -193,7 +195,7 @@ endOfParaElement = lookAhead $ endOfInput <|> endOfPara startOf :: PandocMonad m => CRLParser m a -> CRLParser m () startOf p = try $ blankline >> p >> return mempty startOfList = startOf $ anyList 1 - startOfTable =startOf table + startOfTable = startOf table startOfHeader = startOf header startOfNowiki = startOf nowiki hr = startOf horizontalRule diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 0f3f6f6e3..728f77a05 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,22 +1,22 @@ +{-# LANGUAGE ExplicitForAll #-} module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper, isSpace) -import Text.Pandoc.Shared (safeRead, crFilter) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) +import Control.Monad.State.Strict +import Data.Char (isSpace, toUpper) +import Data.Default import Data.Either (rights) +import Data.Foldable (asum) import Data.Generics -import Control.Monad.State.Strict import Data.List (intersperse) import Data.Maybe (fromMaybe) -import Text.TeXMath (readMathML, writeTeX) -import Data.Default -import Data.Foldable (asum) -import Text.Pandoc.Class (PandocMonad) import Data.Text (Text) import qualified Data.Text as T +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter, safeRead) +import Text.TeXMath (readMathML, writeTeX) +import Text.XML.Light {- @@ -538,12 +538,12 @@ handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>': handleInstructions xs = case break (=='<') xs of (ys, []) -> ys ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + (ys, zs) -> ys ++ handleInstructions zs getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do tit <- case filterChild (named "title") e of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbFigureTitle = tit } res <- getBlocks e @@ -797,8 +797,8 @@ parseBlock (Elem e) = return $ p <> b <> x codeBlockWithLang = do let classes' = case attrValue "language" e of - "" -> [] - x -> [x] + "" -> [] + x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e parseBlockquote = do @@ -871,11 +871,11 @@ parseBlock (Elem e) = || x == '.') w Nothing -> 0 :: Double let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of - [] -> replicate numrows AlignDefault - cs -> map toAlignment cs + [] -> replicate numrows AlignDefault + cs -> map toAlignment cs let widths = case colspecs of [] -> replicate numrows 0 cs -> let ws = map toWidth cs @@ -895,7 +895,7 @@ parseBlock (Elem e) = headerText <- case filterChild (named "title") e `mplus` (filterChild (named "info") e >>= filterChild (named "title")) of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e @@ -989,10 +989,10 @@ parseInline (Elem e) = return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of - "bold" -> strong <$> innerInlines - "strong" -> strong <$> innerInlines + "bold" -> strong <$> innerInlines + "strong" -> strong <$> innerInlines "strikethrough" -> strikeout <$> innerInlines - _ -> emph <$> innerInlines + _ -> emph <$> innerInlines "footnote" -> (note . mconcat) <$> mapM parseBlock (elContent e) "title" -> return mempty diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 1874a011a..295b79195 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -214,14 +214,14 @@ codeDivs :: [String] codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> Inlines -runElemToInlines (TextRun s) = text s +runElemToInlines (TextRun s) = text s runElemToInlines LnBrk = linebreak runElemToInlines Tab = space runElemToInlines SoftHyphen = text "\xad" runElemToInlines NoBreakHyphen = text "\x2011" runElemToString :: RunElem -> String -runElemToString (TextRun s) = s +runElemToString (TextRun s) = s runElemToString LnBrk = ['\n'] runElemToString Tab = ['\t'] runElemToString SoftHyphen = ['\xad'] diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 53840c609..70eccd7d6 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -157,7 +157,7 @@ flatToBullets elems = flatToBullets' (-1) elems singleItemHeaderToHeader :: Block -> Block singleItemHeaderToHeader (OrderedList _ [[h@Header{}]]) = h -singleItemHeaderToHeader blk = blk +singleItemHeaderToHeader blk = blk blocksToBullets :: [Block] -> [Block] diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index fea595027..99e6f99e6 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -106,7 +106,7 @@ eitherToD (Right b) = return b eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] -concatMapM f xs = fmap concat (mapM f xs) +concatMapM f xs = liftM concat (mapM f xs) -- This is similar to `mapMaybe`: it maps a function returning the D diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index c1eb6ca59..3b13bbe13 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} module Text.Pandoc.Readers.EPUB (readEPUB) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 277405b09..915fa852f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,8 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, -ViewPatterns, OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> @@ -39,43 +42,42 @@ module Text.Pandoc.Readers.HTML ( readHtml , isCommentTag ) where +import Control.Applicative ((<|>)) +import Control.Arrow ((***)) +import Control.Monad (guard, mplus, msum, mzero, unless, void) +import Control.Monad.Except (throwError) +import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) +import Data.Char (isAlphaNum, isDigit, isLetter) +import Data.Default (Default (..), def) +import Data.Foldable (for_) +import Data.List (intercalate, isPrefixOf) +import Data.List.Split (wordsBy) +import qualified Data.Map as M +import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Monoid (First (..)) +import Data.Monoid ((<>)) +import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI (URI, nonStrictRelativeTo, parseURIReference) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match -import Text.Pandoc.Definition +import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, addMetaField - , escapeURI, safeRead, crFilter, underlineSpan ) -import Text.Pandoc.Options ( - ReaderOptions(readerExtensions,readerStripComments), extensionEnabled, - Extension (Ext_epub_html_exts, - Ext_raw_html, Ext_native_divs, Ext_native_spans)) +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) +import Text.Pandoc.Definition +import Text.Pandoc.Error import Text.Pandoc.Logging +import Text.Pandoc.Options (Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans, Ext_raw_html), + ReaderOptions (readerExtensions, readerStripComments), + extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) +import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces, + safeRead, underlineSpan) import Text.Pandoc.Walk -import qualified Data.Map as M -import Data.Foldable ( for_ ) -import Data.Maybe ( fromMaybe, isJust, isNothing ) -import Data.List.Split ( wordsBy ) -import Data.List ( intercalate, isPrefixOf ) -import Data.Char ( isDigit, isLetter, isAlphaNum ) -import Control.Monad ( guard, mzero, void, unless, mplus, msum ) -import Control.Arrow ((***)) -import Control.Applicative ( (<|>) ) -import Data.Monoid (First (..)) -import Data.Text (Text) -import qualified Data.Text as T -import Text.TeXMath (readMathML, writeTeX) -import Data.Default (Default (..), def) -import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift) -import Network.URI (URI, parseURIReference, nonStrictRelativeTo) -import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) -import Data.Monoid ((<>)) import Text.Parsec.Error -import qualified Data.Set as Set -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad(..)) -import Control.Monad.Except (throwError) +import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: PandocMonad m @@ -123,8 +125,8 @@ data HTMLState = } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext - , inChapter :: Bool -- ^ Set if in chapter section - , inPlain :: Bool -- ^ Set if in pPlain + , inChapter :: Bool -- ^ Set if in chapter section + , inPlain :: Bool -- ^ Set if in pPlain } setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a @@ -354,16 +356,16 @@ fixPlains :: Bool -> Blocks -> Blocks fixPlains inList bs = if any isParaish bs' then B.fromList $ map plainToPara bs' else bs - where isParaish (Para _) = True - isParaish (CodeBlock _ _) = True - isParaish (Header _ _ _) = True - isParaish (BlockQuote _) = True - isParaish (BulletList _) = not inList - isParaish (OrderedList _ _) = not inList + where isParaish (Para _) = True + isParaish (CodeBlock _ _) = True + isParaish (Header _ _ _) = True + isParaish (BlockQuote _) = True + isParaish (BulletList _) = not inList + isParaish (OrderedList _ _) = not inList isParaish (DefinitionList _) = not inList - isParaish _ = False + isParaish _ = False plainToPara (Plain xs) = Para xs - plainToPara x = x + plainToPara x = x bs' = B.toList bs pRawTag :: PandocMonad m => TagParser m Text @@ -377,10 +379,10 @@ pRawTag = do pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs - let isDivLike "div" = True + let isDivLike "div" = True isDivLike "section" = True - isDivLike "main" = True - isDivLike _ = False + isDivLike "main" = True + isDivLike _ = False TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) let attr = toStringAttr attr' contents <- pInTags tag block @@ -545,9 +547,9 @@ pCell celltype = try $ do skipMany pBlank tag <- lookAhead $ pSatisfy (\t -> t ~== TagOpen celltype [] && noColOrRowSpans t) - let extractAlign' [] = "" + let extractAlign' [] = "" extractAlign' ("text-align":x:_) = x - extractAlign' (_:xs) = extractAlign' xs + extractAlign' (_:xs) = extractAlign' xs let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) let align = case maybeFromAttrib "align" tag `mplus` (extractAlign <$> maybeFromAttrib "style" tag) of @@ -603,18 +605,18 @@ pCodeBlock = try $ do let rawText = concatMap tagToString contents -- drop leading newline if any let result' = case rawText of - '\n':xs -> xs - _ -> rawText + '\n':xs -> xs + _ -> rawText -- drop trailing newline if any let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + '\n':_ -> init result' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result tagToString :: Tag Text -> String -tagToString (TagText s) = T.unpack s +tagToString (TagText s) = T.unpack s tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToString _ = "" inline :: PandocMonad m => TagParser m Inlines inline = choice @@ -893,16 +895,16 @@ pStr = do return $ B.str result isSpecial :: Char -> Bool -isSpecial '"' = True -isSpecial '\'' = True -isSpecial '.' = True -isSpecial '-' = True -isSpecial '$' = True +isSpecial '"' = True +isSpecial '\'' = True +isSpecial '.' = True +isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True isSpecial '\8221' = True -isSpecial _ = False +isSpecial _ = False pSymbol :: PandocMonad m => InlinesParser m Inlines pSymbol = satisfy isSpecial >>= return . B.str . (:[]) @@ -1123,7 +1125,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts hasTagWarning :: [Tag a] -> Bool hasTagWarning (TagWarning _:_) = True -hasTagWarning _ = False +hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) @@ -1148,7 +1150,7 @@ htmlTag f = try $ do -- in XML elemnet names let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' let isName s = case s of - [] -> False + [] -> False (c:cs) -> isLetter c && all isNameChar cs let endAngle = try $ do char '>' @@ -1170,8 +1172,9 @@ htmlTag f = try $ do case next of TagComment s | "<!--" `isPrefixOf` inp -> do - char '<' - manyTill anyChar endAngle + string "<!--" + count (length s) anyChar + string "-->" stripComments <- getOption readerStripComments if stripComments then return (next, "") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index c91e8bd79..9bac3d3a7 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -430,7 +430,7 @@ doMacros n = do Nothing -> return () Just (Macro expansionPoint numargs optarg newtoks) -> do setInput ts - let getarg = spaces >> braced + let getarg = try $ spaces >> bracedOrToken args <- case optarg of Nothing -> count numargs getarg Just o -> @@ -438,7 +438,14 @@ doMacros n = do <*> count (numargs - 1) getarg let addTok (Tok _ (Arg i) _) acc | i > 0 , i <= numargs = - map (setpos spos) (args !! (i - 1)) ++ acc + foldr addTok acc (args !! (i - 1)) + -- add space if needed after control sequence + -- see #4007 + addTok (Tok _ (CtrlSeq x) txt) + acc@(Tok _ Word _ : _) + | not (T.null txt) && + (isLetter (T.last txt)) = + Tok spos (CtrlSeq x) (txt <> " ") : acc addTok t acc = setpos spos t : acc ts' <- getInput setInput $ foldr addTok ts' newtoks @@ -665,7 +672,7 @@ removeDoubleQuotes t = Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" doubleQuote :: PandocMonad m => LP m Inlines -doubleQuote = +doubleQuote = quoted' doubleQuoted (try $ count 2 $ symbol '`') (void $ try $ count 2 $ symbol '\'') <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”') @@ -674,7 +681,7 @@ doubleQuote = (void $ try $ sequence [symbol '"', symbol '\'']) singleQuote :: PandocMonad m => LP m Inlines -singleQuote = +singleQuote = quoted' singleQuoted ((:[]) <$> symbol '`') (try $ symbol '\'' >> notFollowedBy (satisfyTok startsWithLetter)) @@ -1125,7 +1132,7 @@ inlineCommand' = try $ do lookupListDefault raw names inlineCommands tok :: PandocMonad m => LP m Inlines -tok = grouped inline <|> inlineCommand' <|> singleChar' +tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' where singleChar' = do Tok _ _ t <- singleChar return (str (T.unpack t)) @@ -1824,7 +1831,7 @@ letmacro = do Tok _ (CtrlSeq name) _ <- anyControlSeq optional $ symbol '=' spaces - contents <- braced <|> ((:[]) <$> (anyControlSeq <|> singleChar)) + contents <- bracedOrToken return (name, Macro ExpandWhenDefined 0 Nothing contents) defmacro :: PandocMonad m => LP m (Text, Macro) @@ -1832,7 +1839,9 @@ defmacro = try $ do controlSeq "def" Tok _ (CtrlSeq name) _ <- anyControlSeq numargs <- option 0 $ argSeq 1 - contents <- withVerbatimMode braced + -- we use withVerbatimMode, because macros are to be expanded + -- at point of use, not point of definition + contents <- withVerbatimMode bracedOrToken return (name, Macro ExpandWhenUsed numargs Nothing contents) -- Note: we don't yet support fancy things like #1.#2 @@ -1846,6 +1855,9 @@ isArgTok :: Tok -> Bool isArgTok (Tok _ (Arg _) _) = True isArgTok _ = False +bracedOrToken :: PandocMonad m => LP m [Tok] +bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar)) + newcommand :: PandocMonad m => LP m (Text, Macro) newcommand = do pos <- getPosition @@ -1861,9 +1873,7 @@ newcommand = do spaces optarg <- option Nothing $ Just <$> try bracketedToks spaces - contents <- withVerbatimMode braced - -- we use withVerbatimMode, because macros are to be expanded - -- at point of use, not point of definition + contents <- withVerbatimMode bracedOrToken when (mtype == "newcommand") $ do macros <- sMacros <$> getState case M.lookup name macros of @@ -1885,9 +1895,9 @@ newenvironment = do spaces optarg <- option Nothing $ Just <$> try bracketedToks spaces - startcontents <- withVerbatimMode braced + startcontents <- withVerbatimMode bracedOrToken spaces - endcontents <- withVerbatimMode braced + endcontents <- withVerbatimMode bracedOrToken when (mtype == "newenvironment") $ do macros <- sMacros <$> getState case M.lookup name macros of diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 69e70f9f5..98552e65d 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -241,7 +241,7 @@ yamlMetaBlock = try $ do case Yaml.decodeEither' $ UTF8.fromString rawYaml of Right (Yaml.Object hashmap) -> do let alist = H.toList hashmap - mapM_ (\(k, v) -> + mapM_ (\(k, v) -> if ignorable k then return () else do @@ -320,7 +320,7 @@ yamlToMeta (Yaml.Array xs) = do return $ B.toMetaValue xs'' yamlToMeta (Yaml.Object o) = do let alist = H.toList o - foldM (\m (k,v) -> + foldM (\m (k,v) -> if ignorable k then return m else do @@ -846,6 +846,7 @@ listLine continuationIndent = try $ do skipMany spaceChar listStart) notFollowedByHtmlCloser + notFollowedByDivCloser optional (() <$ gobbleSpaces continuationIndent) listLineCommon @@ -883,16 +884,24 @@ listContinuation continuationIndent = try $ do x <- try $ do notFollowedBy blankline notFollowedByHtmlCloser + notFollowedByDivCloser gobbleSpaces continuationIndent anyLineNewline xs <- many $ try $ do notFollowedBy blankline notFollowedByHtmlCloser + notFollowedByDivCloser gobbleSpaces continuationIndent <|> notFollowedBy' listStart anyLineNewline blanks <- many blankline return $ concat (x:xs) ++ blanks +notFollowedByDivCloser :: PandocMonad m => MarkdownParser m () +notFollowedByDivCloser = do + guardDisabled Ext_fenced_divs <|> + do divLevel <- stateFencedDivLevel <$> getState + guard (divLevel < 1) <|> notFollowedBy divFenceEnd + notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () notFollowedByHtmlCloser = do inHtmlBlock <- stateInHtmlBlock <$> getState @@ -965,6 +974,7 @@ defRawBlock compact = try $ do let dline = try ( do notFollowedBy blankline notFollowedByHtmlCloser + notFollowedByDivCloser if compact -- laziness not compatible with compact then () <$ indentSpaces else (() <$ indentSpaces) @@ -1688,10 +1698,8 @@ endline = try $ do guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - guardDisabled Ext_fenced_divs <|> - do divLevel <- stateFencedDivLevel <$> getState - guard (divLevel < 1) <|> notFollowedBy divFenceEnd notFollowedByHtmlCloser + notFollowedByDivCloser (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 6cc505e3b..6f4244ac3 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {- Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com> @@ -42,23 +42,23 @@ module Text.Pandoc.Readers.Muse (readMuse) where import Control.Monad import Control.Monad.Except (throwError) -import qualified Data.Map as M import Data.Char (isLetter) -import Data.Text (Text, unpack) import Data.List (stripPrefix) +import qualified Data.Map as M import Data.Maybe (fromMaybe) +import Data.Text (Text, unpack) +import System.FilePath (takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad(..)) +import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.XML (fromEntities) -import System.FilePath (takeExtension) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -233,14 +233,14 @@ exampleTag = do return $ return $ B.codeBlockWith attr $ chop contents where lchop s = case s of '\n':ss -> ss - _ -> s + _ -> s rchop = reverse . lchop . reverse -- Trim up to one newline from the beginning and the end, -- in case opening and/or closing tags are on separate lines. chop = lchop . rchop literal :: PandocMonad m => MuseParser m (F Blocks) -literal = fmap (return . rawBlock) $ htmlElement "literal" +literal = (return . rawBlock) <$> htmlElement "literal" where format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content @@ -315,7 +315,7 @@ noteBlock = try $ do content <- mconcat <$> blocksTillNote oldnotes <- stateNotes' <$> getState case M.lookup ref oldnotes of - Just _ -> logMessage $ DuplicateNoteReference ref pos + Just _ -> logMessage $ DuplicateNoteReference ref pos Nothing -> return () updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes } return mempty @@ -445,7 +445,7 @@ definitionList = do data MuseTable = MuseTable { museTableCaption :: Inlines , museTableHeaders :: [[Blocks]] - , museTableRows :: [[Blocks]] + , museTableRows :: [[Blocks]] , museTableFooters :: [[Blocks]] } @@ -658,7 +658,7 @@ str :: PandocMonad m => MuseParser m (F Inlines) str = fmap (return . B.str) (many1 alphaNum <|> count 1 characterReference) symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = fmap (return . B.str) $ count 1 nonspaceChar +symbol = (return . B.str) <$> count 1 nonspaceChar link :: PandocMonad m => MuseParser m (F Inlines) link = try $ do diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index e3ef67bc1..1a1375b16 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -58,7 +58,7 @@ normalizeTree = everywhere (mkT go) go xs = xs convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" attrValue :: String -> Element -> String diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 69eececc8..44bd89278 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -49,7 +49,6 @@ import Data.Maybe import qualified Text.XML.Light as XML -import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Shared diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index abb131983..1384072d1 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -1,5 +1,5 @@ {-# LANGUAGE Arrows #-} -{-# LANGUAGE PatternGuards #-} + {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index dae9fe40a..de488adfe 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,11 +31,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( readRST ) where +import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) import Data.Char (isHexDigit, isSpace, toLower, toUpper) -import Data.List (deleteFirstsBy, findIndex, intercalate, isInfixOf, isSuffixOf, +import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, nub, sort, transpose, union) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) @@ -43,8 +44,7 @@ import Data.Monoid ((<>)) import Data.Sequence (ViewR (..), viewr) import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, fetchItem, readFileFromDirs) import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV) @@ -67,7 +67,7 @@ readRST :: PandocMonad m -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do - parsed <- (readWithM parseRST) def{ stateOptions = opts } + parsed <- readWithM parseRST def{ stateOptions = opts } (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result @@ -100,9 +100,9 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level attr text):rest) = - (Header (level - num) attr text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders num (Header level attr text:rest) = + Header (level - num) attr text:promoteHeaders num rest +promoteHeaders num (other:rest) = other:promoteHeaders num rest promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) @@ -114,11 +114,11 @@ titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata titleTransform (bs, meta) = let (bs', meta') = case bs of - ((Header 1 _ head1):(Header 2 _ head2):rest) + (Header 1 _ head1:Header 2 _ head2:rest) | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ setMeta "subtitle" (fromList head2) meta) - ((Header 1 _ head1):rest) + (Header 1 _ head1:rest) | not (any (isHeader 1) rest) -> -- title only (promoteHeaders 1 rest, setMeta "title" (fromList head1) meta) @@ -137,8 +137,7 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds $ M.mapKeys (\k -> if k == "authors" then "author" - else k) - $ metamap + else k) metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x splitAuthors (MetaBlocks [Para xs]) @@ -201,7 +200,7 @@ parseCitation :: PandocMonad m => (String, String) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do contents <- parseFromString' parseBlocks raw - return $ (B.spanWith (ref, ["citation-label"], []) (B.str ref), + return (B.spanWith (ref, ["citation-label"], []) (B.str ref), [contents]) @@ -289,7 +288,7 @@ para = try $ do newline blanklines case viewr (B.unMany result) of - ys :> (Str xs) | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `isSuffixOf` xs -> do raw <- option mempty codeBlockBody return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) <> raw @@ -313,9 +312,9 @@ doubleHeader = do -- if so, get appropriate level. if not, add to list. state <- getState let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + let (headerTable',level) = case elemIndex (DoubleHeader c) headerTable of Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + Nothing -> (headerTable ++ [DoubleHeader c], length headerTable + 1) setState (state { stateHeaderTable = headerTable' }) attr <- registerHeader nullAttr txt return $ B.headerWith attr level txt @@ -329,8 +328,8 @@ doubleHeader' = try $ do newline txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () + let len = sourceColumn pos - 1 + when (len > lenTop) $ fail "title longer than border" blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines @@ -342,9 +341,9 @@ singleHeader = do (txt, c) <- singleHeader' state <- getState let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of + let (headerTable',level) = case elemIndex (SingleHeader c) headerTable of Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + Nothing -> (headerTable ++ [SingleHeader c], length headerTable + 1) setState (state { stateHeaderTable = headerTable' }) attr <- registerHeader nullAttr txt return $ B.headerWith attr level txt @@ -355,7 +354,7 @@ singleHeader' = try $ do lookAhead $ anyLine >> oneOf underlineChars txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy newline; inline}) pos <- getPosition - let len = (sourceColumn pos) - 1 + let len = sourceColumn pos - 1 blankline c <- oneOf underlineChars count (len - 1) (char c) @@ -491,8 +490,7 @@ includeDirective top fields body = do Just x | x >= 0 -> x | otherwise -> numLines + x -- negative from end let contentLines' = drop (startLine' - 1) - $ take (endLine' - 1) - $ contentLines + $ take (endLine' - 1) contentLines let contentLines'' = (case trim <$> lookup "end-before" fields of Just patt -> takeWhile (not . (patt `isInfixOf`)) Nothing -> id) . @@ -692,7 +690,7 @@ directive' = do "csv-table" -> csvTableDirective top fields body' "line-block" -> lineBlockDirective body' "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields + "role" -> addNewRole top $ map (second trim) fields "container" -> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseInlineFromString (trim top) @@ -733,7 +731,7 @@ directive' = do codeblock (words $ fromMaybe [] $ lookup "class" fields) (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) + let attribs = ("", ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body @@ -752,8 +750,8 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), - map (\(k,v) -> (k, trimr v)) fields) + let attrs = ("", splitBy isSpace $ trim top, + map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block @@ -857,7 +855,7 @@ csvTableDirective top fields rawcsv = do Just h -> h ++ "\n" ++ rawcsv' Nothing -> rawcsv') case res of - Left e -> do + Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do let parseCell = parseFromString' (plain <|> return mempty) . T.unpack @@ -909,13 +907,13 @@ addNewRole roleString fields = do in (ident, nub . (role :) . annotate $ classes, keyValues) -- warn about syntax we ignore - flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (baseRole /= "code") $ logMessage $ - SkippedContent ":language: [because parent of role is not :code:]" - pos - "format" -> when (baseRole /= "raw") $ logMessage $ - SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos + forM_ fields $ \(key, _) -> case key of + "language" -> when (baseRole /= "code") $ logMessage $ + SkippedContent ":language: [because parent of role is not :code:]" + pos + "format" -> when (baseRole /= "raw") $ logMessage $ + SkippedContent ":format: [because parent of role is not :raw:]" pos + _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ logMessage $ SkippedContent ":format: [after first in definition of role]" @@ -983,7 +981,7 @@ codeblock classes numberLines lang body = return $ B.codeBlockWith attribs $ stripTrailingNewlines body where attribs = ("", classes', kvs) classes' = "sourceCode" : lang - : maybe [] (\_ -> ["numberLines"]) numberLines + : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = case numberLines of Just "" -> [] @@ -1038,7 +1036,8 @@ noteMarker :: Monad m => RSTParser m [Char] noteMarker = do char '[' res <- many1 digit - <|> (try $ char '#' >> liftM ('#':) simpleReferenceName') + <|> + try (char '#' >> liftM ('#':) simpleReferenceName') <|> count 1 (oneOf "#*") char ']' return res @@ -1050,13 +1049,11 @@ noteMarker = do quotedReferenceName :: PandocMonad m => RSTParser m Inlines quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- trimInlines . mconcat <$> many1Till inline (char '`') - return label' + trimInlines . mconcat <$> many1Till inline (char '`') unquotedReferenceName :: PandocMonad m => RSTParser m Inlines -unquotedReferenceName = try $ do - label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') - return label' +unquotedReferenceName = try $ do -- `` means inline code! + trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, @@ -1066,7 +1063,8 @@ simpleReferenceName' :: Monad m => ParserT [Char] st m String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) + <|> + try (oneOf "-_:+." <* lookAhead alphaNum) return (x:xs) simpleReferenceName :: Monad m => ParserT [Char] st m Inlines @@ -1074,7 +1072,7 @@ simpleReferenceName = B.str <$> simpleReferenceName' referenceName :: PandocMonad m => RSTParser m Inlines referenceName = quotedReferenceName <|> - (try $ simpleReferenceName <* lookAhead (char ':')) <|> + try (simpleReferenceName <* lookAhead (char ':')) <|> unquotedReferenceName referenceKey :: PandocMonad m => RSTParser m [Char] @@ -1093,7 +1091,7 @@ targetURI = do contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines - return $ escapeURI $ trim $ contents + return $ escapeURI $ trim contents substKey :: PandocMonad m => RSTParser m () substKey = try $ do @@ -1258,8 +1256,7 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString' (mconcat <$> many plain)) $ - map trim rawHeads + heads <- mapM ( (parseFromString' (mconcat <$> many plain)) . trim) rawHeads return (heads, aligns, indices) -- Parse a simple table. @@ -1450,10 +1447,8 @@ endline = try $ do notFollowedBy blankline -- parse potential list-starts at beginning of line differently in a list: st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> + when ((stateParserContext st) == ListItemState) $ notFollowedBy (anyOrderedListMarker >> spaceChar) >> notFollowedBy' bulletListStart - else return () return B.softbreak -- diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 49da5a6c6..fecbb2fb4 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -91,12 +91,10 @@ import Text.Pandoc.Parsing (F, ParserState, ParserT, blanklines, emailAddress, registerHeader, runF, spaceChar, stateMeta', stateOptions, uri) import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast) -import Text.Parsec.Char - (alphaNum, anyChar, char, newline, noneOf, spaces, string, oneOf, - space) -import Text.Parsec.Combinator - (choice, count, eof, many1, manyTill, notFollowedBy, option, - skipMany1, between, lookAhead) +import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, + spaces, string) +import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, + manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc |