diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 36 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 594 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 39 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 12 |
14 files changed, 722 insertions, 98 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 33706816e..1e6b1d010 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -66,6 +66,7 @@ module Text.Pandoc , writers -- * Readers: converting /to/ Pandoc format , readMarkdown + , readMediaWiki , readRST , readLaTeX , readHtml @@ -110,6 +111,7 @@ module Text.Pandoc import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.LaTeX @@ -179,6 +181,7 @@ readers = [("native" , \_ -> readNative) ,("markdown_strict" , readMarkdown) ,("markdown" , readMarkdown) ,("rst" , readRST) + ,("mediawiki" , readMediaWiki) ,("docbook" , readDocBook) ,("textile" , readTextile) -- TODO : textile+lhs ,("html" , readHtml) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 50691f409..bee96be82 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -82,6 +82,7 @@ module Text.Pandoc.Parsing ( (>>~), ellipses, apostrophe, dash, + nested, macro, applyMacros', Parser, @@ -379,10 +380,11 @@ uri = try $ do char ')' return $ '(' : res ++ ")" str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar) + str' <- option str $ char '/' >> return (str ++ "/") -- now see if they amount to an absolute URI - case parseURI (escapeURI str) of + case parseURI (escapeURI str') of Just uri' -> if uriScheme uri' `elem` protocols - then return (str, show uri') + then return (str', show uri') else fail "not a URI" Nothing -> fail "not a URI" @@ -811,8 +813,8 @@ quoted :: Parser [Char] ParserState Inline quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext - -> Parser [Char] ParserState a - -> Parser [Char] ParserState a + -> Parser [tok] ParserState a + -> Parser [tok] ParserState a withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -924,6 +926,18 @@ emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') return (Str "\8212") +-- This is used to prevent exponential blowups for things like: +-- a**a*a**a*a**a*a**a*a**a*a**a*a** +nested :: Parser s ParserState a + -> Parser s ParserState a +nested p = do + nestlevel <- stateMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + -- -- Macros -- diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e5c310ffc..424d9bdec 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -271,6 +271,7 @@ pCodeBlock = try $ do inline :: TagParser [Inline] inline = choice [ pTagText + , pQ , pEmph , pStrong , pSuperscript @@ -306,6 +307,17 @@ pSelfClosing f g = do optional $ pSatisfy (tagClose f) return open +pQ :: TagParser [Inline] +pQ = do + quoteContext <- stateQuoteContext `fmap` getState + let quoteType = case quoteContext of + InDoubleQuote -> SingleQuote + _ -> DoubleQuote + let innerQuoteContext = if quoteType == SingleQuote + then InSingleQuote + else InDoubleQuote + withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType) + pEmph :: TagParser [Inline] pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph @@ -585,9 +597,9 @@ htmlInBalanced f = try $ do return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String) htmlTag f = try $ do - lookAhead (char '<') + lookAhead $ char '<' >> (oneOf "/!?" <|> letter) (next : _) <- getInput >>= return . canonicalizeTags . parseTags guard $ f next -- advance the parser diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4a5a14d6a..86ae400de 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -166,10 +166,8 @@ double_quote = (doubleQuoted . mconcat) <$> (try $ string "``" *> manyTill inline (try $ string "''")) single_quote :: LP Inlines -single_quote = char '`' *> - ( try ((singleQuoted . mconcat) <$> - manyTill inline (try $ char '\'' >> notFollowedBy letter)) - <|> lit "`") +single_quote = (singleQuoted . mconcat) <$> + (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter)) inline :: LP Inlines inline = (mempty <$ comment) @@ -181,6 +179,9 @@ inline = (mempty <$ comment) ((char '-') *> option (str "–") (str "—" <$ char '-'))) <|> double_quote <|> single_quote + <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote + <|> (str "”" <$ try (string "''")) + <|> (str "‘" <$ char '`') -- nb. {`} won't be caught by single_quote <|> (str "’" <$ char '\'') <|> (str "\160" <$ char '~') <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") @@ -188,10 +189,9 @@ inline = (mempty <$ comment) <|> (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str <$> count 1 tildeEscape) - <|> (str <$> string "]") - <|> (str <$> string "#") -- TODO print warning? - <|> (str <$> string "&") -- TODO print warning? + <|> (str . (:[]) <$> tildeEscape) + <|> (str . (:[]) <$> oneOf "[]") + <|> (str . (:[]) <$> oneOf "#&") -- TODO print warning? -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters inlines :: LP Inlines @@ -203,8 +203,8 @@ block = (mempty <$ comment) <|> environment <|> mempty <$ macro -- TODO improve macros, make them work everywhere <|> blockCommand - <|> grouped block <|> paragraph + <|> grouped block <|> (mempty <$ char '&') -- loose & in table environment @@ -214,6 +214,7 @@ blocks = mconcat <$> many block blockCommand :: LP Blocks blockCommand = try $ do name <- anyControlSeq + guard $ name /= "begin" && name /= "end" star <- option "" (string "*" <* optional sp) let name' = name ++ star case M.lookup name' blockCommands of @@ -265,8 +266,6 @@ blockCommands = M.fromList $ , ("closing", skipopts *> closing) -- , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("begin", mzero) -- these are here so they won't be interpreted as inline - , ("end", mzero) , ("item", skipopts *> loose_item) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) @@ -321,6 +320,7 @@ section lvl = do inlineCommand :: LP Inlines inlineCommand = try $ do name <- anyControlSeq + guard $ name /= "begin" && name /= "end" guard $ not $ isBlockCommand name parseRaw <- getOption readerParseRaw star <- option "" (string "*") @@ -352,6 +352,7 @@ inlineCommands = M.fromList $ , ("textsubscript", subscript <$> tok) , ("textbackslash", lit "\\") , ("backslash", lit "\\") + , ("slash", lit "/") , ("textbf", strong <$> tok) , ("ldots", lit "…") , ("dots", lit "…") @@ -644,11 +645,7 @@ inlineText :: LP Inlines inlineText = str <$> many1 inlineChar inlineChar :: LP Char -inlineChar = satisfy $ \c -> - not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' || - c == '&' || c == '~' || c == '#' || c == '{' || c == '}' || - c == '^' || c == '\'' || c == '`' || c == '-' || c == ']' || - c == ' ' || c == '\t' || c == '\n' ) +inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n" environment :: LP Blocks environment = do @@ -745,6 +742,9 @@ environments = M.fromList , ("lstlisting", codeBlock <$> (verbEnv "lstlisting")) , ("minted", liftA2 (\l c -> codeBlockWith ("",[l],[]) c) (grouped (many1 $ satisfy (/= '}'))) (verbEnv "minted")) + , ("obeylines", parseFromString + (para . trimInlines . mconcat <$> many inline) =<< + intercalate "\\\\\n" . lines <$> verbEnv "obeylines") , ("displaymath", mathEnv Nothing "displaymath") , ("equation", mathEnv Nothing "equation") , ("equation*", mathEnv Nothing "equation*") @@ -801,7 +801,9 @@ descItem = do return (ils, [bs]) env :: String -> LP a -> LP a -env name p = p <* (controlSeq "end" *> braced >>= guard . (== name)) +env name p = p <* + (try (controlSeq "end" *> braced >>= guard . (== name)) + <?> ("\\end{" ++ name ++ "}")) listenv :: String -> LP a -> LP a listenv name p = try $ do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2407e137c..1c2cc12f1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,6 +1,4 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 -{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, - GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -1294,18 +1292,6 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end --- This is used to prevent exponential blowups for things like: --- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: Parser [Char] ParserState a - -> Parser [Char] ParserState a -nested p = do - nestlevel <- stateMaxNestingLevel `fmap` getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - emph :: Parser [Char] ParserState (F Inlines) emph = fmap B.emph <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs new file mode 100644 index 000000000..7936be38b --- /dev/null +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -0,0 +1,594 @@ +{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{- + Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.MediaWiki + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of mediawiki text to 'Pandoc' document. +-} +{- +TODO: +_ correctly handle tables within tables +_ parse templates? +-} +module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Options +import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) +import Text.Pandoc.XML ( fromEntities ) +import Text.Pandoc.Parsing hiding ( nested ) +import Text.Pandoc.Generic ( bottomUp ) +import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) +import Data.Monoid (mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad +import Data.List (intersperse, intercalate, isPrefixOf ) +import Text.HTML.TagSoup +import Data.Sequence (viewl, ViewL(..), (<|)) +import Data.Char (isDigit) + +-- | Read mediawiki from an input string and return a Pandoc document. +readMediaWiki :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readMediaWiki opts s = + case runParser parseMediaWiki MWState{ mwOptions = opts + , mwMaxNestingLevel = 4 + , mwNextLinkNumber = 1 + , mwCategoryLinks = [] + } + "source" (s ++ "\n") of + Left err' -> error $ "\nError:\n" ++ show err' + Right result -> result + +data MWState = MWState { mwOptions :: ReaderOptions + , mwMaxNestingLevel :: Int + , mwNextLinkNumber :: Int + , mwCategoryLinks :: [Inlines] + } + +type MWParser = Parser [Char] MWState + +-- +-- auxiliary functions +-- + +-- This is used to prevent exponential blowups for things like: +-- ''a'''a''a'''a''a'''a''a'''a +nested :: MWParser a -> MWParser a +nested p = do + nestlevel <- mwMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ mwMaxNestingLevel = nestlevel } + return res + +specialChars :: [Char] +specialChars = "'[]<=&*{}|\"" + +spaceChars :: [Char] +spaceChars = " \n\t" + +sym :: String -> MWParser () +sym s = () <$ try (string s) + +newBlockTags :: [String] +newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] + +isBlockTag' :: Tag String -> Bool +isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && + t `notElem` eitherBlockOrInline +isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && + t `notElem` eitherBlockOrInline +isBlockTag' tag = isBlockTag tag + +isInlineTag' :: Tag String -> Bool +isInlineTag' (TagComment _) = True +isInlineTag' t = not (isBlockTag' t) + +eitherBlockOrInline :: [String] +eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", + "map", "area", "object"] + +htmlComment :: MWParser () +htmlComment = () <$ htmlTag isCommentTag + +inlinesInTags :: String -> MWParser Inlines +inlinesInTags tag = try $ do + (_,raw) <- htmlTag (~== TagOpen tag []) + if '/' `elem` raw -- self-closing tag + then return mempty + else trimInlines . mconcat <$> + manyTill inline (htmlTag (~== TagClose tag)) + +blocksInTags :: String -> MWParser Blocks +blocksInTags tag = try $ do + (_,raw) <- htmlTag (~== TagOpen tag []) + if '/' `elem` raw -- self-closing tag + then return mempty + else mconcat <$> manyTill block (htmlTag (~== TagClose tag)) + +charsInTags :: String -> MWParser [Char] +charsInTags tag = try $ do + (_,raw) <- htmlTag (~== TagOpen tag []) + if '/' `elem` raw -- self-closing tag + then return "" + else innerText . parseTags <$> + manyTill anyChar (htmlTag (~== TagClose tag)) + +-- +-- main parser +-- + +parseMediaWiki :: MWParser Pandoc +parseMediaWiki = do + bs <- mconcat <$> many block + spaces + eof + categoryLinks <- reverse . mwCategoryLinks <$> getState + let categories = if null categoryLinks + then mempty + else B.para $ mconcat $ intersperse B.space categoryLinks + return $ B.doc $ bs <> categories + +-- +-- block parsers +-- + +block :: MWParser Blocks +block = mempty <$ skipMany1 blankline + <|> table + <|> header + <|> hrule + <|> orderedList + <|> bulletList + <|> definitionList + <|> mempty <$ try (spaces *> htmlComment) + <|> preformatted + <|> blockTag + <|> (B.rawBlock "mediawiki" <$> template) + <|> para + +para :: MWParser Blocks +para = B.para . trimInlines . mconcat <$> many1 inline + +table :: MWParser Blocks +table = do + tableStart + styles <- manyTill anyChar newline + let tableWidth = case lookup "width" $ parseAttrs styles of + Just w -> maybe 1.0 id $ parseWidth w + Nothing -> 1.0 + caption <- option mempty tableCaption + optional rowsep + hasheader <- option False $ True <$ (lookAhead (char '!')) + (cellspecs',hdr) <- unzip <$> tableRow + let widths = map ((tableWidth *) . snd) cellspecs' + let restwidth = tableWidth - sum widths + let zerocols = length $ filter (==0.0) widths + let defaultwidth = if zerocols == 0 || zerocols == length widths + then 0.0 + else restwidth / fromIntegral zerocols + let widths' = map (\w -> if w == 0 then defaultwidth else w) widths + let cellspecs = zip (map fst cellspecs') widths' + rows' <- many $ try $ rowsep *> (map snd <$> tableRow) + tableEnd + let cols = length hdr + let (headers,rows) = if hasheader + then (hdr, rows') + else (replicate cols mempty, hdr:rows') + return $ B.table caption cellspecs headers rows + +parseAttrs :: String -> [(String,String)] +parseAttrs s = case parse (many parseAttr) "attributes" s of + Right r -> r + Left _ -> [] + +parseAttr :: Parser String () (String, String) +parseAttr = try $ do + skipMany spaceChar + k <- many1 letter + char '=' + char '"' + v <- many1Till anyChar (char '"') + return (k,v) + +tableStart :: MWParser () +tableStart = try $ guardColumnOne *> sym "{|" + +tableEnd :: MWParser () +tableEnd = try $ guardColumnOne *> sym "|}" <* blanklines + +rowsep :: MWParser () +rowsep = try $ guardColumnOne *> sym "|-" <* blanklines + +cellsep :: MWParser () +cellsep = try $ + (guardColumnOne <* + ( (char '|' <* notFollowedBy (oneOf "-}+")) + <|> (char '!') + ) + ) + <|> (() <$ try (string "||")) + <|> (() <$ try (string "!!")) + +tableCaption :: MWParser Inlines +tableCaption = try $ do + guardColumnOne + sym "|+" + skipMany spaceChar + res <- manyTill anyChar newline >>= parseFromString (many inline) + return $ trimInlines $ mconcat res + +tableRow :: MWParser [((Alignment, Double), Blocks)] +tableRow = try $ many tableCell + +tableCell :: MWParser ((Alignment, Double), Blocks) +tableCell = try $ do + cellsep + skipMany spaceChar + attrs <- option [] $ try $ parseAttrs <$> + manyTill (satisfy (/='\n')) (char '|' <* notFollowedBy (char '|')) + skipMany spaceChar + ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> count 1 anyChar)) + bs <- parseFromString (mconcat <$> many block) ls + let align = case lookup "align" attrs of + Just "left" -> AlignLeft + Just "right" -> AlignRight + Just "center" -> AlignCenter + _ -> AlignDefault + let width = case lookup "width" attrs of + Just xs -> maybe 0.0 id $ parseWidth xs + Nothing -> 0.0 + return ((align, width), bs) + +parseWidth :: String -> Maybe Double +parseWidth s = + case reverse s of + ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) + _ -> Nothing + +template :: MWParser String +template = try $ do + string "{{" + notFollowedBy (char '{') + let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + contents <- manyTill chunk (try $ string "}}") + return $ "{{" ++ concat contents ++ "}}" + +blockTag :: MWParser Blocks +blockTag = do + (tag, _) <- lookAhead $ htmlTag isBlockTag' + case tag of + TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs + TagOpen "source" attrs -> syntaxhighlight "source" attrs + TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> + charsInTags "haskell" + TagOpen "gallery" _ -> blocksInTags "gallery" + TagOpen "p" _ -> mempty <$ htmlTag (~== tag) + TagClose "p" -> mempty <$ htmlTag (~== tag) + _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) + +trimCode :: String -> String +trimCode ('\n':xs) = stripTrailingNewlines xs +trimCode xs = stripTrailingNewlines xs + +syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks +syntaxhighlight tag attrs = try $ do + let mblang = lookup "lang" attrs + let mbstart = lookup "start" attrs + let mbline = lookup "line" attrs + let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline + let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart + contents <- charsInTags tag + return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents + +hrule :: MWParser Blocks +hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) + +guardColumnOne :: MWParser () +guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) + +preformatted :: MWParser Blocks +preformatted = try $ do + guardColumnOne + char ' ' + let endline' = B.linebreak <$ (try $ newline <* char ' ') + let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let spToNbsp ' ' = '\160' + spToNbsp x = x + let nowiki' = mconcat . intersperse B.linebreak . map B.str . + lines . fromEntities . map spToNbsp <$> try + (htmlTag (~== TagOpen "nowiki" []) *> + manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + let inline' = whitespace' <|> endline' <|> nowiki' <|> inline + let strToCode (Str s) = Code ("",[],[]) s + strToCode x = x + B.para . bottomUp strToCode . mconcat <$> many1 inline' + +header :: MWParser Blocks +header = try $ do + guardColumnOne + eqs <- many1 (char '=') + let lev = length eqs + guard $ lev <= 6 + contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') + return $ B.header lev contents + +bulletList :: MWParser Blocks +bulletList = B.bulletList <$> + ( many1 (listItem '*') + <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose "ul"))) ) + +orderedList :: MWParser Blocks +orderedList = + (B.orderedList <$> many1 (listItem '#')) + <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *> + many (listItem '#' <|> li) <* + optional (htmlTag (~== TagClose "ul")))) + <|> do (tag,_) <- htmlTag (~== TagOpen "ol" []) + spaces + items <- many (listItem '#' <|> li) + optional (htmlTag (~== TagClose "ol")) + let start = maybe 1 id $ safeRead $ fromAttrib "start" tag + return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items + +definitionList :: MWParser Blocks +definitionList = B.definitionList <$> many1 defListItem + +defListItem :: MWParser (Inlines, [Blocks]) +defListItem = try $ do + terms <- mconcat . intersperse B.linebreak <$> many defListTerm + -- we allow dd with no dt, or dt with no dd + defs <- if B.isNull terms + then many1 $ listItem ':' + else many $ listItem ':' + return (terms, defs) + +defListTerm :: MWParser Inlines +defListTerm = char ';' >> skipMany spaceChar >> manyTill anyChar newline >>= + parseFromString (trimInlines . mconcat <$> many inline) + +listStart :: Char -> MWParser () +listStart c = char c *> notFollowedBy listStartChar + +listStartChar :: MWParser Char +listStartChar = oneOf "*#;:" + +anyListStart :: MWParser Char +anyListStart = char '*' + <|> char '#' + <|> char ':' + <|> char ';' + +li :: MWParser Blocks +li = lookAhead (htmlTag (~== TagOpen "li" [])) *> + (firstParaToPlain <$> blocksInTags "li") <* spaces + +listItem :: Char -> MWParser Blocks +listItem c = try $ do + extras <- many (try $ char c <* lookAhead listStartChar) + if null extras + then listItem' c + else do + skipMany spaceChar + first <- concat <$> manyTill listChunk newline + rest <- many + (try $ string extras *> (concat <$> manyTill listChunk newline)) + contents <- parseFromString (many1 $ listItem' c) + (unlines (first : rest)) + case c of + '*' -> return $ B.bulletList contents + '#' -> return $ B.orderedList contents + ':' -> return $ B.definitionList [(mempty, contents)] + _ -> mzero + +-- The point of this is to handle stuff like +-- * {{cite book +-- | blah +-- | blah +-- }} +-- * next list item +-- which seems to be valid mediawiki. +listChunk :: MWParser String +listChunk = template <|> count 1 anyChar + +listItem' :: Char -> MWParser Blocks +listItem' c = try $ do + listStart c + skipMany spaceChar + first <- concat <$> manyTill listChunk newline + rest <- many (try $ char c *> lookAhead listStartChar *> + (concat <$> manyTill listChunk newline)) + parseFromString (firstParaToPlain . mconcat <$> many1 block) + $ unlines $ first : rest + +firstParaToPlain :: Blocks -> Blocks +firstParaToPlain contents = + case viewl (B.unMany contents) of + (Para xs) :< ys -> B.Many $ (Plain xs) <| ys + _ -> contents + +-- +-- inline parsers +-- + +inline :: MWParser Inlines +inline = whitespace + <|> url + <|> str + <|> doubleQuotes + <|> strong + <|> emph + <|> image + <|> internalLink + <|> externalLink + <|> inlineTag + <|> B.singleton <$> charRef + <|> inlineHtml + <|> (B.rawInline "mediawiki" <$> variable) + <|> (B.rawInline "mediawiki" <$> template) + <|> special + +str :: MWParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) + +variable :: MWParser String +variable = try $ do + string "{{{" + contents <- manyTill anyChar (try $ string "}}}") + return $ "{{{" ++ contents ++ "}}}" + +inlineTag :: MWParser Inlines +inlineTag = do + (tag, _) <- lookAhead $ htmlTag isInlineTag' + case tag of + TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" + TagOpen "nowiki" _ -> try $ do + (_,raw) <- htmlTag (~== tag) + if '/' `elem` raw + then return mempty + else B.text . fromEntities <$> + manyTill anyChar (htmlTag (~== TagClose "nowiki")) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + *> optional blankline) + TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" + TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" + TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" + TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" + TagOpen "math" _ -> B.math <$> charsInTags "math" + TagOpen "code" _ -> B.code <$> charsInTags "code" + TagOpen "tt" _ -> B.code <$> charsInTags "tt" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) + +special :: MWParser Inlines +special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) + +inlineHtml :: MWParser Inlines +inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' + +whitespace :: MWParser Inlines +whitespace = B.space <$ (skipMany1 spaceChar <|> endline <|> htmlComment) + +endline :: MWParser () +endline = () <$ try (newline <* + notFollowedBy blankline <* + notFollowedBy' hrule <* + notFollowedBy tableStart <* + notFollowedBy' header <* + notFollowedBy anyListStart) + +image :: MWParser Inlines +image = try $ do + sym "[[" + sym "File:" + fname <- many1 (noneOf "|]") + _ <- many (try $ char '|' *> imageOption) + caption <- (B.str fname <$ sym "]]") + <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) + return $ B.image fname "image" caption + +imageOption :: MWParser String +imageOption = + try (oneOfStrings [ "border", "thumbnail", "frameless" + , "thumb", "upright", "left", "right" + , "center", "none", "baseline", "sub" + , "super", "top", "text-top", "middle" + , "bottom", "text-bottom" ]) + <|> try (string "frame") + <|> try (many1 (oneOf "x0123456789") <* string "px") + <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) + +internalLink :: MWParser Inlines +internalLink = try $ do + sym "[[" + let addUnderscores x = let (pref,suff) = break (=='#') x + in pref ++ intercalate "_" (words suff) + pagename <- unwords . words <$> many (noneOf "|]") + label <- option (B.text pagename) $ char '|' *> + ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) + -- the "pipe trick" + -- [[Help:Contents|] -> "Contents" + <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) ) + sym "]]" + linktrail <- B.text <$> many letter + let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) + if "Category:" `isPrefixOf` pagename + then do + updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } + return mempty + else return link + +externalLink :: MWParser Inlines +externalLink = try $ do + char '[' + (_, src) <- uri + lab <- try (trimInlines . mconcat <$> + (skipMany1 spaceChar *> manyTill inline (char ']'))) + <|> do char ']' + num <- mwNextLinkNumber <$> getState + updateState $ \st -> st{ mwNextLinkNumber = num + 1 } + return $ B.str $ show num + return $ B.link src "" lab + +url :: MWParser Inlines +url = do + (orig, src) <- uri + return $ B.link src "" (B.str orig) + +-- | Parses a list of inlines between start and end delimiters. +inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines +inlinesBetween start end = + (trimInlines . mconcat) <$> try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) + innerSpace = try $ whitespace >>~ notFollowedBy' end + +emph :: MWParser Inlines +emph = B.emph <$> nested (inlinesBetween start end) + where start = sym "''" >> lookAhead nonspaceChar + end = try $ notFollowedBy' (() <$ strong) >> sym "''" + +strong :: MWParser Inlines +strong = B.strong <$> nested (inlinesBetween start end) + where start = sym "'''" >> lookAhead nonspaceChar + end = try $ sym "'''" + +doubleQuotes :: MWParser Inlines +doubleQuotes = B.doubleQuoted . trimInlines . mconcat <$> try + ((getState >>= guard . readerSmart . mwOptions) *> + openDoubleQuote *> manyTill inline closeDoubleQuote ) + where openDoubleQuote = char '"' <* lookAhead alphaNum + closeDoubleQuote = char '"' <* notFollowedBy alphaNum + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a26b1623d..74653efcf 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -757,7 +757,7 @@ simpleTableHeader headless = try $ do rawContent <- if headless then return "" else simpleTableSep '=' >> anyLine - dashes <- simpleDashedLines '=' + dashes <- simpleDashedLines '=' <|> simpleDashedLines '-' newline let lines' = map snd dashes let indices = scanl (+) 0 lines' diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 89f281ae8..dc95d9a56 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -47,7 +47,6 @@ Left to be implemented: TODO : refactor common patterns across readers : - autolink - - smartPunctuation - more ... -} @@ -62,6 +61,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup.Match +import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper ) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) @@ -412,7 +412,7 @@ note = try $ do -- | Special chars markupChars :: [Char] -markupChars = "\\[]*#_@~-+^|%=" +markupChars = "\\*#_@~-+^|%=[]" -- | Break strings on following chars. Space tab and newline break for -- inlines breaking. Open paren breaks for mark. Quote, dash and dot @@ -427,13 +427,15 @@ wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words hyphenedWords :: Parser [Char] ParserState String -hyphenedWords = try $ do +hyphenedWords = intercalate "-" <$> sepBy1 wordChunk (char '-') + +wordChunk :: Parser [Char] ParserState String +wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( (noneOf wordBoundaries) <|> - try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) - let wd = hd:tl - option wd $ try $ - (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords) + try (notFollowedBy' note *> oneOf markupChars + <* lookAhead (noneOf wordBoundaries) ) ) + return $ hd:tl -- | Any string str :: Parser [Char] ParserState Inline diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e696fc63e..a38f57074 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -103,7 +103,7 @@ elementToDocbook opts lvl (Sec _ _num id' title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id",id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 05c9555c6..84bf95dfb 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -60,7 +60,7 @@ data WriterState = WriterState{ , stExternalLinks :: M.Map String String , stImages :: M.Map FilePath (String, B.ByteString) , stListLevel :: Int - , stListMarker :: ListMarker + , stListNumId :: Int , stNumStyles :: M.Map ListMarker Int , stLists :: [ListMarker] } @@ -79,7 +79,7 @@ defaultWriterState = WriterState{ , stExternalLinks = M.empty , stImages = M.empty , stListLevel = -1 - , stListMarker = NoMarker + , stListNumId = 1 , stNumStyles = M.fromList [(NoMarker, 0)] , stLists = [NoMarker] } @@ -285,6 +285,9 @@ mkLvl marker lvl = patternFor TwoParens s = "(" ++ s ++ ")" patternFor _ s = s ++ "." +getNumId :: WS Int +getNumId = length `fmap` gets stLists + -- | Convert Pandoc document to string in OpenXML format. writeOpenXML :: WriterOptions -> Pandoc -> WS Element writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do @@ -402,11 +405,13 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker - asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst + numid <- getNumId + asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do let marker = NumberMarker numstyle numdelim start addList marker - asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst + numid <- getNumId + asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst blockToOpenXML opts (DefinitionList items) = concat `fmap` mapM (definitionListItemToOpenXML opts) items @@ -418,9 +423,6 @@ definitionListItemToOpenXML opts (term,defs) = do $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' -getNumId :: WS Int -getNumId = length `fmap` gets stLists - addList :: ListMarker -> WS () addList marker = do lists <- gets stLists @@ -431,11 +433,11 @@ addList marker = do Nothing -> modify $ \st -> st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } -listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element] +listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] -listItemToOpenXML opts marker (first:rest) = do - first' <- withMarker marker $ blockToOpenXML opts first - rest' <- withMarker NoMarker $ blocksToOpenXML opts rest +listItemToOpenXML opts numid (first:rest) = do + first' <- withNumId numid $ blockToOpenXML opts first + rest' <- withNumId 1 $ blocksToOpenXML opts rest return $ first' ++ rest' alignmentToString :: Alignment -> [Char] @@ -449,12 +451,12 @@ alignmentToString alignment = case alignment of inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst -withMarker :: ListMarker -> WS a -> WS a -withMarker m p = do - origMarker <- gets stListMarker - modify $ \st -> st{ stListMarker = m } +withNumId :: Int -> WS a -> WS a +withNumId numid p = do + origNumId <- gets stListNumId + modify $ \st -> st{ stListNumId = numid } result <- p - modify $ \st -> st{ stListMarker = origMarker } + modify $ \st -> st{ stListNumId = origNumId } return result asList :: WS a -> WS a @@ -489,10 +491,7 @@ getParaProps :: WS [Element] getParaProps = do props <- gets stParaProperties listLevel <- gets stListLevel - listMarker <- gets stListMarker - numid <- case listMarker of - NoMarker -> return 1 - _ -> getNumId + numid <- gets stListNumId let listPr = if listLevel >= 0 then [ mknode "w:numPr" [] [ mknode "w:numId" [("w:val",show numid)] () diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b6527c6c8..6f8931caa 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -32,6 +32,7 @@ import Data.IORef import Data.Maybe ( fromMaybe, isNothing ) import Data.List ( findIndices, isPrefixOf ) import System.Environment ( getEnv ) +import Text.Printf (printf) import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) @@ -122,8 +123,9 @@ writeEPUB opts doc@(Pandoc meta _) = do let chapters = map titleize chunks let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapterToEntry :: Int -> Pandoc -> Entry - chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ - fromString $ chapToHtml chap + chapterToEntry num chap = mkEntry + (showChapter num) $ + fromString $ chapToHtml chap let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf @@ -334,11 +336,15 @@ data IdentState = IdentState{ identTable :: [(String,String)] } deriving (Read, Show) +-- Returns filename for chapter number. +showChapter :: Int -> String +showChapter = printf "ch%03d.xhtml" + -- Go through a block list and construct a table -- correlating the automatically constructed references -- that would be used in a normal pandoc document with -- new URLs to be used in the EPUB. For example, what --- was "header-1" might turn into "ch6.xhtml#header". +-- was "header-1" might turn into "ch006.xhtml#header". correlateRefs :: [Block] -> [(String,String)] correlateRefs bs = identTable $ execState (mapM_ go bs) IdentState{ chapterNumber = 0 @@ -358,8 +364,9 @@ correlateRefs bs = identTable $ execState (mapM_ go bs) modify $ \s -> s{ runningIdents = runningid : runningIdents st , chapterIdents = maybe (chapterIdents st) (: chapterIdents st) chapid - , identTable = (runningid, "ch" ++ show (chapterNumber st) ++ - ".xhtml" ++ maybe "" ('#':) chapid) : identTable st + , identTable = (runningid, + showChapter (chapterNumber st) ++ + maybe "" ('#':) chapid) : identTable st } go _ = return () diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c6c4a8fd7..ebb705a61 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -482,10 +482,13 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do return $ foldl (!) (ordList opts contents) attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> - do term' <- liftM (H.dt) $ inlineListToHtml opts term + do term' <- if null term + then return mempty + else liftM (H.dt) $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs - return $ mconcat $ nl opts : term' : nl opts : defs') lst + return $ mconcat $ nl opts : term' : nl opts : + intersperse (nl opts) defs') lst let lst' = H.dl $ mconcat contents >> nl opts let lst'' = if writerIncremental opts then lst' ! A.class_ "incremental" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index abbbd4d01..2b5c7e84b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -49,8 +49,7 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInTable :: Bool -- true if we're in a table - , stTableNotes :: [(Char, Doc)] -- List of markers, notes - -- in current table + , stTableNotes :: [Doc] -- List of notes in current table , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -190,7 +189,7 @@ stringToLaTeX isUrl (x:xs) = do '$' -> "\\$" ++ rest '%' -> "\\%" ++ rest '&' -> "\\&" ++ rest - '_' -> "\\_" ++ rest + '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest '-' -> case xs of -- prevent adjacent hyphens from forming ligatures ('-':_) -> "-{}" ++ rest @@ -382,27 +381,27 @@ blockToLaTeX (Table caption aligns widths heads rows) = do modify $ \s -> s{ stInTable = True, stTableNotes = [] } headers <- if all null heads then return empty - else liftM ($$ "\\ML") - $ (tableRowToLaTeX True aligns widths) heads + else ($$ "\\hline\\noalign{\\medskip}") `fmap` + (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "caption = {" <> captionText <> "}," <> space + else text "\\noalign{\\medskip}" + $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let rows'' = intersperse ("\\\\\\noalign{\\medskip}") rows' tableNotes <- liftM (reverse . stTableNotes) get - let toNote (marker, x) = "\\tnote" <> brackets (char marker) <> - braces (nest 2 x) + let toNote x = "\\footnotetext" <> braces (nest 2 x) let notes = vcat $ map toNote tableNotes let colDescriptors = text $ concat $ map toColDescriptor aligns - let tableBody = - ("\\ctable" <> brackets (capt <> text "pos = H, center, botcap")) - <> braces colDescriptors - $$ braces ("% notes" <> cr <> notes <> cr) - $$ braces (text "% rows" $$ "\\FL" $$ - vcat (headers : rows'') $$ "\\LL" <> cr) modify $ \s -> s{ stTable = True, stInTable = False, stTableNotes = [] } - return $ tableBody + return $ "\\begin{longtable}[c]" <> braces colDescriptors + $$ "\\hline\\noalign{\\medskip}" + $$ headers + $$ vcat rows' + $$ "\\hline" + $$ capt + $$ notes + $$ "\\end{longtable}" toColDescriptor :: Alignment -> String toColDescriptor align = @@ -433,7 +432,7 @@ tableRowToLaTeX header aligns widths cols = do braces (text (printf "%.2f\\columnwidth" w)) <> braces (halign a <> cr <> c <> cr) let cells = zipWith3 toCell widths aligns renderedCells - return $ hcat $ intersperse (" & ") cells + return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -572,7 +571,7 @@ inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt - ident' <- stringToLaTeX False ident + ident' <- stringToLaTeX True ident return $ text "\\hyperref" <> brackets (text ident') <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of @@ -600,9 +599,8 @@ inlineToLaTeX (Note contents) = do if inTable then do curnotes <- liftM stTableNotes get - let marker = cycle ['a'..'z'] !! length curnotes - modify $ \s -> s{ stTableNotes = (marker, contents') : curnotes } - return $ "\\tmark" <> brackets (char marker) <> space + modify $ \s -> s{ stTableNotes = contents' : curnotes } + return $ "\\footnotemark" <> space else return $ "\\footnote" <> braces (nest 2 contents' <> optnl) -- note: a \n before } needed when note ends with a Verbatim environment diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d88419feb..1a0731710 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -54,9 +54,12 @@ data WriterState = WriterState { stNotes :: Notes -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String writeMarkdown opts document = - evalState (pandocToMarkdown opts document) WriterState{ stNotes = [] - , stRefs = [] - , stPlain = False } + evalState (pandocToMarkdown opts{ + writerWrapText = writerWrapText opts && + not (isEnabled Ext_hard_line_breaks opts) } + document) WriterState{ stNotes = [] + , stRefs = [] + , stPlain = False } -- | Convert Pandoc to plain text (like markdown, but without links, -- pictures, or inline formatting). @@ -588,8 +591,9 @@ inlineToMarkdown opts (RawInline f str) return $ text str inlineToMarkdown _ (RawInline _ _) = return empty inlineToMarkdown opts (LineBreak) + | isEnabled Ext_hard_line_breaks opts = return cr | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr - | otherwise = return $ " " <> cr + | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite (c:cs) lst) | writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst |