aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs3
-rw-r--r--src/Text/Pandoc/Parsing.hs22
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs16
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs36
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs14
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs594
-rw-r--r--src/Text/Pandoc/Readers/RST.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs16
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs2
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs39
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs17
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs7
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs40
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs12
14 files changed, 722 insertions, 98 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 33706816e..1e6b1d010 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -66,6 +66,7 @@ module Text.Pandoc
, writers
-- * Readers: converting /to/ Pandoc format
, readMarkdown
+ , readMediaWiki
, readRST
, readLaTeX
, readHtml
@@ -110,6 +111,7 @@ module Text.Pandoc
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.LaTeX
@@ -179,6 +181,7 @@ readers = [("native" , \_ -> readNative)
,("markdown_strict" , readMarkdown)
,("markdown" , readMarkdown)
,("rst" , readRST)
+ ,("mediawiki" , readMediaWiki)
,("docbook" , readDocBook)
,("textile" , readTextile) -- TODO : textile+lhs
,("html" , readHtml)
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 50691f409..bee96be82 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -82,6 +82,7 @@ module Text.Pandoc.Parsing ( (>>~),
ellipses,
apostrophe,
dash,
+ nested,
macro,
applyMacros',
Parser,
@@ -379,10 +380,11 @@ uri = try $ do
char ')'
return $ '(' : res ++ ")"
str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar)
+ str' <- option str $ char '/' >> return (str ++ "/")
-- now see if they amount to an absolute URI
- case parseURI (escapeURI str) of
+ case parseURI (escapeURI str') of
Just uri' -> if uriScheme uri' `elem` protocols
- then return (str, show uri')
+ then return (str', show uri')
else fail "not a URI"
Nothing -> fail "not a URI"
@@ -811,8 +813,8 @@ quoted :: Parser [Char] ParserState Inline
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
withQuoteContext :: QuoteContext
- -> Parser [Char] ParserState a
- -> Parser [Char] ParserState a
+ -> Parser [tok] ParserState a
+ -> Parser [tok] ParserState a
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
@@ -924,6 +926,18 @@ emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (Str "\8212")
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: Parser s ParserState a
+ -> Parser s ParserState a
+nested p = do
+ nestlevel <- stateMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
--
-- Macros
--
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e5c310ffc..424d9bdec 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -271,6 +271,7 @@ pCodeBlock = try $ do
inline :: TagParser [Inline]
inline = choice
[ pTagText
+ , pQ
, pEmph
, pStrong
, pSuperscript
@@ -306,6 +307,17 @@ pSelfClosing f g = do
optional $ pSatisfy (tagClose f)
return open
+pQ :: TagParser [Inline]
+pQ = do
+ quoteContext <- stateQuoteContext `fmap` getState
+ let quoteType = case quoteContext of
+ InDoubleQuote -> SingleQuote
+ _ -> DoubleQuote
+ let innerQuoteContext = if quoteType == SingleQuote
+ then InSingleQuote
+ else InDoubleQuote
+ withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType)
+
pEmph :: TagParser [Inline]
pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph
@@ -585,9 +597,9 @@ htmlInBalanced f = try $ do
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String)
+htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
htmlTag f = try $ do
- lookAhead (char '<')
+ lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
guard $ f next
-- advance the parser
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4a5a14d6a..86ae400de 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -166,10 +166,8 @@ double_quote = (doubleQuoted . mconcat) <$>
(try $ string "``" *> manyTill inline (try $ string "''"))
single_quote :: LP Inlines
-single_quote = char '`' *>
- ( try ((singleQuoted . mconcat) <$>
- manyTill inline (try $ char '\'' >> notFollowedBy letter))
- <|> lit "`")
+single_quote = (singleQuoted . mconcat) <$>
+ (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
inline :: LP Inlines
inline = (mempty <$ comment)
@@ -181,6 +179,9 @@ inline = (mempty <$ comment)
((char '-') *> option (str "–") (str "—" <$ char '-')))
<|> double_quote
<|> single_quote
+ <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote
+ <|> (str "”" <$ try (string "''"))
+ <|> (str "‘" <$ char '`') -- nb. {`} won't be caught by single_quote
<|> (str "’" <$ char '\'')
<|> (str "\160" <$ char '~')
<|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
@@ -188,10 +189,9 @@ inline = (mempty <$ comment)
<|> (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str <$> count 1 tildeEscape)
- <|> (str <$> string "]")
- <|> (str <$> string "#") -- TODO print warning?
- <|> (str <$> string "&") -- TODO print warning?
+ <|> (str . (:[]) <$> tildeEscape)
+ <|> (str . (:[]) <$> oneOf "[]")
+ <|> (str . (:[]) <$> oneOf "#&") -- TODO print warning?
-- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
inlines :: LP Inlines
@@ -203,8 +203,8 @@ block = (mempty <$ comment)
<|> environment
<|> mempty <$ macro -- TODO improve macros, make them work everywhere
<|> blockCommand
- <|> grouped block
<|> paragraph
+ <|> grouped block
<|> (mempty <$ char '&') -- loose & in table environment
@@ -214,6 +214,7 @@ blocks = mconcat <$> many block
blockCommand :: LP Blocks
blockCommand = try $ do
name <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
case M.lookup name' blockCommands of
@@ -265,8 +266,6 @@ blockCommands = M.fromList $
, ("closing", skipopts *> closing)
--
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("begin", mzero) -- these are here so they won't be interpreted as inline
- , ("end", mzero)
, ("item", skipopts *> loose_item)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
@@ -321,6 +320,7 @@ section lvl = do
inlineCommand :: LP Inlines
inlineCommand = try $ do
name <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
guard $ not $ isBlockCommand name
parseRaw <- getOption readerParseRaw
star <- option "" (string "*")
@@ -352,6 +352,7 @@ inlineCommands = M.fromList $
, ("textsubscript", subscript <$> tok)
, ("textbackslash", lit "\\")
, ("backslash", lit "\\")
+ , ("slash", lit "/")
, ("textbf", strong <$> tok)
, ("ldots", lit "…")
, ("dots", lit "…")
@@ -644,11 +645,7 @@ inlineText :: LP Inlines
inlineText = str <$> many1 inlineChar
inlineChar :: LP Char
-inlineChar = satisfy $ \c ->
- not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' ||
- c == '&' || c == '~' || c == '#' || c == '{' || c == '}' ||
- c == '^' || c == '\'' || c == '`' || c == '-' || c == ']' ||
- c == ' ' || c == '\t' || c == '\n' )
+inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n"
environment :: LP Blocks
environment = do
@@ -745,6 +742,9 @@ environments = M.fromList
, ("lstlisting", codeBlock <$> (verbEnv "lstlisting"))
, ("minted", liftA2 (\l c -> codeBlockWith ("",[l],[]) c)
(grouped (many1 $ satisfy (/= '}'))) (verbEnv "minted"))
+ , ("obeylines", parseFromString
+ (para . trimInlines . mconcat <$> many inline) =<<
+ intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
, ("displaymath", mathEnv Nothing "displaymath")
, ("equation", mathEnv Nothing "equation")
, ("equation*", mathEnv Nothing "equation*")
@@ -801,7 +801,9 @@ descItem = do
return (ils, [bs])
env :: String -> LP a -> LP a
-env name p = p <* (controlSeq "end" *> braced >>= guard . (== name))
+env name p = p <*
+ (try (controlSeq "end" *> braced >>= guard . (== name))
+ <?> ("\\end{" ++ name ++ "}"))
listenv :: String -> LP a -> LP a
listenv name p = try $ do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2407e137c..1c2cc12f1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,4 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
-{-# LANGUAGE FlexibleInstances, TypeSynonymInstances,
- GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -1294,18 +1292,6 @@ inlinesBetween start end =
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
--- This is used to prevent exponential blowups for things like:
--- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: Parser [Char] ParserState a
- -> Parser [Char] ParserState a
-nested p = do
- nestlevel <- stateMaxNestingLevel `fmap` getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
- return res
-
emph :: Parser [Char] ParserState (F Inlines)
emph = fmap B.emph <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
new file mode 100644
index 000000000..7936be38b
--- /dev/null
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -0,0 +1,594 @@
+{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.MediaWiki
+ Copyright : Copyright (C) 2012 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of mediawiki text to 'Pandoc' document.
+-}
+{-
+TODO:
+_ correctly handle tables within tables
+_ parse templates?
+-}
+module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
+import Text.Pandoc.XML ( fromEntities )
+import Text.Pandoc.Parsing hiding ( nested )
+import Text.Pandoc.Generic ( bottomUp )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
+import Data.Monoid (mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
+import Data.List (intersperse, intercalate, isPrefixOf )
+import Text.HTML.TagSoup
+import Data.Sequence (viewl, ViewL(..), (<|))
+import Data.Char (isDigit)
+
+-- | Read mediawiki from an input string and return a Pandoc document.
+readMediaWiki :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readMediaWiki opts s =
+ case runParser parseMediaWiki MWState{ mwOptions = opts
+ , mwMaxNestingLevel = 4
+ , mwNextLinkNumber = 1
+ , mwCategoryLinks = []
+ }
+ "source" (s ++ "\n") of
+ Left err' -> error $ "\nError:\n" ++ show err'
+ Right result -> result
+
+data MWState = MWState { mwOptions :: ReaderOptions
+ , mwMaxNestingLevel :: Int
+ , mwNextLinkNumber :: Int
+ , mwCategoryLinks :: [Inlines]
+ }
+
+type MWParser = Parser [Char] MWState
+
+--
+-- auxiliary functions
+--
+
+-- This is used to prevent exponential blowups for things like:
+-- ''a'''a''a'''a''a'''a''a'''a
+nested :: MWParser a -> MWParser a
+nested p = do
+ nestlevel <- mwMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ mwMaxNestingLevel = nestlevel }
+ return res
+
+specialChars :: [Char]
+specialChars = "'[]<=&*{}|\""
+
+spaceChars :: [Char]
+spaceChars = " \n\t"
+
+sym :: String -> MWParser ()
+sym s = () <$ try (string s)
+
+newBlockTags :: [String]
+newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
+
+isBlockTag' :: Tag String -> Bool
+isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
+isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
+isBlockTag' tag = isBlockTag tag
+
+isInlineTag' :: Tag String -> Bool
+isInlineTag' (TagComment _) = True
+isInlineTag' t = not (isBlockTag' t)
+
+eitherBlockOrInline :: [String]
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object"]
+
+htmlComment :: MWParser ()
+htmlComment = () <$ htmlTag isCommentTag
+
+inlinesInTags :: String -> MWParser Inlines
+inlinesInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ if '/' `elem` raw -- self-closing tag
+ then return mempty
+ else trimInlines . mconcat <$>
+ manyTill inline (htmlTag (~== TagClose tag))
+
+blocksInTags :: String -> MWParser Blocks
+blocksInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ if '/' `elem` raw -- self-closing tag
+ then return mempty
+ else mconcat <$> manyTill block (htmlTag (~== TagClose tag))
+
+charsInTags :: String -> MWParser [Char]
+charsInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ if '/' `elem` raw -- self-closing tag
+ then return ""
+ else innerText . parseTags <$>
+ manyTill anyChar (htmlTag (~== TagClose tag))
+
+--
+-- main parser
+--
+
+parseMediaWiki :: MWParser Pandoc
+parseMediaWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ categoryLinks <- reverse . mwCategoryLinks <$> getState
+ let categories = if null categoryLinks
+ then mempty
+ else B.para $ mconcat $ intersperse B.space categoryLinks
+ return $ B.doc $ bs <> categories
+
+--
+-- block parsers
+--
+
+block :: MWParser Blocks
+block = mempty <$ skipMany1 blankline
+ <|> table
+ <|> header
+ <|> hrule
+ <|> orderedList
+ <|> bulletList
+ <|> definitionList
+ <|> mempty <$ try (spaces *> htmlComment)
+ <|> preformatted
+ <|> blockTag
+ <|> (B.rawBlock "mediawiki" <$> template)
+ <|> para
+
+para :: MWParser Blocks
+para = B.para . trimInlines . mconcat <$> many1 inline
+
+table :: MWParser Blocks
+table = do
+ tableStart
+ styles <- manyTill anyChar newline
+ let tableWidth = case lookup "width" $ parseAttrs styles of
+ Just w -> maybe 1.0 id $ parseWidth w
+ Nothing -> 1.0
+ caption <- option mempty tableCaption
+ optional rowsep
+ hasheader <- option False $ True <$ (lookAhead (char '!'))
+ (cellspecs',hdr) <- unzip <$> tableRow
+ let widths = map ((tableWidth *) . snd) cellspecs'
+ let restwidth = tableWidth - sum widths
+ let zerocols = length $ filter (==0.0) widths
+ let defaultwidth = if zerocols == 0 || zerocols == length widths
+ then 0.0
+ else restwidth / fromIntegral zerocols
+ let widths' = map (\w -> if w == 0 then defaultwidth else w) widths
+ let cellspecs = zip (map fst cellspecs') widths'
+ rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
+ tableEnd
+ let cols = length hdr
+ let (headers,rows) = if hasheader
+ then (hdr, rows')
+ else (replicate cols mempty, hdr:rows')
+ return $ B.table caption cellspecs headers rows
+
+parseAttrs :: String -> [(String,String)]
+parseAttrs s = case parse (many parseAttr) "attributes" s of
+ Right r -> r
+ Left _ -> []
+
+parseAttr :: Parser String () (String, String)
+parseAttr = try $ do
+ skipMany spaceChar
+ k <- many1 letter
+ char '='
+ char '"'
+ v <- many1Till anyChar (char '"')
+ return (k,v)
+
+tableStart :: MWParser ()
+tableStart = try $ guardColumnOne *> sym "{|"
+
+tableEnd :: MWParser ()
+tableEnd = try $ guardColumnOne *> sym "|}" <* blanklines
+
+rowsep :: MWParser ()
+rowsep = try $ guardColumnOne *> sym "|-" <* blanklines
+
+cellsep :: MWParser ()
+cellsep = try $
+ (guardColumnOne <*
+ ( (char '|' <* notFollowedBy (oneOf "-}+"))
+ <|> (char '!')
+ )
+ )
+ <|> (() <$ try (string "||"))
+ <|> (() <$ try (string "!!"))
+
+tableCaption :: MWParser Inlines
+tableCaption = try $ do
+ guardColumnOne
+ sym "|+"
+ skipMany spaceChar
+ res <- manyTill anyChar newline >>= parseFromString (many inline)
+ return $ trimInlines $ mconcat res
+
+tableRow :: MWParser [((Alignment, Double), Blocks)]
+tableRow = try $ many tableCell
+
+tableCell :: MWParser ((Alignment, Double), Blocks)
+tableCell = try $ do
+ cellsep
+ skipMany spaceChar
+ attrs <- option [] $ try $ parseAttrs <$>
+ manyTill (satisfy (/='\n')) (char '|' <* notFollowedBy (char '|'))
+ skipMany spaceChar
+ ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
+ ((snd <$> withRaw table) <|> count 1 anyChar))
+ bs <- parseFromString (mconcat <$> many block) ls
+ let align = case lookup "align" attrs of
+ Just "left" -> AlignLeft
+ Just "right" -> AlignRight
+ Just "center" -> AlignCenter
+ _ -> AlignDefault
+ let width = case lookup "width" attrs of
+ Just xs -> maybe 0.0 id $ parseWidth xs
+ Nothing -> 0.0
+ return ((align, width), bs)
+
+parseWidth :: String -> Maybe Double
+parseWidth s =
+ case reverse s of
+ ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
+ _ -> Nothing
+
+template :: MWParser String
+template = try $ do
+ string "{{"
+ notFollowedBy (char '{')
+ let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
+ contents <- manyTill chunk (try $ string "}}")
+ return $ "{{" ++ concat contents ++ "}}"
+
+blockTag :: MWParser Blocks
+blockTag = do
+ (tag, _) <- lookAhead $ htmlTag isBlockTag'
+ case tag of
+ TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
+ TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
+ TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
+ TagOpen "source" attrs -> syntaxhighlight "source" attrs
+ TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
+ charsInTags "haskell"
+ TagOpen "gallery" _ -> blocksInTags "gallery"
+ TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
+ TagClose "p" -> mempty <$ htmlTag (~== tag)
+ _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
+
+trimCode :: String -> String
+trimCode ('\n':xs) = stripTrailingNewlines xs
+trimCode xs = stripTrailingNewlines xs
+
+syntaxhighlight :: String -> [Attribute String] -> MWParser Blocks
+syntaxhighlight tag attrs = try $ do
+ let mblang = lookup "lang" attrs
+ let mbstart = lookup "start" attrs
+ let mbline = lookup "line" attrs
+ let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline
+ let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
+ contents <- charsInTags tag
+ return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
+
+hrule :: MWParser Blocks
+hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
+
+guardColumnOne :: MWParser ()
+guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
+preformatted :: MWParser Blocks
+preformatted = try $ do
+ guardColumnOne
+ char ' '
+ let endline' = B.linebreak <$ (try $ newline <* char ' ')
+ let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
+ let spToNbsp ' ' = '\160'
+ spToNbsp x = x
+ let nowiki' = mconcat . intersperse B.linebreak . map B.str .
+ lines . fromEntities . map spToNbsp <$> try
+ (htmlTag (~== TagOpen "nowiki" []) *>
+ manyTill anyChar (htmlTag (~== TagClose "nowiki")))
+ let inline' = whitespace' <|> endline' <|> nowiki' <|> inline
+ let strToCode (Str s) = Code ("",[],[]) s
+ strToCode x = x
+ B.para . bottomUp strToCode . mconcat <$> many1 inline'
+
+header :: MWParser Blocks
+header = try $ do
+ guardColumnOne
+ eqs <- many1 (char '=')
+ let lev = length eqs
+ guard $ lev <= 6
+ contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
+ return $ B.header lev contents
+
+bulletList :: MWParser Blocks
+bulletList = B.bulletList <$>
+ ( many1 (listItem '*')
+ <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
+ optional (htmlTag (~== TagClose "ul"))) )
+
+orderedList :: MWParser Blocks
+orderedList =
+ (B.orderedList <$> many1 (listItem '#'))
+ <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
+ many (listItem '#' <|> li) <*
+ optional (htmlTag (~== TagClose "ul"))))
+ <|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+ spaces
+ items <- many (listItem '#' <|> li)
+ optional (htmlTag (~== TagClose "ol"))
+ let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
+ return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
+
+definitionList :: MWParser Blocks
+definitionList = B.definitionList <$> many1 defListItem
+
+defListItem :: MWParser (Inlines, [Blocks])
+defListItem = try $ do
+ terms <- mconcat . intersperse B.linebreak <$> many defListTerm
+ -- we allow dd with no dt, or dt with no dd
+ defs <- if B.isNull terms
+ then many1 $ listItem ':'
+ else many $ listItem ':'
+ return (terms, defs)
+
+defListTerm :: MWParser Inlines
+defListTerm = char ';' >> skipMany spaceChar >> manyTill anyChar newline >>=
+ parseFromString (trimInlines . mconcat <$> many inline)
+
+listStart :: Char -> MWParser ()
+listStart c = char c *> notFollowedBy listStartChar
+
+listStartChar :: MWParser Char
+listStartChar = oneOf "*#;:"
+
+anyListStart :: MWParser Char
+anyListStart = char '*'
+ <|> char '#'
+ <|> char ':'
+ <|> char ';'
+
+li :: MWParser Blocks
+li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
+ (firstParaToPlain <$> blocksInTags "li") <* spaces
+
+listItem :: Char -> MWParser Blocks
+listItem c = try $ do
+ extras <- many (try $ char c <* lookAhead listStartChar)
+ if null extras
+ then listItem' c
+ else do
+ skipMany spaceChar
+ first <- concat <$> manyTill listChunk newline
+ rest <- many
+ (try $ string extras *> (concat <$> manyTill listChunk newline))
+ contents <- parseFromString (many1 $ listItem' c)
+ (unlines (first : rest))
+ case c of
+ '*' -> return $ B.bulletList contents
+ '#' -> return $ B.orderedList contents
+ ':' -> return $ B.definitionList [(mempty, contents)]
+ _ -> mzero
+
+-- The point of this is to handle stuff like
+-- * {{cite book
+-- | blah
+-- | blah
+-- }}
+-- * next list item
+-- which seems to be valid mediawiki.
+listChunk :: MWParser String
+listChunk = template <|> count 1 anyChar
+
+listItem' :: Char -> MWParser Blocks
+listItem' c = try $ do
+ listStart c
+ skipMany spaceChar
+ first <- concat <$> manyTill listChunk newline
+ rest <- many (try $ char c *> lookAhead listStartChar *>
+ (concat <$> manyTill listChunk newline))
+ parseFromString (firstParaToPlain . mconcat <$> many1 block)
+ $ unlines $ first : rest
+
+firstParaToPlain :: Blocks -> Blocks
+firstParaToPlain contents =
+ case viewl (B.unMany contents) of
+ (Para xs) :< ys -> B.Many $ (Plain xs) <| ys
+ _ -> contents
+
+--
+-- inline parsers
+--
+
+inline :: MWParser Inlines
+inline = whitespace
+ <|> url
+ <|> str
+ <|> doubleQuotes
+ <|> strong
+ <|> emph
+ <|> image
+ <|> internalLink
+ <|> externalLink
+ <|> inlineTag
+ <|> B.singleton <$> charRef
+ <|> inlineHtml
+ <|> (B.rawInline "mediawiki" <$> variable)
+ <|> (B.rawInline "mediawiki" <$> template)
+ <|> special
+
+str :: MWParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
+
+variable :: MWParser String
+variable = try $ do
+ string "{{{"
+ contents <- manyTill anyChar (try $ string "}}}")
+ return $ "{{{" ++ contents ++ "}}}"
+
+inlineTag :: MWParser Inlines
+inlineTag = do
+ (tag, _) <- lookAhead $ htmlTag isInlineTag'
+ case tag of
+ TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+ TagOpen "nowiki" _ -> try $ do
+ (_,raw) <- htmlTag (~== tag)
+ if '/' `elem` raw
+ then return mempty
+ else B.text . fromEntities <$>
+ manyTill anyChar (htmlTag (~== TagClose "nowiki"))
+ TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
+ *> optional blankline)
+ TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
+ TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
+ TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
+ TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
+ TagOpen "math" _ -> B.math <$> charsInTags "math"
+ TagOpen "code" _ -> B.code <$> charsInTags "code"
+ TagOpen "tt" _ -> B.code <$> charsInTags "tt"
+ TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
+ _ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
+
+special :: MWParser Inlines
+special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
+ oneOf specialChars)
+
+inlineHtml :: MWParser Inlines
+inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
+
+whitespace :: MWParser Inlines
+whitespace = B.space <$ (skipMany1 spaceChar <|> endline <|> htmlComment)
+
+endline :: MWParser ()
+endline = () <$ try (newline <*
+ notFollowedBy blankline <*
+ notFollowedBy' hrule <*
+ notFollowedBy tableStart <*
+ notFollowedBy' header <*
+ notFollowedBy anyListStart)
+
+image :: MWParser Inlines
+image = try $ do
+ sym "[["
+ sym "File:"
+ fname <- many1 (noneOf "|]")
+ _ <- many (try $ char '|' *> imageOption)
+ caption <- (B.str fname <$ sym "]]")
+ <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
+ return $ B.image fname "image" caption
+
+imageOption :: MWParser String
+imageOption =
+ try (oneOfStrings [ "border", "thumbnail", "frameless"
+ , "thumb", "upright", "left", "right"
+ , "center", "none", "baseline", "sub"
+ , "super", "top", "text-top", "middle"
+ , "bottom", "text-bottom" ])
+ <|> try (string "frame")
+ <|> try (many1 (oneOf "x0123456789") <* string "px")
+ <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+
+internalLink :: MWParser Inlines
+internalLink = try $ do
+ sym "[["
+ let addUnderscores x = let (pref,suff) = break (=='#') x
+ in pref ++ intercalate "_" (words suff)
+ pagename <- unwords . words <$> many (noneOf "|]")
+ label <- option (B.text pagename) $ char '|' *>
+ ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
+ -- the "pipe trick"
+ -- [[Help:Contents|] -> "Contents"
+ <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
+ sym "]]"
+ linktrail <- B.text <$> many letter
+ let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
+ if "Category:" `isPrefixOf` pagename
+ then do
+ updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
+ return mempty
+ else return link
+
+externalLink :: MWParser Inlines
+externalLink = try $ do
+ char '['
+ (_, src) <- uri
+ lab <- try (trimInlines . mconcat <$>
+ (skipMany1 spaceChar *> manyTill inline (char ']')))
+ <|> do char ']'
+ num <- mwNextLinkNumber <$> getState
+ updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
+ return $ B.str $ show num
+ return $ B.link src "" lab
+
+url :: MWParser Inlines
+url = do
+ (orig, src) <- uri
+ return $ B.link src "" (B.str orig)
+
+-- | Parses a list of inlines between start and end delimiters.
+inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
+inlinesBetween start end =
+ (trimInlines . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
+ innerSpace = try $ whitespace >>~ notFollowedBy' end
+
+emph :: MWParser Inlines
+emph = B.emph <$> nested (inlinesBetween start end)
+ where start = sym "''" >> lookAhead nonspaceChar
+ end = try $ notFollowedBy' (() <$ strong) >> sym "''"
+
+strong :: MWParser Inlines
+strong = B.strong <$> nested (inlinesBetween start end)
+ where start = sym "'''" >> lookAhead nonspaceChar
+ end = try $ sym "'''"
+
+doubleQuotes :: MWParser Inlines
+doubleQuotes = B.doubleQuoted . trimInlines . mconcat <$> try
+ ((getState >>= guard . readerSmart . mwOptions) *>
+ openDoubleQuote *> manyTill inline closeDoubleQuote )
+ where openDoubleQuote = char '"' <* lookAhead alphaNum
+ closeDoubleQuote = char '"' <* notFollowedBy alphaNum
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a26b1623d..74653efcf 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -757,7 +757,7 @@ simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
else simpleTableSep '=' >> anyLine
- dashes <- simpleDashedLines '='
+ dashes <- simpleDashedLines '=' <|> simpleDashedLines '-'
newline
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 89f281ae8..dc95d9a56 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -47,7 +47,6 @@ Left to be implemented:
TODO : refactor common patterns across readers :
- autolink
- - smartPunctuation
- more ...
-}
@@ -62,6 +61,7 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup.Match
+import Data.List ( intercalate )
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
@@ -412,7 +412,7 @@ note = try $ do
-- | Special chars
markupChars :: [Char]
-markupChars = "\\[]*#_@~-+^|%="
+markupChars = "\\*#_@~-+^|%=[]"
-- | Break strings on following chars. Space tab and newline break for
-- inlines breaking. Open paren breaks for mark. Quote, dash and dot
@@ -427,13 +427,15 @@ wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphened sequence of words
hyphenedWords :: Parser [Char] ParserState String
-hyphenedWords = try $ do
+hyphenedWords = intercalate "-" <$> sepBy1 wordChunk (char '-')
+
+wordChunk :: Parser [Char] ParserState String
+wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( (noneOf wordBoundaries) <|>
- try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) )
- let wd = hd:tl
- option wd $ try $
- (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords)
+ try (notFollowedBy' note *> oneOf markupChars
+ <* lookAhead (noneOf wordBoundaries) ) )
+ return $ hd:tl
-- | Any string
str :: Parser [Char] ParserState Inline
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index e696fc63e..a38f57074 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -103,7 +103,7 @@ elementToDocbook opts lvl (Sec _ _num id' title elements) =
n | n == 0 -> "chapter"
| n >= 1 && n <= 5 -> "sect" ++ show n
| otherwise -> "simplesect"
- in inTags True tag [("id",id')] $
+ in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $
inTagsSimple "title" (inlinesToDocbook opts title) $$
vcat (map (elementToDocbook opts (lvl + 1)) elements')
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 05c9555c6..84bf95dfb 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -60,7 +60,7 @@ data WriterState = WriterState{
, stExternalLinks :: M.Map String String
, stImages :: M.Map FilePath (String, B.ByteString)
, stListLevel :: Int
- , stListMarker :: ListMarker
+ , stListNumId :: Int
, stNumStyles :: M.Map ListMarker Int
, stLists :: [ListMarker]
}
@@ -79,7 +79,7 @@ defaultWriterState = WriterState{
, stExternalLinks = M.empty
, stImages = M.empty
, stListLevel = -1
- , stListMarker = NoMarker
+ , stListNumId = 1
, stNumStyles = M.fromList [(NoMarker, 0)]
, stLists = [NoMarker]
}
@@ -285,6 +285,9 @@ mkLvl marker lvl =
patternFor TwoParens s = "(" ++ s ++ ")"
patternFor _ s = s ++ "."
+getNumId :: WS Int
+getNumId = length `fmap` gets stLists
+
-- | Convert Pandoc document to string in OpenXML format.
writeOpenXML :: WriterOptions -> Pandoc -> WS Element
writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do
@@ -402,11 +405,13 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
blockToOpenXML opts (BulletList lst) = do
let marker = BulletMarker
addList marker
- asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+ numid <- getNumId
+ asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
let marker = NumberMarker numstyle numdelim start
addList marker
- asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+ numid <- getNumId
+ asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
blockToOpenXML opts (DefinitionList items) =
concat `fmap` mapM (definitionListItemToOpenXML opts) items
@@ -418,9 +423,6 @@ definitionListItemToOpenXML opts (term,defs) = do
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
-getNumId :: WS Int
-getNumId = length `fmap` gets stLists
-
addList :: ListMarker -> WS ()
addList marker = do
lists <- gets stLists
@@ -431,11 +433,11 @@ addList marker = do
Nothing -> modify $ \st ->
st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
-listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element]
+listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
listItemToOpenXML _ _ [] = return []
-listItemToOpenXML opts marker (first:rest) = do
- first' <- withMarker marker $ blockToOpenXML opts first
- rest' <- withMarker NoMarker $ blocksToOpenXML opts rest
+listItemToOpenXML opts numid (first:rest) = do
+ first' <- withNumId numid $ blockToOpenXML opts first
+ rest' <- withNumId 1 $ blocksToOpenXML opts rest
return $ first' ++ rest'
alignmentToString :: Alignment -> [Char]
@@ -449,12 +451,12 @@ alignmentToString alignment = case alignment of
inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
-withMarker :: ListMarker -> WS a -> WS a
-withMarker m p = do
- origMarker <- gets stListMarker
- modify $ \st -> st{ stListMarker = m }
+withNumId :: Int -> WS a -> WS a
+withNumId numid p = do
+ origNumId <- gets stListNumId
+ modify $ \st -> st{ stListNumId = numid }
result <- p
- modify $ \st -> st{ stListMarker = origMarker }
+ modify $ \st -> st{ stListNumId = origNumId }
return result
asList :: WS a -> WS a
@@ -489,10 +491,7 @@ getParaProps :: WS [Element]
getParaProps = do
props <- gets stParaProperties
listLevel <- gets stListLevel
- listMarker <- gets stListMarker
- numid <- case listMarker of
- NoMarker -> return 1
- _ -> getNumId
+ numid <- gets stListNumId
let listPr = if listLevel >= 0
then [ mknode "w:numPr" []
[ mknode "w:numId" [("w:val",show numid)] ()
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b6527c6c8..6f8931caa 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -32,6 +32,7 @@ import Data.IORef
import Data.Maybe ( fromMaybe, isNothing )
import Data.List ( findIndices, isPrefixOf )
import System.Environment ( getEnv )
+import Text.Printf (printf)
import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
@@ -122,8 +123,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
let chapters = map titleize chunks
let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapterToEntry :: Int -> Pandoc -> Entry
- chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
- fromString $ chapToHtml chap
+ chapterToEntry num chap = mkEntry
+ (showChapter num) $
+ fromString $ chapToHtml chap
let chapterEntries = zipWith chapterToEntry [1..] chapters
-- contents.opf
@@ -334,11 +336,15 @@ data IdentState = IdentState{
identTable :: [(String,String)]
} deriving (Read, Show)
+-- Returns filename for chapter number.
+showChapter :: Int -> String
+showChapter = printf "ch%03d.xhtml"
+
-- Go through a block list and construct a table
-- correlating the automatically constructed references
-- that would be used in a normal pandoc document with
-- new URLs to be used in the EPUB. For example, what
--- was "header-1" might turn into "ch6.xhtml#header".
+-- was "header-1" might turn into "ch006.xhtml#header".
correlateRefs :: [Block] -> [(String,String)]
correlateRefs bs = identTable $ execState (mapM_ go bs)
IdentState{ chapterNumber = 0
@@ -358,8 +364,9 @@ correlateRefs bs = identTable $ execState (mapM_ go bs)
modify $ \s -> s{ runningIdents = runningid : runningIdents st
, chapterIdents = maybe (chapterIdents st)
(: chapterIdents st) chapid
- , identTable = (runningid, "ch" ++ show (chapterNumber st) ++
- ".xhtml" ++ maybe "" ('#':) chapid) : identTable st
+ , identTable = (runningid,
+ showChapter (chapterNumber st) ++
+ maybe "" ('#':) chapid) : identTable st
}
go _ = return ()
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index c6c4a8fd7..ebb705a61 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -482,10 +482,13 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
return $ foldl (!) (ordList opts contents) attribs
blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
- do term' <- liftM (H.dt) $ inlineListToHtml opts term
+ do term' <- if null term
+ then return mempty
+ else liftM (H.dt) $ inlineListToHtml opts term
defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
blockListToHtml opts) defs
- return $ mconcat $ nl opts : term' : nl opts : defs') lst
+ return $ mconcat $ nl opts : term' : nl opts :
+ intersperse (nl opts) defs') lst
let lst' = H.dl $ mconcat contents >> nl opts
let lst'' = if writerIncremental opts
then lst' ! A.class_ "incremental"
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index abbbd4d01..2b5c7e84b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -49,8 +49,7 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
data WriterState =
WriterState { stInNote :: Bool -- true if we're in a note
, stInTable :: Bool -- true if we're in a table
- , stTableNotes :: [(Char, Doc)] -- List of markers, notes
- -- in current table
+ , stTableNotes :: [Doc] -- List of notes in current table
, stOLLevel :: Int -- level of ordered list nesting
, stOptions :: WriterOptions -- writer options, so they don't have to be parameter
, stVerbInNote :: Bool -- true if document has verbatim text in note
@@ -190,7 +189,7 @@ stringToLaTeX isUrl (x:xs) = do
'$' -> "\\$" ++ rest
'%' -> "\\%" ++ rest
'&' -> "\\&" ++ rest
- '_' -> "\\_" ++ rest
+ '_' | not isUrl -> "\\_" ++ rest
'#' -> "\\#" ++ rest
'-' -> case xs of -- prevent adjacent hyphens from forming ligatures
('-':_) -> "-{}" ++ rest
@@ -382,27 +381,27 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
modify $ \s -> s{ stInTable = True, stTableNotes = [] }
headers <- if all null heads
then return empty
- else liftM ($$ "\\ML")
- $ (tableRowToLaTeX True aligns widths) heads
+ else ($$ "\\hline\\noalign{\\medskip}") `fmap`
+ (tableRowToLaTeX True aligns widths) heads
captionText <- inlineListToLaTeX caption
let capt = if isEmpty captionText
then empty
- else text "caption = {" <> captionText <> "}," <> space
+ else text "\\noalign{\\medskip}"
+ $$ text "\\caption" <> braces captionText
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
- let rows'' = intersperse ("\\\\\\noalign{\\medskip}") rows'
tableNotes <- liftM (reverse . stTableNotes) get
- let toNote (marker, x) = "\\tnote" <> brackets (char marker) <>
- braces (nest 2 x)
+ let toNote x = "\\footnotetext" <> braces (nest 2 x)
let notes = vcat $ map toNote tableNotes
let colDescriptors = text $ concat $ map toColDescriptor aligns
- let tableBody =
- ("\\ctable" <> brackets (capt <> text "pos = H, center, botcap"))
- <> braces colDescriptors
- $$ braces ("% notes" <> cr <> notes <> cr)
- $$ braces (text "% rows" $$ "\\FL" $$
- vcat (headers : rows'') $$ "\\LL" <> cr)
modify $ \s -> s{ stTable = True, stInTable = False, stTableNotes = [] }
- return $ tableBody
+ return $ "\\begin{longtable}[c]" <> braces colDescriptors
+ $$ "\\hline\\noalign{\\medskip}"
+ $$ headers
+ $$ vcat rows'
+ $$ "\\hline"
+ $$ capt
+ $$ notes
+ $$ "\\end{longtable}"
toColDescriptor :: Alignment -> String
toColDescriptor align =
@@ -433,7 +432,7 @@ tableRowToLaTeX header aligns widths cols = do
braces (text (printf "%.2f\\columnwidth" w)) <>
braces (halign a <> cr <> c <> cr)
let cells = zipWith3 toCell widths aligns renderedCells
- return $ hcat $ intersperse (" & ") cells
+ return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}"
listItemToLaTeX :: [Block] -> State WriterState Doc
listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
@@ -572,7 +571,7 @@ inlineToLaTeX (LineBreak) = return "\\\\"
inlineToLaTeX Space = return space
inlineToLaTeX (Link txt ('#':ident, _)) = do
contents <- inlineListToLaTeX txt
- ident' <- stringToLaTeX False ident
+ ident' <- stringToLaTeX True ident
return $ text "\\hyperref" <> brackets (text ident') <> braces contents
inlineToLaTeX (Link txt (src, _)) =
case txt of
@@ -600,9 +599,8 @@ inlineToLaTeX (Note contents) = do
if inTable
then do
curnotes <- liftM stTableNotes get
- let marker = cycle ['a'..'z'] !! length curnotes
- modify $ \s -> s{ stTableNotes = (marker, contents') : curnotes }
- return $ "\\tmark" <> brackets (char marker) <> space
+ modify $ \s -> s{ stTableNotes = contents' : curnotes }
+ return $ "\\footnotemark" <> space
else return $ "\\footnote" <> braces (nest 2 contents' <> optnl)
-- note: a \n before } needed when note ends with a Verbatim environment
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d88419feb..1a0731710 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -54,9 +54,12 @@ data WriterState = WriterState { stNotes :: Notes
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
writeMarkdown opts document =
- evalState (pandocToMarkdown opts document) WriterState{ stNotes = []
- , stRefs = []
- , stPlain = False }
+ evalState (pandocToMarkdown opts{
+ writerWrapText = writerWrapText opts &&
+ not (isEnabled Ext_hard_line_breaks opts) }
+ document) WriterState{ stNotes = []
+ , stRefs = []
+ , stPlain = False }
-- | Convert Pandoc to plain text (like markdown, but without links,
-- pictures, or inline formatting).
@@ -588,8 +591,9 @@ inlineToMarkdown opts (RawInline f str)
return $ text str
inlineToMarkdown _ (RawInline _ _) = return empty
inlineToMarkdown opts (LineBreak)
+ | isEnabled Ext_hard_line_breaks opts = return cr
| isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr
- | otherwise = return $ " " <> cr
+ | otherwise = return $ " " <> cr
inlineToMarkdown _ Space = return space
inlineToMarkdown opts (Cite (c:cs) lst)
| writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst