aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Biblio.hs12
-rw-r--r--src/Text/Pandoc/Highlighting.hs7
-rw-r--r--src/Text/Pandoc/MIME.hs2
-rw-r--r--src/Text/Pandoc/Options.hs277
-rw-r--r--src/Text/Pandoc/Parsing.hs483
-rw-r--r--src/Text/Pandoc/Pretty.hs13
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs14
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs109
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs34
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs1423
-rw-r--r--src/Text/Pandoc/Readers/Native.hs33
-rw-r--r--src/Text/Pandoc/Readers/RST.hs316
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs204
-rw-r--r--src/Text/Pandoc/SelfContained.hs17
-rw-r--r--src/Text/Pandoc/Shared.hs167
-rw-r--r--src/Text/Pandoc/Templates.hs60
-rw-r--r--src/Text/Pandoc/UTF8.hs2
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs6
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs47
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs76
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs19
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs31
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs616
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs21
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs9
-rw-r--r--src/Text/Pandoc/Writers/Man.hs81
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs330
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs52
-rw-r--r--src/Text/Pandoc/Writers/Native.hs10
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs19
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs2
-rw-r--r--src/Text/Pandoc/Writers/Org.hs60
-rw-r--r--src/Text/Pandoc/Writers/RST.hs51
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs85
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs27
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs31
37 files changed, 3034 insertions, 1714 deletions
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index cece13fba..13569a4d9 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -38,7 +38,7 @@ import qualified Text.CSL as CSL ( Cite(..) )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Shared (stringify)
-import Text.ParserCombinators.Parsec
+import Text.Parsec
import Control.Monad
-- | Process a 'Pandoc' document by adding citations formatted
@@ -165,7 +165,7 @@ locatorWords inp =
breakup (x : xs) = x : breakup xs
splitup = groupBy (\x y -> x /= '\160' && y /= '\160')
-pLocatorWords :: GenParser Inline st (String, [Inline])
+pLocatorWords :: Parsec [Inline] st (String, [Inline])
pLocatorWords = do
l <- pLocator
s <- getInput -- rest is suffix
@@ -173,16 +173,16 @@ pLocatorWords = do
then return (init l, Str "," : s)
else return (l, s)
-pMatch :: (Inline -> Bool) -> GenParser Inline st Inline
+pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline
pMatch condition = try $ do
t <- anyToken
guard $ condition t
return t
-pSpace :: GenParser Inline st Inline
+pSpace :: Parsec [Inline] st Inline
pSpace = pMatch (\t -> t == Space || t == Str "\160")
-pLocator :: GenParser Inline st String
+pLocator :: Parsec [Inline] st String
pLocator = try $ do
optional $ pMatch (== Str ",")
optional pSpace
@@ -190,7 +190,7 @@ pLocator = try $ do
gs <- many1 pWordWithDigits
return $ stringify f ++ (' ' : unwords gs)
-pWordWithDigits :: GenParser Inline st String
+pWordWithDigits :: Parsec [Inline] st String
pWordWithDigits = try $ do
pSpace
r <- many1 (notFollowedBy pSpace >> anyToken)
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 080acebee..95df88099 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -47,6 +47,7 @@ module Text.Pandoc.Highlighting ( languages
, Style
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
import Text.Highlighting.Kate
import Data.List (find)
import Data.Maybe (fromMaybe)
@@ -60,9 +61,9 @@ highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
-> String -- ^ Raw contents of the CodeBlock
-> Maybe a -- ^ Maybe the formatted result
highlight formatter (_, classes, keyvals) rawCode =
- let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of
- ((n,_):_) -> n
- [] -> 1
+ let firstNum = case safeRead (fromMaybe "1" $ lookup "startFrom" keyvals) of
+ Just n -> n
+ Nothing -> 1
fmtOpts = defaultFormatOpts{
startNumber = firstNum,
numberLines = any (`elem`
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index f9749cece..9cde57e4d 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.MIME
Copyright : Copyright (C) 2011 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
new file mode 100644
index 000000000..a9c8bf710
--- /dev/null
+++ b/src/Text/Pandoc/Options.hs
@@ -0,0 +1,277 @@
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Options
+ Copyright : Copyright (C) 2012 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data structures and functions for representing parser and writer
+options.
+-}
+module Text.Pandoc.Options ( Extension(..)
+ , pandocExtensions
+ , strictExtensions
+ , ReaderOptions(..)
+ , HTMLMathMethod (..)
+ , CiteMethod (..)
+ , ObfuscationMethod (..)
+ , HTMLSlideVariant (..)
+ , WriterOptions (..)
+ , def
+ , isEnabled
+ ) where
+import Data.Set (Set)
+import qualified Data.Set as Set
+import Data.Default
+import Text.Pandoc.Highlighting (Style, pygments)
+
+-- | Individually selectable syntax extensions.
+data Extension =
+ Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
+ | Ext_inline_notes -- ^ Pandoc-style inline notes
+ | Ext_pandoc_title_block -- ^ Pandoc title block
+ | Ext_mmd_title_block -- ^ Multimarkdown metadata block
+ | Ext_table_captions -- ^ Pandoc-style table captions
+ -- | Ext_image_captions
+ | Ext_simple_tables -- ^ Pandoc-style simple tables
+ | Ext_multiline_tables -- ^ Pandoc-style multiline tables
+ | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
+ | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
+ | Ext_citations -- ^ Pandoc/citeproc citations
+ | Ext_raw_tex -- ^ Allow raw TeX (other than math)
+ | Ext_raw_html -- ^ Allow raw HTML
+ | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
+ | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
+ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
+ | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
+ | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
+ | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
+ | Ext_backtick_code_blocks -- ^ Github style ``` code blocks
+ | Ext_inline_code_attributes -- ^ Allow attributes on inline code
+ | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
+ -- iff container has attribute 'markdown'
+ | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
+ | Ext_autolink_code_spans -- ^ Put autolink text inside code spans
+ | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
+ | Ext_startnum -- ^ Make start number of ordered list significant
+ | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
+ | Ext_example_lists -- ^ Markdown-style numbered examples
+ | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
+ | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
+ | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
+ | Ext_blank_before_header -- ^ Require blank line before a header
+ -- | Ext_significant_bullets
+ | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
+ | Ext_superscript -- ^ Superscript using ^this^ syntax
+ | Ext_subscript -- ^ Subscript using ~this~ syntax
+ | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
+ | Ext_literate_haskell -- ^ Enable literate Haskell conventions
+ | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
+ deriving (Show, Read, Enum, Eq, Ord, Bounded)
+
+pandocExtensions :: Set Extension
+pandocExtensions = Set.fromList
+ [ Ext_footnotes
+ , Ext_inline_notes
+ , Ext_pandoc_title_block
+ , Ext_table_captions
+ -- , Ext_image_captions
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_pipe_tables
+ , Ext_citations
+ , Ext_raw_tex
+ , Ext_raw_html
+ , Ext_tex_math_dollars
+ , Ext_latex_macros
+ , Ext_fenced_code_blocks
+ , Ext_fenced_code_attributes
+ , Ext_backtick_code_blocks
+ , Ext_inline_code_attributes
+ , Ext_markdown_in_html_blocks
+ , Ext_escaped_line_breaks
+ , Ext_autolink_code_spans
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_all_symbols_escapable
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ -- , Ext_significant_bullets
+ , Ext_strikeout
+ , Ext_superscript
+ , Ext_subscript
+ ]
+
+strictExtensions :: Set Extension
+strictExtensions = Set.fromList
+ [ Ext_raw_html ]
+
+data ReaderOptions = ReaderOptions{
+ readerExtensions :: Set Extension -- ^ Syntax extensions
+ , readerSmart :: Bool -- ^ Smart punctuation
+ , readerStrict :: Bool -- ^ FOR TRANSITION ONLY
+ , readerStandalone :: Bool -- ^ Standalone document with header
+ , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX
+ , readerColumns :: Int -- ^ Number of columns in terminal
+ , readerTabStop :: Int -- ^ Tab stop
+ , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior
+ -- in parsing dashes; -- is em-dash;
+ -- - before numerial is en-dash
+ , readerCitations :: [String] -- ^ List of available citations
+ , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
+ , readerIndentedCodeClasses :: [String] -- ^ Default classes for
+ -- indented code blocks
+} deriving (Show, Read)
+
+instance Default ReaderOptions
+ where def = ReaderOptions{
+ readerExtensions = pandocExtensions
+ , readerSmart = False
+ , readerStrict = False
+ , readerStandalone = False
+ , readerParseRaw = False
+ , readerColumns = 80
+ , readerTabStop = 4
+ , readerOldDashes = False
+ , readerCitations = []
+ , readerApplyMacros = True
+ , readerIndentedCodeClasses = []
+ }
+
+--
+-- Writer options
+--
+
+data HTMLMathMethod = PlainMath
+ | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
+ | JsMath (Maybe String) -- url of jsMath load script
+ | GladTeX
+ | WebTeX String -- url of TeX->image script.
+ | MathML (Maybe String) -- url of MathMLinHTML.js
+ | MathJax String -- url of MathJax.js
+ deriving (Show, Read, Eq)
+
+data CiteMethod = Citeproc -- use citeproc to render them
+ | Natbib -- output natbib cite commands
+ | Biblatex -- output biblatex cite commands
+ deriving (Show, Read, Eq)
+
+-- | Methods for obfuscating email addresses in HTML.
+data ObfuscationMethod = NoObfuscation
+ | ReferenceObfuscation
+ | JavascriptObfuscation
+ deriving (Show, Read, Eq)
+
+-- | Varieties of HTML slide shows.
+data HTMLSlideVariant = S5Slides
+ | SlidySlides
+ | SlideousSlides
+ | DZSlides
+ | NoSlides
+ deriving (Show, Read, Eq)
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerTemplate :: String -- ^ Template to use in standalone mode
+ , writerVariables :: [(String, String)] -- ^ Variables to set in template
+ , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous?
+ , writerIncremental :: Bool -- ^ True if lists should be incremental
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
+ , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
+ , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file
+ , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
+ , writerCiteMethod :: CiteMethod -- ^ How to print cites
+ , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations
+ , writerHtml5 :: Bool -- ^ Produce HTML5
+ , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show
+ , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
+ , writerChapters :: Bool -- ^ Use "chapter" for top-level sects
+ , writerListings :: Bool -- ^ Use listings package for code
+ , writerHighlight :: Bool -- ^ Highlight source code
+ , writerHighlightStyle :: Style -- ^ Style to use for highlighting
+ , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
+ , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex
+ , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line
+ , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
+ , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified
+ , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified
+ } deriving Show
+
+instance Default WriterOptions where
+ def = WriterOptions { writerStandalone = False
+ , writerTemplate = ""
+ , writerVariables = []
+ , writerEPUBMetadata = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerSlideVariant = NoSlides
+ , writerIncremental = False
+ , writerHTMLMathMethod = PlainMath
+ , writerIgnoreNotes = False
+ , writerNumberSections = False
+ , writerSectionDivs = False
+ , writerExtensions = pandocExtensions
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ , writerColumns = 72
+ , writerEmailObfuscation = JavascriptObfuscation
+ , writerIdentifierPrefix = ""
+ , writerSourceDirectory = "."
+ , writerUserDataDir = Nothing
+ , writerCiteMethod = Citeproc
+ , writerBiblioFiles = []
+ , writerHtml5 = False
+ , writerBeamer = False
+ , writerSlideLevel = Nothing
+ , writerChapters = False
+ , writerListings = False
+ , writerHighlight = False
+ , writerHighlightStyle = pygments
+ , writerSetextHeaders = True
+ , writerTeXLigatures = True
+ , writerEpubStylesheet = Nothing
+ , writerEpubFonts = []
+ , writerReferenceODT = Nothing
+ , writerReferenceDocx = Nothing
+ }
+
+-- | Returns True if the given extension is enabled.
+isEnabled :: Extension -> WriterOptions -> Bool
+isEnabled ext opts = ext `Set.member` (writerExtensions opts)
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index cac2b71ca..50691f409 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Parsing
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -48,8 +49,6 @@ module Text.Pandoc.Parsing ( (>>~),
withHorizDisplacement,
withRaw,
nullBlock,
- failIfStrict,
- failUnlessLHS,
escaped,
characterReference,
updateLastStrPos,
@@ -57,37 +56,127 @@ module Text.Pandoc.Parsing ( (>>~),
orderedListMarker,
charRef,
tableWith,
+ widthsFromIndices,
gridTableWith,
readWith,
testStringWith,
+ getOption,
+ guardEnabled,
+ guardDisabled,
ParserState (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
NoteTable,
+ NoteTable',
KeyTable,
- Key,
+ Key (..),
toKey,
- fromKey,
- lookupKeySrc,
smartPunctuation,
+ withQuoteContext,
+ singleQuoteStart,
+ singleQuoteEnd,
+ doubleQuoteStart,
+ doubleQuoteEnd,
+ ellipses,
+ apostrophe,
+ dash,
macro,
- applyMacros' )
+ applyMacros',
+ Parser,
+ F(..),
+ runF,
+ askF,
+ asksF,
+ -- * Re-exports from Text.Pandoc.Parsec
+ runParser,
+ parse,
+ anyToken,
+ getInput,
+ setInput,
+ unexpected,
+ char,
+ letter,
+ digit,
+ alphaNum,
+ skipMany,
+ skipMany1,
+ spaces,
+ space,
+ anyChar,
+ satisfy,
+ newline,
+ string,
+ count,
+ eof,
+ noneOf,
+ oneOf,
+ lookAhead,
+ notFollowedBy,
+ many,
+ many1,
+ manyTill,
+ (<|>),
+ (<?>),
+ choice,
+ try,
+ sepBy,
+ sepBy1,
+ sepEndBy,
+ sepEndBy1,
+ endBy,
+ endBy1,
+ option,
+ optional,
+ optionMaybe,
+ getState,
+ setState,
+ updateState,
+ getPosition,
+ setPosition,
+ sourceColumn,
+ sourceLine,
+ newPos,
+ token
+ )
where
import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (Blocks)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.ParserCombinators.Parsec
+import Text.Parsec
+import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
import Text.HTML.TagSoup.Entity ( lookupEntity )
+import Data.Default
+import qualified Data.Set as Set
+import Control.Monad.Reader
+import Data.Monoid
+
+type Parser t s = Parsec t s
+
+newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor)
+
+runF :: F a -> ParserState -> a
+runF = runReader . unF
+
+askF :: F ParserState
+askF = F ask
+
+asksF :: (ParserState -> a) -> F a
+asksF f = F $ asks f
+
+instance Monoid a => Monoid (F a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = liftM mconcat . sequence
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -95,62 +184,69 @@ import Text.HTML.TagSoup.Entity ( lookupEntity )
a >>~ b = a >>= \x -> b >> return x
-- | Parse any line of text
-anyLine :: GenParser Char st [Char]
+anyLine :: Parser [Char] st [Char]
anyLine = manyTill anyChar newline
-- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
+many1Till :: Parser [tok] st a
+ -> Parser [tok] st end
+ -> Parser [tok] st [a]
many1Till p end = do
first <- p
rest <- manyTill p end
return (first:rest)
--- | A more general form of @notFollowedBy@. This one allows any
+-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st ()
notFollowedBy' p = try $ join $ do a <- try p
return (unexpected (show a))
<|>
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> Parser [Char] st String
+oneOfStrings [] = fail "no strings"
+oneOfStrings strs = do
+ c <- anyChar
+ let strs' = [xs | (x:xs) <- strs, x == c]
+ case strs' of
+ [] -> fail "not found"
+ z | "" `elem` z -> return [c]
+ | otherwise -> (c:) `fmap` oneOfStrings strs'
-- | Parses a space or tab.
-spaceChar :: CharParser st Char
+spaceChar :: Parser [Char] st Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
-nonspaceChar :: CharParser st Char
+nonspaceChar :: Parser [Char] st Char
nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r'
-- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
+skipSpaces :: Parser [Char] st ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
+blankline :: Parser [Char] st Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
+blanklines :: Parser [Char] st [Char]
blanklines = many1 blankline
-- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try $
+enclosed :: Parser [Char] st t -- ^ start parser
+ -> Parser [Char] st end -- ^ end parser
+ -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] st [a]
+enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase :: [Char] -> Parser [Char] st String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
@@ -158,7 +254,7 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
@@ -169,8 +265,8 @@ parseFromString parser str = do
return result
-- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = blanklines
+lineClump :: Parser [Char] st String
+lineClump = blanklines
<|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-- | Parse a string of characters between an open character
@@ -178,8 +274,8 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
-charsInBalanced :: Char -> Char -> GenParser Char st Char
- -> GenParser Char st String
+charsInBalanced :: Char -> Char -> Parser [Char] st Char
+ -> Parser [Char] st String
charsInBalanced open close parser = try $ do
char open
let isDelim c = c == open || c == close
@@ -204,13 +300,13 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: Bool -- ^ Uppercase if true
- -> GenParser Char st Int
+ -> Parser [Char] st Int
romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
else lowercaseRomanDigits
lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
map char romanDigits
thousands <- many thousand >>= (return . (1000 *) . length)
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
@@ -234,14 +330,14 @@ romanNumeral upperCase = do
-- Parsers for email addresses and URIs
-emailChar :: GenParser Char st Char
+emailChar :: Parser [Char] st Char
emailChar = alphaNum <|>
satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.')
-domainChar :: GenParser Char st Char
+domainChar :: Parser [Char] st Char
domainChar = alphaNum <|> char '-'
-domain :: GenParser Char st [Char]
+domain :: Parser [Char] st [Char]
domain = do
first <- many1 domainChar
dom <- many1 $ try (char '.' >> many1 domainChar )
@@ -249,7 +345,7 @@ domain = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
-emailAddress :: GenParser Char st (String, String)
+emailAddress :: Parser [Char] st (String, String)
emailAddress = try $ do
firstLetter <- alphaNum
restAddr <- many emailChar
@@ -260,7 +356,7 @@ emailAddress = try $ do
return (full, escapeURI $ "mailto:" ++ full)
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: GenParser Char st (String, String)
+uri :: Parser [Char] st (String, String)
uri = try $ do
let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
"news:", "telnet:" ]
@@ -294,8 +390,8 @@ uri = try $ do
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply
+ -> Parser [Char] st (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
@@ -304,7 +400,7 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
-withRaw :: GenParser Char st a -> GenParser Char st (a, [Char])
+withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char])
withRaw parser = do
pos1 <- getPosition
inp <- getInput
@@ -321,26 +417,16 @@ withRaw parser = do
-- | Parses a character and returns 'Null' (so that the parser can move on
-- if it gets stuck).
-nullBlock :: GenParser Char st Block
+nullBlock :: Parser [Char] st Block
nullBlock = anyChar >> return Null
--- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser a ParserState ()
-failIfStrict = do
- state <- getState
- if stateStrict state then fail "strict mode" else return ()
-
--- | Fail unless we're in literate haskell mode.
-failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = getState >>= guard . stateLiterateHaskell
-
-- | Parses backslash, then applies character parser.
-escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Char
+escaped :: Parser [Char] st Char -- ^ Parser for character to escape
+ -> Parser [Char] st Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
-characterReference :: GenParser Char st Char
+characterReference :: Parser [Char] st Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@@ -349,19 +435,19 @@ characterReference = try $ do
Nothing -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman :: Parser [Char] st (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman :: Parser [Char] st (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal :: Parser [Char] st (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, read num)
@@ -370,7 +456,7 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
-exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
+exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int)
exampleNum = do
char '@'
lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
@@ -384,38 +470,38 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum :: Parser [Char] st (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha :: Parser [Char] st (ListNumberStyle, Int)
lowerAlpha = do
ch <- oneOf ['a'..'z']
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha :: Parser [Char] st (ListNumberStyle, Int)
upperAlpha = do
ch <- oneOf ['A'..'Z']
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
-romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne :: Parser [Char] st (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: GenParser Char ParserState ListAttributes
-anyOrderedListMarker = choice $
+anyOrderedListMarker :: Parser [Char] ParserState ListAttributes
+anyOrderedListMarker = choice $
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inPeriod :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inPeriod num = try $ do
(style, start) <- num
char '.'
@@ -423,18 +509,18 @@ inPeriod num = try $ do
then DefaultDelim
else Period
return (start, style, delim)
-
+
-- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inOneParen :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inOneParen num = try $ do
(style, start) <- num
char ')'
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
+inTwoParens :: Parser [Char] st (ListNumberStyle, Int)
+ -> Parser [Char] st ListAttributes
inTwoParens num = try $ do
char '('
(style, start) <- num
@@ -443,9 +529,9 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
-orderedListMarker :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char ParserState Int
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> Parser [Char] ParserState Int
orderedListMarker style delim = do
let num = defaultNum <|> -- # can continue any kind of list
case style of
@@ -465,38 +551,34 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
-charRef :: GenParser Char st Inline
+charRef :: Parser [Char] st Inline
charRef = do
c <- characterReference
return $ Str [c]
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState [Inline]
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser captionParser = try $ do
- caption' <- option [] captionParser
+tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+ -> ([Int] -> Parser [Char] ParserState [[Block]])
+ -> Parser [Char] ParserState sep
+ -> Parser [Char] ParserState end
+ -> Parser [Char] ParserState Block
+tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
+ lines' <- rowParser indices `sepEndBy1` lineParser
footerParser
- caption <- if null caption'
- then option [] captionParser
- else return caption'
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ Table [] aligns widths heads lines'
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
-> [Int] -- Indices
-> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns' indices =
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns' indices =
let numColumns = max numColumns' (if null indices then 0 else last indices)
lengths' = zipWith (-) indices (0:indices)
lengths = reverse $
@@ -516,28 +598,30 @@ widthsFromIndices numColumns' indices =
fracs = map (\l -> (fromIntegral l) / quotient) lengths in
tail fracs
+---
+
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: GenParser Char ParserState Block -- ^ Block parser
- -> GenParser Char ParserState [Inline] -- ^ Caption parser
+gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTableWith block tableCaption headless =
- tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption
+ -> Parser [Char] ParserState Block
+gridTableWith blocks headless =
+ tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
+ (gridTableSep '-') gridTableFooter
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ removeTrailingSpace line
-gridPart :: Char -> GenParser Char st (Int, Int)
+gridPart :: Char -> Parser [Char] st (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
return (length dashes, length dashes + 1)
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
+gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
removeFinalBar :: String -> String
@@ -545,18 +629,18 @@ removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Char -> GenParser Char ParserState Char
+gridTableSep :: Char -> Parser [Char] ParserState Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
-gridTableHeader headless block = try $ do
+ -> Parser [Char] ParserState [Block]
+ -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
rawContent <- if headless
- then return $ repeat ""
+ then return $ repeat ""
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
@@ -571,25 +655,25 @@ gridTableHeader headless block = try $ do
then replicate (length dashes) ""
else map (intercalate " ") $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString $ many block) $
+ heads <- mapM (parseFromString blocks) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
+gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: GenParser Char ParserState Block
+gridTableRow :: Parser [Char] ParserState [Block]
-> [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow block indices = do
+ -> Parser [Char] ParserState [[Block]]
+gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
+ mapM (liftM compactifyCell . parseFromString blocks) cols
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
@@ -603,23 +687,23 @@ compactifyCell :: [Block] -> [Block]
compactifyCell bs = head $ compactify [bs]
-- | Parse footer for a grid table.
-gridTableFooter :: GenParser Char ParserState [Char]
+gridTableFooter :: Parser [Char] ParserState [Char]
gridTableFooter = blanklines
---
-- | Parse a string with a given parser and state.
-readWith :: GenParser t ParserState a -- ^ parser
+readWith :: Parser [t] ParserState a -- ^ parser
-> ParserState -- ^ initial state
-> [t] -- ^ input
-> a
-readWith parser state input =
+readWith parser state input =
case runParser parser state "source" input of
Left err' -> error $ "\nError:\n" ++ show err'
Right result -> result
-- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => GenParser Char ParserState a
+testStringWith :: (Show a) => Parser [Char] ParserState a
-> String
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
@@ -627,72 +711,67 @@ testStringWith parser str = UTF8.putStrLn $ show $
-- | Parsing options.
data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ { stateOptions :: ReaderOptions, -- ^ User options
stateParserContext :: ParserContext, -- ^ Inside list?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateAllowLinks :: Bool, -- ^ Allow parsing of links
stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateCitations :: [String], -- ^ List of available citations
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks)
+ stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
+ stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
stateTitle :: [Inline], -- ^ Title of document
stateAuthors :: [[Inline]], -- ^ Authors of document
stateDate :: [Inline], -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax?
- stateSmart :: Bool, -- ^ Use smart typography?
- stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
- -- in parsing dashes; -- is em-dash;
- -- before numeral is en-dash
- stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
- stateColumns :: Int, -- ^ Number of columns in terminal
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
+ stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
stateHasChapters :: Bool, -- ^ True if \chapter encountered
- stateApplyMacros :: Bool, -- ^ Apply LaTeX macros?
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String -- ^ Current rST default interpreted text role
}
- deriving Show
+
+instance Default ParserState where
+ def = defaultParserState
defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
+defaultParserState =
+ ParserState { stateOptions = def,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
+ stateAllowLinks = True,
stateMaxNestingLevel = 6,
stateLastStrPos = Nothing,
stateKeys = M.empty,
- stateCitations = [],
stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
+ stateNotes' = [],
stateTitle = [],
stateAuthors = [],
stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateOldDashes = False,
- stateLiterateHaskell = False,
- stateColumns = 80,
stateHeaderTable = [],
- stateIndentedCodeClasses = [],
stateNextExample = 1,
stateExamples = M.empty,
stateHasChapters = False,
- stateApplyMacros = True,
stateMacros = [],
stateRstDefaultRole = "title-reference"}
-data HeaderType
+getOption :: (ReaderOptions -> a) -> Parser s ParserState a
+getOption f = (f . stateOptions) `fmap` getState
+
+-- | Succeed only if the extension is enabled.
+guardEnabled :: Extension -> Parser s ParserState ()
+guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
+
+-- | Succeed only if the extension is disabled.
+guardDisabled :: Extension -> Parser s ParserState ()
+guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
+
+data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
| DoubleHeader Char -- ^ Lines of characters above and below
deriving (Eq, Show)
-data ParserContext
+data ParserContext
= ListItemState -- ^ Used when running parser on list item contents
| NullState -- ^ Default state
deriving (Eq, Show)
@@ -705,51 +784,35 @@ data QuoteContext
type NoteTable = [(String, String)]
-newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord)
+type NoteTable' = [(String, F Blocks)] -- used in markdown reader
-toKey :: [Inline] -> Key
-toKey = Key . bottomUp lowercase
- where lowercase :: Inline -> Inline
- lowercase (Str xs) = Str (map toLower xs)
- lowercase (Math t xs) = Math t (map toLower xs)
- lowercase (Code attr xs) = Code attr (map toLower xs)
- lowercase (RawInline f xs) = RawInline f (map toLower xs)
- lowercase LineBreak = Space
- lowercase x = x
+newtype Key = Key String deriving (Show, Read, Eq, Ord)
-fromKey :: Key -> [Inline]
-fromKey (Key xs) = xs
+toKey :: String -> Key
+toKey = Key . map toLower . unwords . words
type KeyTable = M.Map Key Target
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> Key -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case M.lookup key table of
- Nothing -> Nothing
- Just src -> Just src
-
-- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = getState >>= guard . stateSmart
+failUnlessSmart :: Parser [tok] ParserState ()
+failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+smartPunctuation :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-apostrophe :: GenParser Char ParserState Inline
+apostrophe :: Parser [Char] ParserState Inline
apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
-quoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+quoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
+ -> Parser [Char] ParserState a
+ -> Parser [Char] ParserState a
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = stateQuoteContext oldState
@@ -759,39 +822,39 @@ withQuoteContext context parser = do
setState newState { stateQuoteContext = oldQuoteContext }
return result
-singleQuoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+singleQuoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted :: GenParser Char ParserState Inline
- -> GenParser Char ParserState Inline
+doubleQuoted :: Parser [Char] ParserState Inline
+ -> Parser [Char] ParserState Inline
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ do
contents <- manyTill inlineParser doubleQuoteEnd
return . Quoted DoubleQuote . normalizeSpaces $ contents
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
+failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
failIfInQuoteContext context = do
st <- getState
if stateQuoteContext st == context
then fail "already inside quotes"
else return ()
-charOrRef :: [Char] -> GenParser Char st Char
+charOrRef :: [Char] -> Parser [Char] st Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
-updateLastStrPos :: GenParser Char ParserState ()
-updateLastStrPos = getPosition >>= \p ->
+updateLastStrPos :: Parser [Char] ParserState ()
+updateLastStrPos = getPosition >>= \p ->
updateState $ \s -> s{ stateLastStrPos = Just p }
-singleQuoteStart :: GenParser Char ParserState ()
+singleQuoteStart :: Parser [Char] ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
pos <- getPosition
@@ -802,61 +865,61 @@ singleQuoteStart = do
notFollowedBy (oneOf ")!],;:-? \t\n")
notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
+ satisfy (not . isAlphaNum)))
-- possess/contraction
return ()
-singleQuoteEnd :: GenParser Char st ()
+singleQuoteEnd :: Parser [Char] st ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: GenParser Char ParserState ()
+doubleQuoteStart :: Parser [Char] ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
-doubleQuoteEnd :: GenParser Char st ()
+doubleQuoteEnd :: Parser [Char] st ()
doubleQuoteEnd = do
charOrRef "\"\8221\148"
return ()
-ellipses :: GenParser Char st Inline
+ellipses :: Parser [Char] st Inline
ellipses = do
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
return (Str "\8230")
-dash :: GenParser Char ParserState Inline
+dash :: Parser [Char] ParserState Inline
dash = do
- oldDashes <- stateOldDashes `fmap` getState
+ oldDashes <- getOption readerOldDashes
if oldDashes
then emDashOld <|> enDashOld
else Str `fmap` (hyphenDash <|> emDash <|> enDash)
-- Two hyphens = en-dash, three = em-dash
-hyphenDash :: GenParser Char st String
+hyphenDash :: Parser [Char] st String
hyphenDash = do
try $ string "--"
option "\8211" (char '-' >> return "\8212")
-emDash :: GenParser Char st String
+emDash :: Parser [Char] st String
emDash = do
try (charOrRef "\8212\151")
return "\8212"
-enDash :: GenParser Char st String
+enDash :: Parser [Char] st String
enDash = do
try (charOrRef "\8212\151")
return "\8211"
-enDashOld :: GenParser Char st Inline
+enDashOld :: Parser [Char] st Inline
enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return (Str "\8211")
-emDashOld :: GenParser Char st Inline
+emDashOld :: Parser [Char] st Inline
emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (Str "\8212")
@@ -866,24 +929,24 @@ emDashOld = do
--
-- | Parse a \newcommand or \renewcommand macro definition.
-macro :: GenParser Char ParserState Block
+macro :: Parser [Char] ParserState Block
macro = do
- apply <- stateApplyMacros `fmap` getState
+ apply <- getOption readerApplyMacros
inp <- getInput
case parseMacroDefinitions inp of
- ([], _) -> pzero
- (ms, rest) -> do def <- count (length inp - length rest) anyChar
+ ([], _) -> mzero
+ (ms, rest) -> do def' <- count (length inp - length rest) anyChar
if apply
then do
updateState $ \st ->
st { stateMacros = ms ++ stateMacros st }
return Null
- else return $ RawBlock "latex" def
+ else return $ RawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: String -> GenParser Char ParserState String
+applyMacros' :: String -> Parser [Char] ParserState String
applyMacros' target = do
- apply <- liftM stateApplyMacros getState
+ apply <- getOption readerApplyMacros
if apply
then do macros <- liftM stateMacros getState
return $ applyMacros macros target
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 0372dbe5d..211fdf20e 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA
{- |
Module : Text.Pandoc.Pretty
Copyright : Copyright (C) 2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -202,18 +202,17 @@ chomp d = Doc (fromList dl')
outp :: (IsString a, Monoid a)
=> Int -> String -> DocState a
-outp off s | off <= 0 = do
+outp off s | off < 0 = do -- offset < 0 means newline characters
st' <- get
let rawpref = prefix st'
when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
let pref = reverse $ dropWhile isSpace $ reverse rawpref
modify $ \st -> st{ output = fromString pref : output st
, column = column st + realLength pref }
- when (off < 0) $ do
- modify $ \st -> st { output = fromString s : output st
- , column = 0
- , newlines = newlines st + 1 }
-outp off s = do
+ modify $ \st -> st { output = fromString s : output st
+ , column = 0
+ , newlines = newlines st + 1 }
+outp off s = do -- offset >= 0 (0 might be combining char)
st' <- get
let pref = prefix st'
when (column st' == 0 && usePrefix st' && not (null pref)) $ do
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 62f7c61a0..685fa1ee4 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,6 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Data.Char (toUpper, isDigit)
-import Text.Pandoc.Parsing (ParserState(..))
+import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
@@ -455,13 +455,13 @@ List of all DocBook tags, with [x] indicating implemented,
[x] tocfront - An entry in a table of contents for a front matter component
[x] toclevel1 - A top-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel2 - A second-level entry within a table of contents entry for a
+[x] toclevel2 - A second-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel3 - A third-level entry within a table of contents entry for a
+[x] toclevel3 - A third-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel4 - A fourth-level entry within a table of contents entry for a
+[x] toclevel4 - A fourth-level entry within a table of contents entry for a
chapter-like component
-[x] toclevel5 - A fifth-level entry within a table of contents entry for a
+[x] toclevel5 - A fifth-level entry within a table of contents entry for a
chapter-like component
[x] tocpart - An entry in a table of contents for a part of a book
[ ] token - A unit of information
@@ -503,7 +503,7 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbBook :: Bool
} deriving Show
-readDocBook :: ParserState -> String -> Pandoc
+readDocBook :: ReaderOptions -> String -> Pandoc
readDocBook _ inp = setTitle (dbDocTitle st')
$ setAuthors (dbDocAuthors st')
$ setDate (dbDocDate st')
@@ -574,7 +574,7 @@ addToStart toadd bs =
(Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest
_ -> bs
--- function that is used by both mediaobject (in parseBlock)
+-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
getImage :: Element -> DB Inlines
getImage e = do
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 536bddd39..e5c310ffc 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -19,10 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.HTML
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of HTML to 'Pandoc' document.
@@ -36,18 +36,17 @@ module Text.Pandoc.Readers.HTML ( readHtml
, isCommentTag
) where
-import Text.ParserCombinators.Parsec
-import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import Text.Pandoc.Builder (text, toList)
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
-import Data.Char ( isDigit, toLower )
-import Control.Monad ( liftM, guard, when )
+import Data.Char ( isDigit )
+import Control.Monad ( liftM, guard, when, mzero )
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -56,11 +55,11 @@ isSpace '\n' = True
isSpace _ = False
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState -- ^ Parser state
+readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readHtml st inp = Pandoc meta blocks
- where blocks = readWith parseBody st rest
+readHtml opts inp = Pandoc meta blocks
+ where blocks = readWith parseBody def{ stateOptions = opts } rest
tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
hasHeader = any (~== TagOpen "head" []) tags
@@ -68,7 +67,7 @@ readHtml st inp = Pandoc meta blocks
then parseHeader tags
else (Meta [] [] [], tags)
-type TagParser = GenParser (Tag String) ParserState
+type TagParser = Parser [Tag String] ParserState
-- TODO - fix this - not every header has a title tag
parseHeader :: [Tag String] -> (Meta, [Tag String])
@@ -96,18 +95,6 @@ block = choice
, pRawHtmlBlock
]
--- repeated in SelfContained -- consolidate eventually
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
-
pList :: TagParser [Block]
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -126,25 +113,22 @@ pBulletList = try $ do
pOrderedList :: TagParser [Block]
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
- st <- getState
- let (start, style) = if stateStrict st
- then (1, DefaultStyle)
- else (sta', sty')
- where sta = fromMaybe "1" $
- lookup "start" attribs
- sta' = if all isDigit sta
- then read sta
- else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
+ let (start, style) = (sta', sty')
+ where sta = fromMaybe "1" $
+ lookup "start" attribs
+ sta' = if all isDigit sta
+ then read sta
+ else 1
+ sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -196,8 +180,8 @@ pRawTag = do
pRawHtmlBlock :: TagParser [Block]
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
- state <- getState
- if stateParseRaw state && not (null raw)
+ parseRaw <- getOption readerParseRaw
+ if parseRaw && not (null raw)
then return [RawBlock "html" raw]
else return []
@@ -235,7 +219,7 @@ pSimpleTable = try $ do
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
- TagClose _ <- pSatisfy (~== TagClose "table")
+ TagClose _ <- pSatisfy (~== TagClose "table")
let cols = maximum $ map length rows
let aligns = replicate cols AlignLeft
let widths = replicate cols 0
@@ -281,10 +265,7 @@ pCodeBlock = try $ do
let attribsId = fromMaybe "" $ lookup "id" attr
let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- st <- getState
- let attribs = if stateStrict st
- then ("",[],[])
- else (attribsId, attribsClasses, attribsKV)
+ let attribs = (attribsId, attribsClasses, attribsKV)
return [CodeBlock attribs result]
inline :: TagParser [Inline]
@@ -310,7 +291,7 @@ pLocation = do
pSat :: (Tag String -> Bool) -> TagParser (Tag String)
pSat f = do
pos <- getPosition
- token show (const pos) (\x -> if f x then Just x else Nothing)
+ token show (const pos) (\x -> if f x then Just x else Nothing)
pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String)
pSatisfy f = try $ optional pLocation >> pSat f
@@ -332,14 +313,13 @@ pStrong :: TagParser [Inline]
pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
pSuperscript :: TagParser [Inline]
-pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript
+pSuperscript = pInlinesInTags "sup" Superscript
pSubscript :: TagParser [Inline]
-pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript
+pSubscript = pInlinesInTags "sub" Subscript
pStrikeout :: TagParser [Inline]
pStrikeout = do
- failIfStrict
pInlinesInTags "s" Strikeout <|>
pInlinesInTags "strike" Strikeout <|>
pInlinesInTags "del" Strikeout <|>
@@ -381,8 +361,8 @@ pCode = try $ do
pRawHtmlInline :: TagParser [Inline]
pRawHtmlInline = do
result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
- state <- getState
- if stateParseRaw state
+ parseRaw <- getOption readerParseRaw
+ if parseRaw
then return [RawInline "html" $ renderTags' [result]]
else return []
@@ -417,7 +397,7 @@ pCloses tagtype = try $ do
(TagClose "ul") | tagtype == "li" -> return ()
(TagClose "ol") | tagtype == "li" -> return ()
(TagClose "dl") | tagtype == "li" -> return ()
- _ -> pzero
+ _ -> mzero
pTagText :: TagParser [Inline]
pTagText = try $ do
@@ -432,11 +412,11 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: GenParser Char ParserState Inline
+pTagContents :: Parser [Char] ParserState Inline
pTagContents =
pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
-pStr :: GenParser Char ParserState Inline
+pStr :: Parser [Char] ParserState Inline
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
@@ -455,13 +435,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: GenParser Char ParserState Inline
+pSymbol :: Parser [Char] ParserState Inline
pSymbol = satisfy isSpecial >>= return . Str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: GenParser Char ParserState Inline
+pBad :: Parser [Char] ParserState Inline
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -495,7 +475,7 @@ pBad = do
_ -> '?'
return $ Str [c']
-pSpace :: GenParser Char ParserState Inline
+pSpace :: Parser [Char] ParserState Inline
pSpace = many1 (satisfy isSpace) >> return Space
--
@@ -593,20 +573,19 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String
+htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
- let nonTagChunk = many1 $ satisfy (/= '<')
let stopper = htmlTag (~== TagClose t)
let anytag = liftM snd $ htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
- (nonTagChunk <|> htmlInBalanced (const True) <|> anytag)
+ (htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String)
+htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String)
htmlTag f = try $ do
lookAhead (char '<')
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -617,7 +596,7 @@ htmlTag f = try $ do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
- return (next, "<!--" ++ s ++ "-->")
+ return (next, "<!--" ++ s ++ "-->")
_ -> do
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3178945e4..4a5a14d6a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,10 +33,10 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
handleIncludes
) where
-import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad
@@ -47,12 +47,13 @@ import Data.Monoid
import System.FilePath (replaceExtension)
import Data.List (intercalate)
import qualified Data.Map as M
+import qualified Control.Exception as E
-- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ParserState -- ^ Parser state, including options for parser
+readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readLaTeX = readWith parseLaTeX
+readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
parseLaTeX = do
@@ -64,7 +65,7 @@ parseLaTeX = do
let date' = stateDate st
return $ Pandoc (Meta title' authors' date') $ toList bs
-type LP = GenParser Char ParserState
+type LP = Parser [Char] ParserState
anyControlSeq :: LP String
anyControlSeq = do
@@ -186,7 +187,7 @@ inline = (mempty <$ comment)
<|> (mathInline $ char '$' *> mathChars <* char '$')
<|> (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
- <|> (failUnlessLHS *> char '|' *> doLHSverb)
+ <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str <$> count 1 tildeEscape)
<|> (str <$> string "]")
<|> (str <$> string "#") -- TODO print warning?
@@ -230,14 +231,14 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getState >>= guard . stateParseRaw >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> (withRaw optargs))
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getState >>= guard . stateParseRaw >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> (withRaw optargs))
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
@@ -321,7 +322,7 @@ inlineCommand :: LP Inlines
inlineCommand = try $ do
name <- anyControlSeq
guard $ not $ isBlockCommand name
- parseRaw <- stateParseRaw `fmap` getState
+ parseRaw <- getOption readerParseRaw
star <- option "" (string "*")
let name' = name ++ star
let rawargs = withRaw (skipopts *> option "" dimenarg
@@ -336,7 +337,7 @@ inlineCommand = try $ do
Nothing -> raw
unlessParseRaw :: LP ()
-unlessParseRaw = getState >>= guard . not . stateParseRaw
+unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
@@ -660,7 +661,7 @@ environment = do
rawEnv :: String -> LP Blocks
rawEnv name = do
let addBegin x = "\\begin{" ++ name ++ "}" ++ x
- parseRaw <- stateParseRaw `fmap` getState
+ parseRaw <- getOption readerParseRaw
if parseRaw
then (rawBlock "latex" . addBegin) <$>
(withRaw (env name blocks) >>= applyMacros' . snd)
@@ -671,8 +672,9 @@ handleIncludes :: String -> IO String
handleIncludes [] = return []
handleIncludes ('\\':xs) =
case runParser include defaultParserState "input" ('\\':xs) of
- Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f)
- (\_ -> return "")
+ Right (fs, rest) -> do let getfile f = E.catch (UTF8.readFile f)
+ (\e -> let _ = (e :: E.SomeException)
+ in return "")
yss <- mapM getfile fs
(intercalate "\n" yss ++) `fmap`
handleIncludes rest
@@ -713,10 +715,10 @@ verbatimEnv = do
rest <- getInput
return (r,rest)
-rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock :: Parser [Char] ParserState String
rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand)
-rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline :: Parser [Char] ParserState Inline
rawLaTeXInline = do
(res, raw) <- withRaw inlineCommand
if res == mempty
@@ -735,7 +737,7 @@ environments = M.fromList
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
, ("enumerate", ordered_list)
- , ("code", failUnlessLHS *>
+ , ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
, ("verbatim", codeBlock <$> (verbEnv "verbatim"))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 51a727996..2407e137c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE FlexibleInstances, TypeSynonymInstances,
+ GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -33,26 +35,34 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
import Data.List ( transpose, sortBy, findIndex, intercalate )
import qualified Data.Map as M
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
+import Data.Char ( isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
-import Text.Pandoc.Generic
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Options
+import Text.Pandoc.Shared hiding (compactify)
+import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
-import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, guard, mzero)
+import Data.Monoid (mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
+import qualified Data.Set as Set
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+readMarkdown :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
+readMarkdown opts s =
+ (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
--
-- Constants and data structure definitions
@@ -70,7 +80,7 @@ isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
-setextHChars :: [Char]
+setextHChars :: String
setextHChars = "=-"
isBlank :: Char -> Bool
@@ -83,71 +93,72 @@ isBlank _ = False
-- auxiliary functions
--
-indentSpaces :: GenParser Char ParserState [Char]
+isNull :: F Inlines -> Bool
+isNull ils = B.isNull $ runF ils def
+
+spnl :: Parser [Char] st ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+indentSpaces :: Parser [Char] ParserState String
indentSpaces = try $ do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
count tabStop (char ' ') <|>
string "\t" <?> "indentation"
-nonindentSpaces :: GenParser Char ParserState [Char]
+nonindentSpaces :: Parser [Char] ParserState String
nonindentSpaces = do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
sps <- many (char ' ')
- if length sps < tabStop
+ if length sps < tabStop
then return sps
else unexpected "indented line"
-skipNonindentSpaces :: GenParser Char ParserState ()
+skipNonindentSpaces :: Parser [Char] ParserState ()
skipNonindentSpaces = do
- state <- getState
- atMostSpaces (stateTabStop state - 1)
+ tabStop <- getOption readerTabStop
+ atMostSpaces (tabStop - 1)
-atMostSpaces :: Int -> GenParser Char ParserState ()
+atMostSpaces :: Int -> Parser [Char] ParserState ()
atMostSpaces 0 = notFollowedBy (char ' ')
atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return ()
-litChar :: GenParser Char ParserState Char
+litChar :: Parser [Char] ParserState Char
litChar = escapedChar'
<|> noneOf "\n"
<|> (newline >> notFollowedBy blankline >> return ' ')
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine :: GenParser tok st ()
-failUnlessBeginningOfLine = do
- pos <- getPosition
- if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: GenParser Char ParserState Inline
- -> GenParser Char ParserState [Inline]
-inlinesInBalancedBrackets parser = try $ do
+inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
+inlinesInBalancedBrackets = try $ do
char '['
- result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- guard (res == "[")
- bal <- inlinesInBalancedBrackets parser
- return $ [Str "["] ++ bal ++ [Str "]"])
- <|> (count 1 parser))
+ result <- manyTill ( (do lookAhead $ try $ do x <- inline
+ guard (runF x def == B.str "[")
+ bal <- inlinesInBalancedBrackets
+ return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
+ <|> inline)
(char ']')
- return $ concat result
+ return $ mconcat result
--
-- document structure
--
-titleLine :: GenParser Char ParserState [Inline]
+titleLine :: Parser [Char] ParserState (F Inlines)
titleLine = try $ do
char '%'
skipSpaces
res <- many $ (notFollowedBy newline >> inline)
<|> try (endline >> whitespace)
newline
- return $ normalizeSpaces res
+ return $ trimInlinesF $ mconcat res
-authorsLine :: GenParser Char ParserState [[Inline]]
-authorsLine = try $ do
+authorsLine :: Parser [Char] ParserState (F [Inlines])
+authorsLine = try $ do
char '%'
skipSpaces
authors <- sepEndBy (many (notFollowedBy (satisfy $ \c ->
@@ -155,67 +166,63 @@ authorsLine = try $ do
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
- return $ filter (not . null) $ map normalizeSpaces authors
+ return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
-dateLine :: GenParser Char ParserState [Inline]
+dateLine :: Parser [Char] ParserState (F Inlines)
dateLine = try $ do
char '%'
skipSpaces
- date <- manyTill inline newline
- return $ normalizeSpaces date
-
-titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline])
-titleBlock = try $ do
- failIfStrict
- title <- option [] titleLine
- author <- option [] authorsLine
- date <- option [] dateLine
+ trimInlinesF . mconcat <$> manyTill inline newline
+
+titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+titleBlock = pandocTitleBlock <|> mmdTitleBlock
+
+pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+pandocTitleBlock = try $ do
+ guardEnabled Ext_pandoc_title_block
+ title <- option mempty titleLine
+ author <- option (return []) authorsLine
+ date <- option mempty dateLine
optional blanklines
return (title, author, date)
-parseMarkdown :: GenParser Char ParserState Pandoc
+mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines)
+mmdTitleBlock = try $ do
+ guardEnabled Ext_mmd_title_block
+ kvPairs <- many1 kvPair
+ blanklines
+ let title = maybe mempty return $ lookup "title" kvPairs
+ let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs
+ let date = maybe mempty return $ lookup "date" kvPairs
+ return (title, author, date)
+
+kvPair :: Parser [Char] ParserState (String, Inlines)
+kvPair = try $ do
+ key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ val <- manyTill anyChar
+ (try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ let key' = concat $ words $ map toLower key
+ let val' = trimInlines $ B.text val
+ return (key',val')
+
+parseMarkdown :: Parser [Char] ParserState Pandoc
parseMarkdown = do
-- markdown allows raw HTML
- updateState (\state -> state { stateParseRaw = True })
- startPos <- getPosition
- -- go through once just to get list of reference keys and notes
- -- docMinusKeys is the raw document with blanks where the keys/notes were...
- st <- getState
- let firstPassParser = referenceKey
- <|> (if stateStrict st then pzero else noteBlock)
- <|> liftM snd (withRaw codeBlockDelimited)
- <|> lineClump
- docMinusKeys <- liftM concat $ manyTill firstPassParser eof
- setInput docMinusKeys
- setPosition startPos
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- -- now parse it for real...
- (title, author, date) <- option ([],[],[]) titleBlock
+ updateState $ \state -> state { stateOptions =
+ let oldOpts = stateOptions state in
+ oldOpts{ readerParseRaw = True } }
+ (title, authors, date) <- option (mempty,return [],mempty) titleBlock
blocks <- parseBlocks
- let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks
- -- if there are labeled examples, change references into numbers
- examples <- liftM stateExamples getState
- let handleExampleRef :: Inline -> Inline
- handleExampleRef z@(Str ('@':xs)) =
- case M.lookup xs examples of
- Just n -> Str (show n)
- Nothing -> z
- handleExampleRef z = z
- if M.null examples
- then return doc
- else return $ bottomUp handleExampleRef doc
-
---
--- initial pass for references and notes
---
+ st <- getState
+ return $ B.setTitle (runF title st)
+ $ B.setAuthors (runF authors st)
+ $ B.setDate (runF date st)
+ $ B.doc $ runF blocks st
-referenceKey :: GenParser Char ParserState [Char]
+referenceKey :: Parser [Char] ParserState (F Blocks)
referenceKey = try $ do
- startPos <- getPosition
skipNonindentSpaces
- lab <- reference
+ (_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
let sourceURL = liftM unwords $ many $ try $ do
@@ -223,22 +230,20 @@ referenceKey = try $ do
skipMany spaceChar
optional $ newline >> notFollowedBy blankline
skipMany spaceChar
- notFollowedBy' reference
+ notFollowedBy' (() <$ reference)
many1 $ escapedChar' <|> satisfy (not . isBlank)
let betweenAngles = try $ char '<' >>
manyTill (escapedChar' <|> litChar) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
- endPos <- getPosition
let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys }
+ return $ return mempty
-referenceTitle :: GenParser Char ParserState String
+referenceTitle :: Parser [Char] ParserState String
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words)
@@ -247,25 +252,38 @@ referenceTitle = try $ do
notFollowedBy (noneOf ")\n")))
return $ fromEntities tit
-noteMarker :: GenParser Char ParserState [Char]
+-- | PHP Markdown Extra style abbreviation key. Currently
+-- we just skip them, since Pandoc doesn't have an element for
+-- an abbreviation.
+abbrevKey :: Parser [Char] ParserState (F Blocks)
+abbrevKey = do
+ guardEnabled Ext_abbreviations
+ try $ do
+ char '*'
+ reference
+ char ':'
+ skipMany (satisfy (/= '\n'))
+ blanklines
+ return $ return mempty
+
+noteMarker :: Parser [Char] ParserState String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-rawLine :: GenParser Char ParserState [Char]
+rawLine :: Parser [Char] ParserState String
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: GenParser Char ParserState [Char]
+rawLines :: Parser [Char] ParserState String
rawLines = do
first <- anyLine
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState (F Blocks)
noteBlock = try $ do
- startPos <- getPosition
skipNonindentSpaces
ref <- noteMarker
char ':'
@@ -275,87 +293,75 @@ noteBlock = try $ do
(try (blankline >> indentSpaces >>
notFollowedBy blankline))
optional blanklines
- endPos <- getPosition
- let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ parsed <- parseFromString parseBlocks $ unlines raw ++ "\n"
+ let newnote = (ref, parsed)
+ updateState $ \s -> s { stateNotes' = newnote : stateNotes' s }
+ return mempty
--
-- parsing blocks
--
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = manyTill block eof
-
-block :: GenParser Char ParserState Block
-block = do
- st <- getState
- choice (if stateStrict st
- then [ header
- , codeBlockIndented
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , htmlBlock
- , para
- , plain
- , nullBlock ]
- else [ codeBlockDelimited
- , macro
- , header
- , table
- , codeBlockIndented
- , lhsCodeBlock
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , definitionList
- , rawTeXBlock
- , para
- , rawHtmlBlocks
- , plain
- , nullBlock ]) <?> "block"
+parseBlocks :: Parser [Char] ParserState (F Blocks)
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: Parser [Char] ParserState (F Blocks)
+block = choice [ codeBlockFenced
+ , codeBlockBackticks
+ , guardEnabled Ext_latex_macros *> (mempty <$ macro)
+ , header
+ , rawTeXBlock
+ , htmlBlock
+ , table
+ , codeBlockIndented
+ , lhsCodeBlock
+ , blockQuote
+ , hrule
+ , bulletList
+ , orderedList
+ , definitionList
+ , noteBlock
+ , referenceKey
+ , abbrevKey
+ , para
+ , plain
+ ] <?> "block"
--
-- header blocks
--
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
-atxHeader :: GenParser Char ParserState Block
+atxHeader :: Parser [Char] ParserState (F Blocks)
atxHeader = try $ do
level <- many1 (char '#') >>= return . length
notFollowedBy (char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- manyTill inline atxClosing >>= return . normalizeSpaces
- return $ Header level text
+ text <- trimInlinesF . mconcat <$> manyTill inline atxClosing
+ return $ B.header level <$> text
-atxClosing :: GenParser Char st [Char]
+atxClosing :: Parser [Char] st String
atxClosing = try $ skipMany (char '#') >> blanklines
-setextHeader :: GenParser Char ParserState Block
+setextHeader :: Parser [Char] ParserState (F Blocks)
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- many1Till inline newline
+ text <- trimInlinesF . mconcat <$> many1Till inline newline
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- return $ Header level (normalizeSpaces text)
+ return $ B.header level <$> text
--
-- hrule block
--
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -363,32 +369,26 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return HorizontalRule
+ return $ return B.horizontalRule
--
-- code blocks
--
-indentedLine :: GenParser Char ParserState [Char]
+indentedLine :: Parser [Char] ParserState String
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
blockDelimiter :: (Char -> Bool)
-> Maybe Int
- -> GenParser Char st (Int, (String, [String], [(String, String)]), Char)
+ -> Parser [Char] st Int
blockDelimiter f len = try $ do
c <- lookAhead (satisfy f)
- size <- case len of
- Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
- many spaceChar
- attr <- option ([],[],[])
- $ attributes -- ~~~ {.ruby}
- <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby
- blankline
- return (size, attr, c)
+ case len of
+ Just l -> count l (char c) >> many (char c) >> return l
+ Nothing -> count 3 (char c) >> many (char c) >>=
+ return . (+ 3) . length
-attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attributes :: Parser [Char] st (String, [String], [(String, String)])
attributes = try $ do
char '{'
spnl
@@ -400,28 +400,28 @@ attributes = try $ do
| otherwise = firstNonNull xs
return (firstNonNull $ reverse ids, concat classes, concat keyvals)
-attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attribute :: Parser [Char] st (String, [String], [(String, String)])
attribute = identifierAttr <|> classAttr <|> keyValAttr
-identifier :: GenParser Char st [Char]
+identifier :: Parser [Char] st String
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return (first:rest)
-identifierAttr :: GenParser Char st ([Char], [a], [a1])
+identifierAttr :: Parser [Char] st (String, [a], [a1])
identifierAttr = try $ do
char '#'
result <- identifier
return (result,[],[])
-classAttr :: GenParser Char st ([Char], [[Char]], [a])
+classAttr :: Parser [Char] st (String, [String], [a])
classAttr = try $ do
char '.'
result <- identifier
return ("",[result],[])
-keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
+keyValAttr :: Parser [Char] st (String, [a], [(String, String)])
keyValAttr = try $ do
key <- identifier
char '='
@@ -430,33 +430,49 @@ keyValAttr = try $ do
<|> many nonspaceChar
return ("",[],[(key,val)])
-codeBlockDelimited :: GenParser Char st Block
-codeBlockDelimited = try $ do
- (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+codeBlockFenced :: Parser [Char] ParserState (F Blocks)
+codeBlockFenced = try $ do
+ guardEnabled Ext_fenced_code_blocks
+ size <- blockDelimiter (=='~') Nothing
+ skipMany spaceChar
+ attr <- option ([],[],[]) $
+ guardEnabled Ext_fenced_code_attributes >> attributes
+ blankline
+ contents <- manyTill anyLine (blockDelimiter (=='~') (Just size))
+ blanklines
+ return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+
+codeBlockBackticks :: Parser [Char] ParserState (F Blocks)
+codeBlockBackticks = try $ do
+ guardEnabled Ext_backtick_code_blocks
+ blockDelimiter (=='`') (Just 3)
+ skipMany spaceChar
+ cls <- many1 alphaNum
+ blankline
+ contents <- manyTill anyLine $ blockDelimiter (=='`') (Just 3)
blanklines
- return $ CodeBlock attr $ intercalate "\n" contents
+ return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents
-codeBlockIndented :: GenParser Char ParserState Block
+codeBlockIndented :: Parser [Char] ParserState (F Blocks)
codeBlockIndented = do
- contents <- many1 (indentedLine <|>
+ contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
return $ b ++ l))
optional blanklines
- st <- getState
- return $ CodeBlock ("", stateIndentedCodeClasses st, []) $
+ classes <- getOption readerIndentedCodeClasses
+ return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState (F Blocks)
lhsCodeBlock = do
- failUnlessLHS
- liftM (CodeBlock ("",["sourceCode","literate","haskell"],[]))
- (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)
- <|> liftM (CodeBlock ("",["sourceCode","haskell"],[]))
- lhsCodeBlockInverseBird
+ guardEnabled Ext_literate_haskell
+ (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
+ <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: GenParser Char ParserState String
+lhsCodeBlockLaTeX :: Parser [Char] ParserState String
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
@@ -464,13 +480,13 @@ lhsCodeBlockLaTeX = try $ do
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: GenParser Char ParserState String
+lhsCodeBlockBird :: Parser [Char] ParserState String
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: GenParser Char ParserState String
+lhsCodeBlockInverseBird :: Parser [Char] ParserState String
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String
+lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -482,25 +498,24 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> GenParser Char st [Char]
+birdTrackLine :: Char -> Parser [Char] st String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
when (c == '<') $ notFollowedBy letter
manyTill anyChar newline
-
--
-- block quotes
--
-emailBlockQuoteStart :: GenParser Char ParserState Char
+emailBlockQuoteStart :: Parser [Char] ParserState Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
-emailBlockQuote :: GenParser Char ParserState [[Char]]
+emailBlockQuote :: Parser [Char] ParserState [String]
emailBlockQuote = try $ do
emailBlockQuoteStart
- raw <- sepBy (many (nonEndline <|>
+ raw <- sepBy (many (nonEndline <|>
(try (endline >> notFollowedBy emailBlockQuoteStart >>
return '\n'))))
(try (newline >> emailBlockQuoteStart))
@@ -508,51 +523,50 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: GenParser Char ParserState Block
-blockQuote = do
+blockQuote :: Parser [Char] ParserState (F Blocks)
+blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ BlockQuote contents
-
+ return $ B.blockQuote <$> contents
+
--
-- list blocks
--
-bulletListStart :: GenParser Char ParserState ()
+bulletListStart :: Parser [Char] ParserState ()
bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
+ notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
spaceChar
skipSpaces
-anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim)
+anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim)
anyOrderedListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- state <- getState
- if stateStrict state
- then do many1 digit
- char '.'
- spaceChar
- return (1, DefaultStyle, DefaultDelim)
- else do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name, insist on more than one space
- if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then char '\t' <|> (try $ char ' ' >> spaceChar)
- else spaceChar
- skipSpaces
- return (num, style, delim)
-
-listStart :: GenParser Char ParserState ()
+ (guardDisabled Ext_fancy_lists >>
+ do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim))
+ <|> do (num, style, delim) <- anyOrderedListMarker
+ -- if it could be an abbreviated first name, insist on more than one space
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (try $ char ' ' >> spaceChar)
+ else spaceChar
+ skipSpaces
+ return (num, style, delim)
+
+listStart :: Parser [Char] ParserState ()
listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-- parse a line of a list item (start = parser for beginning of list item)
-listLine :: GenParser Char ParserState [Char]
+listLine :: Parser [Char] ParserState String
listLine = try $ do
notFollowedBy blankline
notFollowedBy' (do indentSpaces
@@ -562,8 +576,8 @@ listLine = try $ do
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState a
- -> GenParser Char ParserState [Char]
+rawListItem :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState String
rawListItem start = try $ do
start
first <- listLine
@@ -571,17 +585,17 @@ rawListItem start = try $ do
blanks <- many blankline
return $ concat (first:rest) ++ blanks
--- continuation of a list item - indented and separated by blankline
+-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: GenParser Char ParserState [Char]
+listContinuation :: Parser [Char] ParserState String
listContinuation = try $ do
lookAhead indentSpaces
result <- many1 listContinuationLine
blanks <- many blankline
return $ concat result ++ blanks
-listContinuationLine :: GenParser Char ParserState [Char]
+listContinuationLine :: Parser [Char] ParserState String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
@@ -589,8 +603,8 @@ listContinuationLine = try $ do
result <- manyTill anyChar newline
return $ result ++ "\n"
-listItem :: GenParser Char ParserState a
- -> GenParser Char ParserState [Block]
+listItem :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState (F Blocks)
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
@@ -606,38 +620,59 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: GenParser Char ParserState Block
+orderedList :: Parser [Char] ParserState (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- items <- many1 $ listItem $ try $
- do optional newline -- if preceded by a Plain block in a list context
- skipNonindentSpaces
- orderedListMarker style delim
- return $ OrderedList (start, style, delim) $ compactify items
-
-bulletList :: GenParser Char ParserState Block
-bulletList =
- many1 (listItem bulletListStart) >>= return . BulletList . compactify
+ unless ((style == DefaultStyle || style == Decimal || style == Example) &&
+ (delim == DefaultDelim || delim == Period)) $
+ guardEnabled Ext_fancy_lists
+ when (style == Example) $ guardEnabled Ext_example_lists
+ items <- fmap sequence $ many1 $ listItem
+ ( try $ do
+ optional newline -- if preceded by Plain block in a list
+ skipNonindentSpaces
+ orderedListMarker style delim )
+ start' <- option 1 $ guardEnabled Ext_startnum >> return start
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+-- | Change final list item from @Para@ to @Plain@ if the list contains
+-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].)
+compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
+ -> [Blocks]
+compactify [] = []
+compactify items =
+ let (others, final) = (init items, last items)
+ in case reverse (B.toList final) of
+ (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
+ -- if this is only Para, change to Plain
+ [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
+ _ -> items
+ _ -> items
+
+bulletList :: Parser [Char] ParserState (F Blocks)
+bulletList = do
+ items <- fmap sequence $ many1 $ listItem bulletListStart
+ return $ B.bulletList <$> fmap compactify items
-- definition lists
-defListMarker :: GenParser Char ParserState ()
+defListMarker :: Parser [Char] ParserState ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
- st <- getState
- let tabStop = stateTabStop st
+ tabStop <- getOption readerTabStop
let remaining = tabStop - (length sps + 1)
if remaining > 0
then count remaining (char ' ') <|> string "\t"
- else pzero
+ else mzero
return ()
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks]))
definitionListItem = try $ do
+ guardEnabled Ext_definition_lists
-- first, see if this has any chance of being a definition list:
lookAhead (anyLine >> optional blankline >> defListMarker)
- term <- manyTill inline newline
+ term <- trimInlinesF . mconcat <$> manyTill inline newline
optional blankline
raw <- many1 defRawBlock
state <- getState
@@ -645,9 +680,9 @@ definitionListItem = try $ do
-- parse the extracted block, which may contain various block elements:
contents <- mapM (parseFromString parseBlocks) raw
updateState (\st -> st {stateParserContext = oldContext})
- return ((normalizeSpaces term), contents)
+ return $ liftM2 (,) term (sequence contents)
-defRawBlock :: GenParser Char ParserState [Char]
+defRawBlock :: Parser [Char] ParserState String
defRawBlock = try $ do
defListMarker
firstline <- anyLine
@@ -659,119 +694,149 @@ defRawBlock = try $ do
return $ unlines lns ++ trl
return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState (F Blocks)
definitionList = do
- items <- many1 definitionListItem
- -- "compactify" the definition list:
- let defs = map snd items
- let defBlocks = reverse $ concat $ concat defs
- let isPara (Para _) = True
+ items <- fmap sequence $ many1 definitionListItem
+ return $ B.definitionList <$> fmap compactifyDL items
+
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
+ let defs = concatMap snd items
+ defBlocks = reverse $ concatMap B.toList defs
+ isPara (Para _) = True
isPara _ = False
- let items' = case take 1 defBlocks of
- [Para x] -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = last ds
- ds' = init ds ++
- [init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
- return $ DefinitionList items'
+ in case defBlocks of
+ (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
+ then let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ else items
+ _ -> items
--
-- paragraph block
--
+{-
isHtmlOrBlank :: Inline -> Bool
isHtmlOrBlank (RawInline "html" _) = True
isHtmlOrBlank (Space) = True
isHtmlOrBlank (LineBreak) = True
isHtmlOrBlank _ = False
+-}
-para :: GenParser Char ParserState Block
-para = try $ do
- result <- liftM normalizeSpaces $ many1 inline
- guard $ not . all isHtmlOrBlank $ result
- option (Plain result) $ try $ do
+para :: Parser [Char] ParserState (F Blocks)
+para = try $ do
+ result <- trimInlinesF . mconcat <$> many1 inline
+ -- TODO remove this if not really needed? and remove isHtmlOrBlank
+ -- guard $ not $ F.all isHtmlOrBlank result
+ option (B.plain <$> result) $ try $ do
newline
- blanklines <|>
- (getState >>= guard . stateStrict >>
- lookAhead (blockQuote <|> header) >> return "")
- return $ Para result
+ (blanklines >> return mempty)
+ <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
+ <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+ return $ B.para <$> result
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState (F Blocks)
+plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces
---
+--
-- raw html
--
-htmlElement :: GenParser Char ParserState [Char]
+htmlElement :: Parser [Char] ParserState String
htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
-htmlBlock :: GenParser Char ParserState Block
-htmlBlock = try $ do
- failUnlessBeginningOfLine
+htmlBlock :: Parser [Char] ParserState (F Blocks)
+htmlBlock = do
+ guardEnabled Ext_raw_html
+ res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
+ <|> htmlBlock'
+ return $ return $ B.rawBlock "html" res
+
+htmlBlock' :: Parser [Char] ParserState String
+htmlBlock' = try $ do
first <- htmlElement
finalSpace <- many spaceChar
finalNewlines <- many newline
- return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines
+ return $ first ++ finalSpace ++ finalNewlines
-strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = do
- failUnlessBeginningOfLine
- htmlInBalanced (not . isInlineTag)
+strictHtmlBlock :: Parser [Char] ParserState String
+strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: GenParser Char ParserState String
+rawVerbatimBlock :: Parser [Char] ParserState String
rawVerbatimBlock = try $ do
(TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
- t == "pre" || t == "style" || t == "script")
- (const True))
+ t == "pre" || t == "style" || t == "script")
+ (const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
-rawTeXBlock :: GenParser Char ParserState Block
+rawTeXBlock :: Parser [Char] ParserState (F Blocks)
rawTeXBlock = do
- failIfStrict
- result <- liftM (RawBlock "latex") rawLaTeXBlock
- <|> liftM (RawBlock "context") rawConTeXtEnvironment
+ guardEnabled Ext_raw_tex
+ result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
+ <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
spaces
- return result
+ return $ return result
-rawHtmlBlocks :: GenParser Char ParserState Block
+rawHtmlBlocks :: Parser [Char] ParserState String
rawHtmlBlocks = do
- htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
- liftM snd (htmlTag isBlockTag)
- sps <- do sp1 <- many spaceChar
- sp2 <- option "" (blankline >> return "\n")
- sp3 <- many spaceChar
- sp4 <- option "" blanklines
- return $ sp1 ++ sp2 ++ sp3 ++ sp4
- -- note: we want raw html to be able to
- -- precede a code block, when separated
- -- by a blank line
- return $ blk ++ sps
+ htmlBlocks <- many1 $ try $ do
+ s <- rawVerbatimBlock <|> try (
+ do (t,raw) <- htmlTag isBlockTag
+ exts <- getOption readerExtensions
+ -- if open tag, need markdown="1" if
+ -- markdown_attributes extension is set
+ case t of
+ TagOpen _ as
+ | Ext_markdown_attribute `Set.member`
+ exts ->
+ if "markdown" `notElem`
+ map fst as
+ then mzero
+ else return $
+ stripMarkdownAttribute raw
+ | otherwise -> return raw
+ _ -> return raw )
+ sps <- do sp1 <- many spaceChar
+ sp2 <- option "" (blankline >> return "\n")
+ sp3 <- many spaceChar
+ sp4 <- option "" blanklines
+ return $ sp1 ++ sp2 ++ sp3 ++ sp4
+ -- note: we want raw html to be able to
+ -- precede a code block, when separated
+ -- by a blank line
+ return $ s ++ sps
let combined = concat htmlBlocks
- let combined' = if last combined == '\n' then init combined else combined
- return $ RawBlock "html" combined'
+ return $ if last combined == '\n' then init combined else combined
+
+-- remove markdown="1" attribute
+stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
+ where filterAttrib (TagOpen t as) = TagOpen t
+ [(k,v) | (k,v) <- as, k /= "markdown"]
+ filterAttrib x = x
--
-- Tables
---
+--
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> GenParser Char st (Int, Int)
+dashedLine :: Char
+ -> Parser [Char] st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
return $ (length dashes, length $ dashes ++ sp)
--- Parse a table header with dashed lines of '-' preceded by
+-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
-simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+simpleTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -784,84 +849,104 @@ simpleTableHeader headless = try $ do
-- If no header, calculate alignment on basis of first row of text
rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
if headless
- then lookAhead anyLine
+ then lookAhead anyLine
else return rawContent
let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
- else rawHeads
- heads <- mapM (parseFromString (many plain)) $
- map removeLeadingTrailingSpace rawHeads'
+ else rawHeads
+ heads <- fmap sequence
+ $ mapM (parseFromString (mconcat <$> many plain))
+ $ map removeLeadingTrailingSpace rawHeads'
return (heads, aligns, indices)
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String]
+ -> Int
+ -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len =
+ let nonempties = filter (not . null) $ map removeTrailingSpace strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: GenParser Char ParserState [Char]
+tableFooter :: Parser [Char] ParserState String
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-- Parse a table separator - dashed line.
-tableSep :: GenParser Char ParserState Char
+tableSep :: Parser [Char] ParserState Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
rawTableLine :: [Int]
- -> GenParser Char ParserState [String]
+ -> Parser [Char] ParserState [String]
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
+ return $ map removeLeadingTrailingSpace $ tail $
splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
- -> GenParser Char ParserState [[Block]]
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+ -> Parser [Char] ParserState (F [Blocks])
+tableLine indices = rawTableLine indices >>=
+ fmap sequence . mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: [Int]
- -> GenParser Char ParserState [[Block]]
+ -> Parser [Char] ParserState (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
+ fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: GenParser Char ParserState [Inline]
+tableCaption :: Parser [Char] ParserState (F Inlines)
tableCaption = try $ do
+ guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
+ trimInlinesF . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
+ (aligns, _widths, heads', lines') <-
+ tableWith (simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines)
- tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
- return $ Table c a (replicate (length a) 0) h l
+ return (aligns, replicate (length aligns) 0, heads', lines')
-- Parse a multiline table: starts with row of '-' on top, then header
-- (which may be multiline), then the rows,
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
else tableSep >>~ notFollowedBy blankline
rawContent <- if headless
- then return $ repeat ""
+ then return $ repeat ""
else many1
(notFollowedBy tableSep >> many1Till anyChar newline)
initSp <- nonindentSpaces
@@ -872,54 +957,206 @@ multilineTableHeader headless = try $ do
rawHeadsList <- if headless
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
- else return $ transpose $ map
+ else return $ transpose $ map
(\ln -> tail $ splitStringByIndices (init indices) ln)
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- heads <- mapM (parseFromString (many plain)) $
+ heads <- fmap sequence $
+ mapM (parseFromString (mconcat <$> many plain)) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String]
- -> Int
- -> Alignment
-alignType [] _ = AlignDefault
-alignType strLst len =
- let nonempties = filter (not . null) $ map removeTrailingSpace strLst
- (leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable = gridTableWith block tableCaption
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless =
+ tableWith (gridTableHeader headless) gridTableRow
+ (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitStringByIndices (init indices) $ removeTrailingSpace line
+
+gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
-table :: GenParser Char ParserState Block
-table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <|>
- gridTable False <|> gridTable True <?> "table"
+gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+
+removeFinalBar :: String -> String
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Char -> Parser [Char] ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+gridTableHeader headless = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- fmap sequence $ mapM (parseFromString block) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
---
+gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices line)
+
+-- | Parse row of grid table.
+gridTableRow :: [Int]
+ -> Parser [Char] ParserState (F [Blocks])
+gridTableRow indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ fmap compactify <$> fmap sequence (mapM (parseFromString block) cols)
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+-- | Parse footer for a grid table.
+gridTableFooter :: Parser [Char] ParserState [Char]
+gridTableFooter = blanklines
+
+pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable = try $ do
+ let pipeBreak = nonindentSpaces *> optional (char '|') *>
+ pipeTableHeaderPart `sepBy1` sepPipe <*
+ optional (char '|') <* blankline
+ (heads,aligns) <- try ( pipeBreak >>= \als ->
+ return (return $ replicate (length als) mempty, als))
+ <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
+
+ return (row, als) )
+ lines' <- sequence <$> many1 pipeTableRow
+ blanklines
+ let widths = replicate (length aligns) 0.0
+ return $ (aligns, widths, heads, lines')
+
+sepPipe :: Parser [Char] ParserState ()
+sepPipe = try $ do
+ char '|' <|> char '+'
+ notFollowedBy blankline
+
+-- parse a row, also returning probable alignments for org-table cells
+pipeTableRow :: Parser [Char] ParserState (F [Blocks])
+pipeTableRow = do
+ nonindentSpaces
+ optional (char '|')
+ let cell = mconcat <$>
+ many (notFollowedBy (blankline <|> char '|') >> inline)
+ first <- cell
+ sepPipe
+ rest <- cell `sepBy1` sepPipe
+ optional (char '|')
+ blankline
+ let cells = sequence (first:rest)
+ return $ do
+ cells' <- cells
+ return $ map
+ (\ils ->
+ case trimInlines ils of
+ ils' | B.isNull ils' -> mempty
+ | otherwise -> B.plain $ ils') cells'
+
+pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart = do
+ left <- optionMaybe (char ':')
+ many1 (char '-')
+ right <- optionMaybe (char ':')
+ return $
+ case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter
+
+-- Succeed only if current line contains a pipe.
+scanForPipe :: Parser [Char] st ()
+scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return ()
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'. Variant of the version in
+-- Text.Pandoc.Parsing.
+tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> Parser [Char] ParserState (F [Blocks]))
+ -> Parser [Char] ParserState sep
+ -> Parser [Char] ParserState end
+ -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (heads, aligns, indices) <- headerParser
+ lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ footerParser
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ (aligns, widths, heads, lines')
+
+table :: Parser [Char] ParserState (F Blocks)
+table = try $ do
+ frontCaption <- option Nothing (Just <$> tableCaption)
+ (aligns, widths, heads, lns) <-
+ try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable False) <|>
+ try (guardEnabled Ext_simple_tables >>
+ (simpleTable True <|> simpleTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable True) <|>
+ try (guardEnabled Ext_grid_tables >>
+ (gridTable False <|> gridTable True)) <?> "table"
+ optional blanklines
+ caption <- case frontCaption of
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
+ return $ do
+ caption' <- caption
+ heads' <- heads
+ lns' <- lns
+ return $ B.table caption' (zip aligns widths) heads' lns'
+
+--
-- inline
--
-inline :: GenParser Char ParserState Inline
-inline = choice inlineParsers <?> "inline"
-
-inlineParsers :: [GenParser Char ParserState Inline]
-inlineParsers = [ whitespace
+inline :: Parser [Char] ParserState (F Inlines)
+inline = choice [ whitespace
, str
, endline
, code
@@ -927,8 +1164,8 @@ inlineParsers = [ whitespace
, strong
, emph
, note
- , link
, cite
+ , link
, image
, math
, strikeout
@@ -940,115 +1177,127 @@ inlineParsers = [ whitespace
, escapedChar
, rawLaTeXInline'
, exampleRef
- , smartPunctuation inline
- , charRef
+ , smart
+ , return . B.singleton <$> charRef
, symbol
- , ltSign ]
+ , ltSign
+ ] <?> "inline"
-escapedChar' :: GenParser Char ParserState Char
+escapedChar' :: Parser [Char] ParserState Char
escapedChar' = try $ do
char '\\'
- state <- getState
- if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
+ <|> oneOf "\\`*_{}[]()>#+-.!~"
-escapedChar :: GenParser Char ParserState Inline
+escapedChar :: Parser [Char] ParserState (F Inlines)
escapedChar = do
result <- escapedChar'
- return $ case result of
- ' ' -> Str "\160" -- "\ " is a nonbreaking space
- '\n' -> LineBreak -- "\[newline]" is a linebreak
- _ -> Str [result]
+ case result of
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ '\n' -> guardEnabled Ext_escaped_line_breaks >>
+ return (return B.linebreak) -- "\[newline]" is a linebreak
+ _ -> return $ return $ B.str [result]
-ltSign :: GenParser Char ParserState Inline
+ltSign :: Parser [Char] ParserState (F Inlines)
ltSign = do
- st <- getState
- if stateStrict st
- then char '<'
- else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
- return $ Str ['<']
+ guardDisabled Ext_raw_html
+ <|> guardDisabled Ext_markdown_in_html_blocks
+ <|> (notFollowedBy' rawHtmlBlocks >> return ())
+ char '<'
+ return $ return $ B.str "<"
-exampleRef :: GenParser Char ParserState Inline
+exampleRef :: Parser [Char] ParserState (F Inlines)
exampleRef = try $ do
+ guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
- -- We just return a Str. These are replaced with numbers
- -- later. See the end of parseMarkdown.
- return $ Str $ '@' : lab
-
-symbol :: GenParser Char ParserState Inline
-symbol = do
+ return $ do
+ st <- askF
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
+
+symbol :: Parser [Char] ParserState (F Inlines)
+symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
- notFollowedBy' rawTeXBlock
+ notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ Str [result]
+ return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: GenParser Char ParserState Inline
-code = try $ do
+code :: Parser [Char] ParserState (F Inlines)
+code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
+ (try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ optional whitespace >> attributes)
- return $ Code attr $ removeLeadingTrailingSpace $ concat result
-
-mathWord :: GenParser Char st [Char]
-mathWord = liftM concat $ many1 mathChunk
-
-mathChunk :: GenParser Char st [Char]
-mathChunk = do char '\\'
- c <- anyChar
- return ['\\',c]
- <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$'))
-
-math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
- <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
-
-mathDisplay :: GenParser Char ParserState String
-mathDisplay = try $ do
- failIfStrict
- string "$$"
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$")
-
-mathInline :: GenParser Char ParserState String
-mathInline = try $ do
- failIfStrict
- char '$'
+ attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
+ optional whitespace >> attributes)
+ return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result
+
+math :: Parser [Char] ParserState (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> (return . B.math <$> (mathInline >>= applyMacros'))
+
+mathDisplay :: Parser [Char] ParserState String
+mathDisplay =
+ (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathDisplayWith "\\[" "\\]")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathDisplayWith "\\\\[" "\\\\]")
+
+mathDisplayWith :: String -> String -> Parser [Char] ParserState String
+mathDisplayWith op cl = try $ do
+ string op
+ many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+
+mathInline :: Parser [Char] ParserState String
+mathInline =
+ (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathInlineWith "\\(" "\\)")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathInlineWith "\\\\(" "\\\\)")
+
+mathInlineWith :: String -> String -> Parser [Char] ParserState String
+mathInlineWith op cl = try $ do
+ string op
notFollowedBy space
- words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
- char '$'
- notFollowedBy digit
- return $ intercalate " " words'
+ words' <- many1Till (count 1 (noneOf "\n\\")
+ <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> count 1 newline <* notFollowedBy' blankline
+ *> return " ")
+ (try $ string cl)
+ notFollowedBy digit -- to prevent capture of $5
+ return $ concat words'
-- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row
-- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub
-fours :: GenParser Char st Inline
+fours :: Parser [Char] st (F Inlines)
fours = try $ do
x <- char '*' <|> char '_' <|> char '~' <|> char '^'
count 2 $ satisfy (==x)
rest <- many1 (satisfy (==x))
- return $ Str (x:x:x:rest)
+ return $ return $ B.str (x:x:x:rest)
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
- => GenParser Char ParserState a
- -> GenParser Char ParserState b
- -> GenParser Char ParserState [Inline]
+ => Parser [Char] ParserState a
+ -> Parser [Char] ParserState b
+ -> Parser [Char] ParserState (F Inlines)
inlinesBetween start end =
- normalizeSpaces `liftM` try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
+ (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
-- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: GenParser Char ParserState a
- -> GenParser Char ParserState a
+nested :: Parser [Char] ParserState a
+ -> Parser [Char] ParserState a
nested p = do
nestlevel <- stateMaxNestingLevel `fmap` getState
guard $ nestlevel > 0
@@ -1057,54 +1306,57 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-emph :: GenParser Char ParserState Inline
-emph = Emph `fmap` nested
+emph :: Parser [Char] ParserState (F Inlines)
+emph = fmap B.emph <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = char '*' >> lookAhead nonspaceChar
- starEnd = notFollowedBy' strong >> char '*'
+ starEnd = notFollowedBy' (() <$ strong) >> char '*'
ulStart = char '_' >> lookAhead nonspaceChar
- ulEnd = notFollowedBy' strong >> char '_'
+ ulEnd = notFollowedBy' (() <$ strong) >> char '_'
-strong :: GenParser Char ParserState Inline
-strong = Strong `liftM` nested
+strong :: Parser [Char] ParserState (F Inlines)
+strong = fmap B.strong <$> nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = string "**" >> lookAhead nonspaceChar
starEnd = try $ string "**"
ulStart = string "__" >> lookAhead nonspaceChar
ulEnd = try $ string "__"
-strikeout :: GenParser Char ParserState Inline
-strikeout = Strikeout `liftM`
- (failIfStrict >> inlinesBetween strikeStart strikeEnd)
+strikeout :: Parser [Char] ParserState (F Inlines)
+strikeout = fmap B.strikeout <$>
+ (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> enclosed (char '^') (char '^')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Superscript
+superscript :: Parser [Char] ParserState (F Inlines)
+superscript = fmap B.superscript <$> try (do
+ guardEnabled Ext_superscript
+ char '^'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> enclosed (char '~') (char '~')
- (notFollowedBy spaceChar >> inline) >>= -- may not contain Space
- return . Subscript
+subscript :: Parser [Char] ParserState (F Inlines)
+subscript = fmap B.subscript <$> try (do
+ guardEnabled Ext_subscript
+ char '~'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: GenParser Char ParserState Inline
-whitespace = spaceChar >>
- ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
- <|> (skipMany spaceChar >> return Space) ) <?> "whitespace"
+whitespace :: Parser [Char] ParserState (F Inlines)
+whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+ where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
+ regsp = skipMany spaceChar >> return B.space
-nonEndline :: GenParser Char st Char
+nonEndline :: Parser [Char] st Char
nonEndline = satisfy (/='\n')
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState (F Inlines)
str = do
- smart <- stateSmart `fmap` getState
+ isSmart <- readerSmart . stateOptions <$> getState
a <- alphaNum
as <- many $ alphaNum
- <|> (try $ char '_' >>~ lookAhead alphaNum)
- <|> if smart
+ <|> (guardEnabled Ext_intraword_underscores >>
+ try (char '_' >>~ lookAhead alphaNum))
+ <|> if isSmart
then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
lookAhead alphaNum >> return '\x2019')
-- for things like l'aide
@@ -1113,15 +1365,16 @@ str = do
updateState $ \s -> s{ stateLastStrPos = Just pos }
let result = a:as
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- if smart
+ if isSmart
then case likelyAbbrev result of
- [] -> return $ Str result
+ [] -> return $ return $ B.str result
xs -> choice (map (\x ->
try (string x >> oneOf " \n" >>
lookAhead alphaNum >>
- return (Str $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ Str result)
- else return $ Str result
+ return (return $ B.str
+ $ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ return $ B.str result)
+ else return $ return $ B.str result
-- | if the string matches the beginning of an abbreviation (before
-- the first period, return strings that would finish the abbreviation.
@@ -1136,39 +1389,38 @@ likelyAbbrev x =
in map snd $ filter (\(y,_) -> y == x) abbrPairs
-- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
- st <- getState
- when (stateStrict st) $ do
- notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
+ guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
+ guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
-- parse potential list-starts differently if in a list:
+ st <- getState
when (stateParserContext st == ListItemState) $ do
notFollowedBy' bulletListStart
notFollowedBy' anyOrderedListStart
- return Space
+ (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+ <|> (return $ return B.space)
--
-- links
--
-- a reference label for a link
-reference :: GenParser Char ParserState [Inline]
+reference :: Parser [Char] ParserState (F Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
- result <- inlinesInBalancedBrackets inline
- return $ normalizeSpaces result
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-- source for a link, with optional title
-source :: GenParser Char ParserState (String, [Char])
+source :: Parser [Char] ParserState (String, String)
source =
(try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
-- the following is needed for cases like: [ref](/url(a).
(enclosed (char '(') (char ')') litChar >>= parseFromString source')
-- auxiliary function for source
-source' :: GenParser Char ParserState (String, [Char])
+source' :: Parser [Char] ParserState (String, String)
source' = do
skipSpaces
let nl = char '\n' >>~ notFollowedBy blankline
@@ -1186,7 +1438,7 @@ source' = do
eof
return (escapeURI $ removeTrailingSpace src, tit)
-linkTitle :: GenParser Char ParserState String
+linkTitle :: Parser [Char] ParserState String
linkTitle = try $ do
(many1 spaceChar >> option '\n' newline) <|> newline
skipSpaces
@@ -1194,78 +1446,88 @@ linkTitle = try $ do
tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
return $ fromEntities tit
-link :: GenParser Char ParserState Inline
+link :: Parser [Char] ParserState (F Inlines)
link = try $ do
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Link (delinkify lab) (src, tit)
-
-delinkify :: [Inline] -> [Inline]
-delinkify = bottomUp $ concatMap go
- where go (Link lab _) = lab
- go x = [x]
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (lab,raw) <- reference
+ setState $ st{ stateAllowLinks = True }
+ regLink B.link lab <|> referenceLink B.link (lab,raw)
+
+regLink :: (String -> String -> Inlines -> Inlines)
+ -> F Inlines -> Parser [Char] ParserState (F Inlines)
+regLink constructor lab = try $ do
+ (src, tit) <- source
+ return $ constructor src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: [Inline]
- -> GenParser Char ParserState (String, [Char])
-referenceLink lab = do
- ref <- option [] (try (optional (char ' ') >>
- optional (newline >> skipSpaces) >> reference))
- let ref' = if null ref then lab else ref
- state <- getState
- case lookupKeySrc (stateKeys state) (toKey ref') of
- Nothing -> fail "no corresponding key"
- Just target -> return target
-
-autoLink :: GenParser Char ParserState Inline
+referenceLink :: (String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines)
+referenceLink constructor (lab, raw) = do
+ raw' <- try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >>
+ (snd <$> reference)) <|> return ""
+ let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw'
+ let dropRB (']':xs) = xs
+ dropRB xs = xs
+ let dropLB ('[':xs) = xs
+ dropLB xs = xs
+ let dropBrackets = reverse . dropRB . reverse . dropLB
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ return $ do
+ keys <- asksF stateKeys
+ case M.lookup key keys of
+ Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback
+ Just (src,tit) -> constructor src tit <$> lab
+
+autoLink :: Parser [Char] ParserState (F Inlines)
autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
char '>'
- st <- getState
- return $ if stateStrict st
- then Link [Str orig] (src, "")
- else Link [Code ("",["url"],[]) orig] (src, "")
+ (guardEnabled Ext_autolink_code_spans >>
+ return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig)))
+ <|> return (return $ B.link src "" (B.str orig))
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState (F Inlines)
image = try $ do
char '!'
- lab <- reference
- (src, tit) <- source <|> referenceLink lab
- return $ Image lab (src,tit)
+ (lab,raw) <- reference
+ regLink B.image lab <|> referenceLink B.image (lab,raw)
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState (F Inlines)
note = try $ do
- failIfStrict
+ guardEnabled Ext_footnotes
ref <- noteMarker
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just raw -> do
- -- We temporarily empty the note list while parsing the note,
- -- so that we don't get infinite loops with notes inside notes...
- -- Note references inside other notes do not work.
- updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- updateState $ \st -> st{ stateNotes = notes }
- return $ Note contents
-
-inlineNote :: GenParser Char ParserState Inline
+ return $ do
+ notes <- asksF stateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Just contents -> do
+ st <- askF
+ -- process the note in a context that doesn't resolve
+ -- notes, to avoid infinite looping with notes inside
+ -- notes:
+ let contents' = runF contents st{ stateNotes' = [] }
+ return $ B.note contents'
+
+inlineNote :: Parser [Char] ParserState (F Inlines)
inlineNote = try $ do
- failIfStrict
+ guardEnabled Ext_inline_notes
char '^'
- contents <- inlinesInBalancedBrackets inline
- return $ Note [Para contents]
+ contents <- inlinesInBalancedBrackets
+ return $ B.note . B.para <$> contents
-rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState (F Inlines)
rawLaTeXInline' = try $ do
- failIfStrict
+ guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
- return $ RawInline "tex" s -- "tex" because it might be context or latex
+ return $ return $ B.rawInline "tex" s
+ -- "tex" because it might be context or latex
-rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment :: Parser [Char] st String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1274,37 +1536,33 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (GenParser Char st Char) -> GenParser Char st String
+inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline :: Parser [Char] ParserState (F Inlines)
rawHtmlInline = do
- st <- getState
- (_,result) <- if stateStrict st
- then htmlTag (not . isTextTag)
- else htmlTag isInlineTag
- return $ RawInline "html" result
+ guardEnabled Ext_raw_html
+ mdInHtml <- option False $
+ guardEnabled Ext_markdown_in_html_blocks >> return True
+ (_,result) <- if mdInHtml
+ then htmlTag isInlineTag
+ else htmlTag (not . isTextTag)
+ return $ return $ B.rawInline "html" result
-- Citations
-cite :: GenParser Char ParserState Inline
+cite :: Parser [Char] ParserState (F Inlines)
cite = do
- failIfStrict
+ guardEnabled Ext_citations
+ getOption readerCitations >>= guard . not . null
citations <- textualCite <|> normalCite
- return $ Cite citations []
+ return $ flip B.cite mempty <$> citations
-spnl :: GenParser Char st ()
-spnl = try $ do
- skipSpaces
- optional newline
- skipSpaces
- notFollowedBy (char '\n')
-
-textualCite :: GenParser Char ParserState [Citation]
+textualCite :: Parser [Char] ParserState (F [Citation])
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
@@ -1314,22 +1572,25 @@ textualCite = try $ do
, citationNoteNum = 0
, citationHash = 0
}
- rest <- option [] $ try $ spnl >> normalCite
- if null rest
- then option [first] $ bareloc first
- else return $ first : rest
+ mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+ case mbrest of
+ Just rest -> return $ (first:) <$> rest
+ Nothing -> option (return [first]) $ bareloc first
-bareloc :: Citation -> GenParser Char ParserState [Citation]
+bareloc :: Citation -> Parser [Char] ParserState (F [Citation])
bareloc c = try $ do
spnl
char '['
suff <- suffix
- rest <- option [] $ try $ char ';' >> citeList
+ rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
- return $ c{ citationSuffix = suff } : rest
+ return $ do
+ suff' <- suff
+ rest' <- rest
+ return $ c{ citationSuffix = B.toList suff' } : rest'
-normalCite :: GenParser Char ParserState [Citation]
+normalCite :: Parser [Char] ParserState (F [Citation])
normalCite = try $ do
char '['
spnl
@@ -1338,7 +1599,7 @@ normalCite = try $ do
char ']'
return citations
-citeKey :: GenParser Char ParserState (Bool, String)
+citeKey :: Parser [Char] ParserState (Bool, String)
citeKey = try $ do
suppress_author <- option False (char '-' >> return True)
char '@'
@@ -1346,34 +1607,37 @@ citeKey = try $ do
let internal p = try $ p >>~ lookAhead (letter <|> digit)
rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~")
let key = first:rest
- st <- getState
- guard $ key `elem` stateCitations st
+ citations' <- getOption readerCitations
+ guard $ key `elem` citations'
return (suppress_author, key)
-suffix :: GenParser Char ParserState [Inline]
+suffix :: Parser [Char] ParserState (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+ rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
- then Space : rest
+ then (B.space <>) <$> rest
else rest
-prefix :: GenParser Char ParserState [Inline]
-prefix = liftM normalizeSpaces $
+prefix :: Parser [Char] ParserState (F Inlines)
+prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-citeList :: GenParser Char ParserState [Citation]
-citeList = sepBy1 citation (try $ char ';' >> spnl)
+citeList :: Parser [Char] ParserState (F [Citation])
+citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-citation :: GenParser Char ParserState Citation
+citation :: Parser [Char] ParserState (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ Citation{ citationId = key
- , citationPrefix = pref
- , citationSuffix = suff
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
@@ -1381,3 +1645,22 @@ citation = try $ do
, citationHash = 0
}
+smart :: Parser [Char] ParserState (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+
+singleQuoted :: Parser [Char] ParserState (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+doubleQuoted :: Parser [Char] ParserState (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $
+ fmap B.doubleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 2c6fcc6e6..a0e5a0635 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Native
Copyright : Copyright (C) 2011 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -31,6 +31,7 @@ Conversion of a string representation of a pandoc type (@Pandoc@,
module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
nullMeta :: Meta
nullMeta = Meta{ docTitle = []
@@ -51,31 +52,31 @@ nullMeta = Meta{ docTitle = []
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
readNative s =
- case reads s of
- (d,_):_ -> d
- [] -> Pandoc nullMeta $ readBlocks s
+ case safeRead s of
+ Just d -> d
+ Nothing -> Pandoc nullMeta $ readBlocks s
readBlocks :: String -> [Block]
readBlocks s =
- case reads s of
- (d,_):_ -> d
- [] -> [readBlock s]
+ case safeRead s of
+ Just d -> d
+ Nothing -> [readBlock s]
readBlock :: String -> Block
readBlock s =
- case reads s of
- (d,_):_ -> d
- [] -> Plain $ readInlines s
+ case safeRead s of
+ Just d -> d
+ Nothing -> Plain $ readInlines s
readInlines :: String -> [Inline]
readInlines s =
- case reads s of
- (d,_):_ -> d
- [] -> [readInline s]
+ case safeRead s of
+ Just d -> d
+ Nothing -> [readInline s]
readInline :: String -> Inline
readInline s =
- case reads s of
- (d,_):_ -> d
- [] -> error "Cannot parse document"
+ case safeRead s of
+ Just d -> d
+ Nothing -> error "Cannot parse document"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index d1010a736..9fb976903 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.RST
+ Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -27,24 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.RST (
+module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.ParserCombinators.Parsec
-import Control.Monad ( when, liftM )
+import Text.Pandoc.Options
+import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
import Data.Maybe ( catMaybes )
-- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+readRST :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readRST state s = (readWith parseRST) state (s ++ "\n\n")
+readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
--
-- Constants and data structure definitions
@@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221"
+specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221"
--
-- parsing documents
@@ -71,14 +71,14 @@ isHeader _ _ = False
-- | Promote all headers in a list of blocks. (Part of
-- title transformation for RST.)
promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) =
+promoteHeaders num ((Header level text):rest) =
(Header (level - num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
+-- promote all the other headers.
titleTransform :: [Block] -- ^ list of blocks
-> ([Block], [Inline]) -- ^ modified list of blocks, title
titleTransform ((Header 1 head1):(Header 2 head2):rest) |
@@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) |
(promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
-parseRST :: GenParser Char ParserState Pandoc
+parseRST :: Parser [Char] ParserState Pandoc
parseRST = do
optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
@@ -103,12 +103,13 @@ parseRST = do
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
- blocks <- parseBlocks
+ blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
- state <- getState
- let (blocks'', title) = if stateStandalone state
+ standalone <- getOption readerStandalone
+ let (blocks'', title) = if standalone
then titleTransform blocks'
else (blocks', [])
+ state <- getState
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
@@ -118,10 +119,10 @@ parseRST = do
-- parsing blocks
--
-parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
-block :: GenParser Char ParserState Block
+block :: Parser [Char] ParserState Block
block = choice [ codeBlock
, rawBlock
, blockQuote
@@ -146,7 +147,7 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: String -> GenParser Char ParserState (String, String)
+rawFieldListItem :: String -> Parser [Char] ParserState (String, String)
rawFieldListItem indent = try $ do
string indent
char ':'
@@ -160,7 +161,7 @@ rawFieldListItem indent = try $ do
return (name, raw)
fieldListItem :: String
- -> GenParser Char ParserState (Maybe ([Inline], [[Block]]))
+ -> Parser [Char] ParserState (Maybe ([Inline], [[Block]]))
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = [Str name]
@@ -187,7 +188,7 @@ extractContents [Plain auth] = auth
extractContents [Para auth] = auth
extractContents _ = []
-fieldList :: GenParser Char ParserState Block
+fieldList :: Parser [Char] ParserState Block
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
@@ -199,7 +200,7 @@ fieldList = try $ do
-- line block
--
-lineBlockLine :: GenParser Char ParserState [Inline]
+lineBlockLine :: Parser [Char] ParserState [Inline]
lineBlockLine = try $ do
char '|'
char ' ' <|> lookAhead (char '\n')
@@ -210,7 +211,7 @@ lineBlockLine = try $ do
then normalizeSpaces line
else Str white : normalizeSpaces line
-lineBlock :: GenParser Char ParserState Block
+lineBlock :: Parser [Char] ParserState Block
lineBlock = try $ do
lines' <- many1 lineBlockLine
blanklines
@@ -220,14 +221,14 @@ lineBlock = try $ do
-- paragraph block
--
-para :: GenParser Char ParserState Block
+para :: Parser [Char] ParserState Block
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-codeBlockStart :: GenParser Char st Char
+codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
-- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock :: GenParser Char ParserState Block
+paraBeforeCodeBlock :: Parser [Char] ParserState Block
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
@@ -236,21 +237,21 @@ paraBeforeCodeBlock = try $ do
else (normalizeSpaces result) ++ [Str ":"]
-- regular paragraph
-paraNormal :: GenParser Char ParserState Block
-paraNormal = try $ do
+paraNormal :: Parser [Char] ParserState Block
+paraNormal = try $ do
result <- many1 inline
newline
blanklines
return $ Para $ normalizeSpaces result
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
+plain :: Parser [Char] ParserState Block
+plain = many1 inline >>= return . Plain . normalizeSpaces
--
-- image block
--
-imageBlock :: GenParser Char ParserState Block
+imageBlock :: Parser [Char] ParserState Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
@@ -265,11 +266,11 @@ imageBlock = try $ do
-- header blocks
--
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState Block
header = doubleHeader <|> singleHeader <?> "header"
-- a header with lines on top and bottom
-doubleHeader :: GenParser Char ParserState Block
+doubleHeader :: Parser [Char] ParserState Block
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c) -- the top line
@@ -283,7 +284,7 @@ doubleHeader = try $ do
blankline -- spaces and newline
count lenTop (char c) -- the bottom line
blanklines
- -- check to see if we've had this kind of header before.
+ -- check to see if we've had this kind of header before.
-- if so, get appropriate level. if not, add to list.
state <- getState
let headerTable = stateHeaderTable state
@@ -294,8 +295,8 @@ doubleHeader = try $ do
return $ Header level (normalizeSpaces txt)
-- a header with line on the bottom only
-singleHeader :: GenParser Char ParserState Block
-singleHeader = try $ do
+singleHeader :: Parser [Char] ParserState Block
+singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
@@ -317,7 +318,7 @@ singleHeader = try $ do
-- hrule block
--
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st Block
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -331,14 +332,14 @@ hrule = try $ do
--
-- read a line indented by a given string
-indentedLine :: String -> GenParser Char st [Char]
+indentedLine :: String -> Parser [Char] st [Char]
indentedLine indents = try $ do
string indents
manyTill anyChar newline
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
-indentedBlock :: GenParser Char st [Char]
+indentedBlock :: Parser [Char] st [Char]
indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
lns <- many1 $ try $ do b <- option "" blanklines
@@ -347,7 +348,7 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
-codeBlock :: GenParser Char st Block
+codeBlock :: Parser [Char] st Block
codeBlock = try $ do
codeBlockStart
result <- indentedBlock
@@ -355,7 +356,7 @@ codeBlock = try $ do
-- | The 'code-block' directive (from Sphinx) that allows a language to be
-- specified.
-customCodeBlock :: GenParser Char st Block
+customCodeBlock :: Parser [Char] st Block
customCodeBlock = try $ do
string ".. code-block:: "
language <- manyTill anyChar newline
@@ -364,7 +365,7 @@ customCodeBlock = try $ do
return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
-figureBlock :: GenParser Char ParserState Block
+figureBlock :: Parser [Char] ParserState Block
figureBlock = try $ do
string ".. figure::"
src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline
@@ -372,24 +373,24 @@ figureBlock = try $ do
caption <- parseFromString extractCaption body
return $ Para [Image caption (src,"")]
-extractCaption :: GenParser Char ParserState [Inline]
+extractCaption :: Parser [Char] ParserState [Inline]
extractCaption = try $ do
manyTill anyLine blanklines
many inline
-- | The 'math' directive (from Sphinx) for display math.
-mathBlock :: GenParser Char st Block
+mathBlock :: Parser [Char] st Block
mathBlock = try $ do
string ".. math::"
mathBlockMultiline <|> mathBlockOneLine
-mathBlockOneLine :: GenParser Char st Block
+mathBlockOneLine :: Parser [Char] st Block
mathBlockOneLine = try $ do
result <- manyTill anyChar newline
blanklines
return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result]
-mathBlockMultiline :: GenParser Char st Block
+mathBlockMultiline :: Parser [Char] st Block
mathBlockMultiline = try $ do
blanklines
result <- indentedBlock
@@ -404,9 +405,9 @@ mathBlockMultiline = try $ do
$ filter (not . null) $ splitBy null lns'
return $ Para $ map (Math DisplayMath) eqs
-lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock :: Parser [Char] ParserState Block
lhsCodeBlock = try $ do
- failUnlessLHS
+ guardEnabled Ext_literate_haskell
optional codeBlockStart
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
@@ -418,7 +419,7 @@ lhsCodeBlock = try $ do
blanklines
return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns'
-birdTrackLine :: GenParser Char st [Char]
+birdTrackLine :: Parser [Char] st [Char]
birdTrackLine = do
char '>'
manyTill anyChar newline
@@ -427,7 +428,7 @@ birdTrackLine = do
-- raw html/latex/etc
--
-rawBlock :: GenParser Char st Block
+rawBlock :: Parser [Char] st Block
rawBlock = try $ do
string ".. raw:: "
lang <- many1 (letter <|> digit)
@@ -439,7 +440,7 @@ rawBlock = try $ do
-- block quotes
--
-blockQuote :: GenParser Char ParserState Block
+blockQuote :: Parser [Char] ParserState Block
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
@@ -450,10 +451,10 @@ blockQuote = do
-- list blocks
--
-list :: GenParser Char ParserState Block
+list :: Parser [Char] ParserState Block
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
-- avoid capturing a directive or comment
notFollowedBy (try $ char '.' >> char '.')
@@ -463,11 +464,11 @@ definitionListItem = try $ do
contents <- parseFromString parseBlocks $ raw ++ "\n"
return (normalizeSpaces term, [contents])
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState Block
definitionList = many1 definitionListItem >>= return . DefinitionList
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: GenParser Char st Int
+bulletListStart :: Parser [Char] st Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -477,14 +478,14 @@ bulletListStart = try $ do
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
- -> GenParser Char ParserState Int
+ -> Parser [Char] ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Int -> GenParser Char ParserState [Char]
+listLine :: Int -> Parser [Char] ParserState [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@@ -492,36 +493,35 @@ listLine markerLength = try $ do
return $ line ++ "\n"
-- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> GenParser Char ParserState [Char]
+indentWith :: Int -> Parser [Char] ParserState [Char]
indentWith num = do
- state <- getState
- let tabStop = stateTabStop state
+ tabStop <- getOption readerTabStop
if (num < tabStop)
then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState (Int, [Char])
+rawListItem :: Parser [Char] ParserState Int
+ -> Parser [Char] ParserState (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- manyTill anyChar newline
restLines <- many (listLine markerLength)
return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
--- continuation of a list item - indented and separated by blankline or
--- (in compact lists) endline.
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Int -> GenParser Char ParserState [Char]
+listContinuation :: Int -> Parser [Char] ParserState [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
-listItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState [Block]
-listItem start = try $ do
+listItem :: Parser [Char] ParserState Int
+ -> Parser [Char] ParserState [Block]
+listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
blanks <- choice [ try (many blankline >>~ lookAhead start),
@@ -537,22 +537,22 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return parsed
-orderedList :: GenParser Char ParserState Block
+orderedList :: Parser [Char] ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return $ OrderedList (start, style, delim) items'
-bulletList :: GenParser Char ParserState Block
-bulletList = many1 (listItem bulletListStart) >>=
+bulletList :: Parser [Char] ParserState Block
+bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
--
-- default-role block
--
-defaultRoleBlock :: GenParser Char ParserState Block
+defaultRoleBlock :: Parser [Char] ParserState Block
defaultRoleBlock = try $ do
string ".. default-role::"
-- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one
@@ -570,7 +570,7 @@ defaultRoleBlock = try $ do
-- unknown directive (e.g. comment)
--
-unknownDirective :: GenParser Char st Block
+unknownDirective :: Parser [Char] st Block
unknownDirective = try $ do
string ".."
notFollowedBy (noneOf " \t\n")
@@ -582,7 +582,7 @@ unknownDirective = try $ do
--- note block
---
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
string ".."
@@ -601,7 +601,7 @@ noteBlock = try $ do
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-noteMarker :: GenParser Char ParserState [Char]
+noteMarker :: Parser [Char] ParserState [Char]
noteMarker = do
char '['
res <- many1 digit
@@ -614,13 +614,13 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: GenParser Char ParserState [Inline]
+quotedReferenceName :: Parser [Char] ParserState [Inline]
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- many1Till inline (char '`')
+ label' <- many1Till inline (char '`')
return label'
-unquotedReferenceName :: GenParser Char ParserState [Inline]
+unquotedReferenceName :: Parser [Char] ParserState [Inline]
unquotedReferenceName = try $ do
label' <- many1Till inline (lookAhead $ char ':')
return label'
@@ -629,24 +629,24 @@ unquotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName' :: GenParser Char st String
+simpleReferenceName' :: Parser [Char] st String
simpleReferenceName' = do
x <- alphaNum
xs <- many $ alphaNum
<|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
return (x:xs)
-simpleReferenceName :: GenParser Char st [Inline]
+simpleReferenceName :: Parser [Char] st [Inline]
simpleReferenceName = do
raw <- simpleReferenceName'
return [Str raw]
-referenceName :: GenParser Char ParserState [Inline]
+referenceName :: Parser [Char] ParserState [Inline]
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
unquotedReferenceName
-referenceKey :: GenParser Char ParserState [Char]
+referenceKey :: Parser [Char] ParserState [Char]
referenceKey = do
startPos <- getPosition
(key, target) <- choice [imageKey, anonymousKey, regularKey]
@@ -658,38 +658,43 @@ referenceKey = do
-- return enough blanks to replace key
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-targetURI :: GenParser Char st [Char]
+targetURI :: Parser [Char] st [Char]
targetURI = do
skipSpaces
optional newline
- contents <- many1 (try (many spaceChar >> newline >>
+ contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
-imageKey :: GenParser Char ParserState (Key, Target)
+imageKey :: Parser [Char] ParserState (Key, Target)
imageKey = try $ do
string ".. |"
- ref <- manyTill inline (char '|')
+ (_,ref) <- withRaw (manyTill inline (char '|'))
skipSpaces
string "image::"
src <- targetURI
- return (toKey (normalizeSpaces ref), (src, ""))
+ return (toKey $ init ref, (src, ""))
-anonymousKey :: GenParser Char st (Key, Target)
+anonymousKey :: Parser [Char] st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
- return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
+ return (toKey $ "_" ++ printf "%09d" (sourceLine pos), (src, ""))
-regularKey :: GenParser Char ParserState (Key, Target)
+stripTicks :: String -> String
+stripTicks = reverse . stripTick . reverse . stripTick
+ where stripTick ('`':xs) = xs
+ stripTick xs = xs
+
+regularKey :: Parser [Char] ParserState (Key, Target)
regularKey = try $ do
string ".. _"
- ref <- referenceName
+ (_,ref) <- withRaw referenceName
char ':'
src <- targetURI
- return (toKey (normalizeSpaces ref), (src, ""))
+ return (toKey $ stripTicks ref, (src, ""))
--
-- tables
@@ -702,37 +707,37 @@ regularKey = try $ do
-- Simple tables TODO:
-- - column spans
-- - multiline support
--- - ensure that rightmost column span does not need to reach end
+-- - ensure that rightmost column span does not need to reach end
-- - require at least 2 columns
--
-- Grid tables TODO:
-- - column spans
-dashedLine :: Char -> GenParser Char st (Int, Int)
+dashedLine :: Char -> Parser [Char] st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
+simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
-simpleTableSep :: Char -> GenParser Char ParserState Char
+simpleTableSep :: Char -> Parser [Char] ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: GenParser Char ParserState [Char]
+simpleTableFooter :: Parser [Char] ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
+simpleTableRawLine :: [Int] -> Parser [Char] ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
+simpleTableRow :: [Int] -> Parser [Char] ParserState [[Block]]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
@@ -745,8 +750,8 @@ simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+simpleTableHeader :: Bool -- ^ Headerless table
+ -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -766,28 +771,28 @@ simpleTableHeader headless = try $ do
-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable = gridTableWith block (return [])
+ -> Parser [Char] ParserState Block
+gridTable = gridTableWith parseBlocks
-table :: GenParser Char ParserState Block
+table :: Parser [Char] ParserState Block
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
- --
+ --
-- inline
--
-inline :: GenParser Char ParserState Inline
+inline :: Parser [Char] ParserState Inline
inline = choice [ whitespace
, link
, str
@@ -805,44 +810,53 @@ inline = choice [ whitespace
, escapedChar
, symbol ] <?> "inline"
-hyphens :: GenParser Char ParserState Inline
+hyphens :: Parser [Char] ParserState Inline
hyphens = do
result <- many1 (char '-')
- option Space endline
+ option Space endline
-- don't want to treat endline after hyphen or dash as a space
return $ Str result
-escapedChar :: GenParser Char st Inline
+escapedChar :: Parser [Char] st Inline
escapedChar = do c <- escaped anyChar
return $ if c == ' ' -- '\ ' is null in RST
then Str ""
else Str [c]
-symbol :: GenParser Char ParserState Inline
-symbol = do
+symbol :: Parser [Char] ParserState Inline
+symbol = do
result <- oneOf specialChars
return $ Str [result]
-- parses inline code, between codeStart and codeEnd
-code :: GenParser Char ParserState Inline
-code = try $ do
+code :: Parser [Char] ParserState Inline
+code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
return $ Code nullAttr
$ removeLeadingTrailingSpace $ intercalate " " $ lines result
-emph :: GenParser Char ParserState Inline
-emph = enclosed (char '*') (char '*') inline >>=
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ -- single quote start can't be right after str
+ guard $ stateLastStrPos st /= Just pos
+ p
+
+emph :: Parser [Char] ParserState Inline
+emph = enclosed (atStart $ char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
-strong :: GenParser Char ParserState Inline
-strong = enclosed (string "**") (try $ string "**") inline >>=
+strong :: Parser [Char] ParserState Inline
+strong = enclosed (atStart $ string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
-- Parses inline interpreted text which is required to have the given role.
-- This decision is based on the role marker (if present),
-- and the current default interpreted text role.
-interpreted :: [Char] -> GenParser Char ParserState [Char]
+interpreted :: [Char] -> Parser [Char] ParserState [Char]
interpreted role = try $ do
state <- getState
if role == stateRstDefaultRole state
@@ -856,30 +870,30 @@ interpreted role = try $ do
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-- but it should be good enough for most purposes
unmarkedInterpretedText = do
- result <- enclosed (char '`') (char '`') anyChar
+ result <- enclosed (atStart $ char '`') (char '`') anyChar
return result
-superscript :: GenParser Char ParserState Inline
+superscript :: Parser [Char] ParserState Inline
superscript = interpreted "sup" >>= \x -> return (Superscript [Str x])
-subscript :: GenParser Char ParserState Inline
+subscript :: Parser [Char] ParserState Inline
subscript = interpreted "sub" >>= \x -> return (Subscript [Str x])
-math :: GenParser Char ParserState Inline
+math :: Parser [Char] ParserState Inline
math = interpreted "math" >>= \x -> return (Math InlineMath x)
-whitespace :: GenParser Char ParserState Inline
+whitespace :: Parser [Char] ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState Inline
str = do
- result <- many1 (noneOf (specialChars ++ "\t\n "))
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ let strChar = noneOf ("\t\n " ++ specialChars)
+ result <- many1 strChar
+ updateLastStrPos
return $ Str result
-- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
@@ -895,14 +909,14 @@ endline = try $ do
-- links
--
-link :: GenParser Char ParserState Inline
+link :: Parser [Char] ParserState Inline
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-explicitLink :: GenParser Char ParserState Inline
+explicitLink :: Parser [Char] ParserState Inline
explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
- label' <- manyTill (notFollowedBy (char '`') >> inline)
+ label' <- manyTill (notFollowedBy (char '`') >> inline)
(try (spaces >> char '<'))
src <- manyTill (noneOf ">\n") (char '>')
skipSpaces
@@ -910,53 +924,53 @@ explicitLink = try $ do
return $ Link (normalizeSpaces label')
(escapeURI $ removeLeadingTrailingSpace src, "")
-referenceLink :: GenParser Char ParserState Inline
+referenceLink :: Parser [Char] ParserState Inline
referenceLink = try $ do
- label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
+ (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
+ char '_'
state <- getState
let keyTable = stateKeys state
- let isAnonKey x = case fromKey x of
- [Str ('_':_)] -> True
- _ -> False
- key <- option (toKey label') $
+ let isAnonKey (Key ('_':_)) = True
+ isAnonKey _ = False
+ key <- option (toKey $ stripTicks ref) $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
if null anonKeys
- then pzero
+ then mzero
else return (head anonKeys)
- (src,tit) <- case lookupKeySrc keyTable key of
+ (src,tit) <- case M.lookup key keyTable of
Nothing -> fail "no corresponding key"
Just target -> return target
-- if anonymous link, remove key so it won't be used again
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
- return $ Link (normalizeSpaces label') (src, tit)
+ return $ Link (normalizeSpaces label') (src, tit)
-autoURI :: GenParser Char ParserState Inline
+autoURI :: Parser [Char] ParserState Inline
autoURI = do
(orig, src) <- uri
return $ Link [Str orig] (src, "")
-autoEmail :: GenParser Char ParserState Inline
+autoEmail :: Parser [Char] ParserState Inline
autoEmail = do
(orig, src) <- emailAddress
return $ Link [Str orig] (src, "")
-autoLink :: GenParser Char ParserState Inline
+autoLink :: Parser [Char] ParserState Inline
autoLink = autoURI <|> autoEmail
-- For now, we assume that all substitution references are for images.
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState Inline
image = try $ do
char '|'
- ref <- manyTill inline (char '|')
+ (alt,ref) <- withRaw (manyTill inline (char '|'))
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable (toKey ref) of
+ (src,tit) <- case M.lookup (toKey $ init ref) keyTable of
Nothing -> fail "no corresponding key"
Just target -> return target
- return $ Image (normalizeSpaces ref) (src, tit)
+ return $ Image (normalizeSpaces alt) (src, tit)
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState Inline
note = try $ do
ref <- noteMarker
char '_'
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 67dfe6753..fe49a992e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.TeXMath
Copyright : Copyright (C) 2007-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 348900d38..89f281ae8 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Textile
Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : Paul Rivier <paul*rivier#demotera*com>
Stability : alpha
@@ -56,29 +56,34 @@ TODO : refactor common patterns across readers :
module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.ParserCombinators.Parsec
import Text.HTML.TagSoup.Match
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readTextile state s =
- (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
+readTextile :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readTextile opts s =
+ (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: GenParser Char ParserState Pandoc
+parseTextile :: Parser [Char] ParserState Pandoc
parseTextile = do
-- textile allows raw HTML and does smart punctuation by default
- updateState (\state -> state { stateParseRaw = True, stateSmart = True })
+ oldOpts <- stateOptions `fmap` getState
+ updateState $ \state -> state{ stateOptions =
+ oldOpts{ readerSmart = True
+ , readerParseRaw = True
+ , readerOldDashes = True
+ } }
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
@@ -93,10 +98,10 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc (Meta [] [] []) blocks -- FIXME
-noteMarker :: GenParser Char ParserState [Char]
+noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
-noteBlock :: GenParser Char ParserState [Char]
+noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@@ -111,11 +116,11 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse document blocks
-parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: [GenParser Char ParserState Block]
+blockParsers :: [Parser [Char] ParserState Block]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -128,20 +133,20 @@ blockParsers = [ codeBlock
, nullBlock ]
-- | Any block in the order of definition of blockParsers
-block :: GenParser Char ParserState Block
+block :: Parser [Char] ParserState Block
block = choice blockParsers <?> "block"
-codeBlock :: GenParser Char ParserState Block
+codeBlock :: Parser [Char] ParserState Block
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: GenParser Char ParserState Block
+codeBlockBc :: Parser [Char] ParserState Block
codeBlockBc = try $ do
string "bc. "
contents <- manyTill anyLine blanklines
return $ CodeBlock ("",[],[]) $ unlines contents
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: GenParser Char ParserState Block
+codeBlockPre :: Parser [Char] ParserState Block
codeBlockPre = try $ do
htmlTag (tagOpen (=="pre") null)
result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
@@ -156,7 +161,7 @@ codeBlockPre = try $ do
return $ CodeBlock ("",[],[]) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: GenParser Char ParserState Block
+header :: Parser [Char] ParserState Block
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -165,14 +170,14 @@ header = try $ do
return $ Header level name
-- | Blockquote of the form "bq. content"
-blockQuote :: GenParser Char ParserState Block
+blockQuote :: Parser [Char] ParserState Block
blockQuote = try $ do
string "bq" >> optional attributes >> char '.' >> whitespace
BlockQuote . singleton <$> para
-- Horizontal rule
-hrule :: GenParser Char st Block
+hrule :: Parser [Char] st Block
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -187,39 +192,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: GenParser Char ParserState Block
+anyList :: Parser [Char] ParserState Block
anyList = try $ ( (anyListAtDepth 1) <* blanklines )
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: Int -> GenParser Char ParserState Block
+anyListAtDepth :: Int -> Parser [Char] ParserState Block
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> GenParser Char ParserState Block
+bulletListAtDepth :: Int -> Parser [Char] ParserState Block
bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: Int -> GenParser Char ParserState Block
+orderedListAtDepth :: Int -> Parser [Char] ParserState Block
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return (OrderedList (1, DefaultStyle, DefaultDelim) items)
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block]
+genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
genericListItemAtDepth c depth = try $ do
count depth (char c) >> optional attributes >> whitespace
p <- inlines
@@ -227,22 +232,22 @@ genericListItemAtDepth c depth = try $ do
return ((Plain p):sublist)
-- | A definition list is a set of consecutive definition items
-definitionList :: GenParser Char ParserState Block
+definitionList :: Parser [Char] ParserState Block
definitionList = try $ DefinitionList <$> many1 definitionListItem
-
+
-- | A definition list item in textile begins with '- ', followed by
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
string "- "
term <- many1Till inline (try (whitespace >> string ":="))
- def <- inlineDef <|> multilineDef
- return (term, def)
- where inlineDef :: GenParser Char ParserState [[Block]]
+ def' <- inlineDef <|> multilineDef
+ return (term, def')
+ where inlineDef :: Parser [Char] ParserState [[Block]]
inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines)
- multilineDef :: GenParser Char ParserState [[Block]]
+ multilineDef :: Parser [Char] ParserState [[Block]]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
@@ -252,76 +257,76 @@ definitionListItem = try $ do
-- | This terminates a block such as a paragraph. Because of raw html
-- blocks support, we have to lookAhead for a rawHtmlBlock.
-blockBreak :: GenParser Char ParserState ()
+blockBreak :: Parser [Char] ParserState ()
blockBreak = try (newline >> blanklines >> return ()) <|>
(lookAhead rawHtmlBlock >> return ())
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: GenParser Char ParserState Block
+rawHtmlBlock :: Parser [Char] ParserState Block
rawHtmlBlock = try $ do
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ RawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: GenParser Char ParserState Block
+rawLaTeXBlock' :: Parser [Char] ParserState Block
rawLaTeXBlock' = do
- failIfStrict
+ guardEnabled Ext_raw_tex
RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: GenParser Char ParserState Block
+para :: Parser [Char] ParserState Block
para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
-- Tables
-
+
-- | A table cell spans until a pipe |
-tableCell :: GenParser Char ParserState TableCell
+tableCell :: Parser [Char] ParserState TableCell
tableCell = do
c <- many1 (noneOf "|\n")
content <- parseFromString (many1 inline) c
return $ [ Plain $ normalizeSpaces content ]
-- | A table row is made of many table cells
-tableRow :: GenParser Char ParserState [TableCell]
+tableRow :: Parser [Char] ParserState [TableCell]
tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline)
-- | Many table rows
-tableRows :: GenParser Char ParserState [[TableCell]]
+tableRows :: Parser [Char] ParserState [[TableCell]]
tableRows = many1 tableRow
-- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: GenParser Char ParserState [TableCell]
+tableHeaders :: Parser [Char] ParserState [TableCell]
tableHeaders = let separator = (try $ string "|_.") in
try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
-
+
-- | A table with an optional header. Current implementation can
-- handle tables with and without header, but will parse cells
-- alignment attributes as content.
-table :: GenParser Char ParserState Block
+table :: Parser [Char] ParserState Block
table = try $ do
headers <- option [] tableHeaders
rows <- tableRows
blanklines
let nbOfCols = max (length headers) (length $ head rows)
- return $ Table []
+ return $ Table []
(replicate nbOfCols AlignDefault)
(replicate nbOfCols 0.0)
headers
rows
-
+
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: String -- ^ block tag name
- -> GenParser Char ParserState Block -- ^ implicit block
- -> GenParser Char ParserState Block
+ -> Parser [Char] ParserState Block -- ^ implicit block
+ -> Parser [Char] ParserState Block
maybeExplicitBlock name blk = try $ do
- optional $ try $ string name >> optional attributes >> char '.' >>
+ optional $ try $ string name >> optional attributes >> char '.' >>
((try whitespace) <|> endline)
blk
@@ -333,15 +338,15 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: GenParser Char ParserState Inline
+inline :: Parser [Char] ParserState Inline
inline = choice inlineParsers <?> "inline"
-- | List of consecutive inlines before a newline
-inlines :: GenParser Char ParserState [Inline]
+inlines :: Parser [Char] ParserState [Inline]
inlines = manyTill inline newline
-- | Inline parsers tried in order
-inlineParsers :: [GenParser Char ParserState Inline]
+inlineParsers :: [Parser [Char] ParserState Inline]
inlineParsers = [ autoLink
, str
, whitespace
@@ -362,7 +367,7 @@ inlineParsers = [ autoLink
]
-- | Inline markups
-inlineMarkup :: GenParser Char ParserState Inline
+inlineMarkup :: Parser [Char] ParserState Inline
inlineMarkup = choice [ simpleInline (string "??") (Cite [])
, simpleInline (string "**") Strong
, simpleInline (string "__") Emph
@@ -375,29 +380,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite [])
]
-- | Trademark, registered, copyright
-mark :: GenParser Char st Inline
+mark :: Parser [Char] st Inline
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: GenParser Char st Inline
+reg :: Parser [Char] st Inline
reg = do
oneOf "Rr"
char ')'
return $ Str "\174"
-tm :: GenParser Char st Inline
+tm :: Parser [Char] st Inline
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ Str "\8482"
-copy :: GenParser Char st Inline
+copy :: Parser [Char] st Inline
copy = do
oneOf "Cc"
char ')'
return $ Str "\169"
-note :: GenParser Char ParserState Inline
+note :: Parser [Char] ParserState Inline
note = try $ do
ref <- (char '[' *> many1 digit <* char ']')
notes <- stateNotes <$> getState
@@ -405,7 +410,7 @@ note = try $ do
Nothing -> fail "note not found"
Just raw -> liftM Note $ parseFromString parseBlocks raw
--- | Special chars
+-- | Special chars
markupChars :: [Char]
markupChars = "\\[]*#_@~-+^|%="
@@ -421,17 +426,17 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: GenParser Char ParserState String
+hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = try $ do
hd <- noneOf wordBoundaries
- tl <- many ( (noneOf wordBoundaries) <|>
+ tl <- many ( (noneOf wordBoundaries) <|>
try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) )
let wd = hd:tl
- option wd $ try $
+ option wd $ try $
(\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords)
-- | Any string
-str :: GenParser Char ParserState Inline
+str :: Parser [Char] ParserState Inline
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -444,44 +449,57 @@ str = do
return $ Str fullStr
-- | Textile allows HTML span infos, we discard them
-htmlSpan :: GenParser Char ParserState Inline
+htmlSpan :: Parser [Char] ParserState Inline
htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
-- | Some number of space chars
-whitespace :: GenParser Char ParserState Inline
+whitespace :: Parser [Char] ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: GenParser Char ParserState Inline
+endline :: Parser [Char] ParserState Inline
endline = try $ do
newline >> notFollowedBy blankline
return LineBreak
-rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline :: Parser [Char] ParserState Inline
rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
-
--- | Raw LaTeX Inline
-rawLaTeXInline' :: GenParser Char ParserState Inline
+
+-- | Raw LaTeX Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inline
rawLaTeXInline' = try $ do
- failIfStrict
+ guardEnabled Ext_raw_tex
rawLaTeXInline
--- | Textile standard link syntax is "label":target
-link :: GenParser Char ParserState Inline
-link = try $ do
+-- | Textile standard link syntax is "label":target. But we
+-- can also have ["label":target].
+link :: Parser [Char] ParserState Inline
+link = linkB <|> linkNoB
+
+linkNoB :: Parser [Char] ParserState Inline
+linkNoB = try $ do
+ name <- surrounded (char '"') inline
+ char ':'
+ let stopChars = "!.,;:"
+ url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
+ return $ Link name (url, "")
+
+linkB :: Parser [Char] ParserState Inline
+linkB = try $ do
+ char '['
name <- surrounded (char '"') inline
char ':'
- url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline))))
+ url <- manyTill nonspaceChar (char ']')
return $ Link name (url, "")
-- | Detect plain links to http or email.
-autoLink :: GenParser Char ParserState Inline
+autoLink :: Parser [Char] ParserState Inline
autoLink = do
(orig, src) <- (try uri <|> try emailAddress)
return $ Link [Str orig] (src, "")
-- | image embedding
-image :: GenParser Char ParserState Inline
+image :: Parser [Char] ParserState Inline
image = try $ do
char '!' >> notFollowedBy space
src <- manyTill anyChar (lookAhead $ oneOf "!(")
@@ -489,49 +507,49 @@ image = try $ do
char '!'
return $ Image [Str alt] (src, alt)
-escapedInline :: GenParser Char ParserState Inline
+escapedInline :: Parser [Char] ParserState Inline
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: GenParser Char ParserState Inline
+escapedEqs :: Parser [Char] ParserState Inline
escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: GenParser Char ParserState Inline
+escapedTag :: Parser [Char] ParserState Inline
escapedTag = Str <$>
(try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: GenParser Char ParserState Inline
+symbol :: Parser [Char] ParserState Inline
symbol = Str . singleton <$> oneOf wordBoundaries
-- | Inline code
-code :: GenParser Char ParserState Inline
+code :: Parser [Char] ParserState Inline
code = code1 <|> code2
-code1 :: GenParser Char ParserState Inline
+code1 :: Parser [Char] ParserState Inline
code1 = Code nullAttr <$> surrounded (char '@') anyChar
-code2 :: GenParser Char ParserState Inline
+code2 :: Parser [Char] ParserState Inline
code2 = do
htmlTag (tagOpen (=="tt") null)
Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: GenParser Char ParserState String
+attributes :: Parser [Char] ParserState String
attributes = choice [ enclosed (char '(') (char ')') anyChar,
enclosed (char '{') (char '}') anyChar,
enclosed (char '[') (char ']') anyChar]
-- | Parses material surrounded by a parser.
-surrounded :: GenParser Char st t -- ^ surrounding parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
+surrounded :: Parser [Char] st t -- ^ surrounding parser
+ -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] st [a]
surrounded border = enclosed border (try border)
-- | Inlines are most of the time of the same form
-simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
+simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
-> ([Inline] -> Inline) -- ^ Inline constructor
- -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
+ -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly)
simpleInline border construct = surrounded border (inlineWithAttribute) >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index a80ab0c63..7a21f6f3a 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -42,7 +42,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (findDataFile)
+import Text.Pandoc.Shared (findDataFile, renderTags')
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
@@ -102,14 +102,14 @@ convertTag userdata t@(TagOpen "script" as) =
src -> do
(raw, mime) <- getRaw userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
- return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+ return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag userdata t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
(raw, mime) <- getRaw userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
- return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
+ return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
convertTag _ t = return t
cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
@@ -163,14 +163,3 @@ makeSelfContained userdata inp = do
out' <- mapM (convertTag userdata) tags
return $ renderTags' out'
--- repeated from HTML reader:
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index f14a57c1f..d86f9a390 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Shared
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -59,13 +59,8 @@ module Text.Pandoc.Shared (
uniqueIdent,
isHeaderBlock,
headerShift,
- -- * Writer options
- HTMLMathMethod (..),
- CiteMethod (..),
- ObfuscationMethod (..),
- HTMLSlideVariant (..),
- WriterOptions (..),
- defaultWriterOptions,
+ -- * TagSoup HTML handling
+ renderTags',
-- * File handling
inDirectory,
findDataFile,
@@ -73,6 +68,8 @@ module Text.Pandoc.Shared (
-- * Error handling
err,
warn,
+ -- * Safe read
+ safeRead
) where
import Text.Pandoc.Definition
@@ -90,11 +87,12 @@ import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import Control.Monad (msum)
import Paths_pandoc (getDataFileName)
-import Text.Pandoc.Highlighting (Style, pygments)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
+import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
+ renderOptions)
--
-- List processing
@@ -149,7 +147,7 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
-escapeStringUsing escapeTable (x:xs) =
+escapeStringUsing escapeTable (x:xs) =
case (lookup x escapeTable) of
Just str -> str ++ rest
Nothing -> x:rest
@@ -176,7 +174,7 @@ stripFirstAndLast :: String -> String
stripFirstAndLast str =
drop 1 $ take ((length str) - 1) str
--- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
@@ -247,13 +245,13 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
-orderedListMarkers (start, numstyle, numdelim) =
+orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
Example -> map show [start..]
Decimal -> map show [start..]
- UpperAlpha -> drop (start - 1) $ cycle $
+ UpperAlpha -> drop (start - 1) $ cycle $
map singleton ['A'..'Z']
LowerAlpha -> drop (start - 1) $ cycle $
map singleton ['a'..'z']
@@ -271,13 +269,12 @@ orderedListMarkers (start, numstyle, numdelim) =
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
- where cleanup [] = []
- cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest
- in case rest' of
- [] -> []
- _ -> Space : cleanup rest'
+ where cleanup [] = []
+ cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
+ [] -> []
+ (x:xs) -> Space : x : cleanup xs
cleanup ((Str ""):rest) = cleanup rest
- cleanup (x:rest) = x : cleanup rest
+ cleanup (x:rest) = x : cleanup rest
isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
@@ -386,7 +383,7 @@ isPara (Para _) = True
isPara _ = False
-- | Data structure for defining hierarchical Pandoc documents
-data Element = Blk Block
+data Element = Blk Block
| Sec Int [Int] String [Inline] [Element]
-- lvl num ident label contents
deriving (Eq, Read, Show, Typeable, Data)
@@ -414,7 +411,7 @@ hierarchicalizeWithIds ((Header level title'):xs) = do
let ident = uniqueIdent title' usedIdents
let lastnum' = take level lastnum
let newnum = if length lastnum' >= level
- then init lastnum' ++ [last lastnum' + 1]
+ then init lastnum' ++ [last lastnum' + 1]
else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1]
S.put (newnum, (ident : usedIdents))
let (sectionContents, rest) = break (headerLtEq level) xs
@@ -456,112 +453,20 @@ headerShift n = bottomUp shift
shift x = x
--
--- Writer options
+-- TagSoup HTML handling
--
-data HTMLMathMethod = PlainMath
- | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
- | JsMath (Maybe String) -- url of jsMath load script
- | GladTeX
- | WebTeX String -- url of TeX->image script.
- | MathML (Maybe String) -- url of MathMLinHTML.js
- | MathJax String -- url of MathJax.js
- deriving (Show, Read, Eq)
-
-data CiteMethod = Citeproc -- use citeproc to render them
- | Natbib -- output natbib cite commands
- | Biblatex -- output biblatex cite commands
- deriving (Show, Read, Eq)
-
--- | Methods for obfuscating email addresses in HTML.
-data ObfuscationMethod = NoObfuscation
- | ReferenceObfuscation
- | JavascriptObfuscation
- deriving (Show, Read, Eq)
-
--- | Varieties of HTML slide shows.
-data HTMLSlideVariant = S5Slides
- | SlidySlides
- | SlideousSlides
- | DZSlides
- | NoSlides
- deriving (Show, Read, Eq)
-
--- | Options for writers
-data WriterOptions = WriterOptions
- { writerStandalone :: Bool -- ^ Include header and footer
- , writerTemplate :: String -- ^ Template to use in standalone mode
- , writerVariables :: [(String, String)] -- ^ Variables to set in template
- , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB
- , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous?
- , writerIncremental :: Bool -- ^ True if lists should be incremental
- , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex
- , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
- , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
- , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerWrapText :: Bool -- ^ Wrap text to line length
- , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
- , writerLiterateHaskell :: Bool -- ^ Write as literate haskell
- , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
- , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
- , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file
- , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
- , writerCiteMethod :: CiteMethod -- ^ How to print cites
- , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations
- , writerHtml5 :: Bool -- ^ Produce HTML5
- , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show
- , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
- , writerChapters :: Bool -- ^ Use "chapter" for top-level sects
- , writerListings :: Bool -- ^ Use listings package for code
- , writerHighlight :: Bool -- ^ Highlight source code
- , writerHighlightStyle :: Style -- ^ Style to use for highlighting
- , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
- , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex
- } deriving Show
-
-{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-}
--- | Default writer options.
-defaultWriterOptions :: WriterOptions
-defaultWriterOptions =
- WriterOptions { writerStandalone = False
- , writerTemplate = ""
- , writerVariables = []
- , writerEPUBMetadata = ""
- , writerTabStop = 4
- , writerTableOfContents = False
- , writerSlideVariant = NoSlides
- , writerIncremental = False
- , writerXeTeX = False
- , writerHTMLMathMethod = PlainMath
- , writerIgnoreNotes = False
- , writerNumberSections = False
- , writerSectionDivs = False
- , writerStrictMarkdown = False
- , writerReferenceLinks = False
- , writerWrapText = True
- , writerColumns = 72
- , writerLiterateHaskell = False
- , writerEmailObfuscation = JavascriptObfuscation
- , writerIdentifierPrefix = ""
- , writerSourceDirectory = "."
- , writerUserDataDir = Nothing
- , writerCiteMethod = Citeproc
- , writerBiblioFiles = []
- , writerHtml5 = False
- , writerBeamer = False
- , writerSlideLevel = Nothing
- , writerChapters = False
- , writerListings = False
- , writerHighlight = False
- , writerHighlightStyle = pygments
- , writerSetextHeaders = True
- , writerTeXLigatures = True
- }
+-- | Render HTML tags.
+renderTags' :: [Tag String] -> String
+renderTags' = renderTagsOptions
+ renderOptions{ optMinimize = \x ->
+ let y = map toLower x
+ in y == "hr" || y == "br" ||
+ y == "img" || y == "meta" ||
+ y == "link"
+ , optRawTag = \x ->
+ let y = map toLower x
+ in y == "script" || y == "style" }
--
-- File handling
@@ -606,3 +511,15 @@ warn :: String -> IO ()
warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+
+--
+-- Safe read
+--
+
+safeRead :: (Monad m, Read a) => String -> m a
+safeRead s = case reads s of
+ (d,x):_
+ | all isSpace x -> return d
+ _ -> fail $ "Could not read `" ++ s ++ "'"
+
+
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index dfdcd8e63..4f5ad54bd 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -30,7 +30,7 @@ A simple templating system with variable substitution and conditionals.
Example:
> renderTemplate [("name","Sam"),("salary","50,000")] $
-> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$"
+> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$"
> "Hi, John. You make $50,000."
A slot for an interpolated variable is a variable name surrounded
@@ -68,8 +68,8 @@ module Text.Pandoc.Templates ( renderTemplate
, TemplateTarget
, getDefaultTemplate ) where
-import Text.ParserCombinators.Parsec
-import Control.Monad (liftM, when, forM)
+import Text.Parsec
+import Control.Monad (liftM, when, forM, mzero)
import System.FilePath
import Data.List (intercalate, intersperse)
#if MIN_VERSION_blaze_html(0,5,0)
@@ -83,22 +83,26 @@ import Text.Pandoc.Shared (readDataFile)
import qualified Control.Exception.Extensible as E (try, IOException)
-- | Get default template for the specified writer.
-getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
- -> String -- ^ Name of writer
+getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
+ -> String -- ^ Name of writer
-> IO (Either E.IOException String)
-getDefaultTemplate _ "native" = return $ Right ""
-getDefaultTemplate _ "json" = return $ Right ""
-getDefaultTemplate _ "docx" = return $ Right ""
-getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
-getDefaultTemplate user "epub" = getDefaultTemplate user "html"
getDefaultTemplate user writer = do
- let format = takeWhile (/='+') writer -- strip off "+lhs" if present
- let fname = "templates" </> "default" <.> format
- E.try $ readDataFile user fname
+ let format = takeWhile (`notElem` "+-") writer -- strip off extensions
+ case format of
+ "native" -> return $ Right ""
+ "json" -> return $ Right ""
+ "docx" -> return $ Right ""
+ "epub" -> return $ Right ""
+ "odt" -> getDefaultTemplate user "opendocument"
+ "markdown_strict" -> getDefaultTemplate user "markdown"
+ "multimarkdown" -> getDefaultTemplate user "markdown"
+ "markdown_github" -> getDefaultTemplate user "markdown"
+ _ -> let fname = "templates" </> "default" <.> format
+ in E.try $ readDataFile user fname
data TemplateState = TemplateState Int [(String,String)]
-adjustPosition :: String -> GenParser Char TemplateState String
+adjustPosition :: String -> Parsec [Char] TemplateState String
adjustPosition str = do
let lastline = takeWhile (/= '\n') $ reverse str
updateState $ \(TemplateState pos x) ->
@@ -108,18 +112,18 @@ adjustPosition str = do
return str
class TemplateTarget a where
- toTarget :: String -> a
+ toTarget :: String -> a
instance TemplateTarget String where
toTarget = id
-instance TemplateTarget ByteString where
+instance TemplateTarget ByteString where
toTarget = fromString
instance TemplateTarget Html where
toTarget = preEscapedString
--- | Renders a template
+-- | Renders a template
renderTemplate :: TemplateTarget a
=> [(String,String)] -- ^ Assoc. list of values for variables
-> String -- ^ Template
@@ -132,21 +136,21 @@ renderTemplate vals templ =
reservedWords :: [String]
reservedWords = ["else","endif","for","endfor","sep"]
-parseTemplate :: GenParser Char TemplateState [String]
+parseTemplate :: Parsec [Char] TemplateState [String]
parseTemplate =
many $ (plaintext <|> escapedDollar <|> conditional <|> for <|> variable)
>>= adjustPosition
-plaintext :: GenParser Char TemplateState String
+plaintext :: Parsec [Char] TemplateState String
plaintext = many1 $ noneOf "$"
-escapedDollar :: GenParser Char TemplateState String
+escapedDollar :: Parsec [Char] TemplateState String
escapedDollar = try $ string "$$" >> return "$"
-skipEndline :: GenParser Char st ()
+skipEndline :: Parsec [Char] st ()
skipEndline = try $ skipMany (oneOf " \t") >> newline >> return ()
-conditional :: GenParser Char TemplateState String
+conditional :: Parsec [Char] TemplateState String
conditional = try $ do
TemplateState pos vars <- getState
string "$if("
@@ -170,7 +174,7 @@ conditional = try $ do
then ifContents
else elseContents
-for :: GenParser Char TemplateState String
+for :: Parsec [Char] TemplateState String
for = try $ do
TemplateState pos vars <- getState
string "$for("
@@ -178,14 +182,14 @@ for = try $ do
string ")$"
-- if newline after the "for", then a newline after "endfor" will be swallowed
multiline <- option False $ try $ skipEndline >> return True
- let matches = filter (\(k,_) -> k == id') vars
+ let matches = filter (\(k,_) -> k == id') vars
let indent = replicate pos ' '
contents <- forM matches $ \m -> do
updateState $ \(TemplateState p v) -> TemplateState p (m:v)
raw <- liftM concat $ lookAhead parseTemplate
return $ intercalate ('\n':indent) $ lines $ raw ++ "\n"
parseTemplate
- sep <- option "" $ do try (string "$sep$")
+ sep <- option "" $ do try (string "$sep$")
when multiline $ optional skipEndline
liftM concat parseTemplate
string "$endfor$"
@@ -193,16 +197,16 @@ for = try $ do
setState $ TemplateState pos vars
return $ concat $ intersperse sep contents
-ident :: GenParser Char TemplateState String
+ident :: Parsec [Char] TemplateState String
ident = do
first <- letter
rest <- many (alphaNum <|> oneOf "_-")
let id' = first : rest
if id' `elem` reservedWords
- then pzero
+ then mzero
else return id'
-variable :: GenParser Char TemplateState String
+variable :: Parsec [Char] TemplateState String
variable = try $ do
char '$'
id' <- ident
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index e2959eae7..508ad30a9 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.UTF8
Copyright : Copyright (C) 2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 1913eb92b..e314cf70e 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -40,8 +40,8 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Shared
-import Text.Pandoc.Parsing hiding (blankline)
-import Text.ParserCombinators.Parsec ( runParser, GenParser )
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, space)
import Data.List ( isPrefixOf, intersperse, intercalate )
import Text.Pandoc.Pretty
import Control.Monad.State
@@ -93,7 +93,7 @@ escapeString = escapeStringUsing escs
where escs = backslashEscapes "{"
-- | Ordered list start parser for use in Para below.
-olMarker :: GenParser Char ParserState Char
+olMarker :: Parser [Char] ParserState Char
olMarker = do (start, style', delim) <- anyOrderedListMarker
if delim == Period &&
(style' == UpperAlpha || (style' == UpperRoman &&
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 964320eb2..df11d79cc 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.ConTeXt
Copyright : Copyright (C) 2007-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of 'Pandoc' format into ConTeXt.
@@ -31,6 +31,7 @@ Conversion of 'Pandoc' format into ConTeXt.
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Generic (queryWith)
import Text.Printf ( printf )
import Data.List ( intercalate )
@@ -39,23 +40,23 @@ import Text.Pandoc.Pretty
import Text.Pandoc.Templates ( renderTemplate )
import Network.URI ( isURI, unEscapeString )
-data WriterState =
+data WriterState =
WriterState { stNextRef :: Int -- number of next URL reference
, stOrderedListLevel :: Int -- level of ordered list
, stOptions :: WriterOptions -- writer options
}
orderedListStyles :: [[Char]]
-orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"]
+orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"]
-- | Convert Pandoc to ConTeXt.
writeConTeXt :: WriterOptions -> Pandoc -> String
-writeConTeXt options document =
+writeConTeXt options document =
let defaultWriterState = WriterState { stNextRef = 1
, stOrderedListLevel = 0
, stOptions = options
- }
- in evalState (pandocToConTeXt options document) defaultWriterState
+ }
+ in evalState (pandocToConTeXt options document) defaultWriterState
pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do
@@ -120,7 +121,7 @@ elementToConTeXt opts (Sec level _ id' title' elements) = do
return $ vcat (header' : innerContents)
-- | Convert Pandoc block element to ConTeXt.
-blockToConTeXt :: Block
+blockToConTeXt :: Block
-> State WriterState Doc
blockToConTeXt Null = return empty
blockToConTeXt (Plain lst) = inlineListToConTeXt lst
@@ -128,7 +129,7 @@ blockToConTeXt (Para [Image txt (src,_)]) = do
capt <- inlineListToConTeXt txt
return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <>
braces ("\\externalfigure" <> brackets (text src)) <> blankline
-blockToConTeXt (Para lst) = do
+blockToConTeXt (Para lst) = do
contents <- inlineListToConTeXt lst
return $ contents <> blankline
blockToConTeXt (BlockQuote lst) = do
@@ -147,18 +148,18 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
let level = stOrderedListLevel st
put $ st {stOrderedListLevel = level + 1}
contents <- mapM listItemToConTeXt lst
- put $ st {stOrderedListLevel = level}
+ put $ st {stOrderedListLevel = level}
let start' = if start == 1 then "" else "start=" ++ show start
let delim' = case delim of
DefaultDelim -> ""
- Period -> "stopper=."
- OneParen -> "stopper=)"
+ Period -> "stopper=."
+ OneParen -> "stopper=)"
TwoParens -> "left=(,stopper=)"
- let width = maximum $ map length $ take (length contents)
+ let width = maximum $ map length $ take (length contents)
(orderedListMarkers (start, style', delim))
let width' = (toEnum width + 1) / 2
- let width'' = if width' > (1.5 :: Double)
- then "width=" ++ show width' ++ "em"
+ let width'' = if width' > (1.5 :: Double)
+ then "width=" ++ show width' ++ "em"
else ""
let specs2Items = filter (not . null) [start', delim', width'']
let specs2 = if null specs2Items
@@ -166,8 +167,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
else "[" ++ intercalate "," specs2Items ++ "]"
let style'' = case style' of
DefaultStyle -> orderedListStyles !! level
- Decimal -> "[n]"
- Example -> "[n]"
+ Decimal -> "[n]"
+ Example -> "[n]"
LowerRoman -> "[r]"
UpperRoman -> "[R]"
LowerAlpha -> "[a]"
@@ -182,21 +183,21 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
blockToConTeXt (Header level lst) = sectionHeader "" level lst
blockToConTeXt (Table caption aligns widths heads rows) = do
let colDescriptor colWidth alignment = (case alignment of
- AlignLeft -> 'l'
+ AlignLeft -> 'l'
AlignRight -> 'r'
AlignCenter -> 'c'
AlignDefault -> 'l'):
if colWidth == 0
then "|"
else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|")
- let colDescriptors = "|" ++ (concat $
+ let colDescriptors = "|" ++ (concat $
zipWith colDescriptor widths aligns)
headers <- if all null heads
then return empty
- else liftM ($$ "\\HL") $ tableRowToConTeXt heads
- captionText <- inlineListToConTeXt caption
+ else liftM ($$ "\\HL") $ tableRowToConTeXt heads
+ captionText <- inlineListToConTeXt caption
let captionText' = if null caption then text "none" else captionText
- rows' <- mapM tableRowToConTeXt rows
+ rows' <- mapM tableRowToConTeXt rows
return $ "\\placetable[here]" <> braces captionText' $$
"\\starttable" <> brackets (text colDescriptors) $$
"\\HL" $$ headers $$
@@ -230,7 +231,7 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst
-- | Convert inline element to ConTeXt
inlineToConTeXt :: Inline -- ^ Inline to convert
-> State WriterState Doc
-inlineToConTeXt (Emph lst) = do
+inlineToConTeXt (Emph lst) = do
contents <- inlineListToConTeXt lst
return $ braces $ "\\em " <> contents
inlineToConTeXt (Strong lst) = do
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 1bcf99dcf..e696fc63e 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Docbook
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -31,6 +31,7 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where
import Text.Pandoc.Definition
import Text.Pandoc.XML
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
import Data.List ( isPrefixOf, intercalate, isSuffixOf )
@@ -47,23 +48,23 @@ authorToDocbook opts name' =
let name = render Nothing $ inlinesToDocbook opts name'
in if ',' `elem` name
then -- last name first
- let (lastname, rest) = break (==',') name
+ let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
else -- last name last
let namewords = words name
- lengthname = length namewords
+ lengthname = length namewords
(firstname, lastname) = case lengthname of
- 0 -> ("","")
+ 0 -> ("","")
1 -> ("", name)
n -> (intercalate " " (take (n-1) namewords), last namewords)
- in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
-writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
+writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
let title = inlinesToDocbook opts tit
authors = map (authorToDocbook opts) auths
date = inlinesToDocbook opts dat
@@ -92,7 +93,7 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Int -> Element -> Doc
-elementToDocbook opts _ (Blk block) = blockToDocbook opts block
+elementToDocbook opts _ (Blk block) = blockToDocbook opts block
elementToDocbook opts lvl (Sec _ _num id' title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
@@ -115,10 +116,10 @@ plainToPara :: Block -> Block
plainToPara (Plain x) = Para x
plainToPara x = x
--- | Convert a list of pairs of terms and definitions into a list of
+-- | Convert a list of pairs of terms and definitions into a list of
-- Docbook varlistentrys.
deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc
-deflistItemsToDocbook opts items =
+deflistItemsToDocbook opts items =
vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items
-- | Convert a term and a list of blocks into a Docbook varlistentry.
@@ -144,13 +145,16 @@ blockToDocbook _ Null = empty
blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize
blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
blockToDocbook opts (Para [Image txt (src,_)]) =
- let capt = inlinesToDocbook opts txt
+ let alt = inlinesToDocbook opts txt
+ capt = if null txt
+ then empty
+ else inTagsSimple "title" alt
in inTagsIndented "figure" $
- inTagsSimple "title" capt $$
+ capt $$
(inTagsIndented "mediaobject" $
(inTagsIndented "imageobject"
(selfClosingTag "imagedata" [("fileref",src)])) $$
- inTagsSimple "textobject" (inTagsSimple "phrase" capt))
+ inTagsSimple "textobject" (inTagsSimple "phrase" alt))
blockToDocbook opts (Para lst) =
inTagsIndented "para" $ inlinesToDocbook opts lst
blockToDocbook opts (BlockQuote blocks) =
@@ -167,9 +171,9 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) =
then [s]
else languagesByExtension . map toLower $ s
langs = concatMap langsFrom classes
-blockToDocbook opts (BulletList lst) =
- inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
-blockToDocbook _ (OrderedList _ []) = empty
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook _ (OrderedList _ []) = empty
blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
let attribs = case numstyle of
DefaultStyle -> []
@@ -182,12 +186,12 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
items = if start == 1
then listItemsToDocbook opts (first:rest)
else (inTags True "listitem" [("override",show start)]
- (blocksToDocbook opts $ map plainToPara first)) $$
- listItemsToDocbook opts rest
+ (blocksToDocbook opts $ map plainToPara first)) $$
+ listItemsToDocbook opts rest
in inTags True "orderedlist" attribs items
-blockToDocbook opts (DefinitionList lst) =
- inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
-blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block
+blockToDocbook opts (DefinitionList lst) =
+ inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
+blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block
-- we allow html for compatibility with earlier versions of pandoc
blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block
blockToDocbook _ (RawBlock _ _) = empty
@@ -237,26 +241,26 @@ inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook _ (Str str) = text $ escapeStringForXML str
-inlineToDocbook opts (Emph lst) =
+inlineToDocbook _ (Str str) = text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
inTagsSimple "emphasis" $ inlinesToDocbook opts lst
-inlineToDocbook opts (Strong lst) =
+inlineToDocbook opts (Strong lst) =
inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
-inlineToDocbook opts (Strikeout lst) =
+inlineToDocbook opts (Strikeout lst) =
inTags False "emphasis" [("role", "strikethrough")] $
inlinesToDocbook opts lst
-inlineToDocbook opts (Superscript lst) =
+inlineToDocbook opts (Superscript lst) =
inTagsSimple "superscript" $ inlinesToDocbook opts lst
-inlineToDocbook opts (Subscript lst) =
+inlineToDocbook opts (Subscript lst) =
inTagsSimple "subscript" $ inlinesToDocbook opts lst
-inlineToDocbook opts (SmallCaps lst) =
+inlineToDocbook opts (SmallCaps lst) =
inTags False "emphasis" [("role", "smallcaps")] $
inlinesToDocbook opts lst
-inlineToDocbook opts (Quoted _ lst) =
+inlineToDocbook opts (Quoted _ lst) =
inTagsSimple "quote" $ inlinesToDocbook opts lst
inlineToDocbook opts (Cite _ lst) =
- inlinesToDocbook opts lst
-inlineToDocbook _ (Code _ str) =
+ inlinesToDocbook opts lst
+inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math t str)
| isMathML (writerHTMLMathMethod opts) =
@@ -282,7 +286,7 @@ inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) =
if isPrefixOf "mailto:" src
then let src' = drop 7 src
- emailLink = inTagsSimple "email" $ text $
+ emailLink = inTagsSimple "email" $ text $
escapeStringForXML $ src'
in case txt of
[Code _ s] | s == src' -> emailLink
@@ -292,14 +296,14 @@ inlineToDocbook opts (Link txt (src, _)) =
then inTags False "link" [("linkend", drop 1 src)]
else inTags False "ulink" [("url", src)]) $
inlinesToDocbook opts txt
-inlineToDocbook _ (Image _ (src, tit)) =
+inlineToDocbook _ (Image _ (src, tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title" (text $ escapeStringForXML tit)
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
-inlineToDocbook opts (Note contents) =
+inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents
isMathML :: HTMLMathMethod -> Bool
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 396e7a482..05c9555c6 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -43,6 +43,7 @@ import Text.Pandoc.Generic
import System.Directory
import Text.Pandoc.ImageSize
import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Options
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlight )
import Text.Highlighting.Kate.Types ()
@@ -93,14 +94,13 @@ mknode s attrs =
add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
-- | Produce an Docx file from a Pandoc document.
-writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx
- -> WriterOptions -- ^ Writer options
+writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
-writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do
+writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do
let datadir = writerUserDataDir opts
refArchive <- liftM toArchive $
- case mbRefDocx of
+ case writerReferenceDocx opts of
Just f -> B.readFile f
Nothing -> do
let defaultDocx = getDataFileName "reference.docx" >>= B.readFile
@@ -543,7 +543,7 @@ inlineToOpenXML opts (SmallCaps lst) =
inlineToOpenXML opts (Strikeout lst) =
withTextProp (mknode "w:strike" [] ())
$ inlinesToOpenXML opts lst
-inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ]
+inlineToOpenXML _ LineBreak = return [br]
inlineToOpenXML _ (RawInline f str)
| f == "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
@@ -562,16 +562,14 @@ inlineToOpenXML opts (Math DisplayMath str) =
Left _ -> do
fallback <- inlinesToOpenXML opts (readTeXMath str)
return $ [br] ++ fallback ++ [br]
- where br = mknode "w:br" [] ()
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML _ (Code attrs str) =
withTextProp (rStyle "VerbatimChar")
$ case highlight formatOpenXML attrs str of
- Nothing -> intercalate [mknode "w:br" [] ()]
+ Nothing -> intercalate [br]
`fmap` (mapM formattedString $ lines str)
Just h -> return h
- where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] .
- map (map toHlTok)
+ where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
toHlTok (toktype,tok) = mknode "w:r" []
[ mknode "w:rPr" []
[ rStyle $ show toktype ]
@@ -669,3 +667,6 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
liftIO $ UTF8.hPutStrLn stderr $
"Could not find image `" ++ src ++ "', skipping..."
inlinesToOpenXML opts alt
+
+br :: Element
+br = mknode "w:r" [] [mknode "w:cr" [] () ]
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b423f136f..46310e398 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -38,6 +38,7 @@ import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Text.Pandoc.Shared hiding ( Element )
+import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Control.Monad.State
@@ -52,12 +53,10 @@ import Prelude hiding (catch)
import Control.Exception (catch, SomeException)
-- | Produce an EPUB file from a Pandoc document.
-writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
- -> [FilePath] -- ^ Paths to fonts to embed
- -> WriterOptions -- ^ Writer options
+writeEPUB :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
-writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
+writeEPUB opts doc@(Pandoc meta _) = do
epochtime <- floor `fmap` getPOSIXTime
let mkEntry path content = toEntry path epochtime content
let opts' = opts{ writerEmailObfuscation = NoObfuscation
@@ -107,7 +106,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
-- handle fonts
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
- fontEntries <- mapM mkFontEntry fonts
+ fontEntries <- mapM mkFontEntry $ writerEpubFonts opts
-- body pages
let isH1 (Header 1 _) = True
@@ -232,7 +231,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
-- stylesheet
- stylesheet <- case mbStylesheet of
+ stylesheet <- case writerEpubStylesheet opts of
Just s -> return s
Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
@@ -249,12 +248,14 @@ metadataElement metadataXML uuid lang title authors date mbCoverImage =
let userNodes = parseXML metadataXML
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
- filter isDublinCoreElement $ onlyElems userNodes
+ filter isMetadataElement $ onlyElems userNodes
dublinElements = ["contributor","coverage","creator","date",
"description","format","identifier","language","publisher",
"relation","rights","source","subject","title","type"]
- isDublinCoreElement e = qPrefix (elName e) == Just "dc" &&
- qName (elName e) `elem` dublinElements
+ isMetadataElement e = (qPrefix (elName e) == Just "dc" &&
+ qName (elName e) `elem` dublinElements) ||
+ (qPrefix (elName e) == Nothing &&
+ qName (elName e) `elem` ["link","meta"])
contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
[ unode "dc:language" lang | not (elt `contains` "language") ] ++
@@ -288,10 +289,8 @@ transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
let writeHtmlInline opts z = removeTrailingSpace $
writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]]
- mathml = writeHtmlInline defaultWriterOptions{
- writerHTMLMathMethod = MathML Nothing } x
- fallback = writeHtmlInline defaultWriterOptions{
- writerHTMLMathMethod = PlainMath } x
+ mathml = writeHtmlInline def{writerHTMLMathMethod = MathML Nothing } x
+ fallback = writeHtmlInline def{writerHTMLMathMethod = PlainMath } x
inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++
"<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++
mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++
@@ -312,9 +311,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
unEntity ('&':'#':xs) =
let (ds,ys) = break (==';') xs
rest = drop 1 ys
- in case reads ('\'':'\\':ds ++ "'") of
- ((x,_):_) -> x : unEntity rest
- _ -> '&':'#':unEntity xs
+ in case safeRead ('\'':'\\':ds ++ "'") of
+ Just x -> x : unEntity rest
+ Nothing -> '&':'#':unEntity xs
unEntity (x:xs) = x : unEntity xs
imageTypeOf :: FilePath -> Maybe String
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
new file mode 100644
index 000000000..301d80c54
--- /dev/null
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -0,0 +1,616 @@
+{-
+Copyright (c) 2011-2012, Sergey Astanin
+All rights reserved.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format.
+
+FictionBook is an XML-based e-book format. For more information see:
+<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1>
+
+-}
+module Text.Pandoc.Writers.FB2 (writeFB2) where
+
+import Control.Monad.State (StateT, evalStateT, get, modify)
+import Control.Monad.State (liftM, liftM2, liftIO)
+import Data.ByteString.Base64 (encode)
+import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
+import Data.List (intersperse, intercalate, isPrefixOf)
+import Data.Either (lefts, rights)
+import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
+import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody)
+import Network.HTTP (lookupHeader, HeaderName(..), urlEncode)
+import Network.URI (isURI, unEscapeString)
+import System.FilePath (takeExtension)
+import Text.XML.Light
+import qualified Control.Exception as E
+import qualified Data.ByteString as B
+import qualified Text.XML.Light as X
+import qualified Text.XML.Light.Cursor as XC
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
+import Text.Pandoc.Shared (orderedListMarkers)
+import Text.Pandoc.Generic (bottomUp)
+
+-- | Data to be written at the end of the document:
+-- (foot)notes, URLs, references, images.
+data FbRenderState = FbRenderState
+ { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text
+ , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path
+ , parentListMarker :: String -- ^ list marker of the parent ordered list
+ , parentBulletLevel :: Int -- ^ nesting level of the unordered list
+ , writerOptions :: WriterOptions
+ } deriving (Show)
+
+-- | FictionBook building monad.
+type FBM = StateT FbRenderState IO
+
+newFB :: FbRenderState
+newFB = FbRenderState { footnotes = [], imagesToFetch = []
+ , parentListMarker = "", parentBulletLevel = 0
+ , writerOptions = def }
+
+data ImageMode = NormalImage | InlineImage deriving (Eq)
+instance Show ImageMode where
+ show NormalImage = "imageType"
+ show InlineImage = "inlineImageType"
+
+-- | Produce an FB2 document from a 'Pandoc' document.
+writeFB2 :: WriterOptions -- ^ conversion options
+ -> Pandoc -- ^ document to convert
+ -> IO String -- ^ FictionBook2 document (not encoded yet)
+writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do
+ modify (\s -> s { writerOptions = opts { writerStandalone = True } })
+ desc <- description meta
+ fp <- frontpage meta
+ secs <- renderSections 1 blocks
+ let body = el "body" $ fp ++ secs
+ notes <- renderFootnotes
+ (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s)
+ let body' = replaceImagesWithAlt missing body
+ let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
+ return $ xml_head ++ (showContent fb2_xml)
+ where
+ xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ fb2_attrs =
+ let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0"
+ xlink = "http://www.w3.org/1999/xlink"
+ in [ uattr "xmlns" xmlns
+ , attr ("xmlns", "l") xlink ]
+ --
+ frontpage :: Meta -> FBM [Content]
+ frontpage meta' = do
+ t <- cMapM toXml . docTitle $ meta'
+ return $
+ [ el "title" (el "p" t)
+ , el "annotation" (map (el "p" . cMap plain)
+ (docAuthors meta' ++ [docDate meta']))
+ ]
+ description :: Meta -> FBM Content
+ description meta' = do
+ bt <- booktitle meta'
+ let as = authors meta'
+ dd <- docdate meta'
+ return $ el "description"
+ [ el "title-info" (bt ++ as ++ dd)
+ , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version
+ ]
+ booktitle :: Meta -> FBM [Content]
+ booktitle meta' = do
+ t <- cMapM toXml . docTitle $ meta'
+ return $ if null t
+ then []
+ else [ el "book-title" t ]
+ authors :: Meta -> [Content]
+ authors meta' = cMap author (docAuthors meta')
+ author :: [Inline] -> [Content]
+ author ss =
+ let ws = words . cMap plain $ ss
+ email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws)
+ ws' = filter ('@' `notElem`) ws
+ names = case ws' of
+ (nickname:[]) -> [ el "nickname" nickname ]
+ (fname:lname:[]) -> [ el "first-name" fname
+ , el "last-name" lname ]
+ (fname:rest) -> [ el "first-name" fname
+ , el "middle-name" (concat . init $ rest)
+ , el "last-name" (last rest) ]
+ ([]) -> []
+ in list $ el "author" (names ++ email)
+ docdate :: Meta -> FBM [Content]
+ docdate meta' = do
+ let ss = docDate meta'
+ d <- cMapM toXml ss
+ return $ if null d
+ then []
+ else [el "date" d]
+
+-- | Divide the stream of blocks into sections and convert to XML
+-- representation.
+renderSections :: Int -> [Block] -> FBM [Content]
+renderSections level blocks = do
+ let secs = splitSections level blocks
+ mapM (renderSection level) secs
+
+renderSection :: Int -> ([Inline], [Block]) -> FBM Content
+renderSection level (ttl, body) = do
+ title <- if null ttl
+ then return []
+ else return . list . el "title" . formatTitle $ ttl
+ content <- if (hasSubsections body)
+ then renderSections (level + 1) body
+ else cMapM blockToXml body
+ return $ el "section" (title ++ content)
+ where
+ hasSubsections = any isHeader
+ isHeader (Header _ _) = True
+ isHeader _ = False
+
+-- | Only <p> and <empty-line> are allowed within <title> in FB2.
+formatTitle :: [Inline] -> [Content]
+formatTitle inlines =
+ let lns = split isLineBreak inlines
+ lns' = map (el "p" . cMap plain) lns
+ in intersperse (el "empty-line" ()) lns'
+
+split :: (a -> Bool) -> [a] -> [[a]]
+split _ [] = []
+split cond xs = let (b,a) = break cond xs
+ in (b:split cond (drop 1 a))
+
+isLineBreak :: Inline -> Bool
+isLineBreak LineBreak = True
+isLineBreak _ = False
+
+-- | Divide the stream of block elements into sections: [(title, blocks)].
+splitSections :: Int -> [Block] -> [([Inline], [Block])]
+splitSections level blocks = reverse $ revSplit (reverse blocks)
+ where
+ revSplit [] = []
+ revSplit rblocks =
+ let (lastsec, before) = break sameLevel rblocks
+ (header, prevblocks) =
+ case before of
+ ((Header n title):prevblocks') ->
+ if n == level
+ then (title, prevblocks')
+ else ([], before)
+ _ -> ([], before)
+ in (header, reverse lastsec) : revSplit prevblocks
+ sameLevel (Header n _) = n == level
+ sameLevel _ = False
+
+-- | Make another FictionBook body with footnotes.
+renderFootnotes :: FBM [Content]
+renderFootnotes = do
+ fns <- footnotes `liftM` get
+ if null fns
+ then return [] -- no footnotes
+ else return . list $
+ el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
+ where
+ renderFN (n, idstr, cs) =
+ let fn_texts = (el "title" (el "p" (show n))) : cs
+ in el "section" ([uattr "id" idstr], fn_texts)
+
+-- | Fetch images and encode them for the FictionBook XML.
+-- Return image data and a list of hrefs of the missing images.
+fetchImages :: [(String,String)] -> IO ([Content],[String])
+fetchImages links = do
+ imgs <- mapM (uncurry fetchImage) links
+ return $ (rights imgs, lefts imgs)
+
+-- | Fetch image data from disk or from network and make a <binary> XML section.
+-- Return either (Left hrefOfMissingImage) or (Right xmlContent).
+fetchImage :: String -> String -> IO (Either String Content)
+fetchImage href link = do
+ mbimg <-
+ case (isURI link, readDataURI link) of
+ (True, Just (mime,_,True,base64)) ->
+ let mime' = map toLower mime
+ in if mime' == "image/png" || mime' == "image/jpeg"
+ then return (Just (mime',base64))
+ else return Nothing
+ (True, Just _) -> return Nothing -- not base64-encoded
+ (True, Nothing) -> fetchURL link
+ (False, _) -> do
+ d <- nothingOnError $ B.readFile (unEscapeString link)
+ let t = case map toLower (takeExtension link) of
+ ".png" -> Just "image/png"
+ ".jpg" -> Just "image/jpeg"
+ ".jpeg" -> Just "image/jpeg"
+ ".jpe" -> Just "image/jpeg"
+ _ -> Nothing -- only PNG and JPEG are supported in FB2
+ return $ liftM2 (,) t (liftM (toStr . encode) d)
+ case mbimg of
+ Just (imgtype, imgdata) -> do
+ return . Right $ el "binary"
+ ( [uattr "id" href
+ , uattr "content-type" imgtype]
+ , txt imgdata )
+ _ -> return (Left ('#':href))
+ where
+ nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString))
+ nothingOnError action = liftM Just action `E.catch` omnihandler
+ omnihandler :: E.SomeException -> IO (Maybe B.ByteString)
+ omnihandler _ = return Nothing
+
+-- | Extract mime type and encoded data from the Data URI.
+readDataURI :: String -- ^ URI
+ -> Maybe (String,String,Bool,String)
+ -- ^ Maybe (mime,charset,isBase64,data)
+readDataURI uri =
+ let prefix = "data:"
+ in if not (prefix `isPrefixOf` uri)
+ then Nothing
+ else
+ let rest = drop (length prefix) uri
+ meta = takeWhile (/= ',') rest -- without trailing ','
+ uridata = drop (length meta + 1) rest
+ parts = split (== ';') meta
+ (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
+ in Just (mime,cs,enc,uridata)
+ where
+ upd str m@(mime,cs,enc)
+ | isMimeType str = (str,cs,enc)
+ | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc)
+ | str == "base64" = (mime,cs,True)
+ | otherwise = m
+
+-- Without parameters like ;charset=...; see RFC 2045, 5.1
+isMimeType :: String -> Bool
+isMimeType s =
+ case split (=='/') s of
+ [mtype,msubtype] ->
+ ((map toLower mtype) `elem` types
+ || "x-" `isPrefixOf` (map toLower mtype))
+ && all valid mtype
+ && all valid msubtype
+ _ -> False
+ where
+ types = ["text","image","audio","video","application","message","multipart"]
+ valid c = isAscii c && not (isControl c) && not (isSpace c) &&
+ c `notElem` "()<>@,;:\\\"/[]?="
+
+-- | Fetch URL, return its Content-Type and binary data on success.
+fetchURL :: String -> IO (Maybe (String, String))
+fetchURL url = do
+ flip catchIO_ (return Nothing) $ do
+ r <- browse $ do
+ setOutHandler (const (return ()))
+ setAllowRedirects True
+ liftM snd . request . getRequest $ url
+ let content_type = lookupHeader HdrContentType (getHeaders r)
+ content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r
+ return $ liftM2 (,) content_type content
+ where
+
+toBS :: String -> B.ByteString
+toBS = B.pack . map (toEnum . fromEnum)
+
+toStr :: B.ByteString -> String
+toStr = map (toEnum . fromEnum) . B.unpack
+
+footnoteID :: Int -> String
+footnoteID i = "n" ++ (show i)
+
+linkID :: Int -> String
+linkID i = "l" ++ (show i)
+
+-- | Convert a block-level Pandoc's element to FictionBook XML representation.
+blockToXml :: Block -> FBM [Content]
+blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2
+blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
+blockToXml (Para [img@(Image _ _)]) = insertImage NormalImage img
+blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss
+blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
+ map (el "p" . el "code") . lines $ s
+blockToXml (RawBlock _ s) = return . spaceBeforeAfter .
+ map (el "p" . el "code") . lines $ s
+blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
+blockToXml (OrderedList a bss) = do
+ state <- get
+ let pmrk = parentListMarker state
+ let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a
+ let mkitem mrk bs = do
+ modify (\s -> s { parentListMarker = mrk })
+ itemtext <- cMapM blockToXml . paraToPlain $ bs
+ modify (\s -> s { parentListMarker = pmrk }) -- old parent marker
+ return . el "p" $ [ txt mrk, txt " " ] ++ itemtext
+ mapM (uncurry mkitem) (zip markers bss)
+blockToXml (BulletList bss) = do
+ state <- get
+ let level = parentBulletLevel state
+ let pmrk = parentListMarker state
+ let prefix = replicate (length pmrk) ' '
+ let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"]
+ let mrk = prefix ++ bullets !! (level `mod` (length bullets))
+ let mkitem bs = do
+ modify (\s -> s { parentBulletLevel = (level+1) })
+ itemtext <- cMapM blockToXml . paraToPlain $ bs
+ modify (\s -> s { parentBulletLevel = level }) -- restore bullet level
+ return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext
+ mapM mkitem bss
+blockToXml (DefinitionList defs) =
+ cMapM mkdef defs
+ where
+ mkdef (term, bss) = do
+ def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss
+ t <- wrap "strong" term
+ return [ el "p" t, el "p" def' ]
+ sep blocks =
+ if all needsBreak blocks then
+ blocks ++ [Plain [LineBreak]]
+ else
+ blocks
+ needsBreak (Para _) = False
+ needsBreak (Plain ins) = LineBreak `notElem` ins
+ needsBreak _ = True
+blockToXml (Header _ _) = -- should never happen, see renderSections
+ error "unexpected header in section text"
+blockToXml HorizontalRule = return
+ [ el "empty-line" ()
+ , el "p" (txt (replicate 10 '—'))
+ , el "empty-line" () ]
+blockToXml (Table caption aligns _ headers rows) = do
+ hd <- mkrow "th" headers aligns
+ bd <- mapM (\r -> mkrow "td" r aligns) rows
+ c <- return . el "emphasis" =<< cMapM toXml caption
+ return [el "table" (hd : bd), el "p" c]
+ where
+ mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content
+ mkrow tag cells aligns' =
+ (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns'))
+ --
+ mkcell :: String -> (TableCell, Alignment) -> FBM Content
+ mkcell tag (cell, align) = do
+ cblocks <- cMapM blockToXml cell
+ return $ el tag ([align_attr align], cblocks)
+ --
+ align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
+ align_str AlignLeft = "left"
+ align_str AlignCenter = "center"
+ align_str AlignRight = "right"
+ align_str AlignDefault = "left"
+blockToXml Null = return []
+
+-- Replace paragraphs with plain text and line break.
+-- Necessary to simulate multi-paragraph lists in FB2.
+paraToPlain :: [Block] -> [Block]
+paraToPlain [] = []
+paraToPlain (Para inlines : rest) =
+ let p = (Plain (inlines ++ [LineBreak]))
+ in p : paraToPlain rest
+paraToPlain (p:rest) = p : paraToPlain rest
+
+-- Simulate increased indentation level. Will not really work
+-- for multi-line paragraphs.
+indent :: Block -> Block
+indent = indentBlock
+ where
+ -- indentation space
+ spacer :: String
+ spacer = replicate 4 ' '
+ --
+ indentBlock (Plain ins) = Plain ((Str spacer):ins)
+ indentBlock (Para ins) = Para ((Str spacer):ins)
+ indentBlock (CodeBlock a s) =
+ let s' = unlines . map (spacer++) . lines $ s
+ in CodeBlock a s'
+ indentBlock (BlockQuote bs) = BlockQuote (map indent bs)
+ indentBlock (Header l ins) = Header l (indentLines ins)
+ indentBlock everythingElse = everythingElse
+ -- indent every (explicit) line
+ indentLines :: [Inline] -> [Inline]
+ indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
+ in intercalate [LineBreak] $ map ((Str spacer):) lns
+
+-- | Convert a Pandoc's Inline element to FictionBook XML representation.
+toXml :: Inline -> FBM [Content]
+toXml (Str s) = return [txt s]
+toXml (Emph ss) = list `liftM` wrap "emphasis" ss
+toXml (Strong ss) = list `liftM` wrap "strong" ss
+toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
+toXml (Superscript ss) = list `liftM` wrap "sup" ss
+toXml (Subscript ss) = list `liftM` wrap "sub" ss
+toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss
+toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
+ inner <- cMapM toXml ss
+ return $ [txt "‘"] ++ inner ++ [txt "’"]
+toXml (Quoted DoubleQuote ss) = do
+ inner <- cMapM toXml ss
+ return $ [txt "“"] ++ inner ++ [txt "”"]
+toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles
+toXml (Code _ s) = return [el "code" s]
+toXml Space = return [txt " "]
+toXml LineBreak = return [el "empty-line" ()]
+toXml (Math _ formula) = insertMath InlineImage formula
+toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed
+toXml (Link text (url,ttl)) = do
+ fns <- footnotes `liftM` get
+ let n = 1 + length fns
+ let ln_id = linkID n
+ let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]"
+ ln_text <- cMapM toXml text
+ let ln_desc =
+ let ttl' = dropWhile isSpace ttl
+ in if null ttl'
+ then list . el "p" $ el "code" url
+ else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ]
+ modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns })
+ return $ ln_text ++
+ [ el "a"
+ ( [ attr ("l","href") ('#':ln_id)
+ , uattr "type" "note" ]
+ , ln_ref) ]
+toXml img@(Image _ _) = insertImage InlineImage img
+toXml (Note bs) = do
+ fns <- footnotes `liftM` get
+ let n = 1 + length fns
+ let fn_id = footnoteID n
+ fn_desc <- cMapM blockToXml bs
+ modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns })
+ let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]"
+ return . list $ el "a" ( [ attr ("l","href") ('#':fn_id)
+ , uattr "type" "note" ]
+ , fn_ref )
+
+insertMath :: ImageMode -> String -> FBM [Content]
+insertMath immode formula = do
+ htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get
+ case htmlMath of
+ WebTeX url -> do
+ let alt = [Code nullAttr formula]
+ let imgurl = url ++ urlEncode formula
+ let img = Image alt (imgurl, "")
+ insertImage immode img
+ _ -> return [el "code" formula]
+
+insertImage :: ImageMode -> Inline -> FBM [Content]
+insertImage immode (Image alt (url,ttl)) = do
+ images <- imagesToFetch `liftM` get
+ let n = 1 + length images
+ let fname = "image" ++ show n
+ modify (\s -> s { imagesToFetch = (fname, url) : images })
+ let ttlattr = case (immode, null ttl) of
+ (NormalImage, False) -> [ uattr "title" ttl ]
+ _ -> []
+ return . list $
+ el "image" $
+ [ attr ("l","href") ('#':fname)
+ , attr ("l","type") (show immode)
+ , uattr "alt" (cMap plain alt) ]
+ ++ ttlattr
+insertImage _ _ = error "unexpected inline instead of image"
+
+replaceImagesWithAlt :: [String] -> Content -> Content
+replaceImagesWithAlt missingHrefs body =
+ let cur = XC.fromContent body
+ cur' = replaceAll cur
+ in XC.toTree . XC.root $ cur'
+ where
+ --
+ replaceAll :: XC.Cursor -> XC.Cursor
+ replaceAll c =
+ let n = XC.current c
+ c' = if isImage n && isMissing n
+ then XC.modifyContent replaceNode c
+ else c
+ in case XC.nextDF c' of
+ (Just cnext) -> replaceAll cnext
+ Nothing -> c' -- end of document
+ --
+ isImage :: Content -> Bool
+ isImage (Elem e) = (elName e) == (uname "image")
+ isImage _ = False
+ --
+ isMissing (Elem img@(Element _ _ _ _)) =
+ let imgAttrs = elAttribs img
+ badAttrs = map (attr ("l","href")) missingHrefs
+ in any (`elem` imgAttrs) badAttrs
+ isMissing _ = False
+ --
+ replaceNode :: Content -> Content
+ replaceNode n@(Elem img@(Element _ _ _ _)) =
+ let attrs = elAttribs img
+ alt = getAttrVal attrs (uname "alt")
+ imtype = getAttrVal attrs (qname "l" "type")
+ in case (alt, imtype) of
+ (Just alt', Just imtype') ->
+ if imtype' == show NormalImage
+ then el "p" alt'
+ else txt alt'
+ (Just alt', Nothing) -> txt alt' -- no type attribute
+ _ -> n -- don't replace if alt text is not found
+ replaceNode n = n
+ --
+ getAttrVal :: [X.Attr] -> QName -> Maybe String
+ getAttrVal attrs name =
+ case filter ((name ==) . attrKey) attrs of
+ (a:_) -> Just (attrVal a)
+ _ -> Nothing
+
+
+-- | Wrap all inlines with an XML tag (given its unqualified name).
+wrap :: String -> [Inline] -> FBM Content
+wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
+
+-- " Create a singleton list.
+list :: a -> [a]
+list = (:[])
+
+-- | Convert an 'Inline' to plaintext.
+plain :: Inline -> String
+plain (Str s) = s
+plain (Emph ss) = concat (map plain ss)
+plain (Strong ss) = concat (map plain ss)
+plain (Strikeout ss) = concat (map plain ss)
+plain (Superscript ss) = concat (map plain ss)
+plain (Subscript ss) = concat (map plain ss)
+plain (SmallCaps ss) = concat (map plain ss)
+plain (Quoted _ ss) = concat (map plain ss)
+plain (Cite _ ss) = concat (map plain ss) -- FIXME
+plain (Code _ s) = s
+plain Space = " "
+plain LineBreak = "\n"
+plain (Math _ s) = s
+plain (RawInline _ s) = s
+plain (Link text (url,_)) = concat (map plain text ++ [" <", url, ">"])
+plain (Image alt _) = concat (map plain alt)
+plain (Note _) = "" -- FIXME
+
+-- | Create an XML element.
+el :: (Node t)
+ => String -- ^ unqualified element name
+ -> t -- ^ node contents
+ -> Content -- ^ XML content
+el name cs = Elem $ unode name cs
+
+-- | Put empty lines around content
+spaceBeforeAfter :: [Content] -> [Content]
+spaceBeforeAfter cs =
+ let emptyline = el "empty-line" ()
+ in [emptyline] ++ cs ++ [emptyline]
+
+-- | Create a plain-text XML content.
+txt :: String -> Content
+txt s = Text $ CData CDataText s Nothing
+
+-- | Create an XML attribute with an unqualified name.
+uattr :: String -> String -> Text.XML.Light.Attr
+uattr name val = Attr (uname name) val
+
+-- | Create an XML attribute with a qualified name from given namespace.
+attr :: (String, String) -> String -> Text.XML.Light.Attr
+attr (ns, name) val = Attr (qname ns name) val
+
+-- | Unqualified name
+uname :: String -> QName
+uname name = QName name Nothing Nothing
+
+-- | Qualified name
+qname :: String -> String -> QName
+qname ns name = QName name Nothing (Just ns)
+
+-- | Abbreviation for 'concatMap'.
+cMap :: (a -> [b]) -> [a] -> [b]
+cMap = concatMap
+
+-- | Monadic equivalent of 'concatMap'.
+cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+cMapM f xs = concat `liftM` mapM f xs \ No newline at end of file
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b8474ee3f..c6c4a8fd7 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -32,6 +32,7 @@ Conversion of 'Pandoc' documents to HTML.
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Templates
import Text.Pandoc.Generic
import Text.Pandoc.Readers.TeXMath
@@ -272,7 +273,7 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do
-- title slides have no content of their own
then filter isSec elements
else elements
- let header'' = if (writerStrictMarkdown opts || writerSectionDivs opts ||
+ let header'' = if (writerSectionDivs opts ||
writerSlideVariant opts == S5Slides || slide)
then header'
else header' ! prefixedId opts id'
@@ -378,13 +379,17 @@ blockToHtml _ Null = return mempty
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para [Image txt (s,tit)]) = do
img <- inlineToHtml opts (Image txt (s,tit))
- capt <- inlineListToHtml opts txt
+ let tocapt = if writerHtml5 opts
+ then H5.figcaption
+ else H.p ! A.class_ "caption"
+ capt <- if null txt
+ then return mempty
+ else tocapt `fmap` inlineListToHtml opts txt
return $ if writerHtml5 opts
then H5.figure $ mconcat
- [nl opts, img, H5.figcaption capt, nl opts]
+ [nl opts, img, capt, nl opts]
else H.div ! A.class_ "figure" $ mconcat
- [nl opts, img, H.p ! A.class_ "caption" $ capt,
- nl opts]
+ [nl opts, img, capt, nl opts]
blockToHtml opts (Para lst) = do
contents <- inlineListToHtml opts lst
return $ H.p contents
@@ -392,7 +397,7 @@ blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str
blockToHtml _ (RawBlock _ _) = return mempty
blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
- let tolhs = writerLiterateHaskell opts &&
+ let tolhs = isEnabled Ext_literate_haskell opts &&
any (\c -> map toLower c == "haskell") classes &&
any (\c -> map toLower c == "literate") classes
classes' = if tolhs
@@ -618,7 +623,7 @@ inlineToHtml opts inline =
! A.src (toValue $ url ++ urlEncode str)
! A.alt (toValue str)
! A.title (toValue str)
- let brtag = if writerHtml5 opts then H5.br else H.br
+ let brtag = if writerHtml5 opts then H5.br else H.br
return $ case t of
InlineMath -> m
DisplayMath -> brtag >> m >> brtag
@@ -638,7 +643,7 @@ inlineToHtml opts inline =
Left _ -> inlineListToHtml opts
(readTeXMath str) >>= return .
(H.span ! A.class_ "math")
- MathJax _ -> return $ toHtml $
+ MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $
case t of
InlineMath -> "\\(" ++ str ++ "\\)"
DisplayMath -> "\\[" ++ str ++ "\\]"
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 7beee2d42..abbbd4d01 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Templates
import Text.Printf ( printf )
import Network.URI ( isAbsoluteURI, unEscapeString )
@@ -265,10 +266,12 @@ blockToLaTeX :: Block -- ^ Block to convert
blockToLaTeX Null = return empty
blockToLaTeX (Plain lst) = inlineListToLaTeX lst
blockToLaTeX (Para [Image txt (src,tit)]) = do
- capt <- inlineListToLaTeX txt
+ capt <- if null txt
+ then return empty
+ else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt
img <- inlineToLaTeX (Image txt (src,tit))
return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$
- ("\\caption{" <> capt <> char '}') $$ "\\end{figure}"
+ capt $$ "\\end{figure}"
blockToLaTeX (Para lst) = do
result <- inlineListToLaTeX lst
return result
@@ -287,7 +290,7 @@ blockToLaTeX (BlockQuote lst) = do
blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
opts <- gets stOptions
case () of
- _ | writerLiterateHaskell opts && "haskell" `elem` classes &&
+ _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
"literate" `elem` classes -> lhsCodeBlock
| writerListings opts -> listingsCodeBlock
| writerHighlight opts && not (null classes) -> highlightedCodeBlock
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index c481e6c87..bececde25 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Writers.Man
+ Module : Text.Pandoc.Writers.Man
Copyright : Copyright (C) 2007-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates
import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
import Data.List ( isPrefixOf, intersperse, intercalate )
@@ -44,21 +45,21 @@ data WriterState = WriterState { stNotes :: Notes
-- | Convert Pandoc to Man.
writeMan :: WriterOptions -> Pandoc -> String
-writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False)
+writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False)
-- | Return groff man representation of document.
pandocToMan :: WriterOptions -> Pandoc -> State WriterState String
pandocToMan opts (Pandoc (Meta title authors date) blocks) = do
titleText <- inlineListToMan opts title
authors' <- mapM (inlineListToMan opts) authors
- date' <- inlineListToMan opts date
+ date' <- inlineListToMan opts date
let colwidth = if writerWrapText opts
then Just $ writerColumns opts
else Nothing
let render' = render colwidth
let (cmdName, rest) = break (== ' ') $ render' titleText
let (title', section) = case reverse cmdName of
- (')':d:'(':xs) | d `elem` ['0'..'9'] ->
+ (')':d:'(':xs) | d `elem` ['0'..'9'] ->
(text (reverse xs), char d)
xs -> (text (reverse xs), doubleQuotes empty)
let description = hsep $
@@ -86,7 +87,7 @@ notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToMan opts notes =
if null notes
then return empty
- else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
return . (text ".SH NOTES" $$) . vcat
-- | Return man representation of a note.
@@ -94,7 +95,7 @@ noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
noteToMan opts num note = do
contents <- blockListToMan opts note
let marker = cr <> text ".SS " <> brackets (text (show num))
- return $ marker $$ contents
+ return $ marker $$ contents
-- | Association list of characters to escape.
manEscapes :: [(Char, String)]
@@ -104,7 +105,7 @@ manEscapes = [ ('\160', "\\ ")
, ('\x2014', "\\[em]")
, ('\x2013', "\\[en]")
, ('\x2026', "\\&...")
- ] ++ backslashEscapes "@\\"
+ ] ++ backslashEscapes "-@\\"
-- | Escape special characters for Man.
escapeString :: String -> String
@@ -113,7 +114,7 @@ escapeString = escapeStringUsing manEscapes
-- | Escape a literal (code) section for Man.
escapeCode :: String -> String
escapeCode = concat . intersperse "\n" . map escapeLine . lines where
- escapeLine codeline =
+ escapeLine codeline =
case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of
a@('.':_) -> "\\&" ++ a
b -> b
@@ -150,14 +151,14 @@ splitSentences xs =
-- | Convert Pandoc block element to man.
blockToMan :: WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> State WriterState Doc
+ -> State WriterState Doc
blockToMan _ Null = return empty
-blockToMan opts (Plain inlines) =
+blockToMan opts (Plain inlines) =
liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
blockToMan opts (Para inlines) = do
contents <- liftM vcat $ mapM (inlineListToMan opts) $
splitSentences inlines
- return $ text ".PP" $$ contents
+ return $ text ".PP" $$ contents
blockToMan _ (RawBlock "man" str) = return $ text str
blockToMan _ (RawBlock _ _) = return empty
blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
@@ -166,7 +167,7 @@ blockToMan opts (Header level inlines) = do
let heading = case level of
1 -> ".SH "
_ -> ".SS "
- return $ text heading <> contents
+ return $ text heading <> contents
blockToMan _ (CodeBlock _ str) = return $
text ".IP" $$
text ".nf" $$
@@ -174,10 +175,10 @@ blockToMan _ (CodeBlock _ str) = return $
text (escapeCode str) $$
text "\\f[]" $$
text ".fi"
-blockToMan opts (BlockQuote blocks) = do
+blockToMan opts (BlockQuote blocks) = do
contents <- blockListToMan opts blocks
return $ text ".RS" $$ contents $$ text ".RE"
-blockToMan opts (Table caption alignments widths headers rows) =
+blockToMan opts (Table caption alignments widths headers rows) =
let aligncode AlignLeft = "l"
aligncode AlignRight = "r"
aligncode AlignCenter = "c"
@@ -190,53 +191,53 @@ blockToMan opts (Table caption alignments widths headers rows) =
else map (printf "w(%0.2fn)" . (70 *)) widths
-- 78n default width - 8n indent = 70n
let coldescriptions = text $ intercalate " "
- (zipWith (\align width -> aligncode align ++ width)
+ (zipWith (\align width -> aligncode align ++ width)
alignments iwidths) ++ "."
colheadings <- mapM (blockListToMan opts) headers
- let makeRow cols = text "T{" $$
- (vcat $ intersperse (text "T}@T{") cols) $$
+ let makeRow cols = text "T{" $$
+ (vcat $ intersperse (text "T}@T{") cols) $$
text "T}"
let colheadings' = if all null headers
then empty
else makeRow colheadings $$ char '_'
- body <- mapM (\row -> do
+ body <- mapM (\row -> do
cols <- mapM (blockListToMan opts) row
return $ makeRow cols) rows
- return $ text ".PP" $$ caption' $$
- text ".TS" $$ text "tab(@);" $$ coldescriptions $$
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "tab(@);" $$ coldescriptions $$
colheadings' $$ vcat body $$ text ".TE"
blockToMan opts (BulletList items) = do
contents <- mapM (bulletListItemToMan opts) items
- return (vcat contents)
+ return (vcat contents)
blockToMan opts (OrderedList attribs items) = do
- let markers = take (length items) $ orderedListMarkers attribs
+ let markers = take (length items) $ orderedListMarkers attribs
let indent = 1 + (maximum $ map length markers)
contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
- zip markers items
+ zip markers items
return (vcat contents)
-blockToMan opts (DefinitionList items) = do
+blockToMan opts (DefinitionList items) = do
contents <- mapM (definitionListItemToMan opts) items
return (vcat contents)
-- | Convert bullet list item (list of blocks) to man.
bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
bulletListItemToMan _ [] = return empty
-bulletListItemToMan opts ((Para first):rest) =
+bulletListItemToMan opts ((Para first):rest) =
bulletListItemToMan opts ((Plain first):rest)
bulletListItemToMan opts ((Plain first):rest) = do
- first' <- blockToMan opts (Plain first)
+ first' <- blockToMan opts (Plain first)
rest' <- blockListToMan opts rest
let first'' = text ".IP \\[bu] 2" $$ first'
let rest'' = if null rest
then empty
else text ".RS 2" $$ rest' $$ text ".RE"
- return (first'' $$ rest'')
+ return (first'' $$ rest'')
bulletListItemToMan opts (first:rest) = do
first' <- blockToMan opts first
rest' <- blockListToMan opts rest
return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
-
+
-- | Convert ordered list item (a list of blocks) to man.
orderedListItemToMan :: WriterOptions -- ^ options
-> String -- ^ order marker for list item
@@ -244,7 +245,7 @@ orderedListItemToMan :: WriterOptions -- ^ options
-> [Block] -- ^ list item (list of blocks)
-> State WriterState Doc
orderedListItemToMan _ _ _ [] = return empty
-orderedListItemToMan opts num indent ((Para first):rest) =
+orderedListItemToMan opts num indent ((Para first):rest) =
orderedListItemToMan opts num indent ((Plain first):rest)
orderedListItemToMan opts num indent (first:rest) = do
first' <- blockToMan opts first
@@ -254,17 +255,17 @@ orderedListItemToMan opts num indent (first:rest) = do
let rest'' = if null rest
then empty
else text ".RS 4" $$ rest' $$ text ".RE"
- return $ first'' $$ rest''
+ return $ first'' $$ rest''
-- | Convert definition list item (label, list of blocks) to man.
definitionListItemToMan :: WriterOptions
- -> ([Inline],[[Block]])
+ -> ([Inline],[[Block]])
-> State WriterState Doc
definitionListItemToMan opts (label, defs) = do
labelText <- inlineListToMan opts label
- contents <- if null defs
+ contents <- if null defs
then return empty
- else liftM vcat $ forM defs $ \blocks -> do
+ else liftM vcat $ forM defs $ \blocks -> do
let (first, rest) = case blocks of
((Para x):y) -> (Plain x,y)
(x:y) -> (x,y)
@@ -278,7 +279,7 @@ definitionListItemToMan opts (label, defs) = do
-- | Convert list of Pandoc block elements to man.
blockListToMan :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> State WriterState Doc
blockListToMan opts blocks =
mapM (blockToMan opts) blocks >>= (return . vcat)
@@ -292,7 +293,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
-- | Convert Pandoc inline element to man.
inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
-inlineToMan opts (Emph lst) = do
+inlineToMan opts (Emph lst) = do
contents <- inlineListToMan opts lst
return $ text "\\f[I]" <> contents <> text "\\f[]"
inlineToMan opts (Strong lst) = do
@@ -333,16 +334,16 @@ inlineToMan opts (Link txt (src, _)) = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
return $ case txt of
[Code _ s]
- | s == srcSuffix -> char '<' <> text srcSuffix <> char '>'
+ | s == srcSuffix -> char '<' <> text srcSuffix <> char '>'
_ -> linktext <> text " (" <> text src <> char ')'
inlineToMan opts (Image alternate (source, tit)) = do
- let txt = if (null alternate) || (alternate == [Str ""]) ||
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
then [Str "image"]
else alternate
- linkPart <- inlineToMan opts (Link txt (source, tit))
+ linkPart <- inlineToMan opts (Link txt (source, tit))
return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
-inlineToMan _ (Note contents) = do
+inlineToMan _ (Note contents) = do
-- add to notes in state
modify $ \st -> st{ stNotes = contents : stNotes st }
notes <- liftM stNotes get
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 9cbcaeb47..d88419feb 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, TupleSections #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Writers.Markdown
+ Module : Text.Pandoc.Writers.Markdown
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -35,11 +35,15 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Shared
-import Text.Pandoc.Parsing hiding (blankline)
-import Text.ParserCombinators.Parsec ( runParser, GenParser )
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, char, space)
import Data.List ( group, isPrefixOf, find, intersperse, transpose )
import Text.Pandoc.Pretty
import Control.Monad.State
+import qualified Data.Set as Set
+import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Readers.TeXMath (readTeXMath)
+import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..))
type Notes = [[Block]]
type Refs = [([Inline], Target)]
@@ -49,7 +53,7 @@ data WriterState = WriterState { stNotes :: Notes
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
-writeMarkdown opts document =
+writeMarkdown opts document =
evalState (pandocToMarkdown opts document) WriterState{ stNotes = []
, stRefs = []
, stPlain = False }
@@ -58,7 +62,9 @@ writeMarkdown opts document =
-- pictures, or inline formatting).
writePlain :: WriterOptions -> Pandoc -> String
writePlain opts document =
- evalState (pandocToMarkdown opts{writerStrictMarkdown = True}
+ evalState (pandocToMarkdown opts{
+ writerExtensions = Set.delete Ext_escaped_line_breaks $
+ writerExtensions opts }
document') WriterState{ stNotes = []
, stRefs = []
, stPlain = True }
@@ -81,15 +87,41 @@ plainify = bottomUp go
go (Cite _ cits) = SmallCaps cits
go x = x
+pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc
+pandocTitleBlock tit auths dat =
+ hang 2 (text "% ") tit <> cr <>
+ hang 2 (text "% ") (hcat (intersperse (text "; ") auths)) <> cr <>
+ hang 2 (text "% ") dat <> cr
+
+mmdTitleBlock :: Doc -> [Doc] -> Doc -> Doc
+mmdTitleBlock tit auths dat =
+ hang 8 (text "Title: ") tit <> cr <>
+ hang 8 (text "Author: ") (hcat (intersperse (text "; ") auths)) <> cr <>
+ hang 8 (text "Date: ") dat <> cr
+
+plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc
+plainTitleBlock tit auths dat =
+ tit <> cr <>
+ (hcat (intersperse (text "; ") auths)) <> cr <>
+ dat <> cr
+
-- | Return markdown representation of document.
pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String
pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
title' <- inlineListToMarkdown opts title
authors' <- mapM (inlineListToMarkdown opts) authors
date' <- inlineListToMarkdown opts date
- let titleblock = not $ null title && null authors && null date
+ isPlain <- gets stPlain
+ let titleblock = case True of
+ _ | isPlain ->
+ plainTitleBlock title' authors' date'
+ | isEnabled Ext_pandoc_title_block opts ->
+ pandocTitleBlock title' authors' date'
+ | isEnabled Ext_mmd_title_block opts ->
+ mmdTitleBlock title' authors' date'
+ | otherwise -> empty
let headerBlocks = filter isHeaderBlock blocks
- let toc = if writerTableOfContents opts
+ let toc = if writerTableOfContents opts
then tableOfContents opts headerBlocks
else empty
body <- blockListToMarkdown opts blocks
@@ -106,11 +138,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
let context = writerVariables opts ++
[ ("toc", render colwidth toc)
, ("body", main)
- , ("title", render colwidth title')
- , ("date", render colwidth date')
] ++
- [ ("titleblock", "yes") | titleblock ] ++
- [ ("author", render colwidth a) | a <- authors' ]
+ [ ("titleblock", render colwidth titleblock)
+ | not (null title && null authors && null date) ]
if writerStandalone opts
then return $ renderTemplate context $ writerTemplate opts
else return main
@@ -119,9 +149,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc
refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
--- | Return markdown representation of a reference key.
-keyToMarkdown :: WriterOptions
- -> ([Inline], (String, String))
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: WriterOptions
+ -> ([Inline], (String, String))
-> State WriterState Doc
keyToMarkdown opts (label, (src, tit)) = do
label' <- inlineListToMarkdown opts label
@@ -133,7 +163,7 @@ keyToMarkdown opts (label, (src, tit)) = do
-- | Return markdown representation of notes.
notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
-notesToMarkdown opts notes =
+notesToMarkdown opts notes =
mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
return . vsep
@@ -142,12 +172,16 @@ noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
noteToMarkdown opts num blocks = do
contents <- blockListToMarkdown opts blocks
let num' = text $ show num
- let marker = text "[^" <> num' <> text "]:"
+ let marker = if isEnabled Ext_footnotes opts
+ then text "[^" <> num' <> text "]:"
+ else text "[" <> num' <> text "]"
let markerSize = 4 + offset num'
let spacer = case writerTabStop opts - markerSize of
n | n > 0 -> text $ replicate n ' '
_ -> text " "
- return $ hang (writerTabStop opts) (marker <> spacer) contents
+ return $ if isEnabled Ext_footnotes opts
+ then hang (writerTabStop opts) (marker <> spacer) contents
+ else marker <> spacer <> contents
-- | Escape special characters for Markdown.
escapeString :: String -> String
@@ -155,7 +189,7 @@ escapeString = escapeStringUsing markdownEscapes
where markdownEscapes = backslashEscapes "\\`*_$<>#~^"
-- | Construct table of contents from list of header blocks.
-tableOfContents :: WriterOptions -> [Block] -> Doc
+tableOfContents :: WriterOptions -> [Block] -> Doc
tableOfContents opts headers =
let opts' = opts { writerIgnoreNotes = True }
contents = BulletList $ map elementToListItem $ hierarchicalize headers
@@ -166,7 +200,7 @@ tableOfContents opts headers =
-- | Converts an Element to a list item for a table of contents,
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++
+elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++
if null subsecs
then []
else [BulletList $ map elementToListItem subsecs]
@@ -188,9 +222,9 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
<> "=\"" <> text v <> "\"") ks
-- | Ordered list start parser for use in Para below.
-olMarker :: GenParser Char ParserState Char
+olMarker :: Parser [Char] ParserState Char
olMarker = do (start, style', delim) <- anyOrderedListMarker
- if delim == Period &&
+ if delim == Period &&
(style' == UpperAlpha || (style' == UpperRoman &&
start `elem` [1, 5, 10, 50, 100, 500, 1000]))
then spaceChar >> spaceChar
@@ -206,7 +240,7 @@ beginsWithOrderedListMarker str =
-- | Convert Pandoc block element to markdown.
blockToMarkdown :: WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> State WriterState Doc
+ -> State WriterState Doc
blockToMarkdown _ Null = return empty
blockToMarkdown opts (Plain inlines) = do
contents <- inlineListToMarkdown opts inlines
@@ -215,14 +249,21 @@ blockToMarkdown opts (Para inlines) = do
contents <- inlineListToMarkdown opts inlines
-- escape if para starts with ordered list marker
st <- get
- let esc = if (not (writerStrictMarkdown opts)) &&
+ let esc = if isEnabled Ext_all_symbols_escapable opts &&
not (stPlain st) &&
beginsWithOrderedListMarker (render Nothing contents)
then text "\x200B" -- zero-width space, a hack
else empty
return $ esc <> contents <> blankline
-blockToMarkdown _ (RawBlock f str)
- | f == "html" || f == "latex" || f == "tex" || f == "markdown" = do
+blockToMarkdown opts (RawBlock f str)
+ | f == "html" = do
+ st <- get
+ if stPlain st
+ then return empty
+ else return $ if isEnabled Ext_markdown_attribute opts
+ then text (addMarkdownAttribute str) <> text "\n"
+ else text str <> text "\n"
+ | f == "latex" || f == "tex" || f == "markdown" = do
st <- get
if stPlain st
then return empty
@@ -243,88 +284,148 @@ blockToMarkdown opts (Header level inlines) = do
contents <> cr <> text (replicate (offset contents) '-') <>
blankline
-- ghc interprets '#' characters in column 1 as linenum specifiers.
- _ | stPlain st || writerLiterateHaskell opts ->
+ _ | stPlain st || isEnabled Ext_literate_haskell opts ->
contents <> blankline
_ -> text (replicate level '#') <> space <> contents <> blankline
blockToMarkdown opts (CodeBlock (_,classes,_) str)
| "haskell" `elem` classes && "literate" `elem` classes &&
- writerLiterateHaskell opts =
+ isEnabled Ext_literate_haskell opts =
return $ prefixed "> " (text str) <> blankline
blockToMarkdown opts (CodeBlock attribs str) = return $
- if writerStrictMarkdown opts || attribs == nullAttr
- then nest (writerTabStop opts) (text str) <> blankline
- else -- use delimited code block
- (tildes <> space <> attrs <> cr <> text str <>
- cr <> tildes) <> blankline
- where tildes = text "~~~~"
- attrs = attrsToMarkdown attribs
+ case attribs of
+ x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts ->
+ tildes <> space <> attrs <> cr <> text str <>
+ cr <> tildes <> blankline
+ (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts ->
+ backticks <> space <> text cls <> cr <> text str <>
+ cr <> backticks <> blankline
+ _ -> nest (writerTabStop opts) (text str) <> blankline
+ where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of
+ [] -> "~~~~"
+ xs -> case maximum $ map length xs of
+ n | n < 3 -> "~~~~"
+ | otherwise -> replicate (n+1) '~'
+ backticks = text "```"
+ attrs = if isEnabled Ext_fenced_code_attributes opts
+ then attrsToMarkdown attribs
+ else empty
blockToMarkdown opts (BlockQuote blocks) = do
st <- get
-- if we're writing literate haskell, put a space before the bird tracks
-- so they won't be interpreted as lhs...
- let leader = if writerLiterateHaskell opts
+ let leader = if isEnabled Ext_literate_haskell opts
then " > "
else if stPlain st
then " "
else "> "
contents <- blockListToMarkdown opts blocks
return $ (prefixed leader contents) <> blankline
-blockToMarkdown opts (Table caption aligns widths headers rows) = do
+blockToMarkdown opts t@(Table caption aligns widths headers rows) = do
caption' <- inlineListToMarkdown opts caption
- let caption'' = if null caption
+ let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
then empty
else blankline <> ": " <> caption' <> blankline
- headers' <- mapM (blockListToMarkdown opts) headers
+ rawHeaders <- mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
+ let isSimple = all (==0) widths
+ (nst,tbl) <- case isSimple of
+ True | isEnabled Ext_simple_tables opts -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | isEnabled Ext_pipe_tables opts -> fmap (id,) $
+ pipeTable (all null headers) aligns rawHeaders rawRows
+ | otherwise -> fmap (id,) $
+ return $ text $ writeHtmlString def
+ $ Pandoc (Meta [] [] []) [t]
+ False | isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | otherwise -> fmap (id,) $
+ return $ text $ writeHtmlString def
+ $ Pandoc (Meta [] [] []) [t]
+ return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
+blockToMarkdown opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMarkdown opts) items
+ return $ cat contents <> blankline
+blockToMarkdown opts (OrderedList (start,sty,delim) items) = do
+ let start' = if isEnabled Ext_startnum opts then start else 1
+ let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle
+ let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim
+ let attribs = (start', sty', delim')
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip markers' items
+ return $ cat contents <> blankline
+blockToMarkdown opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMarkdown opts) items
+ return $ cat contents <> blankline
+
+addMarkdownAttribute :: String -> String
+addMarkdownAttribute s =
+ case span isTagText $ reverse $ parseTags s of
+ (xs,(TagOpen t attrs:rest)) ->
+ renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs)
+ where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs,
+ x /= "markdown"]
+ _ -> s
+
+pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc
+pipeTable headless aligns rawHeaders rawRows = do
+ let torow cs = nowrap $ text "|" <>
+ hcat (intersperse (text "|") $ map chomp cs) <> text "|"
+ let toborder (a, h) = let wid = max (offset h) 3
+ in text $ case a of
+ AlignLeft -> ':':replicate (wid - 1) '-'
+ AlignCenter -> ':':replicate (wid - 2) '-' ++ ":"
+ AlignRight -> replicate (wid - 1) '-' ++ ":"
+ AlignDefault -> replicate wid '-'
+ let header = if headless then empty else torow rawHeaders
+ let border = torow $ map toborder $ zip aligns rawHeaders
+ let body = vcat $ map torow rawRows
+ return $ header $$ border $$ body
+
+pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> State WriterState Doc
+pandocTable opts headless aligns widths rawHeaders rawRows = do
+ let isSimple = all (==0) widths
let alignHeader alignment = case alignment of
AlignLeft -> lblock
AlignCenter -> cblock
AlignRight -> rblock
AlignDefault -> lblock
- rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
- let isSimple = all (==0) widths
let numChars = maximum . map offset
- let widthsInChars =
- if isSimple
- then map ((+2) . numChars) $ transpose (headers' : rawRows)
- else map (floor . (fromIntegral (writerColumns opts) *)) widths
+ let widthsInChars = if isSimple
+ then map ((+2) . numChars)
+ $ transpose (rawHeaders : rawRows)
+ else map
+ (floor . (fromIntegral (writerColumns opts) *))
+ widths
let makeRow = hcat . intersperse (lblock 1 (text " ")) .
(zipWith3 alignHeader aligns widthsInChars)
let rows' = map makeRow rawRows
- let head' = makeRow headers'
+ let head' = makeRow rawHeaders
let maxRowHeight = maximum $ map height (head':rows')
let underline = cat $ intersperse (text " ") $
map (\width -> text (replicate width '-')) widthsInChars
let border = if maxRowHeight > 1
then text (replicate (sum widthsInChars +
length widthsInChars - 1) '-')
- else if all null headers
+ else if headless
then underline
else empty
- let head'' = if all null headers
+ let head'' = if headless
then empty
else border <> cr <> head'
let body = if maxRowHeight > 1
then vsep rows'
else vcat rows'
- let bottom = if all null headers
+ let bottom = if headless
then underline
else border
- return $ nest 2 $ head'' $$ underline $$ body $$
- bottom $$ blankline $$ caption'' $$ blankline
-blockToMarkdown opts (BulletList items) = do
- contents <- mapM (bulletListItemToMarkdown opts) items
- return $ cat contents <> blankline
-blockToMarkdown opts (OrderedList attribs items) = do
- let markers = orderedListMarkers attribs
- let markers' = map (\m -> if length m < 3
- then m ++ replicate (3 - length m) ' '
- else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
- zip markers' items
- return $ cat contents <> blankline
-blockToMarkdown opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMarkdown opts) items
- return $ cat contents <> blankline
+ return $ head'' $$ underline $$ body $$ bottom
-- | Convert bullet list item (list of blocks) to markdown.
bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
@@ -349,32 +450,38 @@ orderedListItemToMarkdown opts marker items = do
-- | Convert definition list item (label, list of blocks) to markdown.
definitionListItemToMarkdown :: WriterOptions
- -> ([Inline],[[Block]])
+ -> ([Inline],[[Block]])
-> State WriterState Doc
definitionListItemToMarkdown opts (label, defs) = do
labelText <- inlineListToMarkdown opts label
- let tabStop = writerTabStop opts
- st <- get
- let leader = if stPlain st then " " else ": "
- let sps = case writerTabStop opts - 3 of
- n | n > 0 -> text $ replicate n ' '
- _ -> text " "
defs' <- mapM (mapM (blockToMarkdown opts)) defs
- let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs'
- return $ nowrap labelText <> cr <> contents <> cr
+ if isEnabled Ext_definition_lists opts
+ then do
+ let tabStop = writerTabStop opts
+ st <- get
+ let leader = if stPlain st then " " else ": "
+ let sps = case writerTabStop opts - 3 of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs'
+ return $ nowrap labelText <> cr <> contents <> cr
+ else do
+ return $ nowrap labelText <> text " " <> cr <>
+ vsep (map vsep defs') <> blankline
-- | Convert list of Pandoc block elements to markdown.
blockListToMarkdown :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> State WriterState Doc
blockListToMarkdown opts blocks =
mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
-- insert comment between list and indented code block, or the
-- code block will be treated as a list continuation paragraph
where fixBlocks (b : CodeBlock attr x : rest)
- | (writerStrictMarkdown opts || attr == nullAttr) && isListBlock b =
+ | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
+ && isListBlock b =
b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x :
- fixBlocks rest
+ fixBlocks rest
fixBlocks (x : xs) = x : fixBlocks xs
fixBlocks [] = []
isListBlock (BulletList _) = True
@@ -412,7 +519,7 @@ escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
-inlineToMarkdown opts (Emph lst) = do
+inlineToMarkdown opts (Emph lst) = do
contents <- inlineListToMarkdown opts lst
return $ "*" <> contents <> "*"
inlineToMarkdown opts (Strong lst) = do
@@ -420,15 +527,21 @@ inlineToMarkdown opts (Strong lst) = do
return $ "**" <> contents <> "**"
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
- return $ "~~" <> contents <> "~~"
+ return $ if isEnabled Ext_strikeout opts
+ then "~~" <> contents <> "~~"
+ else "<s>" <> contents <> "</s>"
inlineToMarkdown opts (Superscript lst) = do
let lst' = bottomUp escapeSpaces lst
contents <- inlineListToMarkdown opts lst'
- return $ "^" <> contents <> "^"
+ return $ if isEnabled Ext_superscript opts
+ then "^" <> contents <> "^"
+ else "<sup>" <> contents <> "</sup>"
inlineToMarkdown opts (Subscript lst) = do
let lst' = bottomUp escapeSpaces lst
contents <- inlineListToMarkdown opts lst'
- return $ "~" <> contents <> "~"
+ return $ if isEnabled Ext_subscript opts
+ then "~" <> contents <> "~"
+ else "<sub>" <> contents <> "</sub>"
inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
@@ -437,33 +550,46 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "“" <> contents <> "”"
inlineToMarkdown opts (Code attr str) =
- let tickGroups = filter (\s -> '`' `elem` s) $ group str
+ let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
then 0
- else maximum $ map length tickGroups
- marker = replicate (longest + 1) '`'
+ else maximum $ map length tickGroups
+ marker = replicate (longest + 1) '`'
spacer = if (longest == 0) then "" else " "
- attrs = if writerStrictMarkdown opts || attr == nullAttr
- then empty
- else attrsToMarkdown attr
+ attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
+ then attrsToMarkdown attr
+ else empty
in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
inlineToMarkdown _ (Str str) = do
st <- get
if stPlain st
then return $ text str
else return $ text $ escapeString str
-inlineToMarkdown _ (Math InlineMath str) =
- return $ "$" <> text str <> "$"
-inlineToMarkdown _ (Math DisplayMath str) =
- return $ "$$" <> text str <> "$$"
-inlineToMarkdown _ (RawInline f str)
- | f == "html" || f == "latex" || f == "tex" || f == "markdown" =
+inlineToMarkdown opts (Math InlineMath str)
+ | isEnabled Ext_tex_math_dollars opts =
+ return $ "$" <> text str <> "$"
+ | isEnabled Ext_tex_math_single_backslash opts =
+ return $ "\\(" <> text str <> "\\)"
+ | isEnabled Ext_tex_math_double_backslash opts =
+ return $ "\\\\(" <> text str <> "\\\\)"
+ | otherwise = inlineListToMarkdown opts $ readTeXMath str
+inlineToMarkdown opts (Math DisplayMath str)
+ | isEnabled Ext_tex_math_dollars opts =
+ return $ "$$" <> text str <> "$$"
+ | isEnabled Ext_tex_math_single_backslash opts =
+ return $ "\\[" <> text str <> "\\]"
+ | isEnabled Ext_tex_math_double_backslash opts =
+ return $ "\\\\[" <> text str <> "\\\\]"
+ | otherwise = (\x -> cr <> x <> cr) `fmap`
+ inlineListToMarkdown opts (readTeXMath str)
+inlineToMarkdown opts (RawInline f str)
+ | f == "html" || f == "markdown" ||
+ (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) =
return $ text str
inlineToMarkdown _ (RawInline _ _) = return empty
-inlineToMarkdown opts (LineBreak) = return $
- if writerStrictMarkdown opts
- then " " <> cr
- else "\\" <> cr
+inlineToMarkdown opts (LineBreak)
+ | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr
+ | otherwise = return $ " " <> cr
inlineToMarkdown _ Space = return space
inlineToMarkdown opts (Cite (c:cs) lst)
| writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst
@@ -513,7 +639,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
then "[]"
else "[" <> reftext <> "]"
in first <> second
- else "[" <> linktext <> "](" <>
+ else "[" <> linktext <> "](" <>
text src <> linktitle <> ")"
inlineToMarkdown opts (Image alternate (source, tit)) = do
let txt = if null alternate || alternate == [Str source]
@@ -522,8 +648,10 @@ inlineToMarkdown opts (Image alternate (source, tit)) = do
else alternate
linkPart <- inlineToMarkdown opts (Link txt (source, tit))
return $ "!" <> linkPart
-inlineToMarkdown _ (Note contents) = do
+inlineToMarkdown opts (Note contents) = do
modify (\st -> st{ stNotes = contents : stNotes st })
st <- get
let ref = show $ (length $ stNotes st)
- return $ "[^" <> text ref <> "]"
+ if isEnabled Ext_footnotes opts
+ then return $ "[^" <> text ref <> "]"
+ else return $ "[" <> text ref <> "]"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index b32c5327d..84d7393c1 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Writers.MediaWiki
+ Module : Text.Pandoc.Writers.MediaWiki
Copyright : Copyright (C) 2008-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -31,7 +31,8 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
-}
module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.XML ( escapeStringForXML )
import Data.List ( intersect, intercalate )
@@ -46,9 +47,9 @@ data WriterState = WriterState {
-- | Convert Pandoc to MediaWiki.
writeMediaWiki :: WriterOptions -> Pandoc -> String
-writeMediaWiki opts document =
- evalState (pandocToMediaWiki opts document)
- (WriterState { stNotes = False, stListLevel = [], stUseTags = False })
+writeMediaWiki opts document =
+ evalState (pandocToMediaWiki opts document)
+ (WriterState { stNotes = False, stListLevel = [], stUseTags = False })
-- | Return MediaWiki representation of document.
pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String
@@ -57,7 +58,7 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do
notesExist <- get >>= return . stNotes
let notes = if notesExist
then "\n<references />"
- else ""
+ else ""
let main = body ++ notes
let context = writerVariables opts ++
[ ("body", main) ] ++
@@ -70,22 +71,23 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do
escapeString :: String -> String
escapeString = escapeStringForXML
--- | Convert Pandoc block element to MediaWiki.
+-- | Convert Pandoc block element to MediaWiki.
blockToMediaWiki :: WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> State WriterState String
+ -> State WriterState String
blockToMediaWiki _ Null = return ""
-blockToMediaWiki opts (Plain inlines) =
+blockToMediaWiki opts (Plain inlines) =
inlineListToMediaWiki opts inlines
blockToMediaWiki opts (Para [Image txt (src,tit)]) = do
- capt <- inlineListToMediaWiki opts txt
+ capt <- if null txt
+ then return ""
+ else ("|caption " ++) `fmap` inlineListToMediaWiki opts txt
let opt = if null txt
then ""
- else "|alt=" ++ if null tit then capt else tit ++
- "|caption " ++ capt
+ else "|alt=" ++ if null tit then capt else tit ++ capt
return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n"
blockToMediaWiki opts (Para inlines) = do
@@ -115,7 +117,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do
"javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
"ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
"python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
- "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
+ "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
"visualfoxpro", "winbatch", "xml", "xpp", "z80"]
let (beg, end) = if null at
then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>")
@@ -124,7 +126,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do
blockToMediaWiki opts (BlockQuote blocks) = do
contents <- blockListToMediaWiki opts blocks
- return $ "<blockquote>" ++ contents ++ "</blockquote>"
+ return $ "<blockquote>" ++ contents ++ "</blockquote>"
blockToMediaWiki opts (Table capt aligns widths headers rows') = do
let alignStrings = map alignmentToString aligns
@@ -221,7 +223,7 @@ listItemToMediaWiki opts items = do
-- | Convert definition list item (label, list of blocks) to MediaWiki.
definitionListItemToMediaWiki :: WriterOptions
- -> ([Inline],[[Block]])
+ -> ([Inline],[[Block]])
-> State WriterState String
definitionListItemToMediaWiki opts (label, items) = do
labelText <- inlineListToMediaWiki opts label
@@ -242,7 +244,7 @@ isSimpleList x =
BulletList items -> all isSimpleListItem items
OrderedList (num, sty, _) items -> all isSimpleListItem items &&
num == 1 && sty `elem` [DefaultStyle, Decimal]
- DefinitionList items -> all isSimpleListItem $ concatMap snd items
+ DefinitionList items -> all isSimpleListItem $ concatMap snd items
_ -> False
-- | True if list item can be handled with the simple wiki syntax. False if
@@ -287,8 +289,8 @@ tableRowToMediaWiki opts alignStrings rownum cols' = do
0 -> "header"
x | x `rem` 2 == 1 -> "odd"
_ -> "even"
- cols'' <- sequence $ zipWith
- (\alignment item -> tableItemToMediaWiki opts celltype alignment item)
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToMediaWiki opts celltype alignment item)
alignStrings cols'
return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
@@ -313,7 +315,7 @@ tableItemToMediaWiki opts celltype align' item = do
-- | Convert list of Pandoc block elements to MediaWiki.
blockListToMediaWiki :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> State WriterState String
+ -> State WriterState String
blockListToMediaWiki opts blocks =
mapM (blockToMediaWiki opts) blocks >>= return . vcat
@@ -325,9 +327,9 @@ inlineListToMediaWiki opts lst =
-- | Convert Pandoc inline element to MediaWiki.
inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String
-inlineToMediaWiki opts (Emph lst) = do
+inlineToMediaWiki opts (Emph lst) = do
contents <- inlineListToMediaWiki opts lst
- return $ "''" ++ contents ++ "''"
+ return $ "''" ++ contents ++ "''"
inlineToMediaWiki opts (Strong lst) = do
contents <- inlineListToMediaWiki opts lst
@@ -365,8 +367,8 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str
inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
-- note: str should NOT be escaped
-inlineToMediaWiki _ (RawInline "mediawiki" str) = return str
-inlineToMediaWiki _ (RawInline "html" str) = return str
+inlineToMediaWiki _ (RawInline "mediawiki" str) = return str
+inlineToMediaWiki _ (RawInline "html" str) = return str
inlineToMediaWiki _ (RawInline _ _) = return ""
inlineToMediaWiki _ (LineBreak) = return "<br />\n"
@@ -392,7 +394,7 @@ inlineToMediaWiki opts (Image alt (source, tit)) = do
else "|" ++ tit
return $ "[[Image:" ++ source ++ txt ++ "]]"
-inlineToMediaWiki opts (Note contents) = do
+inlineToMediaWiki opts (Note contents) = do
contents' <- blockListToMediaWiki opts contents
modify (\s -> s { stNotes = True })
return $ "<ref>" ++ contents' ++ "</ref>"
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index d2b56cd17..7fb304e86 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Native
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -34,7 +34,7 @@ metadata.
-}
module Text.Pandoc.Writers.Native ( writeNative )
where
-import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.Options ( WriterOptions(..) )
import Data.List ( intersperse )
import Text.Pandoc.Definition
import Text.Pandoc.Pretty
@@ -47,17 +47,17 @@ prettyList ds =
prettyBlock :: Block -> Doc
prettyBlock (BlockQuote blocks) =
"BlockQuote" $$ prettyList (map prettyBlock blocks)
-prettyBlock (OrderedList attribs blockLists) =
+prettyBlock (OrderedList attribs blockLists) =
"OrderedList" <> space <> text (show attribs) $$
(prettyList $ map (prettyList . map prettyBlock) blockLists)
-prettyBlock (BulletList blockLists) =
+prettyBlock (BulletList blockLists) =
"BulletList" $$
(prettyList $ map (prettyList . map prettyBlock) blockLists)
prettyBlock (DefinitionList items) = "DefinitionList" $$
(prettyList $ map deflistitem items)
where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <>
nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")"
-prettyBlock (Table caption aligns widths header rows) =
+prettyBlock (Table caption aligns widths header rows) =
"Table " <> text (show caption) <> " " <> text (show aligns) <> " " <>
text (show widths) $$
prettyRow header $$
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 9e3dba98a..f43d0a087 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -36,7 +36,8 @@ import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
-import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Options ( WriterOptions(..) )
+import Text.Pandoc.Shared ( stringify )
import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints )
import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
@@ -47,16 +48,16 @@ import Control.Monad (liftM)
import Network.URI ( unEscapeString )
import Text.Pandoc.XML
import Text.Pandoc.Pretty
+import qualified Control.Exception as E
-- | Produce an ODT file from a Pandoc document.
-writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt
- -> WriterOptions -- ^ Writer options
+writeODT :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
-writeODT mbRefOdt opts doc@(Pandoc (Meta title _ _) _) = do
+writeODT opts doc@(Pandoc (Meta title _ _) _) = do
let datadir = writerUserDataDir opts
refArchive <- liftM toArchive $
- case mbRefOdt of
+ case writerReferenceODT opts of
Just f -> B.readFile f
Nothing -> do
let defaultODT = getDataFileName "reference.odt" >>= B.readFile
@@ -128,9 +129,9 @@ transformPic sourceDir entriesRef (Image lab (src,tit)) = do
Nothing -> tit
entries <- readIORef entriesRef
let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src'
- catch (readEntry [] (sourceDir </> src') >>= \entry ->
- modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
- return (Image lab (newsrc, tit')))
- (\_ -> return (Emph lab))
+ E.catch (readEntry [] (sourceDir </> src') >>= \entry ->
+ modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
+ return (Image lab (newsrc, tit')))
+ (\e -> let _ = (e :: E.SomeException) in return (Emph lab))
transformPic _ _ x = return x
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index a0317511a..027ddfda1 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML.
-}
module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Options
import Text.Pandoc.XML
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 7eb943a22..b885a7a40 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Org
Copyright : Copyright (C) 2010 Puneeth Chaganti
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : Puneeth Chaganti <punchagan@gmail.com>
Stability : alpha
@@ -32,14 +32,15 @@ Org-Mode: <http://orgmode.org>
-}
module Text.Pandoc.Writers.Org ( writeOrg) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
import Text.Pandoc.Pretty
import Text.Pandoc.Templates (renderTemplate)
import Data.List ( intersect, intersperse, transpose )
import Control.Monad.State
import Control.Applicative ( (<$>) )
-data WriterState =
+data WriterState =
WriterState { stNotes :: [[Block]]
, stLinks :: Bool
, stImages :: Bool
@@ -49,7 +50,7 @@ data WriterState =
-- | Convert Pandoc to Org.
writeOrg :: WriterOptions -> Pandoc -> String
-writeOrg opts document =
+writeOrg opts document =
let st = WriterState { stNotes = [], stLinks = False,
stImages = False, stHasMath = False,
stOptions = opts }
@@ -82,8 +83,8 @@ pandocToOrg (Pandoc (Meta tit auth dat) blocks) = do
-- | Return Org representation of notes.
notesToOrg :: [[Block]] -> State WriterState Doc
-notesToOrg notes =
- mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
+notesToOrg notes =
+ mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
return . vsep
-- | Return Org representation of a note.
@@ -106,21 +107,24 @@ titleToOrg :: [Inline] -> State WriterState Doc
titleToOrg [] = return empty
titleToOrg lst = do
contents <- inlineListToOrg lst
- return $ "#+TITLE: " <> contents
+ return $ "#+TITLE: " <> contents
--- | Convert Pandoc block element to Org.
+-- | Convert Pandoc block element to Org.
blockToOrg :: Block -- ^ Block element
- -> State WriterState Doc
+ -> State WriterState Doc
blockToOrg Null = return empty
blockToOrg (Plain inlines) = inlineListToOrg inlines
blockToOrg (Para [Image txt (src,tit)]) = do
- capt <- inlineListToOrg txt
+ capt <- if null txt
+ then return empty
+ else (\c -> "#+CAPTION: " <> c <> blankline) `fmap`
+ inlineListToOrg txt
img <- inlineToOrg (Image txt (src,tit))
- return $ "#+CAPTION: " <> capt <> blankline <> img
+ return $ capt <> img
blockToOrg (Para inlines) = do
contents <- inlineListToOrg inlines
return $ contents <> blankline
-blockToOrg (RawBlock "html" str) =
+blockToOrg (RawBlock "html" str) =
return $ blankline $$ "#+BEGIN_HTML" $$
nest 2 (text str) $$ "#+END_HTML" $$ blankline
blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" =
@@ -134,17 +138,17 @@ blockToOrg (Header level inlines) = do
blockToOrg (CodeBlock (_,classes,_) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
- let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa",
- "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex",
- "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave",
- "oz", "perl", "plantuml", "python", "R", "ruby", "sass",
+ let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa",
+ "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex",
+ "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave",
+ "oz", "perl", "plantuml", "python", "R", "ruby", "sass",
"scheme", "screen", "sh", "sql", "sqlite"]
let (beg, end) = case at of
[] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE")
(x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC")
return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline
blockToOrg (BlockQuote blocks) = do
- contents <- blockListToOrg blocks
+ contents <- blockListToOrg blocks
return $ blankline $$ "#+BEGIN_QUOTE" $$
nest 2 contents $$ "#+END_QUOTE" $$ blankline
blockToOrg (Table caption' _ _ headers rows) = do
@@ -155,11 +159,11 @@ blockToOrg (Table caption' _ _ headers rows) = do
headers' <- mapM blockListToOrg headers
rawRows <- mapM (mapM blockListToOrg) rows
let numChars = maximum . map offset
- -- FIXME: width is not being used.
+ -- FIXME: width is not being used.
let widthsInChars =
map ((+2) . numChars) $ transpose (headers' : rawRows)
- -- FIXME: Org doesn't allow blocks with height more than 1.
- let hpipeBlocks blocks = hcat [beg, middle, end]
+ -- FIXME: Org doesn't allow blocks with height more than 1.
+ let hpipeBlocks blocks = hcat [beg, middle, end]
where h = maximum (map height blocks)
sep' = lblock 3 $ vcat (map text $ replicate h " | ")
beg = lblock 2 $ vcat (map text $ replicate h "| ")
@@ -170,7 +174,7 @@ blockToOrg (Table caption' _ _ headers rows) = do
rows' <- mapM (\row -> do cols <- mapM blockListToOrg row
return $ makeRow cols) rows
let border ch = char '|' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
char ch <> char '|'
let body = vcat rows'
@@ -186,7 +190,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do
let delim' = case delim of
TwoParens -> OneParen
x -> x
- let markers = take (length items) $ orderedListMarkers
+ let markers = take (length items) $ orderedListMarkers
(start, Decimal, delim')
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
@@ -222,7 +226,7 @@ definitionListItemToOrg (label, defs) = do
-- | Convert list of Pandoc block elements to Org.
blockListToOrg :: [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> State WriterState Doc
blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat
-- | Convert list of Pandoc inline elements to Org.
@@ -231,19 +235,19 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
-- | Convert Pandoc inline element to Org.
inlineToOrg :: Inline -> State WriterState Doc
-inlineToOrg (Emph lst) = do
+inlineToOrg (Emph lst) = do
contents <- inlineListToOrg lst
return $ "/" <> contents <> "/"
inlineToOrg (Strong lst) = do
contents <- inlineListToOrg lst
return $ "*" <> contents <> "*"
-inlineToOrg (Strikeout lst) = do
+inlineToOrg (Strikeout lst) = do
contents <- inlineListToOrg lst
return $ "+" <> contents <> "+"
-inlineToOrg (Superscript lst) = do
+inlineToOrg (Superscript lst) = do
contents <- inlineListToOrg lst
return $ "^{" <> contents <> "}"
-inlineToOrg (Subscript lst) = do
+inlineToOrg (Subscript lst) = do
contents <- inlineListToOrg lst
return $ "_{" <> contents <> "}"
inlineToOrg (SmallCaps lst) = inlineListToOrg lst
@@ -276,7 +280,7 @@ inlineToOrg (Link txt (src, _)) = do
inlineToOrg (Image _ (source, _)) = do
modify $ \s -> s{ stImages = True }
return $ "[[" <> text source <> "]]"
-inlineToOrg (Note contents) = do
+inlineToOrg (Note contents) = do
-- add to notes in state
notes <- get >>= (return . stNotes)
modify $ \st -> st { stNotes = contents:notes }
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d98079940..5b0b5a414 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Writers.RST
+ Module : Text.Pandoc.Writers.RST
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -32,7 +32,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Data.List ( isPrefixOf, intersperse, transpose )
import Text.Pandoc.Pretty
@@ -42,7 +43,7 @@ import Data.Char (isSpace)
type Refs = [([Inline], Target)]
-data WriterState =
+data WriterState =
WriterState { stNotes :: [[Block]]
, stLinks :: Refs
, stImages :: Refs
@@ -52,7 +53,7 @@ data WriterState =
-- | Convert Pandoc to RST.
writeRST :: WriterOptions -> Pandoc -> String
-writeRST opts document =
+writeRST opts document =
let st = WriterState { stNotes = [], stLinks = [],
stImages = [], stHasMath = False,
stOptions = opts }
@@ -89,8 +90,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
refsToRST :: Refs -> State WriterState Doc
refsToRST refs = mapM keyToRST refs >>= return . vcat
--- | Return RST representation of a reference key.
-keyToRST :: ([Inline], (String, String))
+-- | Return RST representation of a reference key.
+keyToRST :: ([Inline], (String, String))
-> State WriterState Doc
keyToRST (label, (src, _)) = do
label' <- inlineListToRST label
@@ -101,7 +102,7 @@ keyToRST (label, (src, _)) = do
-- | Return RST representation of notes.
notesToRST :: [[Block]] -> State WriterState Doc
-notesToRST notes =
+notesToRST notes =
mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
return . vsep
@@ -116,8 +117,8 @@ noteToRST num note = do
pictRefsToRST :: Refs -> State WriterState Doc
pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
--- | Return RST representation of a picture substitution reference.
-pictToRST :: ([Inline], (String, String))
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: ([Inline], (String, String))
-> State WriterState Doc
pictToRST (label, (src, _)) = do
label' <- inlineListToRST label
@@ -135,9 +136,9 @@ titleToRST lst = do
let border = text (replicate titleLength '=')
return $ border $$ contents $$ border
--- | Convert Pandoc block element to RST.
+-- | Convert Pandoc block element to RST.
blockToRST :: Block -- ^ Block element
- -> State WriterState Doc
+ -> State WriterState Doc
blockToRST Null = return empty
blockToRST (Plain inlines) = inlineListToRST inlines
blockToRST (Para [Image txt (src,tit)]) = do
@@ -163,12 +164,12 @@ blockToRST (CodeBlock (_,classes,_) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
if "haskell" `elem` classes && "literate" `elem` classes &&
- writerLiterateHaskell opts
+ isEnabled Ext_literate_haskell opts
then return $ prefixed "> " (text str) $$ blankline
else return $ "::" $+$ nest tabstop (text str) $$ blankline
blockToRST (BlockQuote blocks) = do
tabstop <- get >>= (return . writerTabStop . stOptions)
- contents <- blockListToRST blocks
+ contents <- blockListToRST blocks
return $ (nest tabstop contents) <> blankline
blockToRST (Table caption _ widths headers rows) = do
caption' <- inlineListToRST caption
@@ -184,7 +185,7 @@ blockToRST (Table caption _ widths headers rows) = do
if isSimple
then map ((+2) . numChars) $ transpose (headers' : rawRows)
else map (floor . (fromIntegral (writerColumns opts) *)) widths
- let hpipeBlocks blocks = hcat [beg, middle, end]
+ let hpipeBlocks blocks = hcat [beg, middle, end]
where h = maximum (map height blocks)
sep' = lblock 3 $ vcat (map text $ replicate h " | ")
beg = lblock 2 $ vcat (map text $ replicate h "| ")
@@ -195,7 +196,7 @@ blockToRST (Table caption _ widths headers rows) = do
rows' <- mapM (\row -> do cols <- mapM blockListToRST row
return $ makeRow cols) rows
let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
char ch <> char '+'
let body = vcat $ intersperse (border '-') rows'
@@ -208,9 +209,9 @@ blockToRST (BulletList items) = do
-- ensure that sublists have preceding blank line
return $ blankline $$ vcat contents $$ blankline
blockToRST (OrderedList (start, style', delim) items) = do
- let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
+ let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
then take (length items) $ repeat "#."
- else take (length items) $ orderedListMarkers
+ else take (length items) $ orderedListMarkers
(start, style', delim)
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
@@ -249,7 +250,7 @@ definitionListItemToRST (label, defs) = do
-- | Convert list of Pandoc block elements to RST.
blockListToRST :: [Block] -- ^ List of block elements
- -> State WriterState Doc
+ -> State WriterState Doc
blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
-- | Convert list of Pandoc inline elements to RST.
@@ -303,19 +304,19 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat
-- | Convert Pandoc inline element to RST.
inlineToRST :: Inline -> State WriterState Doc
-inlineToRST (Emph lst) = do
+inlineToRST (Emph lst) = do
contents <- inlineListToRST lst
return $ "*" <> contents <> "*"
inlineToRST (Strong lst) = do
contents <- inlineListToRST lst
return $ "**" <> contents <> "**"
-inlineToRST (Strikeout lst) = do
+inlineToRST (Strikeout lst) = do
contents <- inlineListToRST lst
return $ "[STRIKEOUT:" <> contents <> "]"
-inlineToRST (Superscript lst) = do
+inlineToRST (Superscript lst) = do
contents <- inlineListToRST lst
return $ ":sup:`" <> contents <> "`"
-inlineToRST (Subscript lst) = do
+inlineToRST (Subscript lst) = do
contents <- inlineListToRST lst
return $ ":sub:`" <> contents <> "`"
inlineToRST (SmallCaps lst) = inlineListToRST lst
@@ -358,7 +359,7 @@ inlineToRST (Link txt (src, tit)) = do
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
inlineToRST (Image alternate (source, tit)) = do
pics <- get >>= return . stImages
- let labelsUsed = map fst pics
+ let labelsUsed = map fst pics
let txt = if null alternate || alternate == [Str ""] ||
alternate `elem` labelsUsed
then [Str $ "image" ++ show (length pics)]
@@ -369,7 +370,7 @@ inlineToRST (Image alternate (source, tit)) = do
modify $ \st -> st { stImages = pics' }
label <- inlineListToRST txt
return $ "|" <> label <> "|"
-inlineToRST (Note contents) = do
+inlineToRST (Note contents) = do
-- add to notes in state
notes <- get >>= return . stNotes
modify $ \st -> st { stNotes = contents:notes }
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 4e7c2a7cd..1919eb3f2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.RTF
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
module Text.Pandoc.Writers.RTF ( writeRTF, rtfEmbedImage ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Options
import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Templates (renderTemplate)
@@ -38,6 +39,7 @@ import System.FilePath ( takeExtension )
import qualified Data.ByteString as B
import Text.Printf ( printf )
import Network.URI ( isAbsoluteURI, unEscapeString )
+import qualified Control.Exception as E
-- | Convert Image inlines into a raw RTF embedded image, read from a file.
-- If file not found or filetype not jpeg or png, leave the inline unchanged.
@@ -47,7 +49,8 @@ rtfEmbedImage x@(Image _ (src,_)) = do
if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src)
then do
let src' = unEscapeString src
- imgdata <- catch (B.readFile src') (\_ -> return B.empty)
+ imgdata <- E.catch (B.readFile src')
+ (\e -> let _ = (e :: E.SomeException) in return B.empty)
let bytes = map (printf "%02x") $ B.unpack imgdata
let filetype = case ext of
".jpg" -> "\\jpegblip"
@@ -63,7 +66,7 @@ rtfEmbedImage x = return x
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
-writeRTF options (Pandoc (Meta title authors date) blocks) =
+writeRTF options (Pandoc (Meta title authors date) blocks) =
let titletext = inlineListToRTF title
authorstext = map inlineListToRTF authors
datetext = inlineListToRTF date
@@ -82,11 +85,11 @@ writeRTF options (Pandoc (Meta title authors date) blocks) =
else body
-- | Construct table of contents from list of header blocks.
-tableOfContents :: [Block] -> String
+tableOfContents :: [Block] -> String
tableOfContents headers =
let contentsTree = hierarchicalize headers
- in concatMap (blockToRTF 0 AlignDefault) $
- [Header 1 [Str "Contents"],
+ in concatMap (blockToRTF 0 AlignDefault) $
+ [Header 1 [Str "Contents"],
BulletList (map elementToListItem contentsTree)]
elementToListItem :: Element -> [Block]
@@ -100,7 +103,7 @@ elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++
handleUnicode :: String -> String
handleUnicode [] = []
handleUnicode (c:cs) =
- if ord c > 127
+ if ord c > 127
then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
else c:(handleUnicode cs)
@@ -130,32 +133,32 @@ rtfParSpaced :: Int -- ^ space after (in twips)
-> Int -- ^ first line indent (relative to block) (in twips)
-> Alignment -- ^ alignment
-> String -- ^ string with content
- -> String
-rtfParSpaced spaceAfter indent firstLineIndent alignment content =
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent alignment content =
let alignString = case alignment of
AlignLeft -> "\\ql "
AlignRight -> "\\qr "
AlignCenter -> "\\qc "
AlignDefault -> "\\ql "
in "{\\pard " ++ alignString ++
- "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
" \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
--- | Default paragraph.
+-- | Default paragraph.
rtfPar :: Int -- ^ block indent (in twips)
-> Int -- ^ first line indent (relative to block) (in twips)
-> Alignment -- ^ alignment
-> String -- ^ string with content
- -> String
-rtfPar = rtfParSpaced 180
+ -> String
+rtfPar = rtfParSpaced 180
-- | Compact paragraph (e.g. for compact list items).
rtfCompact :: Int -- ^ block indent (in twips)
-> Int -- ^ first line indent (relative to block) (in twips)
-> Alignment -- ^ alignment
-> String -- ^ string with content
- -> String
-rtfCompact = rtfParSpaced 0
+ -> String
+rtfCompact = rtfParSpaced 0
-- number of twips to indent
indentIncrement :: Int
@@ -172,7 +175,7 @@ bulletMarker indent = case indent `mod` 720 of
-- | Returns appropriate (list of) ordered list markers for indent level.
orderedMarkers :: Int -> ListAttributes -> [String]
-orderedMarkers indent (start, style, delim) =
+orderedMarkers indent (start, style, delim) =
if style == DefaultStyle && delim == DefaultDelim
then case indent `mod` 720 of
0 -> orderedListMarkers (start, Decimal, Period)
@@ -185,30 +188,30 @@ blockToRTF :: Int -- ^ indent level
-> Block -- ^ block to convert
-> String
blockToRTF _ _ Null = ""
-blockToRTF indent alignment (Plain lst) =
+blockToRTF indent alignment (Plain lst) =
rtfCompact indent 0 alignment $ inlineListToRTF lst
-blockToRTF indent alignment (Para lst) =
+blockToRTF indent alignment (Para lst) =
rtfPar indent 0 alignment $ inlineListToRTF lst
-blockToRTF indent alignment (BlockQuote lst) =
- concatMap (blockToRTF (indent + indentIncrement) alignment) lst
+blockToRTF indent alignment (BlockQuote lst) =
+ concatMap (blockToRTF (indent + indentIncrement) alignment) lst
blockToRTF indent _ (CodeBlock _ str) =
rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
blockToRTF _ _ (RawBlock "rtf" str) = str
blockToRTF _ _ (RawBlock _ _) = ""
-blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
+blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
-blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
+blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
-blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
+blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
concatMap (definitionListItemToRTF alignment indent) lst
-blockToRTF indent _ HorizontalRule =
+blockToRTF indent _ HorizontalRule =
rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
"\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
-blockToRTF indent alignment (Table caption aligns sizes headers rows) =
+blockToRTF indent alignment (Table caption aligns sizes headers rows) =
(if all null headers
then ""
- else tableRowToRTF True indent aligns sizes headers) ++
+ else tableRowToRTF True indent aligns sizes headers) ++
concatMap (tableRowToRTF False indent aligns sizes) rows ++
rtfPar indent 0 alignment (inlineListToRTF caption)
@@ -230,7 +233,7 @@ tableRowToRTF header indent aligns sizes' cols =
end = "}\n\\intbl\\row}\n"
in start ++ columns ++ end
-tableItemToRTF :: Int -> Alignment -> [Block] -> String
+tableItemToRTF :: Int -> Alignment -> [Block] -> String
tableItemToRTF indent alignment item =
let contents = concatMap (blockToRTF indent alignment) item
in "{\\intbl " ++ contents ++ "\\cell}\n"
@@ -238,7 +241,7 @@ tableItemToRTF indent alignment item =
-- | Ensure that there's the same amount of space after compact
-- lists as after regular lists.
spaceAtEnd :: String -> String
-spaceAtEnd str =
+spaceAtEnd str =
if isSuffixOf "\\par}\n" str
then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
else str
@@ -249,10 +252,10 @@ listItemToRTF :: Alignment -- ^ alignment
-> String -- ^ list start marker
-> [Block] -- ^ list item (list of blocks)
-> [Char]
-listItemToRTF alignment indent marker [] =
- rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
- (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
-listItemToRTF alignment indent marker list =
+listItemToRTF alignment indent marker [] =
+ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF alignment indent marker list =
let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list
listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
show listIncrement ++ "\\tab"
@@ -275,7 +278,7 @@ definitionListItemToRTF alignment indent (label, defs) =
let labelText = blockToRTF indent alignment (Plain label)
itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $
concat defs
- in labelText ++ itemsText
+ in labelText ++ itemsText
-- | Convert list of inline items to RTF.
inlineListToRTF :: [Inline] -- ^ list of inlines to convert
@@ -291,9 +294,9 @@ inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}"
inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}"
inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}"
inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}"
-inlineToRTF (Quoted SingleQuote lst) =
+inlineToRTF (Quoted SingleQuote lst) =
"\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
-inlineToRTF (Quoted DoubleQuote lst) =
+inlineToRTF (Quoted DoubleQuote lst) =
"\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
inlineToRTF (Str str) = stringToRTF str
@@ -303,11 +306,11 @@ inlineToRTF (RawInline "rtf" str) = str
inlineToRTF (RawInline _ _) = ""
inlineToRTF (LineBreak) = "\\line "
inlineToRTF Space = " "
-inlineToRTF (Link text (src, _)) =
- "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
+inlineToRTF (Link text (src, _)) =
+ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
"\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
-inlineToRTF (Image _ (source, _)) =
- "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Image _ (source, _)) =
+ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
inlineToRTF (Note contents) =
- "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
(concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 6bb782899..40e76c615 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Texinfo
Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of 'Pandoc' format into Texinfo.
-}
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Options
import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Text.Printf ( printf )
@@ -40,7 +41,7 @@ import Text.Pandoc.Pretty
import Network.URI ( isAbsoluteURI, unEscapeString )
import System.FilePath
-data WriterState =
+data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
, stSuperscript :: Bool -- document contains superscript
, stSubscript :: Bool -- document contains subscript
@@ -53,8 +54,8 @@ data WriterState =
-- | Convert Pandoc to Texinfo.
writeTexinfo :: WriterOptions -> Pandoc -> String
-writeTexinfo options document =
- evalState (pandocToTexinfo options $ wrapTop document) $
+writeTexinfo options document =
+ evalState (pandocToTexinfo options $ wrapTop document) $
WriterState { stStrikeout = False, stSuperscript = False, stSubscript = False }
-- | Add a "Top" node around the document, needed by Texinfo.
@@ -116,10 +117,12 @@ blockToTexinfo (Plain lst) =
inlineListToTexinfo lst
blockToTexinfo (Para [Image txt (src,tit)]) = do
- capt <- inlineListToTexinfo txt
+ capt <- if null txt
+ then return empty
+ else (\c -> text "@caption" <> braces c) `fmap`
+ inlineListToTexinfo txt
img <- inlineToTexinfo (Image txt (src,tit))
- return $ text "@float" $$ img $$ (text "@caption{" <> capt <> char '}') $$
- text "@end float"
+ return $ text "@float" $$ img $$ capt $$ text "@end float"
blockToTexinfo (Para lst) =
inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo
@@ -217,7 +220,7 @@ blockToTexinfo (Table caption aligns widths heads rows) = do
else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
let tableBody = text ("@multitable " ++ colDescriptors) $$
headers $$
- vcat rowsText $$
+ vcat rowsText $$
text "@end multitable"
return $ if isEmpty captionText
then tableBody <> blankline
@@ -241,7 +244,7 @@ tableAnyRowToTexinfo :: String
-> [[Block]]
-> State WriterState Doc
tableAnyRowToTexinfo itemtype aligns cols =
- zipWithM alignedBlock aligns cols >>=
+ zipWithM alignedBlock aligns cols >>=
return . (text itemtype $$) . foldl (\row item -> row $$
(if isEmpty row then empty else text " @tab ") <> item) empty
@@ -358,8 +361,8 @@ inlineToTexinfo :: Inline -- ^ Inline to convert
inlineToTexinfo (Emph lst) =
inlineListToTexinfo lst >>= return . inCmd "emph"
-inlineToTexinfo (Strong lst) =
- inlineListToTexinfo lst >>= return . inCmd "strong"
+inlineToTexinfo (Strong lst) =
+ inlineListToTexinfo lst >>= return . inCmd "strong"
inlineToTexinfo (Strikeout lst) = do
modify $ \st -> st{ stStrikeout = True }
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 26d5ec6d7..5f3bb6bcd 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.Textile
Copyright : Copyright (C) 2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -31,7 +31,8 @@ Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
-}
module Text.Pandoc.Writers.Textile ( writeTextile ) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.XML ( escapeStringForXML )
import Data.List ( intercalate )
@@ -46,9 +47,9 @@ data WriterState = WriterState {
-- | Convert Pandoc to Textile.
writeTextile :: WriterOptions -> Pandoc -> String
-writeTextile opts document =
- evalState (pandocToTextile opts document)
- (WriterState { stNotes = [], stListLevel = [], stUseTags = False })
+writeTextile opts document =
+ evalState (pandocToTextile opts document)
+ (WriterState { stNotes = [], stListLevel = [], stUseTags = False })
-- | Return Textile representation of document.
pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
@@ -90,14 +91,14 @@ escapeCharForTextile x = case x of
escapeStringForTextile :: String -> String
escapeStringForTextile = concatMap escapeCharForTextile
--- | Convert Pandoc block element to Textile.
+-- | Convert Pandoc block element to Textile.
blockToTextile :: WriterOptions -- ^ Options
-> Block -- ^ Block element
- -> State WriterState String
+ -> State WriterState String
blockToTextile _ Null = return ""
-blockToTextile opts (Plain inlines) =
+blockToTextile opts (Plain inlines) =
inlineListToTextile opts inlines
blockToTextile opts (Para [Image txt (src,tit)]) = do
@@ -236,7 +237,7 @@ listItemToTextile opts items = do
-- | Convert definition list item (label, list of blocks) to Textile.
definitionListItemToTextile :: WriterOptions
- -> ([Inline],[[Block]])
+ -> ([Inline],[[Block]])
-> State WriterState String
definitionListItemToTextile opts (label, items) = do
labelText <- inlineListToTextile opts label
@@ -294,8 +295,8 @@ tableRowToTextile opts alignStrings rownum cols' = do
0 -> "header"
x | x `rem` 2 == 1 -> "odd"
_ -> "even"
- cols'' <- sequence $ zipWith
- (\alignment item -> tableItemToTextile opts celltype alignment item)
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToTextile opts celltype alignment item)
alignStrings cols'
return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
@@ -320,7 +321,7 @@ tableItemToTextile opts celltype align' item = do
-- | Convert list of Pandoc block elements to Textile.
blockListToTextile :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
- -> State WriterState String
+ -> State WriterState String
blockListToTextile opts blocks =
mapM (blockToTextile opts) blocks >>= return . vcat
@@ -332,11 +333,11 @@ inlineListToTextile opts lst =
-- | Convert Pandoc inline element to Textile.
inlineToTextile :: WriterOptions -> Inline -> State WriterState String
-inlineToTextile opts (Emph lst) = do
+inlineToTextile opts (Emph lst) = do
contents <- inlineListToTextile opts lst
return $ if '_' `elem` contents
then "<em>" ++ contents ++ "</em>"
- else "_" ++ contents ++ "_"
+ else "_" ++ contents ++ "_"
inlineToTextile opts (Strong lst) = do
contents <- inlineListToTextile opts lst
@@ -377,7 +378,7 @@ inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst
inlineToTextile _ (Code _ str) =
return $ if '@' `elem` str
then "<tt>" ++ escapeStringForXML str ++ "</tt>"
- else "@" ++ str ++ "@"
+ else "@" ++ str ++ "@"
inlineToTextile _ (Str str) = return $ escapeStringForTextile str