diff options
Diffstat (limited to 'src/Text')
38 files changed, 3157 insertions, 1788 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 432a5c2ba..33706816e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable This helper module exports the main writers, readers, and data @@ -43,9 +43,8 @@ inline links: > > markdownToRST :: String -> String > markdownToRST = -> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . -> readMarkdown defaultParserState -> +> (writeRST def {writerReferenceLinks = True}) . readMarkdown def +> > main = getContents >>= putStrLn . markdownToRST Note: all of the readers assume that the input text has @'\n'@ @@ -55,11 +54,13 @@ you should remove @'\r'@ characters using @filter (/='\r')@. -} module Text.Pandoc - ( + ( -- * Definitions module Text.Pandoc.Definition -- * Generics , module Text.Pandoc.Generic + -- * Options + , module Text.Pandoc.Options -- * Lists of readers and writers , readers , writers @@ -71,15 +72,8 @@ module Text.Pandoc , readTextile , readDocBook , readNative - -- * Parser state used in readers - , ParserState (..) - , defaultParserState - , ParserContext (..) - , QuoteContext (..) - , KeyTable - , NoteTable - , HeaderType (..) -- * Writers: converting /from/ Pandoc format + , Writer (..) , writeNative , writeMarkdown , writePlain @@ -98,19 +92,16 @@ module Text.Pandoc , writeODT , writeDocx , writeEPUB + , writeFB2 , writeOrg , writeAsciiDoc - -- * Writer options used in writers - , WriterOptions (..) - , HTMLSlideVariant (..) - , HTMLMathMethod (..) - , CiteMethod (..) - , defaultWriterOptions -- * Rendering templates and default templates , module Text.Pandoc.Templates -- * Version , pandocVersion -- * Miscellaneous + , getReader + , getWriter , rtfEmbedImage , jsonFilter , ToJsonFilter(..) @@ -127,7 +118,7 @@ import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST +import Text.Pandoc.Writers.RST import Text.Pandoc.Writers.LaTeX import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo @@ -135,85 +126,143 @@ import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.ODT import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB +import Text.Pandoc.Writers.FB2 import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Templates -import Text.Pandoc.Parsing -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared (safeRead) +import Data.ByteString.Lazy (ByteString) +import Data.List (intercalate) import Data.Version (showVersion) import Text.JSON.Generic +import Data.Set (Set) +import qualified Data.Set as Set +import Text.Parsec +import Text.Parsec.Error import Paths_pandoc (version) -- | Version number of pandoc library. pandocVersion :: String pandocVersion = showVersion version +parseFormatSpec :: String + -> Either ParseError (String, Set Extension -> Set Extension) +parseFormatSpec = parse formatSpec "" + where formatSpec = do + name <- formatName + extMods <- many extMod + return (name, foldl (.) id extMods) + formatName = many1 $ noneOf "-+" + extMod = do + polarity <- oneOf "-+" + name <- many $ noneOf "-+" + ext <- case safeRead ("Ext_" ++ name) of + Just n -> return n + Nothing + | name == "lhs" -> return Ext_literate_haskell + | otherwise -> fail $ "Unknown extension: " ++ name + return $ case polarity of + '-' -> Set.delete ext + _ -> Set.insert ext + -- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] +readers :: [(String, ReaderOptions -> String -> Pandoc)] readers = [("native" , \_ -> readNative) ,("json" , \_ -> decodeJSON) + ,("markdown_strict" , readMarkdown) ,("markdown" , readMarkdown) - ,("markdown+lhs" , \st -> - readMarkdown st{ stateLiterateHaskell = True}) ,("rst" , readRST) - ,("rst+lhs" , \st -> - readRST st{ stateLiterateHaskell = True}) ,("docbook" , readDocBook) - ,("textile" , readTextile) -- TODO : textile+lhs + ,("textile" , readTextile) -- TODO : textile+lhs ,("html" , readHtml) ,("latex" , readLaTeX) - ,("latex+lhs" , \st -> - readLaTeX st{ stateLiterateHaskell = True}) ] --- | Association list of formats and writers (omitting the --- binary writers, odt, docx, and epub). -writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeNative) - ,("json" , \_ -> encodeJSON) - ,("html" , writeHtmlString) - ,("html5" , \o -> - writeHtmlString o{ writerHtml5 = True }) - ,("html+lhs" , \o -> - writeHtmlString o{ writerLiterateHaskell = True }) - ,("html5+lhs" , \o -> - writeHtmlString o{ writerLiterateHaskell = True, - writerHtml5 = True }) - ,("s5" , writeHtmlString) - ,("slidy" , writeHtmlString) - ,("slideous" , writeHtmlString) - ,("dzslides" , writeHtmlString) - ,("docbook" , writeDocbook) - ,("opendocument" , writeOpenDocument) - ,("latex" , writeLaTeX) - ,("latex+lhs" , \o -> - writeLaTeX o{ writerLiterateHaskell = True }) - ,("beamer" , \o -> - writeLaTeX o{ writerBeamer = True }) - ,("beamer+lhs" , \o -> - writeLaTeX o{ writerBeamer = True, writerLiterateHaskell = True }) - ,("context" , writeConTeXt) - ,("texinfo" , writeTexinfo) - ,("man" , writeMan) - ,("markdown" , writeMarkdown) - ,("markdown+lhs" , \o -> - writeMarkdown o{ writerLiterateHaskell = True }) - ,("plain" , writePlain) - ,("rst" , writeRST) - ,("rst+lhs" , \o -> - writeRST o{ writerLiterateHaskell = True }) - ,("mediawiki" , writeMediaWiki) - ,("textile" , writeTextile) - ,("rtf" , writeRTF) - ,("org" , writeOrg) - ,("asciidoc" , writeAsciiDoc) - ] +data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) + | IOStringWriter (WriterOptions -> Pandoc -> IO String) + | IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString) + +-- | Association list of formats and writers. +writers :: [ ( String, Writer ) ] +writers = [ + ("native" , PureStringWriter writeNative) + ,("json" , PureStringWriter $ \_ -> encodeJSON) + ,("docx" , IOByteStringWriter writeDocx) + ,("odt" , IOByteStringWriter writeODT) + ,("epub" , IOByteStringWriter writeEPUB) + ,("fb2" , IOStringWriter writeFB2) + ,("html" , PureStringWriter writeHtmlString) + ,("html5" , PureStringWriter $ \o -> + writeHtmlString o{ writerHtml5 = True }) + ,("s5" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = S5Slides + , writerTableOfContents = False }) + ,("slidy" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = SlidySlides }) + ,("slideous" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = SlideousSlides }) + ,("dzslides" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = DZSlides + , writerHtml5 = True }) + ,("docbook" , PureStringWriter writeDocbook) + ,("opendocument" , PureStringWriter writeOpenDocument) + ,("latex" , PureStringWriter writeLaTeX) + ,("beamer" , PureStringWriter $ \o -> + writeLaTeX o{ writerBeamer = True }) + ,("context" , PureStringWriter writeConTeXt) + ,("texinfo" , PureStringWriter writeTexinfo) + ,("man" , PureStringWriter writeMan) + ,("markdown" , PureStringWriter writeMarkdown) + ,("markdown_strict" , PureStringWriter writeMarkdown) + ,("plain" , PureStringWriter writePlain) + ,("rst" , PureStringWriter writeRST) + ,("mediawiki" , PureStringWriter writeMediaWiki) + ,("textile" , PureStringWriter writeTextile) + ,("rtf" , PureStringWriter writeRTF) + ,("org" , PureStringWriter writeOrg) + ,("asciidoc" , PureStringWriter writeAsciiDoc) + ] + +getDefaultExtensions :: String -> Set Extension +getDefaultExtensions "markdown_strict" = strictExtensions +getDefaultExtensions _ = pandocExtensions + +-- | Retrieve reader based on formatSpec (format+extensions). +getReader :: String -> Either String (ReaderOptions -> String -> Pandoc) +getReader s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (readerName, setExts) -> + case lookup readerName readers of + Nothing -> Left $ "Unknown reader: " ++ readerName + Just r -> Right $ \o -> + r o{ readerExtensions = setExts $ + getDefaultExtensions readerName } + +-- | Retrieve writer based on formatSpec (format+extensions). +getWriter :: String -> Either String Writer +getWriter s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (writerName, setExts) -> + case lookup writerName writers of + Nothing -> Left $ "Unknown writer: " ++ writerName + Just (PureStringWriter r) -> Right $ PureStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOStringWriter r) -> Right $ IOStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } {-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-} -- | Converts a transformation on the Pandoc AST into a function diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index cece13fba..13569a4d9 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -38,7 +38,7 @@ import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared (stringify) -import Text.ParserCombinators.Parsec +import Text.Parsec import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted @@ -165,7 +165,7 @@ locatorWords inp = breakup (x : xs) = x : breakup xs splitup = groupBy (\x y -> x /= '\160' && y /= '\160') -pLocatorWords :: GenParser Inline st (String, [Inline]) +pLocatorWords :: Parsec [Inline] st (String, [Inline]) pLocatorWords = do l <- pLocator s <- getInput -- rest is suffix @@ -173,16 +173,16 @@ pLocatorWords = do then return (init l, Str "," : s) else return (l, s) -pMatch :: (Inline -> Bool) -> GenParser Inline st Inline +pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline pMatch condition = try $ do t <- anyToken guard $ condition t return t -pSpace :: GenParser Inline st Inline +pSpace :: Parsec [Inline] st Inline pSpace = pMatch (\t -> t == Space || t == Str "\160") -pLocator :: GenParser Inline st String +pLocator :: Parsec [Inline] st String pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace @@ -190,7 +190,7 @@ pLocator = try $ do gs <- many1 pWordWithDigits return $ stringify f ++ (' ' : unwords gs) -pWordWithDigits :: GenParser Inline st String +pWordWithDigits :: Parsec [Inline] st String pWordWithDigits = try $ do pSpace r <- many1 (notFollowedBy pSpace >> anyToken) diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 080acebee..95df88099 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Highlighting ( languages , Style ) where import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) import Text.Highlighting.Kate import Data.List (find) import Data.Maybe (fromMaybe) @@ -60,9 +61,9 @@ highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter -> String -- ^ Raw contents of the CodeBlock -> Maybe a -- ^ Maybe the formatted result highlight formatter (_, classes, keyvals) rawCode = - let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of - ((n,_):_) -> n - [] -> 1 + let firstNum = case safeRead (fromMaybe "1" $ lookup "startFrom" keyvals) of + Just n -> n + Nothing -> 1 fmtOpts = defaultFormatOpts{ startNumber = firstNum, numberLines = any (`elem` diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index f9749cece..9cde57e4d 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME Copyright : Copyright (C) 2011 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs new file mode 100644 index 000000000..a9c8bf710 --- /dev/null +++ b/src/Text/Pandoc/Options.hs @@ -0,0 +1,277 @@ +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Options + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data structures and functions for representing parser and writer +options. +-} +module Text.Pandoc.Options ( Extension(..) + , pandocExtensions + , strictExtensions + , ReaderOptions(..) + , HTMLMathMethod (..) + , CiteMethod (..) + , ObfuscationMethod (..) + , HTMLSlideVariant (..) + , WriterOptions (..) + , def + , isEnabled + ) where +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Default +import Text.Pandoc.Highlighting (Style, pygments) + +-- | Individually selectable syntax extensions. +data Extension = + Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_inline_notes -- ^ Pandoc-style inline notes + | Ext_pandoc_title_block -- ^ Pandoc title block + | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_table_captions -- ^ Pandoc-style table captions + -- | Ext_image_captions + | Ext_simple_tables -- ^ Pandoc-style simple tables + | Ext_multiline_tables -- ^ Pandoc-style multiline tables + | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) + | Ext_citations -- ^ Pandoc/citeproc citations + | Ext_raw_tex -- ^ Allow raw TeX (other than math) + | Ext_raw_html -- ^ Allow raw HTML + | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ + | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] + | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) + | Ext_fenced_code_blocks -- ^ Parse fenced code blocks + | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks + | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown + -- iff container has attribute 'markdown' + | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_autolink_code_spans -- ^ Put autolink text inside code spans + | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_startnum -- ^ Make start number of ordered list significant + | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php + | Ext_example_lists -- ^ Markdown-style numbered examples + | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable + | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote + | Ext_blank_before_header -- ^ Require blank line before a header + -- | Ext_significant_bullets + | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax + | Ext_superscript -- ^ Superscript using ^this^ syntax + | Ext_subscript -- ^ Subscript using ~this~ syntax + | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_literate_haskell -- ^ Enable literate Haskell conventions + | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + deriving (Show, Read, Enum, Eq, Ord, Bounded) + +pandocExtensions :: Set Extension +pandocExtensions = Set.fromList + [ Ext_footnotes + , Ext_inline_notes + , Ext_pandoc_title_block + , Ext_table_captions + -- , Ext_image_captions + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_pipe_tables + , Ext_citations + , Ext_raw_tex + , Ext_raw_html + , Ext_tex_math_dollars + , Ext_latex_macros + , Ext_fenced_code_blocks + , Ext_fenced_code_attributes + , Ext_backtick_code_blocks + , Ext_inline_code_attributes + , Ext_markdown_in_html_blocks + , Ext_escaped_line_breaks + , Ext_autolink_code_spans + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_all_symbols_escapable + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + -- , Ext_significant_bullets + , Ext_strikeout + , Ext_superscript + , Ext_subscript + ] + +strictExtensions :: Set Extension +strictExtensions = Set.fromList + [ Ext_raw_html ] + +data ReaderOptions = ReaderOptions{ + readerExtensions :: Set Extension -- ^ Syntax extensions + , readerSmart :: Bool -- ^ Smart punctuation + , readerStrict :: Bool -- ^ FOR TRANSITION ONLY + , readerStandalone :: Bool -- ^ Standalone document with header + , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX + , readerColumns :: Int -- ^ Number of columns in terminal + , readerTabStop :: Int -- ^ Tab stop + , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior + -- in parsing dashes; -- is em-dash; + -- - before numerial is en-dash + , readerCitations :: [String] -- ^ List of available citations + , readerApplyMacros :: Bool -- ^ Apply macros to TeX math + , readerIndentedCodeClasses :: [String] -- ^ Default classes for + -- indented code blocks +} deriving (Show, Read) + +instance Default ReaderOptions + where def = ReaderOptions{ + readerExtensions = pandocExtensions + , readerSmart = False + , readerStrict = False + , readerStandalone = False + , readerParseRaw = False + , readerColumns = 80 + , readerTabStop = 4 + , readerOldDashes = False + , readerCitations = [] + , readerApplyMacros = True + , readerIndentedCodeClasses = [] + } + +-- +-- Writer options +-- + +data HTMLMathMethod = PlainMath + | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js + | JsMath (Maybe String) -- url of jsMath load script + | GladTeX + | WebTeX String -- url of TeX->image script. + | MathML (Maybe String) -- url of MathMLinHTML.js + | MathJax String -- url of MathJax.js + deriving (Show, Read, Eq) + +data CiteMethod = Citeproc -- use citeproc to render them + | Natbib -- output natbib cite commands + | Biblatex -- output biblatex cite commands + deriving (Show, Read, Eq) + +-- | Methods for obfuscating email addresses in HTML. +data ObfuscationMethod = NoObfuscation + | ReferenceObfuscation + | JavascriptObfuscation + deriving (Show, Read, Eq) + +-- | Varieties of HTML slide shows. +data HTMLSlideVariant = S5Slides + | SlidySlides + | SlideousSlides + | DZSlides + | NoSlides + deriving (Show, Read, Eq) + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ Include header and footer + , writerTemplate :: String -- ^ Template to use in standalone mode + , writerVariables :: [(String, String)] -- ^ Variables to set in template + , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? + , writerIncremental :: Bool -- ^ True if lists should be incremental + , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML + , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , writerWrapText :: Bool -- ^ Wrap text to line length + , writerColumns :: Int -- ^ Characters in a line (for text wrapping) + , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails + , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory + , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations + , writerHtml5 :: Bool -- ^ Produce HTML5 + , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show + , writerSlideLevel :: Maybe Int -- ^ Force header level of slides + , writerChapters :: Bool -- ^ Use "chapter" for top-level sects + , writerListings :: Bool -- ^ Use listings package for code + , writerHighlight :: Bool -- ^ Highlight source code + , writerHighlightStyle :: Style -- ^ Style to use for highlighting + , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex + , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line + , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed + , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified + , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified + } deriving Show + +instance Default WriterOptions where + def = WriterOptions { writerStandalone = False + , writerTemplate = "" + , writerVariables = [] + , writerEPUBMetadata = "" + , writerTabStop = 4 + , writerTableOfContents = False + , writerSlideVariant = NoSlides + , writerIncremental = False + , writerHTMLMathMethod = PlainMath + , writerIgnoreNotes = False + , writerNumberSections = False + , writerSectionDivs = False + , writerExtensions = pandocExtensions + , writerReferenceLinks = False + , writerWrapText = True + , writerColumns = 72 + , writerEmailObfuscation = JavascriptObfuscation + , writerIdentifierPrefix = "" + , writerSourceDirectory = "." + , writerUserDataDir = Nothing + , writerCiteMethod = Citeproc + , writerBiblioFiles = [] + , writerHtml5 = False + , writerBeamer = False + , writerSlideLevel = Nothing + , writerChapters = False + , writerListings = False + , writerHighlight = False + , writerHighlightStyle = pygments + , writerSetextHeaders = True + , writerTeXLigatures = True + , writerEpubStylesheet = Nothing + , writerEpubFonts = [] + , writerReferenceODT = Nothing + , writerReferenceDocx = Nothing + } + +-- | Returns True if the given extension is enabled. +isEnabled :: Extension -> WriterOptions -> Bool +isEnabled ext opts = ext `Set.member` (writerExtensions opts) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cac2b71ca..50691f409 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -48,8 +49,6 @@ module Text.Pandoc.Parsing ( (>>~), withHorizDisplacement, withRaw, nullBlock, - failIfStrict, - failUnlessLHS, escaped, characterReference, updateLastStrPos, @@ -57,37 +56,127 @@ module Text.Pandoc.Parsing ( (>>~), orderedListMarker, charRef, tableWith, + widthsFromIndices, gridTableWith, readWith, testStringWith, + getOption, + guardEnabled, + guardDisabled, ParserState (..), defaultParserState, HeaderType (..), ParserContext (..), QuoteContext (..), NoteTable, + NoteTable', KeyTable, - Key, + Key (..), toKey, - fromKey, - lookupKeySrc, smartPunctuation, + withQuoteContext, + singleQuoteStart, + singleQuoteEnd, + doubleQuoteStart, + doubleQuoteEnd, + ellipses, + apostrophe, + dash, macro, - applyMacros' ) + applyMacros', + Parser, + F(..), + runF, + askF, + asksF, + -- * Re-exports from Text.Pandoc.Parsec + runParser, + parse, + anyToken, + getInput, + setInput, + unexpected, + char, + letter, + digit, + alphaNum, + skipMany, + skipMany1, + spaces, + space, + anyChar, + satisfy, + newline, + string, + count, + eof, + noneOf, + oneOf, + lookAhead, + notFollowedBy, + many, + many1, + manyTill, + (<|>), + (<?>), + choice, + try, + sepBy, + sepBy1, + sepEndBy, + sepEndBy1, + endBy, + endBy1, + option, + optional, + optionMaybe, + getState, + setState, + updateState, + getPosition, + setPosition, + sourceColumn, + sourceLine, + newPos, + token + ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Options +import Text.Pandoc.Builder (Blocks) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.ParserCombinators.Parsec +import Text.Parsec +import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.HTML.TagSoup.Entity ( lookupEntity ) +import Data.Default +import qualified Data.Set as Set +import Control.Monad.Reader +import Data.Monoid + +type Parser t s = Parsec t s + +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor) + +runF :: F a -> ParserState -> a +runF = runReader . unF + +askF :: F ParserState +askF = F ask + +asksF :: (ParserState -> a) -> F a +asksF f = F $ asks f + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = liftM mconcat . sequence -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -95,62 +184,69 @@ import Text.HTML.TagSoup.Entity ( lookupEntity ) a >>~ b = a >>= \x -> b >> return x -- | Parse any line of text -anyLine :: GenParser Char st [Char] +anyLine :: Parser [Char] st [Char] anyLine = manyTill anyChar newline -- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] +many1Till :: Parser [tok] st a + -> Parser [tok] st end + -> Parser [tok] st [a] many1Till p end = do first <- p rest <- manyTill p end return (first:rest) --- | A more general form of @notFollowedBy@. This one allows any +-- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> Parser [Char] st String +oneOfStrings [] = fail "no strings" +oneOfStrings strs = do + c <- anyChar + let strs' = [xs | (x:xs) <- strs, x == c] + case strs' of + [] -> fail "not found" + z | "" `elem` z -> return [c] + | otherwise -> (c:) `fmap` oneOfStrings strs' -- | Parses a space or tab. -spaceChar :: CharParser st Char +spaceChar :: Parser [Char] st Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: CharParser st Char +nonspaceChar :: Parser [Char] st Char nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' -- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () +skipSpaces :: Parser [Char] st () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char +blankline :: Parser [Char] st Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] +blanklines :: Parser [Char] st [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ +enclosed :: Parser [Char] st t -- ^ start parser + -> Parser [Char] st end -- ^ end parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] +enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String +stringAnyCase :: [Char] -> Parser [Char] st String stringAnyCase [] = string "" stringAnyCase (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -158,7 +254,7 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -169,8 +265,8 @@ parseFromString parser str = do return result -- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines +lineClump :: Parser [Char] st String +lineClump = blanklines <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) -- | Parse a string of characters between an open character @@ -178,8 +274,8 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Char -> Char -> GenParser Char st Char - -> GenParser Char st String +charsInBalanced :: Char -> Char -> Parser [Char] st Char + -> Parser [Char] st String charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close @@ -204,13 +300,13 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int + -> Parser [Char] st Int romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits + let romanDigits = if upperCase + then uppercaseRomanDigits else lowercaseRomanDigits lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = + let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits thousands <- many thousand >>= (return . (1000 *) . length) ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 @@ -234,14 +330,14 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs -emailChar :: GenParser Char st Char +emailChar :: Parser [Char] st Char emailChar = alphaNum <|> satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') -domainChar :: GenParser Char st Char +domainChar :: Parser [Char] st Char domainChar = alphaNum <|> char '-' -domain :: GenParser Char st [Char] +domain :: Parser [Char] st [Char] domain = do first <- many1 domainChar dom <- many1 $ try (char '.' >> many1 domainChar ) @@ -249,7 +345,7 @@ domain = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) +emailAddress :: Parser [Char] st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar @@ -260,7 +356,7 @@ emailAddress = try $ do return (full, escapeURI $ "mailto:" ++ full) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) +uri :: Parser [Char] st (String, String) uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] @@ -294,8 +390,8 @@ uri = try $ do -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply + -> Parser [Char] st (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -304,7 +400,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: GenParser Char st a -> GenParser Char st (a, [Char]) +withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -321,26 +417,16 @@ withRaw parser = do -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). -nullBlock :: GenParser Char st Block +nullBlock :: Parser [Char] st Block nullBlock = anyChar >> return Null --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser a ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = getState >>= guard . stateLiterateHaskell - -- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Char +escaped :: Parser [Char] st Char -- ^ Parser for character to escape + -> Parser [Char] st Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: GenParser Char st Char +characterReference :: Parser [Char] st Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -349,19 +435,19 @@ characterReference = try $ do Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman :: Parser [Char] st (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman :: Parser [Char] st (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) +decimal :: Parser [Char] st (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, read num) @@ -370,7 +456,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int) exampleNum = do char '@' lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) @@ -384,38 +470,38 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum :: Parser [Char] st (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha :: Parser [Char] st (ListNumberStyle, Int) lowerAlpha = do ch <- oneOf ['a'..'z'] return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha :: Parser [Char] st (ListNumberStyle, Int) upperAlpha = do ch <- oneOf ['A'..'Z'] return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne :: Parser [Char] st (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char ParserState ListAttributes -anyOrderedListMarker = choice $ +anyOrderedListMarker :: Parser [Char] ParserState ListAttributes +anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inPeriod :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -423,18 +509,18 @@ inPeriod num = try $ do then DefaultDelim else Period return (start, style, delim) - + -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inOneParen :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inOneParen num = try $ do (style, start) <- num char ')' return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inTwoParens :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -443,9 +529,9 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char ParserState Int +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> Parser [Char] ParserState Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of @@ -465,38 +551,34 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline +charRef :: Parser [Char] st Inline charRef = do c <- characterReference return $ Str [c] -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState [Inline] - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser captionParser = try $ do - caption' <- option [] captionParser +tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState [[Block]]) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState Block +tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser - caption <- if null caption' - then option [] captionParser - else return caption' - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ Table [] aligns widths heads lines' -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal -> [Int] -- Indices -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns' indices = +widthsFromIndices _ [] = [] +widthsFromIndices numColumns' indices = let numColumns = max numColumns' (if null indices then 0 else last indices) lengths' = zipWith (-) indices (0:indices) lengths = reverse $ @@ -516,28 +598,30 @@ widthsFromIndices numColumns' indices = fracs = map (\l -> (fromIntegral l) / quotient) lengths in tail fracs +--- + -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: GenParser Char ParserState Block -- ^ Block parser - -> GenParser Char ParserState [Inline] -- ^ Caption parser +gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser -> Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTableWith block tableCaption headless = - tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption + -> Parser [Char] ParserState Block +gridTableWith blocks headless = + tableWith (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ removeTrailingSpace line -gridPart :: Char -> GenParser Char st (Int, Int) +gridPart :: Char -> Parser [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline removeFinalBar :: String -> String @@ -545,18 +629,18 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep :: Char -> Parser [Char] ParserState Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) -gridTableHeader headless block = try $ do + -> Parser [Char] ParserState [Block] + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) @@ -571,25 +655,25 @@ gridTableHeader headless block = try $ do then replicate (length dashes) "" else map (intercalate " ") $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString $ many block) $ + heads <- mapM (parseFromString blocks) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: GenParser Char ParserState Block +gridTableRow :: Parser [Char] ParserState [Block] -> [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow block indices = do + -> Parser [Char] ParserState [[Block]] +gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols + mapM (liftM compactifyCell . parseFromString blocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -603,23 +687,23 @@ compactifyCell :: [Block] -> [Block] compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. -gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter :: Parser [Char] ParserState [Char] gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: GenParser t ParserState a -- ^ parser +readWith :: Parser [t] ParserState a -- ^ parser -> ParserState -- ^ initial state -> [t] -- ^ input -> a -readWith parser state input = +readWith parser state input = case runParser parser state "source" input of Left err' -> error $ "\nError:\n" ++ show err' Right result -> result -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a +testStringWith :: (Show a) => Parser [Char] ParserState a -> String -> IO () testStringWith parser str = UTF8.putStrLn $ show $ @@ -627,72 +711,67 @@ testStringWith parser str = UTF8.putStrLn $ show $ -- | Parsing options. data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + { stateOptions :: ReaderOptions, -- ^ User options stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys - stateCitations :: [String], -- ^ List of available citations - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateNotes :: NoteTable, -- ^ List of notes (raw bodies) + stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [[Inline]], -- ^ Authors of document stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior - -- in parsing dashes; -- is em-dash; - -- before numeral is en-dash - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers stateHasChapters :: Bool, -- ^ True if \chapter encountered - stateApplyMacros :: Bool, -- ^ Apply LaTeX macros? stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String -- ^ Current rST default interpreted text role } - deriving Show + +instance Default ParserState where + def = defaultParserState defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, +defaultParserState = + ParserState { stateOptions = def, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateAllowLinks = True, stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, - stateCitations = [], stateNotes = [], - stateTabStop = 4, - stateStandalone = False, + stateNotes' = [], stateTitle = [], stateAuthors = [], stateDate = [], - stateStrict = False, - stateSmart = False, - stateOldDashes = False, - stateLiterateHaskell = False, - stateColumns = 80, stateHeaderTable = [], - stateIndentedCodeClasses = [], stateNextExample = 1, stateExamples = M.empty, stateHasChapters = False, - stateApplyMacros = True, stateMacros = [], stateRstDefaultRole = "title-reference"} -data HeaderType +getOption :: (ReaderOptions -> a) -> Parser s ParserState a +getOption f = (f . stateOptions) `fmap` getState + +-- | Succeed only if the extension is enabled. +guardEnabled :: Extension -> Parser s ParserState () +guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext + +-- | Succeed only if the extension is disabled. +guardDisabled :: Extension -> Parser s ParserState () +guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext + +data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below deriving (Eq, Show) -data ParserContext +data ParserContext = ListItemState -- ^ Used when running parser on list item contents | NullState -- ^ Default state deriving (Eq, Show) @@ -705,51 +784,35 @@ data QuoteContext type NoteTable = [(String, String)] -newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord) +type NoteTable' = [(String, F Blocks)] -- used in markdown reader -toKey :: [Inline] -> Key -toKey = Key . bottomUp lowercase - where lowercase :: Inline -> Inline - lowercase (Str xs) = Str (map toLower xs) - lowercase (Math t xs) = Math t (map toLower xs) - lowercase (Code attr xs) = Code attr (map toLower xs) - lowercase (RawInline f xs) = RawInline f (map toLower xs) - lowercase LineBreak = Space - lowercase x = x +newtype Key = Key String deriving (Show, Read, Eq, Ord) -fromKey :: Key -> [Inline] -fromKey (Key xs) = xs +toKey :: String -> Key +toKey = Key . map toLower . unwords . words type KeyTable = M.Map Key Target --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> Key -- ^ Key - -> Maybe Target -lookupKeySrc table key = case M.lookup key table of - Nothing -> Nothing - Just src -> Just src - -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = getState >>= guard . stateSmart +failUnlessSmart :: Parser [tok] ParserState () +failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +smartPunctuation :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: GenParser Char ParserState Inline +apostrophe :: Parser [Char] ParserState Inline apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") -quoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +quoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline + -> Parser [Char] ParserState a + -> Parser [Char] ParserState a withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -759,39 +822,39 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +singleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces -doubleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +doubleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ do contents <- manyTill inlineParser doubleQuoteEnd return . Quoted DoubleQuote . normalizeSpaces $ contents -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () -charOrRef :: [Char] -> GenParser Char st Char +charOrRef :: [Char] -> Parser [Char] st Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -updateLastStrPos :: GenParser Char ParserState () -updateLastStrPos = getPosition >>= \p -> +updateLastStrPos :: Parser [Char] ParserState () +updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ stateLastStrPos = Just p } -singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote pos <- getPosition @@ -802,61 +865,61 @@ singleQuoteStart = do notFollowedBy (oneOf ")!],;:-? \t\n") notFollowedBy (char '.') <|> lookAhead (string "..." >> return ()) notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) + satisfy (not . isAlphaNum))) -- possess/contraction return () -singleQuoteEnd :: GenParser Char st () +singleQuoteEnd :: Parser [Char] st () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) -doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: GenParser Char st Inline +ellipses :: Parser [Char] st Inline ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') return (Str "\8230") -dash :: GenParser Char ParserState Inline +dash :: Parser [Char] ParserState Inline dash = do - oldDashes <- stateOldDashes `fmap` getState + oldDashes <- getOption readerOldDashes if oldDashes then emDashOld <|> enDashOld else Str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash -hyphenDash :: GenParser Char st String +hyphenDash :: Parser [Char] st String hyphenDash = do try $ string "--" option "\8211" (char '-' >> return "\8212") -emDash :: GenParser Char st String +emDash :: Parser [Char] st String emDash = do try (charOrRef "\8212\151") return "\8212" -enDash :: GenParser Char st String +enDash :: Parser [Char] st String enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: GenParser Char st Inline +enDashOld :: Parser [Char] st Inline enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return (Str "\8211") -emDashOld :: GenParser Char st Inline +emDashOld :: Parser [Char] st Inline emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') return (Str "\8212") @@ -866,24 +929,24 @@ emDashOld = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: GenParser Char ParserState Block +macro :: Parser [Char] ParserState Block macro = do - apply <- stateApplyMacros `fmap` getState + apply <- getOption readerApplyMacros inp <- getInput case parseMacroDefinitions inp of - ([], _) -> pzero - (ms, rest) -> do def <- count (length inp - length rest) anyChar + ([], _) -> mzero + (ms, rest) -> do def' <- count (length inp - length rest) anyChar if apply then do updateState $ \st -> st { stateMacros = ms ++ stateMacros st } return Null - else return $ RawBlock "latex" def + else return $ RawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: String -> GenParser Char ParserState String +applyMacros' :: String -> Parser [Char] ParserState String applyMacros' target = do - apply <- liftM stateApplyMacros getState + apply <- getOption readerApplyMacros if apply then do macros <- liftM stateMacros getState return $ applyMacros macros target diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 0372dbe5d..211fdf20e 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -202,18 +202,17 @@ chomp d = Doc (fromList dl') outp :: (IsString a, Monoid a) => Int -> String -> DocState a -outp off s | off <= 0 = do +outp off s | off < 0 = do -- offset < 0 means newline characters st' <- get let rawpref = prefix st' when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do let pref = reverse $ dropWhile isSpace $ reverse rawpref modify $ \st -> st{ output = fromString pref : output st , column = column st + realLength pref } - when (off < 0) $ do - modify $ \st -> st { output = fromString s : output st - , column = 0 - , newlines = newlines st + 1 } -outp off s = do + modify $ \st -> st { output = fromString s : output st + , column = 0 + , newlines = newlines st + 1 } +outp off s = do -- offset >= 0 (0 might be combining char) st' <- get let pref = prefix st' when (column st' == 0 && usePrefix st' && not (null pref)) $ do diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 62f7c61a0..685fa1ee4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper, isDigit) -import Text.Pandoc.Parsing (ParserState(..)) +import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light @@ -455,13 +455,13 @@ List of all DocBook tags, with [x] indicating implemented, [x] tocfront - An entry in a table of contents for a front matter component [x] toclevel1 - A top-level entry within a table of contents entry for a chapter-like component -[x] toclevel2 - A second-level entry within a table of contents entry for a +[x] toclevel2 - A second-level entry within a table of contents entry for a chapter-like component -[x] toclevel3 - A third-level entry within a table of contents entry for a +[x] toclevel3 - A third-level entry within a table of contents entry for a chapter-like component -[x] toclevel4 - A fourth-level entry within a table of contents entry for a +[x] toclevel4 - A fourth-level entry within a table of contents entry for a chapter-like component -[x] toclevel5 - A fifth-level entry within a table of contents entry for a +[x] toclevel5 - A fifth-level entry within a table of contents entry for a chapter-like component [x] tocpart - An entry in a table of contents for a part of a book [ ] token - A unit of information @@ -503,7 +503,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbBook :: Bool } deriving Show -readDocBook :: ParserState -> String -> Pandoc +readDocBook :: ReaderOptions -> String -> Pandoc readDocBook _ inp = setTitle (dbDocTitle st') $ setAuthors (dbDocAuthors st') $ setDate (dbDocDate st') @@ -574,7 +574,7 @@ addToStart toadd bs = (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest _ -> bs --- function that is used by both mediaobject (in parseBlock) +-- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) getImage :: Element -> DB Inlines getImage e = do diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 536bddd39..e5c310ffc 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -19,10 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of HTML to 'Pandoc' document. @@ -36,18 +36,17 @@ module Text.Pandoc.Readers.HTML ( readHtml , isCommentTag ) where -import Text.ParserCombinators.Parsec -import Text.ParserCombinators.Parsec.Pos import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) -import Data.Char ( isDigit, toLower ) -import Control.Monad ( liftM, guard, when ) +import Data.Char ( isDigit ) +import Control.Monad ( liftM, guard, when, mzero ) isSpace :: Char -> Bool isSpace ' ' = True @@ -56,11 +55,11 @@ isSpace '\n' = True isSpace _ = False -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState -- ^ Parser state +readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml st inp = Pandoc meta blocks - where blocks = readWith parseBody st rest +readHtml opts inp = Pandoc meta blocks + where blocks = readWith parseBody def{ stateOptions = opts } rest tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp hasHeader = any (~== TagOpen "head" []) tags @@ -68,7 +67,7 @@ readHtml st inp = Pandoc meta blocks then parseHeader tags else (Meta [] [] [], tags) -type TagParser = GenParser (Tag String) ParserState +type TagParser = Parser [Tag String] ParserState -- TODO - fix this - not every header has a title tag parseHeader :: [Tag String] -> (Meta, [Tag String]) @@ -96,18 +95,6 @@ block = choice , pRawHtmlBlock ] --- repeated in SelfContained -- consolidate eventually -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } - pList :: TagParser [Block] pList = pBulletList <|> pOrderedList <|> pDefinitionList @@ -126,25 +113,22 @@ pBulletList = try $ do pOrderedList :: TagParser [Block] pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) - st <- getState - let (start, style) = if stateStrict st - then (1, DefaultStyle) - else (sta', sty') - where sta = fromMaybe "1" $ - lookup "start" attribs - sta' = if all isDigit sta - then read sta - else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle + let (start, style) = (sta', sty') + where sta = fromMaybe "1" $ + lookup "start" attribs + sta' = if all isDigit sta + then read sta + else 1 + sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -196,8 +180,8 @@ pRawTag = do pRawHtmlBlock :: TagParser [Block] pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - state <- getState - if stateParseRaw state && not (null raw) + parseRaw <- getOption readerParseRaw + if parseRaw && not (null raw) then return [RawBlock "html" raw] else return [] @@ -235,7 +219,7 @@ pSimpleTable = try $ do rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank - TagClose _ <- pSatisfy (~== TagClose "table") + TagClose _ <- pSatisfy (~== TagClose "table") let cols = maximum $ map length rows let aligns = replicate cols AlignLeft let widths = replicate cols 0 @@ -281,10 +265,7 @@ pCodeBlock = try $ do let attribsId = fromMaybe "" $ lookup "id" attr let attribsClasses = words $ fromMaybe "" $ lookup "class" attr let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - st <- getState - let attribs = if stateStrict st - then ("",[],[]) - else (attribsId, attribsClasses, attribsKV) + let attribs = (attribsId, attribsClasses, attribsKV) return [CodeBlock attribs result] inline :: TagParser [Inline] @@ -310,7 +291,7 @@ pLocation = do pSat :: (Tag String -> Bool) -> TagParser (Tag String) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token show (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) pSatisfy f = try $ optional pLocation >> pSat f @@ -332,14 +313,13 @@ pStrong :: TagParser [Inline] pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong pSuperscript :: TagParser [Inline] -pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript +pSuperscript = pInlinesInTags "sup" Superscript pSubscript :: TagParser [Inline] -pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript +pSubscript = pInlinesInTags "sub" Subscript pStrikeout :: TagParser [Inline] pStrikeout = do - failIfStrict pInlinesInTags "s" Strikeout <|> pInlinesInTags "strike" Strikeout <|> pInlinesInTags "del" Strikeout <|> @@ -381,8 +361,8 @@ pCode = try $ do pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag - state <- getState - if stateParseRaw state + parseRaw <- getOption readerParseRaw + if parseRaw then return [RawInline "html" $ renderTags' [result]] else return [] @@ -417,7 +397,7 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () - _ -> pzero + _ -> mzero pTagText :: TagParser [Inline] pTagText = try $ do @@ -432,11 +412,11 @@ pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: GenParser Char ParserState Inline +pTagContents :: Parser [Char] ParserState Inline pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: GenParser Char ParserState Inline +pStr :: Parser [Char] ParserState Inline pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -455,13 +435,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: GenParser Char ParserState Inline +pSymbol :: Parser [Char] ParserState Inline pSymbol = satisfy isSpecial >>= return . Str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: GenParser Char ParserState Inline +pBad :: Parser [Char] ParserState Inline pBad = do c <- satisfy isBad let c' = case c of @@ -495,7 +475,7 @@ pBad = do _ -> '?' return $ Str [c'] -pSpace :: GenParser Char ParserState Inline +pSpace :: Parser [Char] ParserState Inline pSpace = many1 (satisfy isSpace) >> return Space -- @@ -593,20 +573,19 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String +htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f guard $ '/' `notElem` tag -- not a self-closing tag - let nonTagChunk = many1 $ satisfy (/= '<') let stopper = htmlTag (~== TagClose t) let anytag = liftM snd $ htmlTag (const True) contents <- many $ notFollowedBy' stopper >> - (nonTagChunk <|> htmlInBalanced (const True) <|> anytag) + (htmlInBalanced f <|> anytag <|> count 1 anyChar) endtag <- liftM snd stopper return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String) htmlTag f = try $ do lookAhead (char '<') (next : _) <- getInput >>= return . canonicalizeTags . parseTags @@ -617,7 +596,7 @@ htmlTag f = try $ do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" ++ s ++ "-->") + return (next, "<!--" ++ s ++ "-->") _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3178945e4..4a5a14d6a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,10 +33,10 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, handleIncludes ) where -import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad @@ -47,12 +47,13 @@ import Data.Monoid import System.FilePath (replaceExtension) import Data.List (intercalate) import qualified Data.Map as M +import qualified Control.Exception as E -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState -- ^ Parser state, including options for parser +readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readLaTeX = readWith parseLaTeX +readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc parseLaTeX = do @@ -64,7 +65,7 @@ parseLaTeX = do let date' = stateDate st return $ Pandoc (Meta title' authors' date') $ toList bs -type LP = GenParser Char ParserState +type LP = Parser [Char] ParserState anyControlSeq :: LP String anyControlSeq = do @@ -186,7 +187,7 @@ inline = (mempty <$ comment) <|> (mathInline $ char '$' *> mathChars <* char '$') <|> (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) - <|> (failUnlessLHS *> char '|' *> doLHSverb) + <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str <$> count 1 tildeEscape) <|> (str <$> string "]") <|> (str <$> string "#") -- TODO print warning? @@ -230,14 +231,14 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ @@ -321,7 +322,7 @@ inlineCommand :: LP Inlines inlineCommand = try $ do name <- anyControlSeq guard $ not $ isBlockCommand name - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw star <- option "" (string "*") let name' = name ++ star let rawargs = withRaw (skipopts *> option "" dimenarg @@ -336,7 +337,7 @@ inlineCommand = try $ do Nothing -> raw unlessParseRaw :: LP () -unlessParseRaw = getState >>= guard . not . stateParseRaw +unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands @@ -660,7 +661,7 @@ environment = do rawEnv :: String -> LP Blocks rawEnv name = do let addBegin x = "\\begin{" ++ name ++ "}" ++ x - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw if parseRaw then (rawBlock "latex" . addBegin) <$> (withRaw (env name blocks) >>= applyMacros' . snd) @@ -671,8 +672,9 @@ handleIncludes :: String -> IO String handleIncludes [] = return [] handleIncludes ('\\':xs) = case runParser include defaultParserState "input" ('\\':xs) of - Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f) - (\_ -> return "") + Right (fs, rest) -> do let getfile f = E.catch (UTF8.readFile f) + (\e -> let _ = (e :: E.SomeException) + in return "") yss <- mapM getfile fs (intercalate "\n" yss ++) `fmap` handleIncludes rest @@ -713,10 +715,10 @@ verbatimEnv = do rest <- getInput return (r,rest) -rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline :: Parser [Char] ParserState Inline rawLaTeXInline = do (res, raw) <- withRaw inlineCommand if res == mempty @@ -735,7 +737,7 @@ environments = M.fromList , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) , ("enumerate", ordered_list) - , ("code", failUnlessLHS *> + , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) , ("verbatim", codeBlock <$> (verbEnv "verbatim")) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 51a727996..2407e137c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, + GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -33,26 +35,34 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Options +import Text.Pandoc.Shared hiding (compactify) +import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) -import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, guard, mzero) +import Data.Monoid (mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) +import qualified Data.Set as Set -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readMarkdown :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") +readMarkdown opts s = + (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines -- -- Constants and data structure definitions @@ -70,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: [Char] +setextHChars :: String setextHChars = "=-" isBlank :: Char -> Bool @@ -83,71 +93,72 @@ isBlank _ = False -- auxiliary functions -- -indentSpaces :: GenParser Char ParserState [Char] +isNull :: F Inlines -> Bool +isNull ils = B.isNull $ runF ils def + +spnl :: Parser [Char] st () +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') + +indentSpaces :: Parser [Char] ParserState String indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces :: Parser [Char] ParserState String nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop sps <- many (char ' ') - if length sps < tabStop + if length sps < tabStop then return sps else unexpected "indented line" -skipNonindentSpaces :: GenParser Char ParserState () +skipNonindentSpaces :: Parser [Char] ParserState () skipNonindentSpaces = do - state <- getState - atMostSpaces (stateTabStop state - 1) + tabStop <- getOption readerTabStop + atMostSpaces (tabStop - 1) -atMostSpaces :: Int -> GenParser Char ParserState () +atMostSpaces :: Int -> Parser [Char] ParserState () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () -litChar :: GenParser Char ParserState Char +litChar :: Parser [Char] ParserState Char litChar = escapedChar' <|> noneOf "\n" <|> (newline >> notFollowedBy blankline >> return ' ') --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine :: GenParser tok st () -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: GenParser Char ParserState Inline - -> GenParser Char ParserState [Inline] -inlinesInBalancedBrackets parser = try $ do +inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines) +inlinesInBalancedBrackets = try $ do char '[' - result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - guard (res == "[") - bal <- inlinesInBalancedBrackets parser - return $ [Str "["] ++ bal ++ [Str "]"]) - <|> (count 1 parser)) + result <- manyTill ( (do lookAhead $ try $ do x <- inline + guard (runF x def == B.str "[") + bal <- inlinesInBalancedBrackets + return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal) + <|> inline) (char ']') - return $ concat result + return $ mconcat result -- -- document structure -- -titleLine :: GenParser Char ParserState [Inline] +titleLine :: Parser [Char] ParserState (F Inlines) titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ normalizeSpaces res + return $ trimInlinesF $ mconcat res -authorsLine :: GenParser Char ParserState [[Inline]] -authorsLine = try $ do +authorsLine :: Parser [Char] ParserState (F [Inlines]) +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many (notFollowedBy (satisfy $ \c -> @@ -155,67 +166,63 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ filter (not . null) $ map normalizeSpaces authors + return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors -dateLine :: GenParser Char ParserState [Inline] +dateLine :: Parser [Char] ParserState (F Inlines) dateLine = try $ do char '%' skipSpaces - date <- manyTill inline newline - return $ normalizeSpaces date - -titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline]) -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option [] dateLine + trimInlinesF . mconcat <$> manyTill inline newline + +titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +titleBlock = pandocTitleBlock <|> mmdTitleBlock + +pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock = try $ do + guardEnabled Ext_pandoc_title_block + title <- option mempty titleLine + author <- option (return []) authorsLine + date <- option mempty dateLine optional blanklines return (title, author, date) -parseMarkdown :: GenParser Char ParserState Pandoc +mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +mmdTitleBlock = try $ do + guardEnabled Ext_mmd_title_block + kvPairs <- many1 kvPair + blanklines + let title = maybe mempty return $ lookup "title" kvPairs + let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs + let date = maybe mempty return $ lookup "date" kvPairs + return (title, author, date) + +kvPair :: Parser [Char] ParserState (String, Inlines) +kvPair = try $ do + key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + val <- manyTill anyChar + (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + let key' = concat $ words $ map toLower key + let val' = trimInlines $ B.text val + return (key',val') + +parseMarkdown :: Parser [Char] ParserState Pandoc parseMarkdown = do -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys and notes - -- docMinusKeys is the raw document with blanks where the keys/notes were... - st <- getState - let firstPassParser = referenceKey - <|> (if stateStrict st then pzero else noteBlock) - <|> liftM snd (withRaw codeBlockDelimited) - <|> lineClump - docMinusKeys <- liftM concat $ manyTill firstPassParser eof - setInput docMinusKeys - setPosition startPos - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - -- now parse it for real... - (title, author, date) <- option ([],[],[]) titleBlock + updateState $ \state -> state { stateOptions = + let oldOpts = stateOptions state in + oldOpts{ readerParseRaw = True } } + (title, authors, date) <- option (mempty,return [],mempty) titleBlock blocks <- parseBlocks - let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks - -- if there are labeled examples, change references into numbers - examples <- liftM stateExamples getState - let handleExampleRef :: Inline -> Inline - handleExampleRef z@(Str ('@':xs)) = - case M.lookup xs examples of - Just n -> Str (show n) - Nothing -> z - handleExampleRef z = z - if M.null examples - then return doc - else return $ bottomUp handleExampleRef doc - --- --- initial pass for references and notes --- + st <- getState + return $ B.setTitle (runF title st) + $ B.setAuthors (runF authors st) + $ B.setDate (runF date st) + $ B.doc $ runF blocks st -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState (F Blocks) referenceKey = try $ do - startPos <- getPosition skipNonindentSpaces - lab <- reference + (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do @@ -223,22 +230,20 @@ referenceKey = try $ do skipMany spaceChar optional $ newline >> notFollowedBy blankline skipMany spaceChar - notFollowedBy' reference + notFollowedBy' (() <$ reference) many1 $ escapedChar' <|> satisfy (not . isBlank) let betweenAngles = try $ char '<' >> manyTill (escapedChar' <|> litChar) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines - endPos <- getPosition let target = (escapeURI $ removeTrailingSpace src, tit) st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys } + return $ return mempty -referenceTitle :: GenParser Char ParserState String +referenceTitle :: Parser [Char] ParserState String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) @@ -247,25 +252,38 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ fromEntities tit -noteMarker :: GenParser Char ParserState [Char] +-- | PHP Markdown Extra style abbreviation key. Currently +-- we just skip them, since Pandoc doesn't have an element for +-- an abbreviation. +abbrevKey :: Parser [Char] ParserState (F Blocks) +abbrevKey = do + guardEnabled Ext_abbreviations + try $ do + char '*' + reference + char ':' + skipMany (satisfy (/= '\n')) + blanklines + return $ return mempty + +noteMarker :: Parser [Char] ParserState String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: GenParser Char ParserState [Char] +rawLine :: Parser [Char] ParserState String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: GenParser Char ParserState [Char] +rawLines :: Parser [Char] ParserState String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState (F Blocks) noteBlock = try $ do - startPos <- getPosition skipNonindentSpaces ref <- noteMarker char ':' @@ -275,87 +293,75 @@ noteBlock = try $ do (try (blankline >> indentSpaces >> notFollowedBy blankline)) optional blanklines - endPos <- getPosition - let newnote = (ref, (intercalate "\n" raw) ++ "\n\n") - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + parsed <- parseFromString parseBlocks $ unlines raw ++ "\n" + let newnote = (ref, parsed) + updateState $ \s -> s { stateNotes' = newnote : stateNotes' s } + return mempty -- -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = manyTill block eof - -block :: GenParser Char ParserState Block -block = do - st <- getState - choice (if stateStrict st - then [ header - , codeBlockIndented - , blockQuote - , hrule - , bulletList - , orderedList - , htmlBlock - , para - , plain - , nullBlock ] - else [ codeBlockDelimited - , macro - , header - , table - , codeBlockIndented - , lhsCodeBlock - , blockQuote - , hrule - , bulletList - , orderedList - , definitionList - , rawTeXBlock - , para - , rawHtmlBlocks - , plain - , nullBlock ]) <?> "block" +parseBlocks :: Parser [Char] ParserState (F Blocks) +parseBlocks = mconcat <$> manyTill block eof + +block :: Parser [Char] ParserState (F Blocks) +block = choice [ codeBlockFenced + , codeBlockBackticks + , guardEnabled Ext_latex_macros *> (mempty <$ macro) + , header + , rawTeXBlock + , htmlBlock + , table + , codeBlockIndented + , lhsCodeBlock + , blockQuote + , hrule + , bulletList + , orderedList + , definitionList + , noteBlock + , referenceKey + , abbrevKey + , para + , plain + ] <?> "block" -- -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxHeader :: GenParser Char ParserState Block +atxHeader :: Parser [Char] ParserState (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text + text <- trimInlinesF . mconcat <$> manyTill inline atxClosing + return $ B.header level <$> text -atxClosing :: GenParser Char st [Char] +atxClosing :: Parser [Char] st String atxClosing = try $ skipMany (char '#') >> blanklines -setextHeader :: GenParser Char ParserState Block +setextHeader :: Parser [Char] ParserState (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- many1Till inline newline + text <- trimInlinesF . mconcat <$> many1Till inline newline underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - return $ Header level (normalizeSpaces text) + return $ B.header level <$> text -- -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -363,32 +369,26 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return HorizontalRule + return $ return B.horizontalRule -- -- code blocks -- -indentedLine :: GenParser Char ParserState [Char] +indentedLine :: Parser [Char] ParserState String indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") blockDelimiter :: (Char -> Bool) -> Maybe Int - -> GenParser Char st (Int, (String, [String], [(String, String)]), Char) + -> Parser [Char] st Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) - size <- case len of - Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length - many spaceChar - attr <- option ([],[],[]) - $ attributes -- ~~~ {.ruby} - <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby - blankline - return (size, attr, c) + case len of + Just l -> count l (char c) >> many (char c) >> return l + Nothing -> count 3 (char c) >> many (char c) >>= + return . (+ 3) . length -attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attributes :: Parser [Char] st (String, [String], [(String, String)]) attributes = try $ do char '{' spnl @@ -400,28 +400,28 @@ attributes = try $ do | otherwise = firstNonNull xs return (firstNonNull $ reverse ids, concat classes, concat keyvals) -attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attribute :: Parser [Char] st (String, [String], [(String, String)]) attribute = identifierAttr <|> classAttr <|> keyValAttr -identifier :: GenParser Char st [Char] +identifier :: Parser [Char] st String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: GenParser Char st ([Char], [a], [a1]) +identifierAttr :: Parser [Char] st (String, [a], [a1]) identifierAttr = try $ do char '#' result <- identifier return (result,[],[]) -classAttr :: GenParser Char st ([Char], [[Char]], [a]) +classAttr :: Parser [Char] st (String, [String], [a]) classAttr = try $ do char '.' result <- identifier return ("",[result],[]) -keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) +keyValAttr :: Parser [Char] st (String, [a], [(String, String)]) keyValAttr = try $ do key <- identifier char '=' @@ -430,33 +430,49 @@ keyValAttr = try $ do <|> many nonspaceChar return ("",[],[(key,val)]) -codeBlockDelimited :: GenParser Char st Block -codeBlockDelimited = try $ do - (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) +codeBlockFenced :: Parser [Char] ParserState (F Blocks) +codeBlockFenced = try $ do + guardEnabled Ext_fenced_code_blocks + size <- blockDelimiter (=='~') Nothing + skipMany spaceChar + attr <- option ([],[],[]) $ + guardEnabled Ext_fenced_code_attributes >> attributes + blankline + contents <- manyTill anyLine (blockDelimiter (=='~') (Just size)) + blanklines + return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + +codeBlockBackticks :: Parser [Char] ParserState (F Blocks) +codeBlockBackticks = try $ do + guardEnabled Ext_backtick_code_blocks + blockDelimiter (=='`') (Just 3) + skipMany spaceChar + cls <- many1 alphaNum + blankline + contents <- manyTill anyLine $ blockDelimiter (=='`') (Just 3) blanklines - return $ CodeBlock attr $ intercalate "\n" contents + return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents -codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented :: Parser [Char] ParserState (F Blocks) codeBlockIndented = do - contents <- many1 (indentedLine <|> + contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine return $ b ++ l)) optional blanklines - st <- getState - return $ CodeBlock ("", stateIndentedCodeClasses st, []) $ + classes <- getOption readerIndentedCodeClasses + return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState (F Blocks) lhsCodeBlock = do - failUnlessLHS - liftM (CodeBlock ("",["sourceCode","literate","haskell"],[])) - (lhsCodeBlockBird <|> lhsCodeBlockLaTeX) - <|> liftM (CodeBlock ("",["sourceCode","haskell"],[])) - lhsCodeBlockInverseBird + guardEnabled Ext_literate_haskell + (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) + <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: GenParser Char ParserState String +lhsCodeBlockLaTeX :: Parser [Char] ParserState String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -464,13 +480,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: GenParser Char ParserState String +lhsCodeBlockBird :: Parser [Char] ParserState String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: GenParser Char ParserState String +lhsCodeBlockInverseBird :: Parser [Char] ParserState String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String +lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -482,25 +498,24 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> GenParser Char st [Char] +birdTrackLine :: Char -> Parser [Char] st String birdTrackLine c = try $ do char c -- allow html tags on left margin: when (c == '<') $ notFollowedBy letter manyTill anyChar newline - -- -- block quotes -- -emailBlockQuoteStart :: GenParser Char ParserState Char +emailBlockQuoteStart :: Parser [Char] ParserState Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote :: GenParser Char ParserState [[Char]] +emailBlockQuote :: Parser [Char] ParserState [String] emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> + raw <- sepBy (many (nonEndline <|> (try (endline >> notFollowedBy emailBlockQuoteStart >> return '\n')))) (try (newline >> emailBlockQuoteStart)) @@ -508,51 +523,50 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: GenParser Char ParserState Block -blockQuote = do +blockQuote :: Parser [Char] ParserState (F Blocks) +blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ BlockQuote contents - + return $ B.blockQuote <$> contents + -- -- list blocks -- -bulletListStart :: GenParser Char ParserState () +bulletListStart :: Parser [Char] ParserState () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists + notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker spaceChar skipSpaces -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, insist on more than one space - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (try $ char ' ' >> spaceChar) - else spaceChar - skipSpaces - return (num, style, delim) - -listStart :: GenParser Char ParserState () + (guardDisabled Ext_fancy_lists >> + do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim)) + <|> do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (try $ char ' ' >> spaceChar) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: Parser [Char] ParserState () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState [Char] +listLine :: Parser [Char] ParserState String listLine = try $ do notFollowedBy blankline notFollowedBy' (do indentSpaces @@ -562,8 +576,8 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Char] +rawListItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState String rawListItem start = try $ do start first <- listLine @@ -571,17 +585,17 @@ rawListItem start = try $ do blanks <- many blankline return $ concat (first:rest) ++ blanks --- continuation of a list item - indented and separated by blankline +-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState [Char] +listContinuation :: Parser [Char] ParserState String listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine :: Parser [Char] ParserState String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -589,8 +603,8 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Block] +listItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -606,38 +620,59 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 $ listItem $ try $ - do optional newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - orderedListMarker style delim - return $ OrderedList (start, style, delim) $ compactify items - -bulletList :: GenParser Char ParserState Block -bulletList = - many1 (listItem bulletListStart) >>= return . BulletList . compactify + unless ((style == DefaultStyle || style == Decimal || style == Example) && + (delim == DefaultDelim || delim == Period)) $ + guardEnabled Ext_fancy_lists + when (style == Example) $ guardEnabled Ext_example_lists + items <- fmap sequence $ many1 $ listItem + ( try $ do + optional newline -- if preceded by Plain block in a list + skipNonindentSpaces + orderedListMarker style delim ) + start' <- option 1 $ guardEnabled Ext_startnum >> return start + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items + +-- | Change final list item from @Para@ to @Plain@ if the list contains +-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].) +compactify :: [Blocks] -- ^ List of list items (each a list of blocks) + -> [Blocks] +compactify [] = [] +compactify items = + let (others, final) = (init items, last items) + in case reverse (B.toList final) of + (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of + -- if this is only Para, change to Plain + [_] -> others ++ [B.fromList (reverse $ Plain a : xs)] + _ -> items + _ -> items + +bulletList :: Parser [Char] ParserState (F Blocks) +bulletList = do + items <- fmap sequence $ many1 $ listItem bulletListStart + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: GenParser Char ParserState () +defListMarker :: Parser [Char] ParserState () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' - st <- getState - let tabStop = stateTabStop st + tabStop <- getOption readerTabStop let remaining = tabStop - (length sps + 1) if remaining > 0 then count remaining (char ' ') <|> string "\t" - else pzero + else mzero return () -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks])) definitionListItem = try $ do + guardEnabled Ext_definition_lists -- first, see if this has any chance of being a definition list: lookAhead (anyLine >> optional blankline >> defListMarker) - term <- manyTill inline newline + term <- trimInlinesF . mconcat <$> manyTill inline newline optional blankline raw <- many1 defRawBlock state <- getState @@ -645,9 +680,9 @@ definitionListItem = try $ do -- parse the extracted block, which may contain various block elements: contents <- mapM (parseFromString parseBlocks) raw updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) + return $ liftM2 (,) term (sequence contents) -defRawBlock :: GenParser Char ParserState [Char] +defRawBlock :: Parser [Char] ParserState String defRawBlock = try $ do defListMarker firstline <- anyLine @@ -659,119 +694,149 @@ defRawBlock = try $ do return $ unlines lns ++ trl return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState (F Blocks) definitionList = do - items <- many1 definitionListItem - -- "compactify" the definition list: - let defs = map snd items - let defBlocks = reverse $ concat $ concat defs - let isPara (Para _) = True + items <- fmap sequence $ many1 definitionListItem + return $ B.definitionList <$> fmap compactifyDL items + +compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactifyDL items = + let defs = concatMap snd items + defBlocks = reverse $ concatMap B.toList defs + isPara (Para _) = True isPara _ = False - let items' = case take 1 defBlocks of - [Para x] -> if not $ any isPara (drop 1 defBlocks) - then let (t,ds) = last items - lastDef = last ds - ds' = init ds ++ - [init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - else items - _ -> items - return $ DefinitionList items' + in case defBlocks of + (Para x:_) -> if not $ any isPara (drop 1 defBlocks) + then let (t,ds) = last items + lastDef = B.toList $ last ds + ds' = init ds ++ + [B.fromList $ init lastDef ++ [Plain x]] + in init items ++ [(t, ds')] + else items + _ -> items -- -- paragraph block -- +{- isHtmlOrBlank :: Inline -> Bool isHtmlOrBlank (RawInline "html" _) = True isHtmlOrBlank (Space) = True isHtmlOrBlank (LineBreak) = True isHtmlOrBlank _ = False +-} -para :: GenParser Char ParserState Block -para = try $ do - result <- liftM normalizeSpaces $ many1 inline - guard $ not . all isHtmlOrBlank $ result - option (Plain result) $ try $ do +para :: Parser [Char] ParserState (F Blocks) +para = try $ do + result <- trimInlinesF . mconcat <$> many1 inline + -- TODO remove this if not really needed? and remove isHtmlOrBlank + -- guard $ not $ F.all isHtmlOrBlank result + option (B.plain <$> result) $ try $ do newline - blanklines <|> - (getState >>= guard . stateStrict >> - lookAhead (blockQuote <|> header) >> return "") - return $ Para result + (blanklines >> return mempty) + <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) + <|> (guardDisabled Ext_blank_before_header >> lookAhead header) + return $ B.para <$> result -plain :: GenParser Char ParserState Block -plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState (F Blocks) +plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces --- +-- -- raw html -- -htmlElement :: GenParser Char ParserState [Char] +htmlElement :: Parser [Char] ParserState String htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: GenParser Char ParserState Block -htmlBlock = try $ do - failUnlessBeginningOfLine +htmlBlock :: Parser [Char] ParserState (F Blocks) +htmlBlock = do + guardEnabled Ext_raw_html + res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks) + <|> htmlBlock' + return $ return $ B.rawBlock "html" res + +htmlBlock' :: Parser [Char] ParserState String +htmlBlock' = try $ do first <- htmlElement finalSpace <- many spaceChar finalNewlines <- many newline - return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines + return $ first ++ finalSpace ++ finalNewlines -strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = do - failUnlessBeginningOfLine - htmlInBalanced (not . isInlineTag) +strictHtmlBlock :: Parser [Char] ParserState String +strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock :: Parser [Char] ParserState String rawVerbatimBlock = try $ do (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") - (const True)) + t == "pre" || t == "style" || t == "script") + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] -rawTeXBlock :: GenParser Char ParserState Block +rawTeXBlock :: Parser [Char] ParserState (F Blocks) rawTeXBlock = do - failIfStrict - result <- liftM (RawBlock "latex") rawLaTeXBlock - <|> liftM (RawBlock "context") rawConTeXtEnvironment + guardEnabled Ext_raw_tex + result <- (B.rawBlock "latex" <$> rawLaTeXBlock) + <|> (B.rawBlock "context" <$> rawConTeXtEnvironment) spaces - return result + return $ return result -rawHtmlBlocks :: GenParser Char ParserState Block +rawHtmlBlocks :: Parser [Char] ParserState String rawHtmlBlocks = do - htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> - liftM snd (htmlTag isBlockTag) - sps <- do sp1 <- many spaceChar - sp2 <- option "" (blankline >> return "\n") - sp3 <- many spaceChar - sp4 <- option "" blanklines - return $ sp1 ++ sp2 ++ sp3 ++ sp4 - -- note: we want raw html to be able to - -- precede a code block, when separated - -- by a blank line - return $ blk ++ sps + htmlBlocks <- many1 $ try $ do + s <- rawVerbatimBlock <|> try ( + do (t,raw) <- htmlTag isBlockTag + exts <- getOption readerExtensions + -- if open tag, need markdown="1" if + -- markdown_attributes extension is set + case t of + TagOpen _ as + | Ext_markdown_attribute `Set.member` + exts -> + if "markdown" `notElem` + map fst as + then mzero + else return $ + stripMarkdownAttribute raw + | otherwise -> return raw + _ -> return raw ) + sps <- do sp1 <- many spaceChar + sp2 <- option "" (blankline >> return "\n") + sp3 <- many spaceChar + sp4 <- option "" blanklines + return $ sp1 ++ sp2 ++ sp3 ++ sp4 + -- note: we want raw html to be able to + -- precede a code block, when separated + -- by a blank line + return $ s ++ sps let combined = concat htmlBlocks - let combined' = if last combined == '\n' then init combined else combined - return $ RawBlock "html" combined' + return $ if last combined == '\n' then init combined else combined + +-- remove markdown="1" attribute +stripMarkdownAttribute :: String -> String +stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s + where filterAttrib (TagOpen t as) = TagOpen t + [(k,v) | (k,v) <- as, k /= "markdown"] + filterAttrib x = x -- -- Tables --- +-- -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> GenParser Char st (Int, Int) +dashedLine :: Char + -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar return $ (length dashes, length $ dashes ++ sp) --- Parse a table header with dashed lines of '-' preceded by +-- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -784,84 +849,104 @@ simpleTableHeader headless = try $ do -- If no header, calculate alignment on basis of first row of text rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ if headless - then lookAhead anyLine + then lookAhead anyLine else return rawContent let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths let rawHeads' = if headless then replicate (length dashes) "" - else rawHeads - heads <- mapM (parseFromString (many plain)) $ - map removeLeadingTrailingSpace rawHeads' + else rawHeads + heads <- fmap sequence + $ mapM (parseFromString (mconcat <$> many plain)) + $ map removeLeadingTrailingSpace rawHeads' return (heads, aligns, indices) +-- Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows. +alignType :: [String] + -> Int + -> Alignment +alignType [] _ = AlignDefault +alignType strLst len = + let nonempties = filter (not . null) $ map removeTrailingSpace strLst + (leftSpace, rightSpace) = + case sortBy (comparing length) nonempties of + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) + in case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: GenParser Char ParserState [Char] +tableFooter :: Parser [Char] ParserState String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: GenParser Char ParserState Char +tableSep :: Parser [Char] ParserState Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: [Int] - -> GenParser Char ParserState [String] + -> Parser [Char] ParserState [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ + return $ map removeLeadingTrailingSpace $ tail $ splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> GenParser Char ParserState [[Block]] -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) + -> Parser [Char] ParserState (F [Blocks]) +tableLine indices = rawTableLine indices >>= + fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> GenParser Char ParserState [[Block]] + -> Parser [Char] ParserState (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: GenParser Char ParserState [Inline] +tableCaption :: Parser [Char] ParserState (F Inlines) tableCaption = try $ do + guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + trimInlinesF . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine + (aligns, _widths, heads', lines') <- + tableWith (simpleTableHeader headless) tableLine (return ()) (if headless then tableFooter else tableFooter <|> blanklines) - tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) - return $ Table c a (replicate (length a) 0) h l + return (aligns, replicate (length aligns) 0, heads', lines') -- Parse a multiline table: starts with row of '-' on top, then header -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' else tableSep >>~ notFollowedBy blankline rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy tableSep >> many1Till anyChar newline) initSp <- nonindentSpaces @@ -872,54 +957,206 @@ multilineTableHeader headless = try $ do rawHeadsList <- if headless then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine - else return $ transpose $ map + else return $ transpose $ map (\ln -> tail $ splitStringByIndices (init indices) ln) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - heads <- mapM (parseFromString (many plain)) $ + heads <- fmap sequence $ + mapM (parseFromString (mconcat <$> many plain)) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] - -> Int - -> Alignment -alignType [] _ = AlignDefault -alignType strLst len = - let nonempties = filter (not . null) $ map removeTrailingSpace strLst - (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block tableCaption + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable headless = + tableWith (gridTableHeader headless) gridTableRow + (gridTableSep '-') gridTableFooter + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = map removeFinalBar $ tail $ + splitStringByIndices (init indices) $ removeTrailingSpace line + +gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) -table :: GenParser Char ParserState Block -table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <|> - gridTable False <|> gridTable True <?> "table" +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +removeFinalBar :: String -> String +removeFinalBar = + reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse + +-- | Separator between rows of grid table. +gridTableSep :: Char -> Parser [Char] ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- | Parse header for a grid table. +gridTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) +gridTableHeader headless = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> + many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + heads <- fmap sequence $ mapM (parseFromString block) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) --- +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices line) + +-- | Parse row of grid table. +gridTableRow :: [Int] + -> Parser [Char] ParserState (F [Blocks]) +gridTableRow indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + fmap compactify <$> fmap sequence (mapM (parseFromString block) cols) + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +-- | Parse footer for a grid table. +gridTableFooter :: Parser [Char] ParserState [Char] +gridTableFooter = blanklines + +pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable = try $ do + let pipeBreak = nonindentSpaces *> optional (char '|') *> + pipeTableHeaderPart `sepBy1` sepPipe <* + optional (char '|') <* blankline + (heads,aligns) <- try ( pipeBreak >>= \als -> + return (return $ replicate (length als) mempty, als)) + <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> + + return (row, als) ) + lines' <- sequence <$> many1 pipeTableRow + blanklines + let widths = replicate (length aligns) 0.0 + return $ (aligns, widths, heads, lines') + +sepPipe :: Parser [Char] ParserState () +sepPipe = try $ do + char '|' <|> char '+' + notFollowedBy blankline + +-- parse a row, also returning probable alignments for org-table cells +pipeTableRow :: Parser [Char] ParserState (F [Blocks]) +pipeTableRow = do + nonindentSpaces + optional (char '|') + let cell = mconcat <$> + many (notFollowedBy (blankline <|> char '|') >> inline) + first <- cell + sepPipe + rest <- cell `sepBy1` sepPipe + optional (char '|') + blankline + let cells = sequence (first:rest) + return $ do + cells' <- cells + return $ map + (\ils -> + case trimInlines ils of + ils' | B.isNull ils' -> mempty + | otherwise -> B.plain $ ils') cells' + +pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart = do + left <- optionMaybe (char ':') + many1 (char '-') + right <- optionMaybe (char ':') + return $ + case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter + +-- Succeed only if current line contains a pipe. +scanForPipe :: Parser [Char] st () +scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return () + +-- | Parse a table using 'headerParser', 'rowParser', +-- 'lineParser', and 'footerParser'. Variant of the version in +-- Text.Pandoc.Parsing. +tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState (F [Blocks])) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith headerParser rowParser lineParser footerParser = try $ do + (heads, aligns, indices) <- headerParser + lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + footerParser + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ (aligns, widths, heads, lines') + +table :: Parser [Char] ParserState (F Blocks) +table = try $ do + frontCaption <- option Nothing (Just <$> tableCaption) + (aligns, widths, heads, lns) <- + try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable False) <|> + try (guardEnabled Ext_simple_tables >> + (simpleTable True <|> simpleTable False)) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable True) <|> + try (guardEnabled Ext_grid_tables >> + (gridTable False <|> gridTable True)) <?> "table" + optional blanklines + caption <- case frontCaption of + Nothing -> option (return mempty) tableCaption + Just c -> return c + return $ do + caption' <- caption + heads' <- heads + lns' <- lns + return $ B.table caption' (zip aligns widths) heads' lns' + +-- -- inline -- -inline :: GenParser Char ParserState Inline -inline = choice inlineParsers <?> "inline" - -inlineParsers :: [GenParser Char ParserState Inline] -inlineParsers = [ whitespace +inline :: Parser [Char] ParserState (F Inlines) +inline = choice [ whitespace , str , endline , code @@ -927,8 +1164,8 @@ inlineParsers = [ whitespace , strong , emph , note - , link , cite + , link , image , math , strikeout @@ -940,115 +1177,127 @@ inlineParsers = [ whitespace , escapedChar , rawLaTeXInline' , exampleRef - , smartPunctuation inline - , charRef + , smart + , return . B.singleton <$> charRef , symbol - , ltSign ] + , ltSign + ] <?> "inline" -escapedChar' :: GenParser Char ParserState Char +escapedChar' :: Parser [Char] ParserState Char escapedChar' = try $ do char '\\' - state <- getState - if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) + (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) + <|> oneOf "\\`*_{}[]()>#+-.!~" -escapedChar :: GenParser Char ParserState Inline +escapedChar :: Parser [Char] ParserState (F Inlines) escapedChar = do result <- escapedChar' - return $ case result of - ' ' -> Str "\160" -- "\ " is a nonbreaking space - '\n' -> LineBreak -- "\[newline]" is a linebreak - _ -> Str [result] + case result of + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + '\n' -> guardEnabled Ext_escaped_line_breaks >> + return (return B.linebreak) -- "\[newline]" is a linebreak + _ -> return $ return $ B.str [result] -ltSign :: GenParser Char ParserState Inline +ltSign :: Parser [Char] ParserState (F Inlines) ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] + guardDisabled Ext_raw_html + <|> guardDisabled Ext_markdown_in_html_blocks + <|> (notFollowedBy' rawHtmlBlocks >> return ()) + char '<' + return $ return $ B.str "<" -exampleRef :: GenParser Char ParserState Inline +exampleRef :: Parser [Char] ParserState (F Inlines) exampleRef = try $ do + guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - -- We just return a Str. These are replaced with numbers - -- later. See the end of parseMarkdown. - return $ Str $ '@' : lab - -symbol :: GenParser Char ParserState Inline -symbol = do + return $ do + st <- askF + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) + +symbol :: Parser [Char] ParserState (F Inlines) +symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' - notFollowedBy' rawTeXBlock + notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ Str [result] + return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState (F Inlines) +code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> + (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ optional whitespace >> attributes) - return $ Code attr $ removeLeadingTrailingSpace $ concat result - -mathWord :: GenParser Char st [Char] -mathWord = liftM concat $ many1 mathChunk - -mathChunk :: GenParser Char st [Char] -mathChunk = do char '\\' - c <- anyChar - return ['\\',c] - <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$')) - -math :: GenParser Char ParserState Inline -math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) - <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) - -mathDisplay :: GenParser Char ParserState String -mathDisplay = try $ do - failIfStrict - string "$$" - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") - -mathInline :: GenParser Char ParserState String -mathInline = try $ do - failIfStrict - char '$' + attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> + optional whitespace >> attributes) + return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result + +math :: Parser [Char] ParserState (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> (return . B.math <$> (mathInline >>= applyMacros')) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathDisplayWith :: String -> String -> Parser [Char] ParserState String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + +mathInlineWith :: String -> String -> Parser [Char] ParserState String +mathInlineWith op cl = try $ do + string op notFollowedBy space - words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline))) - char '$' - notFollowedBy digit - return $ intercalate " " words' + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' -- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row -- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: GenParser Char st Inline +fours :: Parser [Char] st (F Inlines) fours = try $ do x <- char '*' <|> char '_' <|> char '~' <|> char '^' count 2 $ satisfy (==x) rest <- many1 (satisfy (==x)) - return $ Str (x:x:x:rest) + return $ return $ B.str (x:x:x:rest) -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) - => GenParser Char ParserState a - -> GenParser Char ParserState b - -> GenParser Char ParserState [Inline] + => Parser [Char] ParserState a + -> Parser [Char] ParserState b + -> Parser [Char] ParserState (F Inlines) inlinesBetween start end = - normalizeSpaces `liftM` try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) + (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: GenParser Char ParserState a - -> GenParser Char ParserState a +nested :: Parser [Char] ParserState a + -> Parser [Char] ParserState a nested p = do nestlevel <- stateMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -1057,54 +1306,57 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -emph :: GenParser Char ParserState Inline -emph = Emph `fmap` nested +emph :: Parser [Char] ParserState (F Inlines) +emph = fmap B.emph <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar - starEnd = notFollowedBy' strong >> char '*' + starEnd = notFollowedBy' (() <$ strong) >> char '*' ulStart = char '_' >> lookAhead nonspaceChar - ulEnd = notFollowedBy' strong >> char '_' + ulEnd = notFollowedBy' (() <$ strong) >> char '_' -strong :: GenParser Char ParserState Inline -strong = Strong `liftM` nested +strong :: Parser [Char] ParserState (F Inlines) +strong = fmap B.strong <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" ulStart = string "__" >> lookAhead nonspaceChar ulEnd = try $ string "__" -strikeout :: GenParser Char ParserState Inline -strikeout = Strikeout `liftM` - (failIfStrict >> inlinesBetween strikeStart strikeEnd) +strikeout :: Parser [Char] ParserState (F Inlines) +strikeout = fmap B.strikeout <$> + (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Superscript +superscript :: Parser [Char] ParserState (F Inlines) +superscript = fmap B.superscript <$> try (do + guardEnabled Ext_superscript + char '^' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Subscript +subscript :: Parser [Char] ParserState (F Inlines) +subscript = fmap B.subscript <$> try (do + guardEnabled Ext_subscript + char '~' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: GenParser Char ParserState Inline -whitespace = spaceChar >> - ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) - <|> (skipMany spaceChar >> return Space) ) <?> "whitespace" +whitespace :: Parser [Char] ParserState (F Inlines) +whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" + where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) + regsp = skipMany spaceChar >> return B.space -nonEndline :: GenParser Char st Char +nonEndline :: Parser [Char] st Char nonEndline = satisfy (/='\n') -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState (F Inlines) str = do - smart <- stateSmart `fmap` getState + isSmart <- readerSmart . stateOptions <$> getState a <- alphaNum as <- many $ alphaNum - <|> (try $ char '_' >>~ lookAhead alphaNum) - <|> if smart + <|> (guardEnabled Ext_intraword_underscores >> + try (char '_' >>~ lookAhead alphaNum)) + <|> if isSmart then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >> lookAhead alphaNum >> return '\x2019') -- for things like l'aide @@ -1113,15 +1365,16 @@ str = do updateState $ \s -> s{ stateLastStrPos = Just pos } let result = a:as let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - if smart + if isSmart then case likelyAbbrev result of - [] -> return $ Str result + [] -> return $ return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (Str $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ Str result) - else return $ Str result + return (return $ B.str + $ result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ return $ B.str result) + else return $ return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1136,39 +1389,38 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState (F Inlines) endline = try $ do newline notFollowedBy blankline - st <- getState - when (stateStrict st) $ do - notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart + guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: + st <- getState when (stateParserContext st == ListItemState) $ do notFollowedBy' bulletListStart notFollowedBy' anyOrderedListStart - return Space + (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (return $ return B.space) -- -- links -- -- a reference label for a link -reference :: GenParser Char ParserState [Inline] +reference :: Parser [Char] ParserState (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - result <- inlinesInBalancedBrackets inline - return $ normalizeSpaces result + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -- source for a link, with optional title -source :: GenParser Char ParserState (String, [Char]) +source :: Parser [Char] ParserState (String, String) source = (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: GenParser Char ParserState (String, [Char]) +source' :: Parser [Char] ParserState (String, String) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1186,7 +1438,7 @@ source' = do eof return (escapeURI $ removeTrailingSpace src, tit) -linkTitle :: GenParser Char ParserState String +linkTitle :: Parser [Char] ParserState String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -1194,78 +1446,88 @@ linkTitle = try $ do tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) return $ fromEntities tit -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState (F Inlines) link = try $ do - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Link (delinkify lab) (src, tit) - -delinkify :: [Inline] -> [Inline] -delinkify = bottomUp $ concatMap go - where go (Link lab _) = lab - go x = [x] + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (lab,raw) <- reference + setState $ st{ stateAllowLinks = True } + regLink B.link lab <|> referenceLink B.link (lab,raw) + +regLink :: (String -> String -> Inlines -> Inlines) + -> F Inlines -> Parser [Char] ParserState (F Inlines) +regLink constructor lab = try $ do + (src, tit) <- source + return $ constructor src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: [Inline] - -> GenParser Char ParserState (String, [Char]) -referenceLink lab = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then lab else ref - state <- getState - case lookupKeySrc (stateKeys state) (toKey ref') of - Nothing -> fail "no corresponding key" - Just target -> return target - -autoLink :: GenParser Char ParserState Inline +referenceLink :: (String -> String -> Inlines -> Inlines) + -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines) +referenceLink constructor (lab, raw) = do + raw' <- try (optional (char ' ') >> + optional (newline >> skipSpaces) >> + (snd <$> reference)) <|> return "" + let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw' + let dropRB (']':xs) = xs + dropRB xs = xs + let dropLB ('[':xs) = xs + dropLB xs = xs + let dropBrackets = reverse . dropRB . reverse . dropLB + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + return $ do + keys <- asksF stateKeys + case M.lookup key keys of + Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback + Just (src,tit) -> constructor src tit <$> lab + +autoLink :: Parser [Char] ParserState (F Inlines) autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress char '>' - st <- getState - return $ if stateStrict st - then Link [Str orig] (src, "") - else Link [Code ("",["url"],[]) orig] (src, "") + (guardEnabled Ext_autolink_code_spans >> + return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig))) + <|> return (return $ B.link src "" (B.str orig)) -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState (F Inlines) image = try $ do char '!' - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Image lab (src,tit) + (lab,raw) <- reference + regLink B.image lab <|> referenceLink B.image (lab,raw) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState (F Inlines) note = try $ do - failIfStrict + guardEnabled Ext_footnotes ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just raw -> do - -- We temporarily empty the note list while parsing the note, - -- so that we don't get infinite loops with notes inside notes... - -- Note references inside other notes do not work. - updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - updateState $ \st -> st{ stateNotes = notes } - return $ Note contents - -inlineNote :: GenParser Char ParserState Inline + return $ do + notes <- asksF stateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Just contents -> do + st <- askF + -- process the note in a context that doesn't resolve + -- notes, to avoid infinite looping with notes inside + -- notes: + let contents' = runF contents st{ stateNotes' = [] } + return $ B.note contents' + +inlineNote :: Parser [Char] ParserState (F Inlines) inlineNote = try $ do - failIfStrict + guardEnabled Ext_inline_notes char '^' - contents <- inlinesInBalancedBrackets inline - return $ Note [Para contents] + contents <- inlinesInBalancedBrackets + return $ B.note . B.para <$> contents -rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' :: Parser [Char] ParserState (F Inlines) rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env RawInline _ s <- rawLaTeXInline - return $ RawInline "tex" s -- "tex" because it might be context or latex + return $ return $ B.rawInline "tex" s + -- "tex" because it might be context or latex -rawConTeXtEnvironment :: GenParser Char st String +rawConTeXtEnvironment :: Parser [Char] st String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1274,37 +1536,33 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (GenParser Char st Char) -> GenParser Char st String +inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState (F Inlines) rawHtmlInline = do - st <- getState - (_,result) <- if stateStrict st - then htmlTag (not . isTextTag) - else htmlTag isInlineTag - return $ RawInline "html" result + guardEnabled Ext_raw_html + mdInHtml <- option False $ + guardEnabled Ext_markdown_in_html_blocks >> return True + (_,result) <- if mdInHtml + then htmlTag isInlineTag + else htmlTag (not . isTextTag) + return $ return $ B.rawInline "html" result -- Citations -cite :: GenParser Char ParserState Inline +cite :: Parser [Char] ParserState (F Inlines) cite = do - failIfStrict + guardEnabled Ext_citations + getOption readerCitations >>= guard . not . null citations <- textualCite <|> normalCite - return $ Cite citations [] + return $ flip B.cite mempty <$> citations -spnl :: GenParser Char st () -spnl = try $ do - skipSpaces - optional newline - skipSpaces - notFollowedBy (char '\n') - -textualCite :: GenParser Char ParserState [Citation] +textualCite :: Parser [Char] ParserState (F [Citation]) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1314,22 +1572,25 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - rest <- option [] $ try $ spnl >> normalCite - if null rest - then option [first] $ bareloc first - else return $ first : rest + mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite + case mbrest of + Just rest -> return $ (first:) <$> rest + Nothing -> option (return [first]) $ bareloc first -bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc :: Citation -> Parser [Char] ParserState (F [Citation]) bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option [] $ try $ char ';' >> citeList + rest <- option (return []) $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationSuffix = suff } : rest + return $ do + suff' <- suff + rest' <- rest + return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: GenParser Char ParserState [Citation] +normalCite :: Parser [Char] ParserState (F [Citation]) normalCite = try $ do char '[' spnl @@ -1338,7 +1599,7 @@ normalCite = try $ do char ']' return citations -citeKey :: GenParser Char ParserState (Bool, String) +citeKey :: Parser [Char] ParserState (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' @@ -1346,34 +1607,37 @@ citeKey = try $ do let internal p = try $ p >>~ lookAhead (letter <|> digit) rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~") let key = first:rest - st <- getState - guard $ key `elem` stateCitations st + citations' <- getOption readerCitations + guard $ key `elem` citations' return (suppress_author, key) -suffix :: GenParser Char ParserState [Inline] +suffix :: Parser [Char] ParserState (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then Space : rest + then (B.space <>) <$> rest else rest -prefix :: GenParser Char ParserState [Inline] -prefix = liftM normalizeSpaces $ +prefix :: Parser [Char] ParserState (F Inlines) +prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: GenParser Char ParserState [Citation] -citeList = sepBy1 citation (try $ char ';' >> spnl) +citeList :: Parser [Char] ParserState (F [Citation]) +citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char ParserState Citation +citation :: Parser [Char] ParserState (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ Citation{ citationId = key - , citationPrefix = pref - , citationSuffix = suff + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y , citationMode = if suppress_author then SuppressAuthor else NormalCitation @@ -1381,3 +1645,22 @@ citation = try $ do , citationHash = 0 } +smart :: Parser [Char] ParserState (F Inlines) +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) + +singleQuoted :: Parser [Char] ParserState (F Inlines) +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + fmap B.singleQuoted . trimInlinesF . mconcat <$> + many1Till inline singleQuoteEnd + +doubleQuoted :: Parser [Char] ParserState (F Inlines) +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ + fmap B.doubleQuoted . trimInlinesF . mconcat <$> + many1Till inline doubleQuoteEnd diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 2c6fcc6e6..a0e5a0635 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,6 +31,7 @@ Conversion of a string representation of a pandoc type (@Pandoc@, module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) nullMeta :: Meta nullMeta = Meta{ docTitle = [] @@ -51,31 +52,31 @@ nullMeta = Meta{ docTitle = [] readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc readNative s = - case reads s of - (d,_):_ -> d - [] -> Pandoc nullMeta $ readBlocks s + case safeRead s of + Just d -> d + Nothing -> Pandoc nullMeta $ readBlocks s readBlocks :: String -> [Block] readBlocks s = - case reads s of - (d,_):_ -> d - [] -> [readBlock s] + case safeRead s of + Just d -> d + Nothing -> [readBlock s] readBlock :: String -> Block readBlock s = - case reads s of - (d,_):_ -> d - [] -> Plain $ readInlines s + case safeRead s of + Just d -> d + Nothing -> Plain $ readInlines s readInlines :: String -> [Inline] readInlines s = - case reads s of - (d,_):_ -> d - [] -> [readInline s] + case safeRead s of + Just d -> d + Nothing -> [readInline s] readInline :: String -> Inline readInline s = - case reads s of - (d,_):_ -> d - [] -> error "Cannot parse document" + case safeRead s of + Just d -> d + Nothing -> error "Cannot parse document" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d1010a736..9fb976903 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.RST + Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -27,24 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( +module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.ParserCombinators.Parsec -import Control.Monad ( when, liftM ) +import Text.Pandoc.Options +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) import Data.Maybe ( catMaybes ) -- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readRST :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readRST state s = (readWith parseRST) state (s ++ "\n\n") +readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -- -- Constants and data structure definitions @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -71,14 +71,14 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = +promoteHeaders num ((Header level text):rest) = (Header (level - num) text):(promoteHeaders num rest) promoteHeaders num (other:rest) = other:(promoteHeaders num rest) promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and --- promote all the other headers. +-- promote all the other headers. titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title titleTransform ((Header 1 head1):(Header 2 head2):rest) | @@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) | (promoteHeaders 1 rest, head1) titleTransform blocks = (blocks, []) -parseRST :: GenParser Char ParserState Pandoc +parseRST :: Parser [Char] ParserState Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition @@ -103,12 +103,13 @@ parseRST = do let reversedNotes = stateNotes st' updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... - blocks <- parseBlocks + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state + standalone <- getOption readerStandalone + let (blocks'', title) = if standalone then titleTransform blocks' else (blocks', []) + state <- getState let authors = stateAuthors state let date = stateDate state let title' = if (null title) then (stateTitle state) else title @@ -118,10 +119,10 @@ parseRST = do -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice [ codeBlock , rawBlock , blockQuote @@ -146,7 +147,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> GenParser Char ParserState (String, String) +rawFieldListItem :: String -> Parser [Char] ParserState (String, String) rawFieldListItem indent = try $ do string indent char ':' @@ -160,7 +161,7 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> GenParser Char ParserState (Maybe ([Inline], [[Block]])) + -> Parser [Char] ParserState (Maybe ([Inline], [[Block]])) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = [Str name] @@ -187,7 +188,7 @@ extractContents [Plain auth] = auth extractContents [Para auth] = auth extractContents _ = [] -fieldList :: GenParser Char ParserState Block +fieldList :: Parser [Char] ParserState Block fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent @@ -199,7 +200,7 @@ fieldList = try $ do -- line block -- -lineBlockLine :: GenParser Char ParserState [Inline] +lineBlockLine :: Parser [Char] ParserState [Inline] lineBlockLine = try $ do char '|' char ' ' <|> lookAhead (char '\n') @@ -210,7 +211,7 @@ lineBlockLine = try $ do then normalizeSpaces line else Str white : normalizeSpaces line -lineBlock :: GenParser Char ParserState Block +lineBlock :: Parser [Char] ParserState Block lineBlock = try $ do lines' <- many1 lineBlockLine blanklines @@ -220,14 +221,14 @@ lineBlock = try $ do -- paragraph block -- -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" -codeBlockStart :: GenParser Char st Char +codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock :: GenParser Char ParserState Block +paraBeforeCodeBlock :: Parser [Char] ParserState Block paraBeforeCodeBlock = try $ do result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") @@ -236,21 +237,21 @@ paraBeforeCodeBlock = try $ do else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal :: GenParser Char ParserState Block -paraNormal = try $ do +paraNormal :: Parser [Char] ParserState Block +paraNormal = try $ do result <- many1 inline newline blanklines return $ Para $ normalizeSpaces result -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock :: GenParser Char ParserState Block +imageBlock :: Parser [Char] ParserState Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline @@ -265,11 +266,11 @@ imageBlock = try $ do -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: GenParser Char ParserState Block +doubleHeader :: Parser [Char] ParserState Block doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -283,7 +284,7 @@ doubleHeader = try $ do blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines - -- check to see if we've had this kind of header before. + -- check to see if we've had this kind of header before. -- if so, get appropriate level. if not, add to list. state <- getState let headerTable = stateHeaderTable state @@ -294,8 +295,8 @@ doubleHeader = try $ do return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader :: GenParser Char ParserState Block -singleHeader = try $ do +singleHeader :: Parser [Char] ParserState Block +singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) pos <- getPosition @@ -317,7 +318,7 @@ singleHeader = try $ do -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -331,14 +332,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> GenParser Char st [Char] +indentedLine :: String -> Parser [Char] st [Char] indentedLine indents = try $ do string indents manyTill anyChar newline -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: GenParser Char st [Char] +indentedBlock :: Parser [Char] st [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -347,7 +348,7 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -codeBlock :: GenParser Char st Block +codeBlock :: Parser [Char] st Block codeBlock = try $ do codeBlockStart result <- indentedBlock @@ -355,7 +356,7 @@ codeBlock = try $ do -- | The 'code-block' directive (from Sphinx) that allows a language to be -- specified. -customCodeBlock :: GenParser Char st Block +customCodeBlock :: Parser [Char] st Block customCodeBlock = try $ do string ".. code-block:: " language <- manyTill anyChar newline @@ -364,7 +365,7 @@ customCodeBlock = try $ do return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result -figureBlock :: GenParser Char ParserState Block +figureBlock :: Parser [Char] ParserState Block figureBlock = try $ do string ".. figure::" src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline @@ -372,24 +373,24 @@ figureBlock = try $ do caption <- parseFromString extractCaption body return $ Para [Image caption (src,"")] -extractCaption :: GenParser Char ParserState [Inline] +extractCaption :: Parser [Char] ParserState [Inline] extractCaption = try $ do manyTill anyLine blanklines many inline -- | The 'math' directive (from Sphinx) for display math. -mathBlock :: GenParser Char st Block +mathBlock :: Parser [Char] st Block mathBlock = try $ do string ".. math::" mathBlockMultiline <|> mathBlockOneLine -mathBlockOneLine :: GenParser Char st Block +mathBlockOneLine :: Parser [Char] st Block mathBlockOneLine = try $ do result <- manyTill anyChar newline blanklines return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result] -mathBlockMultiline :: GenParser Char st Block +mathBlockMultiline :: Parser [Char] st Block mathBlockMultiline = try $ do blanklines result <- indentedBlock @@ -404,9 +405,9 @@ mathBlockMultiline = try $ do $ filter (not . null) $ splitBy null lns' return $ Para $ map (Math DisplayMath) eqs -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState Block lhsCodeBlock = try $ do - failUnlessLHS + guardEnabled Ext_literate_haskell optional codeBlockStart pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -418,7 +419,7 @@ lhsCodeBlock = try $ do blanklines return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns' -birdTrackLine :: GenParser Char st [Char] +birdTrackLine :: Parser [Char] st [Char] birdTrackLine = do char '>' manyTill anyChar newline @@ -427,7 +428,7 @@ birdTrackLine = do -- raw html/latex/etc -- -rawBlock :: GenParser Char st Block +rawBlock :: Parser [Char] st Block rawBlock = try $ do string ".. raw:: " lang <- many1 (letter <|> digit) @@ -439,7 +440,7 @@ rawBlock = try $ do -- block quotes -- -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: @@ -450,10 +451,10 @@ blockQuote = do -- list blocks -- -list :: GenParser Char ParserState Block +list :: Parser [Char] ParserState Block list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') @@ -463,11 +464,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n" return (normalizeSpaces term, [contents]) -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = many1 definitionListItem >>= return . DefinitionList -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: GenParser Char st Int +bulletListStart :: Parser [Char] st Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -477,14 +478,14 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char ParserState Int + -> Parser [Char] ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> GenParser Char ParserState [Char] +listLine :: Int -> Parser [Char] ParserState [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -492,36 +493,35 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> GenParser Char ParserState [Char] +indentWith :: Int -> Parser [Char] ParserState [Char] indentWith num = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop if (num < tabStop) then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + else choice [ try (count num (char ' ')), + (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState Int - -> GenParser Char ParserState (Int, [Char]) +rawListItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- manyTill anyChar newline restLines <- many (listLine markerLength) return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> GenParser Char ParserState [Char] +listContinuation :: Int -> Parser [Char] ParserState [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: GenParser Char ParserState Int - -> GenParser Char ParserState [Block] -listItem start = try $ do +listItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState [Block] +listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) blanks <- choice [ try (many blankline >>~ lookAhead start), @@ -537,22 +537,22 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return parsed -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items return $ OrderedList (start, style, delim) items' -bulletList :: GenParser Char ParserState Block -bulletList = many1 (listItem bulletListStart) >>= +bulletList :: Parser [Char] ParserState Block +bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- -- default-role block -- -defaultRoleBlock :: GenParser Char ParserState Block +defaultRoleBlock :: Parser [Char] ParserState Block defaultRoleBlock = try $ do string ".. default-role::" -- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one @@ -570,7 +570,7 @@ defaultRoleBlock = try $ do -- unknown directive (e.g. comment) -- -unknownDirective :: GenParser Char st Block +unknownDirective :: Parser [Char] st Block unknownDirective = try $ do string ".." notFollowedBy (noneOf " \t\n") @@ -582,7 +582,7 @@ unknownDirective = try $ do --- note block --- -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition string ".." @@ -601,7 +601,7 @@ noteBlock = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = do char '[' res <- many1 digit @@ -614,13 +614,13 @@ noteMarker = do -- reference key -- -quotedReferenceName :: GenParser Char ParserState [Inline] +quotedReferenceName :: Parser [Char] ParserState [Inline] quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- many1Till inline (char '`') + label' <- many1Till inline (char '`') return label' -unquotedReferenceName :: GenParser Char ParserState [Inline] +unquotedReferenceName :: Parser [Char] ParserState [Inline] unquotedReferenceName = try $ do label' <- many1Till inline (lookAhead $ char ':') return label' @@ -629,24 +629,24 @@ unquotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: GenParser Char st String +simpleReferenceName' :: Parser [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) return (x:xs) -simpleReferenceName :: GenParser Char st [Inline] +simpleReferenceName :: Parser [Char] st [Inline] simpleReferenceName = do raw <- simpleReferenceName' return [Str raw] -referenceName :: GenParser Char ParserState [Inline] +referenceName :: Parser [Char] ParserState [Inline] referenceName = quotedReferenceName <|> (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> unquotedReferenceName -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState [Char] referenceKey = do startPos <- getPosition (key, target) <- choice [imageKey, anonymousKey, regularKey] @@ -658,38 +658,43 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: GenParser Char st [Char] +targetURI :: Parser [Char] st [Char] targetURI = do skipSpaces optional newline - contents <- many1 (try (many spaceChar >> newline >> + contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState (Key, Target) +imageKey :: Parser [Char] ParserState (Key, Target) imageKey = try $ do string ".. |" - ref <- manyTill inline (char '|') + (_,ref) <- withRaw (manyTill inline (char '|')) skipSpaces string "image::" src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ init ref, (src, "")) -anonymousKey :: GenParser Char st (Key, Target) +anonymousKey :: Parser [Char] st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) + return (toKey $ "_" ++ printf "%09d" (sourceLine pos), (src, "")) -regularKey :: GenParser Char ParserState (Key, Target) +stripTicks :: String -> String +stripTicks = reverse . stripTick . reverse . stripTick + where stripTick ('`':xs) = xs + stripTick xs = xs + +regularKey :: Parser [Char] ParserState (Key, Target) regularKey = try $ do string ".. _" - ref <- referenceName + (_,ref) <- withRaw referenceName char ':' src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ stripTicks ref, (src, "")) -- -- tables @@ -702,37 +707,37 @@ regularKey = try $ do -- Simple tables TODO: -- - column spans -- - multiline support --- - ensure that rightmost column span does not need to reach end +-- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns -- -- Grid tables TODO: -- - column spans -dashedLine :: Char -> GenParser Char st (Int, Int) +dashedLine :: Char -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep :: Char -> Parser [Char] ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter :: Parser [Char] ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine :: [Int] -> Parser [Char] ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow :: [Int] -> Parser [Char] ParserState [[Block]] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices @@ -745,8 +750,8 @@ simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -766,28 +771,28 @@ simpleTableHeader headless = try $ do -- Parse a simple table. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block (return []) + -> Parser [Char] ParserState Block +gridTable = gridTableWith parseBlocks -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" - -- + -- -- inline -- -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice [ whitespace , link , str @@ -805,44 +810,53 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" -hyphens :: GenParser Char ParserState Inline +hyphens :: Parser [Char] ParserState Inline hyphens = do result <- many1 (char '-') - option Space endline + option Space endline -- don't want to treat endline after hyphen or dash as a space return $ Str result -escapedChar :: GenParser Char st Inline +escapedChar :: Parser [Char] st Inline escapedChar = do c <- escaped anyChar return $ if c == ' ' -- '\ ' is null in RST then Str "" else Str [c] -symbol :: GenParser Char ParserState Inline -symbol = do +symbol :: Parser [Char] ParserState Inline +symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between codeStart and codeEnd -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState Inline +code = try $ do string "``" result <- manyTill anyChar (try (string "``")) return $ Code nullAttr $ removeLeadingTrailingSpace $ intercalate " " $ lines result -emph :: GenParser Char ParserState Inline -emph = enclosed (char '*') (char '*') inline >>= +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a +atStart p = do + pos <- getPosition + st <- getState + -- single quote start can't be right after str + guard $ stateLastStrPos st /= Just pos + p + +emph :: Parser [Char] ParserState Inline +emph = enclosed (atStart $ char '*') (char '*') inline >>= return . Emph . normalizeSpaces -strong :: GenParser Char ParserState Inline -strong = enclosed (string "**") (try $ string "**") inline >>= +strong :: Parser [Char] ParserState Inline +strong = enclosed (atStart $ string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -- Parses inline interpreted text which is required to have the given role. -- This decision is based on the role marker (if present), -- and the current default interpreted text role. -interpreted :: [Char] -> GenParser Char ParserState [Char] +interpreted :: [Char] -> Parser [Char] ParserState [Char] interpreted role = try $ do state <- getState if role == stateRstDefaultRole state @@ -856,30 +870,30 @@ interpreted role = try $ do -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes unmarkedInterpretedText = do - result <- enclosed (char '`') (char '`') anyChar + result <- enclosed (atStart $ char '`') (char '`') anyChar return result -superscript :: GenParser Char ParserState Inline +superscript :: Parser [Char] ParserState Inline superscript = interpreted "sup" >>= \x -> return (Superscript [Str x]) -subscript :: GenParser Char ParserState Inline +subscript :: Parser [Char] ParserState Inline subscript = interpreted "sub" >>= \x -> return (Subscript [Str x]) -math :: GenParser Char ParserState Inline +math :: Parser [Char] ParserState Inline math = interpreted "math" >>= \x -> return (Math InlineMath x) -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do - result <- many1 (noneOf (specialChars ++ "\t\n ")) - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + let strChar = noneOf ("\t\n " ++ specialChars) + result <- many1 strChar + updateLastStrPos return $ Str result -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -895,14 +909,14 @@ endline = try $ do -- links -- -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState Inline link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: GenParser Char ParserState Inline +explicitLink :: Parser [Char] ParserState Inline explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label' <- manyTill (notFollowedBy (char '`') >> inline) + label' <- manyTill (notFollowedBy (char '`') >> inline) (try (spaces >> char '<')) src <- manyTill (noneOf ">\n") (char '>') skipSpaces @@ -910,53 +924,53 @@ explicitLink = try $ do return $ Link (normalizeSpaces label') (escapeURI $ removeLeadingTrailingSpace src, "") -referenceLink :: GenParser Char ParserState Inline +referenceLink :: Parser [Char] ParserState Inline referenceLink = try $ do - label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' + (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~ + char '_' state <- getState let keyTable = stateKeys state - let isAnonKey x = case fromKey x of - [Str ('_':_)] -> True - _ -> False - key <- option (toKey label') $ + let isAnonKey (Key ('_':_)) = True + isAnonKey _ = False + key <- option (toKey $ stripTicks ref) $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable if null anonKeys - then pzero + then mzero else return (head anonKeys) - (src,tit) <- case lookupKeySrc keyTable key of + (src,tit) <- case M.lookup key keyTable of Nothing -> fail "no corresponding key" Just target -> return target -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ Link (normalizeSpaces label') (src, tit) + return $ Link (normalizeSpaces label') (src, tit) -autoURI :: GenParser Char ParserState Inline +autoURI :: Parser [Char] ParserState Inline autoURI = do (orig, src) <- uri return $ Link [Str orig] (src, "") -autoEmail :: GenParser Char ParserState Inline +autoEmail :: Parser [Char] ParserState Inline autoEmail = do (orig, src) <- emailAddress return $ Link [Str orig] (src, "") -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '|' - ref <- manyTill inline (char '|') + (alt,ref) <- withRaw (manyTill inline (char '|')) state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable (toKey ref) of + (src,tit) <- case M.lookup (toKey $ init ref) keyTable of Nothing -> fail "no corresponding key" Just target -> return target - return $ Image (normalizeSpaces ref) (src, tit) + return $ Image (normalizeSpaces alt) (src, tit) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- noteMarker char '_' diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 67dfe6753..fe49a992e 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 348900d38..89f281ae8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> Stability : alpha @@ -56,29 +56,34 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.ParserCombinators.Parsec import Text.HTML.TagSoup.Match import Data.Char ( digitToInt, isUpper ) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) -- | Parse a Textile text and return a Pandoc document. -readTextile :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readTextile state s = - (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n") +readTextile :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTextile opts s = + (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") -- | Generate a Pandoc ADT from a textile document -parseTextile :: GenParser Char ParserState Pandoc +parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default - updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + oldOpts <- stateOptions `fmap` getState + updateState $ \state -> state{ stateOptions = + oldOpts{ readerSmart = True + , readerParseRaw = True + , readerOldDashes = True + } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes @@ -93,10 +98,10 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc (Meta [] [] []) blocks -- FIXME -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -111,11 +116,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [GenParser Char ParserState Block] +blockParsers :: [Parser [Char] ParserState Block] blockParsers = [ codeBlock , header , blockQuote @@ -128,20 +133,20 @@ blockParsers = [ codeBlock , nullBlock ] -- | Any block in the order of definition of blockParsers -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice blockParsers <?> "block" -codeBlock :: GenParser Char ParserState Block +codeBlock :: Parser [Char] ParserState Block codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: GenParser Char ParserState Block +codeBlockBc :: Parser [Char] ParserState Block codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines return $ CodeBlock ("",[],[]) $ unlines contents -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: GenParser Char ParserState Block +codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do htmlTag (tagOpen (=="pre") null) result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) @@ -156,7 +161,7 @@ codeBlockPre = try $ do return $ CodeBlock ("",[],[]) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -165,14 +170,14 @@ header = try $ do return $ Header level name -- | Blockquote of the form "bq. content" -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = try $ do string "bq" >> optional attributes >> char '.' >> whitespace BlockQuote . singleton <$> para -- Horizontal rule -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do skipSpaces start <- oneOf "-*" @@ -187,39 +192,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: GenParser Char ParserState Block +anyList :: Parser [Char] ParserState Block anyList = try $ ( (anyListAtDepth 1) <* blanklines ) -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth :: Int -> Parser [Char] ParserState Block anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth :: Int -> Parser [Char] ParserState Block bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block] bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth :: Int -> Parser [Char] ParserState Block orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return (OrderedList (1, DefaultStyle, DefaultDelim) items) -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block] orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block] genericListItemAtDepth c depth = try $ do count depth (char c) >> optional attributes >> whitespace p <- inlines @@ -227,22 +232,22 @@ genericListItemAtDepth c depth = try $ do return ((Plain p):sublist) -- | A definition list is a set of consecutive definition items -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = try $ DefinitionList <$> many1 definitionListItem - + -- | A definition list item in textile begins with '- ', followed by -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do string "- " term <- many1Till inline (try (whitespace >> string ":=")) - def <- inlineDef <|> multilineDef - return (term, def) - where inlineDef :: GenParser Char ParserState [[Block]] + def' <- inlineDef <|> multilineDef + return (term, def') + where inlineDef :: Parser [Char] ParserState [[Block]] inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines) - multilineDef :: GenParser Char ParserState [[Block]] + multilineDef :: Parser [Char] ParserState [[Block]] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -252,76 +257,76 @@ definitionListItem = try $ do -- | This terminates a block such as a paragraph. Because of raw html -- blocks support, we have to lookAhead for a rawHtmlBlock. -blockBreak :: GenParser Char ParserState () +blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> (lookAhead rawHtmlBlock >> return ()) -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock :: Parser [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines return $ RawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: GenParser Char ParserState Block +rawLaTeXBlock' :: Parser [Char] ParserState Block rawLaTeXBlock' = do - failIfStrict + guardEnabled Ext_raw_tex RawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak -- Tables - + -- | A table cell spans until a pipe | -tableCell :: GenParser Char ParserState TableCell +tableCell :: Parser [Char] ParserState TableCell tableCell = do c <- many1 (noneOf "|\n") content <- parseFromString (many1 inline) c return $ [ Plain $ normalizeSpaces content ] -- | A table row is made of many table cells -tableRow :: GenParser Char ParserState [TableCell] +tableRow :: Parser [Char] ParserState [TableCell] tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline) -- | Many table rows -tableRows :: GenParser Char ParserState [[TableCell]] +tableRows :: Parser [Char] ParserState [[TableCell]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders :: Parser [Char] ParserState [TableCell] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) - + -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = try $ do headers <- option [] tableHeaders rows <- tableRows blanklines let nbOfCols = max (length headers) (length $ head rows) - return $ Table [] + return $ Table [] (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0) headers rows - + -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> GenParser Char ParserState Block -- ^ implicit block - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block -- ^ implicit block + -> Parser [Char] ParserState Block maybeExplicitBlock name blk = try $ do - optional $ try $ string name >> optional attributes >> char '.' >> + optional $ try $ string name >> optional attributes >> char '.' >> ((try whitespace) <|> endline) blk @@ -333,15 +338,15 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice inlineParsers <?> "inline" -- | List of consecutive inlines before a newline -inlines :: GenParser Char ParserState [Inline] +inlines :: Parser [Char] ParserState [Inline] inlines = manyTill inline newline -- | Inline parsers tried in order -inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers :: [Parser [Char] ParserState Inline] inlineParsers = [ autoLink , str , whitespace @@ -362,7 +367,7 @@ inlineParsers = [ autoLink ] -- | Inline markups -inlineMarkup :: GenParser Char ParserState Inline +inlineMarkup :: Parser [Char] ParserState Inline inlineMarkup = choice [ simpleInline (string "??") (Cite []) , simpleInline (string "**") Strong , simpleInline (string "__") Emph @@ -375,29 +380,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite []) ] -- | Trademark, registered, copyright -mark :: GenParser Char st Inline +mark :: Parser [Char] st Inline mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: GenParser Char st Inline +reg :: Parser [Char] st Inline reg = do oneOf "Rr" char ')' return $ Str "\174" -tm :: GenParser Char st Inline +tm :: Parser [Char] st Inline tm = do oneOf "Tt" oneOf "Mm" char ')' return $ Str "\8482" -copy :: GenParser Char st Inline +copy :: Parser [Char] st Inline copy = do oneOf "Cc" char ')' return $ Str "\169" -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState @@ -405,7 +410,7 @@ note = try $ do Nothing -> fail "note not found" Just raw -> liftM Note $ parseFromString parseBlocks raw --- | Special chars +-- | Special chars markupChars :: [Char] markupChars = "\\[]*#_@~-+^|%=" @@ -421,17 +426,17 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: GenParser Char ParserState String +hyphenedWords :: Parser [Char] ParserState String hyphenedWords = try $ do hd <- noneOf wordBoundaries - tl <- many ( (noneOf wordBoundaries) <|> + tl <- many ( (noneOf wordBoundaries) <|> try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) let wd = hd:tl - option wd $ try $ + option wd $ try $ (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords) -- | Any string -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -444,44 +449,57 @@ str = do return $ Str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: GenParser Char ParserState Inline +htmlSpan :: Parser [Char] ParserState Inline htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline >> notFollowedBy blankline return LineBreak -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag - --- | Raw LaTeX Inline -rawLaTeXInline' :: GenParser Char ParserState Inline + +-- | Raw LaTeX Inline +rawLaTeXInline' :: Parser [Char] ParserState Inline rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex rawLaTeXInline --- | Textile standard link syntax is "label":target -link :: GenParser Char ParserState Inline -link = try $ do +-- | Textile standard link syntax is "label":target. But we +-- can also have ["label":target]. +link :: Parser [Char] ParserState Inline +link = linkB <|> linkNoB + +linkNoB :: Parser [Char] ParserState Inline +linkNoB = try $ do + name <- surrounded (char '"') inline + char ':' + let stopChars = "!.,;:" + url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) + return $ Link name (url, "") + +linkB :: Parser [Char] ParserState Inline +linkB = try $ do + char '[' name <- surrounded (char '"') inline char ':' - url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline)))) + url <- manyTill nonspaceChar (char ']') return $ Link name (url, "") -- | Detect plain links to http or email. -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = do (orig, src) <- (try uri <|> try emailAddress) return $ Link [Str orig] (src, "") -- | image embedding -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") @@ -489,49 +507,49 @@ image = try $ do char '!' return $ Image [Str alt] (src, alt) -escapedInline :: GenParser Char ParserState Inline +escapedInline :: Parser [Char] ParserState Inline escapedInline = escapedEqs <|> escapedTag -escapedEqs :: GenParser Char ParserState Inline +escapedEqs :: Parser [Char] ParserState Inline escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: GenParser Char ParserState Inline +escapedTag :: Parser [Char] ParserState Inline escapedTag = Str <$> (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: GenParser Char ParserState Inline +symbol :: Parser [Char] ParserState Inline symbol = Str . singleton <$> oneOf wordBoundaries -- | Inline code -code :: GenParser Char ParserState Inline +code :: Parser [Char] ParserState Inline code = code1 <|> code2 -code1 :: GenParser Char ParserState Inline +code1 :: Parser [Char] ParserState Inline code1 = Code nullAttr <$> surrounded (char '@') anyChar -code2 :: GenParser Char ParserState Inline +code2 :: Parser [Char] ParserState Inline code2 = do htmlTag (tagOpen (=="tt") null) Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: GenParser Char ParserState String +attributes :: Parser [Char] ParserState String attributes = choice [ enclosed (char '(') (char ')') anyChar, enclosed (char '{') (char '}') anyChar, enclosed (char '[') (char ']') anyChar] -- | Parses material surrounded by a parser. -surrounded :: GenParser Char st t -- ^ surrounding parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] +surrounded :: Parser [Char] st t -- ^ surrounding parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] surrounded border = enclosed border (try border) -- | Inlines are most of the time of the same form -simpleInline :: GenParser Char ParserState t -- ^ surrounding parser +simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor - -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) + -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) simpleInline border construct = surrounded border (inlineWithAttribute) >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index a80ab0c63..7a21f6f3a 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,7 +42,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (findDataFile) +import Text.Pandoc.Shared (findDataFile, renderTags') import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -102,14 +102,14 @@ convertTag userdata t@(TagOpen "script" as) = src -> do (raw, mime) <- getRaw userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) - return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) + return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) convertTag userdata t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do (raw, mime) <- getRaw userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) - return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) + return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) convertTag _ t = return t cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString @@ -163,14 +163,3 @@ makeSelfContained userdata inp = do out' <- mapM (convertTag userdata) tags return $ renderTags' out' --- repeated from HTML reader: -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f14a57c1f..d86f9a390 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -59,13 +59,8 @@ module Text.Pandoc.Shared ( uniqueIdent, isHeaderBlock, headerShift, - -- * Writer options - HTMLMathMethod (..), - CiteMethod (..), - ObfuscationMethod (..), - HTMLSlideVariant (..), - WriterOptions (..), - defaultWriterOptions, + -- * TagSoup HTML handling + renderTags', -- * File handling inDirectory, findDataFile, @@ -73,6 +68,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + -- * Safe read + safeRead ) where import Text.Pandoc.Definition @@ -90,11 +87,12 @@ import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import Control.Monad (msum) import Paths_pandoc (getDataFileName) -import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) +import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), + renderOptions) -- -- List processing @@ -149,7 +147,7 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch])) -- characters and strings. escapeStringUsing :: [(Char, String)] -> String -> String escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = +escapeStringUsing escapeTable (x:xs) = case (lookup x escapeTable) of Just str -> str ++ rest Nothing -> x:rest @@ -176,7 +174,7 @@ stripFirstAndLast :: String -> String stripFirstAndLast str = drop 1 $ take ((length str) - 1) str --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). camelCaseToHyphenated :: String -> String camelCaseToHyphenated [] = "" camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = @@ -247,13 +245,13 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = +orderedListMarkers (start, numstyle, numdelim) = let singleton c = [c] nums = case numstyle of DefaultStyle -> map show [start..] Example -> map show [start..] Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ + UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] LowerAlpha -> drop (start - 1) $ cycle $ map singleton ['a'..'z'] @@ -271,13 +269,12 @@ orderedListMarkers (start, numstyle, numdelim) = -- remove empty Str elements. normalizeSpaces :: [Inline] -> [Inline] normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest - in case rest' of - [] -> [] - _ -> Space : cleanup rest' + where cleanup [] = [] + cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of + [] -> [] + (x:xs) -> Space : x : cleanup xs cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest + cleanup (x:rest) = x : cleanup rest isSpaceOrEmpty :: Inline -> Bool isSpaceOrEmpty Space = True @@ -386,7 +383,7 @@ isPara (Para _) = True isPara _ = False -- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block +data Element = Blk Block | Sec Int [Int] String [Inline] [Element] -- lvl num ident label contents deriving (Eq, Read, Show, Typeable, Data) @@ -414,7 +411,7 @@ hierarchicalizeWithIds ((Header level title'):xs) = do let ident = uniqueIdent title' usedIdents let lastnum' = take level lastnum let newnum = if length lastnum' >= level - then init lastnum' ++ [last lastnum' + 1] + then init lastnum' ++ [last lastnum' + 1] else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1] S.put (newnum, (ident : usedIdents)) let (sectionContents, rest) = break (headerLtEq level) xs @@ -456,112 +453,20 @@ headerShift n = bottomUp shift shift x = x -- --- Writer options +-- TagSoup HTML handling -- -data HTMLMathMethod = PlainMath - | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js - | JsMath (Maybe String) -- url of jsMath load script - | GladTeX - | WebTeX String -- url of TeX->image script. - | MathML (Maybe String) -- url of MathMLinHTML.js - | MathJax String -- url of MathJax.js - deriving (Show, Read, Eq) - -data CiteMethod = Citeproc -- use citeproc to render them - | Natbib -- output natbib cite commands - | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) - --- | Methods for obfuscating email addresses in HTML. -data ObfuscationMethod = NoObfuscation - | ReferenceObfuscation - | JavascriptObfuscation - deriving (Show, Read, Eq) - --- | Varieties of HTML slide shows. -data HTMLSlideVariant = S5Slides - | SlidySlides - | SlideousSlides - | DZSlides - | NoSlides - deriving (Show, Read, Eq) - --- | Options for writers -data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerTemplate :: String -- ^ Template to use in standalone mode - , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? - , writerIncremental :: Bool -- ^ True if lists should be incremental - , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerWrapText :: Bool -- ^ Wrap text to line length - , writerColumns :: Int -- ^ Characters in a line (for text wrapping) - , writerLiterateHaskell :: Bool -- ^ Write as literate haskell - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML - , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file - , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory - , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations - , writerHtml5 :: Bool -- ^ Produce HTML5 - , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show - , writerSlideLevel :: Maybe Int -- ^ Force header level of slides - , writerChapters :: Bool -- ^ Use "chapter" for top-level sects - , writerListings :: Bool -- ^ Use listings package for code - , writerHighlight :: Bool -- ^ Highlight source code - , writerHighlightStyle :: Style -- ^ Style to use for highlighting - , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex - } deriving Show - -{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-} --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions = - WriterOptions { writerStandalone = False - , writerTemplate = "" - , writerVariables = [] - , writerEPUBMetadata = "" - , writerTabStop = 4 - , writerTableOfContents = False - , writerSlideVariant = NoSlides - , writerIncremental = False - , writerXeTeX = False - , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False - , writerNumberSections = False - , writerSectionDivs = False - , writerStrictMarkdown = False - , writerReferenceLinks = False - , writerWrapText = True - , writerColumns = 72 - , writerLiterateHaskell = False - , writerEmailObfuscation = JavascriptObfuscation - , writerIdentifierPrefix = "" - , writerSourceDirectory = "." - , writerUserDataDir = Nothing - , writerCiteMethod = Citeproc - , writerBiblioFiles = [] - , writerHtml5 = False - , writerBeamer = False - , writerSlideLevel = Nothing - , writerChapters = False - , writerListings = False - , writerHighlight = False - , writerHighlightStyle = pygments - , writerSetextHeaders = True - , writerTeXLigatures = True - } +-- | Render HTML tags. +renderTags' :: [Tag String] -> String +renderTags' = renderTagsOptions + renderOptions{ optMinimize = \x -> + let y = map toLower x + in y == "hr" || y == "br" || + y == "img" || y == "meta" || + y == "link" + , optRawTag = \x -> + let y = map toLower x + in y == "script" || y == "style" } -- -- File handling @@ -606,3 +511,15 @@ warn :: String -> IO () warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg + +-- +-- Safe read +-- + +safeRead :: (Monad m, Read a) => String -> m a +safeRead s = case reads s of + (d,x):_ + | all isSpace x -> return d + _ -> fail $ "Could not read `" ++ s ++ "'" + + diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index dfdcd8e63..4f5ad54bd 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -30,7 +30,7 @@ A simple templating system with variable substitution and conditionals. Example: > renderTemplate [("name","Sam"),("salary","50,000")] $ -> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$" +> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$" > "Hi, John. You make $50,000." A slot for an interpolated variable is a variable name surrounded @@ -68,8 +68,8 @@ module Text.Pandoc.Templates ( renderTemplate , TemplateTarget , getDefaultTemplate ) where -import Text.ParserCombinators.Parsec -import Control.Monad (liftM, when, forM) +import Text.Parsec +import Control.Monad (liftM, when, forM, mzero) import System.FilePath import Data.List (intercalate, intersperse) #if MIN_VERSION_blaze_html(0,5,0) @@ -83,22 +83,26 @@ import Text.Pandoc.Shared (readDataFile) import qualified Control.Exception.Extensible as E (try, IOException) -- | Get default template for the specified writer. -getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first - -> String -- ^ Name of writer +getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first + -> String -- ^ Name of writer -> IO (Either E.IOException String) -getDefaultTemplate _ "native" = return $ Right "" -getDefaultTemplate _ "json" = return $ Right "" -getDefaultTemplate _ "docx" = return $ Right "" -getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" -getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user writer = do - let format = takeWhile (/='+') writer -- strip off "+lhs" if present - let fname = "templates" </> "default" <.> format - E.try $ readDataFile user fname + let format = takeWhile (`notElem` "+-") writer -- strip off extensions + case format of + "native" -> return $ Right "" + "json" -> return $ Right "" + "docx" -> return $ Right "" + "epub" -> return $ Right "" + "odt" -> getDefaultTemplate user "opendocument" + "markdown_strict" -> getDefaultTemplate user "markdown" + "multimarkdown" -> getDefaultTemplate user "markdown" + "markdown_github" -> getDefaultTemplate user "markdown" + _ -> let fname = "templates" </> "default" <.> format + in E.try $ readDataFile user fname data TemplateState = TemplateState Int [(String,String)] -adjustPosition :: String -> GenParser Char TemplateState String +adjustPosition :: String -> Parsec [Char] TemplateState String adjustPosition str = do let lastline = takeWhile (/= '\n') $ reverse str updateState $ \(TemplateState pos x) -> @@ -108,18 +112,18 @@ adjustPosition str = do return str class TemplateTarget a where - toTarget :: String -> a + toTarget :: String -> a instance TemplateTarget String where toTarget = id -instance TemplateTarget ByteString where +instance TemplateTarget ByteString where toTarget = fromString instance TemplateTarget Html where toTarget = preEscapedString --- | Renders a template +-- | Renders a template renderTemplate :: TemplateTarget a => [(String,String)] -- ^ Assoc. list of values for variables -> String -- ^ Template @@ -132,21 +136,21 @@ renderTemplate vals templ = reservedWords :: [String] reservedWords = ["else","endif","for","endfor","sep"] -parseTemplate :: GenParser Char TemplateState [String] +parseTemplate :: Parsec [Char] TemplateState [String] parseTemplate = many $ (plaintext <|> escapedDollar <|> conditional <|> for <|> variable) >>= adjustPosition -plaintext :: GenParser Char TemplateState String +plaintext :: Parsec [Char] TemplateState String plaintext = many1 $ noneOf "$" -escapedDollar :: GenParser Char TemplateState String +escapedDollar :: Parsec [Char] TemplateState String escapedDollar = try $ string "$$" >> return "$" -skipEndline :: GenParser Char st () +skipEndline :: Parsec [Char] st () skipEndline = try $ skipMany (oneOf " \t") >> newline >> return () -conditional :: GenParser Char TemplateState String +conditional :: Parsec [Char] TemplateState String conditional = try $ do TemplateState pos vars <- getState string "$if(" @@ -170,7 +174,7 @@ conditional = try $ do then ifContents else elseContents -for :: GenParser Char TemplateState String +for :: Parsec [Char] TemplateState String for = try $ do TemplateState pos vars <- getState string "$for(" @@ -178,14 +182,14 @@ for = try $ do string ")$" -- if newline after the "for", then a newline after "endfor" will be swallowed multiline <- option False $ try $ skipEndline >> return True - let matches = filter (\(k,_) -> k == id') vars + let matches = filter (\(k,_) -> k == id') vars let indent = replicate pos ' ' contents <- forM matches $ \m -> do updateState $ \(TemplateState p v) -> TemplateState p (m:v) raw <- liftM concat $ lookAhead parseTemplate return $ intercalate ('\n':indent) $ lines $ raw ++ "\n" parseTemplate - sep <- option "" $ do try (string "$sep$") + sep <- option "" $ do try (string "$sep$") when multiline $ optional skipEndline liftM concat parseTemplate string "$endfor$" @@ -193,16 +197,16 @@ for = try $ do setState $ TemplateState pos vars return $ concat $ intersperse sep contents -ident :: GenParser Char TemplateState String +ident :: Parsec [Char] TemplateState String ident = do first <- letter rest <- many (alphaNum <|> oneOf "_-") let id' = first : rest if id' `elem` reservedWords - then pzero + then mzero else return id' -variable :: GenParser Char TemplateState String +variable :: Parsec [Char] TemplateState String variable = try $ do char '$' id' <- ident diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index e2959eae7..508ad30a9 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 1913eb92b..e314cf70e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -40,8 +40,8 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, space) import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.Pandoc.Pretty import Control.Monad.State @@ -93,7 +93,7 @@ escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 964320eb2..df11d79cc 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' format into ConTeXt. @@ -31,6 +31,7 @@ Conversion of 'Pandoc' format into ConTeXt. module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Generic (queryWith) import Text.Printf ( printf ) import Data.List ( intercalate ) @@ -39,23 +40,23 @@ import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate ) import Network.URI ( isURI, unEscapeString ) -data WriterState = +data WriterState = WriterState { stNextRef :: Int -- number of next URL reference , stOrderedListLevel :: Int -- level of ordered list , stOptions :: WriterOptions -- writer options } orderedListStyles :: [[Char]] -orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] +orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] -- | Convert Pandoc to ConTeXt. writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = +writeConTeXt options document = let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 , stOptions = options - } - in evalState (pandocToConTeXt options document) defaultWriterState + } + in evalState (pandocToConTeXt options document) defaultWriterState pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do @@ -120,7 +121,7 @@ elementToConTeXt opts (Sec level _ id' title' elements) = do return $ vcat (header' : innerContents) -- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block +blockToConTeXt :: Block -> State WriterState Doc blockToConTeXt Null = return empty blockToConTeXt (Plain lst) = inlineListToConTeXt lst @@ -128,7 +129,7 @@ blockToConTeXt (Para [Image txt (src,_)]) = do capt <- inlineListToConTeXt txt return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline -blockToConTeXt (Para lst) = do +blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline blockToConTeXt (BlockQuote lst) = do @@ -147,18 +148,18 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do let level = stOrderedListLevel st put $ st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} + put $ st {stOrderedListLevel = level} let start' = if start == 1 then "" else "start=" ++ show start let delim' = case delim of DefaultDelim -> "" - Period -> "stopper=." - OneParen -> "stopper=)" + Period -> "stopper=." + OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) + let width = maximum $ map length $ take (length contents) (orderedListMarkers (start, style', delim)) let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" + let width'' = if width' > (1.5 :: Double) + then "width=" ++ show width' ++ "em" else "" let specs2Items = filter (not . null) [start', delim', width''] let specs2 = if null specs2Items @@ -166,8 +167,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do else "[" ++ intercalate "," specs2Items ++ "]" let style'' = case style' of DefaultStyle -> orderedListStyles !! level - Decimal -> "[n]" - Example -> "[n]" + Decimal -> "[n]" + Example -> "[n]" LowerRoman -> "[r]" UpperRoman -> "[R]" LowerAlpha -> "[a]" @@ -182,21 +183,21 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline blockToConTeXt (Header level lst) = sectionHeader "" level lst blockToConTeXt (Table caption aligns widths heads rows) = do let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' + AlignLeft -> 'l' AlignRight -> 'r' AlignCenter -> 'c' AlignDefault -> 'l'): if colWidth == 0 then "|" else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") - let colDescriptors = "|" ++ (concat $ + let colDescriptors = "|" ++ (concat $ zipWith colDescriptor widths aligns) headers <- if all null heads then return empty - else liftM ($$ "\\HL") $ tableRowToConTeXt heads - captionText <- inlineListToConTeXt caption + else liftM ($$ "\\HL") $ tableRowToConTeXt heads + captionText <- inlineListToConTeXt caption let captionText' = if null caption then text "none" else captionText - rows' <- mapM tableRowToConTeXt rows + rows' <- mapM tableRowToConTeXt rows return $ "\\placetable[here]" <> braces captionText' $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ @@ -230,7 +231,7 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToConTeXt (Emph lst) = do +inlineToConTeXt (Emph lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\em " <> contents inlineToConTeXt (Strong lst) = do diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1bcf99dcf..e696fc63e 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,6 +31,7 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) @@ -47,23 +48,23 @@ authorToDocbook opts name' = let name = render Nothing $ inlinesToDocbook opts name' in if ',' `elem` name then -- last name first - let (lastname, rest) = break (==',') name + let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) else -- last name last let namewords = words name - lengthname = length namewords + lengthname = length namewords (firstname, lastname) = case lengthname of - 0 -> ("","") + 0 -> ("","") 1 -> ("", name) n -> (intercalate " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = +writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = let title = inlinesToDocbook opts tit authors = map (authorToDocbook opts) auths date = inlinesToDocbook opts dat @@ -92,7 +93,7 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Int -> Element -> Doc -elementToDocbook opts _ (Blk block) = blockToDocbook opts block +elementToDocbook opts _ (Blk block) = blockToDocbook opts block elementToDocbook opts lvl (Sec _ _num id' title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements @@ -115,10 +116,10 @@ plainToPara :: Block -> Block plainToPara (Plain x) = Para x plainToPara x = x --- | Convert a list of pairs of terms and definitions into a list of +-- | Convert a list of pairs of terms and definitions into a list of -- Docbook varlistentrys. deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc -deflistItemsToDocbook opts items = +deflistItemsToDocbook opts items = vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items -- | Convert a term and a list of blocks into a Docbook varlistentry. @@ -144,13 +145,16 @@ blockToDocbook _ Null = empty blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst blockToDocbook opts (Para [Image txt (src,_)]) = - let capt = inlinesToDocbook opts txt + let alt = inlinesToDocbook opts txt + capt = if null txt + then empty + else inTagsSimple "title" alt in inTagsIndented "figure" $ - inTagsSimple "title" capt $$ + capt $$ (inTagsIndented "mediaobject" $ (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ - inTagsSimple "textobject" (inTagsSimple "phrase" capt)) + inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToDocbook opts (Para lst) = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = @@ -167,9 +171,9 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty +blockToDocbook opts (BulletList lst) = + inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = let attribs = case numstyle of DefaultStyle -> [] @@ -182,12 +186,12 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = items = if start == 1 then listItemsToDocbook opts (first:rest) else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block +blockToDocbook opts (DefinitionList lst) = + inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst +blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block -- we allow html for compatibility with earlier versions of pandoc blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block blockToDocbook _ (RawBlock _ _) = empty @@ -237,26 +241,26 @@ inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = +inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" $ inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = +inlineToDocbook opts (Strong lst) = inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = +inlineToDocbook opts (Strikeout lst) = inTags False "emphasis" [("role", "strikethrough")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = +inlineToDocbook opts (Superscript lst) = inTagsSimple "superscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = +inlineToDocbook opts (Subscript lst) = inTagsSimple "subscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (SmallCaps lst) = +inlineToDocbook opts (SmallCaps lst) = inTags False "emphasis" [("role", "smallcaps")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = +inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = - inlinesToDocbook opts lst -inlineToDocbook _ (Code _ str) = + inlinesToDocbook opts lst +inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = @@ -282,7 +286,7 @@ inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text $ + emailLink = inTagsSimple "email" $ text $ escapeStringForXML $ src' in case txt of [Code _ s] | s == src' -> emailLink @@ -292,14 +296,14 @@ inlineToDocbook opts (Link txt (src, _)) = then inTags False "link" [("linkend", drop 1 src)] else inTags False "ulink" [("url", src)]) $ inlinesToDocbook opts txt -inlineToDocbook _ (Image _ (src, tit)) = +inlineToDocbook _ (Image _ (src, tit)) = let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" (text $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] -inlineToDocbook opts (Note contents) = +inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents isMathML :: HTMLMathMethod -> Bool diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 396e7a482..05c9555c6 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Generic import System.Directory import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Highlighting.Kate.Types () @@ -93,14 +94,13 @@ mknode s attrs = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) -- | Produce an Docx file from a Pandoc document. -writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx - -> WriterOptions -- ^ Writer options +writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do +writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do let datadir = writerUserDataDir opts refArchive <- liftM toArchive $ - case mbRefDocx of + case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> do let defaultDocx = getDataFileName "reference.docx" >>= B.readFile @@ -543,7 +543,7 @@ inlineToOpenXML opts (SmallCaps lst) = inlineToOpenXML opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ] +inlineToOpenXML _ LineBreak = return [br] inlineToOpenXML _ (RawInline f str) | f == "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] @@ -562,16 +562,14 @@ inlineToOpenXML opts (Math DisplayMath str) = Left _ -> do fallback <- inlinesToOpenXML opts (readTeXMath str) return $ [br] ++ fallback ++ [br] - where br = mknode "w:br" [] () inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML _ (Code attrs str) = withTextProp (rStyle "VerbatimChar") $ case highlight formatOpenXML attrs str of - Nothing -> intercalate [mknode "w:br" [] ()] + Nothing -> intercalate [br] `fmap` (mapM formattedString $ lines str) Just h -> return h - where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] . - map (map toHlTok) + where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rStyle $ show toktype ] @@ -669,3 +667,6 @@ inlineToOpenXML opts (Image alt (src, tit)) = do liftIO $ UTF8.hPutStrLn stderr $ "Could not find image `" ++ src ++ "', skipping..." inlinesToOpenXML opts alt + +br :: Element +br = mknode "w:r" [] [mknode "w:cr" [] () ] diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b423f136f..46310e398 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -38,6 +38,7 @@ import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Generic import Control.Monad.State @@ -52,12 +53,10 @@ import Prelude hiding (catch) import Control.Exception (catch, SomeException) -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line - -> [FilePath] -- ^ Paths to fonts to embed - -> WriterOptions -- ^ Writer options +writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do +writeEPUB opts doc@(Pandoc meta _) = do epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content let opts' = opts{ writerEmailObfuscation = NoObfuscation @@ -107,7 +106,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f - fontEntries <- mapM mkFontEntry fonts + fontEntries <- mapM mkFontEntry $ writerEpubFonts opts -- body pages let isH1 (Header 1 _) = True @@ -232,7 +231,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case mbStylesheet of + stylesheet <- case writerEpubStylesheet opts of Just s -> return s Nothing -> readDataFile (writerUserDataDir opts) "epub.css" let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet @@ -249,12 +248,14 @@ metadataElement metadataXML uuid lang title authors date mbCoverImage = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isDublinCoreElement $ onlyElems userNodes + filter isMetadataElement $ onlyElems userNodes dublinElements = ["contributor","coverage","creator","date", "description","format","identifier","language","publisher", "relation","rights","source","subject","title","type"] - isDublinCoreElement e = qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements + isMetadataElement e = (qPrefix (elName e) == Just "dc" && + qName (elName e) `elem` dublinElements) || + (qPrefix (elName e) == Nothing && + qName (elName e) `elem` ["link","meta"]) contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ [ unode "dc:language" lang | not (elt `contains` "language") ] ++ @@ -288,10 +289,8 @@ transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do let writeHtmlInline opts z = removeTrailingSpace $ writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] - mathml = writeHtmlInline defaultWriterOptions{ - writerHTMLMathMethod = MathML Nothing } x - fallback = writeHtmlInline defaultWriterOptions{ - writerHTMLMathMethod = PlainMath } x + mathml = writeHtmlInline def{writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline def{writerHTMLMathMethod = PlainMath } x inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++ "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++ mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ @@ -312,9 +311,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity ('&':'#':xs) = let (ds,ys) = break (==';') xs rest = drop 1 ys - in case reads ('\'':'\\':ds ++ "'") of - ((x,_):_) -> x : unEntity rest - _ -> '&':'#':unEntity xs + in case safeRead ('\'':'\\':ds ++ "'") of + Just x -> x : unEntity rest + Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs new file mode 100644 index 000000000..301d80c54 --- /dev/null +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -0,0 +1,616 @@ +{- +Copyright (c) 2011-2012, Sergey Astanin +All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. + +FictionBook is an XML-based e-book format. For more information see: +<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> + +-} +module Text.Pandoc.Writers.FB2 (writeFB2) where + +import Control.Monad.State (StateT, evalStateT, get, modify) +import Control.Monad.State (liftM, liftM2, liftIO) +import Data.ByteString.Base64 (encode) +import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.List (intersperse, intercalate, isPrefixOf) +import Data.Either (lefts, rights) +import Network.Browser (browse, request, setAllowRedirects, setOutHandler) +import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) +import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) +import Network.URI (isURI, unEscapeString) +import System.FilePath (takeExtension) +import Text.XML.Light +import qualified Control.Exception as E +import qualified Data.ByteString as B +import qualified Text.XML.Light as X +import qualified Text.XML.Light.Cursor as XC + +import Text.Pandoc.Definition +import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) +import Text.Pandoc.Shared (orderedListMarkers) +import Text.Pandoc.Generic (bottomUp) + +-- | Data to be written at the end of the document: +-- (foot)notes, URLs, references, images. +data FbRenderState = FbRenderState + { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path + , parentListMarker :: String -- ^ list marker of the parent ordered list + , parentBulletLevel :: Int -- ^ nesting level of the unordered list + , writerOptions :: WriterOptions + } deriving (Show) + +-- | FictionBook building monad. +type FBM = StateT FbRenderState IO + +newFB :: FbRenderState +newFB = FbRenderState { footnotes = [], imagesToFetch = [] + , parentListMarker = "", parentBulletLevel = 0 + , writerOptions = def } + +data ImageMode = NormalImage | InlineImage deriving (Eq) +instance Show ImageMode where + show NormalImage = "imageType" + show InlineImage = "inlineImageType" + +-- | Produce an FB2 document from a 'Pandoc' document. +writeFB2 :: WriterOptions -- ^ conversion options + -> Pandoc -- ^ document to convert + -> IO String -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + modify (\s -> s { writerOptions = opts { writerStandalone = True } }) + desc <- description meta + fp <- frontpage meta + secs <- renderSections 1 blocks + let body = el "body" $ fp ++ secs + notes <- renderFootnotes + (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + let body' = replaceImagesWithAlt missing body + let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) + return $ xml_head ++ (showContent fb2_xml) + where + xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + fb2_attrs = + let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0" + xlink = "http://www.w3.org/1999/xlink" + in [ uattr "xmlns" xmlns + , attr ("xmlns", "l") xlink ] + -- + frontpage :: Meta -> FBM [Content] + frontpage meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ + [ el "title" (el "p" t) + , el "annotation" (map (el "p" . cMap plain) + (docAuthors meta' ++ [docDate meta'])) + ] + description :: Meta -> FBM Content + description meta' = do + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + return $ el "description" + [ el "title-info" (bt ++ as ++ dd) + , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version + ] + booktitle :: Meta -> FBM [Content] + booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + authors :: Meta -> [Content] + authors meta' = cMap author (docAuthors meta') + author :: [Inline] -> [Content] + author ss = + let ws = words . cMap plain $ ss + email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + (nickname:[]) -> [ el "nickname" nickname ] + (fname:lname:[]) -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + ([]) -> [] + in list $ el "author" (names ++ email) + docdate :: Meta -> FBM [Content] + docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] + +-- | Divide the stream of blocks into sections and convert to XML +-- representation. +renderSections :: Int -> [Block] -> FBM [Content] +renderSections level blocks = do + let secs = splitSections level blocks + mapM (renderSection level) secs + +renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection level (ttl, body) = do + title <- if null ttl + then return [] + else return . list . el "title" . formatTitle $ ttl + content <- if (hasSubsections body) + then renderSections (level + 1) body + else cMapM blockToXml body + return $ el "section" (title ++ content) + where + hasSubsections = any isHeader + isHeader (Header _ _) = True + isHeader _ = False + +-- | Only <p> and <empty-line> are allowed within <title> in FB2. +formatTitle :: [Inline] -> [Content] +formatTitle inlines = + let lns = split isLineBreak inlines + lns' = map (el "p" . cMap plain) lns + in intersperse (el "empty-line" ()) lns' + +split :: (a -> Bool) -> [a] -> [[a]] +split _ [] = [] +split cond xs = let (b,a) = break cond xs + in (b:split cond (drop 1 a)) + +isLineBreak :: Inline -> Bool +isLineBreak LineBreak = True +isLineBreak _ = False + +-- | Divide the stream of block elements into sections: [(title, blocks)]. +splitSections :: Int -> [Block] -> [([Inline], [Block])] +splitSections level blocks = reverse $ revSplit (reverse blocks) + where + revSplit [] = [] + revSplit rblocks = + let (lastsec, before) = break sameLevel rblocks + (header, prevblocks) = + case before of + ((Header n title):prevblocks') -> + if n == level + then (title, prevblocks') + else ([], before) + _ -> ([], before) + in (header, reverse lastsec) : revSplit prevblocks + sameLevel (Header n _) = n == level + sameLevel _ = False + +-- | Make another FictionBook body with footnotes. +renderFootnotes :: FBM [Content] +renderFootnotes = do + fns <- footnotes `liftM` get + if null fns + then return [] -- no footnotes + else return . list $ + el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) + where + renderFN (n, idstr, cs) = + let fn_texts = (el "title" (el "p" (show n))) : cs + in el "section" ([uattr "id" idstr], fn_texts) + +-- | Fetch images and encode them for the FictionBook XML. +-- Return image data and a list of hrefs of the missing images. +fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages links = do + imgs <- mapM (uncurry fetchImage) links + return $ (rights imgs, lefts imgs) + +-- | Fetch image data from disk or from network and make a <binary> XML section. +-- Return either (Left hrefOfMissingImage) or (Right xmlContent). +fetchImage :: String -> String -> IO (Either String Content) +fetchImage href link = do + mbimg <- + case (isURI link, readDataURI link) of + (True, Just (mime,_,True,base64)) -> + let mime' = map toLower mime + in if mime' == "image/png" || mime' == "image/jpeg" + then return (Just (mime',base64)) + else return Nothing + (True, Just _) -> return Nothing -- not base64-encoded + (True, Nothing) -> fetchURL link + (False, _) -> do + d <- nothingOnError $ B.readFile (unEscapeString link) + let t = case map toLower (takeExtension link) of + ".png" -> Just "image/png" + ".jpg" -> Just "image/jpeg" + ".jpeg" -> Just "image/jpeg" + ".jpe" -> Just "image/jpeg" + _ -> Nothing -- only PNG and JPEG are supported in FB2 + return $ liftM2 (,) t (liftM (toStr . encode) d) + case mbimg of + Just (imgtype, imgdata) -> do + return . Right $ el "binary" + ( [uattr "id" href + , uattr "content-type" imgtype] + , txt imgdata ) + _ -> return (Left ('#':href)) + where + nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) + nothingOnError action = liftM Just action `E.catch` omnihandler + omnihandler :: E.SomeException -> IO (Maybe B.ByteString) + omnihandler _ = return Nothing + +-- | Extract mime type and encoded data from the Data URI. +readDataURI :: String -- ^ URI + -> Maybe (String,String,Bool,String) + -- ^ Maybe (mime,charset,isBase64,data) +readDataURI uri = + let prefix = "data:" + in if not (prefix `isPrefixOf` uri) + then Nothing + else + let rest = drop (length prefix) uri + meta = takeWhile (/= ',') rest -- without trailing ',' + uridata = drop (length meta + 1) rest + parts = split (== ';') meta + (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts + in Just (mime,cs,enc,uridata) + where + upd str m@(mime,cs,enc) + | isMimeType str = (str,cs,enc) + | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc) + | str == "base64" = (mime,cs,True) + | otherwise = m + +-- Without parameters like ;charset=...; see RFC 2045, 5.1 +isMimeType :: String -> Bool +isMimeType s = + case split (=='/') s of + [mtype,msubtype] -> + ((map toLower mtype) `elem` types + || "x-" `isPrefixOf` (map toLower mtype)) + && all valid mtype + && all valid msubtype + _ -> False + where + types = ["text","image","audio","video","application","message","multipart"] + valid c = isAscii c && not (isControl c) && not (isSpace c) && + c `notElem` "()<>@,;:\\\"/[]?=" + +-- | Fetch URL, return its Content-Type and binary data on success. +fetchURL :: String -> IO (Maybe (String, String)) +fetchURL url = do + flip catchIO_ (return Nothing) $ do + r <- browse $ do + setOutHandler (const (return ())) + setAllowRedirects True + liftM snd . request . getRequest $ url + let content_type = lookupHeader HdrContentType (getHeaders r) + content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r + return $ liftM2 (,) content_type content + where + +toBS :: String -> B.ByteString +toBS = B.pack . map (toEnum . fromEnum) + +toStr :: B.ByteString -> String +toStr = map (toEnum . fromEnum) . B.unpack + +footnoteID :: Int -> String +footnoteID i = "n" ++ (show i) + +linkID :: Int -> String +linkID i = "l" ++ (show i) + +-- | Convert a block-level Pandoc's element to FictionBook XML representation. +blockToXml :: Block -> FBM [Content] +blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 +blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula +blockToXml (Para [img@(Image _ _)]) = insertImage NormalImage img +blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss +blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (RawBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs +blockToXml (OrderedList a bss) = do + state <- get + let pmrk = parentListMarker state + let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a + let mkitem mrk bs = do + modify (\s -> s { parentListMarker = mrk }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentListMarker = pmrk }) -- old parent marker + return . el "p" $ [ txt mrk, txt " " ] ++ itemtext + mapM (uncurry mkitem) (zip markers bss) +blockToXml (BulletList bss) = do + state <- get + let level = parentBulletLevel state + let pmrk = parentListMarker state + let prefix = replicate (length pmrk) ' ' + let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] + let mrk = prefix ++ bullets !! (level `mod` (length bullets)) + let mkitem bs = do + modify (\s -> s { parentBulletLevel = (level+1) }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentBulletLevel = level }) -- restore bullet level + return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext + mapM mkitem bss +blockToXml (DefinitionList defs) = + cMapM mkdef defs + where + mkdef (term, bss) = do + def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss + t <- wrap "strong" term + return [ el "p" t, el "p" def' ] + sep blocks = + if all needsBreak blocks then + blocks ++ [Plain [LineBreak]] + else + blocks + needsBreak (Para _) = False + needsBreak (Plain ins) = LineBreak `notElem` ins + needsBreak _ = True +blockToXml (Header _ _) = -- should never happen, see renderSections + error "unexpected header in section text" +blockToXml HorizontalRule = return + [ el "empty-line" () + , el "p" (txt (replicate 10 '—')) + , el "empty-line" () ] +blockToXml (Table caption aligns _ headers rows) = do + hd <- mkrow "th" headers aligns + bd <- mapM (\r -> mkrow "td" r aligns) rows + c <- return . el "emphasis" =<< cMapM toXml caption + return [el "table" (hd : bd), el "p" c] + where + mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow tag cells aligns' = + (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) + -- + mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell tag (cell, align) = do + cblocks <- cMapM blockToXml cell + return $ el tag ([align_attr align], cblocks) + -- + align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) + align_str AlignLeft = "left" + align_str AlignCenter = "center" + align_str AlignRight = "right" + align_str AlignDefault = "left" +blockToXml Null = return [] + +-- Replace paragraphs with plain text and line break. +-- Necessary to simulate multi-paragraph lists in FB2. +paraToPlain :: [Block] -> [Block] +paraToPlain [] = [] +paraToPlain (Para inlines : rest) = + let p = (Plain (inlines ++ [LineBreak])) + in p : paraToPlain rest +paraToPlain (p:rest) = p : paraToPlain rest + +-- Simulate increased indentation level. Will not really work +-- for multi-line paragraphs. +indent :: Block -> Block +indent = indentBlock + where + -- indentation space + spacer :: String + spacer = replicate 4 ' ' + -- + indentBlock (Plain ins) = Plain ((Str spacer):ins) + indentBlock (Para ins) = Para ((Str spacer):ins) + indentBlock (CodeBlock a s) = + let s' = unlines . map (spacer++) . lines $ s + in CodeBlock a s' + indentBlock (BlockQuote bs) = BlockQuote (map indent bs) + indentBlock (Header l ins) = Header l (indentLines ins) + indentBlock everythingElse = everythingElse + -- indent every (explicit) line + indentLines :: [Inline] -> [Inline] + indentLines ins = let lns = split isLineBreak ins :: [[Inline]] + in intercalate [LineBreak] $ map ((Str spacer):) lns + +-- | Convert a Pandoc's Inline element to FictionBook XML representation. +toXml :: Inline -> FBM [Content] +toXml (Str s) = return [txt s] +toXml (Emph ss) = list `liftM` wrap "emphasis" ss +toXml (Strong ss) = list `liftM` wrap "strong" ss +toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss +toXml (Superscript ss) = list `liftM` wrap "sup" ss +toXml (Subscript ss) = list `liftM` wrap "sub" ss +toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific + inner <- cMapM toXml ss + return $ [txt "‘"] ++ inner ++ [txt "’"] +toXml (Quoted DoubleQuote ss) = do + inner <- cMapM toXml ss + return $ [txt "“"] ++ inner ++ [txt "”"] +toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles +toXml (Code _ s) = return [el "code" s] +toXml Space = return [txt " "] +toXml LineBreak = return [el "empty-line" ()] +toXml (Math _ formula) = insertMath InlineImage formula +toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed +toXml (Link text (url,ttl)) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let ln_id = linkID n + let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]" + ln_text <- cMapM toXml text + let ln_desc = + let ttl' = dropWhile isSpace ttl + in if null ttl' + then list . el "p" $ el "code" url + else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ] + modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns }) + return $ ln_text ++ + [ el "a" + ( [ attr ("l","href") ('#':ln_id) + , uattr "type" "note" ] + , ln_ref) ] +toXml img@(Image _ _) = insertImage InlineImage img +toXml (Note bs) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let fn_id = footnoteID n + fn_desc <- cMapM blockToXml bs + modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) + let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]" + return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) + , uattr "type" "note" ] + , fn_ref ) + +insertMath :: ImageMode -> String -> FBM [Content] +insertMath immode formula = do + htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get + case htmlMath of + WebTeX url -> do + let alt = [Code nullAttr formula] + let imgurl = url ++ urlEncode formula + let img = Image alt (imgurl, "") + insertImage immode img + _ -> return [el "code" formula] + +insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage immode (Image alt (url,ttl)) = do + images <- imagesToFetch `liftM` get + let n = 1 + length images + let fname = "image" ++ show n + modify (\s -> s { imagesToFetch = (fname, url) : images }) + let ttlattr = case (immode, null ttl) of + (NormalImage, False) -> [ uattr "title" ttl ] + _ -> [] + return . list $ + el "image" $ + [ attr ("l","href") ('#':fname) + , attr ("l","type") (show immode) + , uattr "alt" (cMap plain alt) ] + ++ ttlattr +insertImage _ _ = error "unexpected inline instead of image" + +replaceImagesWithAlt :: [String] -> Content -> Content +replaceImagesWithAlt missingHrefs body = + let cur = XC.fromContent body + cur' = replaceAll cur + in XC.toTree . XC.root $ cur' + where + -- + replaceAll :: XC.Cursor -> XC.Cursor + replaceAll c = + let n = XC.current c + c' = if isImage n && isMissing n + then XC.modifyContent replaceNode c + else c + in case XC.nextDF c' of + (Just cnext) -> replaceAll cnext + Nothing -> c' -- end of document + -- + isImage :: Content -> Bool + isImage (Elem e) = (elName e) == (uname "image") + isImage _ = False + -- + isMissing (Elem img@(Element _ _ _ _)) = + let imgAttrs = elAttribs img + badAttrs = map (attr ("l","href")) missingHrefs + in any (`elem` imgAttrs) badAttrs + isMissing _ = False + -- + replaceNode :: Content -> Content + replaceNode n@(Elem img@(Element _ _ _ _)) = + let attrs = elAttribs img + alt = getAttrVal attrs (uname "alt") + imtype = getAttrVal attrs (qname "l" "type") + in case (alt, imtype) of + (Just alt', Just imtype') -> + if imtype' == show NormalImage + then el "p" alt' + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute + _ -> n -- don't replace if alt text is not found + replaceNode n = n + -- + getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal attrs name = + case filter ((name ==) . attrKey) attrs of + (a:_) -> Just (attrVal a) + _ -> Nothing + + +-- | Wrap all inlines with an XML tag (given its unqualified name). +wrap :: String -> [Inline] -> FBM Content +wrap tagname inlines = el tagname `liftM` cMapM toXml inlines + +-- " Create a singleton list. +list :: a -> [a] +list = (:[]) + +-- | Convert an 'Inline' to plaintext. +plain :: Inline -> String +plain (Str s) = s +plain (Emph ss) = concat (map plain ss) +plain (Strong ss) = concat (map plain ss) +plain (Strikeout ss) = concat (map plain ss) +plain (Superscript ss) = concat (map plain ss) +plain (Subscript ss) = concat (map plain ss) +plain (SmallCaps ss) = concat (map plain ss) +plain (Quoted _ ss) = concat (map plain ss) +plain (Cite _ ss) = concat (map plain ss) -- FIXME +plain (Code _ s) = s +plain Space = " " +plain LineBreak = "\n" +plain (Math _ s) = s +plain (RawInline _ s) = s +plain (Link text (url,_)) = concat (map plain text ++ [" <", url, ">"]) +plain (Image alt _) = concat (map plain alt) +plain (Note _) = "" -- FIXME + +-- | Create an XML element. +el :: (Node t) + => String -- ^ unqualified element name + -> t -- ^ node contents + -> Content -- ^ XML content +el name cs = Elem $ unode name cs + +-- | Put empty lines around content +spaceBeforeAfter :: [Content] -> [Content] +spaceBeforeAfter cs = + let emptyline = el "empty-line" () + in [emptyline] ++ cs ++ [emptyline] + +-- | Create a plain-text XML content. +txt :: String -> Content +txt s = Text $ CData CDataText s Nothing + +-- | Create an XML attribute with an unqualified name. +uattr :: String -> String -> Text.XML.Light.Attr +uattr name val = Attr (uname name) val + +-- | Create an XML attribute with a qualified name from given namespace. +attr :: (String, String) -> String -> Text.XML.Light.Attr +attr (ns, name) val = Attr (qname ns name) val + +-- | Unqualified name +uname :: String -> QName +uname name = QName name Nothing Nothing + +-- | Qualified name +qname :: String -> String -> QName +qname ns name = QName name Nothing (Just ns) + +-- | Abbreviation for 'concatMap'. +cMap :: (a -> [b]) -> [a] -> [b] +cMap = concatMap + +-- | Monadic equivalent of 'concatMap'. +cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +cMapM f xs = concat `liftM` mapM f xs
\ No newline at end of file diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index b8474ee3f..c6c4a8fd7 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,6 +32,7 @@ Conversion of 'Pandoc' documents to HTML. module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Pandoc.Generic import Text.Pandoc.Readers.TeXMath @@ -272,7 +273,7 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do -- title slides have no content of their own then filter isSec elements else elements - let header'' = if (writerStrictMarkdown opts || writerSectionDivs opts || + let header'' = if (writerSectionDivs opts || writerSlideVariant opts == S5Slides || slide) then header' else header' ! prefixedId opts id' @@ -378,13 +379,17 @@ blockToHtml _ Null = return mempty blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para [Image txt (s,tit)]) = do img <- inlineToHtml opts (Image txt (s,tit)) - capt <- inlineListToHtml opts txt + let tocapt = if writerHtml5 opts + then H5.figcaption + else H.p ! A.class_ "caption" + capt <- if null txt + then return mempty + else tocapt `fmap` inlineListToHtml opts txt return $ if writerHtml5 opts then H5.figure $ mconcat - [nl opts, img, H5.figcaption capt, nl opts] + [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, H.p ! A.class_ "caption" $ capt, - nl opts] + [nl opts, img, capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -392,7 +397,7 @@ blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str blockToHtml _ (RawBlock _ _) = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - let tolhs = writerLiterateHaskell opts && + let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && any (\c -> map toLower c == "literate") classes classes' = if tolhs @@ -618,7 +623,7 @@ inlineToHtml opts inline = ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -638,7 +643,7 @@ inlineToHtml opts inline = Left _ -> inlineListToHtml opts (readTeXMath str) >>= return . (H.span ! A.class_ "math") - MathJax _ -> return $ toHtml $ + MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7beee2d42..abbbd4d01 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isAbsoluteURI, unEscapeString ) @@ -265,10 +266,12 @@ blockToLaTeX :: Block -- ^ Block to convert blockToLaTeX Null = return empty blockToLaTeX (Plain lst) = inlineListToLaTeX lst blockToLaTeX (Para [Image txt (src,tit)]) = do - capt <- inlineListToLaTeX txt + capt <- if null txt + then return empty + else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" + capt $$ "\\end{figure}" blockToLaTeX (Para lst) = do result <- inlineListToLaTeX lst return result @@ -287,7 +290,7 @@ blockToLaTeX (BlockQuote lst) = do blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do opts <- gets stOptions case () of - _ | writerLiterateHaskell opts && "haskell" `elem` classes && + _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && "literate" `elem` classes -> lhsCodeBlock | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index c481e6c87..bececde25 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.Man + Module : Text.Pandoc.Writers.Man Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where import Text.Pandoc.Definition import Text.Pandoc.Templates import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) import Data.List ( isPrefixOf, intersperse, intercalate ) @@ -44,21 +45,21 @@ data WriterState = WriterState { stNotes :: Notes -- | Convert Pandoc to Man. writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) +writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) -- | Return groff man representation of document. pandocToMan :: WriterOptions -> Pandoc -> State WriterState String pandocToMan opts (Pandoc (Meta title authors date) blocks) = do titleText <- inlineListToMan opts title authors' <- mapM (inlineListToMan opts) authors - date' <- inlineListToMan opts date + date' <- inlineListToMan opts date let colwidth = if writerWrapText opts then Just $ writerColumns opts else Nothing let render' = render colwidth let (cmdName, rest) = break (== ' ') $ render' titleText let (title', section) = case reverse cmdName of - (')':d:'(':xs) | d `elem` ['0'..'9'] -> + (')':d:'(':xs) | d `elem` ['0'..'9'] -> (text (reverse xs), char d) xs -> (text (reverse xs), doubleQuotes empty) let description = hsep $ @@ -86,7 +87,7 @@ notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc notesToMan opts notes = if null notes then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. @@ -94,7 +95,7 @@ noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMan opts num note = do contents <- blockListToMan opts note let marker = cr <> text ".SS " <> brackets (text (show num)) - return $ marker $$ contents + return $ marker $$ contents -- | Association list of characters to escape. manEscapes :: [(Char, String)] @@ -104,7 +105,7 @@ manEscapes = [ ('\160', "\\ ") , ('\x2014', "\\[em]") , ('\x2013', "\\[en]") , ('\x2026', "\\&...") - ] ++ backslashEscapes "@\\" + ] ++ backslashEscapes "-@\\" -- | Escape special characters for Man. escapeString :: String -> String @@ -113,7 +114,7 @@ escapeString = escapeStringUsing manEscapes -- | Escape a literal (code) section for Man. escapeCode :: String -> String escapeCode = concat . intersperse "\n" . map escapeLine . lines where - escapeLine codeline = + escapeLine codeline = case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of a@('.':_) -> "\\&" ++ a b -> b @@ -150,14 +151,14 @@ splitSentences xs = -- | Convert Pandoc block element to man. blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToMan _ Null = return empty -blockToMan opts (Plain inlines) = +blockToMan opts (Plain inlines) = liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines - return $ text ".PP" $$ contents + return $ text ".PP" $$ contents blockToMan _ (RawBlock "man" str) = return $ text str blockToMan _ (RawBlock _ _) = return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" @@ -166,7 +167,7 @@ blockToMan opts (Header level inlines) = do let heading = case level of 1 -> ".SH " _ -> ".SS " - return $ text heading <> contents + return $ text heading <> contents blockToMan _ (CodeBlock _ str) = return $ text ".IP" $$ text ".nf" $$ @@ -174,10 +175,10 @@ blockToMan _ (CodeBlock _ str) = return $ text (escapeCode str) $$ text "\\f[]" $$ text ".fi" -blockToMan opts (BlockQuote blocks) = do +blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) = +blockToMan opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" aligncode AlignCenter = "c" @@ -190,53 +191,53 @@ blockToMan opts (Table caption alignments widths headers rows) = else map (printf "w(%0.2fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n let coldescriptions = text $ intercalate " " - (zipWith (\align width -> aligncode align ++ width) + (zipWith (\align width -> aligncode align ++ width) alignments iwidths) ++ "." colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ + let makeRow cols = text "T{" $$ + (vcat $ intersperse (text "T}@T{") cols) $$ text "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' - body <- mapM (\row -> do + body <- mapM (\row -> do cols <- mapM (blockListToMan opts) row return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "tab(@);" $$ coldescriptions $$ colheadings' $$ vcat body $$ text ".TE" blockToMan opts (BulletList items) = do contents <- mapM (bulletListItemToMan opts) items - return (vcat contents) + return (vcat contents) blockToMan opts (OrderedList attribs items) = do - let markers = take (length items) $ orderedListMarkers attribs + let markers = take (length items) $ orderedListMarkers attribs let indent = 1 + (maximum $ map length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ - zip markers items + zip markers items return (vcat contents) -blockToMan opts (DefinitionList items) = do +blockToMan opts (DefinitionList items) = do contents <- mapM (definitionListItemToMan opts) items return (vcat contents) -- | Convert bullet list item (list of blocks) to man. bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = +bulletListItemToMan opts ((Para first):rest) = bulletListItemToMan opts ((Plain first):rest) bulletListItemToMan opts ((Plain first):rest) = do - first' <- blockToMan opts (Plain first) + first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest let first'' = text ".IP \\[bu] 2" $$ first' let rest'' = if null rest then empty else text ".RS 2" $$ rest' $$ text ".RE" - return (first'' $$ rest'') + return (first'' $$ rest'') bulletListItemToMan opts (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" - + -- | Convert ordered list item (a list of blocks) to man. orderedListItemToMan :: WriterOptions -- ^ options -> String -- ^ order marker for list item @@ -244,7 +245,7 @@ orderedListItemToMan :: WriterOptions -- ^ options -> [Block] -- ^ list item (list of blocks) -> State WriterState Doc orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = +orderedListItemToMan opts num indent ((Para first):rest) = orderedListItemToMan opts num indent ((Plain first):rest) orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first @@ -254,17 +255,17 @@ orderedListItemToMan opts num indent (first:rest) = do let rest'' = if null rest then empty else text ".RS 4" $$ rest' $$ text ".RE" - return $ first'' $$ rest'' + return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. definitionListItemToMan :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState Doc definitionListItemToMan opts (label, defs) = do labelText <- inlineListToMan opts label - contents <- if null defs + contents <- if null defs then return empty - else liftM vcat $ forM defs $ \blocks -> do + else liftM vcat $ forM defs $ \blocks -> do let (first, rest) = case blocks of ((Para x):y) -> (Plain x,y) (x:y) -> (x,y) @@ -278,7 +279,7 @@ definitionListItemToMan opts (label, defs) = do -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToMan opts blocks = mapM (blockToMan opts) blocks >>= (return . vcat) @@ -292,7 +293,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. inlineToMan :: WriterOptions -> Inline -> State WriterState Doc -inlineToMan opts (Emph lst) = do +inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst return $ text "\\f[I]" <> contents <> text "\\f[]" inlineToMan opts (Strong lst) = do @@ -333,16 +334,16 @@ inlineToMan opts (Link txt (src, _)) = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src return $ case txt of [Code _ s] - | s == srcSuffix -> char '<' <> text srcSuffix <> char '>' + | s == srcSuffix -> char '<' <> text srcSuffix <> char '>' _ -> linktext <> text " (" <> text src <> char ')' inlineToMan opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || + let txt = if (null alternate) || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate - linkPart <- inlineToMan opts (Link txt (source, tit)) + linkPart <- inlineToMan opts (Link txt (source, tit)) return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan _ (Note contents) = do +inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } notes <- liftM stNotes get diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 9cbcaeb47..d88419feb 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, TupleSections #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.Markdown + Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -35,11 +35,15 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State +import qualified Data.Set as Set +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Readers.TeXMath (readTeXMath) +import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) type Notes = [[Block]] type Refs = [([Inline], Target)] @@ -49,7 +53,7 @@ data WriterState = WriterState { stNotes :: Notes -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown opts document = +writeMarkdown opts document = evalState (pandocToMarkdown opts document) WriterState{ stNotes = [] , stRefs = [] , stPlain = False } @@ -58,7 +62,9 @@ writeMarkdown opts document = -- pictures, or inline formatting). writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = - evalState (pandocToMarkdown opts{writerStrictMarkdown = True} + evalState (pandocToMarkdown opts{ + writerExtensions = Set.delete Ext_escaped_line_breaks $ + writerExtensions opts } document') WriterState{ stNotes = [] , stRefs = [] , stPlain = True } @@ -81,15 +87,41 @@ plainify = bottomUp go go (Cite _ cits) = SmallCaps cits go x = x +pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc +pandocTitleBlock tit auths dat = + hang 2 (text "% ") tit <> cr <> + hang 2 (text "% ") (hcat (intersperse (text "; ") auths)) <> cr <> + hang 2 (text "% ") dat <> cr + +mmdTitleBlock :: Doc -> [Doc] -> Doc -> Doc +mmdTitleBlock tit auths dat = + hang 8 (text "Title: ") tit <> cr <> + hang 8 (text "Author: ") (hcat (intersperse (text "; ") auths)) <> cr <> + hang 8 (text "Date: ") dat <> cr + +plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc +plainTitleBlock tit auths dat = + tit <> cr <> + (hcat (intersperse (text "; ") auths)) <> cr <> + dat <> cr + -- | Return markdown representation of document. pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do title' <- inlineListToMarkdown opts title authors' <- mapM (inlineListToMarkdown opts) authors date' <- inlineListToMarkdown opts date - let titleblock = not $ null title && null authors && null date + isPlain <- gets stPlain + let titleblock = case True of + _ | isPlain -> + plainTitleBlock title' authors' date' + | isEnabled Ext_pandoc_title_block opts -> + pandocTitleBlock title' authors' date' + | isEnabled Ext_mmd_title_block opts -> + mmdTitleBlock title' authors' date' + | otherwise -> empty let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts + let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty body <- blockListToMarkdown opts blocks @@ -106,11 +138,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do let context = writerVariables opts ++ [ ("toc", render colwidth toc) , ("body", main) - , ("title", render colwidth title') - , ("date", render colwidth date') ] ++ - [ ("titleblock", "yes") | titleblock ] ++ - [ ("author", render colwidth a) | a <- authors' ] + [ ("titleblock", render colwidth titleblock) + | not (null title && null authors && null date) ] if writerStandalone opts then return $ renderTemplate context $ writerTemplate opts else return main @@ -119,9 +149,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat --- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions - -> ([Inline], (String, String)) +-- | Return markdown representation of a reference key. +keyToMarkdown :: WriterOptions + -> ([Inline], (String, String)) -> State WriterState Doc keyToMarkdown opts (label, (src, tit)) = do label' <- inlineListToMarkdown opts label @@ -133,7 +163,7 @@ keyToMarkdown opts (label, (src, tit)) = do -- | Return markdown representation of notes. notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMarkdown opts notes = +notesToMarkdown opts notes = mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= return . vsep @@ -142,12 +172,16 @@ noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks let num' = text $ show num - let marker = text "[^" <> num' <> text "]:" + let marker = if isEnabled Ext_footnotes opts + then text "[^" <> num' <> text "]:" + else text "[" <> num' <> text "]" let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of n | n > 0 -> text $ replicate n ' ' _ -> text " " - return $ hang (writerTabStop opts) (marker <> spacer) contents + return $ if isEnabled Ext_footnotes opts + then hang (writerTabStop opts) (marker <> spacer) contents + else marker <> spacer <> contents -- | Escape special characters for Markdown. escapeString :: String -> String @@ -155,7 +189,7 @@ escapeString = escapeStringUsing markdownEscapes where markdownEscapes = backslashEscapes "\\`*_$<>#~^" -- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc +tableOfContents :: WriterOptions -> [Block] -> Doc tableOfContents opts headers = let opts' = opts { writerIgnoreNotes = True } contents = BulletList $ map elementToListItem $ hierarchicalize headers @@ -166,7 +200,7 @@ tableOfContents opts headers = -- | Converts an Element to a list item for a table of contents, elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ +elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ if null subsecs then [] else [BulletList $ map elementToListItem subsecs] @@ -188,9 +222,9 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] <> "=\"" <> text v <> "\"") ks -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker - if delim == Period && + if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && start `elem` [1, 5, 10, 50, 100, 500, 1000])) then spaceChar >> spaceChar @@ -206,7 +240,7 @@ beginsWithOrderedListMarker str = -- | Convert Pandoc block element to markdown. blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines @@ -215,14 +249,21 @@ blockToMarkdown opts (Para inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker st <- get - let esc = if (not (writerStrictMarkdown opts)) && + let esc = if isEnabled Ext_all_symbols_escapable opts && not (stPlain st) && beginsWithOrderedListMarker (render Nothing contents) then text "\x200B" -- zero-width space, a hack else empty return $ esc <> contents <> blankline -blockToMarkdown _ (RawBlock f str) - | f == "html" || f == "latex" || f == "tex" || f == "markdown" = do +blockToMarkdown opts (RawBlock f str) + | f == "html" = do + st <- get + if stPlain st + then return empty + else return $ if isEnabled Ext_markdown_attribute opts + then text (addMarkdownAttribute str) <> text "\n" + else text str <> text "\n" + | f == "latex" || f == "tex" || f == "markdown" = do st <- get if stPlain st then return empty @@ -243,88 +284,148 @@ blockToMarkdown opts (Header level inlines) = do contents <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || writerLiterateHaskell opts -> + _ | stPlain st || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && "literate" `elem` classes && - writerLiterateHaskell opts = + isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - if writerStrictMarkdown opts || attribs == nullAttr - then nest (writerTabStop opts) (text str) <> blankline - else -- use delimited code block - (tildes <> space <> attrs <> cr <> text str <> - cr <> tildes) <> blankline - where tildes = text "~~~~" - attrs = attrsToMarkdown attribs + case attribs of + x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> + tildes <> space <> attrs <> cr <> text str <> + cr <> tildes <> blankline + (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> + backticks <> space <> text cls <> cr <> text str <> + cr <> backticks <> blankline + _ -> nest (writerTabStop opts) (text str) <> blankline + where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of + [] -> "~~~~" + xs -> case maximum $ map length xs of + n | n < 3 -> "~~~~" + | otherwise -> replicate (n+1) '~' + backticks = text "```" + attrs = if isEnabled Ext_fenced_code_attributes opts + then attrsToMarkdown attribs + else empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... - let leader = if writerLiterateHaskell opts + let leader = if isEnabled Ext_literate_haskell opts then " > " else if stPlain st then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline -blockToMarkdown opts (Table caption aligns widths headers rows) = do +blockToMarkdown opts t@(Table caption aligns widths headers rows) = do caption' <- inlineListToMarkdown opts caption - let caption'' = if null caption + let caption'' = if null caption || not (isEnabled Ext_table_captions opts) then empty else blankline <> ": " <> caption' <> blankline - headers' <- mapM (blockListToMarkdown opts) headers + rawHeaders <- mapM (blockListToMarkdown opts) headers + rawRows <- mapM (mapM (blockListToMarkdown opts)) rows + let isSimple = all (==0) widths + (nst,tbl) <- case isSimple of + True | isEnabled Ext_simple_tables opts -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | isEnabled Ext_pipe_tables opts -> fmap (id,) $ + pipeTable (all null headers) aligns rawHeaders rawRows + | otherwise -> fmap (id,) $ + return $ text $ writeHtmlString def + $ Pandoc (Meta [] [] []) [t] + False | isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | otherwise -> fmap (id,) $ + return $ text $ writeHtmlString def + $ Pandoc (Meta [] [] []) [t] + return $ nst $ tbl $$ blankline $$ caption'' $$ blankline +blockToMarkdown opts (BulletList items) = do + contents <- mapM (bulletListItemToMarkdown opts) items + return $ cat contents <> blankline +blockToMarkdown opts (OrderedList (start,sty,delim) items) = do + let start' = if isEnabled Ext_startnum opts then start else 1 + let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle + let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim + let attribs = (start', sty', delim') + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToMarkdown opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMarkdown opts) items + return $ cat contents <> blankline + +addMarkdownAttribute :: String -> String +addMarkdownAttribute s = + case span isTagText $ reverse $ parseTags s of + (xs,(TagOpen t attrs:rest)) -> + renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, + x /= "markdown"] + _ -> s + +pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc +pipeTable headless aligns rawHeaders rawRows = do + let torow cs = nowrap $ text "|" <> + hcat (intersperse (text "|") $ map chomp cs) <> text "|" + let toborder (a, h) = let wid = max (offset h) 3 + in text $ case a of + AlignLeft -> ':':replicate (wid - 1) '-' + AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" + AlignRight -> replicate (wid - 1) '-' ++ ":" + AlignDefault -> replicate wid '-' + let header = if headless then empty else torow rawHeaders + let border = torow $ map toborder $ zip aligns rawHeaders + let body = vcat $ map torow rawRows + return $ header $$ border $$ body + +pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable opts headless aligns widths rawHeaders rawRows = do + let isSimple = all (==0) widths let alignHeader alignment = case alignment of AlignLeft -> lblock AlignCenter -> cblock AlignRight -> rblock AlignDefault -> lblock - rawRows <- mapM (mapM (blockListToMarkdown opts)) rows - let isSimple = all (==0) widths let numChars = maximum . map offset - let widthsInChars = - if isSimple - then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (fromIntegral (writerColumns opts) *)) widths + let widthsInChars = if isSimple + then map ((+2) . numChars) + $ transpose (rawHeaders : rawRows) + else map + (floor . (fromIntegral (writerColumns opts) *)) + widths let makeRow = hcat . intersperse (lblock 1 (text " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows - let head' = makeRow headers' + let head' = makeRow rawHeaders let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars let border = if maxRowHeight > 1 then text (replicate (sum widthsInChars + length widthsInChars - 1) '-') - else if all null headers + else if headless then underline else empty - let head'' = if all null headers + let head'' = if headless then empty else border <> cr <> head' let body = if maxRowHeight > 1 then vsep rows' else vcat rows' - let bottom = if all null headers + let bottom = if headless then underline else border - return $ nest 2 $ head'' $$ underline $$ body $$ - bottom $$ blankline $$ caption'' $$ blankline -blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items - return $ cat contents <> blankline -blockToMarkdown opts (OrderedList attribs items) = do - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ cat contents <> blankline -blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items - return $ cat contents <> blankline + return $ head'' $$ underline $$ body $$ bottom -- | Convert bullet list item (list of blocks) to markdown. bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc @@ -349,32 +450,38 @@ orderedListItemToMarkdown opts marker items = do -- | Convert definition list item (label, list of blocks) to markdown. definitionListItemToMarkdown :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState Doc definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label - let tabStop = writerTabStop opts - st <- get - let leader = if stPlain st then " " else ": " - let sps = case writerTabStop opts - 3 of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " defs' <- mapM (mapM (blockToMarkdown opts)) defs - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_definition_lists opts + then do + let tabStop = writerTabStop opts + st <- get + let leader = if stPlain st then " " else ": " + let sps = case writerTabStop opts - 3 of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + return $ nowrap labelText <> text " " <> cr <> + vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. blockListToMarkdown :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToMarkdown opts blocks = mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat -- insert comment between list and indented code block, or the -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) - | (writerStrictMarkdown opts || attr == nullAttr) && isListBlock b = + | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) + && isListBlock b = b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + fixBlocks rest fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True @@ -412,7 +519,7 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Emph lst) = do +inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do @@ -420,15 +527,21 @@ inlineToMarkdown opts (Strong lst) = do return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst - return $ "~~" <> contents <> "~~" + return $ if isEnabled Ext_strikeout opts + then "~~" <> contents <> "~~" + else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do let lst' = bottomUp escapeSpaces lst contents <- inlineListToMarkdown opts lst' - return $ "^" <> contents <> "^" + return $ if isEnabled Ext_superscript opts + then "^" <> contents <> "^" + else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do let lst' = bottomUp escapeSpaces lst contents <- inlineListToMarkdown opts lst' - return $ "~" <> contents <> "~" + return $ if isEnabled Ext_subscript opts + then "~" <> contents <> "~" + else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst @@ -437,33 +550,46 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups then 0 - else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' + else maximum $ map length tickGroups + marker = replicate (longest + 1) '`' spacer = if (longest == 0) then "" else " " - attrs = if writerStrictMarkdown opts || attr == nullAttr - then empty - else attrsToMarkdown attr + attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + then attrsToMarkdown attr + else empty in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st then return $ text str else return $ text $ escapeString str -inlineToMarkdown _ (Math InlineMath str) = - return $ "$" <> text str <> "$" -inlineToMarkdown _ (Math DisplayMath str) = - return $ "$$" <> text str <> "$$" -inlineToMarkdown _ (RawInline f str) - | f == "html" || f == "latex" || f == "tex" || f == "markdown" = +inlineToMarkdown opts (Math InlineMath str) + | isEnabled Ext_tex_math_dollars opts = + return $ "$" <> text str <> "$" + | isEnabled Ext_tex_math_single_backslash opts = + return $ "\\(" <> text str <> "\\)" + | isEnabled Ext_tex_math_double_backslash opts = + return $ "\\\\(" <> text str <> "\\\\)" + | otherwise = inlineListToMarkdown opts $ readTeXMath str +inlineToMarkdown opts (Math DisplayMath str) + | isEnabled Ext_tex_math_dollars opts = + return $ "$$" <> text str <> "$$" + | isEnabled Ext_tex_math_single_backslash opts = + return $ "\\[" <> text str <> "\\]" + | isEnabled Ext_tex_math_double_backslash opts = + return $ "\\\\[" <> text str <> "\\\\]" + | otherwise = (\x -> cr <> x <> cr) `fmap` + inlineListToMarkdown opts (readTeXMath str) +inlineToMarkdown opts (RawInline f str) + | f == "html" || f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = return $ text str inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) = return $ - if writerStrictMarkdown opts - then " " <> cr - else "\\" <> cr +inlineToMarkdown opts (LineBreak) + | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr + | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite (c:cs) lst) | writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst @@ -513,7 +639,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> + else "[" <> linktext <> "](" <> text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do let txt = if null alternate || alternate == [Str source] @@ -522,8 +648,10 @@ inlineToMarkdown opts (Image alternate (source, tit)) = do else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) return $ "!" <> linkPart -inlineToMarkdown _ (Note contents) = do +inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get let ref = show $ (length $ stNotes st) - return $ "[^" <> text ref <> "]" + if isEnabled Ext_footnotes opts + then return $ "[^" <> text ref <> "]" + else return $ "[" <> text ref <> "]" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index b32c5327d..84d7393c1 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.MediaWiki + Module : Text.Pandoc.Writers.MediaWiki Copyright : Copyright (C) 2008-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,7 +31,8 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intersect, intercalate ) @@ -46,9 +47,9 @@ data WriterState = WriterState { -- | Convert Pandoc to MediaWiki. writeMediaWiki :: WriterOptions -> Pandoc -> String -writeMediaWiki opts document = - evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) +writeMediaWiki opts document = + evalState (pandocToMediaWiki opts document) + (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String @@ -57,7 +58,7 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do notesExist <- get >>= return . stNotes let notes = if notesExist then "\n<references />" - else "" + else "" let main = body ++ notes let context = writerVariables opts ++ [ ("body", main) ] ++ @@ -70,22 +71,23 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do escapeString :: String -> String escapeString = escapeStringForXML --- | Convert Pandoc block element to MediaWiki. +-- | Convert Pandoc block element to MediaWiki. blockToMediaWiki :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState String + -> State WriterState String blockToMediaWiki _ Null = return "" -blockToMediaWiki opts (Plain inlines) = +blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines blockToMediaWiki opts (Para [Image txt (src,tit)]) = do - capt <- inlineListToMediaWiki opts txt + capt <- if null txt + then return "" + else ("|caption " ++) `fmap` inlineListToMediaWiki opts txt let opt = if null txt then "" - else "|alt=" ++ if null tit then capt else tit ++ - "|caption " ++ capt + else "|alt=" ++ if null tit then capt else tit ++ capt return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" blockToMediaWiki opts (Para inlines) = do @@ -115,7 +117,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", + "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] let (beg, end) = if null at then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>") @@ -124,7 +126,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do blockToMediaWiki opts (BlockQuote blocks) = do contents <- blockListToMediaWiki opts blocks - return $ "<blockquote>" ++ contents ++ "</blockquote>" + return $ "<blockquote>" ++ contents ++ "</blockquote>" blockToMediaWiki opts (Table capt aligns widths headers rows') = do let alignStrings = map alignmentToString aligns @@ -221,7 +223,7 @@ listItemToMediaWiki opts items = do -- | Convert definition list item (label, list of blocks) to MediaWiki. definitionListItemToMediaWiki :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState String definitionListItemToMediaWiki opts (label, items) = do labelText <- inlineListToMediaWiki opts label @@ -242,7 +244,7 @@ isSimpleList x = BulletList items -> all isSimpleListItem items OrderedList (num, sty, _) items -> all isSimpleListItem items && num == 1 && sty `elem` [DefaultStyle, Decimal] - DefinitionList items -> all isSimpleListItem $ concatMap snd items + DefinitionList items -> all isSimpleListItem $ concatMap snd items _ -> False -- | True if list item can be handled with the simple wiki syntax. False if @@ -287,8 +289,8 @@ tableRowToMediaWiki opts alignStrings rownum cols' = do 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToMediaWiki opts celltype alignment item) + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToMediaWiki opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -313,7 +315,7 @@ tableItemToMediaWiki opts celltype align' item = do -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState String + -> State WriterState String blockListToMediaWiki opts blocks = mapM (blockToMediaWiki opts) blocks >>= return . vcat @@ -325,9 +327,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String -inlineToMediaWiki opts (Emph lst) = do +inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst - return $ "''" ++ contents ++ "''" + return $ "''" ++ contents ++ "''" inlineToMediaWiki opts (Strong lst) = do contents <- inlineListToMediaWiki opts lst @@ -365,8 +367,8 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline "mediawiki" str) = return str -inlineToMediaWiki _ (RawInline "html" str) = return str +inlineToMediaWiki _ (RawInline "mediawiki" str) = return str +inlineToMediaWiki _ (RawInline "html" str) = return str inlineToMediaWiki _ (RawInline _ _) = return "" inlineToMediaWiki _ (LineBreak) = return "<br />\n" @@ -392,7 +394,7 @@ inlineToMediaWiki opts (Image alt (source, tit)) = do else "|" ++ tit return $ "[[Image:" ++ source ++ txt ++ "]]" -inlineToMediaWiki opts (Note contents) = do +inlineToMediaWiki opts (Note contents) = do contents' <- blockListToMediaWiki opts contents modify (\s -> s { stNotes = True }) return $ "<ref>" ++ contents' ++ "</ref>" diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index d2b56cd17..7fb304e86 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -34,7 +34,7 @@ metadata. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.Options ( WriterOptions(..) ) import Data.List ( intersperse ) import Text.Pandoc.Definition import Text.Pandoc.Pretty @@ -47,17 +47,17 @@ prettyList ds = prettyBlock :: Block -> Doc prettyBlock (BlockQuote blocks) = "BlockQuote" $$ prettyList (map prettyBlock blocks) -prettyBlock (OrderedList attribs blockLists) = +prettyBlock (OrderedList attribs blockLists) = "OrderedList" <> space <> text (show attribs) $$ (prettyList $ map (prettyList . map prettyBlock) blockLists) -prettyBlock (BulletList blockLists) = +prettyBlock (BulletList blockLists) = "BulletList" $$ (prettyList $ map (prettyList . map prettyBlock) blockLists) prettyBlock (DefinitionList items) = "DefinitionList" $$ (prettyList $ map deflistitem items) where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" -prettyBlock (Table caption aligns widths header rows) = +prettyBlock (Table caption aligns widths header rows) = "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <> text (show widths) $$ prettyRow header $$ diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 9e3dba98a..f43d0a087 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -36,7 +36,8 @@ import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Paths_pandoc ( getDataFileName ) -import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Options ( WriterOptions(..) ) +import Text.Pandoc.Shared ( stringify ) import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition @@ -47,16 +48,16 @@ import Control.Monad (liftM) import Network.URI ( unEscapeString ) import Text.Pandoc.XML import Text.Pandoc.Pretty +import qualified Control.Exception as E -- | Produce an ODT file from a Pandoc document. -writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt - -> WriterOptions -- ^ Writer options +writeODT :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeODT mbRefOdt opts doc@(Pandoc (Meta title _ _) _) = do +writeODT opts doc@(Pandoc (Meta title _ _) _) = do let datadir = writerUserDataDir opts refArchive <- liftM toArchive $ - case mbRefOdt of + case writerReferenceODT opts of Just f -> B.readFile f Nothing -> do let defaultODT = getDataFileName "reference.odt" >>= B.readFile @@ -128,9 +129,9 @@ transformPic sourceDir entriesRef (Image lab (src,tit)) = do Nothing -> tit entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src' - catch (readEntry [] (sourceDir </> src') >>= \entry -> - modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> - return (Image lab (newsrc, tit'))) - (\_ -> return (Emph lab)) + E.catch (readEntry [] (sourceDir </> src') >>= \entry -> + modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> + return (Image lab (newsrc, tit'))) + (\e -> let _ = (e :: E.SomeException) in return (Emph lab)) transformPic _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index a0317511a..027ddfda1 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.XML import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 7eb943a22..b885a7a40 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org Copyright : Copyright (C) 2010 Puneeth Chaganti - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti <punchagan@gmail.com> Stability : alpha @@ -32,14 +32,15 @@ Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org ( writeOrg) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Pretty import Text.Pandoc.Templates (renderTemplate) import Data.List ( intersect, intersperse, transpose ) import Control.Monad.State import Control.Applicative ( (<$>) ) -data WriterState = +data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Bool , stImages :: Bool @@ -49,7 +50,7 @@ data WriterState = -- | Convert Pandoc to Org. writeOrg :: WriterOptions -> Pandoc -> String -writeOrg opts document = +writeOrg opts document = let st = WriterState { stNotes = [], stLinks = False, stImages = False, stHasMath = False, stOptions = opts } @@ -82,8 +83,8 @@ pandocToOrg (Pandoc (Meta tit auth dat) blocks) = do -- | Return Org representation of notes. notesToOrg :: [[Block]] -> State WriterState Doc -notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= +notesToOrg notes = + mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= return . vsep -- | Return Org representation of a note. @@ -106,21 +107,24 @@ titleToOrg :: [Inline] -> State WriterState Doc titleToOrg [] = return empty titleToOrg lst = do contents <- inlineListToOrg lst - return $ "#+TITLE: " <> contents + return $ "#+TITLE: " <> contents --- | Convert Pandoc block element to Org. +-- | Convert Pandoc block element to Org. blockToOrg :: Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToOrg Null = return empty blockToOrg (Plain inlines) = inlineListToOrg inlines blockToOrg (Para [Image txt (src,tit)]) = do - capt <- inlineListToOrg txt + capt <- if null txt + then return empty + else (\c -> "#+CAPTION: " <> c <> blankline) `fmap` + inlineListToOrg txt img <- inlineToOrg (Image txt (src,tit)) - return $ "#+CAPTION: " <> capt <> blankline <> img + return $ capt <> img blockToOrg (Para inlines) = do contents <- inlineListToOrg inlines return $ contents <> blankline -blockToOrg (RawBlock "html" str) = +blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = @@ -134,17 +138,17 @@ blockToOrg (Header level inlines) = do blockToOrg (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts - let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa", - "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex", - "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", - "oz", "perl", "plantuml", "python", "R", "ruby", "sass", + let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa", + "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex", + "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", + "oz", "perl", "plantuml", "python", "R", "ruby", "sass", "scheme", "screen", "sh", "sql", "sqlite"] let (beg, end) = case at of [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC") return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do - contents <- blockListToOrg blocks + contents <- blockListToOrg blocks return $ blankline $$ "#+BEGIN_QUOTE" $$ nest 2 contents $$ "#+END_QUOTE" $$ blankline blockToOrg (Table caption' _ _ headers rows) = do @@ -155,11 +159,11 @@ blockToOrg (Table caption' _ _ headers rows) = do headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows let numChars = maximum . map offset - -- FIXME: width is not being used. + -- FIXME: width is not being used. let widthsInChars = map ((+2) . numChars) $ transpose (headers' : rawRows) - -- FIXME: Org doesn't allow blocks with height more than 1. - let hpipeBlocks blocks = hcat [beg, middle, end] + -- FIXME: Org doesn't allow blocks with height more than 1. + let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") @@ -170,7 +174,7 @@ blockToOrg (Table caption' _ _ headers rows) = do rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' @@ -186,7 +190,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do let delim' = case delim of TwoParens -> OneParen x -> x - let markers = take (length items) $ orderedListMarkers + let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m @@ -222,7 +226,7 @@ definitionListItemToOrg (label, defs) = do -- | Convert list of Pandoc block elements to Org. blockListToOrg :: [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat -- | Convert list of Pandoc inline elements to Org. @@ -231,19 +235,19 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc -inlineToOrg (Emph lst) = do +inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" inlineToOrg (Strong lst) = do contents <- inlineListToOrg lst return $ "*" <> contents <> "*" -inlineToOrg (Strikeout lst) = do +inlineToOrg (Strikeout lst) = do contents <- inlineListToOrg lst return $ "+" <> contents <> "+" -inlineToOrg (Superscript lst) = do +inlineToOrg (Superscript lst) = do contents <- inlineListToOrg lst return $ "^{" <> contents <> "}" -inlineToOrg (Subscript lst) = do +inlineToOrg (Subscript lst) = do contents <- inlineListToOrg lst return $ "_{" <> contents <> "}" inlineToOrg (SmallCaps lst) = inlineListToOrg lst @@ -276,7 +280,7 @@ inlineToOrg (Link txt (src, _)) = do inlineToOrg (Image _ (source, _)) = do modify $ \s -> s{ stImages = True } return $ "[[" <> text source <> "]]" -inlineToOrg (Note contents) = do +inlineToOrg (Note contents) = do -- add to notes in state notes <- get >>= (return . stNotes) modify $ \st -> st { stNotes = contents:notes } diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d98079940..5b0b5a414 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.RST + Module : Text.Pandoc.Writers.RST Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -32,7 +32,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Data.List ( isPrefixOf, intersperse, transpose ) import Text.Pandoc.Pretty @@ -42,7 +43,7 @@ import Data.Char (isSpace) type Refs = [([Inline], Target)] -data WriterState = +data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Refs , stImages :: Refs @@ -52,7 +53,7 @@ data WriterState = -- | Convert Pandoc to RST. writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = +writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stOptions = opts } @@ -89,8 +90,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do refsToRST :: Refs -> State WriterState Doc refsToRST refs = mapM keyToRST refs >>= return . vcat --- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) +-- | Return RST representation of a reference key. +keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label @@ -101,7 +102,7 @@ keyToRST (label, (src, _)) = do -- | Return RST representation of notes. notesToRST :: [[Block]] -> State WriterState Doc -notesToRST notes = +notesToRST notes = mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= return . vsep @@ -116,8 +117,8 @@ noteToRST num note = do pictRefsToRST :: Refs -> State WriterState Doc pictRefsToRST refs = mapM pictToRST refs >>= return . vcat --- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (String, String)) +-- | Return RST representation of a picture substitution reference. +pictToRST :: ([Inline], (String, String)) -> State WriterState Doc pictToRST (label, (src, _)) = do label' <- inlineListToRST label @@ -135,9 +136,9 @@ titleToRST lst = do let border = text (replicate titleLength '=') return $ border $$ contents $$ border --- | Convert Pandoc block element to RST. +-- | Convert Pandoc block element to RST. blockToRST :: Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToRST Null = return empty blockToRST (Plain inlines) = inlineListToRST inlines blockToRST (Para [Image txt (src,tit)]) = do @@ -163,12 +164,12 @@ blockToRST (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts if "haskell" `elem` classes && "literate" `elem` classes && - writerLiterateHaskell opts + isEnabled Ext_literate_haskell opts then return $ prefixed "> " (text str) $$ blankline else return $ "::" $+$ nest tabstop (text str) $$ blankline blockToRST (BlockQuote blocks) = do tabstop <- get >>= (return . writerTabStop . stOptions) - contents <- blockListToRST blocks + contents <- blockListToRST blocks return $ (nest tabstop contents) <> blankline blockToRST (Table caption _ widths headers rows) = do caption' <- inlineListToRST caption @@ -184,7 +185,7 @@ blockToRST (Table caption _ widths headers rows) = do if isSimple then map ((+2) . numChars) $ transpose (headers' : rawRows) else map (floor . (fromIntegral (writerColumns opts) *)) widths - let hpipeBlocks blocks = hcat [beg, middle, end] + let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") @@ -195,7 +196,7 @@ blockToRST (Table caption _ widths headers rows) = do rows' <- mapM (\row -> do cols <- mapM blockListToRST row return $ makeRow cols) rows let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '+' let body = vcat $ intersperse (border '-') rows' @@ -208,9 +209,9 @@ blockToRST (BulletList items) = do -- ensure that sublists have preceding blank line return $ blankline $$ vcat contents $$ blankline blockToRST (OrderedList (start, style', delim) items) = do - let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim + let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." - else take (length items) $ orderedListMarkers + else take (length items) $ orderedListMarkers (start, style', delim) let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m @@ -249,7 +250,7 @@ definitionListItemToRST (label, defs) = do -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. @@ -303,19 +304,19 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Emph lst) = do +inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" inlineToRST (Strong lst) = do contents <- inlineListToRST lst return $ "**" <> contents <> "**" -inlineToRST (Strikeout lst) = do +inlineToRST (Strikeout lst) = do contents <- inlineListToRST lst return $ "[STRIKEOUT:" <> contents <> "]" -inlineToRST (Superscript lst) = do +inlineToRST (Superscript lst) = do contents <- inlineListToRST lst return $ ":sup:`" <> contents <> "`" -inlineToRST (Subscript lst) = do +inlineToRST (Subscript lst) = do contents <- inlineListToRST lst return $ ":sub:`" <> contents <> "`" inlineToRST (SmallCaps lst) = inlineListToRST lst @@ -358,7 +359,7 @@ inlineToRST (Link txt (src, tit)) = do else return $ "`" <> linktext <> " <" <> text src <> ">`_" inlineToRST (Image alternate (source, tit)) = do pics <- get >>= return . stImages - let labelsUsed = map fst pics + let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || alternate `elem` labelsUsed then [Str $ "image" ++ show (length pics)] @@ -369,7 +370,7 @@ inlineToRST (Image alternate (source, tit)) = do modify $ \st -> st { stImages = pics' } label <- inlineListToRST txt return $ "|" <> label <> "|" -inlineToRST (Note contents) = do +inlineToRST (Note contents) = do -- add to notes in state notes <- get >>= return . stNotes modify $ \st -> st { stNotes = contents:notes } diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 4e7c2a7cd..1919eb3f2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' documents to RTF (rich text format). -} module Text.Pandoc.Writers.RTF ( writeRTF, rtfEmbedImage ) where import Text.Pandoc.Definition +import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate) @@ -38,6 +39,7 @@ import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) import Network.URI ( isAbsoluteURI, unEscapeString ) +import qualified Control.Exception as E -- | Convert Image inlines into a raw RTF embedded image, read from a file. -- If file not found or filetype not jpeg or png, leave the inline unchanged. @@ -47,7 +49,8 @@ rtfEmbedImage x@(Image _ (src,_)) = do if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) then do let src' = unEscapeString src - imgdata <- catch (B.readFile src') (\_ -> return B.empty) + imgdata <- E.catch (B.readFile src') + (\e -> let _ = (e :: E.SomeException) in return B.empty) let bytes = map (printf "%02x") $ B.unpack imgdata let filetype = case ext of ".jpg" -> "\\jpegblip" @@ -63,7 +66,7 @@ rtfEmbedImage x = return x -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc (Meta title authors date) blocks) = +writeRTF options (Pandoc (Meta title authors date) blocks) = let titletext = inlineListToRTF title authorstext = map inlineListToRTF authors datetext = inlineListToRTF date @@ -82,11 +85,11 @@ writeRTF options (Pandoc (Meta title authors date) blocks) = else body -- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String +tableOfContents :: [Block] -> String tableOfContents headers = let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 [Str "Contents"], + in concatMap (blockToRTF 0 AlignDefault) $ + [Header 1 [Str "Contents"], BulletList (map elementToListItem contentsTree)] elementToListItem :: Element -> [Block] @@ -100,7 +103,7 @@ elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++ handleUnicode :: String -> String handleUnicode [] = [] handleUnicode (c:cs) = - if ord c > 127 + if ord c > 127 then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs else c:(handleUnicode cs) @@ -130,32 +133,32 @@ rtfParSpaced :: Int -- ^ space after (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfParSpaced spaceAfter indent firstLineIndent alignment content = + -> String +rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of AlignLeft -> "\\ql " AlignRight -> "\\qr " AlignCenter -> "\\qc " AlignDefault -> "\\ql " in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" --- | Default paragraph. +-- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfPar = rtfParSpaced 180 + -> String +rtfPar = rtfParSpaced 180 -- | Compact paragraph (e.g. for compact list items). rtfCompact :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfCompact = rtfParSpaced 0 + -> String +rtfCompact = rtfParSpaced 0 -- number of twips to indent indentIncrement :: Int @@ -172,7 +175,7 @@ bulletMarker indent = case indent `mod` 720 of -- | Returns appropriate (list of) ordered list markers for indent level. orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) = +orderedMarkers indent (start, style, delim) = if style == DefaultStyle && delim == DefaultDelim then case indent `mod` 720 of 0 -> orderedListMarkers (start, Decimal, Period) @@ -185,30 +188,30 @@ blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String blockToRTF _ _ Null = "" -blockToRTF indent alignment (Plain lst) = +blockToRTF indent alignment (Plain lst) = rtfCompact indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (Para lst) = +blockToRTF indent alignment (Para lst) = rtfPar indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst +blockToRTF indent alignment (BlockQuote lst) = + concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) blockToRTF _ _ (RawBlock "rtf" str) = str blockToRTF _ _ (RawBlock _ _) = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ +blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ +blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ +blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = +blockToRTF indent _ HorizontalRule = rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = +blockToRTF indent alignment (Table caption aligns sizes headers rows) = (if all null headers then "" - else tableRowToRTF True indent aligns sizes headers) ++ + else tableRowToRTF True indent aligns sizes headers) ++ concatMap (tableRowToRTF False indent aligns sizes) rows ++ rtfPar indent 0 alignment (inlineListToRTF caption) @@ -230,7 +233,7 @@ tableRowToRTF header indent aligns sizes' cols = end = "}\n\\intbl\\row}\n" in start ++ columns ++ end -tableItemToRTF :: Int -> Alignment -> [Block] -> String +tableItemToRTF :: Int -> Alignment -> [Block] -> String tableItemToRTF indent alignment item = let contents = concatMap (blockToRTF indent alignment) item in "{\\intbl " ++ contents ++ "\\cell}\n" @@ -238,7 +241,7 @@ tableItemToRTF indent alignment item = -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. spaceAtEnd :: String -> String -spaceAtEnd str = +spaceAtEnd str = if isSuffixOf "\\par}\n" str then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" else str @@ -249,10 +252,10 @@ listItemToRTF :: Alignment -- ^ alignment -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = +listItemToRTF alignment indent marker [] = + rtfCompact (indent + listIncrement) (0 - listIncrement) alignment + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") +listItemToRTF alignment indent marker list = let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ show listIncrement ++ "\\tab" @@ -275,7 +278,7 @@ definitionListItemToRTF alignment indent (label, defs) = let labelText = blockToRTF indent alignment (Plain label) itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $ concat defs - in labelText ++ itemsText + in labelText ++ itemsText -- | Convert list of inline items to RTF. inlineListToRTF :: [Inline] -- ^ list of inlines to convert @@ -291,9 +294,9 @@ inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = +inlineToRTF (Quoted SingleQuote lst) = "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = +inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str @@ -303,11 +306,11 @@ inlineToRTF (RawInline "rtf" str) = str inlineToRTF (RawInline _ _) = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " -inlineToRTF (Link text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ +inlineToRTF (Link text (src, _)) = + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" -inlineToRTF (Image _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Image _ (source, _)) = + "{\\cf1 [image: " ++ source ++ "]\\cf0}" inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 6bb782899..40e76c615 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Text.Pandoc.Definition +import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Printf ( printf ) @@ -40,7 +41,7 @@ import Text.Pandoc.Pretty import Network.URI ( isAbsoluteURI, unEscapeString ) import System.FilePath -data WriterState = +data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout , stSuperscript :: Bool -- document contains superscript , stSubscript :: Bool -- document contains subscript @@ -53,8 +54,8 @@ data WriterState = -- | Convert Pandoc to Texinfo. writeTexinfo :: WriterOptions -> Pandoc -> String -writeTexinfo options document = - evalState (pandocToTexinfo options $ wrapTop document) $ +writeTexinfo options document = + evalState (pandocToTexinfo options $ wrapTop document) $ WriterState { stStrikeout = False, stSuperscript = False, stSubscript = False } -- | Add a "Top" node around the document, needed by Texinfo. @@ -116,10 +117,12 @@ blockToTexinfo (Plain lst) = inlineListToTexinfo lst blockToTexinfo (Para [Image txt (src,tit)]) = do - capt <- inlineListToTexinfo txt + capt <- if null txt + then return empty + else (\c -> text "@caption" <> braces c) `fmap` + inlineListToTexinfo txt img <- inlineToTexinfo (Image txt (src,tit)) - return $ text "@float" $$ img $$ (text "@caption{" <> capt <> char '}') $$ - text "@end float" + return $ text "@float" $$ img $$ capt $$ text "@end float" blockToTexinfo (Para lst) = inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo @@ -217,7 +220,7 @@ blockToTexinfo (Table caption aligns widths heads rows) = do else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths let tableBody = text ("@multitable " ++ colDescriptors) $$ headers $$ - vcat rowsText $$ + vcat rowsText $$ text "@end multitable" return $ if isEmpty captionText then tableBody <> blankline @@ -241,7 +244,7 @@ tableAnyRowToTexinfo :: String -> [[Block]] -> State WriterState Doc tableAnyRowToTexinfo itemtype aligns cols = - zipWithM alignedBlock aligns cols >>= + zipWithM alignedBlock aligns cols >>= return . (text itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty @@ -358,8 +361,8 @@ inlineToTexinfo :: Inline -- ^ Inline to convert inlineToTexinfo (Emph lst) = inlineListToTexinfo lst >>= return . inCmd "emph" -inlineToTexinfo (Strong lst) = - inlineListToTexinfo lst >>= return . inCmd "strong" +inlineToTexinfo (Strong lst) = + inlineListToTexinfo lst >>= return . inCmd "strong" inlineToTexinfo (Strikeout lst) = do modify $ \st -> st{ stStrikeout = True } diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 26d5ec6d7..5f3bb6bcd 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,7 +31,8 @@ Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intercalate ) @@ -46,9 +47,9 @@ data WriterState = WriterState { -- | Convert Pandoc to Textile. writeTextile :: WriterOptions -> Pandoc -> String -writeTextile opts document = - evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) +writeTextile opts document = + evalState (pandocToTextile opts document) + (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String @@ -90,14 +91,14 @@ escapeCharForTextile x = case x of escapeStringForTextile :: String -> String escapeStringForTextile = concatMap escapeCharForTextile --- | Convert Pandoc block element to Textile. +-- | Convert Pandoc block element to Textile. blockToTextile :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState String + -> State WriterState String blockToTextile _ Null = return "" -blockToTextile opts (Plain inlines) = +blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines blockToTextile opts (Para [Image txt (src,tit)]) = do @@ -236,7 +237,7 @@ listItemToTextile opts items = do -- | Convert definition list item (label, list of blocks) to Textile. definitionListItemToTextile :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState String definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label @@ -294,8 +295,8 @@ tableRowToTextile opts alignStrings rownum cols' = do 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToTextile opts celltype alignment item) + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -320,7 +321,7 @@ tableItemToTextile opts celltype align' item = do -- | Convert list of Pandoc block elements to Textile. blockListToTextile :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState String + -> State WriterState String blockListToTextile opts blocks = mapM (blockToTextile opts) blocks >>= return . vcat @@ -332,11 +333,11 @@ inlineListToTextile opts lst = -- | Convert Pandoc inline element to Textile. inlineToTextile :: WriterOptions -> Inline -> State WriterState String -inlineToTextile opts (Emph lst) = do +inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst return $ if '_' `elem` contents then "<em>" ++ contents ++ "</em>" - else "_" ++ contents ++ "_" + else "_" ++ contents ++ "_" inlineToTextile opts (Strong lst) = do contents <- inlineListToTextile opts lst @@ -377,7 +378,7 @@ inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst inlineToTextile _ (Code _ str) = return $ if '@' `elem` str then "<tt>" ++ escapeStringForXML str ++ "</tt>" - else "@" ++ str ++ "@" + else "@" ++ str ++ "@" inlineToTextile _ (Str str) = return $ escapeStringForTextile str |