diff options
Diffstat (limited to 'src')
53 files changed, 3592 insertions, 1590 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index fd849316b..dd361f8d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -37,10 +37,12 @@ inline links: > module Main where > import Text.Pandoc +> import Text.Pandoc.Error (handleError) > > markdownToRST :: String -> String -> markdownToRST = -> (writeRST def {writerReferenceLinks = True}) . readMarkdown def +> markdownToRST = handleError . +> writeRST def {writerReferenceLinks = True} . +> readMarkdown def > > main = getContents >>= putStrLn . markdownToRST @@ -66,6 +68,7 @@ module Text.Pandoc , mkStringReader , readDocx , readMarkdown + , readCommonMark , readMediaWiki , readRST , readOrg @@ -77,6 +80,7 @@ module Text.Pandoc , readHaddock , readNative , readJSON + , readTWiki , readTxt2Tags , readTxt2TagsNoMacros , readEPUB @@ -108,6 +112,7 @@ module Text.Pandoc , writeOrg , writeAsciiDoc , writeHaddock + , writeCommonMark , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -123,6 +128,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.Org @@ -133,6 +139,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.TWiki import Text.Pandoc.Readers.Docx import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.EPUB @@ -159,11 +166,13 @@ import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.Shared (safeRead, warn, mapLeft) import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Error import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -199,32 +208,35 @@ parseFormatSpec = parse formatSpec "" '-' -> Set.delete ext _ -> Set.insert ext --- auxiliary function for readers: -markdown :: ReaderOptions -> String -> IO Pandoc -markdown o s = do - let (doc, warnings) = readMarkdownWithWarnings o s - mapM_ warn warnings - return doc -data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag)) +data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) -mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader mkStringReader r = StringReader (\o s -> return $ r o s) -mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader +mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader +mkStringReaderWithWarnings r = StringReader $ \o s -> do + case r o s of + Left err -> return $ Left err + Right (doc, warnings) -> do + mapM_ warn warnings + return (Right doc) + +mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) -- | Association list of formats and readers. readers :: [(String, Reader)] readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("json" , mkStringReader readJSON ) - ,("markdown" , StringReader markdown) - ,("markdown_strict" , StringReader markdown) - ,("markdown_phpextra" , StringReader markdown) - ,("markdown_github" , StringReader markdown) - ,("markdown_mmd", StringReader markdown) - ,("rst" , mkStringReader readRST ) + ,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("commonmark" , mkStringReader readCommonMark) + ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) ,("mediawiki" , mkStringReader readMediaWiki) ,("docbook" , mkStringReader readDocBook) ,("opml" , mkStringReader readOPML) @@ -233,6 +245,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) + ,("twiki" , mkStringReader readTWiki) ,("docx" , mkBSReader readDocx) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) @@ -294,6 +307,7 @@ writers = [ ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) ,("haddock" , PureStringWriter writeHaddock) + ,("commonmark" , PureStringWriter writeCommonMark) ] getDefaultExtensions :: String -> Set Extension @@ -355,8 +369,8 @@ class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy +readJSON :: ReaderOptions -> String -> Either PandocError Pandoc +readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/Compat/Locale.hs b/src/Text/Pandoc/Compat/Locale.hs new file mode 100644 index 000000000..ac791136c --- /dev/null +++ b/src/Text/Pandoc/Compat/Locale.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.Locale ( defaultTimeLocale ) +where + +#if MIN_VERSION_time(1,5,0) +import Data.Time.Format ( defaultTimeLocale ) +#else +import System.Locale ( defaultTimeLocale ) +#endif diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs new file mode 100644 index 000000000..73d1e8f08 --- /dev/null +++ b/src/Text/Pandoc/Error.hs @@ -0,0 +1,64 @@ +{- +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Error + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This module provides a standard way to deal with possible errors encounted +during parsing. + +-} +module Text.Pandoc.Error (PandocError(..), handleError) where + +import Text.Parsec.Error +import Text.Parsec.Pos hiding (Line) +import Text.Pandoc.Compat.Except + +type Input = String + +data PandocError = -- | Generic parse failure + ParseFailure String + -- | Error thrown by a Parsec parser + | ParsecError Input ParseError + deriving (Show) + + +instance Error PandocError where + strMsg = ParseFailure + + +-- | An unsafe method to handle `PandocError`s. +handleError :: Either PandocError a -> a +handleError (Right r) = r +handleError (Left err) = + case err of + ParseFailure string -> error string + ParsecError input err' -> + let errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + theline = (lines input ++ [""]) !! (errLine - 1) + in error $ "\nError at " ++ show err' ++ "\n" ++ + theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ + "^" + diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 68b34dcf3..8f0a991ba 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {- Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> @@ -38,8 +39,11 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, hush) import qualified Data.Map as M +import Text.Pandoc.Compat.Except +import Control.Monad.Trans +import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -64,7 +68,7 @@ imageType img = case B.take 4 img of "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps - _ -> fail "Unknown image type" + _ -> (hush . Left) "Unknown image type" imageSize :: ByteString -> Maybe ImageSize imageSize img = do @@ -114,7 +118,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> fail "PNG parse error" + _ -> (hush . Left) "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -143,7 +147,7 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> fail "GIF parse error" + _ -> (hush . Left) "GIF parse error" jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do @@ -174,36 +178,37 @@ findJfifSize bs = do Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) - _ -> fail "JPEG parse error" + _ -> (hush . Left) "JPEG parse error" Just (_,bs'') -> do case map fromIntegral $ unpack $ B.take 2 bs'' of [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables findJfifSize $ B.drop len bs'' - _ -> fail "JPEG parse error" - Nothing -> fail "Did not find length record" + _ -> (hush . Left) "JPEG parse error" + Nothing -> (hush . Left) "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize bs = runGet (Just <$> exifHeader bl) bl +exifSize bs = hush . runGet header $ bl where bl = BL.fromChunks [bs] + header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> Get ImageSize +exifHeader :: BL.ByteString -> ExceptT String Get ImageSize exifHeader hdr = do - _app1DataSize <- getWord16be - exifHdr <- getWord32be - unless (exifHdr == 0x45786966) $ fail "Did not find exif header" - zeros <- getWord16be - unless (zeros == 0) $ fail "Expected zeros after exif header" + _app1DataSize <- lift getWord16be + exifHdr <- lift getWord32be + unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" + zeros <- lift getWord16be + unless (zeros == 0) $ throwError "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: let tiffHeader = BL.drop 8 hdr - byteAlign <- getWord16be + byteAlign <- lift getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = if bigEndian @@ -213,17 +218,17 @@ exifHeader hdr = do num <- getWord32 den <- getWord32 return $ fromIntegral num / fromIntegral den - tagmark <- getWord16 - unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" - ifdOffset <- getWord32 - skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- getWord16 - let ifdEntry = do - tag <- getWord16 >>= \t -> - maybe (return UnknownTagType) return - (M.lookup t tagTypeTable) - dataFormat <- getWord16 - numComponents <- getWord32 + tagmark <- lift getWord16 + unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" + ifdOffset <- lift getWord32 + lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- lift getWord16 + let ifdEntry :: ExceptT String Get (TagType, DataFormat) + ifdEntry = do + tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable + <$> lift getWord16 + dataFormat <- lift getWord16 + numComponents <- lift getWord32 (fmt, bytesPerComponent) <- case dataFormat of 1 -> return (UnsignedByte . runGet getWord8, 1) @@ -238,9 +243,10 @@ exifHeader hdr = do 10 -> return (SignedRational . runGet getRational, 8) 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) - _ -> fail $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here + payload <- lift $ + if totalBytes <= 4 -- data is right here then fmt <$> (getLazyByteString (fromIntegral totalBytes) <* skip (4 - totalBytes)) @@ -252,9 +258,9 @@ exifHeader hdr = do entries <- sequence $ replicate (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset) -> do - pos <- bytesRead - skip (fromIntegral offset - (fromIntegral pos - 8)) - numsubentries <- getWord16 + pos <- lift bytesRead + lift $ skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- lift getWord16 sequence $ replicate (fromIntegral numsubentries) ifdEntry _ -> return [] diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 3b3b3b5b3..2fdba93e0 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -328,7 +328,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("oth","application/vnd.oasis.opendocument.text-web") ,("otp","application/vnd.oasis.opendocument.presentation-template") ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") - ,("otf","application/x-font-opentype") + ,("otf","application/vnd.ms-opentype") ,("ott","application/vnd.oasis.opendocument.text-template") ,("oza","application/x-oz-application") ,("p","text/x-pascal") @@ -477,6 +477,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("vrml","model/vrml") ,("vs","text/plain") ,("vsd","application/vnd.visio") + ,("vtt","text/vtt") ,("wad","application/x-doom") ,("wav","audio/x-wav") ,("wax","audio/x-ms-wax") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 5921b56cf..1246cdc8f 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} {- Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> @@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString)) - deriving (Monoid) +newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) + deriving (Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) @@ -65,7 +67,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert fp (mime, contents) mediamap) + MediaBag (M.insert (splitPath fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp @@ -75,14 +77,14 @@ insertMedia fp mbMime contents (MediaBag mediamap) = lookupMedia :: FilePath -> MediaBag -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap +lookupMedia fp (MediaBag mediamap) = M.lookup (splitPath fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> - ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + (((joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap -- | Extract contents of MediaBag to a given directory. Print informational -- messages if 'verbose' is true. @@ -93,7 +95,7 @@ extractMediaBag :: Bool extractMediaBag verbose dir (MediaBag mediamap) = do sequence_ $ M.foldWithKey (\fp (_ ,contents) -> - ((writeMedia verbose dir (fp, contents)):)) [] mediamap + ((writeMedia verbose dir (joinPath fp, contents)):)) [] mediamap writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () writeMedia verbose dir (subpath, bs) = do diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 84ccbbdc9..1776d14e5 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveDataTypeable #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -51,6 +52,8 @@ import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) import Data.Monoid +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Individually selectable syntax extensions. data Extension = @@ -74,7 +77,7 @@ data Extension = | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) | Ext_fenced_code_blocks -- ^ Parse fenced code blocks | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags @@ -109,7 +112,8 @@ data Extension = | Ext_implicit_header_references -- ^ Implicit reference links for headers | Ext_line_blocks -- ^ RST style line blocks | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - deriving (Show, Read, Enum, Eq, Ord, Bounded) + | Ext_shortcut_reference_links -- ^ Shortcut reference links + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable) pandocExtensions :: Set Extension pandocExtensions = Set.fromList @@ -151,6 +155,7 @@ pandocExtensions = Set.fromList , Ext_header_attributes , Ext_implicit_header_references , Ext_line_blocks + , Ext_shortcut_reference_links ] phpMarkdownExtraExtensions :: Set Extension @@ -164,6 +169,7 @@ phpMarkdownExtraExtensions = Set.fromList , Ext_intraword_underscores , Ext_header_attributes , Ext_abbreviations + , Ext_shortcut_reference_links ] githubMarkdownExtensions :: Set Extension @@ -180,6 +186,7 @@ githubMarkdownExtensions = Set.fromList , Ext_strikeout , Ext_hard_line_breaks , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links ] multimarkdownExtensions :: Set Extension @@ -202,7 +209,9 @@ multimarkdownExtensions = Set.fromList strictExtensions :: Set Extension strictExtensions = Set.fromList - [ Ext_raw_html ] + [ Ext_raw_html + , Ext_shortcut_reference_links + ] data ReaderOptions = ReaderOptions{ readerExtensions :: Set Extension -- ^ Syntax extensions @@ -220,7 +229,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges -} deriving (Show, Read) +} deriving (Show, Read, Data, Typeable) instance Default ReaderOptions where def = ReaderOptions{ @@ -242,7 +251,7 @@ instance Default ReaderOptions -- Writer options -- -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read) +data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable) data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js @@ -251,18 +260,19 @@ data HTMLMathMethod = PlainMath | WebTeX String -- url of TeX->image script. | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js - deriving (Show, Read, Eq) + | KaTeX String String -- url of stylesheet and katex.js + deriving (Show, Read, Eq, Data, Typeable) data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -271,13 +281,13 @@ data HTMLSlideVariant = S5Slides | DZSlides | RevealJsSlides | NoSlides - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for writers data WriterOptions = WriterOptions @@ -322,7 +332,9 @@ data WriterOptions = WriterOptions , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader - } deriving Show + , writerVerbose :: Bool -- ^ Verbose debugging output + , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + } deriving (Show, Data, Typeable) instance Default WriterOptions where def = WriterOptions { writerStandalone = False @@ -365,6 +377,8 @@ instance Default WriterOptions where , writerReferenceODT = Nothing , writerReferenceDocx = Nothing , writerMediaBag = mempty + , writerVerbose = False + , writerLaTeXArgs = [] } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index d5f7c609d..2d602a0df 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -36,10 +36,11 @@ import qualified Data.ByteString.Lazy.Char8 as BC import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath +import System.IO (stderr, stdout) import System.Directory import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment -import Control.Monad (unless, (<=<)) +import Control.Monad (unless, when, (<=<)) import qualified Control.Exception as E import Control.Applicative ((<$)) import Data.List (isInfixOf) @@ -70,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' - tex2pdf' tmpdir program source + args = writerLaTeXArgs opts + tex2pdf' (writerVerbose opts) args tmpdir program source handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -106,8 +108,7 @@ convertImages tmpdir (Image ils (src, tit)) = do img <- convertImage tmpdir src newPath <- case img of - Left e -> src <$ - warn ("Unable to convert image `" ++ src ++ "':\n" ++ e) + Left e -> src <$ warn e Right fp -> return fp return (Image ils (newPath, tit)) convertImages _ x = return x @@ -121,7 +122,8 @@ convertImage tmpdir fname = Just "application/pdf" -> doNothing _ -> JP.readImage fname >>= \res -> case res of - Left msg -> return $ Left msg + Left _ -> return $ Left $ "Unable to convert `" ++ + fname ++ "' for use with pdflatex." Right img -> E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ \(e :: E.SomeException) -> return (Left (show e)) @@ -130,22 +132,25 @@ convertImage tmpdir fname = mime = getMimeType fname doNothing = return (Right fname) -tex2pdf' :: FilePath -- ^ temp directory for output +tex2pdf' :: Bool -- ^ Verbose output + -> [String] -- ^ Arguments to the latex-engine + -> FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' tmpDir program source = do +tex2pdf' verbose args tmpDir program source = do let numruns = if "\\tableofcontents" `isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source + (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' let extramsg = case logmsg of - x | "! Package inputenc Error" `BC.isPrefixOf` x -> - "\nTry running pandoc with --latex-engine=xelatex." + x | ("! Package inputenc Error" `BC.isPrefixOf` x + && program /= "xelatex") + -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" @@ -170,9 +175,9 @@ extractMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: String -> Int -> FilePath -> String +runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram program runsLeft tmpDir source = do +runTeXProgram verbose program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source @@ -185,16 +190,31 @@ runTeXProgram program runsLeft tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir', file'] + "-output-directory", tmpDir', file'] ++ args env' <- getEnvironment let sep = searchPathSeparator:[] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] + when (verbose && runNumber == 1) $ do + putStrLn $ "[makePDF] Command line:" + putStrLn $ program ++ " " ++ unwords (map show programArgs) + putStr "\n" + putStrLn $ "[makePDF] Environment:" + mapM_ print env'' + putStr "\n" + putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" + B.readFile file' >>= B.putStr + putStr "\n" (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty - if runsLeft > 1 - then runTeXProgram program (runsLeft - 1) tmpDir source + when verbose $ do + putStrLn $ "[makePDF] Run #" ++ show runNumber + B.hPutStr stdout out + B.hPutStr stderr err + putStr "\n" + if runNumber <= numRuns + then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1fba1e21..33120e55d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -65,6 +65,8 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, + returnWarnings, + returnState, readWithM, testStringWith, guardEnabled, @@ -103,11 +105,8 @@ module Text.Pandoc.Parsing ( anyLine, applyMacros', Parser, ParserT, - F(..), - runF, - askF, - asksF, token, + generalize, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -162,6 +161,8 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, + addWarning, + (<+?>) ) where @@ -186,30 +187,16 @@ import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.Identity -import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) +import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) - -runF :: F a -> ParserState -> a -runF = runReader . unF - -askF :: F ParserState -askF = F ask - -asksF :: (ParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = liftM mconcat . sequence - -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do @@ -312,12 +299,14 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a +parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput setInput str result <- parser + spaces + eof setInput oldInput setPosition oldPos return result @@ -452,7 +441,7 @@ uri = try $ do let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference let punct = skipMany1 (char ',') - <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<')) + <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) let uriChunk = skipMany1 wordChar <|> percentEscaped <|> entity @@ -472,7 +461,12 @@ mathInlineWith op cl = try $ do string op notFollowedBy space words' <- many1Till (count 1 (noneOf " \t\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> (char '\\' >> + -- This next clause is needed because \text{..} can + -- contain $, \(\), etc. + (try (string "text" >> + (("\\text" ++) <$> inBalancedBraces 0 "")) + <|> (\c -> ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') @@ -480,6 +474,23 @@ mathInlineWith op cl = try $ do ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' + where + inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces 0 "" = do + c <- anyChar + if c == '{' + then inBalancedBraces 1 "{" + else mzero + inBalancedBraces 0 s = return $ reverse s + inBalancedBraces numOpen ('\\':xs) = do + c <- anyChar + inBalancedBraces numOpen (c:'\\':xs) + inBalancedBraces numOpen xs = do + c <- anyChar + case c of + '}' -> inBalancedBraces (numOpen - 1) (c:xs) + '{' -> inBalancedBraces (numOpen + 1) (c:xs) + _ -> inBalancedBraces numOpen (c:xs) mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String mathDisplayWith op cl = try $ do @@ -837,27 +848,30 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp +returnWarnings :: (Stream s m c) + => ParserT s ParserState m a + -> ParserT s ParserState m (a, [String]) +returnWarnings p = do + doc <- p + warnings <- stateWarnings <$> getState + return (doc, warnings) + +-- | Return the final internal state with the result of a parser +returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st) +returnState p = (,) <$> p <*> getState + -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a @@ -879,7 +893,6 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -888,14 +901,14 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: - -- roles), 3) Source language annotation for code (could be used to - -- annotate role classes too). + -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String] -- ^ Warnings generated by the parser + stateWarnings :: [String], -- ^ Warnings generated by the parser + stateInFootnote :: Bool -- ^ True if in a footnote block. } instance Default ParserState where @@ -977,7 +990,6 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, - stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], @@ -990,7 +1002,8 @@ defaultParserState = stateCaption = Nothing, stateInHtmlBlock = Nothing, stateMarkdownAttribute = False, - stateWarnings = []} + stateWarnings = [], + stateInFootnote = False } -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () @@ -1029,7 +1042,7 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, F Blocks)] -- used in markdown reader +type NoteTable' = [(String, Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) @@ -1178,7 +1191,7 @@ citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) char '@' - firstChar <- letter <|> char '_' + firstChar <- alphaNum <|> char '_' let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") @@ -1223,3 +1236,17 @@ applyMacros' target = do then do macros <- extractMacros <$> getState return $ applyMacros macros target else return target + +-- | Append a warning to the log. +addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m () +addWarning mbpos msg = + updateState $ \st -> st{ + stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : + stateWarnings st } + +generalize :: (Monad m) => Parser s st a -> ParserT s st m a +generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s))) + +infixr 5 <+?> +(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a +a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 1e72c2040..9a97dfc21 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a) => Doc -> DocState a renderDoc = renderList . toList . unDoc +data IsBlock = IsBlock Int [String] + +-- This would be nicer with a pattern synonym +-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) + renderList :: (IsString a, Monoid a) => [D] -> DocState a renderList [] = return () @@ -286,6 +291,9 @@ renderList (BlankLines num : xs) = do | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n") renderList xs +renderList (CarriageReturn : BlankLines m : xs) = + renderList (BlankLines m : xs) + renderList (CarriageReturn : xs) = do st <- get if newlines st > 0 || null xs @@ -320,11 +328,11 @@ renderList (BreakingSpace : xs) = do outp 1 " " renderList xs' -renderList (b1@Block{} : b2@Block{} : xs) = - renderList (mergeBlocks False b1 b2 : xs) +renderList (Block i1 s1 : Block i2 s2 : xs) = + renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) -renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = - renderList (mergeBlocks True b1 b2 : xs) +renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = + renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) renderList (Block width lns : xs) = do st <- get @@ -336,15 +344,14 @@ renderList (Block width lns : xs) = do modify $ \s -> s{ prefix = oldPref } renderList xs -mergeBlocks :: Bool -> D -> D -> D -mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = +mergeBlocks :: Bool -> IsBlock -> IsBlock -> D +mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs -mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" blockToDoc :: Int -> [String] -> Doc blockToDoc _ lns = text $ intercalate "\n" lns @@ -531,4 +538,4 @@ charWidth c = -- | Get real length of string, taking into account combining and double-wide -- characters. realLength :: String -> Int -realLength = sum . map charWidth +realLength = foldr (\a b -> charWidth a + b) 0 diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs new file mode 100644 index 000000000..51a35c8ad --- /dev/null +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -0,0 +1,119 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of CommonMark-formatted plain text to 'Pandoc' document. + +CommonMark is a strongly specified variant of Markdown: http://commonmark.org. +-} +module Text.Pandoc.Readers.CommonMark (readCommonMark) +where + +import CMark +import Data.Text (unpack, pack) +import Data.List (groupBy) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Error + +-- | Parse a CommonMark formatted string into a 'Pandoc' structure. +readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc +readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack + where opts' = if readerSmart opts + then [optNormalize, optSmart] + else [optNormalize] + +nodeToPandoc :: Node -> Pandoc +nodeToPandoc (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr addBlock [] nodes +nodeToPandoc n = -- shouldn't happen + Pandoc nullMeta $ foldr addBlock [] [n] + +addBlocks :: [Node] -> [Block] +addBlocks = foldr addBlock [] + +addBlock :: Node -> [Block] -> [Block] +addBlock (Node _ PARAGRAPH nodes) = + (Para (addInlines nodes) :) +addBlock (Node _ HRULE _) = + (HorizontalRule :) +addBlock (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks nodes) :) +addBlock (Node _ (HTML t) _) = + (RawBlock (Format "html") (unpack t) :) +addBlock (Node _ (CODE_BLOCK info t) _) = + (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) +addBlock (Node _ (HEADER lev) nodes) = + (Header lev ("",[],[]) (addInlines nodes) :) +addBlock (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks . children) nodes) :) + where constructor = case listType listAttrs of + BULLET_LIST -> BulletList + ORDERED_LIST -> OrderedList + (start, DefaultStyle, delim) + start = listStart listAttrs + setTightness = if listTight listAttrs + then map paraToPlain + else id + paraToPlain (Para xs) = Plain (xs) + paraToPlain x = x + delim = case listDelim listAttrs of + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock (Node _ ITEM _) = id -- handled in LIST +addBlock _ = id + +children :: Node -> [Node] +children (Node _ _ ns) = ns + +addInlines :: [Node] -> [Inline] +addInlines = foldr addInline [] + +addInline :: Node -> [Inline] -> [Inline] +addInline (Node _ (TEXT t) _) = (map toinl clumps ++) + where raw = unpack t + clumps = groupBy samekind raw + samekind ' ' ' ' = True + samekind ' ' _ = False + samekind _ ' ' = False + samekind _ _ = True + toinl (' ':_) = Space + toinl xs = Str xs +addInline (Node _ LINEBREAK _) = (LineBreak :) +addInline (Node _ SOFTBREAK _) = (Space :) +addInline (Node _ (INLINE_HTML t) _) = + (RawInline (Format "html") (unpack t) :) +addInline (Node _ (CODE t) _) = + (Code ("",[],[]) (unpack t) :) +addInline (Node _ EMPH nodes) = + (Emph (addInlines nodes) :) +addInline (Node _ STRONG nodes) = + (Strong (addInlines nodes) :) +addInline (Node _ (LINK url title) nodes) = + (Link (addInlines nodes) (unpack url, unpack title) :) +addInline (Node _ (IMAGE url title) nodes) = + (Image (addInlines nodes) (unpack url, unpack title) :) +addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 59ff3e717..98a142840 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -15,6 +15,9 @@ import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Compat.Except +import Data.Default {- @@ -70,8 +73,8 @@ List of all DocBook tags, with [x] indicating implemented, [x] book - A book [x] bookinfo - Meta-information for a Book [x] bridgehead - A free-floating heading -[ ] callout - A “called out” description of a marked Area -[ ] calloutlist - A list of Callouts +[x] callout - A “called out” description of a marked Area +[x] calloutlist - A list of Callouts [x] caption - A caption [x] caution - A note of caution [x] chapter - A chapter, as of a book @@ -81,7 +84,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] citerefentry - A citation to a reference page [ ] citetitle - The title of a cited work [ ] city - The name of a city in an address -[ ] classname - The name of a class, in the object-oriented programming sense +[x] classname - The name of a class, in the object-oriented programming sense [ ] classsynopsis - The syntax summary for a class definition [ ] classsynopsisinfo - Information supplementing the contents of a ClassSynopsis @@ -169,9 +172,9 @@ List of all DocBook tags, with [x] indicating implemented, [ ] guibutton - The text on a button in a GUI [ ] guiicon - Graphic and/or text appearing as a icon in a GUI [ ] guilabel - The text of a label in a GUI -[ ] guimenu - The name of a menu in a GUI -[ ] guimenuitem - The name of a terminal menu item in a GUI -[ ] guisubmenu - The name of a submenu in a GUI +[x] guimenu - The name of a menu in a GUI +[x] guimenuitem - The name of a terminal menu item in a GUI +[x] guisubmenu - The name of a submenu in a GUI [ ] hardware - A physical part of a computer system [ ] highlights - A summary of the main points of the discussed component [ ] holder - The name of the individual or organization that holds a copyright @@ -206,10 +209,10 @@ List of all DocBook tags, with [x] indicating implemented, other dingbat [ ] itermset - A set of index terms in the meta-information of a document [ ] jobtitle - The title of an individual in an organization -[ ] keycap - The text printed on a key on a keyboard +[x] keycap - The text printed on a key on a keyboard [ ] keycode - The internal, frequently numeric, identifier for a key on a keyboard -[ ] keycombo - A combination of input actions +[x] keycombo - A combination of input actions [ ] keysym - The symbolic name of a key on a keyboard [ ] keyword - One of a set of keywords describing the content of a document [ ] keywordset - A set of keywords describing the content of a document @@ -237,7 +240,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] mediaobject - A displayed media object (video, audio, image, etc.) [ ] mediaobjectco - A media object that contains callouts [x] member - An element of a simple list -[ ] menuchoice - A selection or series of selections from a menu +[x] menuchoice - A selection or series of selections from a menu [ ] methodname - The name of a method [ ] methodparam - Parameters to a method [ ] methodsynopsis - A syntax summary for a method @@ -471,7 +474,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] token - A unit of information [x] tr - A row in an HTML table [ ] trademark - A trademark -[ ] type - The classification of a value +[x] type - The classification of a value [x] ulink - A link that addresses its target by means of a URL (Uniform Resource Locator) [x] uri - A Uniform Resource Identifier @@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = State DBState +type DB = ExceptT PandocError (State DBState) data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int , dbFigureTitle :: Inlines } deriving Show -readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') - DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - } +instance Default DBState where + def = DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote + , dbMeta = mempty + , dbAcceptsMeta = False + , dbBook = False + , dbFigureTitle = mempty } + + +readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc +readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs + where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' inp' = handleInstructions inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it @@ -603,7 +608,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", - "screen","programlisting","example"] + "screen","programlisting","example","calloutlist"] isBlockElement _ = False -- Trim leading and trailing newline characters @@ -712,6 +717,7 @@ parseBlock (Elem e) = "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e "abstract" -> blockQuote <$> getBlocks e + "calloutlist" -> bulletList <$> callouts "itemizedlist" -> bulletList <$> listitems "orderedlist" -> do let listStyle = case attrValue "numeration" e of @@ -772,11 +778,6 @@ parseBlock (Elem e) = x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e - strContentRecursive = strContent . (\e' -> e'{ elContent = - map elementToStr $ elContent e' }) - elementToStr :: Content -> Content - elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing - elementToStr x = x parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -785,6 +786,7 @@ parseBlock (Elem e) = contents <- getBlocks e return $ blockQuote (contents <> attrib) listitems = mapM getBlocks $ filterChildren (named "listitem") e + callouts = mapM getBlocks $ filterChildren (named "callout") e deflistitems = mapM parseVarListEntry $ filterChildren (named "varlistentry") e parseVarListEntry e' = do @@ -866,18 +868,29 @@ parseBlock (Elem e) = parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry sect n = do isbook <- gets dbBook let n' = if isbook || n == 0 then n + 1 else n - headerText <- case filterChild (named "title") e of + headerText <- case filterChild (named "title") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "title")) of Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e + let ident = attrValue "id" e modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ header n' headerText <> b + return $ headerWith (ident,[],[]) n' headerText <> b metaBlock = acceptingMetadata (getBlocks e) >> return mempty getInlines :: Element -> DB Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') +strContentRecursive :: Element -> String +strContentRecursive = strContent . + (\e' -> e'{ elContent = map elementToStr $ elContent e' }) + +elementToStr :: Content -> Content +elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing +elementToStr x = x + parseInline :: Content -> DB Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = @@ -901,6 +914,7 @@ parseInline (Elem e) = else doubleQuoted contents "simplelist" -> simpleList "segmentedlist" -> segmentedList + "classname" -> codeWithLang "code" -> codeWithLang "filename" -> codeWithLang "literal" -> codeWithLang @@ -920,6 +934,10 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" + "keycap" -> return (str $ strContent e) + "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) + "menuchoice" -> menuchoice <$> (mapM parseInline $ + filter isGuiMenu $ elContent e) "xref" -> return $ str "?" -- so at least you know something is there "email" -> return $ link ("mailto:" ++ strContent e) "" $ str $ strContent e @@ -959,7 +977,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContent e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -974,3 +992,10 @@ parseInline (Elem e) = then mempty else strong tit <> linebreak return $ linebreak <> tit' <> segs + keycombo = spanWith ("",["keycombo"],[]) . + mconcat . intersperse (str "+") + menuchoice = spanWith ("",["menuchoice"],[]) . + mconcat . intersperse (text " > ") + isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || + named "guimenuitem" x + isGuiMenu _ = False diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8ebe59569..67a97ae85 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,8 +84,7 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Data.Maybe (isJust) -import Data.List (delete, stripPrefix, (\\), intersect) +import Data.List (delete, (\\), intersect) import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) @@ -97,14 +96,17 @@ import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Error +import Text.Pandoc.Compat.Except + readDocx :: ReaderOptions -> B.ByteString - -> (Pandoc, MediaBag) + -> Either PandocError (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> (Pandoc meta blks, mediaBag) where - (meta, blks, mediaBag) = (docxToOutput opts docx) - Left _ -> error $ "couldn't parse docx file" + Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) + <$> (docxToOutput opts docx) + Left _ -> Left (ParseFailure "couldn't parse docx file") data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -123,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ReaderT DEnv (State DState) +type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) -evalDocxContext :: DocxContext a -> DEnv -> DState -> a -evalDocxContext ctx env st = evalState (runReaderT ctx env) st +evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a +evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -197,9 +199,6 @@ fixAuthors mv = mv codeStyles :: [String] codeStyles = ["VerbatimChar"] -blockQuoteDivs :: [String] -blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"] - codeDivs :: [String] codeDivs = ["SourceCode"] @@ -281,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs , s `elem` codeStyles = - return $ code $ concatMap runElemToString runElems + let rPr = resolveDependentRunStyle rs + codeString = code $ concatMap runElemToString runElems + in + return $ case rVertAlign rPr of + Just SupScrpt -> superscript codeString + Just SubScrpt -> subscript codeString + _ -> codeString | otherwise = do let ils = concatReduce (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils @@ -401,7 +406,9 @@ singleParaToPlain blks singleParaToPlain blks = blks cellToBlocks :: Cell -> DocxContext Blocks -cellToBlocks (Cell bps) = concatReduce <$> mapM bodyPartToBlocks bps +cellToBlocks (Cell bps) = do + blks <- concatReduce <$> mapM bodyPartToBlocks bps + return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks rowToBlocksList :: Row -> DocxContext [Blocks] rowToBlocksList (Row cells) = do @@ -427,9 +434,9 @@ parStyleToTransform pPr let pPr' = pPr { pStyle = cs, indentation = Nothing} in (divWith ("", [c], [])) . (parStyleToTransform pPr') - | (c:cs) <- pStyle pPr - , c `elem` blockQuoteDivs = - let pPr' = pPr { pStyle = cs \\ blockQuoteDivs } + | (_:cs) <- pStyle pPr + , Just True <- pBlockQuote pPr = + let pPr' = pPr { pStyle = cs } in blockQuote . (parStyleToTransform pPr') | (_:cs) <- pStyle pPr = @@ -460,13 +467,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ parStyleToTransform pPr $ codeBlock $ concatMap parPartToString parparts - | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr - , Just n <- isHeaderClass c = do + | Just (style, n) <- pHeading pPr = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ (concatReduce <$> mapM parPartToInlines parparts) - makeHeaderAnchor $ - headerWith ("", delete ("Heading" ++ show n) cs, []) n ils + headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do ils <- concatReduce <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) @@ -535,34 +540,21 @@ rewriteLink' l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink' il = return il -rewriteLink :: Blocks -> DocxContext Blocks -rewriteLink ils = case viewl $ unMany ils of - (x :< xs) -> do - x' <- walkM rewriteLink' x - xs' <- rewriteLink $ Many xs - return $ (singleton x') <> xs' - EmptyL -> return ils +rewriteLinks :: [Block] -> DocxContext [Block] +rewriteLinks = mapM (walkM rewriteLink') bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- concatReduce <$> mapM bodyPartToBlocks blkbps - blks' <- rewriteLink blks + blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks mediaBag <- gets docxMediaBag return $ (meta, - blocksToDefinitions $ blocksToBullets $ toList blks', + blks', mediaBag) -docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def - -isHeaderClass :: String -> Maybe Int -isHeaderClass s | Just s' <- stripPrefix "Heading" s = - case reads s' :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing -isHeaderClass _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index ea195c14a..c265ad074 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -160,8 +160,14 @@ flatToBullets' num xs@(b : elems) flatToBullets :: [Block] -> [Block] flatToBullets elems = flatToBullets' (-1) elems +singleItemHeaderToHeader :: Block -> Block +singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h +singleItemHeaderToHeader blk = blk + + blocksToBullets :: [Block] -> [Block] blocksToBullets blks = + map singleItemHeaderToHeader $ bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) @@ -221,7 +227,3 @@ removeListDivs = concatMap removeListDivs' blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index e7a6c3ffb..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, ViewPatterns #-} +{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-} {- Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> @@ -65,7 +65,8 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) -import Data.Char (readLitChar, ord, chr) +import Text.Pandoc.Readers.Docx.Util +import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes , envNumbering :: Numbering @@ -73,6 +74,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envMedia :: Media , envFont :: Maybe Font , envCharStyles :: CharStyleMap + , envParStyles :: ParStyleMap } deriving Show @@ -107,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -122,8 +122,12 @@ type Media = [(FilePath, B.ByteString)] type CharStyle = (String, RunStyle) +type ParStyle = (String, ParStyleData) + type CharStyleMap = M.Map String RunStyle +type ParStyleMap = M.Map String ParStyleData + data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show @@ -152,6 +156,9 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool + , pHeading :: Maybe (String, Int) + , pNumInfo :: Maybe (String, String) + , pBlockQuote :: Maybe Bool } deriving Show @@ -159,6 +166,9 @@ defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False + , pHeading = Nothing + , pNumInfo = Nothing + , pBlockQuote = Nothing } @@ -213,6 +223,12 @@ data RunStyle = RunStyle { isBold :: Maybe Bool , rStyle :: Maybe CharStyle} deriving Show +data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) + , isBlockQuote :: Maybe Bool + , numInfo :: Maybe (String, String) + , psStyle :: Maybe ParStyle} + deriving Show + defaultRunStyle :: RunStyle defaultRunStyle = RunStyle { isBold = Nothing , isItalic = Nothing @@ -232,18 +248,14 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive numbering = archiveToNumbering archive rels = archiveToRelationships archive media = archiveToMedia archive - styles = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles + (styles, parstyles) = archiveToStyles archive + rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -252,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -263,47 +275,69 @@ elemToBody ns element | isElem ns "w" "body" element = (\bps -> return $ Body bps) elemToBody _ _ = throwError WrongElem -archiveToStyles :: Archive -> CharStyleMap +archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) archiveToStyles zf = let stylesElem = findEntryByPath "word/styles.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) in case stylesElem of - Nothing -> M.empty + Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in - M.fromList $ buildBasedOnList namespaces styElem Nothing + ( M.fromList $ buildBasedOnList namespaces styElem + (Nothing :: Maybe CharStyle), + M.fromList $ buildBasedOnList namespaces styElem + (Nothing :: Maybe ParStyle) ) -isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool +isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttr (elemName ns "w" "type") element + , styleType == cStyleType parentStyle , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>= findAttr (elemName ns "w" "val") - , Just (parentId, _) <- parentStyle = (basedOnVal == parentId) + , Just ps <- parentStyle = (basedOnVal == getStyleId ps) | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleType <- findAttr (elemName ns "w" "type") element + , styleType == cStyleType parentStyle , Nothing <- findChild (elemName ns "w" "basedOn") element , Nothing <- parentStyle = True | otherwise = False -elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle -elemToCharStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "character" <- findAttr (elemName ns "w" "type") element - , Just styleId <- findAttr (elemName ns "w" "styleId") element = - Just (styleId, elemToRunStyle ns element parentStyle) - | otherwise = Nothing - -getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle] +class ElemToStyle a where + cStyleType :: Maybe a -> String + elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a + getStyleId :: a -> String + +instance ElemToStyle CharStyle where + cStyleType _ = "character" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "character" <- findAttr (elemName ns "w" "type") element + , Just styleId <- findAttr (elemName ns "w" "styleId") element = + Just (styleId, elemToRunStyle ns element parentStyle) + | otherwise = Nothing + getStyleId s = fst s + +instance ElemToStyle ParStyle where + cStyleType _ = "paragraph" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "paragraph" <- findAttr (elemName ns "w" "type") element + , Just styleId <- findAttr (elemName ns "w" "styleId") element = + Just (styleId, elemToParStyleData ns element parentStyle) + | otherwise = Nothing + getStyleId s = fst s + +getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] getStyleChildren ns element parentStyle | isElem ns "w" "styles" element = - mapMaybe (\e -> elemToCharStyle ns e parentStyle) $ + mapMaybe (\e -> elemToStyle ns e parentStyle) $ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element | otherwise = [] -buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle] +buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] buildBasedOnList ns element rootStyle = case (getStyleChildren ns element rootStyle) of [] -> [] @@ -317,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -420,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -449,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element @@ -510,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger} elemToParIndentation _ _ = Nothing - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element | isElem ns "w" "p" element = do - let pPr = findChild (elemName ns "w" "pPr") element - numPr = pPr >>= findChild (elemName ns "w" "numPr") - lvl <- numPr >>= - findChild (elemName ns "w" "ilvl") >>= - findAttr (elemName ns "w" "val") - numId <- numPr >>= - findChild (elemName ns "w" "numId") >>= - findAttr (elemName ns "w" "val") - return (numId, lvl) -elemToNumInfo _ _ = Nothing - testBitMask :: String -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of @@ -542,18 +553,28 @@ elemToBodyPart ns element return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element - , Just (numId, lvl) <- elemToNumInfo ns element = do - let parstyle = elemToParagraphStyle ns element + , Just (numId, lvl) <- getNumInfo ns element = do + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) num <- asks envNumbering case lookupLevel numId lvl num of - Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> throwError WrongElem elemToBodyPart ns element | isElem ns "w" "p" element = do - let parstyle = elemToParagraphStyle ns element - parparts <- mapD (elemToParPart ns) (elChildren element) - return $ Paragraph parstyle parparts + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty + parparts <- mapD (elemToParPart ns) (elChildren element) + case pNumInfo parstyle of + Just (numId, lvl) -> do + num <- asks envNumbering + case lookupLevel numId lvl num of + Just levelInfo -> + return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> + throwError WrongElem + Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -584,7 +605,7 @@ expandDrawingId s = do target <- asks (lookupRelationship s . envRelationships) case target of Just filepath -> do - bytes <- asks (lookup (combine "word" filepath) . envMedia) + bytes <- asks (lookup ("word/" ++ filepath) . envMedia) case bytes of Just bs -> return (filepath, bs) Nothing -> throwError DocxError @@ -601,6 +622,16 @@ elemToParPart ns element case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) Nothing -> throwError WrongElem +-- The below is an attempt to deal with images in deprecated vml format. +elemToParPart ns element + | isElem ns "w" "r" element + , Just _ <- findChild (elemName ns "w" "pict") element = + let drawing = findElement (elemName ns "v" "imagedata") element + >>= findAttr (elemName ns "r" "id") + in + case drawing of + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) + Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element = elemToRun ns element >>= (\r -> return $ PlainRun r) @@ -625,17 +656,20 @@ elemToParPart ns element return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttr (elemName ns "w" "anchor") element = do + , Just relId <- findAttr (elemName ns "r" "id") element = do runs <- mapD (elemToRun ns) (elChildren element) - return $ InternalHyperLink anchor runs + rels <- asks envRelationships + case lookupRelationship relId rels of + Just target -> do + case findAttr (elemName ns "w" "anchor") element of + Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + Nothing -> return $ ExternalHyperLink target runs + Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttr (elemName ns "r" "id") element = do + , Just anchor <- findAttr (elemName ns "w" "anchor") element = do runs <- mapD (elemToRun ns) (elChildren element) - rels <- asks envRelationships - return $ case lookupRelationship relId rels of - Just target -> ExternalHyperLink target runs - Nothing -> ExternalHyperLink "" runs + return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "m" "oMath" element = (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) @@ -684,14 +718,30 @@ elemToRun ns element return $ Run runStyle runElems elemToRun _ _ = throwError WrongElem -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element +getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a +getParentStyleValue field style + | Just value <- field style = Just value + | Just parentStyle <- psStyle style + = getParentStyleValue field (snd parentStyle) +getParentStyleValue _ _ = Nothing + +getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] -> + Maybe a +getParStyleField field stylemap styles + | x <- mapMaybe (\x -> M.lookup x stylemap) styles + , (y:_) <- mapMaybe (getParentStyleValue field) x + = Just y +getParStyleField _ _ _ = Nothing + +elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle +elemToParagraphStyle ns element sty | Just pPr <- findChild (elemName ns "w" "pPr") element = - ParagraphStyle - {pStyle = + let style = mapMaybe (findAttr (elemName ns "w" "val")) (findChildren (elemName ns "w" "pStyle") pPr) + in ParagraphStyle + {pStyle = style , indentation = findChild (elemName ns "w" "ind") pPr >>= elemToParIndentation ns @@ -703,8 +753,11 @@ elemToParagraphStyle ns element Just "none" -> False Just _ -> True Nothing -> False + , pHeading = getParStyleField headingLev sty style + , pNumInfo = getParStyleField numInfo sty style + , pBlockQuote = getParStyleField isBlockQuote sty style } -elemToParagraphStyle _ _ = defaultParagraphStyle +elemToParagraphStyle _ _ _ = defaultParagraphStyle checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag @@ -758,6 +811,59 @@ elemToRunStyle ns element parentStyle } elemToRunStyle _ _ _ = defaultRunStyle +isNumericNotNull :: String -> Bool +isNumericNotNull str = (str /= []) && (all isDigit str) + +getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) +getHeaderLevel ns element + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , Just index <- stripPrefix "Heading" styleId + , isNumericNotNull index = Just (styleId, read index) + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , Just index <- findChild (elemName ns "w" "name") element >>= + findAttr (elemName ns "w" "val") >>= + stripPrefix "heading " + , isNumericNotNull index = Just (styleId, read index) +getHeaderLevel _ _ = Nothing + +blockQuoteStyleIds :: [String] +blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"] + +blockQuoteStyleNames :: [String] +blockQuoteStyleNames = ["Quote", "Block Text"] + +getBlockQuote :: NameSpaces -> Element -> Maybe Bool +getBlockQuote ns element + | Just styleId <- findAttr (elemName ns "w" "styleId") element + , styleId `elem` blockQuoteStyleIds = Just True + | Just styleName <- findChild (elemName ns "w" "name") element >>= + findAttr (elemName ns "w" "val") + , styleName `elem` blockQuoteStyleNames = Just True +getBlockQuote _ _ = Nothing + +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChild (elemName ns "w" "pPr") element >>= + findChild (elemName ns "w" "numPr") + lvl = fromMaybe "0" (numPr >>= + findChild (elemName ns "w" "ilvl") >>= + findAttr (elemName ns "w" "val")) + numId <- numPr >>= + findChild (elemName ns "w" "numId") >>= + findAttr (elemName ns "w" "val") + return (numId, lvl) + + +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData +elemToParStyleData ns element parentStyle = + ParStyleData + { + headingLev = getHeaderLevel ns element + , isBlockQuote = getBlockQuote ns element + , numInfo = getNumInfo ns element + , psStyle = parentStyle + } + elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element | isElem ns "w" "t" element diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2901ea2a3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,106 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a +insert (Just k) (Just v) m = alterMap (M.insert k v) m +insert _ _ m = m + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = State StyleMaps a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = execState genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + genStyleItem e = do + styleType <- getStyleType e + styleId <- getAttrStyleId e + nameValLowercase <- fmap (map toLower) `fmap` getNameVal e + case styleType of + Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId + _ -> return () + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + +getStyleType :: Element -> StateM (Maybe StyleType) +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + Just "paragraph" -> return $ Just ParaStyle + Just "character" -> return $ Just CharStyle + _ -> return Nothing + +getAttrType :: Element -> StateM (Maybe String) +getAttrType el = do + name <- elemName' "type" + return $ findAttr name el + +getAttrStyleId :: Element -> StateM (Maybe String) +getAttrStyleId el = do + name <- elemName' "styleId" + return $ findAttr name el + +getNameVal :: Element -> StateM (Maybe String) +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + return $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b061d8683..338540533 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) +import Text.Pandoc.Error + type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) +readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) -runEPUB :: Except String a -> a -runEPUB = either error id . runExcept +runEPUB :: Except PandocError a -> Either PandocError a +runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -64,19 +66,20 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root </> path) archive - return $ fixInternalReferences path . + html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname + return $ fixInternalReferences path html mimeToReader s _ path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty @@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -130,7 +133,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -141,7 +144,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError String m => Element -> m Meta +parseMeta :: MonadError PandocError m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -159,7 +162,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError String m => Archive -> m (String, Element) +getManifest :: MonadError PandocError m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError String m => QName -> Element -> m String +findAttrE :: MonadError PandocError m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry +findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError String m => String -> m Element +parseXMLDocE :: MonadError PandocError m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError String m => QName -> Element -> m Element +findElementE :: MonadError PandocError m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError String m => String -> Maybe a -> m a -mkE s = maybe (throwError s) return +mkE :: MonadError PandocError m => String -> Maybe a -> m a +mkE s = maybe (throwError . ParseFailure $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4ea5f41d5..52358e51e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, +ViewPatterns#-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -43,7 +44,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' - , escapeURI, safeRead ) + , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) @@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Text.Pandoc.Error + +import Text.Parsec.Error + -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readHtml opts inp = - case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result + mapLeft (ParseFailure . getError) . flip runReader def $ + runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -78,6 +82,9 @@ readHtml opts inp = meta <- stateMeta . parserState <$> getState bs' <- replaceNotes (B.toList blocks) return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m replaceNotes :: [Block] -> TagParser [Block] replaceNotes = walkM replaceNotes' @@ -374,12 +381,20 @@ pTable = try $ do caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") - skipMany pBlank - rows <- pOptInTag "tbody" - $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") - skipMany pBlank + let pTh = option [] $ pInTags "tr" (pCell "th") + pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") + pTBody = do pOptInTag "tbody" $ many1 pTr + head'' <- pOptInTag "thead" pTh + head' <- pOptInTag "tbody" $ do + if null head'' + then pTh + else return head'' + rowsLs <- many pTBody + rows' <- pOptInTag "tfoot" $ many pTr TagClose _ <- pSatisfy (~== TagClose "table") + let rows = (concat rowsLs) ++ rows' + -- fail on empty table + guard $ not $ null head' && null rows let isSinglePlain x = case B.toList x of [Plain _] -> True _ -> False @@ -440,7 +455,7 @@ pCodeBlock :: TagParser Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) - let rawText = concatMap fromTagText $ filter isTagText contents + let rawText = concatMap tagToString contents -- drop leading newline if any let result' = case rawText of '\n':xs -> xs @@ -451,6 +466,11 @@ pCodeBlock = try $ do _ -> result' return $ B.codeBlockWith (mkAttr attr) result +tagToString :: Tag String -> String +tagToString (TagText s) = s +tagToString (TagOpen "br" _) = "\n" +tagToString _ = "" + inline :: TagParser Inlines inline = choice [ eNoteref @@ -619,14 +639,17 @@ pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) mconcat <$> manyTill parser (pCloses tagtype <|> eof) -pOptInTag :: String -> TagParser a - -> TagParser a -pOptInTag tagtype parser = try $ do - open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True) +-- parses p, preceeded by an optional opening tag +-- and followed by an optional closing tags +pOptInTag :: String -> TagParser a -> TagParser a +pOptInTag tagtype p = try $ do + skipMany pBlank + optional $ pSatisfy (~== TagOpen tagtype []) + skipMany pBlank + x <- p skipMany pBlank - x <- parser + optional $ pSatisfy (~== TagClose tagtype) skipMany pBlank - when open $ pCloses tagtype return x pCloses :: String -> TagParser () @@ -735,7 +758,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space -- eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", +eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", "del", "ins", "progress", "map", "area", "noscript", "script", "object", "svg", "video", "source"] @@ -753,7 +776,7 @@ blockHtmlTags :: [String] blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", "blockquote", "body", "button", "canvas", "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "embed", "fieldset", "figcaption", "figure", + "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "isindex", "menu", "noframes", "ol", "output", "p", "pre", @@ -864,7 +887,7 @@ htmlTag :: Monad m => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do - lookAhead $ char '<' >> (oneOf "/!?" <|> letter) + lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter) (next : _) <- getInput >>= return . canonicalizeTags . parseTags guard $ f next -- advance the parser diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 4b46c869d..aa2534afc 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -25,11 +26,18 @@ import Documentation.Haddock.Parser import Documentation.Haddock.Types import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse - -> Pandoc -readHaddock opts = B.doc . docHToBlocks . trace' . parseParas + -> Either PandocError Pandoc +readHaddock opts = +#if MIN_VERSION_haddock_library(1,2,0) + Right . B.doc . docHToBlocks . trace' . _doc . parseParas +#else + Right . B.doc . docHToBlocks . trace' . parseParas +#endif where trace' x = if readerTrace opts then trace (show x) x else x diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9f51e9a8f..08aa0b20e 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -42,26 +42,25 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Char ( chr, ord ) +import Data.Char ( chr, ord, isLetter, isAlphaNum ) import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter, isAlphaNum) import Control.Applicative import Data.Monoid -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybeToList) import System.Environment (getEnv) -import System.FilePath (replaceExtension, (</>)) -import Data.List (intercalate, intersperse) +import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) +import Data.List (intercalate) import qualified Data.Map as M import qualified Control.Exception as E -import System.FilePath (takeExtension, addExtension) import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc @@ -73,17 +72,16 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' -type LP = Parser [Char] ParserState +type LP = Parser String ParserState anyControlSeq :: LP String anyControlSeq = do char '\\' next <- option '\n' anyChar - name <- case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - return name + case next of + '\n' -> return "" + c | isLetter c -> (c:) <$> (many letter <* optional sp) + | otherwise -> return [c] controlSeq :: String -> LP String controlSeq name = try $ do @@ -103,7 +101,7 @@ dimenarg = try $ do sp :: LP () sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline) + <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' @@ -161,49 +159,48 @@ mathInline :: LP String -> LP Inlines mathInline p = math <$> (try p >>= applyMacros') mathChars :: LP String -mathChars = concat <$> - many ( many1 (satisfy (\c -> c /= '$' && c /='\\')) - <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) - ) +mathChars = (concat <$>) $ + many $ + many1 (satisfy (\c -> c /= '$' && c /='\\')) + <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar) quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines quoted' f starter ender = do startchs <- starter try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs -double_quote :: LP Inlines -double_quote = - ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") +doubleQuote :: LP Inlines +doubleQuote = + quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') -- the following is used by babel for localized quotes: <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') - ) -single_quote :: LP Inlines -single_quote = - ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) +singleQuote :: LP Inlines +singleQuote = + quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - ) inline :: LP Inlines inline = (mempty <$ comment) <|> (space <$ sp) <|> inlineText <|> inlineCommand + <|> inlineEnvironment <|> inlineGroup <|> (char '-' *> option (str "-") - ((char '-') *> option (str "–") (str "—" <$ char '-'))) - <|> double_quote - <|> single_quote + (char '-' *> option (str "–") (str "—" <$ char '-'))) + <|> doubleQuote + <|> singleQuote <|> (str "”" <$ try (string "''")) <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') - <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") - <|> (mathInline $ char '$' *> mathChars <* char '$') - <|> (superscript <$> (char '^' *> tok)) + <|> mathDisplay (string "$$" *> mathChars <* string "$$") + <|> mathInline (char '$' *> mathChars <* char '$') + <|> try (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str . (:[]) <$> tildeEscape) @@ -237,20 +234,32 @@ block = (mempty <$ comment) blocks :: LP Blocks blocks = mconcat <$> many block +getRawCommand :: String -> LP String +getRawCommand name' = do + rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + return $ '\\' : name' ++ snd rawargs + +lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where + lookupList l m = msum $ map (`M.lookup` m) l + blockCommand :: LP Blocks blockCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" star <- option "" (string "*" <* optional sp) let name' = name ++ star - case M.lookup name' blockCommands of - Just p -> p - Nothing -> case M.lookup name blockCommands of - Just p -> p - Nothing -> mzero + let raw = do + rawcommand <- getRawCommand name' + transformed <- applyMacros' rawcommand + guard $ transformed /= rawcommand + notFollowedBy $ parseFromString inlines transformed + parseFromString blocks transformed + lookupListDefault raw [name',name] blockCommands inBrackets :: Inlines -> Inlines -inBrackets x = (str "[") <> x <> (str "]") +inBrackets x = str "[" <> x <> str "]" -- eat an optional argument and one or more arguments in braces ignoreInlines :: String -> (String, LP Inlines) @@ -258,19 +267,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author @@ -301,10 +312,10 @@ blockCommands = M.fromList $ -- , ("hrule", pure horizontalRule) , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> loose_item) + , ("item", skipopts *> looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> tok >>= setCaption) + , ("caption", skipopts *> setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= @@ -336,9 +347,16 @@ addMeta field val = updateState $ \st -> splitBibs :: String -> [Inlines] splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') -setCaption :: Inlines -> LP Blocks -setCaption ils = do - updateState $ \st -> st{ stateCaption = Just ils } +setCaption :: LP Blocks +setCaption = do + ils <- tok + mblabel <- option Nothing $ + try $ spaces' >> controlSeq "label" >> (Just <$> tok) + let ils' = case mblabel of + Just lab -> ils <> spanWith + ("",[],[("data-label", stringify lab)]) mempty + Nothing -> ils + updateState $ \st -> st{ stateCaption = Just ils' } return mempty resetCaption :: LP () @@ -361,7 +379,7 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl' contents @@ -374,25 +392,39 @@ inlineCommand = try $ do star <- option "" (string "*") let name' = name ++ star let raw = do - rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) - let rawcommand = '\\' : name ++ star ++ snd rawargs + rawcommand <- getRawCommand name' transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed else if parseRaw then return $ rawInline "latex" rawcommand else return mempty - case M.lookup name' inlineCommands of - Just p -> p <|> raw - Nothing -> case M.lookup name inlineCommands of - Just p -> p <|> raw - Nothing -> raw + lookupListDefault mzero [name',name] inlineCommands + <|> raw unlessParseRaw :: LP () unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool -isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands +isBlockCommand s = s `M.member` blockCommands + + +inlineEnvironments :: M.Map String (LP Inlines) +inlineEnvironments = M.fromList + [ ("displaymath", mathEnv id Nothing "displaymath") + , ("equation", mathEnv id Nothing "equation") + , ("equation*", mathEnv id Nothing "equation*") + , ("gather", mathEnv id (Just "gathered") "gather") + , ("gather*", mathEnv id (Just "gathered") "gather*") + , ("multline", mathEnv id (Just "gathered") "multline") + , ("multline*", mathEnv id (Just "gathered") "multline*") + , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") + , ("align", mathEnv id (Just "aligned") "align") + , ("align*", mathEnv id (Just "aligned") "align*") + , ("alignat", mathEnv id (Just "aligned") "alignat") + , ("alignat*", mathEnv id (Just "aligned") "alignat*") + ] inlineCommands :: M.Map String (LP Inlines) inlineCommands = M.fromList $ @@ -414,9 +446,14 @@ inlineCommands = M.fromList $ , ("sim", lit "~") , ("label", unlessParseRaw >> (inBrackets <$> tok)) , ("ref", unlessParseRaw >> (inBrackets <$> tok)) + , ("noindent", unlessParseRaw >> return mempty) + , ("textgreek", tok) + , ("sep", lit ",") + , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline $ braced) + , ("ensuremath", mathInline braced) + , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") , ("$", lit "$") @@ -464,7 +501,7 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) + , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) , (",", pure mempty) , ("@", pure mempty) , (" ", lit "\160") @@ -477,7 +514,7 @@ inlineCommands = M.fromList $ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) - , ("lstinline", doverb) + , ("lstinline", skipopts *> doverb) , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> @@ -494,6 +531,7 @@ inlineCommands = M.fromList $ , ("citealp", citation "citealp" NormalCitation False) , ("citealp*", citation "citealp*" NormalCitation False) , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) , ("footcite", inNote <$> citation "footcite" NormalCitation False) , ("parencite", citation "parencite" NormalCitation False) , ("supercite", citation "supercite" NormalCitation False) @@ -516,6 +554,7 @@ inlineCommands = M.fromList $ , ("supercites", citation "supercites" NormalCitation True) , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) , ("Footcite", citation "Footcite" NormalCitation False) , ("Parencite", citation "Parencite" NormalCitation False) , ("Supercite", citation "Supercite" NormalCitation False) @@ -542,7 +581,7 @@ inlineCommands = M.fromList $ ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: - [ "noindent", "index" ] + [ "index" ] mkImage :: String -> LP Inlines mkImage src = do @@ -559,7 +598,7 @@ inNote ils = unescapeURL :: String -> String unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` "#$%&~_^\\{}" + where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" @@ -585,7 +624,7 @@ lit = pure . str accent :: (Char -> String) -> Inlines -> LP Inlines accent f ils = case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys) + (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) [] -> mzero _ -> return ils @@ -774,7 +813,7 @@ breve 'u' = "ŭ" breve c = [c] tok :: LP Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) +tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar opt :: LP Inlines opt = bracketed inline <* optional sp @@ -792,9 +831,14 @@ environment :: LP Blocks environment = do controlSeq "begin" name <- braced - case M.lookup name environments of - Just p -> p <|> rawEnv name - Nothing -> rawEnv name + M.findWithDefault mzero name environments + <|> rawEnv name + +inlineEnvironment :: LP Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- braced + M.findWithDefault mzero name inlineEnvironments rawEnv :: String -> LP Blocks rawEnv name = do @@ -807,15 +851,11 @@ rawEnv name = do ---- -type IncludeParser = ParserT [Char] [String] IO String +type IncludeParser = ParserT String [String] IO String -- | Replace "include" commands with file contents. -handleIncludes :: String -> IO String -handleIncludes s = do - res <- runParserT includeParser' [] "input" s - case res of - Right s' -> return s' - Left e -> error $ show e +handleIncludes :: String -> IO (Either PandocError String) +handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s includeParser' :: IncludeParser includeParser' = @@ -857,6 +897,12 @@ backslash' = string "\\" braced' :: IncludeParser braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') +maybeAddExtension :: String -> FilePath -> FilePath +maybeAddExtension ext fp = + if null (takeExtension fp) + then addExtension fp ext + else fp + include' :: IncludeParser include' = do fs' <- try $ do @@ -865,11 +911,11 @@ include' = do <|> try (string "input") <|> string "usepackage" -- skip options - skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + skipMany $ try $ char '[' *> manyTill anyChar (char ']') fs <- (map trim . splitBy (==',')) <$> braced' return $ if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs + then map (maybeAddExtension ".sty") fs + else map (maybeAddExtension ".tex") fs pos <- getPosition containers <- getState let fn = case containers of @@ -938,14 +984,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: String -> LP Blocks alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ - concat $ intersperse "\\\\\n" $ lines t) + intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s strToCode x = x -rawLaTeXBlock :: Parser [Char] ParserState String +rawLaTeXBlock :: LP String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) -rawLaTeXInline :: Parser [Char] ParserState Inline +rawLaTeXInline :: LP Inline rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) RawInline "latex" <$> applyMacros' raw @@ -954,41 +1000,42 @@ addImageCaption :: Blocks -> LP Blocks addImageCaption = walkM go where go (Image alt (src,tit)) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Image (toList ils) (src, "fig:")) - Nothing -> return (Image alt (src,tit)) + return $ case mbcapt of + Just ils -> Image (toList ils) (src, "fig:") + Nothing -> Image alt (src,tit) go x = return x addTableCaption :: Blocks -> LP Blocks addTableCaption = walkM go where go (Table c als ws hs rs) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Table (toList ils) als ws hs rs) - Nothing -> return (Table c als ws hs rs) + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs go x = return x environments :: M.Map String (LP Blocks) environments = M.fromList [ ("document", env "document" blocks <* skipMany anyChar) - , ("letter", env "letter" letter_contents) + , ("letter", env "letter" letterContents) , ("figure", env "figure" $ resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) , ("table", env "table" $ resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular", env "tabular" simpTable) + , ("tabular*", env "tabular" $ simpTable True) + , ("tabular", env "tabular" $ simpTable False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", ordered_list) + , ("enumerate", orderedList') , ("alltt", alltt =<< verbEnv "alltt") , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) - , ("verbatim", codeBlock <$> (verbEnv "verbatim")) + , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" @@ -996,17 +1043,17 @@ environments = M.fromList let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "Verbatim")) + codeBlockWith attr <$> verbEnv "Verbatim") , ("lstlisting", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] - ++ maybe [] (:[]) (lookup "language" options + ++ maybeToList (lookup "language" options >>= fromListingsLanguage) let attr = (fromMaybe "" (lookup "label" options),classes,kvs) - codeBlockWith attr <$> (verbEnv "lstlisting")) + codeBlockWith attr <$> verbEnv "lstlisting") , ("minted", do options <- option [] keyvals lang <- grouped (many1 $ satisfy (/='}')) let kvs = [ (if k == "firstnumber" @@ -1016,27 +1063,27 @@ environments = M.fromList [ "numberLines" | lookup "linenos" options == Just "true" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "minted")) + codeBlockWith attr <$> verbEnv "minted") , ("obeylines", parseFromString (para . trimInlines . mconcat <$> many inline) =<< intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv Nothing "displaymath") - , ("equation", mathEnv Nothing "equation") - , ("equation*", mathEnv Nothing "equation*") - , ("gather", mathEnv (Just "gathered") "gather") - , ("gather*", mathEnv (Just "gathered") "gather*") - , ("multline", mathEnv (Just "gathered") "multline") - , ("multline*", mathEnv (Just "gathered") "multline*") - , ("eqnarray", mathEnv (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv (Just "aligned") "eqnarray*") - , ("align", mathEnv (Just "aligned") "align") - , ("align*", mathEnv (Just "aligned") "align*") - , ("alignat", mathEnv (Just "aligned") "alignat") - , ("alignat*", mathEnv (Just "aligned") "alignat*") + , ("displaymath", mathEnv para Nothing "displaymath") + , ("equation", mathEnv para Nothing "equation") + , ("equation*", mathEnv para Nothing "equation*") + , ("gather", mathEnv para (Just "gathered") "gather") + , ("gather*", mathEnv para (Just "gathered") "gather*") + , ("multline", mathEnv para (Just "gathered") "multline") + , ("multline*", mathEnv para (Just "gathered") "multline*") + , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") + , ("align", mathEnv para (Just "aligned") "align") + , ("align*", mathEnv para (Just "aligned") "align*") + , ("alignat", mathEnv para (Just "aligned") "alignat") + , ("alignat*", mathEnv para (Just "aligned") "alignat*") ] -letter_contents :: LP Blocks -letter_contents = do +letterContents :: LP Blocks +letterContents = do bs <- blocks st <- getState -- add signature (author) and address (title) @@ -1063,8 +1110,8 @@ closing = do item :: LP Blocks item = blocks *> controlSeq "item" *> skipopts *> blocks -loose_item :: LP Blocks -loose_item = do +looseItem :: LP Blocks +looseItem = do ctx <- stateParserContext `fmap` getState if ctx == ListItemState then mzero @@ -1092,8 +1139,8 @@ listenv name p = try $ do updateState $ \st -> st{ stateParserContext = oldCtx } return res -mathEnv :: Maybe String -> String -> LP Blocks -mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name) +mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a +mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) where inner x = case innerEnv of Nothing -> x Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ @@ -1107,8 +1154,8 @@ verbEnv name = do res <- manyTill anyChar endEnv return $ stripTrailingNewlines res -ordered_list :: LP Blocks -ordered_list = do +orderedList' :: LP Blocks +orderedList' = do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ char '[' *> anyOrderedListMarker <* char ']' @@ -1120,7 +1167,7 @@ ordered_list = do optional sp num <- grouped (many1 digit) spaces - return $ (read num + 1 :: Int) + return (read num + 1 :: Int) bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -1134,14 +1181,14 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = (void comment) - <|> (void sp) - <|> (void blanklines) - <|> (void macro) - <|> (void blockCommand) - <|> (void anyControlSeq) - <|> (void braced) - <|> (void anyChar) + preambleBlock = void comment + <|> void sp + <|> void blanklines + <|> void macro + <|> void blockCommand + <|> void anyControlSeq + <|> void braced + <|> void anyChar ------- @@ -1183,7 +1230,7 @@ citationLabel = optional sp *> <* optional sp <* optional (char ',') <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*" + where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String) cites :: CitationMode -> Bool -> LP [Citation] cites mode multi = try $ do @@ -1217,7 +1264,7 @@ complexNatbibCitation mode = try $ do suff <- ils skipSpaces optional $ char ';' - return $ addPrefix pref $ addSuffix suff $ cits' + return $ addPrefix pref $ addSuffix suff cits' (c:cits, raw) <- withRaw $ grouped parseOne return $ cite (c{ citationMode = mode }:cits) (rawInline "latex" $ "\\citetext" ++ raw) @@ -1227,7 +1274,7 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}") + let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' @@ -1241,13 +1288,13 @@ parseAligns = try $ do return aligns' hline :: LP () -hline = () <$ (try $ spaces >> controlSeq "hline") +hline = () <$ try (spaces' *> controlSeq "hline" <* spaces') lbreak :: LP () -lbreak = () <$ (try $ spaces *> controlSeq "\\") +lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces') amp :: LP () -amp = () <$ (try $ spaces *> char '&') +amp = () <$ try (spaces' *> char '&') parseTableRow :: Int -- ^ number of columns -> LP [Blocks] @@ -1260,19 +1307,22 @@ parseTableRow cols = try $ do guard $ cells' /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces + spaces' return cells'' -simpTable :: LP Blocks -simpTable = try $ do - spaces +spaces' :: LP () +spaces' = spaces *> skipMany (comment *> spaces) + +simpTable :: Bool -> LP Blocks +simpTable hasWidthParameter = try $ do + when hasWidthParameter $ () <$ (spaces' >> tok) + skipopts aligns <- parseAligns let cols = length aligns optional hline header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) - spaces - skipMany (comment *> spaces) + spaces' let header'' = if null header' then replicate cols mempty else header' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 02a787670..5e0cef4f8 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -31,11 +32,11 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Markdown ( readMarkdown, readMarkdownWithWarnings ) where -import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) +import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, toLower ) +import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import qualified Data.Text as T @@ -55,38 +56,41 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad +import Control.Monad.Reader import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + runMarkdown opts s parseMarkdown -- | Read markdown from an input string and return a pair of a Pandoc document -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWith parseMarkdownWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") - where parseMarkdownWithWarnings = do - doc <- parseMarkdown - warnings <- stateWarnings <$> getState - return (doc, warnings) - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines + -> Either PandocError (Pandoc, [String]) +readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) + +runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a +runMarkdown opts inp p = fst <$> res + where + imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") + res :: Either PandocError (a, ParserState) + res = runReader imd s + s :: ParserState + s = either def snd res -- -- Constants and data structure definitions @@ -117,10 +121,16 @@ isBlank _ = False -- auxiliary functions -- -isNull :: F Inlines -> Bool -isNull ils = B.isNull $ runF ils def +-- | Succeeds when we're in list context. +inList :: MarkdownParser () +inList = do + ctx <- stateParserContext <$> getState + guard (ctx == ListItemState) + +isNull :: Inlines -> Bool +isNull = B.isNull -spnl :: Parser [Char] st () +spnl :: Monad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline @@ -160,9 +170,9 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) +inlinesInBalancedBrackets :: MarkdownParser Inlines inlinesInBalancedBrackets = charsInBalancedBrackets >>= - parseFromString (trimInlinesF . mconcat <$> many inline) + parseFromString (trimInlines . mconcat <$> many inline) charsInBalancedBrackets :: MarkdownParser [Char] charsInBalancedBrackets = do @@ -179,16 +189,16 @@ charsInBalancedBrackets = do -- document structure -- -titleLine :: MarkdownParser (F Inlines) +titleLine :: MarkdownParser Inlines titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ trimInlinesF $ mconcat res + return $ trimInlines $ mconcat res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: MarkdownParser [Inlines] authorsLine = try $ do char '%' skipSpaces @@ -197,13 +207,13 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors + return $ filter (not . isNull) $ map (trimInlines . mconcat) authors -dateLine :: MarkdownParser (F Inlines) +dateLine :: MarkdownParser Inlines dateLine = try $ do char '%' skipSpaces - trimInlinesF . mconcat <$> manyTill inline newline + trimInlines . mconcat <$> manyTill inline newline titleBlock :: MarkdownParser () titleBlock = pandocTitleBlock <|> mmdTitleBlock @@ -213,20 +223,16 @@ pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') title <- option mempty titleLine - author <- option (return []) authorsLine + author <- option [] authorsLine date <- option mempty dateLine optional blanklines - let meta' = do title' <- title - author' <- author - date' <- date - return $ - (if B.isNull title' then id else B.setMeta "title" title') - . (if null author' then id else B.setMeta "author" author') - . (if B.isNull date' then id else B.setMeta "date" date') - $ nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } - -yamlMetaBlock :: MarkdownParser (F Blocks) + let meta' = (if B.isNull title then id else B.setMeta "title" title) + . (if null author then id else B.setMeta "author" author) + . (if B.isNull date then id else B.setMeta "date" date) + $ nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } + +yamlMetaBlock :: MarkdownParser Blocks yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -239,17 +245,18 @@ yamlMetaBlock = try $ do optional blanklines opts <- stateOptions <$> getState meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ + Right (Yaml.Object hashmap) -> return $ H.foldrWithKey (\k v m -> if ignorable k then m - else B.setMeta (T.unpack k) - (yamlToMeta opts v) m) + else case yamlToMeta opts v of + Left _ -> m + Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta + Right Yaml.Null -> return nullMeta Right _ -> do addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta + return nullMeta Left err' -> do case err' of InvalidYaml (Just YamlParseException{ @@ -268,41 +275,50 @@ yamlMetaBlock = try $ do _ -> addWarning (Just pos) $ "Could not parse YAML header: " ++ show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } return mempty -- ignore fields ending with _ ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t - -toMetaValue :: ReaderOptions -> Text -> MetaValue -toMetaValue opts x = - case readMarkdown opts (T.unpack x) of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] +ignorable t = T.pack "_" `T.isSuffixOf` t + +toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) + where + toMeta p = + case p of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t - -yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue + Pandoc _ bs -> MetaBlocks bs + endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} + meta_exts = Set.fromList [ Ext_pandoc_title_block + , Ext_mmd_title_block + , Ext_yaml_metadata_block + ] + +yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = MetaString $ show + | base10Exponent n >= 0 = return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) - $ V.toList xs -yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + | otherwise = return $ MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) + (V.toList xs) +yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> if ignorable k then m - else M.insert (T.unpack k) - (yamlToMeta opts v) m) - M.empty o -yamlToMeta _ _ = MetaString "" + else (do + v' <- yamlToMeta opts v + m' <- m + return (M.insert (T.unpack k) v' m'))) + (return M.empty) o +yamlToMeta _ _ = return $ MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -312,17 +328,21 @@ mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - updateState $ \st -> st{ stateMeta' = stateMeta' st <> - return (Meta $ M.fromList kvPairs) } + updateState $ \st -> st{ stateMeta = stateMeta st <> + (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + skipMany1 spaceNoNewline val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + guard $ not . null . trim $ val let key' = concat $ words $ map toLower key let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') + where + spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -333,17 +353,11 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st + let meta = stateMeta st + let Pandoc _ bs = B.doc blocks return $ Pandoc meta bs -addWarning :: Maybe SourcePos -> String -> MarkdownParser () -addWarning mbpos msg = - updateState $ \st -> st{ - stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : - stateWarnings st } - -referenceKey :: MarkdownParser (F Blocks) +referenceKey :: MarkdownParser Blocks referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -370,7 +384,7 @@ referenceKey = try $ do Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () updateState $ \s -> s { stateKeys = M.insert key target oldkeys } - return $ return mempty + return mempty referenceTitle :: MarkdownParser String referenceTitle = try $ do @@ -390,7 +404,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: MarkdownParser Blocks abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -399,7 +413,7 @@ abbrevKey = do char ':' skipMany (satisfy (/= '\n')) blanklines - return $ return mempty + return mempty noteMarker :: MarkdownParser String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -417,7 +431,7 @@ rawLines = do rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: MarkdownParser Blocks noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -429,7 +443,7 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw + parsed <- parseFromString (inFootnote parseBlocks) raw let newnote = (ref, parsed) oldnotes <- stateNotes' <$> getState case lookup ref oldnotes of @@ -438,32 +452,40 @@ noteBlock = try $ do updateState $ \s -> s { stateNotes' = newnote : oldnotes } return mempty +inFootnote :: MarkdownParser a -> MarkdownParser a +inFootnote p = do + st <- stateInFootnote <$> getState + updateState (\s -> s { stateInFootnote = True } ) + r <- p + updateState (\s -> s { stateInFootnote = st } ) + return r + -- -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: MarkdownParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: MarkdownParser Blocks block = do tr <- getOption readerTrace pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) + , guardEnabled Ext_latex_macros *> macro -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList , header , lhsCodeBlock - , rawTeXBlock , divHtml , htmlBlock , table - , lineBlock , codeBlockIndented + , rawTeXBlock + , lineBlock , blockQuote , hrule , orderedList @@ -474,29 +496,28 @@ block = do , para , plain ] <?> "block" - when tr $ do - st <- getState + when tr $ trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + (take 60 . show . B.toList $ res)) (return ()) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: MarkdownParser Blocks header = setextHeader <|> atxHeader <?> "header" -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: MarkdownParser Blocks atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- length <$> many1 (char '#') notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + attr' <- registerHeader attr text + return $ B.headerWith attr' level text atxClosing :: MarkdownParser Attr atxClosing = try $ do @@ -523,25 +544,25 @@ mmdHeaderIdentifier = do skipSpaces return (ident,[],[]) -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: MarkdownParser Blocks setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1 + attr' <- registerHeader attr text + return $ B.headerWith attr' level text -- -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -549,24 +570,24 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return $ return B.horizontalRule + return B.horizontalRule -- -- code blocks -- indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine = indentSpaces >> ((++ "\n") <$> anyLine) -blockDelimiter :: (Char -> Bool) +blockDelimiter :: Monad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] st m Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length + Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c)) attributes :: MarkdownParser Attr attributes = try $ do @@ -611,7 +632,7 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +codeBlockFenced :: MarkdownParser Blocks codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) @@ -623,7 +644,7 @@ codeBlockFenced = try $ do blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ B.codeBlockWith attr $ intercalate "\n" contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -632,7 +653,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: MarkdownParser Blocks codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -640,15 +661,15 @@ codeBlockIndented = do return $ b ++ l)) optional blanklines classes <- getOption readerIndentedCodeClasses - return $ return $ B.codeBlockWith ("", classes, []) $ + return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: MarkdownParser Blocks lhsCodeBlock = do guardEnabled Ext_literate_haskell - (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) - <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) lhsCodeBlockLaTeX :: MarkdownParser String @@ -677,7 +698,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: Monad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -705,12 +726,12 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: MarkdownParser Blocks blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ B.blockQuote <$> contents + contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n" + return $ B.blockQuote contents -- -- list blocks @@ -735,9 +756,9 @@ anyOrderedListStart = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number res <- do guardDisabled Ext_fancy_lists - many1 digit + start <- many1 digit >>= safeRead char '.' - return (1, DefaultStyle, DefaultDelim) + return (start, DefaultStyle, DefaultDelim) <|> do (num, style, delim) <- anyOrderedListMarker -- if it could be an abbreviated first name, -- insist on more than one space @@ -753,7 +774,7 @@ anyOrderedListStart = try $ do return res listStart :: MarkdownParser () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) +listStart = bulletListStart <|> void anyOrderedListStart listLine :: MarkdownParser String listLine = try $ do @@ -808,7 +829,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) + -> MarkdownParser Blocks listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -824,14 +845,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: MarkdownParser Blocks orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem + items <- many1 $ listItem ( try $ do optional newline -- if preceded by Plain block in a list startpos <- sourceColumn <$> getPosition @@ -843,12 +864,12 @@ orderedList = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res ) start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items + return $ B.orderedListWith (start', style, delim) (compactify' items) -bulletList :: MarkdownParser (F Blocks) +bulletList :: MarkdownParser Blocks bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + items <- many1 $ listItem bulletListStart + return $ B.bulletList (compactify' items) -- definition lists @@ -863,14 +884,14 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks]) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' + term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine' contents <- mapM (parseFromString parseBlocks) raw optional blanklines - return $ liftM2 (,) term (sequence contents) + return (term, contents) defRawBlock :: Bool -> MarkdownParser String defRawBlock compact = try $ do @@ -893,32 +914,32 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: MarkdownParser Blocks definitionList = try $ do lookAhead (anyLine >> optional blankline >> defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: MarkdownParser Blocks compactDefinitionList = do guardEnabled Ext_compact_definition_lists - items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + items <- many1 $ definitionListItem True + return $ B.definitionList (compactify'DL items) -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: MarkdownParser Blocks normalDefinitionList = do guardEnabled Ext_definition_lists - items <- fmap sequence $ many1 $ definitionListItem False - return $ B.definitionList <$> items + items <- many1 $ definitionListItem False + return $ B.definitionList items -- -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: MarkdownParser Blocks para = try $ do exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline - option (B.plain <$> result) + result <- trimInlines . mconcat <$> many1 inline + option (B.plain result) $ try $ do newline (blanklines >> return mempty) @@ -926,6 +947,8 @@ para = try $ do <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) <|> (guardEnabled Ext_lists_without_preceding_blankline >> + -- Avoid creating a paragraph in a nested list. + notFollowedBy' inList >> () <$ lookAhead listStart) <|> do guardEnabled Ext_native_divs inHtmlBlock <- stateInHtmlBlock <$> getState @@ -933,18 +956,17 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero - return $ do - result' <- result - case B.toList result' of + return $ + case B.toList result of [Image alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure - return $ B.para $ B.singleton + B.para $ B.singleton $ Image alt (src,'f':'i':'g':':':tit) - _ -> return $ B.para result' + _ -> B.para result -plain :: MarkdownParser (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline +plain :: MarkdownParser Blocks +plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- raw html @@ -955,13 +977,13 @@ htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: MarkdownParser Blocks htmlBlock = do guardEnabled Ext_raw_html try (do (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag (guard (t `elem` ["pre","style","script"]) >> - (return . B.rawBlock "html") <$> rawVerbatimBlock) + B.rawBlock "html" <$> rawVerbatimBlock) <|> (do guardEnabled Ext_markdown_attribute oldMarkdownAttribute <- stateMarkdownAttribute <$> getState markdownAttribute <- @@ -980,35 +1002,35 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: MarkdownParser Blocks htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ B.rawBlock "html" first strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem - ["pre", "style", "script"]) - (const True)) + (TagOpen tag _, open) <- + htmlTag (tagOpen (`elem` ["pre", "style", "script"]) + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags' [TagClose tag] -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: MarkdownParser Blocks rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) + generalize rawLaTeXBlock `sepEndBy1` blankline) <|> (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) spaces - return $ return result + return result -rawHtmlBlocks :: MarkdownParser (F Blocks) +rawHtmlBlocks :: MarkdownParser Blocks rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1020,10 +1042,10 @@ rawHtmlBlocks = do contents <- mconcat <$> many (notFollowedBy' closer >> block) result <- (closer >>= \(_, rawcloser) -> return ( - return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> + (B.rawBlock "html" $ stripMarkdownAttribute raw) <> contents <> - return (B.rawBlock "html" rawcloser))) - <|> return (return (B.rawBlock "html" raw) <> contents) + (B.rawBlock "html" rawcloser))) + <|> return (B.rawBlock "html" raw <> contents) updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } return result @@ -1038,12 +1060,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: MarkdownParser Blocks lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) - return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines') + mapM (parseFromString (trimInlines . mconcat <$> many inline)) + return $ B.para (mconcat $ intersperse B.linebreak lines') -- -- Tables @@ -1051,17 +1073,19 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) + let lengthDashes = length dashes + lengthSp = length sp + return (lengthDashes, lengthDashes + lengthSp) -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1080,9 +1104,8 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' + heads <- + mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1123,30 +1146,30 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: MarkdownParser Inlines tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines + trimInlines . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1160,12 +1183,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1187,7 +1210,7 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList - heads <- fmap sequence $ + heads <- mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1197,7 +1220,7 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -1206,13 +1229,14 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' - return (length dashes, length dashes + 1) + let lengthDashes = length dashes + return (lengthDashes, lengthDashes + 1) -gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -1225,7 +1249,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1234,9 +1258,7 @@ gridTableHeader headless = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () + unless headless (void $ gridTableSep '=') let lines' = map snd dashes let indices = scanl (+) 0 lines' let aligns = replicate (length lines') AlignDefault @@ -1245,7 +1267,7 @@ gridTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads + heads <- mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) gridTableRawLine :: [Int] -> MarkdownParser [String] @@ -1256,12 +1278,12 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) + compactify' <$> mapM (parseFromString parseBlocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1287,16 +1309,12 @@ pipeBreak = try $ do blankline return (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- try ( pipeBreak >>= \als -> - return (return $ replicate (length als) mempty, als)) - <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> - - return (row, als) ) - lines' <- sequence <$> many1 pipeTableRow + (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- many pipeTableRow let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + return (aligns, widths, heads, lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1304,7 +1322,7 @@ sepPipe = try $ do notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: MarkdownParser [Blocks] pipeTableRow = do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1316,16 +1334,14 @@ pipeTableRow = do guard $ not (null rest && not openPipe) optional (char '|') blankline - let cells = sequence (first:rest) - return $ do - cells' <- cells - return $ map - (\ils -> + let cells = first:rest + return $ + map (\ils -> case trimInlines ils of ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + | otherwise -> B.plain ils') cells -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1340,7 +1356,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: Monad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1350,22 +1366,22 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) +tableWith :: MarkdownParser ([Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser [Blocks]) -> MarkdownParser sep -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') + let widths = case indices of + [] -> replicate (length aligns) 0.0 + _ -> widthsFromIndices numColumns indices + return (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: MarkdownParser Blocks table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1380,19 +1396,15 @@ table = try $ do (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption + Nothing -> option mempty tableCaption Just c -> return c - return $ do - caption' <- caption - heads' <- heads - lns' <- lns - return $ B.table caption' (zip aligns widths) heads' lns' + return $ B.table caption (zip aligns widths) heads lns -- -- inline -- -inline :: MarkdownParser (F Inlines) +inline :: MarkdownParser Inlines inline = choice [ whitespace , bareURL , str @@ -1415,7 +1427,7 @@ inline = choice [ whitespace , rawLaTeXInline' , exampleRef , smart - , return . B.singleton <$> charRef + , B.singleton <$> charRef , symbol , ltSign ] <?> "inline" @@ -1426,43 +1438,42 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedChar :: MarkdownParser Inlines escapedChar = do result <- escapedChar' case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] + return B.linebreak -- "\[newline]" is a linebreak + _ -> return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: MarkdownParser Inlines ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' - return $ return $ B.str "<" + return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: MarkdownParser Inlines exampleRef = try $ do guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - return $ do - st <- askF - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + st <- ask + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: MarkdownParser Inlines symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: MarkdownParser Inlines code = try $ do starts <- many1 (char '`') skipSpaces @@ -1472,16 +1483,17 @@ code = try $ do notFollowedBy (char '`'))) attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> optional whitespace >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + return $ B.codeWith attr $ trim $ concat result -math :: MarkdownParser (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) +math :: MarkdownParser Inlines +math = (B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> ((B.math <$> (mathInline >>= applyMacros')) <+?> + ((getOption readerSmart >>= guard) *> apostrophe <* notFollowedBy space)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1489,13 +1501,13 @@ enclosure c = do <|> guard (c == '*') <|> (guard =<< notAfterString) cs <- many1 (char c) - (return (B.str cs) <>) <$> whitespace - <|> do + (B.str cs <>) <$> whitespace + <|> case length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty - _ -> return (return $ B.str cs) + _ -> return $ B.str cs ender :: Char -> Int -> MarkdownParser () ender c n = try $ do @@ -1508,74 +1520,74 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: Char -> MarkdownParser Inlines three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + (ender c 3 >> return ((B.strong . B.emph) contents)) + <|> (ender c 2 >> one c (B.strong contents)) + <|> (ender c 1 >> two c (B.emph contents)) + <|> return (B.str [c,c,c] <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: Char -> Inlines -> MarkdownParser Inlines two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + (ender c 2 >> return (B.strong (prefix' <> contents))) + <|> return (B.str [c,c] <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: Char -> Inlines -> MarkdownParser Inlines one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + (ender c 1 >> return (B.emph (prefix' <> contents))) + <|> return (B.str [c] <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: MarkdownParser Inlines strongOrEmph = enclosure '*' <|> enclosure '_' --- | Parses a list of inlines between start and end delimiters. +-- | Parses a list oInlines between start and end delimiters. inlinesBetween :: (Show b) => MarkdownParser a -> MarkdownParser b - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines inlinesBetween start end = - (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) -strikeout = fmap B.strikeout <$> +strikeout :: MarkdownParser Inlines +strikeout = B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) -superscript = fmap B.superscript <$> try (do +superscript :: MarkdownParser Inlines +superscript = B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) -subscript = fmap B.subscript <$> try (do +subscript :: MarkdownParser Inlines +subscript = B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) -whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" +whitespace :: MarkdownParser Inlines +whitespace = spaceChar >> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: Monad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: MarkdownParser Inlines str = do result <- many1 alphaNum updateLastStrPos @@ -1583,14 +1595,14 @@ str = do isSmart <- getOption readerSmart if isSmart then case likelyAbbrev result of - [] -> return $ return $ B.str result + [] -> return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result + return (B.str $ + result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ B.str result) + else return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1605,13 +1617,12 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: MarkdownParser Inlines endline = try $ do newline notFollowedBy blankline -- parse potential list-starts differently if in a list: - st <- getState - when (stateParserContext st == ListItemState) $ notFollowedBy listStart + notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header @@ -1619,18 +1630,18 @@ endline = try $ do notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser (eof >> return mempty) - <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.space) + <|> return B.space -- -- links -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) +reference :: MarkdownParser (Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets + withRaw $ trimInlines <$> inlinesInBalancedBrackets parenthesizedChars :: MarkdownParser [Char] parenthesizedChars = do @@ -1658,7 +1669,7 @@ source = do linkTitle :: MarkdownParser String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: MarkdownParser Inlines link = try $ do st <- getState guard $ stateAllowLinks st @@ -1668,43 +1679,43 @@ link = try $ do regLink B.link lab <|> referenceLink B.link (lab,raw) regLink :: (String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) + -> Inlines -> MarkdownParser Inlines regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + return $ constructor src tit lab -- a link like [this][ref] or [this][] or [this] referenceLink :: (String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) + -> (Inlines, String) -> MarkdownParser Inlines referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False - (ref,raw') <- try - (skipSpaces >> optional (newline >> skipSpaces) >> reference) - <|> return (mempty, "") + (ref,raw') <- option (mempty, "") $ + lookAhead (try (spnl >> normalCite >> return (mempty, ""))) + <|> + try (spnl >> reference) + when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references - let makeFallback = do - parsedRaw' <- parsedRaw - fallback' <- fallback - return $ B.str "[" <> fallback' <> B.str "]" <> + let makeFallback = + B.str "[" <> fallback <> B.str "]" <> (if sp && not (null raw) then B.space else mempty) <> - parsedRaw' - return $ do - keys <- asksF stateKeys - case M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref - if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback - else makeFallback - Just (src,tit) -> constructor src tit <$> lab + parsedRaw + keys <- asks stateKeys + headers <- asks stateHeaders + return $ + case M.lookup key keys of + Nothing -> + let ref' = if labIsRef then lab else ref in + if implicitHeaderRefs + then case M.lookup ref' headers of + Just ident -> constructor ('#':ident) "" lab + Nothing -> makeFallback + else makeFallback + Just (src,tit) -> constructor src tit lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1713,14 +1724,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB dropLB ('[':xs) = xs dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: MarkdownParser Inlines bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ return $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: MarkdownParser Inlines autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress @@ -1729,9 +1740,9 @@ autoLink = try $ do -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. extra <- fromEntities <$> manyTill nonspaceChar (char '>') - return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) + return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: MarkdownParser Inlines image = try $ do char '!' (lab,raw) <- reference @@ -1741,38 +1752,33 @@ image = try $ do _ -> B.image src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: MarkdownParser Inlines note = try $ do guardEnabled Ext_footnotes + (stateInFootnote <$> getState) >>= guard . not ref <- noteMarker - return $ do - notes <- asksF stateNotes' + notes <- asks stateNotes' + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do - st <- askF - -- process the note in a context that doesn't resolve - -- notes, to avoid infinite looping with notes inside - -- notes: - let contents' = runF contents st{ stateNotes' = [] } - return $ B.note contents' - -inlineNote :: MarkdownParser (F Inlines) + Nothing -> B.str $ "[^" ++ ref ++ "]" + Just contents -> B.note contents + +inlineNote :: MarkdownParser Inlines inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets - return $ B.note . B.para <$> contents + return . B.note . B.para $ contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: MarkdownParser Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s + RawInline _ s <- generalize rawLaTeXInline + return $ B.rawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1781,14 +1787,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: MarkdownParser Inlines spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1800,10 +1806,10 @@ spanHtml = try $ do Just s | null ident && null classes && map toLower (filter (`notElem` " \t;") s) == "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents + -> return $ B.smallcaps contents + _ -> return $ B.spanWith (ident, classes, keyvals) contents -divHtml :: MarkdownParser (F Blocks) +divHtml :: MarkdownParser Blocks divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1821,11 +1827,11 @@ divHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents + return $ B.divWith (ident, classes, keyvals) contents else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + return $ B.rawBlock "html" (rawtag <> bls) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +rawHtmlInline :: MarkdownParser Inlines rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1840,19 +1846,17 @@ rawHtmlInline = do then (\x -> isInlineTag x && not (isCloseBlockTag x)) else not . isTextTag - return $ return $ B.rawInline "html" result + return $ B.rawInline "html" result -- Citations -cite :: MarkdownParser (F Inlines) +cite :: MarkdownParser Inlines cite = do guardEnabled Ext_citations - citations <- textualCite - <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs - return citations + textualCite <|> do (cs, raw) <- withRaw normalCite + return $ B.cite cs (B.text raw) -textualCite :: MarkdownParser (F Inlines) +textualCite :: MarkdownParser Inlines textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1866,29 +1870,26 @@ textualCite = try $ do case mbrest of Just (rest, raw) -> return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) - <$> rest + rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) - <|> return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + return $ B.cite cs (B.text $ '@':key ++ " " ++ raw)) + <|> do st <- ask + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: Citation -> MarkdownParser [Citation] bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option (return []) $ try $ char ';' >> citeList + rest <- option [] $ try $ char ';' >> citeList spnl char ']' - return $ do - suff' <- suff - rest' <- rest - return $ c{ citationSuffix = B.toList suff' } : rest' + return $ c{ citationSuffix = B.toList suff } : rest -normalCite :: MarkdownParser (F [Citation]) +normalCite :: MarkdownParser [Citation] normalCite = try $ do char '[' spnl @@ -1897,60 +1898,57 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: MarkdownParser Inlines suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then (B.space <>) <$> rest + then B.space <> rest else rest -prefix :: MarkdownParser (F Inlines) -prefix = trimInlinesF . mconcat <$> +prefix :: MarkdownParser Inlines +prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) -citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) +citeList :: MarkdownParser [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: MarkdownParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser (F Inlines) + return Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: MarkdownParser Inlines smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [apostrophe, dash, ellipses]) + choice [apostrophe, dash, ellipses] -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: MarkdownParser Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> + B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: MarkdownParser Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + (withQuoteContext InDoubleQuote doubleQuoteEnd >> return + (B.doubleQuoted . trimInlines $ contents)) + <|> return (B.str "\8220" <> contents) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e43b8a86c..939d10fb2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe) import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMediaWiki opts s = - case runParser parseMediaWiki MWState{ mwOptions = opts + readWith parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty , mwIdentifierList = [] } - "source" (s ++ "\n") of - Left err' -> error $ "\nError:\n" ++ show err' - Right result -> result + (s ++ "\n") data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int @@ -593,11 +593,17 @@ imageOption = <|> try (many1 (oneOf "x0123456789") <* string "px") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +collapseUnderscores :: String -> String +collapseUnderscores [] = [] +collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) +collapseUnderscores (x:xs) = x : collapseUnderscores xs + +addUnderscores :: String -> String +addUnderscores = collapseUnderscores . intercalate "_" . words + internalLink :: MWParser Inlines internalLink = try $ do sym "[[" - let addUnderscores x = let (pref,suff) = break (=='#') x - in pref ++ intercalate "_" (words suff) pagename <- unwords . words <$> many (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index f4dfa62c1..fc6b3362a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; Either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Error +import Control.Applicative + -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead) -- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readNative s = - case safeRead s of - Just d -> d - Nothing -> Pandoc nullMeta $ readBlocks s + -> Either PandocError Pandoc +readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) -readBlocks :: String -> [Block] -readBlocks s = - case safeRead s of - Just d -> d - Nothing -> [readBlock s] +readBlocks :: String -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) -readBlock :: String -> Block -readBlock s = - case safeRead s of - Just d -> d - Nothing -> Plain $ readInlines s +readBlock :: String -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) -readInlines :: String -> [Inline] -readInlines s = - case safeRead s of - Just d -> d - Nothing -> [readInline s] +readInlines :: String -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) -readInline :: String -> Inline -readInline s = - case safeRead s of - Just d -> d - Nothing -> error "Cannot parse document" +readInline :: String -> Either PandocError Inline +readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 35d01e877..19ddba36b 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where import Data.Char (toUpper) import Text.Pandoc.Options @@ -11,8 +12,11 @@ import Data.Generics import Data.Monoid import Control.Monad.State import Control.Applicative ((<$>), (<$)) +import Data.Default +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error -type OPML = State OPMLState +type OPML = ExceptT PandocError (State OPMLState) data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -21,17 +25,19 @@ data OPMLState = OPMLState{ , opmlDocDate :: Inlines } deriving Show -readOPML :: ReaderOptions -> String -> Pandoc +instance Default OPMLState where + def = OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +readOPML :: ReaderOptions -> String -> Either PandocError Pandoc readOPML _ inp = setTitle (opmlDocTitle st') - $ setAuthors (opmlDocAuthors st') - $ setDate (opmlDocDate st') - $ doc $ mconcat bs - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) - OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } + . setAuthors (opmlDocAuthors st') + . setDate (opmlDocDate st') + . doc . mconcat <$> bs + where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -58,14 +64,16 @@ attrValue attr elt = Just z -> z Nothing -> "" -asHtml :: String -> Inlines -asHtml s = case readHtml def s of - Pandoc _ [Plain ils] -> fromList ils - _ -> mempty +exceptT :: Either PandocError a -> OPML a +exceptT = either throwError return + +asHtml :: String -> OPML Inlines +asHtml s = (\(Pandoc _ bs) -> case bs of + [Plain ils] -> fromList ils + _ -> mempty) <$> exceptT (readHtml def s) -asMarkdown :: String -> Blocks -asMarkdown s = fromList bs - where Pandoc _ bs = readMarkdown def s +asMarkdown :: String -> OPML Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) getBlocks :: Element -> OPML Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -82,8 +90,8 @@ parseBlock (Elem e) = "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e - where sect n = do let headerText = asHtml $ attrValue "text" e - let noteBlocks = asMarkdown $ attrValue "_note" e + where sect n = do headerText <- asHtml $ attrValue "text" e + noteBlocks <- asMarkdown $ attrValue "_note" e modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62421d2fb..fc63cc11e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,5 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> @@ -35,47 +39,85 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF - , newline, orderedListMarker - , parseFromString +import Text.Pandoc.Parsing hiding ( newline, orderedListMarker + , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) +import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( Applicative, pure +import Control.Applicative ( pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks) +import Control.Monad (guard, mplus, mzero, when) +import Control.Monad.Reader (Reader, runReader, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default -import Data.List (intersperse, isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl') import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Data.Monoid (mconcat, mempty, mappend) import Network.HTTP (urlEncode) +import Text.Pandoc.Error + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") + -> Either PandocError Pandoc +readOrg opts s = runOrg opts s parseOrg + +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext + , finalState :: OrgParserState } -type OrgParser = Parser [Char] OrgParserState +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) + +runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a +runOrg opts inp p = fst <$> res + where + imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") + res = runReader imd def { finalState = s } + s :: OrgParserState + s = either def snd res parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState - let meta = runF (orgStateMeta' st) st - return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st) + let meta = orgStateMeta st + let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks') + +-- | Drop COMMENT headers and the document tree below those headers. +dropCommentTrees :: [Block] -> [Block] +dropCommentTrees [] = [] +dropCommentTrees blks@(b:bs) = + maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b + +-- | Return the level of a header starting a comment tree and Nothing +-- otherwise. +commentHeaderLevel :: Block -> Maybe Int +commentHeaderLevel blk = + case blk of + (Header level _ ((Str "COMMENT"):_)) -> Just level + _ -> Nothing + +-- | Drop blocks until a header on or above the given level is seen +dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] +dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) + +isHeaderLevelLowerEq :: Int -> Block -> Bool +isHeaderLevelLowerEq n blk = + case blk of + (Header level _ _) -> n >= level + _ -> False -- -- Parser State for Org -- -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (String, Blocks) type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String @@ -94,10 +136,12 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta - , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable } +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote def + instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -111,6 +155,10 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + instance Default OrgParserState where def = defaultOrgParserState @@ -126,13 +174,13 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta , orgStateNotes' = [] } recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + let as = orgStateAnchorIds s in + s{ orgStateAnchorIds = i : as } addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> @@ -211,30 +259,6 @@ parseFromString parser str' = do -- Adaptions and specializations of parsing utilities -- -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Monad, Applicative, Functor) - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: a -> OrgParser (F a) -returnF = return . return - - -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = @@ -242,14 +266,21 @@ newline = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. +blanklines :: OrgParser [Char] +blanklines = + P.blanklines + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + -- -- parsing blocks -- -parseBlocks :: OrgParser (F Blocks) +parseBlocks :: OrgParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser (F Blocks) +block :: OrgParser Blocks block = choice [ mempty <$ blanklines , optionalAttributes $ choice [ orgBlock @@ -260,14 +291,14 @@ block = choice [ mempty <$ blanklines , drawer , specialLine , header - , return <$> hline + , hline , list , latexFragment , noteBlock , paraOrPlain ] <?> "block" -optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes :: OrgParser Blocks -> OrgParser Blocks optionalAttributes parser = try $ resetBlockAttributes *> parseBlockAttributes *> parser @@ -287,7 +318,7 @@ parseAndAddAttribute key value = do let key' = map toLower key () <$ addBlockAttribute key' value -lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr :: String -> OrgParser (Maybe Inlines) lookupInlinesAttr attr = try $ do val <- lookupBlockAttribute attr maybe (return Nothing) @@ -301,20 +332,20 @@ lookupInlinesAttr attr = try $ do type BlockProperties = (Int, String) -- (Indentation, Block-Type) -orgBlock :: OrgParser (F Blocks) +orgBlock :: OrgParser Blocks orgBlock = try $ do blockProp@(_, blkType) <- blockHeaderStart ($ blockProp) $ case blkType of "comment" -> withRaw' (const mempty) - "html" -> withRaw' (return . (B.rawBlock blkType)) - "latex" -> withRaw' (return . (B.rawBlock blkType)) - "ascii" -> withRaw' (return . (B.rawBlock blkType)) - "example" -> withRaw' (return . exampleCode) - "quote" -> withParsed (fmap B.blockQuote) + "html" -> withRaw' (B.rawBlock blkType) + "latex" -> withRaw' (B.rawBlock blkType) + "ascii" -> withRaw' (B.rawBlock blkType) + "example" -> withRaw' exampleCode + "quote" -> withParsed B.blockQuote "verse" -> verseBlock "src" -> codeBlock - _ -> withParsed (fmap $ divWithClass blkType) + _ -> withParsed (divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType @@ -322,10 +353,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType indent = length <$> many spaceChar blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) -withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) ignHeaders :: OrgParser () @@ -334,11 +365,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) divWithClass :: String -> Blocks -> Blocks divWithClass cls = B.divWith ("", [cls], []) -verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock :: BlockProperties -> OrgParser Blocks verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp - fmap B.para . mconcat . intersperse (pure B.linebreak) + B.para . mconcat . intersperse B.linebreak <$> mapM (parseFromString parseInlines) (lines content) exportsCode :: [(String, String)] -> Bool @@ -355,7 +386,7 @@ followingResultsBlock = *> blankline *> (unlines <$> many1 exampleLine)) -codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock :: BlockProperties -> OrgParser Blocks codeBlock blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -365,17 +396,15 @@ codeBlock blkProp = do let includeCode = exportsCode kv let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content - labelledBlck <- maybe (pure codeBlck) - (labelDiv codeBlck) + labelledBlck <- maybe codeBlck (labelDiv codeBlck) <$> lookupInlinesAttr "caption" - let resultBlck = pure $ maybe mempty (exampleCode) resultsContent + let resultBlck = maybe mempty exampleCode resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) where labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value - <*> pure blk) - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + B.divWith nullAttr (labelledBlock value <> blk) + labelledBlock = B.plain . B.spanWith ("", ["label"], []) rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ @@ -384,7 +413,7 @@ rawBlockContent (indent, blockType) = try $ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent :: BlockProperties -> OrgParser Blocks parsedBlockContent blkProps = try $ do raw <- rawBlockContent blkProps parseFromString parseBlocks (raw ++ "\n") @@ -475,9 +504,9 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser (F Blocks) +example :: OrgParser Blocks example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine + return . exampleCode =<< unlines <$> many1 exampleLine exampleCode :: String -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -486,7 +515,7 @@ exampleLine :: OrgParser String exampleLine = try $ skipSpaces *> string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser (F Blocks) +drawer :: OrgParser Blocks drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -512,14 +541,12 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser (F Blocks) +figure :: OrgParser Blocks figure = try $ do (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) - return $ do - cap' <- cap - return $ B.para $ B.image src nam cap' + return $ B.para $ B.image src nam cap where nameAndCaption = do @@ -535,8 +562,8 @@ figure = try $ do -- -- Comments, Options and Metadata -specialLine :: OrgParser (F Blocks) -specialLine = fmap return . try $ metaLine <|> commentLine +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ mempty @@ -556,14 +583,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey - inlinesF <- metaInlines + inlines <- metaInlines updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } + let meta' = B.setMeta key inlines nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } return () -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline +metaInlines :: OrgParser MetaValue +metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -604,11 +631,11 @@ parseFormat = try $ do -- -- | Headers -header :: OrgParser (F Blocks) +header :: OrgParser Blocks header = try $ do level <- headerStart title <- inlinesTillNewline - return $ B.header level <$> title + return $ B.header level title headerStart :: OrgParser Int headerStart = try $ @@ -632,7 +659,7 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow (F [Blocks]) +data OrgTableRow = OrgContentRow [Blocks] | OrgAlignRow [Alignment] | OrgHlineRow @@ -643,13 +670,13 @@ data OrgTable = OrgTable , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) +table :: OrgParser Blocks table = try $ do lookAhead tableStart do rows <- tableRows - cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" - return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows + (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption" + return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows orgToPandocTable :: OrgTable -> Inlines @@ -665,11 +692,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: OrgParser Blocks tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell + B.plain . trimInlines . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -701,8 +728,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable + -> OrgTable +rowsToTable = foldl' (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -721,45 +748,43 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> F OrgTable + -> OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - setLongestRow rs =<< appendToBody rs t +rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t +rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t setLongestRow :: [a] -> OrgTable - -> F OrgTable + -> OrgTable setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } + t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> F OrgTable + -> OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t + t{ orgTableHeader = b , orgTableRows = [] } + _ -> t appendToBody :: [Blocks] -> OrgTable - -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } + -> OrgTable +appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } + -> OrgTable +setAligns aligns t = t{ orgTableAlignments = aligns } -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: OrgParser Blocks latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + return $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c @@ -789,7 +814,7 @@ latexEnvName = try $ do -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: OrgParser Blocks noteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillHeaderOrNote @@ -801,35 +826,37 @@ noteBlock = try $ do <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: OrgParser Blocks paraOrPlain = try $ do ils <- parseInlines nl <- option False (newline >> return True) try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + (return $ B.para ils)) + <|> (return $ B.plain ils) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser (F Blocks) +list :: OrgParser Blocks list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem bulletListStart) +definitionList :: OrgParser Blocks +definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) + B.definitionList . compactify'DL + <$> many1 (definitionListItem $ bulletListStart' (Just n)) -bulletList :: OrgParser (F Blocks) -bulletList = fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem bulletListStart) +bulletList :: OrgParser Blocks +bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) + B.bulletList . compactify' + <$> many1 (listItem (bulletListStart' $ Just n)) -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String @@ -838,10 +865,27 @@ genericListStart listMarker = try $ (+) <$> (length <$> many spaceChar) <*> (length <$> listMarker <* many1 spaceChar) --- parses bullet list start and returns its length (excl. following whitespace) +-- parses bullet list marker. maybe we know the indent level bulletListStart :: OrgParser Int -bulletListStart = genericListStart bulletListMarker - where bulletListMarker = pure <$> oneOf "*-+" +bulletListStart = bulletListStart' Nothing + +bulletListStart' :: Maybe Int -> OrgParser Int +-- returns length of bulletList prefix, inclusive of marker +bulletListStart' Nothing = do ind <- length <$> many spaceChar + when (ind == 0) $ notFollowedBy (char '*') + oneOf bullets + many1 spaceChar + return (ind + 1) + -- Unindented lists are legal, but they can't use '*' bullets + -- We return n to maintain compatibility with the generic listItem +bulletListStart' (Just n) = do count (n-1) spaceChar + when (n == 1) $ notFollowedBy (char '*') + oneOf bullets + many1 spaceChar + return n + +bullets :: String +bullets = "*+-" orderedListStart :: OrgParser Int orderedListStart = genericListStart orderedListMarker @@ -849,21 +893,21 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) + -> OrgParser (Inlines, [Blocks]) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString inline term + term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' + return (term', [contents']) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser (F Blocks) + -> OrgParser Blocks listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -889,7 +933,7 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser (F Inlines) +inline :: OrgParser Inlines inline = choice [ whitespace , linebreak @@ -911,35 +955,36 @@ inline = , subscript , superscript , inlineLaTeX + , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" -parseInlines :: OrgParser (F Inlines) -parseInlines = trimInlinesF . mconcat <$> many1 inline +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" +specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" -whitespace :: OrgParser (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos <?> "whitespace" -linebreak :: OrgParser (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser (F Inlines) +endline :: OrgParser Inlines endline = try $ do newline notFollowedBy blankline @@ -957,77 +1002,72 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return . return $ B.space + return $ B.space -cite :: OrgParser (F Inlines) +cite :: OrgParser Inlines cite = try $ do guardEnabled Ext_citations (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) cs -normalCite :: OrgParser (F [Citation]) +normalCite :: OrgParser [Citation] normalCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList :: OrgParser [Citation] +citeList = sepBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser (F Citation) +citation :: OrgParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return $ Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where - prefix = trimInlinesF . mconcat <$> + prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces - rest <- trimInlinesF . mconcat <$> + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest + return $ + if hasSpace + then B.space <> rest + else rest -footnote :: OrgParser (F Inlines) +footnote :: OrgParser Inlines footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser (F Inlines) +inlineNote :: OrgParser Inlines inlineNote = try $ do string "[fn:" ref <- many alphaNum char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']') when (not $ null ref) $ addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note + return $ B.note note -referencedNote :: OrgParser (F Inlines) +referencedNote :: OrgParser Inlines referencedNote = try $ do ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' + notes <- asks (orgStateNotes' . finalState) + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' + Just contents -> B.note contents + Nothing -> B.str $ "[" ++ ref ++ "]" noteMarker :: OrgParser String noteMarker = try $ do @@ -1037,37 +1077,37 @@ noteMarker = try $ do <*> many1Till (noneOf "\n\r\t ") (char ']') ] -linkOrImage :: OrgParser (F Inlines) +linkOrImage :: OrgParser Inlines linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink <?> "link or image" -explicitOrImageLink :: OrgParser (F Inlines) +explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - srcF <- applyCustomLinkFormat =<< linkTarget + src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ do - src <- srcF - if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + alt <- internalLink src title' + return $ + (if isImageFilename title + then B.link src "" $ B.image title mempty mempty + else fromMaybe alt (linkToInlines src title')) -selflinkOrImage :: OrgParser (F Inlines) +selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) + return $ fromMaybe "" (linkToInlines src (B.str src)) -plainLink :: OrgParser (F Inlines) +plainLink :: OrgParser Inlines plainLink = try $ do (orig, src) <- uri - returnF $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -angleLink :: OrgParser (F Inlines) +angleLink :: OrgParser Inlines angleLink = try $ do char '<' link <- plainLink @@ -1080,34 +1120,53 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") -applyCustomLinkFormat :: String -> OrgParser (F String) +possiblyEmptyLinkTarget :: OrgParser String +possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") + +applyCustomLinkFormat :: String -> OrgParser String applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter - - -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF s@('#':_) = pure . B.link s "" -linkToInlinesF s - | isImageFilename s = const . pure $ B.image s "" "" - | isUri s = pure . B.link s "" - | isRelativeUrl s = pure . B.link s "" -linkToInlinesF s = \title -> do - anchorB <- (s `elem`) <$> asksF orgStateAnchorIds - if anchorB - then pure $ B.link ('#':s) "" title - else pure $ B.emph title - -isRelativeUrl :: String -> Bool -isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + fmts <- asks finalState + return $ + case M.lookup linkType (orgStateLinkFormatters fmts) of + Just v -> (v (drop 1 rest)) + Nothing -> link + +-- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind +-- of parsing. +linkToInlines :: String -> Inlines -> Maybe Inlines +linkToInlines = \s -> + case s of + _ | null s -> Just . B.link "" "" + _ | isAnchor s -> Just . B.link s "" + _ | isImageFilename s -> const . Just $ B.image s "" "" + _ | isFileLink s -> Just . B.link (dropLinkType s) "" + _ | isUri s -> Just . B.link s "" + _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) "" + _ | isRelativeFilePath s -> Just . B.link s "" + _ -> const Nothing + +isAnchor :: String -> Bool +isAnchor s = "#" `isPrefixOf` s + +isFileLink :: String -> Bool +isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) + +dropLinkType :: String -> String +dropLinkType = tail . snd . break (== ':') + +isRelativeFilePath :: String -> Bool +isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) && + (':' `notElem` s) isUri :: String -> Bool isUri s = let (scheme, path) = break (== ':') s - in all (\c -> isAlphaNum c || c `elem` ".-") scheme + in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme && not (null path) +isAbsoluteFilePath :: String -> Bool +isAbsoluteFilePath = ('/' ==) . head + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -1117,17 +1176,25 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +internalLink :: String -> Inlines -> OrgParser Inlines +internalLink link title = do + anchorB <- asks finalState + return $ + if link `elem` (orgStateAnchorIds anchorB) + then B.link ('#':link) "" title + else B.emph title + -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through -- @org-target-regexp@, which is fairly liberal. Since no link is created if -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser (F Inlines) +anchor :: OrgParser Inlines anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty + return $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" *> many1 (noneOf "\t\n\r<>\"' ") @@ -1141,11 +1208,11 @@ solidify :: String -> String solidify = map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c - | c `elem` "_.-:" = c + | c `elem` ("_.-:" :: String) = c | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock :: OrgParser Inlines inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar @@ -1153,7 +1220,7 @@ inlineCodeBlock = try $ do inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char @@ -1161,50 +1228,51 @@ enclosedByPair :: Char -- ^ opening char -> OrgParser [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' +emph :: OrgParser Inlines +emph = B.emph <$> emphasisBetween '/' -strong :: OrgParser (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' +strong :: OrgParser Inlines +strong = B.strong <$> emphasisBetween '*' -strikeout :: OrgParser (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' +underline :: OrgParser Inlines +underline = B.strong <$> emphasisBetween '_' -verbatim :: OrgParser (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' +verbatim :: OrgParser Inlines +verbatim = B.code <$> verbatimBetween '=' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '~' +code :: OrgParser Inlines +code = B.code <$> verbatimBetween '~' -subscript :: OrgParser (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' +math :: OrgParser Inlines +math = B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] -symbol :: OrgParser (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - where updatePositions c - | c `elem` emphasisPreChars = c <$ updateLastPreCharPos - | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos - | otherwise = return c +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) + where updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c emphasisBetween :: Char - -> OrgParser (F Inlines) + -> OrgParser Inlines emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -1281,9 +1349,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser (F Inlines) + -> OrgParser Inlines enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline + trimInlines . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -1362,7 +1430,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser (F Inlines) +subOrSuperExpr :: OrgParser Inlines subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") @@ -1377,10 +1445,11 @@ simpleSubOrSuperString = try $ <*> many1 alphaNum ] -inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX :: OrgParser Inlines inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + maybe mzero return $ + parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs @@ -1388,6 +1457,11 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: String -> Maybe Inlines parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) + -- dropWhileEnd would be nice here, but it's not available before base 4.5 + where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1 + state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} @@ -1406,3 +1480,31 @@ inlineLaTeXCommand = try $ do count len anyChar return cs _ -> mzero + +smart :: OrgParser Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [orgApostrophe, dash, ellipses] + where orgApostrophe = + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + *> return (B.str "\x2019") + +singleQuoted :: OrgParser Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + B.singleQuoted . trimInlines . mconcat <$> + many1Till inline singleQuoteEnd + +-- doubleQuoted will handle regular double-quoted sections, as well +-- as dialogues with an open double-quote without a close double-quote +-- in the same paragraph. +doubleQuoted :: OrgParser Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return + (B.doubleQuoted . trimInlines $ contents)) + <|> (return $ (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e5eccb116..a8112bc81 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} module Text.Pandoc.Readers.RST ( - readRST + readRST, + readRSTWithWarnings ) where import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero, mplus ) +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intersperse, intercalate, - transpose, sort, deleteFirstsBy, isSuffixOf ) + transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit) +import Data.Char (toLower, isHexDigit, isSpace) + +import Text.Pandoc.Error -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) +readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") + type RSTParser = Parser [Char] ParserState -- @@ -335,6 +342,13 @@ indentedBlock = try $ do optional blanklines return $ unlines lns +quotedBlock :: Parser [Char] st [Char] +quotedBlock = try $ do + quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + lns <- many1 $ lookAhead (char quote) >> anyLine + optional blanklines + return $ unlines lns + codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline @@ -342,7 +356,8 @@ codeBlock :: Parser [Char] st Blocks codeBlock = try $ codeBlockStart >> codeBlockBody codeBlockBody :: Parser [Char] st Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock +codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> + (indentedBlock <|> quotedBlock) lhsCodeBlock :: RSTParser Blocks lhsCodeBlock = try $ do @@ -513,7 +528,6 @@ directive = try $ do -- TODO: line-block, parsed-literal, table, csv-table, list-table -- date -- include --- class -- title directive' :: RSTParser Blocks directive' = do @@ -594,38 +608,69 @@ directive' = do Just t -> B.link (escapeURI $ trim t) "" $ B.image src "" alt Nothing -> B.image src "" alt - _ -> return mempty + "class" -> do + let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + -- directive content or the first immediately following element + children <- case body of + "" -> block + _ -> parseFromString parseBlocks body' + return $ B.divWith attrs children + other -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown directive: " ++ other + return mempty -- TODO: -- - Silently ignores illegal fields --- - Silently drops classes -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - baseRole <- case M.lookup parentRole customRoles of - Just (base, _, _) -> return base - Nothing -> return parentRole - - let fmt = if baseRole == "raw" then lookup "format" fields else Nothing - annotate = maybe id addLanguage $ - if baseRole == "code" + let (baseRole, baseFmt, baseAttr) = + maybe (parentRole, Nothing, nullAttr) id $ + M.lookup parentRole customRoles + fmt = if parentRole == "raw" then lookup "format" fields else baseFmt + annotate :: [String] -> [String] + annotate = maybe id (:) $ + if parentRole == "code" then lookup "language" fields else Nothing + attr = let (ident, classes, keyValues) = baseAttr + -- nub in case role name & language class are the same + in (ident, nub . (role :) . annotate $ classes, keyValues) + + -- warn about syntax we ignore + flip mapM_ fields $ \(key, _) -> case key of + "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "ignoring :language: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :code:" + "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + "ignoring :format: field because the parent of role :" ++ + role ++ ": is :" ++ parentRole ++ ": not :raw:" + _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ + ": in definition of role :" ++ role ++ ": in" + when (parentRole == "raw" && countKeys "format" > 1) $ + addWarning Nothing $ + "ignoring :format: fields after the first in the definition of role :" + ++ role ++": in" + when (parentRole == "code" && countKeys "language" > 1) $ + addWarning Nothing $ + "ignoring :language: fields after the first in the definition of role :" + ++ role ++": in" updateState $ \s -> s { stateRstCustomRoles = - M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + M.insert role (baseRole, fmt, attr) customRoles } return $ B.singleton Null where - addLanguage lang (ident, classes, keyValues) = - (ident, "sourceCode" : lang : classes, keyValues) + countKeys k = length . filter (== k) . map fst $ fields inheritedRole = - (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") + -- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u @@ -666,7 +711,7 @@ extractCaption = do toChunks :: String -> [String] toChunks = dropWhile null . map (trim . unlines) - . splitBy (all (`elem` " \t")) . lines + . splitBy (all (`elem` (" \t" :: String))) . lines codeblock :: Maybe String -> String -> String -> RSTParser Blocks codeblock numberLines lang body = @@ -985,21 +1030,23 @@ renderRole contents fmt role attr = case role of "RFC" -> return $ rfcLink contents "pep-reference" -> return $ pepLink contents "PEP" -> return $ pepLink contents - "literal" -> return $ B.str contents + "literal" -> return $ B.codeWith attr contents "math" -> return $ B.math contents "title-reference" -> titleRef contents "title" -> titleRef contents "t" -> titleRef contents - "code" -> return $ B.codeWith attr contents + "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents + "span" -> return $ B.spanWith attr $ B.str contents "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents custom -> do - customRole <- stateRstCustomRoles <$> getState - case M.lookup custom customRole of - Just (_, newFmt, inherit) -> let - fmtStr = fmt `mplus` newFmt - (newRole, newAttr) = inherit attr - in renderRole contents fmtStr newRole newAttr - Nothing -> return $ B.str contents -- Undefined role + customRoles <- stateRstCustomRoles <$> getState + case M.lookup custom customRoles of + Just (newRole, newFmt, newAttr) -> + renderRole contents newFmt newRole newAttr + Nothing -> do + pos <- getPosition + addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in" + return $ B.str contents -- Undefined role where titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) @@ -1008,11 +1055,14 @@ renderRole contents fmt role attr = case role of where padNo = replicate (4 - length pepNo) '0' ++ pepNo pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" -roleNameEndingIn :: RSTParser Char -> RSTParser String -roleNameEndingIn end = many1Till (letter <|> char '-') end +addClass :: String -> Attr -> Attr +addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) + +roleName :: RSTParser String +roleName = many1 (letter <|> char '-') roleMarker :: RSTParser String -roleMarker = char ':' *> roleNameEndingIn (char ':') +roleMarker = char ':' *> roleName <* char ':' roleBefore :: RSTParser (String,String) roleBefore = try $ do diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs new file mode 100644 index 000000000..07b414431 --- /dev/null +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -0,0 +1,527 @@ +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 +{- + Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.TWiki + Copyright : Copyright (C) 2014 Alexander Sulfrian + License : GNU GPL, version 2 or above + + Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> + Stability : alpha + Portability : portable + +Conversion of twiki text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.TWiki ( readTWiki + , readTWikiWithWarnings + ) where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (enclosed, macro, nested) +import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) +import Data.Monoid (Monoid, mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad +import Text.Printf (printf) +import Debug.Trace (trace) +import Text.Pandoc.XML (fromEntities) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Data.Char (isAlphaNum) +import qualified Data.Foldable as F +import Text.Pandoc.Error + +-- | Read twiki from an input string and return a Pandoc document. +readTWiki :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Either PandocError Pandoc +readTWiki opts s = + (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") + +readTWikiWithWarnings :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Either PandocError (Pandoc, [String]) +readTWikiWithWarnings opts s = + (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") + where parseTWikiWithWarnings = do + doc <- parseTWiki + warnings <- stateWarnings <$> getState + return (doc, warnings) + +type TWParser = Parser [Char] ParserState + +-- +-- utility functions +-- + +tryMsg :: String -> TWParser a -> TWParser a +tryMsg msg p = try p <?> msg + +skip :: TWParser a -> TWParser () +skip parser = parser >> return () + +nested :: TWParser a -> TWParser a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: String -> TWParser (Attr, String) +htmlElement tag = tryMsg tag $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = skip $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: String -> TWParser a -> TWParser [a] +parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd + +-- +-- main parser +-- + +parseTWiki :: TWParser Pandoc +parseTWiki = do + bs <- mconcat <$> many block + spaces + eof + return $ B.doc bs + + +-- +-- block parsers +-- + +block :: TWParser B.Blocks +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res + +blockElements :: TWParser B.Blocks +blockElements = choice [ separator + , header + , verbatim + , literal + , list "" + , table + , blockQuote + , noautolink + ] + +separator :: TWParser B.Blocks +separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule + +header :: TWParser B.Blocks +header = tryMsg "header" $ do + string "---" + level <- many1 (char '+') >>= return . length + guard $ level <= 6 + classes <- option [] $ string "!!" >> return ["unnumbered"] + skipSpaces + content <- B.trimInlines . mconcat <$> manyTill inline newline + attr <- registerHeader ("", classes, []) content + return $ B.headerWith attr level $ content + +verbatim :: TWParser B.Blocks +verbatim = (htmlElement "verbatim" <|> htmlElement "pre") + >>= return . (uncurry B.codeBlockWith) + +literal :: TWParser B.Blocks +literal = htmlElement "literal" >>= return . rawBlock + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +list :: String -> TWParser B.Blocks +list prefix = choice [ bulletList prefix + , orderedList prefix + , definitionList prefix] + +definitionList :: String -> TWParser B.Blocks +definitionList prefix = tryMsg "definitionList" $ do + indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " + elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + return $ B.definitionList elements + where + parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks]) + parseDefinitionListItem indent = do + string (indent ++ "$ ") >> skipSpaces + term <- many1Till inline $ string ": " + line <- listItemLine indent $ string "$ " + return $ (mconcat term, [line]) + +bulletList :: String -> TWParser B.Blocks +bulletList prefix = tryMsg "bulletList" $ + parseList prefix (char '*') (char ' ') + +orderedList :: String -> TWParser B.Blocks +orderedList prefix = tryMsg "orderedList" $ + parseList prefix (oneOf "1iIaA") (string ". ") + +parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks +parseList prefix marker delim = do + (indent, style) <- lookAhead $ string prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + return $ case style of + '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks + 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks + 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks + 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks + 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks + _ -> B.bulletList blocks + where + listStyle = do + indent <- many1 $ string " " + style <- marker + return (concat indent, style) + +parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks +parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + +listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks +listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat + where + lineContent = do + content <- anyLine + continuation <- optionMaybe listContinuation + return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) + filterSpaces = reverse . dropWhile (== ' ') . reverse + listContinuation = notFollowedBy (string prefix >> marker) >> + string " " >> lineContent + parseContent = parseFromString $ many1 $ nestedList <|> parseInline + parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= + return . B.plain . mconcat + nestedList = list prefix + lastNewline = try $ char '\n' <* eof + newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList + +table :: TWParser B.Blocks +table = try $ do + tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip + rows <- many1 tableParseRow + return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead + where + buildTable caption rows (aligns, heads) + = B.table caption aligns heads rows + align rows = replicate (columCount rows) (AlignDefault, 0) + columns rows = replicate (columCount rows) mempty + columCount rows = length $ head rows + +tableParseHeader :: TWParser ((Alignment, Double), B.Blocks) +tableParseHeader = try $ do + char '|' + leftSpaces <- many spaceChar >>= return . length + char '*' + content <- tableColumnContent (char '*' >> skipSpaces >> char '|') + char '*' + rightSpaces <- many spaceChar >>= return . length + optional tableEndOfRow + return (tableAlign leftSpaces rightSpaces, content) + where + tableAlign left right + | left >= 2 && left == right = (AlignCenter, 0) + | left > right = (AlignRight, 0) + | otherwise = (AlignLeft, 0) + +tableParseRow :: TWParser [B.Blocks] +tableParseRow = many1Till tableParseColumn newline + +tableParseColumn :: TWParser B.Blocks +tableParseColumn = char '|' *> skipSpaces *> + tableColumnContent (skipSpaces >> char '|') + <* skipSpaces <* optional tableEndOfRow + +tableEndOfRow :: TWParser Char +tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' + +tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks +tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat + where + content = continuation <|> inline + continuation = try $ char '\\' >> newline >> return mempty + +blockQuote :: TWParser B.Blocks +blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat + +noautolink :: TWParser B.Blocks +noautolink = do + (_, content) <- htmlElement "noautolink" + st <- getState + setState $ st{ stateAllowLinks = False } + blocks <- try $ parseContent content + setState $ st{ stateAllowLinks = True } + return $ mconcat blocks + where + parseContent = parseFromString $ many $ block + +para :: TWParser B.Blocks +para = many1Till inline endOfParaElement >>= return . result . mconcat + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> skip blockElements + result content = if F.all (==Space) content + then mempty + else B.para $ B.trimInlines content + + +-- +-- inline parsers +-- + +inline :: TWParser B.Inlines +inline = choice [ whitespace + , br + , macro + , strong + , strongHtml + , strongAndEmph + , emph + , emphHtml + , boldCode + , smart + , link + , htmlComment + , code + , codeHtml + , nop + , autoLink + , str + , symbol + ] <?> "inline" + +whitespace :: TWParser B.Inlines +whitespace = (lb <|> regsp) >>= return + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: TWParser B.Inlines +br = try $ string "%BR%" >> return B.linebreak + +linebreak :: TWParser B.Inlines +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = eof >> return mempty + innerNewline = return B.space + +between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c +between start end p = + mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) + +enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b +enclosed sep p = between sep (try $ sep <* endMarker) p + where + endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof + endSpace = (spaceChar <|> newline) >> return B.space + +macro :: TWParser B.Inlines +macro = macroWithParameters <|> withoutParameters + where + withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan + emptySpan name = buildSpan name [] mempty + +macroWithParameters :: TWParser B.Inlines +macroWithParameters = try $ do + char '%' + name <- macroName + (content, kvs) <- attributes + char '%' + return $ buildSpan name kvs $ B.str content + +buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan className kvs = B.spanWith attrs + where + attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) + additionalClasses = maybe [] words $ lookup "class" kvs + kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] + +macroName :: TWParser String +macroName = do + first <- letter + rest <- many $ alphaNum <|> char '_' + return (first:rest) + +attributes :: TWParser (String, [(String, String)]) +attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= + return . foldr (either mkContent mkKvs) ([], []) + where + spnl = skipMany (spaceChar <|> newline) + mkContent c ([], kvs) = (c, kvs) + mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkKvs kv (cont, rest) = (cont, (kv : rest)) + +attribute :: TWParser (Either String (String, String)) +attribute = withKey <|> withoutKey + where + withKey = try $ do + key <- macroName + char '=' + parseValue False >>= return . (curry Right key) + withoutKey = try $ parseValue True >>= return . Left + parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities + withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withoutQuotes allowSpaces + | allowSpaces == True = many1 $ noneOf "}" + | otherwise = many1 $ noneOf " }" + +nestedInlines :: Show a => TWParser a -> TWParser B.Inlines +nestedInlines end = innerSpace <|> nestedInline + where + innerSpace = try $ whitespace <* (notFollowedBy end) + nestedInline = notFollowedBy whitespace >> nested inline + +strong :: TWParser B.Inlines +strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong + +strongHtml :: TWParser B.Inlines +strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) + >>= return . B.strong . mconcat + +strongAndEmph :: TWParser B.Inlines +strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong + +emph :: TWParser B.Inlines +emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph + +emphHtml :: TWParser B.Inlines +emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) + >>= return . B.emph . mconcat + +nestedString :: Show a => TWParser a -> TWParser String +nestedString end = innerSpace <|> (count 1 nonspaceChar) + where + innerSpace = try $ many1 spaceChar <* notFollowedBy end + +boldCode :: TWParser B.Inlines +boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities + +htmlComment :: TWParser B.Inlines +htmlComment = htmlTag isCommentTag >> return mempty + +code :: TWParser B.Inlines +code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities + +codeHtml :: TWParser B.Inlines +codeHtml = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ B.codeWith attrs $ fromEntities content + +autoLink :: TWParser B.Inlines +autoLink = try $ do + state <- getState + guard $ stateAllowLinks state + (text, url) <- parseLink + guard $ checkLink (head $ reverse url) + return $ makeLink (text, url) + where + parseLink = notFollowedBy nop >> (uri <|> emailAddress) + makeLink (text, url) = B.link url "" $ B.str text + checkLink c + | c == '/' = True + | otherwise = isAlphaNum c + +str :: TWParser B.Inlines +str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str + +nop :: TWParser B.Inlines +nop = try $ (skip exclamation <|> skip nopTag) >> followContent + where + exclamation = char '!' + nopTag = stringAnyCase "<nop>" + followContent = many1 nonspaceChar >>= return . B.str . fromEntities + +symbol :: TWParser B.Inlines +symbol = count 1 nonspaceChar >>= return . B.str + +smart :: TWParser B.Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [ apostrophe + , dash + , ellipses + ] + +singleQuoted :: TWParser B.Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + many1Till inline singleQuoteEnd >>= + (return . B.singleQuoted . B.trimInlines . mconcat) + +doubleQuoted :: TWParser B.Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> + return (B.doubleQuoted $ B.trimInlines contents)) + <|> (return $ (B.str "\8220") B.<> contents) + +link :: TWParser B.Inlines +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ B.link url title content + +linkText :: TWParser (String, String, B.Inlines) +linkText = do + string "[[" + url <- many1Till anyChar (char ']') + content <- option [B.str url] linkContent + char ']' + return (url, "", mconcat content) + where + linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent + parseLinkContent = parseFromString $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ee64e8f2a..4565b26a1 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -57,6 +57,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag ) +import Text.Pandoc.Shared (trim) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match @@ -67,11 +68,12 @@ import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid import Debug.Trace (trace) +import Text.Pandoc.Error -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTextile opts s = (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") @@ -325,33 +327,30 @@ para = B.para . trimInlines . mconcat <$> many1 inline -- Tables -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState Blocks -tableCell = do - c <- many1 (noneOf "|\n") - content <- trimInlines . mconcat <$> parseFromString (many1 inline) c +tableCell :: Bool -> Parser [Char] ParserState Blocks +tableCell headerCell = try $ do + char '|' + when headerCell $ () <$ string "_." + notFollowedBy blankline + raw <- trim <$> + many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) + content <- mconcat <$> parseFromString (many inline) raw return $ B.plain content -- | A table row is made of many table cells tableRow :: Parser [Char] ParserState [Blocks] -tableRow = try $ ( char '|' *> - (endBy1 tableCell (optional blankline *> char '|')) <* newline) - --- | Many table rows -tableRows :: Parser [Char] ParserState [[Blocks]] -tableRows = many1 tableRow +tableRow = many1 (tableCell False) <* char '|' <* newline --- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: Parser [Char] ParserState [Blocks] -tableHeaders = let separator = (try $ string "|_.") in - try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) +tableHeader :: Parser [Char] ParserState [Blocks] +tableHeader = many1 (tableCell True) <* char '|' <* newline -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. table :: Parser [Char] ParserState Blocks table = try $ do - headers <- option mempty tableHeaders - rows <- tableRows + headers <- option mempty $ tableHeader + rows <- many1 tableRow blanklines let nbOfCols = max (length headers) (length $ head rows) return $ B.table mempty diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 6f8c19ac7..304d6d4c5 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -48,11 +48,12 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) +import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) import Data.Time.Format (formatTime) -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import System.IO.Error (catchIOError) type T2T = ParserT String ParserState (Reader T2TMeta) @@ -83,12 +84,12 @@ getT2TMeta inps out = do return $ T2TMeta curDate curMtime (intercalate ", " inps) out -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc readTxt2TagsNoMacros = readTxt2Tags def parseT2T :: T2T Pandoc @@ -576,4 +577,3 @@ atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) ignoreSpacesCap :: T2T String -> T2T String ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) - diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 36839ddd0..d0dacd5b6 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -45,20 +45,32 @@ import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.Options (WriterOptions(..)) +import Data.List (isPrefixOf) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c +makeDataURI :: String -> ByteString -> String +makeDataURI mime raw = + if textual + then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) + else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) + where textual = "text/" `Data.List.isPrefixOf` mime + mime' = if textual && ';' `notElem` mime + then mime ++ ";charset=utf-8" + else mime -- mime type already has charset + convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) convertTag media sourceURL t@(TagOpen tagname as) - | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do + | tagname `elem` + ["img", "embed", "video", "input", "audio", "source", "track"] = do as' <- mapM processAttribute as return $ TagOpen tagname as' where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y - let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + let enc = makeDataURI mime raw return (x, enc) else return (x,y) convertTag media sourceURL t@(TagOpen "script" as) = @@ -66,14 +78,14 @@ convertTag media sourceURL t@(TagOpen "script" as) = [] -> return t src -> do (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + let enc = makeDataURI mime raw return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) convertTag media sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + let enc = makeDataURI mime raw return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) convertTag _ _ t = return t @@ -95,8 +107,7 @@ cssURLs media sourceURL d orig = else d </> url (raw, mime) <- getRaw media sourceURL "" url' rest <- cssURLs media sourceURL d v - let enc = "data:" `B.append` fromString mime `B.append` - ";base64," `B.append` (encode raw) + let enc = fromString $ makeDataURI mime raw return $ x `B.append` "url(" `B.append` enc `B.append` rest getRaw :: MediaBag -> Maybe String -> MimeType -> String diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2d7c08718..e0460c66e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, PatternGuards #-} + FlexibleContexts, ScopedTypeVariables, PatternGuards, + ViewPatterns #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -84,6 +85,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + mapLeft, + hush, -- * Safe read safeRead, -- * Temp directory @@ -106,15 +109,15 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI ) import qualified Data.Set as Set import System.Directory -import System.FilePath (joinPath, splitDirectories) +import System.FilePath (joinPath, splitDirectories, pathSeparator, isPathSeparator) import Text.Pandoc.MIME (MimeType, getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) import System.IO.Temp @@ -734,12 +737,10 @@ renderTags' = renderTagsOptions -- | Perform an IO action in a directory, returning to starting directory. inDirectory :: FilePath -> IO a -> IO a -inDirectory path action = do - oldDir <- getCurrentDirectory - setCurrentDirectory path - result <- action - setCurrentDirectory oldDir - return result +inDirectory path action = E.bracket + getCurrentDirectory + setCurrentDirectory + (const $ setCurrentDirectory path >> action) readDefaultDataFile :: FilePath -> IO BS.ByteString readDefaultDataFile fname = @@ -856,6 +857,14 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" @@ -871,21 +880,24 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories go rs "." = rs go r@(p:rs) ".." = case p of ".." -> ("..":r) - "/" -> ("..":r) + (checkPathSeperator -> Just True) -> ("..":r) _ -> rs - go _ "/" = ["/"] + go _ (checkPathSeperator -> Just True) = [[pathSeparator]] go rs x = x:rs - + isSingleton [] = Nothing + isSingleton [x] = Just x + isSingleton _ = Nothing + checkPathSeperator = fmap isPathSeparator . isSingleton -- -- Safe read -- -safeRead :: (Monad m, Read a) => String -> m a +safeRead :: (MonadPlus m, Read a) => String -> m a safeRead s = case reads s of (d,x):_ | all isSpace x -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + _ -> mzero -- -- Temp directory diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 4ae6a6d8a..eefce2744 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -124,11 +124,12 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> String -- ^ Name of writer -> IO (Either E.IOException String) getDefaultTemplate user writer = do - let format = takeWhile (`notElem` "+-") writer -- strip off extensions + let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of "native" -> return $ Right "" "json" -> return $ Right "" "docx" -> return $ Right "" + "fb2" -> return $ Right "" "odt" -> getDefaultTemplate user "opendocument" "markdown_strict" -> getDefaultTemplate user "markdown" "multimarkdown" -> getDefaultTemplate user "markdown" @@ -287,7 +288,7 @@ reservedWords :: [Text] reservedWords = ["else","endif","for","endfor","sep"] skipEndline :: Parser () -skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return () +skipEndline = P.try $ P.skipMany (P.satisfy (`elem` (" \t" :: String))) >> P.char '\n' >> return () pConditional :: Parser Template pConditional = do diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5b8c5167..1c33b004a 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines - return $ contents <> cr + return $ contents <> blankline blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do @@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } let marker = text (replicate lev '*') - return $ marker <> space <> contents <> cr + return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. orderedListItemToAsciiDoc :: WriterOptions -- ^ options @@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> space <> contents <> cr + return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. definitionListItemToAsciiDoc :: WriterOptions diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..706b27175 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. +writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered + where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') + (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + metadata = runIdentity $ metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + context = defField "body" main $ metadata + rendered = if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node DOCUMENT (blocksToNodes bs) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) + | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) + | otherwise = id +blockToNodes (BlockQuote bs) = + (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = + (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = + (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = + (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] + : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) + | otherwise = id +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index bbca7f858..edfb4d0ff 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Options import Text.Pandoc.Walk (query) import Text.Printf ( printf ) import Data.List ( intercalate ) +import Data.Char ( ord ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -114,6 +115,13 @@ escapeCharForConTeXt opts ch = stringToConTeXt :: WriterOptions -> String -> String stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) +-- | Sanitize labels +toLabel :: String -> String +toLabel z = concatMap go z + where go x + | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) + | otherwise = [x] + -- | Convert Elements to ConTeXt elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc elementToConTeXt _ (Blk block) = blockToConTeXt block @@ -286,15 +294,16 @@ inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions - label <- inlineListToConTeXt txt + contents <- inlineListToConTeXt txt + let ref' = toLabel $ stringToConTeXt opts ref return $ text "\\in" <> braces (if writerNumberSections opts - then label <+> text "(\\S" - else label) -- prefix + then contents <+> text "(\\S" + else contents) -- prefix <> braces (if writerNumberSections opts then text ")" else empty) -- suffix - <> brackets (text ref) + <> brackets (text ref') inlineToConTeXt (Link txt (src, _)) = do let isAutolink = txt == [Str (unEscapeString src)] @@ -302,13 +311,13 @@ inlineToConTeXt (Link txt (src, _)) = do let next = stNextRef st put $ st {stNextRef = next + 1} let ref = "url" ++ show next - label <- inlineListToConTeXt txt + contents <- inlineListToConTeXt txt return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> (if isAutolink then empty - else brackets empty <> brackets label) + else brackets empty <> brackets contents) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do @@ -337,6 +346,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do st <- get let opts = stOptions st let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel + let ident' = toLabel ident let (section, chapter) = if "unnumbered" `elem` classes then (text "subject", text "title") else (text "section", text "chapter") @@ -344,7 +354,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do then char '\\' <> text (concat (replicate (level' - 1) "sub")) <> section - <> (if (not . null) ident then brackets (text ident) else empty) + <> (if (not . null) ident' then brackets (text ident') else empty) <> braces contents <> blankline else if level' == 0 diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 914d61850..6fc3b9b3c 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings, - ScopedTypeVariables #-} + ScopedTypeVariables, DeriveDataTypeable #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -35,6 +35,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) import Data.Char ( toLower ) +import Data.Typeable import Scripting.Lua (LuaState, StackValue, callfunc) import Text.Pandoc.Writers.Shared import qualified Scripting.Lua as Lua @@ -42,6 +43,8 @@ import Text.Pandoc.UTF8 (fromString, toString) import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as C8 import Data.Monoid +import Control.Monad (when) +import Control.Exception import qualified Data.Map as M import Text.Pandoc.Templates @@ -145,13 +148,22 @@ instance StackValue Citation where peek = undefined valuetype _ = Lua.TTABLE +data PandocLuaException = PandocLuaException String + deriving (Show, Typeable) + +instance Exception PandocLuaException + -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc@(Pandoc meta _) = do luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua - Lua.loadstring lua luaScript "custom" + status <- Lua.loadstring lua luaScript luaFile + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (status /= 0) $ + Lua.tostring lua 1 >>= throw . PandocLuaException Lua.call lua 0 0 -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom lua opts doc diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b10317506..19f8f2f11 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | + not (null id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 09321d1cc..4809d2a14 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -41,7 +41,7 @@ import Data.Time.Clock.POSIX import Data.Time.Clock import Data.Time.Format import System.Environment -import System.Locale +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize @@ -52,8 +52,10 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () -import Text.XML.Light +import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -62,8 +64,9 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) -import Control.Applicative ((<$>), (<|>)) -import Data.Maybe (fromMaybe, mapMaybe) +import Control.Applicative ((<$>), (<|>), (<*>)) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord) data ListMarker = NoMarker | BulletMarker @@ -104,13 +107,17 @@ data WriterState = WriterState{ , stInDel :: Bool , stChangesAuthor :: String , stChangesDate :: String + , stPrintWidth :: Integer + , stStyleMaps :: StyleMaps + , stFirstPara :: Bool + , stTocTitle :: [Inline] } defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] , stParaProperties = [] - , stFootnotes = [] + , stFootnotes = defaultFootnotes , stSectionIds = [] , stExternalLinks = M.empty , stImages = M.empty @@ -122,6 +129,10 @@ defaultWriterState = WriterState{ , stInDel = False , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" + , stPrintWidth = 1 + , stStyleMaps = defaultStyleMaps + , stFirstPara = False + , stTocTitle = normalizeInlines [Str "Table of Contents"] } type WS a = StateT WriterState IO a @@ -143,24 +154,106 @@ renderXml :: Element -> BL.ByteString renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> UTF8.fromStringLazy (showElement elt) +renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap _ [] = M.empty +renumIdMap n (e:es) + | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = + M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + | otherwise = renumIdMap n es + +replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr _ _ [] = [] +replaceAttr f val (a:as) | f (attrKey a) = + (XML.Attr (attrKey a) val) : (replaceAttr f val as) + | otherwise = a : (replaceAttr f val as) + +renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId f renumMap e + | Just oldId <- findAttrBy f e + , Just newId <- M.lookup oldId renumMap = + let attrs' = replaceAttr f newId (elAttribs e) + in + e { elAttribs = attrs' } + | otherwise = e + +renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds f renumMap = map (renumId f renumMap) + +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) + | c == 0x9 = True + | c == 0xA = True + | c == 0xD = True + | 0x20 <= c && c <= 0xD7FF = True + | 0xE000 <= c && c <= 0xFFFD = True + | 0x10000 <= c && c <= 0x10FFFF = True + | otherwise = False + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = normalizeInlines [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath doc + let doc' = stripInvalidChars . walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" - distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx" + + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + -- Gets the template size + let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + + let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) + + -- Get the avaible area (converting the size and the margins to int and + -- doing the difference + let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) + <*> ( + (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) + <*> (read <$> mbAttrMarLeft ::Maybe Integer) + ) + + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleMaps = getStyleMaps styledoc + + let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ + metaValueToInlines <$> lookupMeta "toc-title" meta ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username - , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime} + , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stStyleMaps = styleMaps + , stTocTitle = tocTitle + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -168,13 +261,6 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs - -- adjust contents to add sectPr from reference.docx - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc - - let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr - let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") @@ -186,9 +272,6 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let contents' = contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] contents' parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" @@ -255,7 +338,7 @@ writeDocx opts doc@(Pandoc meta _) = do [("Type",url') ,("Id",id') ,("Target",target')] () - let baserels = map toBaseRel + let baserels' = map toBaseRel [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering", "rId1", "numbering.xml") @@ -277,8 +360,12 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") - ] ++ - headers ++ footers + ] + + let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) + let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers + let renumFooters = renumIds (\q -> qName q == "Id") idMap footers + let baserels = baserels' ++ renumHeaders ++ renumFooters let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -288,6 +375,23 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml reldoc + -- adjust contents to add sectPr from reference.docx + let sectpr = case mbsectpr of + Just sectpr' -> let cs = renumIds + (\q -> qName q == "id" && qPrefix q == Just "r") + idMap + (elChildren sectpr') + in + add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs + Nothing -> (mknode "w:sectPr" [] ()) + + -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] contents' + + + -- word/document.xml let contentEntry = toEntry "word/document.xml" epochtime $ renderXml docContents @@ -302,11 +406,18 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -351,6 +462,17 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + -- we do, however, copy some settings over from reference + let settingsPath = "word/settings.xml" + settingsList = [ "w:autoHyphenation" + , "w:consecutiveHyphenLimit" + , "w:hyphenationZone" + , "w:doNotHyphenateCap" + ] + settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList + let entryFromArchive arch path = maybe (fail $ path ++ " corrupt or missing in reference docx") return @@ -358,9 +480,6 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - settingsEntry <- entryFromArchive distArchive "word/settings.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (fmap ("word/" ++) . extractTarget) @@ -383,10 +502,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () @@ -407,17 +529,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () + : mknode "w:noProof" [] () : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) $ backgroundColor style ) ] +copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren refArchive distArchive path timestamp elNames = do + ref <- parseXml refArchive distArchive path + dist <- parseXml distArchive distArchive path + return $ toEntry path timestamp $ renderXml dist{ + elContent = elContent dist ++ copyContent ref + } + where + strName QName{qName=name, qPrefix=prefix} + | Just p <- prefix = p++":"++name + | otherwise = name + shouldCopy = (`elem` elNames) . strName + cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} + copyContent = map cleanElem . filterChildrenName shouldCopy + -- this is the lowest number used for a list numId baseListId :: Int baseListId = 1000 @@ -495,6 +635,34 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists +makeTOC :: WriterOptions -> WS [Element] +makeTOC opts | writerTableOfContents opts = do + let depth = "1-"++(show (writerTOCDepth opts)) + let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + tocTitle <- gets stTocTitle + title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) + return $ + [mknode "w:sdt" [] ([ + mknode "w:sdtPr" [] ( + mknode "w:docPartObj" [] ( + [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), + mknode "w:docPartUnique" [] ()] + ) -- w:docPartObj + ), -- w:sdtPr + mknode "w:sdtContent" [] (title++[ + mknode "w:p" [] ( + mknode "w:r" [] ([ + mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), + mknode "w:instrText" [("xml:space","preserve")] tocCmd, + mknode "w:fldChar" [("w:fldCharType","separate")] (), + mknode "w:fldChar" [("w:fldCharType","end")] () + ]) -- w:r + ) -- w:p + ]) + ])] -- w:sdt +makeTOC _ = return [] + + -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) @@ -513,32 +681,45 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks - doc' <- blocksToOpenXML opts blocks' + doc' <- (setFirstPara >> blocksToOpenXML opts blocks') notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ subtitle ++ authors ++ date ++ abstract + toc <- makeTOC opts + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +pStyleM :: String -> WS XML.Element +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () + +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +rStyleM :: String -> WS XML.Element +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -552,12 +733,13 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ - getParaProps False + setFirstPara + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ + getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident @@ -569,40 +751,60 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do + setFirstPara + pushParaProp $ pCustomStyle $ + if null alt + then "Figure" + else "FigureWithCaption" paraProps <- getParaProps False + popParaProp contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps ++ contents)] + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] -blockToOpenXML opts (BlockQuote blocks) = - withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks -blockToOpenXML opts (CodeBlock attrs str) = - withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str] -blockToOpenXML _ HorizontalRule = return [ - mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] +blockToOpenXML opts (BlockQuote blocks) = do + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks + setFirstPara + return p +blockToOpenXML opts (CodeBlock attrs str) = do + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) + setFirstPara + return p +blockToOpenXML _ HorizontalRule = do + setFirstPara + return [ + mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] blockToOpenXML opts (Table caption aligns widths headers rows) = do + setFirstPara let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -613,51 +815,62 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ - mknode "w:tbl" [] + caption' ++ + [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' - ) : caption' + )] blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do let marker = NumberMarker numstyle numdelim start addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst -blockToOpenXML opts (DefinitionList items) = - concat `fmap` mapM (definitionListItemToOpenXML opts) items + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l +blockToOpenXML opts (DefinitionList items) = do + l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items + setFirstPara + return l definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -721,6 +934,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -749,6 +965,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -758,6 +977,9 @@ formattedString str = do [ mknode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] str ] ] +setFirstPara :: WS () +setFirstPara = modify $ \s -> s { stFirstPara = True } + -- | Convert an inline element to OpenXML. inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str @@ -828,25 +1050,26 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rCustomStyle (show toktype) ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rCustomStyle "VerbatimChar") + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -856,22 +1079,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -883,6 +1106,7 @@ inlineToOpenXML opts (Link txt (src,_)) = do return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML opts (Image alt (src, tit)) = do -- first, check to see if we've already done this image + pageWidth <- gets stPrintWidth imgs <- gets stImages case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] @@ -899,7 +1123,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) let cNvPicPr = mknode "pic:cNvPicPr" [] $ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () let nvPicPr = mknode "pic:nvPicPr" [] @@ -957,6 +1181,22 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +-- Word will insert these footnotes into the settings.xml file +-- (whether or not they're visible in the document). If they're in the +-- file, but not in the footnotes.xml file, it will produce +-- problems. So we want to make sure we insert them into our document. +defaultFootnotes :: [Element] +defaultFootnotes = [ mknode "w:footnote" + [("w:type", "separator"), ("w:id", "-1")] $ + [ mknode "w:p" [] $ + [mknode "w:r" [] $ + [ mknode "w:separator" [] ()]]] + , mknode "w:footnote" + [("w:type", "continuationSeparator"), ("w:id", "0")] $ + [ mknode "w:p" [] $ + [ mknode "w:r" [] $ + [ mknode "w:continuationSeparator" [] ()]]]] + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` @@ -966,9 +1206,11 @@ parseXml refArchive distArchive relpath = Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- | Scales the image to fit the page -fitToPage :: (Integer, Integer) -> (Integer, Integer) -fitToPage (x, y) - --5440680 is the emu width size of a letter page in portrait, minus the margins - | x > 5440680 = - (5440680, round $ (5440680 / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) +-- sizes are passed in emu +fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > pageWidth = + (pageWidth, round $ + ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) | otherwise = (x, y) diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 8c1d360aa..17ff8a279 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -134,7 +134,9 @@ blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|" ++ if null tit then capt else tit ++ capt - return $ "{{:" ++ src ++ opt ++ "}}\n" + -- Relative links fail isURI and receive a colon + prefix = if isURI src then "" else ":" + return $ "{{" ++ prefix ++ src ++ opt ++ "}}\n" blockToDokuWiki opts (Para inlines) = do indent <- stIndent <$> ask @@ -178,7 +180,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do blockToDokuWiki opts (BlockQuote blocks) = do contents <- blockListToDokuWiki opts blocks if isSimpleBlockQuote blocks - then return $ "> " ++ contents + then return $ unlines $ map ("> " ++) $ lines contents else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" blockToDokuWiki opts (Table capt aligns _ headers rows) = do @@ -352,9 +354,7 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False isSimpleBlockQuote :: [Block] -> Bool -isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs -isSimpleBlockQuote [b] = isPlainOrPara b -isSimpleBlockQuote _ = False +isSimpleBlockQuote bs = all isPlainOrPara bs -- | Concatenates strings with line breaks between them. vcat :: [String] -> String @@ -459,7 +459,7 @@ inlineToDokuWiki _ (RawInline f str) | f == Format "html" = return $ "<html>" ++ str ++ "</html>" | otherwise = return "" -inlineToDokuWiki _ (LineBreak) = return "\\\\ " +inlineToDokuWiki _ (LineBreak) = return "\\\\\n" inlineToDokuWiki _ Space = return " " @@ -480,7 +480,9 @@ inlineToDokuWiki opts (Image alt (source, tit)) = do ("", []) -> "" ("", _ ) -> "|" ++ alt' (_ , _ ) -> "|" ++ tit - return $ "{{:" ++ source ++ txt ++ "}}" + -- Relative links fail isURI and receive a colon + prefix = if isURI source then "" else ":" + return $ "{{" ++ prefix ++ source ++ txt ++ "}}" inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e4f2d1335..29ea44e02 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} {- Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> @@ -35,16 +35,17 @@ import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) -import System.FilePath ( (</>), takeExtension, takeFileName ) +import System.FilePath ( takeExtension, takeFileName ) +import System.FilePath.Glob ( namesMatching ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Control.Applicative ((<$>)) +import Control.Applicative ((<$>), (<$)) import Data.Time.Clock.POSIX ( getPOSIXTime ) import Data.Time (getCurrentTime,UTCTime, formatTime) -import System.Locale ( defaultTimeLocale ) +import Text.Pandoc.Compat.Locale ( defaultTimeLocale ) import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim , normalizeDate, readDataFile, stringify, warn , hierarchicalize, fetchItem' ) @@ -57,14 +58,13 @@ import Text.Pandoc.Options ( WriterOptions(..) import Text.Pandoc.Definition import Text.Pandoc.Walk (walk, walkM) import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, when, mplus, liftM) +import Control.Monad (foldM, mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -344,7 +344,6 @@ writeEPUB opts doc@(Pandoc meta _) = do , writerStandalone = True , writerSectionDivs = True , writerHtml5 = epub3 - , writerTableOfContents = False -- we always have one in epub , writerVariables = vars , writerHTMLMathMethod = if epub3 @@ -359,8 +358,9 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + let cpContent = renderHtml $ writeHtml + opts'{ writerVariables = ("coverpage","true"):vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -388,8 +388,14 @@ writeEPUB opts doc@(Pandoc meta _) = do picEntries <- foldM readPicEntry [] pics -- handle fonts + let matchingGlob f = do + xs <- namesMatching f + when (null xs) $ + warn $ f ++ " did not match any font files." + return xs let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f - fontEntries <- mapM mkFontEntry $ writerEpubFonts opts' + fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') + fontEntries <- mapM mkFontEntry fontFiles -- set page progression direction attribution let progressionDirection = case epubPageDirection metadata of @@ -489,6 +495,9 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x + + let tocTitle = fromMaybe plainTitle $ + metaValueToString <$> lookupMeta "toc-title" meta let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -528,14 +537,12 @@ writeEPUB opts doc@(Pandoc meta _) = do case lookupMeta "title" meta of Just _ -> "yes" Nothing -> "no")] $ ()) : - (unode "itemref" ! [("idref", "nav") - ,("linear", if writerTableOfContents opts - then "yes" - else "no")] $ ()) : + [unode "itemref" ! [("idref", "nav")] $ () + | writerTableOfContents opts ] ++ map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle), + [("type","toc"),("title", tocTitle), ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! @@ -572,8 +579,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n) - ,("playOrder", show n)] $ + [("id", "navPoint-" ++ show n)] $ [ unode "navLabel" $ unode "text" tit , unode "content" ! [("src", src)] $ () ] ++ subs @@ -598,7 +604,7 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> [] Just img -> [unode "meta" ! [("name","cover"), ("content", toId img)] $ ()] - , unode "docTitle'" $ unode "text" $ plainTitle + , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 ] @@ -614,17 +620,35 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = UTF8.fromStringLazy $ ppTopElement $ - unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") - ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ - [ unode "head" $ - [ unode "title" plainTitle - , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ] - , unode "body" $ - unode navtag ! [("epub:type","toc") | epub3] $ - [ unode "h1" ! [("id","toc-title")] $ plainTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1] - ] + let navBlocks = [RawBlock (Format "html") $ ppElement $ + unode navtag ! ([("epub:type","toc") | epub3] ++ + [("id","toc")]) $ + [ unode "h1" ! [("id","toc-title")] $ tocTitle + , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + let landmarks = if epub3 + then [RawBlock (Format "html") $ ppElement $ + unode "nav" ! [("epub:type","landmarks") + ,("hidden","hidden")] $ + [ unode "ol" $ + [ unode "li" + [ unode "a" ! [("href", "cover.xhtml") + ,("epub:type", "cover")] $ + "Cover"] | + epubCoverImage metadata /= Nothing + ] ++ + [ unode "li" + [ unode "a" ! [("href", "#toc") + ,("epub:type", "toc")] $ + "Table of contents" + ] | writerTableOfContents opts + ] + ] + ] + else [] + let navData = renderHtml $ writeHtml opts' + (Pandoc (setMeta "title" + (walk removeNote $ fromList $ docTitle' meta) nullMeta) + (navBlocks ++ landmarks)) let navEntry = mkEntry "nav.xhtml" navData -- mimetype @@ -767,23 +791,20 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Tag String -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - let oldsrc = maybe src (</> src) $ writerSourceURL opts - let oldposter = maybe poster (</> poster) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc - newposter <- modifyMediaRef mediaRef oldposter + newsrc <- modifyMediaRef mediaRef src + newposter <- modifyMediaRef mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag +transformTag _ tag = return tag modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath modifyMediaRef _ "" = return "" @@ -793,7 +814,7 @@ modifyMediaRef mediaRef oldsrc = do Just n -> return n Nothing -> do let new = "media/file" ++ show (length media) ++ - takeExtension oldsrc + takeExtension (takeWhile (/='?') oldsrc) -- remove query modifyIORef mediaRef ( (oldsrc, new): ) return new @@ -801,10 +822,10 @@ transformBlock :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Block -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) +transformBlock _ mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b @@ -812,19 +833,17 @@ transformInline :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts mediaRef (Image lab (src,tit)) = do - let src' = unEscapeString src - let oldsrc = maybe src' (</> src) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc +transformInline _ mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline opts mediaRef (RawInline fmt raw) +transformInline _ mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x @@ -885,20 +904,27 @@ addIdentifiers bs = evalState (mapM go bs) [] -- was "header-1" might turn into "ch006.xhtml#header". correlateRefs :: Int -> [Block] -> [(String,String)] correlateRefs chapterHeaderLevel bs = - identTable $ execState (mapM_ go bs) + identTable $ execState (walkM goBlock bs >>= walkM goInline) IdentState{ chapterNumber = 0 , identTable = [] } - where go :: Block -> State IdentState () - go (Header n (ident,_,_) _) = do - when (n <= chapterHeaderLevel) $ - modify $ \s -> s{ chapterNumber = chapterNumber s + 1 } + where goBlock :: Block -> State IdentState Block + goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident + goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident + goBlock x = return x + goInline :: Inline -> State IdentState Inline + goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident + goInline x = return x + addIdentifier mbHeaderLevel ident = do + case mbHeaderLevel of + Just n | n <= chapterHeaderLevel -> + modify $ \s -> s{ chapterNumber = chapterNumber s + 1 } + _ -> return () st <- get let chapterid = showChapter (chapterNumber st) ++ - if n <= chapterHeaderLevel - then "" - else '#' : ident + case mbHeaderLevel of + Just n | n <= chapterHeaderLevel -> "" + _ -> '#' : ident modify $ \s -> s{ identTable = (ident, chapterid) : identTable st } - go _ = return () -- Replace internal link references using the table produced -- by correlateRefs. diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 233b8b32b..31fa4bee8 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -85,7 +85,7 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) + return $ xml_head ++ (showContent fb2_xml) ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9ead604d7..53dc931cc 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, CPP #-} +{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -49,7 +49,10 @@ import Data.String ( fromString ) import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) +#if MIN_VERSION_blaze_markup(0,6,3) +#else import Text.Blaze.Internal(preEscapedString) +#endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 #else @@ -60,6 +63,8 @@ import qualified Text.Blaze.XHtml1.Transitional.Attributes as A import Text.Blaze.Renderer.String (renderHtml) import Text.TeXMath import Text.XML.Light.Output +import Text.XML.Light (unode, elChildren, unqual) +import qualified Text.XML.Light as XML import System.FilePath (takeExtension) import Data.Monoid import Data.Aeson (Value) @@ -71,11 +76,13 @@ data WriterState = WriterState , stQuotes :: Bool -- ^ <q> tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section + , stElement :: Bool -- ^ Processing an Element } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, - stHighlighting = False, stSecNum = []} + stHighlighting = False, stSecNum = [], + stElement = False} -- Helpers to render HTML with the appropriate function. @@ -155,6 +162,10 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty + KaTeX js css -> + (H.script ! A.src (toValue js) $ mempty) <> + (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> + (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) _ -> case lookup "mathml-script" (writerVariables opts) of Just s | not (writerHtml5 opts) -> H.script ! A.type_ "text/javascript" @@ -274,7 +285,13 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty - else blockToHtml opts (Header level' (id',classes,keyvals) title') + else do + modify (\st -> st{ stElement = True}) + res <- blockToHtml opts + (Header level' (id',classes,keyvals) title') + modify (\st -> st{ stElement = False}) + return res + let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] @@ -342,10 +359,10 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> String -> String -> Html +obfuscateLink :: WriterOptions -> Html -> String -> Html obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = - H.a ! A.href (toValue s) $ toHtml txt -obfuscateLink opts txt s = + H.a ! A.href (toValue s) $ txt +obfuscateLink opts (renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -424,24 +441,30 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, capt, nl opts] + [nl opts, img, nl opts, capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents blockToHtml opts (Div attr@(_,classes,_) bs) = do - contents <- blockListToHtml opts bs + let speakerNotes = "notes" `elem` classes + -- we don't want incremental output inside speaker notes, see #1394 + let opts' = if speakerNotes then opts{ writerIncremental = False } else opts + contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts return $ - if "notes" `elem` classes - then let opts' = opts{ writerIncremental = False } in - -- we don't want incremental output inside speaker notes - case writerSlideVariant opts of + if speakerNotes + then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' -blockToHtml _ (RawBlock f str) +blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str + | f == Format "latex" = + case writerHTMLMathMethod opts of + MathJax _ -> do modify (\st -> st{ stMath = True }) + return $ toHtml str + _ -> return mempty | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do @@ -485,7 +508,7 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (_,classes,_) lst) = do +blockToHtml opts (Header level attr@(_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) @@ -493,7 +516,9 @@ blockToHtml opts (Header level (_,classes,_) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - return $ case level of + inElement <- gets stElement + return $ (if inElement then id else addAttrs opts attr) + $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' 3 -> H.h3 contents' @@ -506,7 +531,9 @@ blockToHtml opts (BulletList lst) = do return $ unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = case numstyle of + Example -> "decimal" + _ -> camelCaseToHyphenated $ show numstyle let attribs = (if startnum /= 1 then [A.start $ toValue startnum] else []) ++ @@ -615,13 +642,28 @@ inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat +-- | Annotates a MathML expression with the tex source +annotateMML :: XML.Element -> String -> XML.Element +annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) + where + cs = case elChildren e of + [] -> unode "mrow" () + [x] -> x + xs -> unode "mrow" xs + math childs = XML.Element q as [XML.Elem childs] l + where + (XML.Element q as _ l) = e + annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"] + + -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " - (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= return . addAttrs opts attr' . H.span @@ -669,69 +711,78 @@ inlineToHtml opts inline = H.q `fmap` inlineListToHtml opts lst else (\x -> leftQuote >> x >> rightQuote) `fmap` inlineListToHtml opts lst - (Math t str) -> modify (\st -> st {stMath = True}) >> - (case writerHTMLMathMethod opts of - LaTeXMathML _ -> - -- putting LaTeXMathML in container with class "LaTeX" prevents - -- non-math elements on the page from being treated as math by - -- the javascript - return $ H.span ! A.class_ "LaTeX" $ - case t of - InlineMath -> toHtml ("$" ++ str ++ "$") - DisplayMath -> toHtml ("$$" ++ str ++ "$$") - JsMath _ -> do - let m = preEscapedString str - return $ case t of - InlineMath -> H.span ! A.class_ "math" $ m - DisplayMath -> H.div ! A.class_ "math" $ m - WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img - let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode str) - ! A.alt (toValue str) - ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag - GladTeX -> - return $ case t of - InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" - DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock - let conf = useShortEmptyTags (const False) - defaultConfigPP - case writeMathML dt <$> readTeX str of - Right r -> return $ preEscapedString $ - ppcElement conf r - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ "math") - MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ - case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" - PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) - let m = H.span ! A.class_ "math" $ x - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag ) + (Math t str) -> do + modify (\st -> st {stMath = True}) + let mathClass = toValue $ ("math " :: String) ++ + if t == InlineMath then "inline" else "display" + case writerHTMLMathMethod opts of + LaTeXMathML _ -> + -- putting LaTeXMathML in container with class "LaTeX" prevents + -- non-math elements on the page from being treated as math by + -- the javascript + return $ H.span ! A.class_ "LaTeX" $ + case t of + InlineMath -> toHtml ("$" ++ str ++ "$") + DisplayMath -> toHtml ("$$" ++ str ++ "$$") + JsMath _ -> do + let m = preEscapedString str + return $ case t of + InlineMath -> H.span ! A.class_ mathClass $ m + DisplayMath -> H.div ! A.class_ mathClass $ m + WebTeX url -> do + let imtag = if writerHtml5 opts then H5.img else H.img + let m = imtag ! A.style "vertical-align:middle" + ! A.src (toValue $ url ++ urlEncode str) + ! A.alt (toValue str) + ! A.title (toValue str) + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag + GladTeX -> + return $ case t of + InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" + DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" + MathML _ -> do + let dt = if t == InlineMath + then DisplayInline + else DisplayBlock + let conf = useShortEmptyTags (const False) + defaultConfigPP + case writeMathML dt <$> readTeX str of + Right r -> return $ preEscapedString $ + ppcElement conf (annotateMML r str) + Left _ -> inlineListToHtml opts + (texMathToInlines t str) >>= + return . (H.span ! A.class_ mathClass) + MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" + KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ + toHtml (case t of + InlineMath -> str + DisplayMath -> "\\displaystyle " ++ str) + PlainMath -> do + x <- inlineListToHtml opts (texMathToInlines t str) + let m = H.span ! A.class_ mathClass $ x + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag (RawInline f str) | f == Format "latex" -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str + MathJax _ -> do modify (\st -> st {stMath = True}) + return $ toHtml str _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (renderHtml linkText) s + return $ obfuscateLink opts linkText s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt let s' = case s of @@ -746,22 +797,15 @@ inlineToHtml opts inline = then link' else link' ! A.title (toValue tit) (Image txt (s,tit)) | treatAsImage s -> do - let alternate' = stringify txt let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) ++ - if null txt - then [] - else [A.alt $ toValue alternate'] + [A.title $ toValue tit | not $ null tit] ++ + [A.alt $ toValue $ stringify txt] let tag = if writerHtml5 opts then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image _ (s,tit)) -> do let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) + [A.title $ toValue tit | not $ null tit] return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl (Note contents) @@ -815,3 +859,14 @@ blockListToNote opts ref blocks = Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' + +-- Javascript snippet to render all KaTeX elements +renderKaTeX :: String +renderKaTeX = unlines [ + "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" + , "for (var i=0; i < mathElements.length; i++)" + , "{" + , " var texText = mathElements[i].firstChild" + , " katex.render(texText.data, mathElements[i])" + , "}}" + ] diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index ae20efd4b..6af4b7aa3 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} {- | Module : Text.Pandoc.Writers.ICML @@ -70,7 +70,6 @@ linkName = "Link" -- block element names (appear in InDesign's paragraph styles pane) paragraphName :: String codeBlockName :: String -rawBlockName :: String blockQuoteName :: String orderedListName :: String bulletListName :: String @@ -93,7 +92,6 @@ subListParName :: String footnoteName :: String paragraphName = "Paragraph" codeBlockName = "CodeBlock" -rawBlockName = "Rawblock" blockQuoteName = "Blockquote" orderedListName = "NumList" bulletListName = "BulList" @@ -278,7 +276,9 @@ blockToICML :: WriterOptions -> Style -> Block -> WS Doc blockToICML opts style (Plain lst) = parStyle opts style lst blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML _ _ (RawBlock f str) + | f == Format "icml" = return $ text str + | otherwise = return empty blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst @@ -399,12 +399,14 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] -inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math -inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML _ _ (RawInline f str) + | f == Format "icml" = return $ text str + | otherwise = return empty inlineToICML opts style (Link lst (url, title)) = do content <- inlinesToICML opts (linkName:style) lst state $ \st -> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 0fa1e4857..49bc27b58 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) +import Data.Aeson.Types ( (.:), parseMaybe, withObject ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -54,6 +55,7 @@ data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage + , stInHeading :: Bool -- true if in a section heading , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter @@ -76,9 +78,9 @@ writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stNotes = [], - stOLLevel = 1, stOptions = options, - stVerbInNote = False, + stInMinipage = False, stInHeading = False, + stNotes = [], stOLLevel = 1, + stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, stUrl = False, stGraphics = False, stLHS = False, stBook = writerChapters options, @@ -101,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + metadata <- metaToJSON options + (fmap (render colwidth) . blockListToLaTeX) + (fmap (render colwidth) . inlineListToLaTeX) + meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - case lookup "documentclass" (writerVariables options) of + case lookup "documentclass" (writerVariables options) `mplus` + parseMaybe (withObject "object" (.: "documentclass")) metadata of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && @@ -113,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - let colwidth = if writerWrapText options - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) else case last blocks' of @@ -179,7 +182,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do + modify $ \s -> s{stInHeading = True} header' <- sectionHeader ("unnumbered" `elem` classes) id' level title' + modify $ \s -> s{stInHeading = False} innerContents <- mapM (elementToLaTeX opts) elements return $ vsep (header' : innerContents) @@ -203,7 +208,7 @@ stringToLaTeX ctx (x:xs) = do '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest - '$' -> "\\$" ++ rest + '$' | not isUrl -> "\\$" ++ rest '%' -> "\\%" ++ rest '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest @@ -237,7 +242,7 @@ toLabel z = go `fmap` stringToLaTeX URLString z where go [] = "" go (x:xs) | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x "-+=:;." = x:go xs + | elem x ("-+=:;." :: String) = x:go xs | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. @@ -466,8 +471,11 @@ blockToLaTeX (DefinitionList lst) = do "\\end{description}" blockToLaTeX HorizontalRule = return $ "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" -blockToLaTeX (Header level (id',classes,_) lst) = - sectionHeader ("unnumbered" `elem` classes) id' level lst +blockToLaTeX (Header level (id',classes,_) lst) = do + modify $ \s -> s{stInHeading = True} + hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst + modify $ \s -> s{stInHeading = False} + return hdr blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty @@ -539,10 +547,16 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ [RawInline "tex" "}"] +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc tableCellToLaTeX _ (0, _, blocks) = - blockListToLaTeX $ walk fixLineBreaks blocks + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks tableCellToLaTeX header (width, align, blocks) = do modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks @@ -607,6 +621,7 @@ sectionHeader :: Bool -- True for unnumbered sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst lab <- text `fmap` toLabel ref + plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -619,7 +634,12 @@ sectionHeader unnumbered ref level lst = do then return empty else do return $ brackets txtNoNotes - let stuffing = star <> optional <> braces txt + let contents = if render Nothing txt == plain + then braces txt + else braces (text "\\texorpdfstring" + <> braces txt + <> braces (text plain)) + let stuffing = star <> optional <> contents book <- gets stBook opts <- gets stOptions let level' = if book || writerChapters opts then level - 1 else level @@ -663,7 +683,7 @@ sectionHeader unnumbered ref level lst = do inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc inlineListToLaTeX lst = - mapM inlineToLaTeX (fixLineInitialSpaces lst) + mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) >>= return . hcat -- nonbreaking spaces (~) in LaTeX don't work after line breaks, -- so we turn nbsps after hard breaks to \hspace commands. @@ -675,6 +695,14 @@ inlineListToLaTeX lst = fixNbsps s = let (ys,zs) = span (=='\160') s in replicate (length ys) hspace ++ [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" + -- linebreaks after blank lines cause problems: + fixBreaks [] = [] + fixBreaks ys@(LineBreak : LineBreak : _) = + case span (== LineBreak) ys of + (lbs, rest) -> RawInline "latex" + ("\\\\[" ++ show (length lbs) ++ + "\\baselineskip]") : fixBreaks rest + fixBreaks (y:ys) = y : fixBreaks ys isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True @@ -724,22 +752,27 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions + inHeading <- gets stInHeading case () of - _ | writerListings opts -> listingsCode + _ | writerListings opts && not inHeading -> listingsCode | writerHighlight opts && not (null classes) -> highlightCode - | otherwise -> rawCode + | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = ((enumFromTo '!' '~') \\ str) !! 0 + let chr = case "!\"&'()*,-./:;?@_" \\ str of + (c:_) -> c + [] -> '!' return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] highlightCode = do case highlight formatLaTeXInline ("",classes,[]) str of Nothing -> rawCode Just h -> modify (\st -> st{ stHighlighting = True }) >> return (text h) - rawCode = liftM (text . (\s -> "\\texttt{" ++ s ++ "}")) + rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) $ stringToLaTeX CodeString str + where + escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -772,7 +805,7 @@ inlineToLaTeX (RawInline f str) | f == Format "latex" || f == Format "tex" = return $ text str | otherwise = return empty -inlineToLaTeX (LineBreak) = return "\\\\" +inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt @@ -801,7 +834,10 @@ inlineToLaTeX (Image _ (source, _)) = do then source else unEscapeString source source'' <- stringToLaTeX URLString source' - return $ "\\includegraphics" <> braces (text source'') + inHeading <- gets stInHeading + return $ + (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") + <> braces (text source'') inlineToLaTeX (Note contents) = do inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f06f1d6cc..dee4d56a4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -57,12 +57,15 @@ import qualified Data.Text as T type Notes = [[Block]] type Refs = [([Inline], Target)] -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: [String] - , stPlain :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stRefShortcutable :: Bool + , stInList :: Bool + , stIds :: [String] + , stPlain :: Bool } instance Default WriterState - where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False } + where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, + stInList = False, stIds = [], stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -323,9 +326,9 @@ blockToMarkdown opts (Plain inlines) = do then Just $ writerColumns opts else Nothing let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] + let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] let contents' = if isEnabled Ext_all_symbols_escapable opts && not (stPlain st) && beginsWithOrderedListMarker rendered then text $ escapeDelimiter rendered @@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do $ Pandoc nullMeta [t] return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items + contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let start' = if isEnabled Ext_startnum opts then start else 1 @@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + contents <- inList $ + mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items + contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline +inList :: State WriterState a -> State WriterState a +inList p = do + oldInList <- gets stInList + modify $ \st -> st{ stInList = True } + res <- p + modify $ \st -> st{ stInList = oldInList } + return res + addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of @@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do AlignCenter -> ':':replicate w '-' ++ ":" AlignRight -> replicate (w + 1) '-' ++ ":" AlignDefault -> replicate (w + 2) '-' - let header = if headless then empty else torow rawHeaders + -- note: pipe tables can't completely lack a + -- header; for a headerless table, we need a header of empty cells. + -- see jgm/pandoc#1996. + let header = if headless + then torow (replicate (length aligns) empty) + else torow rawHeaders let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows @@ -677,12 +694,53 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat - where avoidBadWraps [] = [] - avoidBadWraps (Space:Str (c:cs):xs) - | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs - avoidBadWraps (x:xs) = x : avoidBadWraps xs +inlineListToMarkdown opts lst = do + inlist <- gets stInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (i:is) = case i of + (Link _ _) -> case is of + -- If a link is followed by another link or '[' we don't shortcut + (Link _ _):_ -> unshortcutable + Space:(Link _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- withState (\s -> s { stRefShortcutable = False }) + (inlineToMarkdown opts i) + modify (\s -> s {stRefShortcutable = True }) + fmap (iMark <>) (go is) + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (Space:Str ('>':cs):xs) = + Str (' ':'>':cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str [c]:[]) + | c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (Space:Str [c]:Space:xs) + | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:Space:xs) + | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:[]) + | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: String -> Bool +isOrderedListMarker xs = (last xs `elem` ['.',')']) && + isRight (runParserT (anyOrderedListMarker >> eof) + defaultParserState "" xs) + +isRight :: Either a b -> Bool +isRight (Right _) = True +isRight (Left _) = False escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -692,8 +750,10 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts + return $ if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) then tagWithAttrs "span" attrs <> contents <> text "</span>" else contents inlineToMarkdown opts (Emph lst) = do @@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do + if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) + ("",[],[("style","font-variant:small-caps;")]) <> contents <> text "</span>" + else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" @@ -821,8 +882,8 @@ inlineToMarkdown opts (Cite (c:cs) lst) sdoc <- inlineListToMarkdown opts sinlines let k' = text (modekey m ++ "@" ++ k) r = case sinlines of - Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc - _ -> k' <+> sdoc + Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" @@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do [Str s] | escapeURI s == srcSuffix -> True _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- gets stRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto @@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref - then "[]" + then if useShortcutRefLinks + then "" + else "[]" else "[" <> reftext <> "]" in first <> second else if plain diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 3f392a5d0..b49c60867 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -107,7 +107,7 @@ blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|alt=" ++ if null tit then capt else tit ++ capt - return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" + return $ "[[File:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" blockToMediaWiki (Para inlines) = do tags <- asks useTags @@ -375,14 +375,14 @@ inlineToMediaWiki (RawInline f str) | f == Format "html" = return str | otherwise = return "" -inlineToMediaWiki (LineBreak) = return "<br />" +inlineToMediaWiki (LineBreak) = return "<br />\n" inlineToMediaWiki Space = return " " inlineToMediaWiki (Link txt (src, _)) = do label <- inlineListToMediaWiki txt case txt of - [Str s] | escapeURI s == src -> return src + [Str s] | isURI src && escapeURI s == src -> return src _ -> return $ if isURI src then "[" ++ src ++ " " ++ label ++ "]" else "[[" ++ src' ++ "|" ++ label ++ "]]" @@ -397,7 +397,7 @@ inlineToMediaWiki (Image alt (source, tit)) = do then "" else '|' : alt' else '|' : tit - return $ "[[Image:" ++ source ++ txt ++ "]]" + return $ "[[File:" ++ source ++ txt ++ "]]" inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 03f8e8ba4..81bbdaf3f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -41,7 +41,7 @@ import Control.Applicative ((<$>)) import Text.Pandoc.Options ( WriterOptions(..) ) import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) -import Text.Pandoc.MIME ( getMimeType ) +import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared ( fixDisplayMath ) @@ -51,7 +51,7 @@ import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension, takeDirectory ) +import System.FilePath ( takeExtension, takeDirectory, (<.>)) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -127,23 +127,27 @@ writeODT opts doc@(Pandoc meta _) = do return $ fromArchive archive'' transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image lab (src,_)) = do +transformPicMath opts entriesRef (Image lab (src,t)) = do res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab - Right (img, _) -> do + Right (img, mbMimeType) -> do let size = imageSize img let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef - let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src + let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) + (mbMimeType >>= extensionFromMimeType) + let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) epochtime <- floor `fmap` getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) - return $ Image lab (newsrc, tit') + let fig | "fig:" `isPrefixOf` t = "fig:" + | otherwise = "" + return $ Image lab (newsrc, fig++tit') transformPicMath _ entriesRef (Math t math) = do entries <- readIORef entriesRef let dt = if t == InlineMath then DisplayInline else DisplayBlock diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index dd359f3f5..5c8ef8c45 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -38,7 +38,7 @@ import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty import Data.Time -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import qualified Text.Pandoc.Builder as B -- | Convert Pandoc document to string in OPML format. @@ -87,4 +87,3 @@ elementToOPML opts (Sec _ _num _ title elements) = | not (null blocks)] in inTags True "outline" attrs $ vcat (map (elementToOPML opts) rest) - diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 773d142f4..aee656413 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings #-} +{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} {- Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. @@ -288,6 +288,8 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b + | Para [Image c (s,'f':'i':'g':':':t)] <- bs + = figure c s t | Para b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b @@ -334,7 +336,7 @@ blockToOpenDocument o bs mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles captionDoc <- if null c then return empty - else withParagraphStyle o "Caption" [Para c] + else withParagraphStyle o "TableCaption" [Para c] th <- if all null h then return empty else colHeadsToOpenDocument o name (map fst paraHStyles) h @@ -342,6 +344,12 @@ blockToOpenDocument o bs return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) $$ captionDoc + figure caption source title | null caption = + withParagraphStyle o "Figure" [Para [Image caption (source,title)]] + | otherwise = do + imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]] + captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] + return $ imageDoc $$ captionDoc colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc colHeadsToOpenDocument o tn ns hs = @@ -370,7 +378,7 @@ inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils | Space <- ils = inTextStyle space | Span _ xs <- ils = inlinesToOpenDocument o xs - | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] + | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] <> cr | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l @@ -553,4 +561,3 @@ textStyleAttr s ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] | otherwise = [] - diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 57ebfc360..2dd899680 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Refs - , stImages :: [([Inline], (String, String, Maybe String))] - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stLinks :: Refs + , stImages :: [([Inline], (String, String, Maybe String))] + , stHasMath :: Bool + , stHasRawTeX :: Bool + , stOptions :: WriterOptions + , stTopLevel :: Bool } -- | Convert Pandoc to RST. @@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, - stOptions = opts } + stHasRawTeX = False, stOptions = opts, + stTopLevel = True} in evalState (pandocToRST document) st -- | Return RST representation of document. @@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToRST) (fmap (trimr . render colwidth) . inlineListToRST) $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST blocks + body <- blockListToRST' True $ normalizeHeadings 1 blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get + rawTeX <- liftM stHasRawTeX get let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (writerTOCDepth opts) + $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath + $ defField "rawtex" rawTeX $ metadata if writerStandalone opts then return $ renderTemplate' (writerTemplate opts) context else return main + where + normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' + where (cont,bs') = break (headerLtEq l) bs + headerLtEq level (Header l' _ _) = l' <= level + headerLtEq _ _ = False + normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs + normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. refsToRST :: Refs -> State WriterState Doc @@ -105,7 +117,7 @@ keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` (render Nothing label') + let label'' = if ':' `elem` ((render Nothing label') :: String) then char '`' <> label' <> char '`' else label' return $ nowrap $ ".. _" <> label'' <> ": " <> text src @@ -173,11 +185,11 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do capt <- inlineListToRST txt let fig = "figure:: " <> text src let alt = ":alt: " <> if null tit then capt else text tit - return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline + return $ hang 3 ".. " (fig $$ alt $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines - return $ (vcat $ map (text "| " <>) lns) <> blankline + return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline @@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str) (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline -blockToRST (Header level _ inlines) = do +blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + isTopLevel <- gets stTopLevel + if isTopLevel + then do + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate (offset contents) headerChar + return $ nowrap $ contents $$ border $$ blankline + else do + let rub = "rubric:: " <> contents + let name' | null name = empty + | otherwise = ":name: " <> text name + let cls | null classes = empty + | otherwise = ":class: " <> text (unwords classes) + return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts @@ -239,8 +261,7 @@ blockToRST (Table caption _ widths headers rows) = do middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToRST row - return $ makeRow cols) rows + let rows' = map makeRow rawRows let border ch = char '+' <> char ch <> (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> @@ -253,7 +274,7 @@ blockToRST (Table caption _ widths headers rows) = do blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." @@ -265,11 +286,11 @@ blockToRST (OrderedList (start, style', delim) items) = do contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ zip markers' items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do contents <- mapM definitionListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. bulletListItemToRST :: [Block] -> State WriterState Doc @@ -295,9 +316,19 @@ definitionListItemToRST (label, defs) = do return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. +blockListToRST' :: Bool + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST' topLevel blocks = do + tl <- gets stTopLevel + modify (\s->s{stTopLevel=topLevel}) + res <- vcat `fmap` mapM blockToRST blocks + modify (\s->s{stTopLevel=tl}) + return res + blockListToRST :: [Block] -- ^ List of block elements -> State WriterState Doc -blockListToRST blocks = mapM blockToRST blocks >>= return . vcat +blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc @@ -334,12 +365,12 @@ inlineListToRST lst = okAfterComplex :: Inline -> Bool okAfterComplex Space = True okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—" + okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—" + okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -393,6 +424,9 @@ inlineToRST (Math t str) = do else blankline $$ (".. math:: " <> text str) $$ blankline inlineToRST (RawInline f x) | f == "rst" = return $ text x + | f == "latex" || f == "tex" = do + modify $ \st -> st{ stHasRawTeX = True } + return $ ":raw-latex:`" <> text x <> "`" | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space @@ -427,7 +461,7 @@ inlineToRST (Image alternate (source, tit)) = do return $ "|" <> label <> "|" inlineToRST (Note contents) = do -- add to notes in state - notes <- get >>= return . stNotes + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } let ref = show $ (length notes) + 1 return $ " [" <> text ref <> "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 43405ce3c..dfad4b0e2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -106,7 +106,9 @@ writeRTF options (Pandoc meta@(Meta metamap) blocks) = $ metadata in if writerStandalone options then renderTemplate' (writerTemplate options) context - else body + else case reverse body of + ('\n':_) -> body + _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. tableOfContents :: [Block] -> String diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 8ac717bab..792718e95 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -368,7 +368,7 @@ inlineListForNode = return . text . stringToTexinfo . -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool -disallowedInNode c = c `elem` ".,:()" +disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo inlineToTexinfo :: Inline -- ^ Inline to convert @@ -421,8 +421,8 @@ inlineToTexinfo (RawInline f str) return $ text "@tex" $$ text str $$ text "@end tex" | f == "texinfo" = return $ text str | otherwise = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" -inlineToTexinfo Space = return $ char ' ' +inlineToTexinfo (LineBreak) = return $ text "@*" <> cr +inlineToTexinfo Space = return space inlineToTexinfo (Link txt (src@('#':_), _)) = do contents <- escapeCommas $ inlineListToTexinfo txt |