diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 33 | ||||
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 119 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 80 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/StyleMap.hs | 106 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Util.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 32 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 30 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 60 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 178 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 108 |
19 files changed, 649 insertions, 220 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 89f61089b..b36046a5e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -66,6 +66,7 @@ module Text.Pandoc , mkStringReader , readDocx , readMarkdown + , readCommonMark , readMediaWiki , readRST , readOrg @@ -124,6 +125,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.Org @@ -229,6 +231,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("commonmark" , mkStringReader readCommonMark) ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) ,("mediawiki" , mkStringReader readMediaWiki) ,("docbook" , mkStringReader readDocBook) diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a55d5417e..1246cdc8f 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} {- Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> @@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) - deriving (Monoid) + deriving (Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 24e31fbb6..1776d14e5 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveDataTypeable #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -51,6 +52,8 @@ import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) import Data.Monoid +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Individually selectable syntax extensions. data Extension = @@ -74,7 +77,7 @@ data Extension = | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) | Ext_fenced_code_blocks -- ^ Parse fenced code blocks | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags @@ -109,7 +112,8 @@ data Extension = | Ext_implicit_header_references -- ^ Implicit reference links for headers | Ext_line_blocks -- ^ RST style line blocks | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - deriving (Show, Read, Enum, Eq, Ord, Bounded) + | Ext_shortcut_reference_links -- ^ Shortcut reference links + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable) pandocExtensions :: Set Extension pandocExtensions = Set.fromList @@ -151,6 +155,7 @@ pandocExtensions = Set.fromList , Ext_header_attributes , Ext_implicit_header_references , Ext_line_blocks + , Ext_shortcut_reference_links ] phpMarkdownExtraExtensions :: Set Extension @@ -164,6 +169,7 @@ phpMarkdownExtraExtensions = Set.fromList , Ext_intraword_underscores , Ext_header_attributes , Ext_abbreviations + , Ext_shortcut_reference_links ] githubMarkdownExtensions :: Set Extension @@ -180,6 +186,7 @@ githubMarkdownExtensions = Set.fromList , Ext_strikeout , Ext_hard_line_breaks , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links ] multimarkdownExtensions :: Set Extension @@ -202,7 +209,9 @@ multimarkdownExtensions = Set.fromList strictExtensions :: Set Extension strictExtensions = Set.fromList - [ Ext_raw_html ] + [ Ext_raw_html + , Ext_shortcut_reference_links + ] data ReaderOptions = ReaderOptions{ readerExtensions :: Set Extension -- ^ Syntax extensions @@ -220,7 +229,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges -} deriving (Show, Read) +} deriving (Show, Read, Data, Typeable) instance Default ReaderOptions where def = ReaderOptions{ @@ -242,7 +251,7 @@ instance Default ReaderOptions -- Writer options -- -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read) +data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable) data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js @@ -252,18 +261,18 @@ data HTMLMathMethod = PlainMath | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js | KaTeX String String -- url of stylesheet and katex.js - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -272,13 +281,13 @@ data HTMLSlideVariant = S5Slides | DZSlides | RevealJsSlides | NoSlides - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for writers data WriterOptions = WriterOptions @@ -324,7 +333,8 @@ data WriterOptions = WriterOptions , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader , writerVerbose :: Bool -- ^ Verbose debugging output - } deriving Show + , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + } deriving (Show, Data, Typeable) instance Default WriterOptions where def = WriterOptions { writerStandalone = False @@ -368,6 +378,7 @@ instance Default WriterOptions where , writerReferenceDocx = Nothing , writerMediaBag = mempty , writerVerbose = False + , writerLaTeXArgs = [] } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ea6699ac4..59a6ebede 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -71,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' - tex2pdf' (writerVerbose opts) tmpdir program source + args = writerLaTeXArgs opts + tex2pdf' (writerVerbose opts) args tmpdir program source handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -132,15 +133,16 @@ convertImage tmpdir fname = doNothing = return (Right fname) tex2pdf' :: Bool -- ^ Verbose output + -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' verbose tmpDir program source = do +tex2pdf' verbose args tmpDir program source = do let numruns = if "\\tableofcontents" `isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbose program 1 numruns tmpDir source + (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -173,9 +175,9 @@ extractMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Bool -> String -> Int -> Int -> FilePath -> String +runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbose program runNumber numRuns tmpDir source = do +runTeXProgram verbose program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source @@ -188,7 +190,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir', file'] + "-output-directory", tmpDir', file'] ++ args env' <- getEnvironment let sep = searchPathSeparator:[] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) @@ -212,7 +214,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do B.hPutStr stderr err putStr "\n" if runNumber <= numRuns - then runTeXProgram verbose program (runNumber + 1) numRuns tmpDir source + then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs new file mode 100644 index 000000000..51a35c8ad --- /dev/null +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -0,0 +1,119 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of CommonMark-formatted plain text to 'Pandoc' document. + +CommonMark is a strongly specified variant of Markdown: http://commonmark.org. +-} +module Text.Pandoc.Readers.CommonMark (readCommonMark) +where + +import CMark +import Data.Text (unpack, pack) +import Data.List (groupBy) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Error + +-- | Parse a CommonMark formatted string into a 'Pandoc' structure. +readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc +readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack + where opts' = if readerSmart opts + then [optNormalize, optSmart] + else [optNormalize] + +nodeToPandoc :: Node -> Pandoc +nodeToPandoc (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr addBlock [] nodes +nodeToPandoc n = -- shouldn't happen + Pandoc nullMeta $ foldr addBlock [] [n] + +addBlocks :: [Node] -> [Block] +addBlocks = foldr addBlock [] + +addBlock :: Node -> [Block] -> [Block] +addBlock (Node _ PARAGRAPH nodes) = + (Para (addInlines nodes) :) +addBlock (Node _ HRULE _) = + (HorizontalRule :) +addBlock (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks nodes) :) +addBlock (Node _ (HTML t) _) = + (RawBlock (Format "html") (unpack t) :) +addBlock (Node _ (CODE_BLOCK info t) _) = + (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) +addBlock (Node _ (HEADER lev) nodes) = + (Header lev ("",[],[]) (addInlines nodes) :) +addBlock (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks . children) nodes) :) + where constructor = case listType listAttrs of + BULLET_LIST -> BulletList + ORDERED_LIST -> OrderedList + (start, DefaultStyle, delim) + start = listStart listAttrs + setTightness = if listTight listAttrs + then map paraToPlain + else id + paraToPlain (Para xs) = Plain (xs) + paraToPlain x = x + delim = case listDelim listAttrs of + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock (Node _ ITEM _) = id -- handled in LIST +addBlock _ = id + +children :: Node -> [Node] +children (Node _ _ ns) = ns + +addInlines :: [Node] -> [Inline] +addInlines = foldr addInline [] + +addInline :: Node -> [Inline] -> [Inline] +addInline (Node _ (TEXT t) _) = (map toinl clumps ++) + where raw = unpack t + clumps = groupBy samekind raw + samekind ' ' ' ' = True + samekind ' ' _ = False + samekind _ ' ' = False + samekind _ _ = True + toinl (' ':_) = Space + toinl xs = Str xs +addInline (Node _ LINEBREAK _) = (LineBreak :) +addInline (Node _ SOFTBREAK _) = (Space :) +addInline (Node _ (INLINE_HTML t) _) = + (RawInline (Format "html") (unpack t) :) +addInline (Node _ (CODE t) _) = + (Code ("",[],[]) (unpack t) :) +addInline (Node _ EMPH nodes) = + (Emph (addInlines nodes) :) +addInline (Node _ STRONG nodes) = + (Strong (addInlines nodes) :) +addInline (Node _ (LINK url title) nodes) = + (Link (addInlines nodes) (unpack url, unpack title) :) +addInline (Node _ (IMAGE url title) nodes) = + (Image (addInlines nodes) (unpack url, unpack title) :) +addInline _ = id diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 900d0a1bf..67a97ae85 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -280,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs , s `elem` codeStyles = - return $ code $ concatMap runElemToString runElems + let rPr = resolveDependentRunStyle rs + codeString = code $ concatMap runElemToString runElems + in + return $ case rVertAlign rPr of + Just SupScrpt -> superscript codeString + Just SubScrpt -> subscript codeString + _ -> codeString | otherwise = do let ils = concatReduce (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 671d2acf3..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) +import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -108,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -158,6 +157,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool , pHeading :: Maybe (String, Int) + , pNumInfo :: Maybe (String, String) , pBlockQuote :: Maybe Bool } deriving Show @@ -167,6 +167,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False , pHeading = Nothing + , pNumInfo = Nothing , pBlockQuote = Nothing } @@ -224,6 +225,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) , isBlockQuote :: Maybe Bool + , numInfo :: Maybe (String, String) , psStyle :: Maybe ParStyle} deriving Show @@ -246,10 +248,6 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive @@ -266,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -285,7 +283,7 @@ archiveToStyles zf = case stylesElem of Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in ( M.fromList $ buildBasedOnList namespaces styElem (Nothing :: Maybe CharStyle), @@ -353,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -456,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -485,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element @@ -546,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger} elemToParIndentation _ _ = Nothing - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element | isElem ns "w" "p" element = do - let pPr = findChild (elemName ns "w" "pPr") element - numPr = pPr >>= findChild (elemName ns "w" "numPr") - lvl <- numPr >>= - findChild (elemName ns "w" "ilvl") >>= - findAttr (elemName ns "w" "val") - numId <- numPr >>= - findChild (elemName ns "w" "numId") >>= - findAttr (elemName ns "w" "val") - return (numId, lvl) -elemToNumInfo _ _ = Nothing - testBitMask :: String -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of @@ -578,20 +553,28 @@ elemToBodyPart ns element return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element - , Just (numId, lvl) <- elemToNumInfo ns element = do + , Just (numId, lvl) <- getNumInfo ns element = do sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) num <- asks envNumbering case lookupLevel numId lvl num of - Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> throwError WrongElem elemToBodyPart ns element | isElem ns "w" "p" element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty - parparts <- mapD (elemToParPart ns) (elChildren element) - return $ Paragraph parstyle parparts + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty + parparts <- mapD (elemToParPart ns) (elChildren element) + case pNumInfo parstyle of + Just (numId, lvl) -> do + num <- asks envNumbering + case lookupLevel numId lvl num of + Just levelInfo -> + return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> + throwError WrongElem + Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -771,6 +754,7 @@ elemToParagraphStyle ns element sty Just _ -> True Nothing -> False , pHeading = getParStyleField headingLev sty style + , pNumInfo = getParStyleField numInfo sty style , pBlockQuote = getParStyleField isBlockQuote sty style } elemToParagraphStyle _ _ _ = defaultParagraphStyle @@ -857,12 +841,26 @@ getBlockQuote ns element , styleName `elem` blockQuoteStyleNames = Just True getBlockQuote _ _ = Nothing +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChild (elemName ns "w" "pPr") element >>= + findChild (elemName ns "w" "numPr") + lvl = fromMaybe "0" (numPr >>= + findChild (elemName ns "w" "ilvl") >>= + findAttr (elemName ns "w" "val")) + numId <- numPr >>= + findChild (elemName ns "w" "numId") >>= + findAttr (elemName ns "w" "val") + return (numId, lvl) + + elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData elemToParStyleData ns element parentStyle = ParStyleData { headingLev = getHeaderLevel ns element , isBlockQuote = getBlockQuote ns element + , numInfo = getNumInfo ns element , psStyle = parentStyle } diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2901ea2a3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,106 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a +insert (Just k) (Just v) m = alterMap (M.insert k v) m +insert _ _ m = m + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = State StyleMaps a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = execState genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + genStyleItem e = do + styleType <- getStyleType e + styleId <- getAttrStyleId e + nameValLowercase <- fmap (map toLower) `fmap` getNameVal e + case styleType of + Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId + _ -> return () + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + +getStyleType :: Element -> StateM (Maybe StyleType) +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + Just "paragraph" -> return $ Just ParaStyle + Just "character" -> return $ Just CharStyle + _ -> return Nothing + +getAttrType :: Element -> StateM (Maybe String) +getAttrType el = do + name <- elemName' "type" + return $ findAttr name el + +getAttrStyleId :: Element -> StateM (Maybe String) +getAttrStyleId el = do + name <- elemName' "styleId" + return $ findAttr name el + +getNameVal :: Element -> StateM (Maybe String) +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + return $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 633388db8..a517f9566 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -272,7 +272,9 @@ ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author @@ -342,7 +344,7 @@ setCaption :: LP Blocks setCaption = do ils <- tok mblabel <- option Nothing $ - try $ spaces >> controlSeq "label" >> (Just <$> tok) + try $ spaces' >> controlSeq "label" >> (Just <$> tok) let ils' = case mblabel of Just lab -> ils <> spanWith ("",[],[("data-label", stringify lab)]) mempty @@ -370,7 +372,7 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl' contents @@ -496,7 +498,7 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) + , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) , (",", pure mempty) , ("@", pure mempty) , (" ", lit "\160") @@ -509,7 +511,7 @@ inlineCommands = M.fromList $ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) - , ("lstinline", doverb) + , ("lstinline", skipopts *> doverb) , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> @@ -1272,7 +1274,7 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}") + let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' @@ -1286,13 +1288,13 @@ parseAligns = try $ do return aligns' hline :: LP () -hline = () <$ (try $ spaces >> controlSeq "hline") +hline = () <$ (try $ spaces' *> controlSeq "hline" <* spaces') lbreak :: LP () -lbreak = () <$ (try $ spaces *> controlSeq "\\") +lbreak = () <$ (try $ spaces' *> controlSeq "\\" <* spaces') amp :: LP () -amp = () <$ (try $ spaces *> char '&') +amp = () <$ (try $ spaces' *> char '&') parseTableRow :: Int -- ^ number of columns -> LP [Blocks] @@ -1305,20 +1307,22 @@ parseTableRow cols = try $ do guard $ cells' /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces + spaces' return cells'' +spaces' :: LP () +spaces' = spaces *> skipMany (comment *> spaces) + simpTable :: Bool -> LP Blocks simpTable hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces >> tok) - spaces + when hasWidthParameter $ () <$ (spaces' >> tok) + skipopts aligns <- parseAligns let cols = length aligns optional hline header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) - spaces - skipMany (comment *> spaces) + spaces' let header'' = if null header' then replicate cols mempty else header' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b43dda3a1..369c889d1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -36,7 +36,7 @@ import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, toLower ) +import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import qualified Data.Text as T @@ -56,7 +56,7 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad import Control.Monad.Reader import System.FilePath (takeExtension, addExtension) @@ -80,7 +80,7 @@ readMarkdown opts s = -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a @@ -284,7 +284,7 @@ ignorable :: Text -> Bool ignorable t = T.pack "_" `T.isSuffixOf` t toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue -toMetaValue opts x = toMeta <$> readMarkdown opts (T.unpack x) +toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) where toMeta p = case p of @@ -294,6 +294,11 @@ toMetaValue opts x = toMeta <$> readMarkdown opts (T.unpack x) | otherwise -> MetaInlines xs Pandoc _ bs -> MetaBlocks bs endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} + meta_exts = Set.fromList [ Ext_pandoc_title_block + , Ext_mmd_title_block + , Ext_yaml_metadata_block + ] yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t @@ -329,11 +334,15 @@ mmdTitleBlock = try $ do kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + skipMany1 spaceNoNewline val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + guard $ not . null . trim $ val let key' = concat $ words $ map toLower key let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') + where + spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -471,12 +480,12 @@ block = do , bulletList , header , lhsCodeBlock - , rawTeXBlock , divHtml , htmlBlock , table - , lineBlock , codeBlockIndented + , rawTeXBlock + , lineBlock , blockQuote , hrule , orderedList @@ -1299,12 +1308,8 @@ pipeBreak = try $ do pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- try ( pipeBreak >>= \als -> - return (replicate (length als) mempty, als)) - <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> - - return (row, als) ) - lines' <- many1 pipeTableRow + (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- many pipeTableRow let widths = replicate (length aligns) 0.0 return (aligns, widths, heads, lines') @@ -1684,6 +1689,7 @@ referenceLink constructor (lab, raw) = do lookAhead (try (spnl >> normalCite >> return (mempty, ""))) <|> try (spnl >> reference) + when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0bb6dd436..939d10fb2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -593,11 +593,17 @@ imageOption = <|> try (many1 (oneOf "x0123456789") <* string "px") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +collapseUnderscores :: String -> String +collapseUnderscores [] = [] +collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) +collapseUnderscores (x:xs) = x : collapseUnderscores xs + +addUnderscores :: String -> String +addUnderscores = collapseUnderscores . intercalate "_" . words + internalLink :: MWParser Inlines internalLink = try $ do sym "[[" - let addUnderscores x = let (pref,suff) = break (=='#') x - in pref ++ intercalate "_" (words suff) pagename <- unwords . words <$> many (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9d2c355ee..1dfbdd700 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,6 +2,8 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> @@ -49,7 +51,7 @@ import Control.Applicative ( pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) import Control.Monad (guard, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks) +import Control.Monad.Reader (Reader, runReader, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl') @@ -66,16 +68,19 @@ readOrg :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readOrg opts s = runOrg opts s parseOrg +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext + , finalState :: OrgParserState } + +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) + runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a runOrg opts inp p = fst <$> res where imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") - res = runReader imd s + res = runReader imd def { finalState = s } s :: OrgParserState s = either def snd res -type OrgParser a = ParserT [Char] OrgParserState (Reader OrgParserState) a - parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks @@ -134,6 +139,9 @@ data OrgParserState = OrgParserState , orgStateNotes' :: OrgNoteTable } +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote def + instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -147,6 +155,10 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + instance Default OrgParserState where def = defaultOrgParserState @@ -943,6 +955,7 @@ inline = , subscript , superscript , inlineLaTeX + , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" @@ -1050,7 +1063,7 @@ inlineNote = try $ do referencedNote :: OrgParser Inlines referencedNote = try $ do ref <- noteMarker - notes <- asks orgStateNotes' + notes <- asks (orgStateNotes' . finalState) return $ case lookup ref notes of Just contents -> B.note contents @@ -1113,7 +1126,7 @@ possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") applyCustomLinkFormat :: String -> OrgParser String applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link - fmts <- ask + fmts <- asks finalState return $ case M.lookup linkType (orgStateLinkFormatters fmts) of Just v -> (v (drop 1 rest)) @@ -1165,7 +1178,7 @@ isImageFilename filename = internalLink :: String -> Inlines -> OrgParser Inlines internalLink link title = do - anchorB <- ask + anchorB <- asks finalState return $ if link `elem` (orgStateAnchorIds anchorB) then B.link ('#':link) "" title @@ -1248,8 +1261,9 @@ math = B.math <$> choice [ math1CharBetween '$' displayMath :: OrgParser Inlines displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] + , rawMathBetween "$$" "$$" + ] + symbol :: OrgParser Inlines symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) where updatePositions c = do @@ -1466,3 +1480,31 @@ inlineLaTeXCommand = try $ do count len anyChar return cs _ -> mzero + +smart :: OrgParser Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [orgApostrophe, dash, ellipses] + where orgApostrophe = + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + *> return (B.str "\x2019") + +singleQuoted :: OrgParser Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + B.singleQuoted . trimInlines . mconcat <$> + many1Till inline singleQuoteEnd + +-- doubleQuoted will handle regular double-quoted sections, as well +-- as dialogues with an open double-quote without a close double-quote +-- in the same paragraph. +doubleQuoted :: OrgParser Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return + (B.doubleQuoted . trimInlines $ contents)) + <|> (return $ (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5b8c5167..1c33b004a 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines - return $ contents <> cr + return $ contents <> blankline blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do @@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } let marker = text (replicate lev '*') - return $ marker <> space <> contents <> cr + return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. orderedListItemToAsciiDoc :: WriterOptions -- ^ options @@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> space <> contents <> cr + return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. definitionListItemToAsciiDoc :: WriterOptions diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b10317506..19f8f2f11 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | + not (null id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 441392918..81369e278 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -63,8 +65,7 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) -import Data.Maybe (fromMaybe, mapMaybe) -import Data.Char (isDigit) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) data ListMarker = NoMarker | BulletMarker @@ -106,7 +107,7 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stHeadingStyles :: [(Int,String)] + , stStyleMaps :: StyleMaps , stFirstPara :: Bool } @@ -127,7 +128,7 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stHeadingStyles = [] + , stStyleMaps = defaultStyleMaps , stFirstPara = False } @@ -215,32 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . - filter ((==Just "xmlns") . qPrefix . attrKey) . - elAttribs $ styledoc - let headingStyles = - let - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getNameVal = findChild (myName "name") >=> findAttr (myName "val") - getNum s | not $ null s, all isDigit s = Just (read s :: Int) - | otherwise = Nothing - getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum - getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum - toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId - toMap getF = mapMaybe (toTuple getF) $ - findChildren (myName "style") styledoc - select a b | not $ null a = a - | otherwise = b - in - select (toMap getEngHeader) (toMap getIntHeader) + let styleMaps = getStyleMaps styledoc ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stHeadingStyles = headingStyles} + , stStyleMaps = styleMaps + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -393,9 +376,18 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -472,10 +464,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () @@ -496,7 +491,9 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () @@ -602,14 +599,14 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -623,11 +620,23 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +pStyleM :: String -> WS XML.Element +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () + +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () + +rStyleM :: String -> WS XML.Element +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -641,13 +650,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do setFirstPara - headingStyles <- gets stHeadingStyles - paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds @@ -660,26 +668,27 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara paraProps <- getParaProps False contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - isFirstPara <- gets stFirstPara + isFirstPara <- gets stFirstPara paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False + bodyTextStyle <- pStyleM "Body Text" let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]] - [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]] + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -688,11 +697,11 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -707,7 +716,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -718,32 +727,36 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' )] blockToOpenXML opts (BulletList lst) = do @@ -767,9 +780,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -833,6 +846,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -861,6 +877,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -943,25 +962,26 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rCustomStyle (show toktype) ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rCustomStyle "VerbatimChar") + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -971,22 +991,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -1088,7 +1108,7 @@ defaultFootnotes = [ mknode "w:footnote" [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] - + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index ef00ea036..53dc931cc 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -446,19 +446,25 @@ blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents blockToHtml opts (Div attr@(_,classes,_) bs) = do - contents <- blockListToHtml opts bs + let speakerNotes = "notes" `elem` classes + -- we don't want incremental output inside speaker notes, see #1394 + let opts' = if speakerNotes then opts{ writerIncremental = False } else opts + contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts return $ - if "notes" `elem` classes - then let opts' = opts{ writerIncremental = False } in - -- we don't want incremental output inside speaker notes - case writerSlideVariant opts of + if speakerNotes + then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' -blockToHtml _ (RawBlock f str) +blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str + | f == Format "latex" = + case writerHTMLMathMethod opts of + MathJax _ -> do modify (\st -> st{ stMath = True }) + return $ toHtml str + _ -> return mempty | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do @@ -769,6 +775,8 @@ inlineToHtml opts inline = case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str + MathJax _ -> do modify (\st -> st {stMath = True}) + return $ toHtml str _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 0e5ec5c18..58456e3ab 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) +import Data.Aeson.Types ( (.:), parseMaybe, withObject ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -102,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + metadata <- metaToJSON options + (fmap (render colwidth) . blockListToLaTeX) + (fmap (render colwidth) . inlineListToLaTeX) + meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - case lookup "documentclass" (writerVariables options) of + case lookup "documentclass" (writerVariables options) `mplus` + parseMaybe (withObject "object" (.: "documentclass")) metadata of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && @@ -114,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - let colwidth = if writerWrapText options - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) else case last blocks' of @@ -701,7 +703,7 @@ inlineListToLaTeX lst = ("\\\\[" ++ show (length lbs) ++ "\\baselineskip]") : fixBreaks rest fixBreaks (y:ys) = y : fixBreaks ys - + isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True isQuoted _ = False @@ -750,10 +752,11 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions + inHeading <- gets stInHeading case () of - _ | writerListings opts -> listingsCode + _ | writerListings opts && not inHeading -> listingsCode | writerHighlight opts && not (null classes) -> highlightCode - | otherwise -> rawCode + | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d71f0daf8..dee4d56a4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -57,12 +57,15 @@ import qualified Data.Text as T type Notes = [[Block]] type Refs = [([Inline], Target)] -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: [String] - , stPlain :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stRefShortcutable :: Bool + , stInList :: Bool + , stIds :: [String] + , stPlain :: Bool } instance Default WriterState - where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False } + where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, + stInList = False, stIds = [], stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do $ Pandoc nullMeta [t] return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items + contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let start' = if isEnabled Ext_startnum opts then start else 1 @@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + contents <- inList $ + mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items + contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline +inList :: State WriterState a -> State WriterState a +inList p = do + oldInList <- gets stInList + modify $ \st -> st{ stInList = True } + res <- p + modify $ \st -> st{ stInList = oldInList } + return res + addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of @@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do AlignCenter -> ':':replicate w '-' ++ ":" AlignRight -> replicate (w + 1) '-' ++ ":" AlignDefault -> replicate (w + 2) '-' - let header = if headless then empty else torow rawHeaders + -- note: pipe tables can't completely lack a + -- header; for a headerless table, we need a header of empty cells. + -- see jgm/pandoc#1996. + let header = if headless + then torow (replicate (length aligns) empty) + else torow rawHeaders let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows @@ -677,12 +694,53 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat - where avoidBadWraps [] = [] - avoidBadWraps (Space:Str (c:cs):xs) - | c `elem` ("-*+>" :: String) = Str (' ':c:cs) : avoidBadWraps xs - avoidBadWraps (x:xs) = x : avoidBadWraps xs +inlineListToMarkdown opts lst = do + inlist <- gets stInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (i:is) = case i of + (Link _ _) -> case is of + -- If a link is followed by another link or '[' we don't shortcut + (Link _ _):_ -> unshortcutable + Space:(Link _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- withState (\s -> s { stRefShortcutable = False }) + (inlineToMarkdown opts i) + modify (\s -> s {stRefShortcutable = True }) + fmap (iMark <>) (go is) + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (Space:Str ('>':cs):xs) = + Str (' ':'>':cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str [c]:[]) + | c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (Space:Str [c]:Space:xs) + | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:Space:xs) + | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:[]) + | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: String -> Bool +isOrderedListMarker xs = (last xs `elem` ['.',')']) && + isRight (runParserT (anyOrderedListMarker >> eof) + defaultParserState "" xs) + +isRight :: Either a b -> Bool +isRight (Right _) = True +isRight (Left _) = False escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -692,8 +750,10 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts + return $ if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) then tagWithAttrs "span" attrs <> contents <> text "</span>" else contents inlineToMarkdown opts (Emph lst) = do @@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do + if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) + ("",[],[("style","font-variant:small-caps;")]) <> contents <> text "</span>" + else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" @@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do [Str s] | escapeURI s == srcSuffix -> True _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- gets stRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto @@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref - then "[]" + then if useShortcutRefLinks + then "" + else "[]" else "[" <> reftext <> "]" in first <> second else if plain |